diff --git a/Cargo.lock b/Cargo.lock index 1b637c2..35cd7e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,16 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ariadne" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" +dependencies = [ + "unicode-width", + "yansi", +] + [[package]] name = "console" version = "0.15.11" @@ -47,6 +57,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" name = "osdb" version = "0.1.0" dependencies = [ + "ariadne", "insta", ] @@ -56,6 +67,12 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "windows-sys" version = "0.59.0" @@ -128,3 +145,9 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/Cargo.toml b/Cargo.toml index c53c9f7..6ba1a65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" authors = ["Khaïs COLIN"] [dependencies] +ariadne = "0.5.1" [dev-dependencies] insta = "1.43.1" diff --git a/notes.org b/notes.org index 39d3be7..e059314 100644 --- a/notes.org +++ b/notes.org @@ -1,6 +1,6 @@ #+title: Notes -* TODO show errors with ariadne +* DONE show errors with ariadne :PROPERTIES: :EFFORT: 10min :END: @@ -11,8 +11,38 @@ :LOGBOOK: CLOCK: [2025-05-03 sam. 
18:40]--[2025-05-03 sam. 18:46] => 0:06 :END: -** TODO OSDBError::display() should take a filename and display it alongside the error - +** DONE OSDBError::display() should take a filename and display it alongside the error +:LOGBOOK: +CLOCK: [2025-05-03 sam. 21:24]--[2025-05-03 sam. 21:28] => 0:04 +:END: +** DONE OSDBError::display() should take an input string, to be able to resolve spans inside the error +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-03 sam. 21:28]--[2025-05-03 sam. 21:30] => 0:02 +:END: +** DONE OSDBError::display() should generate ariadne errors and return those +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-03 sam. 21:30]--[2025-05-03 sam. 21:50] => 0:20 +:END: +** DONE error display should include a span to show where the error occurred +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-03 sam. 21:51]--[2025-05-03 sam. 21:54] => 0:03 +:END: +** DONE deduplicate error display code +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-03 sam. 21:55]--[2025-05-03 sam. 22:01] => 0:06 +:END: * DONE snapshot testing ** DONE Find the snapshot testing library @@ -89,3 +119,72 @@ CLOCK: [2025-05-03 sam. 21:21]--[2025-05-03 sam. 21:22] => 0:01 :LOGBOOK: CLOCK: [2025-05-03 sam. 19:06]--[2025-05-03 sam. 19:07] => 0:01 :END: + +* TODO switch statement parsing to more extensible token-based algorithm +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 12:07]--[2025-05-04 dim. 12:10] => 0:03 +:END: + +** TODO use tokens to parse meta-commands +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 12:10]--[2025-05-04 dim. 12:22] => 0:12 +:END: + +*** DONE recognize meta-commands as tokens +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 13:32]--[2025-05-04 dim. 13:35] => 0:03 +CLOCK: [2025-05-04 dim. 13:27]--[2025-05-04 dim. 
13:32] => 0:05 +:END: + +*** DONE CommandParseError must have a ScanError variant with an Into impl +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 13:35]--[2025-05-04 dim. 13:38] => 0:03 +:END: + +*** DONE ScanErrors must be convertible to ariadne reports +:PROPERTIES: +:EFFORT: 10 +:END: + +**** DONE Remove the CommandParseError Display implementation +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 13:38]--[2025-05-04 dim. 13:44] => 0:06 +:END: + +**** DONE implement OSDBError for ScanError +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 13:45]--[2025-05-04 dim. 13:56] => 0:11 +:END: + +*** DONE remove token types which are not recognized at all +:PROPERTIES: +:EFFORT: 10 +:END: + +*** TODO parse tokens into meta-commands +:PROPERTIES: +:EFFORT: 10 +:END: + +** TODO use tokens to parse statements +:PROPERTIES: +:EFFORT: +:END: diff --git a/src/command.rs b/src/command.rs index 32e4cd0..d016549 100644 --- a/src/command.rs +++ b/src/command.rs @@ -1,5 +1,6 @@ use crate::meta_commands::{MetaCommand, MetaCommandExecuteResult, MetaCommandParseError}; use crate::statements::{Statement, StatementExecuteResult, StatementParseError}; +use crate::tokens::ScanError; #[derive(Debug)] pub enum Command { @@ -50,17 +51,15 @@ impl Command { pub enum CommandParseError { MetaCommand(MetaCommandParseError), Statement(StatementParseError), + Scan(ScanError), } -impl std::fmt::Display for CommandParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl CommandParseError { + pub(crate) fn message(&self) -> String { match self { - CommandParseError::MetaCommand(meta_command_parse_error) => { - write!(f, "{meta_command_parse_error}") - } - CommandParseError::Statement(statement_parse_error) => { - write!(f, "{statement_parse_error}") - } + CommandParseError::MetaCommand(x) => format!("{x}"), + CommandParseError::Statement(x) => format!("{x}"), + CommandParseError::Scan(x) => format!("{x}"), 
} } } @@ -89,6 +88,12 @@ impl From for CommandParseError { } } +impl From<ScanError> for CommandParseError { + fn from(value: ScanError) -> Self { + CommandParseError::Scan(value) + } +} + impl std::str::FromStr for Command { type Err = CommandParseError; diff --git a/src/error_display.rs b/src/error_display.rs index 5a302ff..838acd4 100644 --- a/src/error_display.rs +++ b/src/error_display.rs @@ -1,26 +1,36 @@ -use crate::{ - command::CommandParseError, meta_commands::MetaCommandParseError, - statements::StatementParseError, -}; +use crate::{command::CommandParseError, tokens::ScanError}; +use ariadne::{Color, Label, Report, ReportKind, Source}; pub trait OSDBError { - fn display(&self); -} - -impl OSDBError for MetaCommandParseError { - fn display(&self) { - println!("{self}") - } -} - -impl OSDBError for StatementParseError { - fn display(&self) { - println!("{self}") - } + /// Prints this error as an ariadne report over `input`, using `file` as the source name. + fn display(&self, file: &str, input: &str); } impl OSDBError for CommandParseError { - fn display(&self) { - println!("{self}") + fn display(&self, file: &str, input: &str) { + if let CommandParseError::Scan(x) = self { + x.display(file, input); + } else { + // Measure in characters and saturate so an empty input cannot underflow the span end. + let whole_input = 0..input.chars().count().saturating_sub(1); + Report::build(ReportKind::Error, (file, whole_input.clone())) + .with_message(self.message()) + .with_label(Label::new((file, whole_input)).with_color(Color::Red)) + .finish() + .print((file, Source::from(input))) + .unwrap(); + } + } +} + +impl OSDBError for ScanError { + fn display(&self, file: &str, input: &str) { + let location = (file, Into::<std::ops::Range<usize>>::into(&self.location)); + Report::build(ReportKind::Error, location.clone()) + .with_message(format!("{self}")) + .with_label(Label::new(location).with_color(Color::Red)) + .finish() + .print((file, Source::from(input))) + .unwrap(); } } diff --git a/src/main.rs b/src/main.rs index a0fe1d0..5757d01 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,7 @@ fn main() { break; } } - Err(err) => err.display(), + Err(err) => err.display("", &input), } } println!("Good-bye"); diff --git 
a/src/meta_commands.rs b/src/meta_commands.rs index e81beb9..e0d42c4 100644 --- a/src/meta_commands.rs +++ b/src/meta_commands.rs @@ -1,4 +1,4 @@ -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum MetaCommand { Exit, } diff --git a/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap b/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap new file mode 100644 index 0000000..24454ab --- /dev/null +++ b/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap @@ -0,0 +1,28 @@ +--- +source: src/tokens.rs +expression: "tokenize(\".exit\".to_string(), \"\".to_string())" +--- +Ok( + [ + Token { + location: Location { + file: "", + offset: 0, + length: 5, + }, + data: MetaCommand( + Exit, + ), + lexeme: ".exit", + }, + Token { + location: Location { + file: "", + offset: 5, + length: 0, + }, + data: EndOfFile, + lexeme: "", + }, + ], +) diff --git a/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap b/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap new file mode 100644 index 0000000..9e04320 --- /dev/null +++ b/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap @@ -0,0 +1,18 @@ +--- +source: src/tokens.rs +expression: "tokenize(\".halp\".to_string(), \"\".to_string())" +--- +Err( + [ + ScanError { + location: Location { + file: "", + offset: 0, + length: 5, + }, + kind: UnknownMetaCommand( + ".halp", + ), + }, + ], +) diff --git a/src/tokens.rs b/src/tokens.rs index 5ee06a4..4ecc87d 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,14 +1,10 @@ +use crate::meta_commands::MetaCommand; + #[derive(Debug, Eq, PartialEq)] pub enum TokenData { - /// INSERT Insert, - /// SELECT Select, - /// 0, 1, -21635, 867463 - Integer(i64), - /// Hello World! - String(String), - /// No file O.O? 
+ MetaCommand(MetaCommand), EndOfFile, } @@ -22,6 +18,15 @@ pub struct Location { pub length: usize, } +impl From<&Location> for std::ops::Range<usize> { + fn from(val: &Location) -> Self { + std::ops::Range { + start: val.offset, + end: val.offset + val.length, + } + } +} + impl Location { /// ``` /// use osdb::tokens::Location; @@ -61,6 +66,18 @@ pub enum ScanErrorKind { UnexpectedChar(char), UnexpectedEndOfInput, UnknownKeyword(String), + UnknownMetaCommand(String), +} + +impl std::fmt::Display for ScanErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + ScanErrorKind::UnexpectedChar(c) => write!(f, "unexpected char: {c:?}"), + ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"), + ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"), + ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"), + } + } } #[derive(Debug, Eq, PartialEq)] @@ -69,6 +86,13 @@ pub struct ScanError { pub kind: ScanErrorKind, } +impl std::fmt::Display for ScanError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let kind = &self.kind; + write!(f, "{kind}") + } +} + impl Tokenizer { fn new(input: String, file: String) -> Self { Self { @@ -105,6 +129,44 @@ impl Tokenizer { } } + fn recognize_metacommand(word: &str) -> Option<TokenData> { + match word.to_lowercase().as_str() { + ".exit" => Some(TokenData::MetaCommand(MetaCommand::Exit)), + _ => None, + } + } + + fn scan_meta_command(&mut self) -> Result<Token, ScanError> { + let start_offset = self.offset; + let mut word = String::new(); + let mut length = 0; + if let Some(c) = self.advance() { + word.push(c); + length += 1; + } + while let Some(c) = self.peek() { + if c.is_alphabetic() || c == '_' { + word.push(c); + self.advance(); + } else { + break; + } + length += 1; + } + if let Some(meta) = Self::recognize_metacommand(&word) { + Ok(Token { + location: Location::new(self.file.clone(), start_offset, length), + data: meta, + lexeme: 
word, + }) + } else { + Err(ScanError { + location: Location::new(self.file.clone(), start_offset, length), + kind: ScanErrorKind::UnknownMetaCommand(word), + }) + } + } + fn scan_identifier_or_keyword(&mut self) -> Result { let start_offset = self.offset; let mut word = String::new(); @@ -149,6 +211,8 @@ impl Tokenizer { if let Some(c) = self.peek() { if Self::ident_or_keyword_start(c) { return self.scan_identifier_or_keyword(); + } else if c == '.' { + return self.scan_meta_command(); } else if c.is_whitespace() { self.advance(); } else { @@ -193,66 +257,82 @@ pub fn tokenize(input: String, file: String) -> Result, Vec".to_string())); + } + + #[test] + fn test_tokenize_unknown_meta_command() { + assert_debug_snapshot!(tokenize(".halp".to_string(), "".to_string())); + } + + #[test] + fn test_tokenizer() { + let mut scanresult = + tokenize("INSERT Select".to_string(), "src/statement.sql".to_string()).unwrap(); + scanresult.reverse(); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 0, 6), + data: TokenData::Insert, + lexeme: String::from("INSERT"), + }) + ); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 7, 6), + data: TokenData::Select, + lexeme: String::from("Select"), + }) + ); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 13, 0), + data: TokenData::EndOfFile, + lexeme: String::from(""), + }) + ); + assert_eq!(scanresult.pop(), None); + assert!(scanresult.is_empty()); + } + + #[test] + fn test_tokenizer_errors() { + let mut scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string()) + .err() + .unwrap(); + scanerrors.reverse(); + assert_eq!( + scanerrors.pop(), + Some(ScanError { + location: Location { + file: "src/statement.sql".to_string(), + offset: 0, + length: 6, + }, + kind: ScanErrorKind::UnknownKeyword("salact".to_string()), + }) + ); + assert_eq!( + 
scanerrors.pop(), + Some(ScanError { + location: Location { + file: "src/statement.sql".to_string(), + offset: 8, + length: 1, + }, + kind: ScanErrorKind::UnexpectedChar('+'), + }) + ); + assert!(scanerrors.is_empty()); + } }