From cbc4a4755c492010576289dd63356dda3f4da20c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Sun, 4 May 2025 12:06:47 +0200 Subject: [PATCH] feat(tokenizer): recognize meta-commands --- notes.org | 53 +++++ src/meta_commands.rs | 2 +- ..._tokens__tests__tokenize_meta_command.snap | 28 +++ ..._tests__tokenize_unknown_meta_command.snap | 18 ++ src/tokens.rs | 182 ++++++++++++------ 5 files changed, 221 insertions(+), 62 deletions(-) create mode 100644 src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap create mode 100644 src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap diff --git a/notes.org b/notes.org index c966b68..c440953 100644 --- a/notes.org +++ b/notes.org @@ -121,3 +121,56 @@ CLOCK: [2025-05-03 sam. 19:06]--[2025-05-03 sam. 19:07] => 0:01 :END: * TODO switch statement parsing to more extensible token-based algorithm +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 12:07]--[2025-05-04 dim. 12:10] => 0:03 +:END: + +** TODO use tokens to parse meta-commands +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 12:10]--[2025-05-04 dim. 12:22] => 0:12 +:END: + +*** DONE recognize meta-commands as tokens +:PROPERTIES: +:EFFORT: 10 +:END: +:LOGBOOK: +CLOCK: [2025-05-04 dim. 13:32]--[2025-05-04 dim. 13:35] => 0:03 +CLOCK: [2025-05-04 dim. 13:27]--[2025-05-04 dim. 13:32] => 0:05 +:END: + +*** TODO CommandParseError must have a ScanError variant with an Into impl +:PROPERTIES: +:EFFORT: 10 +:END: + +*** TODO ScanErrors must be convertible to ariadne reports +:PROPERTIES: +:EFFORT: 10 +:END: + +**** TODO Remove the CommandParseError Display implementation +:PROPERTIES: +:EFFORT: 10 +:END: + +*** TODO remove token types which are not recognized at all +:PROPERTIES: +:EFFORT: 10 +:END: + +*** TODO parse tokens into meta-commands +:PROPERTIES: +:EFFORT: 10 +:END: + +** TODO use tokens to parse statements +:PROPERTIES: +:EFFORT: +:END: diff --git a/src/meta_commands.rs b/src/meta_commands.rs index e81beb9..e0d42c4 100644 --- a/src/meta_commands.rs +++ b/src/meta_commands.rs @@ -1,4 +1,4 @@ -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum MetaCommand { Exit, } diff --git a/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap b/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap new file mode 100644 index 0000000..24454ab --- /dev/null +++ b/src/snapshots/osdb__tokens__tests__tokenize_meta_command.snap @@ -0,0 +1,28 @@ +--- +source: src/tokens.rs +expression: "tokenize(\".exit\".to_string(), \"\".to_string())" +--- +Ok( + [ + Token { + location: Location { + file: "", + offset: 0, + length: 5, + }, + data: MetaCommand( + Exit, + ), + lexeme: ".exit", + }, + Token { + location: Location { + file: "", + offset: 5, + length: 0, + }, + data: EndOfFile, + lexeme: "", + }, + ], +) diff --git a/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap b/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap new file mode 100644 index 0000000..9e04320 --- /dev/null +++ b/src/snapshots/osdb__tokens__tests__tokenize_unknown_meta_command.snap @@ -0,0 +1,18 @@ +--- +source: src/tokens.rs +expression: "tokenize(\".halp\".to_string(), \"\".to_string())" +--- +Err( + [ + ScanError { + location: Location { + file: "", + offset: 0, + length: 5, + }, + kind: UnknownMetaCommand( + ".halp", + ), + }, + ], +) diff --git a/src/tokens.rs b/src/tokens.rs index 5ee06a4..0bff497 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,3 +1,5 @@ +use crate::meta_commands::MetaCommand; + #[derive(Debug, Eq, PartialEq)] pub enum TokenData { /// INSERT @@ -8,6 +10,7 @@ pub enum TokenData { Integer(i64), /// Hello World! String(String), + MetaCommand(MetaCommand), /// No file O.O? EndOfFile, } @@ -61,6 +64,7 @@ pub enum ScanErrorKind { UnexpectedChar(char), UnexpectedEndOfInput, UnknownKeyword(String), + UnknownMetaCommand(String), } #[derive(Debug, Eq, PartialEq)] @@ -105,6 +109,44 @@ impl Tokenizer { } } + fn recognize_metacommand(word: &str) -> Option { + match word.to_lowercase().as_str() { + ".exit" => Some(TokenData::MetaCommand(MetaCommand::Exit)), + _ => None, + } + } + + fn scan_meta_command(&mut self) -> Result { + let start_offset = self.offset; + let mut word = String::new(); + let mut length = 0; + if let Some(c) = self.advance() { + word.push(c); + length += 1; + } + while let Some(c) = self.peek() { + if c.is_alphabetic() || c == '_' { + word.push(c); + self.advance(); + } else { + break; + } + length += 1; + } + if let Some(meta) = Self::recognize_metacommand(&word) { + Ok(Token { + location: Location::new(self.file.clone(), start_offset, length), + data: meta, + lexeme: word, + }) + } else { + Err(ScanError { + location: Location::new(self.file.clone(), start_offset, length), + kind: ScanErrorKind::UnknownMetaCommand(word), + }) + } + } + fn scan_identifier_or_keyword(&mut self) -> Result { let start_offset = self.offset; let mut word = String::new(); @@ -149,6 +191,8 @@ impl Tokenizer { if let Some(c) = self.peek() { if Self::ident_or_keyword_start(c) { return self.scan_identifier_or_keyword(); + } else if c == '.' { + return self.scan_meta_command(); } else if c.is_whitespace() { self.advance(); } else { @@ -193,66 +237,82 @@ pub fn tokenize(input: String, file: String) -> Result, Vec".to_string())); + } + + #[test] + fn test_tokenize_unknown_meta_command() { + assert_debug_snapshot!(tokenize(".halp".to_string(), "".to_string())); + } + + #[test] + fn test_tokenizer() { + let mut scanresult = + tokenize("INSERT Select".to_string(), "src/statement.sql".to_string()).unwrap(); + scanresult.reverse(); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 0, 6), + data: TokenData::Insert, + lexeme: String::from("INSERT"), + }) + ); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 7, 6), + data: TokenData::Select, + lexeme: String::from("Select"), + }) + ); + assert_eq!( + scanresult.pop(), + Some(Token { + location: Location::new(String::from("src/statement.sql"), 13, 0), + data: TokenData::EndOfFile, + lexeme: String::from(""), + }) + ); + assert_eq!(scanresult.pop(), None); + assert!(scanresult.is_empty()); + } + + #[test] + fn test_tokenizer_errors() { + let mut scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string()) + .err() + .unwrap(); + scanerrors.reverse(); + assert_eq!( + scanerrors.pop(), + Some(ScanError { + location: Location { + file: "src/statement.sql".to_string(), + offset: 0, + length: 6, + }, + kind: ScanErrorKind::UnknownKeyword("salact".to_string()), + }) + ); + assert_eq!( + scanerrors.pop(), + Some(ScanError { + location: Location { + file: "src/statement.sql".to_string(), + offset: 8, + length: 1, + }, + kind: ScanErrorKind::UnexpectedChar('+'), + }) + ); + assert!(scanerrors.is_empty()); + } }