feat(tokenizer): recognize meta-commands

This commit is contained in:
Khaïs COLIN 2025-05-04 12:06:47 +02:00
parent 825511a515
commit cbc4a4755c
5 changed files with 221 additions and 62 deletions

View file

@ -121,3 +121,56 @@ CLOCK: [2025-05-03 sam. 19:06]--[2025-05-03 sam. 19:07] => 0:01
:END:
* TODO switch statement parsing to more extensible token-based algorithm
:PROPERTIES:
:EFFORT: 10
:END:
:LOGBOOK:
CLOCK: [2025-05-04 dim. 12:07]--[2025-05-04 dim. 12:10] => 0:03
:END:
** TODO use tokens to parse meta-commands
:PROPERTIES:
:EFFORT: 10
:END:
:LOGBOOK:
CLOCK: [2025-05-04 dim. 12:10]--[2025-05-04 dim. 12:22] => 0:12
:END:
*** DONE recognize meta-commands as tokens
:PROPERTIES:
:EFFORT: 10
:END:
:LOGBOOK:
CLOCK: [2025-05-04 dim. 13:32]--[2025-05-04 dim. 13:35] => 0:03
CLOCK: [2025-05-04 dim. 13:27]--[2025-05-04 dim. 13:32] => 0:05
:END:
*** TODO CommandParseError must have a ScanError variant with an Into impl
:PROPERTIES:
:EFFORT: 10
:END:
*** TODO ScanErrors must be convertible to ariadne reports
:PROPERTIES:
:EFFORT: 10
:END:
**** TODO Remove the CommandParseError Display implementation
:PROPERTIES:
:EFFORT: 10
:END:
*** TODO remove token types which are not recognized at all
:PROPERTIES:
:EFFORT: 10
:END:
*** TODO parse tokens into meta-commands
:PROPERTIES:
:EFFORT: 10
:END:
** TODO use tokens to parse statements
:PROPERTIES:
:EFFORT:
:END:

View file

@ -1,4 +1,4 @@
#[derive(Debug)]
#[derive(Debug, Eq, PartialEq)]
pub enum MetaCommand {
Exit,
}

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\".exit\".to_string(), \"<stdin>\".to_string())"
---
Ok(
[
Token {
location: Location {
file: "<stdin>",
offset: 0,
length: 5,
},
data: MetaCommand(
Exit,
),
lexeme: ".exit",
},
Token {
location: Location {
file: "<stdin>",
offset: 5,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,18 @@
---
source: src/tokens.rs
expression: "tokenize(\".halp\".to_string(), \"<stdin>\".to_string())"
---
Err(
[
ScanError {
location: Location {
file: "<stdin>",
offset: 0,
length: 5,
},
kind: UnknownMetaCommand(
".halp",
),
},
],
)

View file

@ -1,3 +1,5 @@
use crate::meta_commands::MetaCommand;
#[derive(Debug, Eq, PartialEq)]
pub enum TokenData {
/// INSERT
@ -8,6 +10,7 @@ pub enum TokenData {
Integer(i64),
/// Hello World!
String(String),
MetaCommand(MetaCommand),
/// No file O.O?
EndOfFile,
}
@ -61,6 +64,7 @@ pub enum ScanErrorKind {
UnexpectedChar(char),
UnexpectedEndOfInput,
UnknownKeyword(String),
UnknownMetaCommand(String),
}
#[derive(Debug, Eq, PartialEq)]
@ -105,6 +109,44 @@ impl Tokenizer {
}
}
/// Map a scanned word to the token for the meta-command it names.
///
/// Matching is case-insensitive (the word is lowercased first); a word
/// that names no known meta-command yields `None` so the caller can
/// report an `UnknownMetaCommand` scan error.
fn recognize_metacommand(word: &str) -> Option<TokenData> {
    let lowered = word.to_lowercase();
    if lowered == ".exit" {
        Some(TokenData::MetaCommand(MetaCommand::Exit))
    } else {
        None
    }
}
/// Scan a meta-command starting at the current position.
///
/// The caller has already peeked a `'.'`; this consumes it plus any
/// following run of alphabetic or `'_'` characters, then asks
/// `recognize_metacommand` whether the word names a known command.
///
/// Returns the recognized `Token`, or a `ScanError` with kind
/// `UnknownMetaCommand` (carrying the unrecognized word) — in either
/// case the location spans the whole word from its starting offset.
fn scan_meta_command(&mut self) -> Result<Token, ScanError> {
    let start_offset = self.offset;
    let mut word = String::new();
    let mut length = 0;
    // Consume the leading '.' that triggered this scan.
    if let Some(dot) = self.advance() {
        word.push(dot);
        length += 1;
    }
    // Accumulate the command name: letters and underscores only.
    while let Some(next) = self.peek() {
        if !(next.is_alphabetic() || next == '_') {
            break;
        }
        word.push(next);
        self.advance();
        length += 1;
    }
    let location = Location::new(self.file.clone(), start_offset, length);
    match Self::recognize_metacommand(&word) {
        Some(data) => Ok(Token {
            location,
            data,
            lexeme: word,
        }),
        None => Err(ScanError {
            location,
            kind: ScanErrorKind::UnknownMetaCommand(word),
        }),
    }
}
fn scan_identifier_or_keyword(&mut self) -> Result<Token, ScanError> {
let start_offset = self.offset;
let mut word = String::new();
@ -149,6 +191,8 @@ impl Tokenizer {
if let Some(c) = self.peek() {
if Self::ident_or_keyword_start(c) {
return self.scan_identifier_or_keyword();
} else if c == '.' {
return self.scan_meta_command();
} else if c.is_whitespace() {
self.advance();
} else {
@ -193,6 +237,21 @@ pub fn tokenize(input: String, file: String) -> Result<Vec<Token>, Vec<ScanError
}
}
#[cfg(test)]
mod tests {
use super::*;
use insta::assert_debug_snapshot;
// Tokenizing the known meta-command ".exit" must yield a
// MetaCommand(Exit) token followed by EndOfFile; the expected output
// lives in the committed insta snapshot. NOTE: the expression must stay
// inline in the macro call — insta records its literal source text as
// the snapshot's `expression:` header.
#[test]
fn test_tokenize_meta_command() {
assert_debug_snapshot!(tokenize(".exit".to_string(), "<stdin>".to_string()));
}
// An unrecognized meta-command word (".halp") must produce a scan error
// with kind UnknownMetaCommand carrying the full word; the expected
// output lives in the committed insta snapshot. NOTE: keep the
// expression inline in the macro call — insta records its literal
// source text as the snapshot's `expression:` header.
#[test]
fn test_tokenize_unknown_meta_command() {
assert_debug_snapshot!(tokenize(".halp".to_string(), "<stdin>".to_string()));
}
#[test]
fn test_tokenizer() {
let mut scanresult =
@ -256,3 +315,4 @@ fn test_tokenizer_errors() {
);
assert!(scanerrors.is_empty());
}
}