feat(tokenizer): parse integers

This commit is contained in:
Khaïs COLIN 2025-05-10 10:55:44 +02:00
parent 71a9d82d96
commit f259b079b7
Signed by: logistic-bot
SSH key fingerprint: SHA256:RlpiqKeXpcPFZZ4y9Ou4xi2M8OhRJovIwDlbCaMsuAo
13 changed files with 311 additions and 27 deletions

View file

@ -214,6 +214,10 @@ i will use rustyline, since it seems like the most feature-complete
* STRT parse integers
** TODO Function to get a token until condition is false
** TODO Parse the integer
* TODO parse strings
* WAIT cli tests using insta-cmd

View file

@ -11,7 +11,20 @@ impl OSDBError for CommandParseError {
CommandParseError::Scan(x) => {
x.display(file, input);
}
_ => todo!(),
CommandParseError::UnexpectedToken(token, items) => {
let location = (file, Into::<std::ops::Range<usize>>::into(&token.location));
Report::build(ReportKind::Error, location.clone())
.with_message("unexpected token")
.with_label(
Label::new(location.clone())
.with_color(Color::Red)
.with_message(format!("found {token}")),
)
.with_note(format!("expected token type to be one of {items:?}"))
.finish()
.print((file, Source::from(input)))
.unwrap()
}
}
}
}
@ -26,7 +39,6 @@ impl OSDBError for ScanError {
.with_color(Color::Red)
.with_message(format!("{self}")),
)
.with_help("Make sure you don't have any typos or unexpected characters.")
.finish()
.print((file, Source::from(input)))
.unwrap();

View file

@ -3,6 +3,14 @@ pub enum MetaCommand {
Exit,
}
/// User-facing rendering of a meta-command: writes the command's name
/// (`exit` for [`MetaCommand::Exit`]).
impl std::fmt::Display for MetaCommand {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a name to be chosen here.
        let name = match self {
            MetaCommand::Exit => "exit",
        };
        f.write_str(name)
    }
}
pub struct MetaCommandExecuteResult {
pub should_exit: bool,
}

View file

@ -1,26 +1,30 @@
---
source: src/tokens.rs
expression: scanerrors
expression: "tokenize(\"salact +\".to_string(), \"src/statement.sql\".to_string())"
---
[
ScanError {
location: Location {
file: "src/statement.sql",
offset: 0,
length: 6,
Err(
[
ScanError {
location: Location {
file: "src/statement.sql",
offset: 0,
length: 6,
},
kind: UnknownKeyword(
"salact",
),
},
kind: UnknownKeyword(
"salact",
),
},
ScanError {
location: Location {
file: "src/statement.sql",
offset: 7,
length: 1,
ScanError {
location: Location {
file: "src/statement.sql",
offset: 7,
length: 1,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
kind: UnexpectedChar(
'+',
),
},
]
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
data: Int(
-10,
),
lexeme: "-10",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 3,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
data: Int(
0,
),
lexeme: "0",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 1,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 2,
},
data: Int(
0,
),
lexeme: "-0",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 2,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"--0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"++0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"-\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"+\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 2,
},
data: Int(
10,
),
lexeme: "10",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 2,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -55,6 +55,20 @@ pub struct Token {
pub lexeme: String,
}
/// User-facing rendering of a token: a short description of the token kind
/// followed by a space and the debug-quoted lexeme.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Describe the kind of token first; any write failure aborts before the lexeme.
        match &self.data {
            TokenData::Insert => f.write_str("insert statement")?,
            TokenData::Select => f.write_str("select statement")?,
            TokenData::MetaCommand(command) => write!(f, "meta-command {command}")?,
            TokenData::EndOfFile => f.write_str("end of file")?,
            TokenData::Int(value) => write!(f, "integer {value}")?,
        }
        // Then append the source text, debug-formatted so it is quoted and escaped.
        write!(f, " {:?}", self.lexeme)
    }
}
struct Tokenizer {
input: String,
file: String,
@ -68,6 +82,7 @@ pub enum ScanErrorKind {
UnexpectedEndOfInput,
UnknownKeyword(String),
UnknownMetaCommand(String),
ParseIntError(std::num::ParseIntError),
}
impl std::fmt::Display for ScanErrorKind {
@ -77,6 +92,7 @@ impl std::fmt::Display for ScanErrorKind {
ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
}
}
}
@ -207,6 +223,40 @@ impl Tokenizer {
c.is_alphanumeric() || c == '_'
}
/// Returns `true` when `c` may be part of an integer lexeme.
///
/// Despite the name this accepts more than digits: ASCII decimal digits
/// plus the sign characters `-` and `+`.
fn digit(c: char) -> bool {
    matches!(c, '0'..='9' | '-' | '+')
}
/// Scans an integer literal starting at the current offset.
///
/// The first character is consumed unconditionally; after that, characters are
/// consumed for as long as [`Self::digit`] accepts them (digits and `-`/`+`).
/// The collected text is then parsed as an `i64`.
///
/// # Errors
///
/// Returns a [`ScanError`] of kind `ParseIntError`, spanning the whole collected
/// text, when that text is not a valid `i64`.
fn scan_integer(&mut self) -> Result<Token, ScanError> {
    let start_offset = self.offset;
    let mut lexeme = String::new();
    let mut length = 0;

    // Take the leading character as-is.
    if let Some(first) = self.advance() {
        lexeme.push(first);
        length += 1;
    }

    // Keep consuming while the upcoming character still belongs to the literal.
    while let Some(next) = self.peek().filter(|&c| Self::digit(c)) {
        lexeme.push(next);
        self.advance();
        length += 1;
    }

    // Success and failure both report the same span.
    let location = Location::new(self.file.clone(), start_offset, length);
    match lexeme.parse::<i64>() {
        Ok(value) => Ok(Token {
            location,
            data: TokenData::Int(value),
            lexeme,
        }),
        Err(cause) => Err(ScanError {
            location,
            kind: ScanErrorKind::ParseIntError(cause),
        }),
    }
}
fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
loop {
if let Some(c) = self.peek() {
@ -214,6 +264,8 @@ impl Tokenizer {
return self.scan_identifier_or_keyword().map(Some);
} else if c == '.' {
return self.scan_meta_command().map(Some);
} else if Self::digit(c) {
return self.scan_integer().map(Some);
} else if c.is_whitespace() {
self.advance();
} else {
@ -308,9 +360,21 @@ mod tests {
// Snapshot test for tokenizer error reporting: feeds the input "salact +"
// (an unknown keyword followed by a lone `+`) through `tokenize` and records
// the result with insta's `assert_debug_snapshot!`.
#[test]
fn test_tokenizer_errors() {
let scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string())
.err()
.unwrap();
assert_debug_snapshot!(scanerrors);
assert_debug_snapshot!(tokenize(
"salact +".to_string(),
"src/statement.sql".to_string()
));
}
// Snapshot tests for integer tokenization. Each call records the full
// `tokenize` result (Ok or Err) as its own snapshot; the call expression is
// stored in the snapshot header, so keep the invocations and their order stable.
#[test]
fn test_tokenizer_integers() {
// Well-formed integers: unsigned, negative, zero, and negative zero.
assert_debug_snapshot!(tokenize("10".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-10".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-0".to_string(), "src/ints.sql".to_string(),));
// Malformed inputs: doubled signs and signs with no digits; the recorded
// snapshots show these as `ParseIntError` scan errors.
assert_debug_snapshot!(tokenize("--0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("++0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("+".to_string(), "src/ints.sql".to_string(),));
}
}