feat(tokenizer): parse integers
parent 71a9d82d96
commit f259b079b7
13 changed files with 311 additions and 27 deletions
@@ -214,6 +214,10 @@ i will use rustyline, since it seems like the most feature-complete

* STRT parse integers
** TODO Function to get a token until condition is false
** TODO Parse the integer
* TODO parse strings
* WAIT cli tests using insta-cmd
@@ -11,7 +11,20 @@ impl OSDBError for CommandParseError {
            CommandParseError::Scan(x) => {
                x.display(file, input);
            }
            _ => todo!(),
            CommandParseError::UnexpectedToken(token, items) => {
                let location = (file, Into::<std::ops::Range<usize>>::into(&token.location));
                Report::build(ReportKind::Error, location.clone())
                    .with_message("unexpected token")
                    .with_label(
                        Label::new(location.clone())
                            .with_color(Color::Red)
                            .with_message(format!("found {token}")),
                    )
                    .with_note(format!("expected token type to be one of {items:?}"))
                    .finish()
                    .print((file, Source::from(input)))
                    .unwrap()
            }
        }
    }
}
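The new `UnexpectedToken` arm replaces the `_ => todo!()` fallback with a rendered diagnostic. The `Report`/`Label`/`ReportKind`/`Source` builder chain looks like the ariadne crate's API, where a span is an `(id, Range<usize>)` pair — hence the conversion of the token's `Location` into a `std::ops::Range<usize>` before building the report.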
@@ -26,7 +39,6 @@ impl OSDBError for ScanError {
                        .with_color(Color::Red)
                        .with_message(format!("{self}")),
                )
                .with_help("Make sure you don't have any typos or unexpected characters.")
                .finish()
                .print((file, Source::from(input)))
                .unwrap();
@@ -3,6 +3,14 @@ pub enum MetaCommand {
    Exit,
}

impl std::fmt::Display for MetaCommand {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            MetaCommand::Exit => write!(f, "exit"),
        }
    }
}

pub struct MetaCommandExecuteResult {
    pub should_exit: bool,
}
@@ -1,8 +1,9 @@
---
source: src/tokens.rs
expression: scanerrors
expression: "tokenize(\"salact +\".to_string(), \"src/statement.sql\".to_string())"
---
[
Err(
    [
        ScanError {
            location: Location {
                file: "src/statement.sql",
@@ -19,8 +20,11 @@ expression: scanerrors
                offset: 7,
                length: 1,
            },
            kind: UnexpectedChar(
                '+',
            kind: ParseIntError(
                ParseIntError {
                    kind: InvalidDigit,
                },
            ),
        },
    ]
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-2.snap (new file, 28 lines)
@@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
    [
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 3,
            },
            data: Int(
                -10,
            ),
            lexeme: "-10",
        },
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 3,
                length: 0,
            },
            data: EndOfFile,
            lexeme: "",
        },
    ],
)
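The span bookkeeping is visible in the snapshot: for the three-byte input `-10`, the `Int` token records `offset: 0, length: 3`, and the tokenizer appends a zero-length `EndOfFile` token at `offset: 3`. The remaining snapshots follow the same pattern.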
src/snapshots/osdb__tokens__tests__tokenizer_integers-3.snap (new file, 28 lines)
@@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
    [
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 1,
            },
            data: Int(
                0,
            ),
            lexeme: "0",
        },
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 1,
                length: 0,
            },
            data: EndOfFile,
            lexeme: "",
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-4.snap (new file, 28 lines)
@@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
    [
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 2,
            },
            data: Int(
                0,
            ),
            lexeme: "-0",
        },
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 2,
                length: 0,
            },
            data: EndOfFile,
            lexeme: "",
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-5.snap (new file, 20 lines)
@@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"--0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
    [
        ScanError {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 3,
            },
            kind: ParseIntError(
                ParseIntError {
                    kind: InvalidDigit,
                },
            ),
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-6.snap (new file, 20 lines)
@@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"++0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
    [
        ScanError {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 3,
            },
            kind: ParseIntError(
                ParseIntError {
                    kind: InvalidDigit,
                },
            ),
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-7.snap (new file, 20 lines)
@@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"-\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
    [
        ScanError {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 1,
            },
            kind: ParseIntError(
                ParseIntError {
                    kind: InvalidDigit,
                },
            ),
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers-8.snap (new file, 20 lines)
@@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"+\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
    [
        ScanError {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 1,
            },
            kind: ParseIntError(
                ParseIntError {
                    kind: InvalidDigit,
                },
            ),
        },
    ],
)
src/snapshots/osdb__tokens__tests__tokenizer_integers.snap (new file, 28 lines)
@@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
    [
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 0,
                length: 2,
            },
            data: Int(
                10,
            ),
            lexeme: "10",
        },
        Token {
            location: Location {
                file: "src/ints.sql",
                offset: 2,
                length: 0,
            },
            data: EndOfFile,
            lexeme: "",
        },
    ],
)
@@ -55,6 +55,20 @@ pub struct Token {
    pub lexeme: String,
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match &self.data {
            TokenData::Insert => write!(f, "insert statement"),
            TokenData::Select => write!(f, "select statement"),
            TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
            TokenData::EndOfFile => write!(f, "end of file"),
            TokenData::Int(x) => write!(f, "integer {x}"),
        }?;
        let lexeme = &self.lexeme;
        write!(f, " {lexeme:?}")
    }
}

struct Tokenizer {
    input: String,
    file: String,
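With this `Display` impl, the unexpected-token report above can show a readable description followed by the quoted lexeme — a `TokenData::Int(-10)` token with lexeme `-10`, for example, renders as `integer -10 "-10"`.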
@@ -68,6 +82,7 @@ pub enum ScanErrorKind {
    UnexpectedEndOfInput,
    UnknownKeyword(String),
    UnknownMetaCommand(String),
    ParseIntError(std::num::ParseIntError),
}

impl std::fmt::Display for ScanErrorKind {
@@ -77,6 +92,7 @@ impl std::fmt::Display for ScanErrorKind {
            ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
            ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
            ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
            ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
        }
    }
}
@@ -207,6 +223,40 @@ impl Tokenizer {
        c.is_alphanumeric() || c == '_'
    }

    fn digit(c: char) -> bool {
        c.is_ascii_digit() || c == '-' || c == '+'
    }

    fn scan_integer(&mut self) -> Result<Token, ScanError> {
        let start_offset = self.offset;
        let mut word = String::new();
        let mut length = 0;
        if let Some(c) = self.advance() {
            word.push(c);
            length += 1;
        }
        while let Some(c) = self.peek() {
            if Self::digit(c) {
                word.push(c);
                self.advance();
            } else {
                break;
            }
            length += 1;
        }
        match word.parse::<i64>() {
            Ok(int) => Ok(Token {
                location: Location::new(self.file.clone(), start_offset, length),
                data: TokenData::Int(int),
                lexeme: word,
            }),
            Err(e) => Err(ScanError {
                location: Location::new(self.file.clone(), start_offset, length),
                kind: ScanErrorKind::ParseIntError(e),
            }),
        }
    }

    fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
        loop {
            if let Some(c) = self.peek() {
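A note on the predicate above: since `digit` also accepts `+` and `-`, sign characters can appear anywhere in the collected word, and validation is deferred entirely to `word.parse::<i64>()`. As a standalone sketch (not part of the commit), this is how `parse::<i64>` treats the lexemes the tests below exercise — at most one leading sign is accepted, so doubled or bare signs fail with InvalidDigit, exactly as the error snapshots record:

// Standalone sketch, not from the commit: i64 parsing of the tokenizer's
// integer lexemes. `str::parse::<i64>` allows at most one leading sign.
fn main() {
    for lexeme in ["10", "-10", "0", "-0", "--0", "++0", "-", "+"] {
        match lexeme.parse::<i64>() {
            Ok(n) => println!("{lexeme:?} -> Ok({n})"),   // "10", "-10", "0", "-0"
            Err(e) => println!("{lexeme:?} -> Err({e})"), // "--0", "++0", "-", "+"
        }
    }
}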
@@ -214,6 +264,8 @@ impl Tokenizer {
                    return self.scan_identifier_or_keyword().map(Some);
                } else if c == '.' {
                    return self.scan_meta_command().map(Some);
                } else if Self::digit(c) {
                    return self.scan_integer().map(Some);
                } else if c.is_whitespace() {
                    self.advance();
                } else {
@@ -308,9 +360,21 @@ mod tests {

    #[test]
    fn test_tokenizer_errors() {
        let scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string())
            .err()
            .unwrap();
        assert_debug_snapshot!(scanerrors);
        assert_debug_snapshot!(tokenize(
            "salact +".to_string(),
            "src/statement.sql".to_string()
        ));
    }

    #[test]
    fn test_tokenizer_integers() {
        assert_debug_snapshot!(tokenize("10".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("-10".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("0".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("-0".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("--0".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("++0".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("-".to_string(), "src/ints.sql".to_string(),));
        assert_debug_snapshot!(tokenize("+".to_string(), "src/ints.sql".to_string(),));
    }
}
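insta gives each additional `assert_debug_snapshot!` in a test a numeric suffix, so the eight assertions in `test_tokenizer_integers` account for the eight new files above: `tokenizer_integers.snap` plus `tokenizer_integers-2.snap` through `tokenizer_integers-8.snap`. Pending snapshots can then be accepted or rejected interactively with `cargo insta review`.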