diff --git a/notes.org b/notes.org
index 1d0c993..8813f59 100644
--- a/notes.org
+++ b/notes.org
@@ -214,6 +214,10 @@ i will use rustyline, since it seems like the most feature-complete
 
 * STRT parse integers
 
+** TODO Function to get a token until condition is false
+
+** TODO Parse the integer
+
 * TODO parse strings
 
 * WAIT cli tests using insta-cmd
diff --git a/src/error_display.rs b/src/error_display.rs
index 5297abb..cd303b6 100644
--- a/src/error_display.rs
+++ b/src/error_display.rs
@@ -11,7 +11,20 @@ impl OSDBError for CommandParseError {
             CommandParseError::Scan(x) => {
                 x.display(file, input);
             }
-            _ => todo!(),
+            CommandParseError::UnexpectedToken(token, items) => {
+                let location = (file, Into::<std::ops::Range<usize>>::into(&token.location));
+                Report::build(ReportKind::Error, location.clone())
+                    .with_message("unexpected token")
+                    .with_label(
+                        Label::new(location.clone())
+                            .with_color(Color::Red)
+                            .with_message(format!("found {token}")),
+                    )
+                    .with_note(format!("expected token type to be one of {items:?}"))
+                    .finish()
+                    .print((file, Source::from(input)))
+                    .unwrap()
+            }
         }
     }
 }
@@ -26,7 +39,6 @@ impl OSDBError for ScanError {
                     .with_color(Color::Red)
                     .with_message(format!("{self}")),
             )
-            .with_help("Make sure you don't have any typos or unexpected characters.")
             .finish()
             .print((file, Source::from(input)))
             .unwrap();
diff --git a/src/meta_commands.rs b/src/meta_commands.rs
index 898776c..59ff37f 100644
--- a/src/meta_commands.rs
+++ b/src/meta_commands.rs
@@ -3,6 +3,14 @@ pub enum MetaCommand {
     Exit,
 }
 
+impl std::fmt::Display for MetaCommand {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            MetaCommand::Exit => write!(f, "exit"),
+        }
+    }
+}
+
 pub struct MetaCommandExecuteResult {
     pub should_exit: bool,
 }
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_errors.snap b/src/snapshots/osdb__tokens__tests__tokenizer_errors.snap
index b5886f1..347e0ca 100644
--- a/src/snapshots/osdb__tokens__tests__tokenizer_errors.snap
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_errors.snap
@@ -1,26 +1,30 @@
 ---
 source: src/tokens.rs
-expression: scanerrors
+expression: "tokenize(\"salact +\".to_string(), \"src/statement.sql\".to_string())"
 ---
-[
-    ScanError {
-        location: Location {
-            file: "src/statement.sql",
-            offset: 0,
-            length: 6,
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/statement.sql",
+                offset: 0,
+                length: 6,
+            },
+            kind: UnknownKeyword(
+                "salact",
+            ),
         },
-        kind: UnknownKeyword(
-            "salact",
-        ),
-    },
-    ScanError {
-        location: Location {
-            file: "src/statement.sql",
-            offset: 7,
-            length: 1,
+        ScanError {
+            location: Location {
+                file: "src/statement.sql",
+                offset: 7,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
         },
-        kind: UnexpectedChar(
-            '+',
-        ),
-    },
-]
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-2.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-2.snap
new file mode 100644
index 0000000..9b27ecf
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-2.snap
@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-10\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            data: Int(
+                -10,
+            ),
+            lexeme: "-10",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 3,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)
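A note on the Into::<std::ops::Range<usize>>::into(&token.location) call in error_display.rs above: it presumes some conversion from &Location into a byte range, which this diff does not include. A minimal sketch of what that impl might look like, assuming Location's offset and length fields (visible in the snapshot files) are byte offsets into the input:

    // Hypothetical sketch, not part of this commit: the ariadne spans used
    // above are (file, Range<usize>) pairs, so a &Location needs to become
    // the half-open byte range [offset, offset + length).
    impl From<&Location> for std::ops::Range<usize> {
        fn from(loc: &Location) -> Self {
            loc.offset..loc.offset + loc.length
        }
    }

With a From impl like that in place, the Into::<std::ops::Range<usize>>::into call resolves via the blanket Into impl.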
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-3.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-3.snap
new file mode 100644
index 0000000..d125fbb
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-3.snap
@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            data: Int(
+                0,
+            ),
+            lexeme: "0",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 1,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-4.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-4.snap
new file mode 100644
index 0000000..ea93fa4
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-4.snap
@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 2,
+            },
+            data: Int(
+                0,
+            ),
+            lexeme: "-0",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 2,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-5.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-5.snap
new file mode 100644
index 0000000..31bb3b8
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-5.snap
@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"--0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-6.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-6.snap
new file mode 100644
index 0000000..75c6381
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-6.snap
@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"++0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-7.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-7.snap
new file mode 100644
index 0000000..daf026f
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-7.snap
@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers-8.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers-8.snap
new file mode 100644
index 0000000..43bb23c
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers-8.snap
@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"+\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)
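The error snapshots above fall straight out of std's integer parsing: the digit() predicate added in src/tokens.rs below treats '-' and '+' as digit characters, so the scanner grabs a maximal run like "--0" or a bare "+" as one word and hands it to parse(), which rejects it with InvalidDigit. A standalone illustration of that std behavior (not code from this commit):

    // i64::from_str accepts at most one leading sign, then digits.
    fn main() {
        assert_eq!("-10".parse::<i64>(), Ok(-10));
        assert_eq!("-0".parse::<i64>(), Ok(0)); // why "-0" lexes as Int(0)
        assert!("--0".parse::<i64>().is_err()); // InvalidDigit
        assert!("++0".parse::<i64>().is_err()); // InvalidDigit
        assert!("-".parse::<i64>().is_err()); // InvalidDigit
        assert!("+".parse::<i64>().is_err()); // InvalidDigit
    }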
diff --git a/src/snapshots/osdb__tokens__tests__tokenizer_integers.snap b/src/snapshots/osdb__tokens__tests__tokenizer_integers.snap
new file mode 100644
index 0000000..96d83a3
--- /dev/null
+++ b/src/snapshots/osdb__tokens__tests__tokenizer_integers.snap
@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"10\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 2,
+            },
+            data: Int(
+                10,
+            ),
+            lexeme: "10",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 2,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)
diff --git a/src/tokens.rs b/src/tokens.rs
index 958a254..7c44906 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -55,6 +55,20 @@ pub struct Token {
     pub lexeme: String,
 }
 
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self.data {
+            TokenData::Insert => write!(f, "insert statement"),
+            TokenData::Select => write!(f, "select statement"),
+            TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
+            TokenData::EndOfFile => write!(f, "end of file"),
+            TokenData::Int(x) => write!(f, "integer {x}"),
+        }?;
+        let lexeme = &self.lexeme;
+        write!(f, " {lexeme:?}")
+    }
+}
+
 struct Tokenizer {
     input: String,
     file: String,
@@ -68,6 +82,7 @@ pub enum ScanErrorKind {
     UnexpectedEndOfInput,
     UnknownKeyword(String),
     UnknownMetaCommand(String),
+    ParseIntError(std::num::ParseIntError),
 }
 
 impl std::fmt::Display for ScanErrorKind {
@@ -77,6 +92,7 @@ impl std::fmt::Display for ScanErrorKind {
             ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
             ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
             ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
+            ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
         }
     }
 }
@@ -207,6 +223,40 @@ impl Tokenizer {
         c.is_alphanumeric() || c == '_'
     }
 
+    fn digit(c: char) -> bool {
+        c.is_ascii_digit() || c == '-' || c == '+'
+    }
+
+    fn scan_integer(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut length = 0;
+        if let Some(c) = self.advance() {
+            word.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.peek() {
+            if Self::digit(c) {
+                word.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+            length += 1;
+        }
+        match word.parse::<i64>() {
+            Ok(int) => Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::Int(int),
+                lexeme: word,
+            }),
+            Err(e) => Err(ScanError {
+                location: Location::new(self.file.clone(), start_offset, length),
+                kind: ScanErrorKind::ParseIntError(e),
+            }),
+        }
+    }
+
     fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
         loop {
             if let Some(c) = self.peek() {
@@ -214,6 +264,8 @@ impl Tokenizer {
                 return self.scan_identifier_or_keyword().map(Some);
             } else if c == '.' {
                 return self.scan_meta_command().map(Some);
+            } else if Self::digit(c) {
+                return self.scan_integer().map(Some);
             } else if c.is_whitespace() {
                 self.advance();
             } else {
@@ -308,9 +360,21 @@ mod tests {
 
     #[test]
     fn test_tokenizer_errors() {
-        let scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string())
-            .err()
-            .unwrap();
-        assert_debug_snapshot!(scanerrors);
+        assert_debug_snapshot!(tokenize(
+            "salact +".to_string(),
+            "src/statement.sql".to_string()
+        ));
+    }
+
+    #[test]
+    fn test_tokenizer_integers() {
+        assert_debug_snapshot!(tokenize("10".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-10".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("--0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("++0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("+".to_string(), "src/ints.sql".to_string(),));
     }
 }
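On the notes.org TODO above ("Function to get a token until condition is false"): the peek/advance loop in scan_integer is exactly the kind of thing that helper could absorb. A hedged sketch of what it might look like as a Tokenizer method — hypothetical code, not part of this commit; it assumes only the existing peek and advance methods:

    // Hypothetical helper: consume characters while `cond` holds, returning the
    // accumulated word and its length in chars. scan_integer's manual loop (and
    // likely scan_identifier_or_keyword's) could then shrink to a call like
    // `self.scan_while(Self::digit)`.
    fn scan_while(&mut self, cond: impl Fn(char) -> bool) -> (String, usize) {
        let mut word = String::new();
        let mut length = 0;
        while let Some(c) = self.peek() {
            if !cond(c) {
                break;
            }
            word.push(c);
            self.advance();
            length += 1;
        }
        (word, length)
    }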