From 315703d46b3c163238e51458ca0e330f08e76a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Sat, 31 May 2025 15:59:58 +0200 Subject: [PATCH 1/6] feat(meta): version command --- grammar.ebnf | 1 + notes.org | 4 ++-- src/branding.rs | 10 ++++++++-- src/meta_commands.rs | 12 +++++++++--- src/tokens.rs | 1 + 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/grammar.ebnf b/grammar.ebnf index e979f99..4fefa7b 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -10,6 +10,7 @@ select ::= "select" meta-command ::= "." "exit" | "about" + | "version" int ::= sign? digit+ sign ::= "+" diff --git a/notes.org b/notes.org index 76423a1..c03ef68 100644 --- a/notes.org +++ b/notes.org @@ -226,8 +226,8 @@ i will use rustyline, since it seems like the most feature-complete * DONE write a proper grammar -* TODO .about meta-command -* TODO .version meta-command +* DONE .about meta-command +* DONE .version meta-command * TODO .license meta-command * TODO .help meta-command diff --git a/src/branding.rs b/src/branding.rs index b4418d7..84e1e54 100644 --- a/src/branding.rs +++ b/src/branding.rs @@ -13,12 +13,18 @@ pub fn startup_msg() -> String { ) } -pub fn about_msg() -> String { +pub fn version_msg() -> String { let name = env!("CARGO_PKG_NAME"); let version = env!("CARGO_PKG_VERSION"); + format!("{name} v{version}") +} + +pub fn about_msg() -> String { + let version = version_msg(); + format!( - "{name} v{version} -- A database engine\n\ + "{version} -- A database engine\n\ Note: This is experimental software. No maintenance is intendend." ) } diff --git a/src/meta_commands.rs b/src/meta_commands.rs index d93a079..1bcc2b9 100644 --- a/src/meta_commands.rs +++ b/src/meta_commands.rs @@ -4,6 +4,7 @@ use crate::branding; pub enum MetaCommand { Exit, About, + Version, } impl std::fmt::Display for MetaCommand { @@ -11,6 +12,7 @@ impl std::fmt::Display for MetaCommand { match self { MetaCommand::Exit => write!(f, "exit"), MetaCommand::About => write!(f, "about"), + MetaCommand::Version => write!(f, "version"), } } } @@ -24,9 +26,13 @@ impl MetaCommand { match self { MetaCommand::Exit => MetaCommandExecuteResult { should_exit: true }, MetaCommand::About => { - print!("{}", branding::about_msg()); - MetaCommandExecuteResult { should_exit: false } - } + print!("{}", branding::about_msg()); + MetaCommandExecuteResult { should_exit: false } + } + MetaCommand::Version => { + print!("{}", branding::version_msg()); + MetaCommandExecuteResult { should_exit: false } + }, } } } diff --git a/src/tokens.rs b/src/tokens.rs index 2c3464b..a1a850b 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -161,6 +161,7 @@ impl Tokenizer { match word.to_lowercase().as_str() { ".exit" => Some(TokenData::MetaCommand(MetaCommand::Exit)), ".about" => Some(TokenData::MetaCommand(MetaCommand::About)), + ".version" => Some(TokenData::MetaCommand(MetaCommand::Version)), _ => None, } } From 28cb288eafdb5564decd813a5456326cdeca685d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Sat, 31 May 2025 16:08:42 +0200 Subject: [PATCH 2/6] feat(parser): implement insert command parsing Implements the parse_insert_command function to handle the insert statement according to the grammar definition. The function now correctly parses the command format insert int string string and creates a structured Statement::Insert with the id, username, and email fields. --- grammar.ebnf | 7 ++ notes.org | 2 + src/command.rs | 7 +- src/meta_commands.rs | 10 +- src/parser.rs | 102 +++++++++++++++++- ...mand__tests__execute_insert_statement.snap | 2 +- ...parser__tests__parse_insert_command-2.snap | 60 +++++++++++ ...parser__tests__parse_insert_command-3.snap | 22 ++++ ...__parser__tests__parse_insert_command.snap | 15 +++ ...parser__tests__parse_multiple_correct.snap | 5 +- src/statements.rs | 14 ++- src/tokens.rs | 2 +- 12 files changed, 228 insertions(+), 20 deletions(-) create mode 100644 src/snapshots/osdb__parser__tests__parse_insert_command-2.snap create mode 100644 src/snapshots/osdb__parser__tests__parse_insert_command-3.snap create mode 100644 src/snapshots/osdb__parser__tests__parse_insert_command.snap diff --git a/grammar.ebnf b/grammar.ebnf index 4fefa7b..60e3641 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -1,3 +1,4 @@ +/* token is first stage of parsing */ token ::= insert | select | meta-command @@ -5,6 +6,12 @@ token ::= insert | string | end-of-file +/* command is second stage of parsing */ +command ::= cmd-insert + | cmd-select +cmd-insert ::= insert int string string +cmd-select ::= select + insert ::= "insert" select ::= "select" diff --git a/notes.org b/notes.org index c03ef68..223e8e9 100644 --- a/notes.org +++ b/notes.org @@ -235,6 +235,8 @@ i will use rustyline, since it seems like the most feature-complete insert ** TODO Row struct ** TODO parse row insert +** TODO separate statements with semicolons +** TODO in case of parse error, skip until next semicolon to better recover ** TODO serialize/deserialize row to/from raw bytes *** TODO look for best practices for creating binary formats diff --git a/src/command.rs b/src/command.rs index 75c2792..d19aeaa 100644 --- a/src/command.rs +++ b/src/command.rs @@ -80,7 +80,12 @@ mod tests { #[test] fn test_execute_insert_statement() { - let statement: Command = Statement::Insert.into(); + let statement: Command = Statement::Insert { + id: 45, + username: String::from("user"), + email: String::from("user@example.org"), + } + .into(); let result = statement.execute().display(); assert_snapshot!(result); } diff --git a/src/meta_commands.rs b/src/meta_commands.rs index 1bcc2b9..135e1ab 100644 --- a/src/meta_commands.rs +++ b/src/meta_commands.rs @@ -26,13 +26,13 @@ impl MetaCommand { match self { MetaCommand::Exit => MetaCommandExecuteResult { should_exit: true }, MetaCommand::About => { - print!("{}", branding::about_msg()); - MetaCommandExecuteResult { should_exit: false } - } + print!("{}", branding::about_msg()); + MetaCommandExecuteResult { should_exit: false } + } MetaCommand::Version => { print!("{}", branding::version_msg()); - MetaCommandExecuteResult { should_exit: false } - }, + MetaCommandExecuteResult { should_exit: false } + } } } } diff --git a/src/parser.rs b/src/parser.rs index 7cff3b0..bd6b087 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ use std::collections::VecDeque; use crate::{ command::{Command, CommandParseError}, statements::Statement, - tokens::tokenize, + tokens::{Location, Token, TokenData, tokenize}, }; pub fn parse(file: String, input: String) -> Result, Vec> { @@ -14,7 +14,10 @@ pub fn parse(file: String, input: String) -> Result, Vec cmds.push(Command::Statement(Statement::Insert)), + crate::tokens::TokenData::Insert => match parse_insert_command(&mut tokens) { + Ok(cmd) => cmds.push(cmd), + Err(err) => errs.push(err), + }, crate::tokens::TokenData::Select => cmds.push(Command::Statement(Statement::Select)), crate::tokens::TokenData::MetaCommand(meta_command) => { cmds.push(Command::MetaCommand(meta_command)) @@ -33,6 +36,83 @@ pub fn parse(file: String, input: String) -> Result, Vec, +) -> Result { + // According to grammar.ebnf, insert command should be: insert int string string + + // Parse the id (integer) + let id_token = tokens.pop_front().ok_or_else(|| { + CommandParseError::UnexpectedToken( + Token { + location: tokens.back().map_or_else( + || Location::new(String::from(""), 0, 0), + |t| t.location.clone(), + ), + data: TokenData::EndOfFile, + lexeme: String::new(), + }, + &["integer"], + ) + })?; + + let id = match id_token.data { + TokenData::Int(id) => id, + _ => return Err(CommandParseError::UnexpectedToken(id_token, &["integer"])), + }; + + // Parse the username (string) + let username_token = tokens.pop_front().ok_or_else(|| { + CommandParseError::UnexpectedToken( + Token { + location: tokens.back().map_or_else( + || Location::new(String::from(""), 0, 0), + |t| t.location.clone(), + ), + data: TokenData::EndOfFile, + lexeme: String::new(), + }, + &["string"], + ) + })?; + + let username = match username_token.data { + TokenData::String(username) => username, + _ => { + return Err(CommandParseError::UnexpectedToken( + username_token, + &["string"], + )); + } + }; + + // Parse the email (string) + let email_token = tokens.pop_front().ok_or_else(|| { + CommandParseError::UnexpectedToken( + Token { + location: tokens.back().map_or_else( + || Location::new(String::from(""), 0, 0), + |t| t.location.clone(), + ), + data: TokenData::EndOfFile, + lexeme: String::new(), + }, + &["string"], + ) + })?; + + let email = match email_token.data { + TokenData::String(email) => email, + _ => return Err(CommandParseError::UnexpectedToken(email_token, &["string"])), + }; + + Ok(Command::Statement(Statement::Insert { + id, + username, + email, + })) +} + #[cfg(test)] mod tests { use super::*; @@ -44,8 +124,20 @@ mod tests { assert_debug_snapshot!(parse(file.clone(), String::from(".exit"))); assert_debug_snapshot!(parse(file.clone(), String::from("select"))); assert_debug_snapshot!(parse(file.clone(), String::from("sElEcT"))); - assert_debug_snapshot!(parse(file.clone(), String::from("INSERT"))); - assert_debug_snapshot!(parse(file.clone(), String::from("InSErT"))); + } + + #[test] + fn test_parse_insert_command() { + let file = String::from(""); + assert_debug_snapshot!(parse( + file.clone(), + String::from(r#"insert 1 "username" "email@example.com""#) + )); + assert_debug_snapshot!(parse( + file.clone(), + String::from(r#"insert "not_an_id" "username" "email@example.com""#) + )); + assert_debug_snapshot!(parse(file.clone(), String::from(r#"insert 1 "username""#))); } #[test] @@ -63,7 +155,7 @@ mod tests { let file = String::from(""); assert_debug_snapshot!(parse( file.clone(), - String::from(".exit select select insert select") + String::from(".exit select select select") )); } diff --git a/src/snapshots/osdb__command__tests__execute_insert_statement.snap b/src/snapshots/osdb__command__tests__execute_insert_statement.snap index 89809d2..aaea9a0 100644 --- a/src/snapshots/osdb__command__tests__execute_insert_statement.snap +++ b/src/snapshots/osdb__command__tests__execute_insert_statement.snap @@ -2,4 +2,4 @@ source: src/command.rs expression: result --- -insert +insert 45 "user" "user@example.org" diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap new file mode 100644 index 0000000..8582c85 --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap @@ -0,0 +1,60 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(),\nString::from(r#\"insert \"not_an_id\" \"username\" \"email@example.com\"\"#))" +--- +Err( + [ + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 7, + length: 11, + }, + data: String( + "not_an_id", + ), + lexeme: "\"not_an_id\"", + }, + [ + "integer", + ], + ), + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 19, + length: 10, + }, + data: String( + "username", + ), + lexeme: "\"username\"", + }, + [ + "statement", + "meta command", + "eof", + ], + ), + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 30, + length: 19, + }, + data: String( + "email@example.com", + ), + lexeme: "\"email@example.com\"", + }, + [ + "statement", + "meta command", + "eof", + ], + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap new file mode 100644 index 0000000..dc13e7a --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap @@ -0,0 +1,22 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(), String::from(r#\"insert 1 \"username\"\"#))" +--- +Err( + [ + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 19, + length: 0, + }, + data: EndOfFile, + lexeme: "", + }, + [ + "string", + ], + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command.snap b/src/snapshots/osdb__parser__tests__parse_insert_command.snap new file mode 100644 index 0000000..954c83e --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_insert_command.snap @@ -0,0 +1,15 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(),\nString::from(r#\"insert 1 \"username\" \"email@example.com\"\"#))" +--- +Ok( + [ + Statement( + Insert { + id: 1, + username: "username", + email: "email@example.com", + }, + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap b/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap index a90d0ae..4642731 100644 --- a/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap +++ b/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap @@ -1,6 +1,6 @@ --- source: src/parser.rs -expression: "parse(file.clone(), String::from(\".exit select select insert select\"))" +expression: "parse(file.clone(), String::from(\".exit select select select\"))" --- Ok( [ @@ -13,9 +13,6 @@ Ok( Statement( Select, ), - Statement( - Insert, - ), Statement( Select, ), diff --git a/src/statements.rs b/src/statements.rs index 5ba1103..3799caf 100644 --- a/src/statements.rs +++ b/src/statements.rs @@ -1,6 +1,10 @@ #[derive(Debug)] pub enum Statement { - Insert, + Insert { + id: i64, + username: String, + email: String, + }, Select, } @@ -11,8 +15,12 @@ pub struct StatementExecuteResult { impl Statement { pub fn execute(&self) -> StatementExecuteResult { match self { - Statement::Insert => StatementExecuteResult { - msg: String::from("insert"), + Statement::Insert { + id, + username, + email, + } => StatementExecuteResult { + msg: String::from(format!("insert {id:?} {username:?} {email:?}")), }, Statement::Select => StatementExecuteResult { msg: String::from("select"), diff --git a/src/tokens.rs b/src/tokens.rs index a1a850b..14316a2 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -10,7 +10,7 @@ pub enum TokenData { String(String), } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct Location { /// file name pub file: String, From e78511f692db79eb2ae76554f9109388cd5cb128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Tue, 3 Jun 2025 17:53:22 +0200 Subject: [PATCH 3/6] feat(parser): implement semicolon-separated statements Add support for semicolon-terminated statements according to the updated grammar. This change enables executing multiple SQL statements in a single input by separating them with semicolons. Key improvements include: - Update grammar to require semicolons after statements - Add Semicolon token to the tokenizer - Implement error recovery by skipping to next semicolon on parse errors - Create helper functions for checking semicolons in statement parsers - Add tests for multiple statements and error conditions --- grammar.ebnf | 6 +- notes.org | 27 +++- src/meta_commands.rs | 2 +- src/parser.rs | 147 +++++++++++++++--- ...parser__tests__parse_insert_command-2.snap | 38 +---- ...parser__tests__parse_insert_command-3.snap | 8 +- ...ser__tests__parse_missing_semicolon-2.snap | 22 +++ ...arser__tests__parse_missing_semicolon.snap | 22 +++ ...parse_multiple_statements_with_insert.snap | 21 +++ src/statements.rs | 2 +- src/tokens.rs | 17 +- 11 files changed, 237 insertions(+), 75 deletions(-) create mode 100644 src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap create mode 100644 src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap create mode 100644 src/snapshots/osdb__parser__tests__parse_multiple_statements_with_insert.snap diff --git a/grammar.ebnf b/grammar.ebnf index 60e3641..2f39fd3 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -4,16 +4,18 @@ token ::= insert | meta-command | int | string + | semicolon | end-of-file /* command is second stage of parsing */ -command ::= cmd-insert - | cmd-select +command ::= cmd-insert semicolon + | cmd-select semicolon cmd-insert ::= insert int string string cmd-select ::= select insert ::= "insert" select ::= "select" +semicolon ::= ";" meta-command ::= "." "exit" | "about" diff --git a/notes.org b/notes.org index 223e8e9..8a40aac 100644 --- a/notes.org +++ b/notes.org @@ -231,14 +231,27 @@ i will use rustyline, since it seems like the most feature-complete * TODO .license meta-command * TODO .help meta-command -* TODO parse insert statements in the form +* DONE parse insert statements in the form insert -** TODO Row struct -** TODO parse row insert -** TODO separate statements with semicolons -** TODO in case of parse error, skip until next semicolon to better recover -** TODO serialize/deserialize row to/from raw bytes -*** TODO look for best practices for creating binary formats +** DONE parse row insert +* DONE separate statements with semicolons +* TODO this error message could be better +#+begin example +Error: unexpected token + ╭─[ :1:24 ] + │ + 1 │ insert 0 "user" "email" + │ │ + │ ╰─ found end of file "" + │ + │ Note: expected token type to be one of ["semicolon"] +───╯ +#+end example +* TODO correct all instances of in locations +* TODO meta-commands must be followed by end-of-file +* DONE in case of parse error, skip until next semicolon to better recover +* TODO serialize/deserialize row to/from raw bytes +** TODO look for best practices for creating binary formats * WAIT cli tests using insta-cmd https://insta.rs/docs/cmd/ diff --git a/src/meta_commands.rs b/src/meta_commands.rs index 135e1ab..fb5d950 100644 --- a/src/meta_commands.rs +++ b/src/meta_commands.rs @@ -1,6 +1,6 @@ use crate::branding; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub enum MetaCommand { Exit, About, diff --git a/src/parser.rs b/src/parser.rs index bd6b087..585f5f8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,40 +6,123 @@ use crate::{ tokens::{Location, Token, TokenData, tokenize}, }; +// Helper function to skip tokens until reaching a semicolon or end of file +// This helps with error recovery when a statement has a syntax error +fn skip_to_next_statement(tokens: &mut VecDeque) { + while let Some(token) = tokens.front() { + match token.data { + TokenData::Semicolon | TokenData::EndOfFile => break, + _ => { + tokens.pop_front(); + } + } + } + + // Consume the semicolon if that's what we stopped at + if tokens + .front() + .is_some_and(|t| matches!(t.data, TokenData::Semicolon)) + { + tokens.pop_front(); + } +} + +// Helper function to check for a semicolon after a statement +fn expect_semicolon(tokens: &mut VecDeque) -> Result<(), CommandParseError> { + if let Some(next_token) = tokens.front() { + match next_token.data { + TokenData::Semicolon => { + tokens.pop_front(); // Consume the semicolon + Ok(()) + } + _ => Err(CommandParseError::UnexpectedToken( + next_token.clone(), + &["semicolon"], + )), + } + } else { + // Even at the end of input, we need a semicolon + Err(CommandParseError::UnexpectedToken( + Token { + location: tokens.back().map_or_else( + || Location::new(String::from(""), 0, 0), + |t| t.location.clone(), + ), + data: TokenData::EndOfFile, + lexeme: String::new(), + }, + &["semicolon"], + )) + } +} + +fn parse_select_command( + tokens: &mut VecDeque, +) -> Result { + // Parse the select command (currently doesn't require additional tokens) + let cmd = Command::Statement(Statement::Select); + + // Check for semicolon after select command + expect_semicolon(tokens)?; + + Ok(cmd) +} + pub fn parse(file: String, input: String) -> Result, Vec> { let mut tokens: VecDeque<_> = tokenize(input, file) .map_err(|x| x.into_iter().map(|x| x.into()).collect::>())? .into(); let mut cmds = Vec::new(); let mut errs = Vec::new(); + while let Some(token) = tokens.pop_front() { match token.data { - crate::tokens::TokenData::Insert => match parse_insert_command(&mut tokens) { + TokenData::Insert => match parse_insert_command(&mut tokens) { Ok(cmd) => cmds.push(cmd), - Err(err) => errs.push(err), + Err(err) => { + errs.push(err); + skip_to_next_statement(&mut tokens); // Skip to next statement for error recovery + } }, - crate::tokens::TokenData::Select => cmds.push(Command::Statement(Statement::Select)), - crate::tokens::TokenData::MetaCommand(meta_command) => { - cmds.push(Command::MetaCommand(meta_command)) + TokenData::Select => match parse_select_command(&mut tokens) { + Ok(cmd) => cmds.push(cmd), + Err(err) => { + errs.push(err); + skip_to_next_statement(&mut tokens); // Skip to next statement for error recovery + } + }, + TokenData::MetaCommand(meta_command) => { + // Meta commands don't require semicolons per grammar + cmds.push(Command::MetaCommand(meta_command)); } - crate::tokens::TokenData::Int(_) => errs.push(CommandParseError::UnexpectedToken( - token, - &["statement", "meta command", "eof"], - )), - crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken( - token, - &["statement", "meta command", "eof"], - )), - crate::tokens::TokenData::EndOfFile => (), + TokenData::Semicolon => { + // Empty statement (just a semicolon) - ignore it + } + TokenData::Int(_) => { + errs.push(CommandParseError::UnexpectedToken( + token, + &["statement", "meta command", "eof"], + )); + skip_to_next_statement(&mut tokens); + } + TokenData::String(_) => { + errs.push(CommandParseError::UnexpectedToken( + token, + &["statement", "meta command", "eof"], + )); + skip_to_next_statement(&mut tokens); + } + TokenData::EndOfFile => (), // End of parsing } } + if errs.is_empty() { Ok(cmds) } else { Err(errs) } } fn parse_insert_command( tokens: &mut VecDeque, ) -> Result { - // According to grammar.ebnf, insert command should be: insert int string string + // According to grammar.ebnf, insert command should be: insert int string string semicolon // Parse the id (integer) let id_token = tokens.pop_front().ok_or_else(|| { @@ -106,6 +189,9 @@ fn parse_insert_command( _ => return Err(CommandParseError::UnexpectedToken(email_token, &["string"])), }; + // Check for semicolon after the insert command + expect_semicolon(tokens)?; + Ok(Command::Statement(Statement::Insert { id, username, @@ -122,8 +208,8 @@ mod tests { fn test_parse_single_correct() { let file = String::from(""); assert_debug_snapshot!(parse(file.clone(), String::from(".exit"))); - assert_debug_snapshot!(parse(file.clone(), String::from("select"))); - assert_debug_snapshot!(parse(file.clone(), String::from("sElEcT"))); + assert_debug_snapshot!(parse(file.clone(), String::from("select;"))); + assert_debug_snapshot!(parse(file.clone(), String::from("sElEcT;"))); } #[test] @@ -131,13 +217,23 @@ mod tests { let file = String::from(""); assert_debug_snapshot!(parse( file.clone(), - String::from(r#"insert 1 "username" "email@example.com""#) + String::from(r#"insert 1 "username" "email@example.com";"#) )); assert_debug_snapshot!(parse( file.clone(), - String::from(r#"insert "not_an_id" "username" "email@example.com""#) + String::from(r#"insert "not_an_id" "username" "email@example.com";"#) + )); + assert_debug_snapshot!(parse(file.clone(), String::from(r#"insert 1 "username";"#))); + } + + #[test] + fn test_parse_missing_semicolon() { + let file = String::from(""); + assert_debug_snapshot!(parse(file.clone(), String::from("select"))); + assert_debug_snapshot!(parse( + file.clone(), + String::from(r#"insert 1 "username" "email@example.com""#) )); - assert_debug_snapshot!(parse(file.clone(), String::from(r#"insert 1 "username""#))); } #[test] @@ -155,7 +251,16 @@ mod tests { let file = String::from(""); assert_debug_snapshot!(parse( file.clone(), - String::from(".exit select select select") + String::from(".exit select; select; select;") + )); + } + + #[test] + fn test_parse_multiple_statements_with_insert() { + let file = String::from(""); + assert_debug_snapshot!(parse( + file.clone(), + String::from(r#"select; insert 1 "user" "email@test.com"; select;"#) )); } diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap index 8582c85..72d9ec6 100644 --- a/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap @@ -1,6 +1,6 @@ --- source: src/parser.rs -expression: "parse(file.clone(),\nString::from(r#\"insert \"not_an_id\" \"username\" \"email@example.com\"\"#))" +expression: "parse(file.clone(),\nString::from(r#\"insert \"not_an_id\" \"username\" \"email@example.com\";\"#))" --- Err( [ @@ -20,41 +20,5 @@ Err( "integer", ], ), - UnexpectedToken( - Token { - location: Location { - file: "", - offset: 19, - length: 10, - }, - data: String( - "username", - ), - lexeme: "\"username\"", - }, - [ - "statement", - "meta command", - "eof", - ], - ), - UnexpectedToken( - Token { - location: Location { - file: "", - offset: 30, - length: 19, - }, - data: String( - "email@example.com", - ), - lexeme: "\"email@example.com\"", - }, - [ - "statement", - "meta command", - "eof", - ], - ), ], ) diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap index dc13e7a..a4fbebd 100644 --- a/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap @@ -1,6 +1,6 @@ --- source: src/parser.rs -expression: "parse(file.clone(), String::from(r#\"insert 1 \"username\"\"#))" +expression: "parse(file.clone(), String::from(r#\"insert 1 \"username\";\"#))" --- Err( [ @@ -9,10 +9,10 @@ Err( location: Location { file: "", offset: 19, - length: 0, + length: 1, }, - data: EndOfFile, - lexeme: "", + data: Semicolon, + lexeme: ";", }, [ "string", diff --git a/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap b/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap new file mode 100644 index 0000000..a6787ca --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap @@ -0,0 +1,22 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(),\nString::from(r#\"insert 1 \"username\" \"email@example.com\"\"#))" +--- +Err( + [ + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 39, + length: 0, + }, + data: EndOfFile, + lexeme: "", + }, + [ + "semicolon", + ], + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap b/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap new file mode 100644 index 0000000..ad0856f --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap @@ -0,0 +1,22 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(), String::from(\"select\"))" +--- +Err( + [ + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 6, + length: 0, + }, + data: EndOfFile, + lexeme: "", + }, + [ + "semicolon", + ], + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_multiple_statements_with_insert.snap b/src/snapshots/osdb__parser__tests__parse_multiple_statements_with_insert.snap new file mode 100644 index 0000000..0305714 --- /dev/null +++ b/src/snapshots/osdb__parser__tests__parse_multiple_statements_with_insert.snap @@ -0,0 +1,21 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(),\nString::from(r#\"select; insert 1 \"user\" \"email@test.com\"; select;\"#))" +--- +Ok( + [ + Statement( + Select, + ), + Statement( + Insert { + id: 1, + username: "user", + email: "email@test.com", + }, + ), + Statement( + Select, + ), + ], +) diff --git a/src/statements.rs b/src/statements.rs index 3799caf..cf5b3fa 100644 --- a/src/statements.rs +++ b/src/statements.rs @@ -20,7 +20,7 @@ impl Statement { username, email, } => StatementExecuteResult { - msg: String::from(format!("insert {id:?} {username:?} {email:?}")), + msg: format!("insert {id:?} {username:?} {email:?}"), }, Statement::Select => StatementExecuteResult { msg: String::from("select"), diff --git a/src/tokens.rs b/src/tokens.rs index 14316a2..ed2f23b 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,6 +1,6 @@ use crate::meta_commands::MetaCommand; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub enum TokenData { Insert, Select, @@ -8,6 +8,7 @@ pub enum TokenData { EndOfFile, Int(i64), String(String), + Semicolon, } #[derive(Debug, Eq, PartialEq, Clone)] @@ -46,7 +47,7 @@ impl Location { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct Token { /// Where in the input was this token found? pub location: Location, @@ -65,6 +66,7 @@ impl std::fmt::Display for Token { TokenData::EndOfFile => write!(f, "end of file"), TokenData::Int(x) => write!(f, "integer {x}"), TokenData::String(x) => write!(f, "string {x:?}"), + TokenData::Semicolon => write!(f, "semicolon"), }?; let lexeme = &self.lexeme; write!(f, " {lexeme:?}") @@ -307,6 +309,15 @@ impl Tokenizer { } } + fn scan_semicolon(&mut self) -> Result { + self.advance(); + Ok(Token { + location: self.previous_location(1), + data: TokenData::Semicolon, + lexeme: String::from(";"), + }) + } + fn scan_token(&mut self) -> Result, ScanError> { loop { if let Some(c) = self.peek() { @@ -318,6 +329,8 @@ impl Tokenizer { return self.scan_integer().map(Some); } else if c == '"' { return self.scan_string().map(Some); + } else if c == ';' { + return self.scan_semicolon().map(Some); } else if c.is_whitespace() { self.advance(); } else { From 567aa31c07c82908cb12989636dab88d001057ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Tue, 3 Jun 2025 19:12:50 +0200 Subject: [PATCH 4/6] feat(errors): improve error messages with example values Add token type examples to make error messages more helpful. Created an ExpectedToken enum to replace string literals for better type safety, added example values for each token type, and enhanced error display to show concrete examples of valid syntax. --- notes.org | 25 +++++++++++- src/command.rs | 39 ++++++++++++++++++- src/error_display.rs | 36 +++++++++++++---- src/parser.rs | 22 +++++------ ...parser__tests__parse_insert_command-2.snap | 2 +- ...parser__tests__parse_insert_command-3.snap | 2 +- ...ser__tests__parse_missing_semicolon-2.snap | 2 +- ...arser__tests__parse_missing_semicolon.snap | 2 +- 8 files changed, 105 insertions(+), 25 deletions(-) diff --git a/notes.org b/notes.org index 8a40aac..6ea2e37 100644 --- a/notes.org +++ b/notes.org @@ -235,7 +235,7 @@ i will use rustyline, since it seems like the most feature-complete insert ** DONE parse row insert * DONE separate statements with semicolons -* TODO this error message could be better +* DONE this error message could be better #+begin example Error: unexpected token ╭─[ :1:24 ] @@ -247,8 +247,29 @@ Error: unexpected token │ Note: expected token type to be one of ["semicolon"] ───╯ #+end example -* TODO correct all instances of in locations +** plan +1. Create an example mapping system + - Define a mapping of token types to example values + - Example: "integer" → "42", "string" → "example", "semicolon" → ";" +2. Enhance CommandParseError + - Add a method to generate user-friendly error messages + - Include both the expected token type and concrete examples +3. Implementation approach + - Create a static lookup table or function that returns examples + - Extend existing error handling to include examples in messages + - Make sure the examples follow SQL syntax conventions +4. Error display refinement + - Update error_display.rs to include these examples + - Format error messages to show both what was expected and example syntax +5. Testing + - Add tests that verify the error messages include helpful examples + - Ensure examples are contextually appropriate + +This will make errors like "expected semicolon" more helpful by showing "expected semicolon (example: ;)". +TODO * TODO correct all instances of in locations * TODO meta-commands must be followed by end-of-file +* TODO project code documentation +* TODO project usage documentation * DONE in case of parse error, skip until next semicolon to better recover * TODO serialize/deserialize row to/from raw bytes ** TODO look for best practices for creating binary formats diff --git a/src/command.rs b/src/command.rs index d19aeaa..43204a6 100644 --- a/src/command.rs +++ b/src/command.rs @@ -47,10 +47,47 @@ impl Command { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExpectedToken { + Integer, + String, + Semicolon, + Statement, + MetaCommand, + EndOfFile, +} + +impl ExpectedToken { + /// Returns an example value for this token type + pub fn example(&self) -> &'static str { + match self { + ExpectedToken::Integer => "42", + ExpectedToken::String => "\"example\"", + ExpectedToken::Semicolon => ";", + ExpectedToken::Statement => "select", + ExpectedToken::MetaCommand => ".exit", + ExpectedToken::EndOfFile => "", + } + } +} + +impl std::fmt::Display for ExpectedToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ExpectedToken::Integer => write!(f, "integer"), + ExpectedToken::String => write!(f, "string"), + ExpectedToken::Semicolon => write!(f, "semicolon"), + ExpectedToken::Statement => write!(f, "statement"), + ExpectedToken::MetaCommand => write!(f, "meta command"), + ExpectedToken::EndOfFile => write!(f, "end of file"), + } + } +} + #[derive(Debug)] pub enum CommandParseError { Scan(ScanError), - UnexpectedToken(Token, &'static [&'static str]), + UnexpectedToken(Token, &'static [ExpectedToken]), } impl From for Command { diff --git a/src/error_display.rs b/src/error_display.rs index 45b237c..92c8936 100644 --- a/src/error_display.rs +++ b/src/error_display.rs @@ -11,19 +11,41 @@ impl OSDBError for CommandParseError { CommandParseError::Scan(x) => { x.display(file, input); } - CommandParseError::UnexpectedToken(token, items) => { + CommandParseError::UnexpectedToken(token, expected_tokens) => { let location = (file, Into::>::into(&token.location)); - Report::build(ReportKind::Error, location.clone()) + + let mut report = Report::build(ReportKind::Error, location.clone()) .with_message("unexpected token") .with_label( Label::new(location.clone()) .with_color(Color::Red) .with_message(format!("found {token}")), - ) - .with_note(format!("expected token type to be one of {items:?}")) - .finish() - .eprint((file, Source::from(input))) - .unwrap() + ); + + // If we have expected tokens, show an example for the first one + if let Some(first_expected) = expected_tokens.get(0) { + let example = first_expected.example(); + + // Add a note with all expected types + let expected_types: Vec<_> = expected_tokens + .iter() + .map(|t| format!("{}", t)) + .collect(); + + // Use singular form when there's only one expected token type + match expected_types.as_slice() { + [single_type] => { + report = report.with_note(format!("expected: {}", single_type)); + }, + _ => { + report = report.with_note(format!("expected one of: {}", expected_types.join(", "))); + } + } + report = + report.with_help(format!("try a token of the expected type: {example}")) + } + + report.finish().eprint((file, Source::from(input))).unwrap() } } } diff --git a/src/parser.rs b/src/parser.rs index 585f5f8..912aba1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use crate::{ - command::{Command, CommandParseError}, + command::{Command, CommandParseError, ExpectedToken}, statements::Statement, tokens::{Location, Token, TokenData, tokenize}, }; @@ -37,7 +37,7 @@ fn expect_semicolon(tokens: &mut VecDeque) -> Result<(), C } _ => Err(CommandParseError::UnexpectedToken( next_token.clone(), - &["semicolon"], + &[ExpectedToken::Semicolon], )), } } else { @@ -51,7 +51,7 @@ fn expect_semicolon(tokens: &mut VecDeque) -> Result<(), C data: TokenData::EndOfFile, lexeme: String::new(), }, - &["semicolon"], + &[ExpectedToken::Semicolon], )) } } @@ -101,14 +101,14 @@ pub fn parse(file: String, input: String) -> Result, Vec { errs.push(CommandParseError::UnexpectedToken( token, - &["statement", "meta command", "eof"], + &[ExpectedToken::Statement, ExpectedToken::MetaCommand, ExpectedToken::EndOfFile], )); skip_to_next_statement(&mut tokens); } TokenData::String(_) => { errs.push(CommandParseError::UnexpectedToken( token, - &["statement", "meta command", "eof"], + &[ExpectedToken::Statement, ExpectedToken::MetaCommand, ExpectedToken::EndOfFile], )); skip_to_next_statement(&mut tokens); } @@ -135,13 +135,13 @@ fn parse_insert_command( data: TokenData::EndOfFile, lexeme: String::new(), }, - &["integer"], + &[ExpectedToken::Integer], ) })?; let id = match id_token.data { TokenData::Int(id) => id, - _ => return Err(CommandParseError::UnexpectedToken(id_token, &["integer"])), + _ => return Err(CommandParseError::UnexpectedToken(id_token, &[ExpectedToken::Integer])), }; // Parse the username (string) @@ -155,7 +155,7 @@ fn parse_insert_command( data: TokenData::EndOfFile, lexeme: String::new(), }, - &["string"], + &[ExpectedToken::String], ) })?; @@ -164,7 +164,7 @@ fn parse_insert_command( _ => { return Err(CommandParseError::UnexpectedToken( username_token, - &["string"], + &[ExpectedToken::String], )); } }; @@ -180,13 +180,13 @@ fn parse_insert_command( data: TokenData::EndOfFile, lexeme: String::new(), }, - &["string"], + &[ExpectedToken::String], ) })?; let email = match email_token.data { TokenData::String(email) => email, - _ => return Err(CommandParseError::UnexpectedToken(email_token, &["string"])), + _ => return Err(CommandParseError::UnexpectedToken(email_token, &[ExpectedToken::String])), }; // Check for semicolon after the insert command diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap index 72d9ec6..a4a6547 100644 --- a/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-2.snap @@ -17,7 +17,7 @@ Err( lexeme: "\"not_an_id\"", }, [ - "integer", + Integer, ], ), ], diff --git a/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap index a4fbebd..8a505fc 100644 --- a/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap +++ b/src/snapshots/osdb__parser__tests__parse_insert_command-3.snap @@ -15,7 +15,7 @@ Err( lexeme: ";", }, [ - "string", + String, ], ), ], diff --git a/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap b/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap index a6787ca..9802ae3 100644 --- a/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap +++ b/src/snapshots/osdb__parser__tests__parse_missing_semicolon-2.snap @@ -15,7 +15,7 @@ Err( lexeme: "", }, [ - "semicolon", + Semicolon, ], ), ], diff --git a/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap b/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap index ad0856f..92022f3 100644 --- a/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap +++ b/src/snapshots/osdb__parser__tests__parse_missing_semicolon.snap @@ -15,7 +15,7 @@ Err( lexeme: "", }, [ - "semicolon", + Semicolon, ], ), ], From 64d93e9a27e5df54658beaa589f33e72119ffe5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Tue, 3 Jun 2025 20:09:22 +0200 Subject: [PATCH 5/6] refactor(location): deduplicated usage of Location::new with same value into default --- notes.org | 2 +- src/command.rs | 3 +- src/error_display.rs | 82 +++++++++++++++++++++++++++++++------------- src/parser.rs | 54 ++++++++++++++++++----------- src/tokens.rs | 6 ++++ 5 files changed, 102 insertions(+), 45 deletions(-) diff --git a/notes.org b/notes.org index 6ea2e37..64f3976 100644 --- a/notes.org +++ b/notes.org @@ -266,7 +266,7 @@ Error: unexpected token - Ensure examples are contextually appropriate This will make errors like "expected semicolon" more helpful by showing "expected semicolon (example: ;)". -TODO * TODO correct all instances of in locations +* DONE correct all instances of in locations * TODO meta-commands must be followed by end-of-file * TODO project code documentation * TODO project usage documentation diff --git a/src/command.rs b/src/command.rs index 43204a6..10b1a97 100644 --- a/src/command.rs +++ b/src/command.rs @@ -1,6 +1,6 @@ use crate::meta_commands::{MetaCommand, MetaCommandExecuteResult}; use crate::statements::{Statement, StatementExecuteResult}; -use crate::tokens::{ScanError, Token}; +use crate::tokens::{Location, ScanError, Token}; #[derive(Debug)] pub enum Command { @@ -88,6 +88,7 @@ impl std::fmt::Display for ExpectedToken { pub enum CommandParseError { Scan(ScanError), UnexpectedToken(Token, &'static [ExpectedToken]), + UnexpectedEndOfFile(Location, &'static [ExpectedToken]), } impl From for Command { diff --git a/src/error_display.rs b/src/error_display.rs index 92c8936..6466248 100644 --- a/src/error_display.rs +++ b/src/error_display.rs @@ -1,4 +1,7 @@ -use crate::{command::CommandParseError, tokens::ScanError}; +use crate::{ + command::{CommandParseError, ExpectedToken}, + tokens::ScanError, +}; use ariadne::{Color, Label, Report, ReportKind, Source}; pub trait OSDBError { @@ -22,35 +25,68 @@ impl OSDBError for CommandParseError { .with_message(format!("found {token}")), ); - // If we have expected tokens, show an example for the first one - if let Some(first_expected) = expected_tokens.get(0) { - let example = first_expected.example(); - - // Add a note with all expected types - let expected_types: Vec<_> = expected_tokens - .iter() - .map(|t| format!("{}", t)) - .collect(); - - // Use singular form when there's only one expected token type - match expected_types.as_slice() { - [single_type] => { - report = report.with_note(format!("expected: {}", single_type)); - }, - _ => { - report = report.with_note(format!("expected one of: {}", expected_types.join(", "))); - } - } - report = - report.with_help(format!("try a token of the expected type: {example}")) - } + report = add_expected_tokens_to_report(report, expected_tokens); report.finish().eprint((file, Source::from(input))).unwrap() } + CommandParseError::UnexpectedEndOfFile(location, expected_tokens) => { + let location = (file, Into::>::into(location)); + let report = Report::build(ReportKind::Error, location) + .with_message("unexpected end of file"); + let report = add_expected_tokens_to_report(report, expected_tokens); + report.finish().eprint((file, Source::from(input))).unwrap() + } } } } +type OSDBReport<'a> = ariadne::ReportBuilder<'a, (&'a str, std::ops::Range)>; + +fn add_expected_tokens_to_report<'a>( + mut report: OSDBReport<'a>, + expected_tokens: &'static [ExpectedToken], +) -> OSDBReport<'a> { + // If we have expected tokens, show an example for the first one + if let Some(help) = expected_token_example_msg(expected_tokens) { + report = report.with_help(help) + } + + // If we have at least one expected token, show a message showing what type was expected + if let Some(note) = expected_token_msg(expected_tokens) { + report = report.with_note(note); + } + report +} + +fn expected_token_msg(expected_tokens: &'static [ExpectedToken]) -> Option { + if !expected_tokens.is_empty() { + // Add a note with all expected types + let expected_types: Vec<_> = expected_tokens.iter().map(|t| format!("{}", t)).collect(); + + // Use singular form when there's only one expected token type + Some(match expected_types.as_slice() { + [single_type] => { + format!("expected: {}", single_type) + } + _ => { + format!("expected one of: {}", expected_types.join(", ")) + } + }) + } else { + // there are no expected tokens + None + } +} + +fn expected_token_example_msg(expected_tokens: &'static [ExpectedToken]) -> Option { + if let Some(first_expected) = expected_tokens.first() { + let example = first_expected.example(); + Some(format!("try a token of the expected type: {example}")) + } else { + None + } +} + impl OSDBError for ScanError { fn display(&self, file: &str, input: &str) { let location = (file, Into::>::into(&self.location)); diff --git a/src/parser.rs b/src/parser.rs index 912aba1..3aa5ba3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -44,10 +44,9 @@ fn expect_semicolon(tokens: &mut VecDeque) -> Result<(), C // Even at the end of input, we need a semicolon Err(CommandParseError::UnexpectedToken( Token { - location: tokens.back().map_or_else( - || Location::new(String::from(""), 0, 0), - |t| t.location.clone(), - ), + location: tokens + .back() + .map_or_else(Location::default, |t| t.location.clone()), data: TokenData::EndOfFile, lexeme: String::new(), }, @@ -101,14 +100,22 @@ pub fn parse(file: String, input: String) -> Result, Vec { errs.push(CommandParseError::UnexpectedToken( token, - &[ExpectedToken::Statement, ExpectedToken::MetaCommand, ExpectedToken::EndOfFile], + &[ + ExpectedToken::Statement, + ExpectedToken::MetaCommand, + ExpectedToken::EndOfFile, + ], )); skip_to_next_statement(&mut tokens); } TokenData::String(_) => { errs.push(CommandParseError::UnexpectedToken( token, - &[ExpectedToken::Statement, ExpectedToken::MetaCommand, ExpectedToken::EndOfFile], + &[ + ExpectedToken::Statement, + ExpectedToken::MetaCommand, + ExpectedToken::EndOfFile, + ], )); skip_to_next_statement(&mut tokens); } @@ -128,10 +135,9 @@ fn parse_insert_command( let id_token = tokens.pop_front().ok_or_else(|| { CommandParseError::UnexpectedToken( Token { - location: tokens.back().map_or_else( - || Location::new(String::from(""), 0, 0), - |t| t.location.clone(), - ), + location: tokens + .back() + .map_or_else(Location::default, |t| t.location.clone()), data: TokenData::EndOfFile, lexeme: String::new(), }, @@ -141,17 +147,21 @@ fn parse_insert_command( let id = match id_token.data { TokenData::Int(id) => id, - _ => return Err(CommandParseError::UnexpectedToken(id_token, &[ExpectedToken::Integer])), + _ => { + return Err(CommandParseError::UnexpectedToken( + id_token, + &[ExpectedToken::Integer], + )); + } }; // Parse the username (string) let username_token = tokens.pop_front().ok_or_else(|| { CommandParseError::UnexpectedToken( Token { - location: tokens.back().map_or_else( - || Location::new(String::from(""), 0, 0), - |t| t.location.clone(), - ), + location: tokens + .back() + .map_or_else(Location::default, |t| t.location.clone()), data: TokenData::EndOfFile, lexeme: String::new(), }, @@ -173,10 +183,9 @@ fn parse_insert_command( let email_token = tokens.pop_front().ok_or_else(|| { CommandParseError::UnexpectedToken( Token { - location: tokens.back().map_or_else( - || Location::new(String::from(""), 0, 0), - |t| t.location.clone(), - ), + location: tokens + .back() + .map_or_else(Location::default, |t| t.location.clone()), data: TokenData::EndOfFile, lexeme: String::new(), }, @@ -186,7 +195,12 @@ fn parse_insert_command( let email = match email_token.data { TokenData::String(email) => email, - _ => return Err(CommandParseError::UnexpectedToken(email_token, &[ExpectedToken::String])), + _ => { + return Err(CommandParseError::UnexpectedToken( + email_token, + &[ExpectedToken::String], + )); + } }; // Check for semicolon after the insert command diff --git a/src/tokens.rs b/src/tokens.rs index ed2f23b..88610d2 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -30,6 +30,12 @@ impl From<&Location> for std::ops::Range { } } +impl Default for Location { + fn default() -> Self { + Self::new(String::from(""), 0, 0) + } +} + impl Location { /// ``` /// use osdb::tokens::Location; From 33c4edf91df882a3ef9e9065f75646092063eced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Tue, 3 Jun 2025 22:00:52 +0200 Subject: [PATCH 6/6] feat(grammar): meta-commands must be followed by end-of-file --- grammar.ebnf | 67 ++++++++++--------- notes.org | 2 +- src/parser.rs | 38 +++++++++-- ...rser__tests__meta_command_require_eof.snap | 22 ++++++ ...parser__tests__parse_multiple_correct.snap | 5 +- 5 files changed, 90 insertions(+), 44 deletions(-) create mode 100644 src/snapshots/osdb__parser__tests__meta_command_require_eof.snap diff --git a/grammar.ebnf b/grammar.ebnf index 2f39fd3..ff4bd2e 100644 --- a/grammar.ebnf +++ b/grammar.ebnf @@ -1,42 +1,43 @@ /* token is first stage of parsing */ -token ::= insert - | select - | meta-command - | int - | string - | semicolon - | end-of-file +token ::= insert + | select + | meta-command + | int + | string + | semicolon + | end-of-file /* command is second stage of parsing */ -command ::= cmd-insert semicolon - | cmd-select semicolon -cmd-insert ::= insert int string string -cmd-select ::= select +command ::= cmd-insert semicolon + | cmd-select semicolon +cmd-insert ::= insert int string string +cmd-select ::= select -insert ::= "insert" -select ::= "select" -semicolon ::= ";" +insert ::= "insert" +select ::= "select" +semicolon ::= ";" -meta-command ::= "." "exit" - | "about" - | "version" +meta-command ::= meta-command-verb end-of-file +meta-command-verb ::= ".exit" + | ".about" + | ".version" -int ::= sign? digit+ -sign ::= "+" - | "-" -digit ::= "0" - | "1" - | "2" - | "3" - | "4" - | "5" - | "6" - | "7" - | "8" - | "9" +int ::= sign? digit+ +sign ::= "+" + | "-" +digit ::= "0" + | "1" + | "2" + | "3" + | "4" + | "5" + | "6" + | "7" + | "8" + | "9" -string ::= '"' string-char* '"' -string-char ::= '\' utf8-char - | utf8-char-not-dbl-quote +string ::= '"' string-char* '"' +string-char ::= '\' utf8-char + | utf8-char-not-dbl-quote diff --git a/notes.org b/notes.org index 64f3976..bf78fcf 100644 --- a/notes.org +++ b/notes.org @@ -267,7 +267,7 @@ Error: unexpected token This will make errors like "expected semicolon" more helpful by showing "expected semicolon (example: ;)". * DONE correct all instances of in locations -* TODO meta-commands must be followed by end-of-file +* DONE meta-commands must be followed by end-of-file * TODO project code documentation * TODO project usage documentation * DONE in case of parse error, skip until next semicolon to better recover diff --git a/src/parser.rs b/src/parser.rs index 3aa5ba3..181f3e3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -91,8 +91,13 @@ pub fn parse(file: String, input: String) -> Result, Vec { - // Meta commands don't require semicolons per grammar - cmds.push(Command::MetaCommand(meta_command)); + match parse_meta_command(meta_command, &mut tokens) { + Ok(cmd) => cmds.push(cmd), + Err(err) => { + errs.push(err); + skip_to_next_statement(&mut tokens); // Skip to next statement for error recovery + } + } } TokenData::Semicolon => { // Empty statement (just a semicolon) - ignore it @@ -126,6 +131,24 @@ pub fn parse(file: String, input: String) -> Result, Vec, +) -> Result { + if let Some(token) = tokens.pop_front() { + if matches!(token.data, TokenData::EndOfFile) { + Ok(Command::MetaCommand(meta_command)) + } else { + Err(CommandParseError::UnexpectedToken( + token, + &[ExpectedToken::EndOfFile], + )) + } + } else { + Ok(Command::MetaCommand(meta_command)) + } +} + fn parse_insert_command( tokens: &mut VecDeque, ) -> Result { @@ -263,10 +286,13 @@ mod tests { #[test] fn test_parse_multiple_correct() { let file = String::from(""); - assert_debug_snapshot!(parse( - file.clone(), - String::from(".exit select; select; select;") - )); + assert_debug_snapshot!(parse(file.clone(), String::from("select; select; select;"))); + } + + #[test] + fn test_meta_command_require_eof() { + let file = String::from(""); + assert_debug_snapshot!(parse(file.clone(), String::from(".exit select; select;"))); } #[test] diff --git a/src/snapshots/osdb__parser__tests__meta_command_require_eof.snap b/src/snapshots/osdb__parser__tests__meta_command_require_eof.snap new file mode 100644 index 0000000..fb9fffe --- /dev/null +++ b/src/snapshots/osdb__parser__tests__meta_command_require_eof.snap @@ -0,0 +1,22 @@ +--- +source: src/parser.rs +expression: "parse(file.clone(), String::from(\".exit select; select;\"))" +--- +Err( + [ + UnexpectedToken( + Token { + location: Location { + file: "", + offset: 6, + length: 6, + }, + data: Select, + lexeme: "select", + }, + [ + EndOfFile, + ], + ), + ], +) diff --git a/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap b/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap index 4642731..e3f4512 100644 --- a/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap +++ b/src/snapshots/osdb__parser__tests__parse_multiple_correct.snap @@ -1,12 +1,9 @@ --- source: src/parser.rs -expression: "parse(file.clone(), String::from(\".exit select select select\"))" +expression: "parse(file.clone(), String::from(\"select; select; select;\"))" --- Ok( [ - MetaCommand( - Exit, - ), Statement( Select, ),