feat(parser): implement semicolon-separated statements
Add support for semicolon-terminated statements according to the updated grammar. This change enables executing multiple SQL statements in a single input by separating them with semicolons.

Key improvements include:
- Update grammar to require semicolons after statements
- Add Semicolon token to the tokenizer
- Implement error recovery by skipping to the next semicolon on parse errors
- Create helper functions for checking semicolons in statement parsers
- Add tests for multiple statements and error conditions
This commit is contained in:
parent
28cb288eaf
commit
e78511f692
11 changed files with 237 additions and 75 deletions
|
|
@ -1,6 +1,6 @@
|
|||
use crate::branding;
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub enum MetaCommand {
|
||||
Exit,
|
||||
About,
|
||||
|
|
|
|||
147
src/parser.rs
147
src/parser.rs
|
|
@ -6,40 +6,123 @@ use crate::{
|
|||
tokens::{Location, Token, TokenData, tokenize},
|
||||
};
|
||||
|
||||
// Helper function to skip tokens until reaching a semicolon or end of file
|
||||
// This helps with error recovery when a statement has a syntax error
|
||||
fn skip_to_next_statement(tokens: &mut VecDeque<Token>) {
|
||||
while let Some(token) = tokens.front() {
|
||||
match token.data {
|
||||
TokenData::Semicolon | TokenData::EndOfFile => break,
|
||||
_ => {
|
||||
tokens.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Consume the semicolon if that's what we stopped at
|
||||
if tokens
|
||||
.front()
|
||||
.is_some_and(|t| matches!(t.data, TokenData::Semicolon))
|
||||
{
|
||||
tokens.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to check for a semicolon after a statement
|
||||
fn expect_semicolon(tokens: &mut VecDeque<crate::tokens::Token>) -> Result<(), CommandParseError> {
|
||||
if let Some(next_token) = tokens.front() {
|
||||
match next_token.data {
|
||||
TokenData::Semicolon => {
|
||||
tokens.pop_front(); // Consume the semicolon
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(CommandParseError::UnexpectedToken(
|
||||
next_token.clone(),
|
||||
&["semicolon"],
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
// Even at the end of input, we need a semicolon
|
||||
Err(CommandParseError::UnexpectedToken(
|
||||
Token {
|
||||
location: tokens.back().map_or_else(
|
||||
|| Location::new(String::from("<unknown>"), 0, 0),
|
||||
|t| t.location.clone(),
|
||||
),
|
||||
data: TokenData::EndOfFile,
|
||||
lexeme: String::new(),
|
||||
},
|
||||
&["semicolon"],
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_select_command(
|
||||
tokens: &mut VecDeque<crate::tokens::Token>,
|
||||
) -> Result<Command, CommandParseError> {
|
||||
// Parse the select command (currently doesn't require additional tokens)
|
||||
let cmd = Command::Statement(Statement::Select);
|
||||
|
||||
// Check for semicolon after select command
|
||||
expect_semicolon(tokens)?;
|
||||
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandParseError>> {
|
||||
let mut tokens: VecDeque<_> = tokenize(input, file)
|
||||
.map_err(|x| x.into_iter().map(|x| x.into()).collect::<Vec<_>>())?
|
||||
.into();
|
||||
let mut cmds = Vec::new();
|
||||
let mut errs = Vec::new();
|
||||
|
||||
while let Some(token) = tokens.pop_front() {
|
||||
match token.data {
|
||||
crate::tokens::TokenData::Insert => match parse_insert_command(&mut tokens) {
|
||||
TokenData::Insert => match parse_insert_command(&mut tokens) {
|
||||
Ok(cmd) => cmds.push(cmd),
|
||||
Err(err) => errs.push(err),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
skip_to_next_statement(&mut tokens); // Skip to next statement for error recovery
|
||||
}
|
||||
},
|
||||
crate::tokens::TokenData::Select => cmds.push(Command::Statement(Statement::Select)),
|
||||
crate::tokens::TokenData::MetaCommand(meta_command) => {
|
||||
cmds.push(Command::MetaCommand(meta_command))
|
||||
TokenData::Select => match parse_select_command(&mut tokens) {
|
||||
Ok(cmd) => cmds.push(cmd),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
skip_to_next_statement(&mut tokens); // Skip to next statement for error recovery
|
||||
}
|
||||
},
|
||||
TokenData::MetaCommand(meta_command) => {
|
||||
// Meta commands don't require semicolons per grammar
|
||||
cmds.push(Command::MetaCommand(meta_command));
|
||||
}
|
||||
crate::tokens::TokenData::Int(_) => errs.push(CommandParseError::UnexpectedToken(
|
||||
token,
|
||||
&["statement", "meta command", "eof"],
|
||||
)),
|
||||
crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken(
|
||||
token,
|
||||
&["statement", "meta command", "eof"],
|
||||
)),
|
||||
crate::tokens::TokenData::EndOfFile => (),
|
||||
TokenData::Semicolon => {
|
||||
// Empty statement (just a semicolon) - ignore it
|
||||
}
|
||||
TokenData::Int(_) => {
|
||||
errs.push(CommandParseError::UnexpectedToken(
|
||||
token,
|
||||
&["statement", "meta command", "eof"],
|
||||
));
|
||||
skip_to_next_statement(&mut tokens);
|
||||
}
|
||||
TokenData::String(_) => {
|
||||
errs.push(CommandParseError::UnexpectedToken(
|
||||
token,
|
||||
&["statement", "meta command", "eof"],
|
||||
));
|
||||
skip_to_next_statement(&mut tokens);
|
||||
}
|
||||
TokenData::EndOfFile => (), // End of parsing
|
||||
}
|
||||
}
|
||||
|
||||
if errs.is_empty() { Ok(cmds) } else { Err(errs) }
|
||||
}
|
||||
|
||||
fn parse_insert_command(
|
||||
tokens: &mut VecDeque<crate::tokens::Token>,
|
||||
) -> Result<Command, CommandParseError> {
|
||||
// According to grammar.ebnf, insert command should be: insert int string string
|
||||
// According to grammar.ebnf, insert command should be: insert int string string semicolon
|
||||
|
||||
// Parse the id (integer)
|
||||
let id_token = tokens.pop_front().ok_or_else(|| {
|
||||
|
|
@ -106,6 +189,9 @@ fn parse_insert_command(
|
|||
_ => return Err(CommandParseError::UnexpectedToken(email_token, &["string"])),
|
||||
};
|
||||
|
||||
// Check for semicolon after the insert command
|
||||
expect_semicolon(tokens)?;
|
||||
|
||||
Ok(Command::Statement(Statement::Insert {
|
||||
id,
|
||||
username,
|
||||
|
|
@ -122,8 +208,8 @@ mod tests {
|
|||
fn test_parse_single_correct() {
|
||||
let file = String::from("<stdin>");
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from(".exit")));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from("select")));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from("sElEcT")));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from("select;")));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from("sElEcT;")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -131,13 +217,23 @@ mod tests {
|
|||
let file = String::from("<stdin>");
|
||||
assert_debug_snapshot!(parse(
|
||||
file.clone(),
|
||||
String::from(r#"insert 1 "username" "email@example.com""#)
|
||||
String::from(r#"insert 1 "username" "email@example.com";"#)
|
||||
));
|
||||
assert_debug_snapshot!(parse(
|
||||
file.clone(),
|
||||
String::from(r#"insert "not_an_id" "username" "email@example.com""#)
|
||||
String::from(r#"insert "not_an_id" "username" "email@example.com";"#)
|
||||
));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from(r#"insert 1 "username";"#)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_missing_semicolon() {
|
||||
let file = String::from("<stdin>");
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from("select")));
|
||||
assert_debug_snapshot!(parse(
|
||||
file.clone(),
|
||||
String::from(r#"insert 1 "username" "email@example.com""#)
|
||||
));
|
||||
assert_debug_snapshot!(parse(file.clone(), String::from(r#"insert 1 "username""#)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -155,7 +251,16 @@ mod tests {
|
|||
let file = String::from("<stdin>");
|
||||
assert_debug_snapshot!(parse(
|
||||
file.clone(),
|
||||
String::from(".exit select select select")
|
||||
String::from(".exit select; select; select;")
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_multiple_statements_with_insert() {
|
||||
let file = String::from("<stdin>");
|
||||
assert_debug_snapshot!(parse(
|
||||
file.clone(),
|
||||
String::from(r#"select; insert 1 "user" "email@test.com"; select;"#)
|
||||
));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: src/parser.rs
|
||||
expression: "parse(file.clone(),\nString::from(r#\"insert \"not_an_id\" \"username\" \"email@example.com\"\"#))"
|
||||
expression: "parse(file.clone(),\nString::from(r#\"insert \"not_an_id\" \"username\" \"email@example.com\";\"#))"
|
||||
---
|
||||
Err(
|
||||
[
|
||||
|
|
@ -20,41 +20,5 @@ Err(
|
|||
"integer",
|
||||
],
|
||||
),
|
||||
UnexpectedToken(
|
||||
Token {
|
||||
location: Location {
|
||||
file: "<stdin>",
|
||||
offset: 19,
|
||||
length: 10,
|
||||
},
|
||||
data: String(
|
||||
"username",
|
||||
),
|
||||
lexeme: "\"username\"",
|
||||
},
|
||||
[
|
||||
"statement",
|
||||
"meta command",
|
||||
"eof",
|
||||
],
|
||||
),
|
||||
UnexpectedToken(
|
||||
Token {
|
||||
location: Location {
|
||||
file: "<stdin>",
|
||||
offset: 30,
|
||||
length: 19,
|
||||
},
|
||||
data: String(
|
||||
"email@example.com",
|
||||
),
|
||||
lexeme: "\"email@example.com\"",
|
||||
},
|
||||
[
|
||||
"statement",
|
||||
"meta command",
|
||||
"eof",
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: src/parser.rs
|
||||
expression: "parse(file.clone(), String::from(r#\"insert 1 \"username\"\"#))"
|
||||
expression: "parse(file.clone(), String::from(r#\"insert 1 \"username\";\"#))"
|
||||
---
|
||||
Err(
|
||||
[
|
||||
|
|
@ -9,10 +9,10 @@ Err(
|
|||
location: Location {
|
||||
file: "<stdin>",
|
||||
offset: 19,
|
||||
length: 0,
|
||||
length: 1,
|
||||
},
|
||||
data: EndOfFile,
|
||||
lexeme: "",
|
||||
data: Semicolon,
|
||||
lexeme: ";",
|
||||
},
|
||||
[
|
||||
"string",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
source: src/parser.rs
|
||||
expression: "parse(file.clone(),\nString::from(r#\"insert 1 \"username\" \"email@example.com\"\"#))"
|
||||
---
|
||||
Err(
|
||||
[
|
||||
UnexpectedToken(
|
||||
Token {
|
||||
location: Location {
|
||||
file: "<stdin>",
|
||||
offset: 39,
|
||||
length: 0,
|
||||
},
|
||||
data: EndOfFile,
|
||||
lexeme: "",
|
||||
},
|
||||
[
|
||||
"semicolon",
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
source: src/parser.rs
|
||||
expression: "parse(file.clone(), String::from(\"select\"))"
|
||||
---
|
||||
Err(
|
||||
[
|
||||
UnexpectedToken(
|
||||
Token {
|
||||
location: Location {
|
||||
file: "<stdin>",
|
||||
offset: 6,
|
||||
length: 0,
|
||||
},
|
||||
data: EndOfFile,
|
||||
lexeme: "",
|
||||
},
|
||||
[
|
||||
"semicolon",
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
---
|
||||
source: src/parser.rs
|
||||
expression: "parse(file.clone(),\nString::from(r#\"select; insert 1 \"user\" \"email@test.com\"; select;\"#))"
|
||||
---
|
||||
Ok(
|
||||
[
|
||||
Statement(
|
||||
Select,
|
||||
),
|
||||
Statement(
|
||||
Insert {
|
||||
id: 1,
|
||||
username: "user",
|
||||
email: "email@test.com",
|
||||
},
|
||||
),
|
||||
Statement(
|
||||
Select,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
|
@ -20,7 +20,7 @@ impl Statement {
|
|||
username,
|
||||
email,
|
||||
} => StatementExecuteResult {
|
||||
msg: String::from(format!("insert {id:?} {username:?} {email:?}")),
|
||||
msg: format!("insert {id:?} {username:?} {email:?}"),
|
||||
},
|
||||
Statement::Select => StatementExecuteResult {
|
||||
msg: String::from("select"),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::meta_commands::MetaCommand;
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub enum TokenData {
|
||||
Insert,
|
||||
Select,
|
||||
|
|
@ -8,6 +8,7 @@ pub enum TokenData {
|
|||
EndOfFile,
|
||||
Int(i64),
|
||||
String(String),
|
||||
Semicolon,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
|
|
@ -46,7 +47,7 @@ impl Location {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub struct Token {
|
||||
/// Where in the input was this token found?
|
||||
pub location: Location,
|
||||
|
|
@ -65,6 +66,7 @@ impl std::fmt::Display for Token {
|
|||
TokenData::EndOfFile => write!(f, "end of file"),
|
||||
TokenData::Int(x) => write!(f, "integer {x}"),
|
||||
TokenData::String(x) => write!(f, "string {x:?}"),
|
||||
TokenData::Semicolon => write!(f, "semicolon"),
|
||||
}?;
|
||||
let lexeme = &self.lexeme;
|
||||
write!(f, " {lexeme:?}")
|
||||
|
|
@ -307,6 +309,15 @@ impl Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
fn scan_semicolon(&mut self) -> Result<Token, ScanError> {
|
||||
self.advance();
|
||||
Ok(Token {
|
||||
location: self.previous_location(1),
|
||||
data: TokenData::Semicolon,
|
||||
lexeme: String::from(";"),
|
||||
})
|
||||
}
|
||||
|
||||
fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
|
||||
loop {
|
||||
if let Some(c) = self.peek() {
|
||||
|
|
@ -318,6 +329,8 @@ impl Tokenizer {
|
|||
return self.scan_integer().map(Some);
|
||||
} else if c == '"' {
|
||||
return self.scan_string().map(Some);
|
||||
} else if c == ';' {
|
||||
return self.scan_semicolon().map(Some);
|
||||
} else if c.is_whitespace() {
|
||||
self.advance();
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue