Compare commits

...

3 commits

16 changed files with 396 additions and 33 deletions

View file

@@ -212,7 +212,24 @@ i will use rustyline, since it seems like the most feature-complete
 * DONE remove uneeded error variants
-* TODO cli tests using insta-cmd
+* DONE parse integers
+** DONE Function to get a token until condition is false
+** DONE Parse the integer
+* DONE parse strings
+* TODO better error message display for unclosed " in string
+* TODO parse insert statements in the form
+  insert <id:int> <username:string> <email:string>
+** TODO Row struct
+** TODO parse row insert
+** TODO serialize/deserialize row to/from raw bytes
+*** TODO look for best practices for creating binary formats
+* WAIT cli tests using insta-cmd
 https://insta.rs/docs/cmd/
 * WAIT autocompletion
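
The planned insert <id:int> <username:string> <email:string> statement points at a fixed-layout Row. As a sketch of the serialize/deserialize TODO item — every name and column width below is hypothetical (the widths follow the classic C sqlite-tutorial layout; none of this exists in these commits):

// Hypothetical sketch: 8-byte little-endian id, then zero-padded
// fixed-width username and email columns.
const USERNAME_LEN: usize = 32;
const EMAIL_LEN: usize = 255;
const ROW_LEN: usize = 8 + USERNAME_LEN + EMAIL_LEN;

pub struct Row {
    pub id: i64,
    pub username: String,
    pub email: String,
}

impl Row {
    pub fn to_bytes(&self) -> [u8; ROW_LEN] {
        let mut buf = [0u8; ROW_LEN];
        buf[..8].copy_from_slice(&self.id.to_le_bytes());
        let user = self.username.as_bytes();
        let n = user.len().min(USERNAME_LEN);
        buf[8..8 + n].copy_from_slice(&user[..n]);
        let email = self.email.as_bytes();
        let n = email.len().min(EMAIL_LEN);
        buf[8 + USERNAME_LEN..8 + USERNAME_LEN + n].copy_from_slice(&email[..n]);
        buf
    }

    pub fn from_bytes(buf: &[u8; ROW_LEN]) -> Row {
        // Fields were zero-padded on the way in, so strip trailing NULs.
        let text = |b: &[u8]| String::from_utf8_lossy(b).trim_end_matches('\0').to_string();
        Row {
            id: i64::from_le_bytes(buf[..8].try_into().unwrap()),
            username: text(&buf[8..8 + USERNAME_LEN]),
            email: text(&buf[8 + USERNAME_LEN..]),
        }
    }
}

Fixed-width rows keep every record the same size, so row i of a page sits at byte offset i * ROW_LEN — the usual starting point before designing a richer binary format.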

View file

@@ -1,6 +1,6 @@
 use std::path::PathBuf;
 
-use rustyline::{history::FileHistory, Editor};
+use rustyline::{Editor, history::FileHistory};
 
 fn xdg_state_dir() -> Option<PathBuf> {
     if let Ok(dir) = std::env::var("XDG_STATE_DIR") {
@@ -31,7 +31,9 @@ pub fn history_file() -> Option<std::path::PathBuf> {
         Some(state.join("cli_history"))
     } else {
         eprintln!("Warning: failed to find or create XDG_STATE_DIR for osdb.");
-        eprintln!("Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory.");
+        eprintln!(
+            "Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory."
+        );
         None
     }
 }

View file

@@ -1,6 +1,6 @@
 use crate::meta_commands::{MetaCommand, MetaCommandExecuteResult};
 use crate::statements::{Statement, StatementExecuteResult};
-use crate::tokens::ScanError;
+use crate::tokens::{ScanError, Token};
 
 #[derive(Debug)]
 pub enum Command {
@@ -50,6 +50,7 @@ impl Command {
 #[derive(Debug)]
 pub enum CommandParseError {
     Scan(ScanError),
+    UnexpectedToken(Token, &'static [&'static str]),
 }
 
 impl From<MetaCommand> for Command {

View file

@@ -7,8 +7,25 @@ pub trait OSDBError {
 impl OSDBError for CommandParseError {
     fn display(&self, file: &str, input: &str) {
-        let CommandParseError::Scan(x) = self;
-        x.display(file, input);
+        match self {
+            CommandParseError::Scan(x) => {
+                x.display(file, input);
+            }
+            CommandParseError::UnexpectedToken(token, items) => {
+                let location = (file, Into::<std::ops::Range<usize>>::into(&token.location));
+                Report::build(ReportKind::Error, location.clone())
+                    .with_message("unexpected token")
+                    .with_label(
+                        Label::new(location.clone())
+                            .with_color(Color::Red)
+                            .with_message(format!("found {token}")),
+                    )
+                    .with_note(format!("expected token type to be one of {items:?}"))
+                    .finish()
+                    .print((file, Source::from(input)))
+                    .unwrap()
+            }
+        }
     }
 }
@@ -22,7 +39,6 @@ impl OSDBError for ScanError {
                     .with_color(Color::Red)
                     .with_message(format!("{self}")),
             )
-            .with_help("Make sure you don't have any typos or unexpected characters.")
             .finish()
             .print((file, Source::from(input)))
             .unwrap();

View file

@@ -3,6 +3,14 @@ pub enum MetaCommand {
     Exit,
 }
 
+impl std::fmt::Display for MetaCommand {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            MetaCommand::Exit => write!(f, "exit"),
+        }
+    }
+}
+
 pub struct MetaCommandExecuteResult {
     pub should_exit: bool,
 }

View file

@@ -11,7 +11,7 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
         .map_err(|x| x.into_iter().map(|x| x.into()).collect::<Vec<_>>())?
         .into();
     let mut cmds = Vec::new();
-    let errs = Vec::new();
+    let mut errs = Vec::new();
     while let Some(token) = tokens.pop_front() {
         match token.data {
             crate::tokens::TokenData::Insert => cmds.push(Command::Statement(Statement::Insert)),
@@ -19,6 +19,14 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
             crate::tokens::TokenData::MetaCommand(meta_command) => {
                 cmds.push(Command::MetaCommand(meta_command))
             }
+            crate::tokens::TokenData::Int(_) => errs.push(CommandParseError::UnexpectedToken(
+                token,
+                &["statement", "meta command", "eof"],
+            )),
+            crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken(
+                token,
+                &["statement", "meta command", "eof"],
+            )),
             crate::tokens::TokenData::EndOfFile => (),
         }
     }
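
Both new arms push the identical UnexpectedToken error, so a later cleanup could fold them into one or-pattern arm — a hypothetical refactor, not something these commits contain:

            crate::tokens::TokenData::Int(_) | crate::tokens::TokenData::String(_) => {
                errs.push(CommandParseError::UnexpectedToken(
                    token,
                    &["statement", "meta command", "eof"],
                ))
            }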

View file

@@ -1,26 +1,30 @@
 ---
 source: src/tokens.rs
-expression: scanerrors
+expression: "tokenize(\"salact +\".to_string(), \"src/statement.sql\".to_string())"
 ---
-[
-    ScanError {
-        location: Location {
-            file: "src/statement.sql",
-            offset: 0,
-            length: 6,
-        },
-        kind: UnknownKeyword(
-            "salact",
-        ),
-    },
-    ScanError {
-        location: Location {
-            file: "src/statement.sql",
-            offset: 7,
-            length: 1,
-        },
-        kind: UnexpectedChar(
-            '+',
-        ),
-    },
-]
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/statement.sql",
+                offset: 0,
+                length: 6,
+            },
+            kind: UnknownKeyword(
+                "salact",
+            ),
+        },
+        ScanError {
+            location: Location {
+                file: "src/statement.sql",
+                offset: 7,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)

View file

@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-10\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            data: Int(
+                -10,
+            ),
+            lexeme: "-10",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 3,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)

View file

@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            data: Int(
+                0,
+            ),
+            lexeme: "0",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 1,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)

View file

@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 2,
+            },
+            data: Int(
+                0,
+            ),
+            lexeme: "-0",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 2,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)

View file

@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"--0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)

View file

@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"++0\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 3,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)

View file

@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"-\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)

View file

@@ -0,0 +1,20 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"+\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Err(
+    [
+        ScanError {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 1,
+            },
+            kind: ParseIntError(
+                ParseIntError {
+                    kind: InvalidDigit,
+                },
+            ),
+        },
+    ],
+)

View file

@@ -0,0 +1,28 @@
+---
+source: src/tokens.rs
+expression: "tokenize(\"10\".to_string(), \"src/ints.sql\".to_string(),)"
+---
+Ok(
+    [
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 0,
+                length: 2,
+            },
+            data: Int(
+                10,
+            ),
+            lexeme: "10",
+        },
+        Token {
+            location: Location {
+                file: "src/ints.sql",
+                offset: 2,
+                length: 0,
+            },
+            data: EndOfFile,
+            lexeme: "",
+        },
+    ],
+)

View file

@@ -6,6 +6,8 @@ pub enum TokenData {
     Select,
     MetaCommand(MetaCommand),
     EndOfFile,
+    Int(i64),
+    String(String),
 }
 
 #[derive(Debug, Eq, PartialEq)]
@@ -54,6 +56,21 @@ pub struct Token {
     pub lexeme: String,
 }
 
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match &self.data {
+            TokenData::Insert => write!(f, "insert statement"),
+            TokenData::Select => write!(f, "select statement"),
+            TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
+            TokenData::EndOfFile => write!(f, "end of file"),
+            TokenData::Int(x) => write!(f, "integer {x}"),
+            TokenData::String(x) => write!(f, "string {x:?}"),
+        }?;
+        let lexeme = &self.lexeme;
+        write!(f, " {lexeme:?}")
+    }
+}
+
 struct Tokenizer {
     input: String,
     file: String,
@@ -67,6 +84,8 @@ pub enum ScanErrorKind {
     UnexpectedEndOfInput,
     UnknownKeyword(String),
     UnknownMetaCommand(String),
+    ParseIntError(std::num::ParseIntError),
+    UnexpectedEndOfInputWhileLookingForMatching(char, Location),
 }
 
 impl std::fmt::Display for ScanErrorKind {
@@ -76,6 +95,11 @@ impl std::fmt::Display for ScanErrorKind {
             ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
             ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
             ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
+            ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
+            ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
+                f,
+                "unexpected end of input while looking for matching {c:?}"
+            ),
         }
     }
 }
@@ -107,6 +131,10 @@ impl Tokenizer {
         Location::new(self.file.clone(), self.offset, length)
     }
 
+    fn previous_location(&self, length: usize) -> Location {
+        Location::new(self.file.clone(), self.offset - 1, length)
+    }
+
     fn is_at_end(&self) -> bool {
         self.offset >= self.input.len()
     }
@@ -206,6 +234,77 @@ impl Tokenizer {
         c.is_alphanumeric() || c == '_'
     }
 
+    fn digit(c: char) -> bool {
+        c.is_ascii_digit() || c == '-' || c == '+'
+    }
+
+    fn scan_integer(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut length = 0;
+        if let Some(c) = self.advance() {
+            word.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.peek() {
+            if Self::digit(c) {
+                word.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+            length += 1;
+        }
+        match word.parse::<i64>() {
+            Ok(int) => Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::Int(int),
+                lexeme: word,
+            }),
+            Err(e) => Err(ScanError {
+                location: Location::new(self.file.clone(), start_offset, length),
+                kind: ScanErrorKind::ParseIntError(e),
+            }),
+        }
+    }
+
+    fn scan_string(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut lexeme = String::new();
+        let mut length = 0;
+        let mut valid = false;
+        if let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+            if c == '"' {
+                valid = true;
+                break;
+            } else {
+                word.push(c);
+            }
+        }
+        if valid {
+            Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::String(word),
+                lexeme,
+            })
+        } else {
+            Err(ScanError {
+                location: self.previous_location(0),
+                kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
+                    '"',
+                    Location::new(self.file.clone(), start_offset, 1),
+                ),
+            })
+        }
+    }
+
     fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
         loop {
             if let Some(c) = self.peek() {
@@ -213,6 +312,10 @@ impl Tokenizer {
                 return self.scan_identifier_or_keyword().map(Some);
             } else if c == '.' {
                 return self.scan_meta_command().map(Some);
+            } else if Self::digit(c) {
+                return self.scan_integer().map(Some);
+            } else if c == '"' {
+                return self.scan_string().map(Some);
             } else if c.is_whitespace() {
                 self.advance();
             } else {
@@ -307,9 +410,21 @@ mod tests {
     #[test]
     fn test_tokenizer_errors() {
-        let scanerrors = tokenize("salact +".to_string(), "src/statement.sql".to_string())
-            .err()
-            .unwrap();
-        assert_debug_snapshot!(scanerrors);
+        assert_debug_snapshot!(tokenize(
+            "salact +".to_string(),
+            "src/statement.sql".to_string()
+        ));
+    }
+
+    #[test]
+    fn test_tokenizer_integers() {
+        assert_debug_snapshot!(tokenize("10".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-10".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("--0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("++0".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("-".to_string(), "src/ints.sql".to_string(),));
+        assert_debug_snapshot!(tokenize("+".to_string(), "src/ints.sql".to_string(),));
     }
 }
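
The unclosed-" TODO item already has its error variant wired through the scanner, so a companion snapshot test could look like this (hypothetical — neither the test nor src/strings.sql is in these commits):

    #[test]
    fn test_tokenizer_unclosed_string() {
        // Should hit UnexpectedEndOfInputWhileLookingForMatching('"', ..),
        // since scan_string runs out of input before finding a closing quote.
        assert_debug_snapshot!(tokenize("\"abc".to_string(), "src/strings.sql".to_string()));
    }

Running it once produces a new .snap file to accept or reject with cargo insta review.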