Compare commits

...

3 commits

16 changed files with 396 additions and 33 deletions

View file

@ -212,7 +212,24 @@ i will use rustyline, since it seems like the most feature-complete
* DONE remove unneeded error variants
* TODO cli tests using insta-cmd
* DONE parse integers
** DONE Function to get a token until condition is false
** DONE Parse the integer
* DONE parse strings
* TODO better error message display for unclosed " in string
* TODO parse insert statements in the form
insert <id:int> <username:string> <email:string>
** TODO Row struct
** TODO parse row insert
** TODO serialize/deserialize row to/from raw bytes
*** TODO look for best practices for creating binary formats
* WAIT cli tests using insta-cmd
https://insta.rs/docs/cmd/
* WAIT autocompletion

View file

@ -1,6 +1,6 @@
use std::path::PathBuf;
use rustyline::{history::FileHistory, Editor};
use rustyline::{Editor, history::FileHistory};
fn xdg_state_dir() -> Option<PathBuf> {
if let Ok(dir) = std::env::var("XDG_STATE_DIR") {
@ -31,7 +31,9 @@ pub fn history_file() -> Option<std::path::PathBuf> {
Some(state.join("cli_history"))
} else {
eprintln!("Warning: failed to find or create XDG_STATE_DIR for osdb.");
eprintln!("Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory.");
eprintln!(
"Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory."
);
None
}
}

View file

@ -1,6 +1,6 @@
use crate::meta_commands::{MetaCommand, MetaCommandExecuteResult};
use crate::statements::{Statement, StatementExecuteResult};
use crate::tokens::ScanError;
use crate::tokens::{ScanError, Token};
#[derive(Debug)]
pub enum Command {
@ -50,6 +50,7 @@ impl Command {
/// Errors reported while parsing input into [`Command`]s.
#[derive(Debug)]
pub enum CommandParseError {
/// A tokenizer failure, wrapping the underlying [`ScanError`].
Scan(ScanError),
/// A token was found where it is not accepted. Holds the offending token
/// and the list of expected token kinds shown to the user in the
/// diagnostic note.
UnexpectedToken(Token, &'static [&'static str]),
}
impl From<MetaCommand> for Command {

View file

@ -7,9 +7,26 @@ pub trait OSDBError {
impl OSDBError for CommandParseError {
    /// Prints a human-readable diagnostic for this parse error.
    ///
    /// `file` is the name used to label the source in the report and `input`
    /// is the full source text the error locations refer to.
    ///
    /// # Panics
    ///
    /// Panics if writing the report fails (the result of `print` is unwrapped).
    fn display(&self, file: &str, input: &str) {
        // A single exhaustive `match` handles every variant; an irrefutable
        // `let CommandParseError::Scan(x) = self` is no longer valid now that
        // the enum has more than one variant.
        match self {
            // Scan errors render themselves.
            CommandParseError::Scan(x) => x.display(file, input),
            CommandParseError::UnexpectedToken(token, items) => {
                let location = (file, Into::<std::ops::Range<usize>>::into(&token.location));
                Report::build(ReportKind::Error, location.clone())
                    .with_message("unexpected token")
                    .with_label(
                        // Last use of `location`, so it is moved rather than cloned.
                        Label::new(location)
                            .with_color(Color::Red)
                            .with_message(format!("found {token}")),
                    )
                    .with_note(format!("expected token type to be one of {items:?}"))
                    .finish()
                    .print((file, Source::from(input)))
                    .unwrap()
            }
        }
    }
}
impl OSDBError for ScanError {
@ -22,7 +39,6 @@ impl OSDBError for ScanError {
.with_color(Color::Red)
.with_message(format!("{self}")),
)
.with_help("Make sure you don't have any typos or unexpected characters.")
.finish()
.print((file, Source::from(input)))
.unwrap();

View file

@ -3,6 +3,14 @@ pub enum MetaCommand {
Exit,
}
/// Writes the textual name of a meta-command.
impl std::fmt::Display for MetaCommand {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Resolve the name first, then emit it in a single write.
        let keyword = match self {
            MetaCommand::Exit => "exit",
        };
        f.write_str(keyword)
    }
}
/// Result value associated with running a meta-command (going by the type
/// name; the code that produces it is not visible in this excerpt).
pub struct MetaCommandExecuteResult {
/// NOTE(review): presumably tells the caller to terminate its loop after
/// this meta-command — confirm against the executor and the REPL.
pub should_exit: bool,
}

View file

@ -11,7 +11,7 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
.map_err(|x| x.into_iter().map(|x| x.into()).collect::<Vec<_>>())?
.into();
let mut cmds = Vec::new();
let errs = Vec::new();
let mut errs = Vec::new();
while let Some(token) = tokens.pop_front() {
match token.data {
crate::tokens::TokenData::Insert => cmds.push(Command::Statement(Statement::Insert)),
@ -19,6 +19,14 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
crate::tokens::TokenData::MetaCommand(meta_command) => {
cmds.push(Command::MetaCommand(meta_command))
}
crate::tokens::TokenData::Int(_) => errs.push(CommandParseError::UnexpectedToken(
token,
&["statement", "meta command", "eof"],
)),
crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken(
token,
&["statement", "meta command", "eof"],
)),
crate::tokens::TokenData::EndOfFile => (),
}
}

View file

@ -1,8 +1,9 @@
---
source: src/tokens.rs
expression: scanerrors
expression: "tokenize(\"salact +\".to_string(), \"src/statement.sql\".to_string())"
---
[
Err(
[
ScanError {
location: Location {
file: "src/statement.sql",
@ -19,8 +20,11 @@ expression: scanerrors
offset: 7,
length: 1,
},
kind: UnexpectedChar(
'+',
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
]
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
data: Int(
-10,
),
lexeme: "-10",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 3,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
data: Int(
0,
),
lexeme: "0",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 1,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"-0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 2,
},
data: Int(
0,
),
lexeme: "-0",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 2,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"--0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"++0\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 3,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"-\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,20 @@
---
source: src/tokens.rs
expression: "tokenize(\"+\".to_string(), \"src/ints.sql\".to_string(),)"
---
Err(
[
ScanError {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 1,
},
kind: ParseIntError(
ParseIntError {
kind: InvalidDigit,
},
),
},
],
)

View file

@ -0,0 +1,28 @@
---
source: src/tokens.rs
expression: "tokenize(\"10\".to_string(), \"src/ints.sql\".to_string(),)"
---
Ok(
[
Token {
location: Location {
file: "src/ints.sql",
offset: 0,
length: 2,
},
data: Int(
10,
),
lexeme: "10",
},
Token {
location: Location {
file: "src/ints.sql",
offset: 2,
length: 0,
},
data: EndOfFile,
lexeme: "",
},
],
)

View file

@ -6,6 +6,8 @@ pub enum TokenData {
Select,
MetaCommand(MetaCommand),
EndOfFile,
Int(i64),
String(String),
}
#[derive(Debug, Eq, PartialEq)]
@ -54,6 +56,21 @@ pub struct Token {
pub lexeme: String,
}
/// Formats a token as a short description of its kind followed by a space
/// and its lexeme in debug (quoted) form.
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // First part: what kind of token this is (and its payload, if any).
        match &self.data {
            TokenData::Insert => f.write_str("insert statement")?,
            TokenData::Select => f.write_str("select statement")?,
            TokenData::MetaCommand(command) => write!(f, "meta-command {command}")?,
            TokenData::EndOfFile => f.write_str("end of file")?,
            TokenData::Int(value) => write!(f, "integer {value}")?,
            TokenData::String(text) => write!(f, "string {text:?}")?,
        }
        // Second part: the raw source text the token was scanned from.
        write!(f, " {:?}", self.lexeme)
    }
}
struct Tokenizer {
input: String,
file: String,
@ -67,6 +84,8 @@ pub enum ScanErrorKind {
UnexpectedEndOfInput,
UnknownKeyword(String),
UnknownMetaCommand(String),
ParseIntError(std::num::ParseIntError),
UnexpectedEndOfInputWhileLookingForMatching(char, Location),
}
impl std::fmt::Display for ScanErrorKind {
@ -76,6 +95,11 @@ impl std::fmt::Display for ScanErrorKind {
ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
f,
"unexpected end of input while looking for matching {c:?}"
),
}
}
}
@ -107,6 +131,10 @@ impl Tokenizer {
Location::new(self.file.clone(), self.offset, length)
}
/// Builds a [`Location`] in the current file that starts one position before
/// the current offset and spans `length`.
///
/// The subtraction saturates, so calling this while the offset is still 0
/// yields a location at offset 0 instead of underflowing the `usize`.
fn previous_location(&self, length: usize) -> Location {
    Location::new(self.file.clone(), self.offset.saturating_sub(1), length)
}
/// Returns `true` once the current offset has reached or passed the length
/// of the input.
fn is_at_end(&self) -> bool {
    let input_len = self.input.len();
    input_len <= self.offset
}
@ -206,6 +234,77 @@ impl Tokenizer {
c.is_alphanumeric() || c == '_'
}
/// Returns `true` for characters that may appear in an integer lexeme:
/// the ASCII digits `0`-`9` and the sign characters `-` and `+`.
fn digit(c: char) -> bool {
    matches!(c, '0'..='9' | '-' | '+')
}
/// Scans an integer token starting at the current offset.
///
/// The first character is consumed unconditionally; after that, every
/// following character accepted by [`Self::digit`] is consumed as well.
/// The collected text is then parsed as an `i64`.
///
/// # Errors
///
/// Returns a [`ScanError`] of kind `ParseIntError`, covering the whole
/// collected text, when that text is not a valid `i64`.
fn scan_integer(&mut self) -> Result<Token, ScanError> {
    let start_offset = self.offset;
    let mut lexeme = String::new();

    // Leading character.
    if let Some(first) = self.advance() {
        lexeme.push(first);
    }

    // Remaining characters, for as long as they qualify.
    while let Some(next) = self.peek().filter(|&c| Self::digit(c)) {
        lexeme.push(next);
        self.advance();
    }

    // One length unit was counted per collected character.
    let location = Location::new(self.file.clone(), start_offset, lexeme.chars().count());

    match lexeme.parse::<i64>() {
        Ok(value) => Ok(Token {
            location,
            data: TokenData::Int(value),
            lexeme,
        }),
        Err(source) => Err(ScanError {
            location,
            kind: ScanErrorKind::ParseIntError(source),
        }),
    }
}
/// Scans a double-quoted string token starting at the current offset.
///
/// The first character (the opening quote) is consumed unconditionally.
/// Characters are then consumed up to and including the next `"`. The token
/// data holds the text between the quotes; the lexeme holds everything that
/// was consumed, quotes included.
///
/// # Errors
///
/// If the input ends before a closing `"` is found, returns a [`ScanError`]
/// located (with zero length) one position before the current offset, whose
/// kind carries the expected `"` and the location of the opening quote.
fn scan_string(&mut self) -> Result<Token, ScanError> {
    let start_offset = self.offset;
    let mut lexeme = String::new();
    let mut contents = String::new();

    // Opening quote.
    if let Some(open) = self.advance() {
        lexeme.push(open);
    }

    // Consume until the closing quote; the loop's value records whether one was found.
    let closed = loop {
        match self.advance() {
            None => break false,
            Some('"') => {
                lexeme.push('"');
                break true;
            }
            Some(c) => {
                lexeme.push(c);
                contents.push(c);
            }
        }
    };

    if !closed {
        return Err(ScanError {
            location: self.previous_location(0),
            kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
                '"',
                Location::new(self.file.clone(), start_offset, 1),
            ),
        });
    }

    Ok(Token {
        // One length unit per consumed character, quotes included.
        location: Location::new(self.file.clone(), start_offset, lexeme.chars().count()),
        data: TokenData::String(contents),
        lexeme,
    })
}
fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
loop {
if let Some(c) = self.peek() {
@ -213,6 +312,10 @@ impl Tokenizer {
return self.scan_identifier_or_keyword().map(Some);
} else if c == '.' {
return self.scan_meta_command().map(Some);
} else if Self::digit(c) {
return self.scan_integer().map(Some);
} else if c == '"' {
return self.scan_string().map(Some);
} else if c.is_whitespace() {
self.advance();
} else {
@ -307,9 +410,21 @@ mod tests {
#[test]
fn test_tokenizer_errors() {
    // Snapshot the whole `Result` so the `Err(..)` wrapper is part of what is
    // recorded; the earlier `.err().unwrap()` assertion on a separate
    // `scanerrors` binding is superseded by this one.
    assert_debug_snapshot!(tokenize(
        "salact +".to_string(),
        "src/statement.sql".to_string()
    ));
}
// Snapshot tests for integer scanning: plain, signed, zero, and malformed
// sign-only / repeated-sign inputs.
// NOTE(review): these snapshots are unnamed, so they are presumably matched
// to their stored files by assertion order — confirm before reordering.
#[test]
fn test_tokenizer_integers() {
// Well-formed integers.
assert_debug_snapshot!(tokenize("10".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-10".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-0".to_string(), "src/ints.sql".to_string(),));
// Repeated signs and bare signs are recorded as scan errors.
assert_debug_snapshot!(tokenize("--0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("++0".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("-".to_string(), "src/ints.sql".to_string(),));
assert_debug_snapshot!(tokenize("+".to_string(), "src/ints.sql".to_string(),));
}
}