feat(tokenizer): string tokenizing

Parent: f259b079b7
Commit: 00e9bc3b60

4 changed files with 72 additions and 11 deletions
notes.org (17 changed lines)
@@ -212,13 +212,22 @@ i will use rustyline, since it seems like the most feature-complete
 * DONE remove uneeded error variants
 
-* STRT parse integers
+* DONE parse integers
 
-** TODO Function to get a token until condition is false
+** DONE Function to get a token until condition is false
 
-** TODO Parse the integer
+** DONE Parse the integer
 
-* TODO parse strings
+* DONE parse strings
+
+* TODO better error message display for unclosed " in string
+
+* TODO parse insert statements in the form
+insert <id:int> <username:string> <email:string>
+** TODO Row struct
+** TODO parse row insert
+** TODO serialize/deserialize row to/from raw bytes
+*** TODO look for best practices for creating binary formats
 
 * WAIT cli tests using insta-cmd
 https://insta.rs/docs/cmd/
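The planned statement format above (insert <id:int> <username:string> <email:string>) pins down a three-column row. A minimal sketch of what the "Row struct" and "serialize/deserialize row to/from raw bytes" TODOs could turn into — the struct name comes from the notes, but the field layout, the little-endian length-prefixed encoding, and both method names are assumptions, not code from this repository:

// Hypothetical sketch for the Row TODOs above. The format (little-endian
// i64 id, then each string as a u32 length prefix followed by UTF-8
// bytes) is an assumption, not the project's actual binary format.
#[derive(Debug, PartialEq)]
struct Row {
    id: i64,
    username: String,
    email: String,
}

impl Row {
    fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(&self.id.to_le_bytes());
        for s in [&self.username, &self.email] {
            // length prefix, then the raw UTF-8 payload
            buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
            buf.extend_from_slice(s.as_bytes());
        }
        buf
    }

    fn from_bytes(buf: &[u8]) -> Option<Row> {
        let id = i64::from_le_bytes(buf.get(..8)?.try_into().ok()?);
        let mut offset = 8;
        // read one length-prefixed string, advancing the cursor
        let read_string = |offset: &mut usize| -> Option<String> {
            let len =
                u32::from_le_bytes(buf.get(*offset..*offset + 4)?.try_into().ok()?) as usize;
            *offset += 4;
            let s = String::from_utf8(buf.get(*offset..*offset + len)?.to_vec()).ok()?;
            *offset += len;
            Some(s)
        };
        let username = read_string(&mut offset)?;
        let email = read_string(&mut offset)?;
        Some(Row { id, username, email })
    }
}

A round-trip check (let bytes = row.to_bytes(); assert_eq!(Row::from_bytes(&bytes), Some(row));) would be the natural first test, and the "look for best practices" TODO would decide questions this sketch glosses over, such as versioning and fixed- versus variable-width fields.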
@@ -1,6 +1,6 @@
 use std::path::PathBuf;
 
-use rustyline::{history::FileHistory, Editor};
+use rustyline::{Editor, history::FileHistory};
 
 fn xdg_state_dir() -> Option<PathBuf> {
     if let Ok(dir) = std::env::var("XDG_STATE_DIR") {
@@ -31,7 +31,9 @@ pub fn history_file() -> Option<std::path::PathBuf> {
         Some(state.join("cli_history"))
     } else {
         eprintln!("Warning: failed to find or create XDG_STATE_DIR for osdb.");
-        eprintln!("Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory.");
+        eprintln!(
+            "Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory."
+        );
         None
     }
 }
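Context for the warnings above: the code first consults $XDG_STATE_DIR and, per the message, falls back to deriving a directory from $HOME. A sketch of that resolution, assuming the conventional $HOME/.local/state location — the real xdg_state_dir() may differ in details such as creating the directory:

use std::path::PathBuf;

// Sketch of the fallback implied by the warnings: prefer $XDG_STATE_DIR,
// otherwise fall back to $HOME/.local/state. The actual helper in this
// repository may resolve or validate the path differently.
fn xdg_state_dir_sketch() -> Option<PathBuf> {
    if let Ok(dir) = std::env::var("XDG_STATE_DIR") {
        return Some(PathBuf::from(dir));
    }
    std::env::var("HOME")
        .ok()
        .map(|home| PathBuf::from(home).join(".local/state"))
}

fn main() {
    println!("state dir: {:?}", xdg_state_dir_sketch());
}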
@@ -23,14 +23,14 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
                 token,
                 &["statement", "meta command", "eof"],
             )),
+            crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken(
+                token,
+                &["statement", "meta command", "eof"],
+            )),
             crate::tokens::TokenData::EndOfFile => (),
         }
     }
-    if errs.is_empty() {
-        Ok(cmds)
-    } else {
-        Err(errs)
-    }
+    if errs.is_empty() { Ok(cmds) } else { Err(errs) }
 }
 
 #[cfg(test)]
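The function being patched here collects every parse error into a Vec rather than bailing on the first one, which is why the new String arm simply pushes another UnexpectedToken. A stripped-down sketch of that accumulate-then-decide shape, using placeholder types rather than the crate's real Command and CommandParseError:

// Simplified illustration of the error-accumulation pattern used by
// parse(): push each failure into a Vec and only decide Ok/Err at the
// end, so one bad token doesn't hide later errors.
fn parse_all(inputs: &[&str]) -> Result<Vec<i64>, Vec<String>> {
    let mut values = Vec::new();
    let mut errs = Vec::new();
    for input in inputs {
        match input.parse::<i64>() {
            Ok(v) => values.push(v),
            Err(e) => errs.push(format!("{input:?}: {e}")),
        }
    }
    if errs.is_empty() { Ok(values) } else { Err(errs) }
}

fn main() {
    assert_eq!(parse_all(&["1", "2"]), Ok(vec![1, 2]));
    // both bad inputs are reported, not just the first
    assert_eq!(parse_all(&["1", "x", "y"]).unwrap_err().len(), 2);
}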
@@ -7,6 +7,7 @@ pub enum TokenData {
     MetaCommand(MetaCommand),
     EndOfFile,
     Int(i64),
+    String(String),
 }
 
 #[derive(Debug, Eq, PartialEq)]
@@ -63,6 +64,7 @@ impl std::fmt::Display for Token {
             TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
             TokenData::EndOfFile => write!(f, "end of file"),
             TokenData::Int(x) => write!(f, "integer {x}"),
+            TokenData::String(x) => write!(f, "string {x:?}"),
         }?;
         let lexeme = &self.lexeme;
         write!(f, " {lexeme:?}")
@@ -83,6 +85,7 @@ pub enum ScanErrorKind {
     UnknownKeyword(String),
     UnknownMetaCommand(String),
     ParseIntError(std::num::ParseIntError),
+    UnexpectedEndOfInputWhileLookingForMatching(char, Location),
 }
 
 impl std::fmt::Display for ScanErrorKind {
@@ -93,6 +96,10 @@ impl std::fmt::Display for ScanErrorKind {
             ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
             ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
             ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
+            ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
+                f,
+                "unexpected end of input while looking for matching {c:?}"
+            ),
         }
     }
 }
@@ -124,6 +131,10 @@ impl Tokenizer {
         Location::new(self.file.clone(), self.offset, length)
     }
 
+    fn previous_location(&self, length: usize) -> Location {
+        Location::new(self.file.clone(), self.offset - 1, length)
+    }
+
     fn is_at_end(&self) -> bool {
         self.offset >= self.input.len()
     }
@@ -257,6 +268,43 @@ impl Tokenizer {
         }
     }
 
+    fn scan_string(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut lexeme = String::new();
+        let mut length = 0;
+        let mut valid = false;
+        if let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+            if c == '"' {
+                valid = true;
+                break;
+            } else {
+                word.push(c);
+            }
+        }
+        if valid {
+            Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::String(word),
+                lexeme,
+            })
+        } else {
+            Err(ScanError {
+                location: self.previous_location(0),
+                kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
+                    '"',
+                    Location::new(self.file.clone(), start_offset, 1),
+                ),
+            })
+        }
+    }
+
     fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
         loop {
             if let Some(c) = self.peek() {
@@ -266,6 +314,8 @@ impl Tokenizer {
                     return self.scan_meta_command().map(Some);
                 } else if Self::digit(c) {
                     return self.scan_integer().map(Some);
+                } else if c == '"' {
+                    return self.scan_string().map(Some);
                 } else if c.is_whitespace() {
                     self.advance();
                 } else {
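Tracing the new scan_string above: lexeme accumulates the raw text including both quotes, word keeps only the contents between them, and running out of input before a closing quote produces the new UnexpectedEndOfInputWhileLookingForMatching error whose stored Location points back at the opening quote. A standalone sketch of the same loop, outside the Tokenizer type, to make the two-buffer bookkeeping easy to test in isolation:

// Standalone rendition of the scan_string loop: returns (contents, raw
// lexeme) on success, or Err(()) when the input ends before a closing
// quote, mirroring the tokenizer's error case.
fn scan_string_sketch(input: &str) -> Result<(String, String), ()> {
    let mut chars = input.chars();
    let mut word = String::new();   // contents between the quotes
    let mut lexeme = String::new(); // raw text including both quotes
    if let Some(c) = chars.next() {
        lexeme.push(c); // the opening '"'
    }
    for c in chars {
        lexeme.push(c);
        if c == '"' {
            return Ok((word, lexeme)); // found the closing quote
        }
        word.push(c);
    }
    Err(()) // ran out of input while looking for the matching '"'
}

fn main() {
    assert_eq!(
        scan_string_sketch("\"hello\""),
        Ok(("hello".to_string(), "\"hello\"".to_string()))
    );
    assert_eq!(scan_string_sketch("\"unclosed"), Err(()));
}

The unclosed-quote case is exactly what the "better error message display" TODO in notes.org is about: the error already carries the opening quote's Location, so the display layer can point at it.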