feat(tokenizer): string tokenizing

This commit is contained in:
Khaïs COLIN 2025-05-24 13:28:56 +02:00
parent f259b079b7
commit 00e9bc3b60
Signed by: logistic-bot
SSH key fingerprint: SHA256:3zI3/tx0ZpCLHCLPmEaGR4oeYCPMCzQxXhXutBmtOAU
4 changed files with 72 additions and 11 deletions

View file

@ -212,13 +212,22 @@ i will use rustyline, since it seems like the most feature-complete
* DONE remove unneeded error variants * DONE remove unneeded error variants
* STRT parse integers * DONE parse integers
** TODO Function to get a token until condition is false ** DONE Function to get a token until condition is false
** TODO Parse the integer ** DONE Parse the integer
* TODO parse strings * DONE parse strings
* TODO better error message display for unclosed " in string
* TODO parse insert statements in the form
insert <id:int> <username:string> <email:string>
** TODO Row struct
** TODO parse row insert
** TODO serialize/deserialize row to/from raw bytes
*** TODO look for best practices for creating binary formats
* WAIT cli tests using insta-cmd * WAIT cli tests using insta-cmd
https://insta.rs/docs/cmd/ https://insta.rs/docs/cmd/

View file

@ -1,6 +1,6 @@
use std::path::PathBuf; use std::path::PathBuf;
use rustyline::{history::FileHistory, Editor}; use rustyline::{Editor, history::FileHistory};
fn xdg_state_dir() -> Option<PathBuf> { fn xdg_state_dir() -> Option<PathBuf> {
if let Ok(dir) = std::env::var("XDG_STATE_DIR") { if let Ok(dir) = std::env::var("XDG_STATE_DIR") {
@ -31,7 +31,9 @@ pub fn history_file() -> Option<std::path::PathBuf> {
Some(state.join("cli_history")) Some(state.join("cli_history"))
} else { } else {
eprintln!("Warning: failed to find or create XDG_STATE_DIR for osdb."); eprintln!("Warning: failed to find or create XDG_STATE_DIR for osdb.");
eprintln!("Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory."); eprintln!(
"Warning: either set XDG_STATE_DIR or HOME, and ensure osdb has write permissions to that directory."
);
None None
} }
} }

View file

@ -23,14 +23,14 @@ pub fn parse(file: String, input: String) -> Result<Vec<Command>, Vec<CommandPar
token, token,
&["statement", "meta command", "eof"], &["statement", "meta command", "eof"],
)), )),
crate::tokens::TokenData::String(_) => errs.push(CommandParseError::UnexpectedToken(
token,
&["statement", "meta command", "eof"],
)),
crate::tokens::TokenData::EndOfFile => (), crate::tokens::TokenData::EndOfFile => (),
} }
} }
if errs.is_empty() { if errs.is_empty() { Ok(cmds) } else { Err(errs) }
Ok(cmds)
} else {
Err(errs)
}
} }
#[cfg(test)] #[cfg(test)]

View file

@ -7,6 +7,7 @@ pub enum TokenData {
MetaCommand(MetaCommand), MetaCommand(MetaCommand),
EndOfFile, EndOfFile,
Int(i64), Int(i64),
String(String),
} }
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
@ -63,6 +64,7 @@ impl std::fmt::Display for Token {
TokenData::MetaCommand(x) => write!(f, "meta-command {x}"), TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
TokenData::EndOfFile => write!(f, "end of file"), TokenData::EndOfFile => write!(f, "end of file"),
TokenData::Int(x) => write!(f, "integer {x}"), TokenData::Int(x) => write!(f, "integer {x}"),
TokenData::String(x) => write!(f, "string {x:?}"),
}?; }?;
let lexeme = &self.lexeme; let lexeme = &self.lexeme;
write!(f, " {lexeme:?}") write!(f, " {lexeme:?}")
@ -83,6 +85,7 @@ pub enum ScanErrorKind {
UnknownKeyword(String), UnknownKeyword(String),
UnknownMetaCommand(String), UnknownMetaCommand(String),
ParseIntError(std::num::ParseIntError), ParseIntError(std::num::ParseIntError),
UnexpectedEndOfInputWhileLookingForMatching(char, Location),
} }
impl std::fmt::Display for ScanErrorKind { impl std::fmt::Display for ScanErrorKind {
@ -93,6 +96,10 @@ impl std::fmt::Display for ScanErrorKind {
ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"), ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"), ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"), ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
f,
"unexpected end of input while looking for matching {c:?}"
),
} }
} }
} }
@ -124,6 +131,10 @@ impl Tokenizer {
Location::new(self.file.clone(), self.offset, length) Location::new(self.file.clone(), self.offset, length)
} }
/// Builds a [`Location`] of the given `length` in the current file, anchored
/// one position before the tokenizer's current offset.
///
/// The subtraction saturates at zero, so calling this before anything has
/// been consumed yields a location at offset 0 instead of underflowing
/// (which would panic in debug builds and wrap around in release builds).
fn previous_location(&self, length: usize) -> Location {
    Location::new(self.file.clone(), self.offset.saturating_sub(1), length)
}
fn is_at_end(&self) -> bool { fn is_at_end(&self) -> bool {
self.offset >= self.input.len() self.offset >= self.input.len()
} }
@ -257,6 +268,43 @@ impl Tokenizer {
} }
} }
/// Scans a double-quoted string token beginning at the current offset.
///
/// The first character consumed is recorded in the lexeme as the opening
/// delimiter without being inspected. Every following character is appended
/// to the string contents until a `"` is consumed; that closing quote is
/// part of the lexeme but not of the contents. No escape sequences are
/// interpreted: the first `"` after the opening character always ends the
/// string.
///
/// # Errors
///
/// Returns `ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching`
/// when the input runs out before a closing `"` is consumed. The error is
/// located via `previous_location(0)`, and the kind carries a one-long
/// location at the offset where the string started.
fn scan_string(&mut self) -> Result<Token, ScanError> {
    let opening_offset = self.offset;
    let mut contents = String::new();
    let mut raw = String::new();
    let mut consumed = 0;

    // Opening delimiter: kept in the raw lexeme only.
    if let Some(opening) = self.advance() {
        raw.push(opening);
        consumed += 1;
    }

    loop {
        match self.advance() {
            // Closing quote: the token is complete.
            Some('"') => {
                raw.push('"');
                consumed += 1;
                return Ok(Token {
                    location: Location::new(self.file.clone(), opening_offset, consumed),
                    data: TokenData::String(contents),
                    lexeme: raw,
                });
            }
            // Any other character belongs to both the lexeme and the contents.
            Some(ch) => {
                raw.push(ch);
                consumed += 1;
                contents.push(ch);
            }
            // Input exhausted before the string was closed.
            None => {
                return Err(ScanError {
                    location: self.previous_location(0),
                    kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
                        '"',
                        Location::new(self.file.clone(), opening_offset, 1),
                    ),
                });
            }
        }
    }
}
fn scan_token(&mut self) -> Result<Option<Token>, ScanError> { fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
loop { loop {
if let Some(c) = self.peek() { if let Some(c) = self.peek() {
@ -266,6 +314,8 @@ impl Tokenizer {
return self.scan_meta_command().map(Some); return self.scan_meta_command().map(Some);
} else if Self::digit(c) { } else if Self::digit(c) {
return self.scan_integer().map(Some); return self.scan_integer().map(Some);
} else if c == '"' {
return self.scan_string().map(Some);
} else if c.is_whitespace() { } else if c.is_whitespace() {
self.advance(); self.advance();
} else { } else {