From 5d9f79135374a41df9ae5d843019f6ffcf076040 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kha=C3=AFs=20COLIN=20=28aider=29?=
Date: Wed, 4 Jun 2025 21:14:25 +0200
Subject: [PATCH] refactor: split tokens into data, error, location modules

---
 src/error_display.rs    |   2 +-
 src/tokens.rs           | 496 +---------------------------------------
 src/tokens/data.rs      |  36 +++
 src/tokens/error.rs     |  39 ++++
 src/tokens/location.rs  |  32 +++
 src/tokens/tokenizer.rs | 296 ++++++++++++++++++++++++
 6 files changed, 416 insertions(+), 485 deletions(-)
 create mode 100644 src/tokens/data.rs
 create mode 100644 src/tokens/error.rs
 create mode 100644 src/tokens/location.rs
 create mode 100644 src/tokens/tokenizer.rs

diff --git a/src/error_display.rs b/src/error_display.rs
index 6466248..73b3438 100644
--- a/src/error_display.rs
+++ b/src/error_display.rs
@@ -94,7 +94,7 @@ impl OSDBError for ScanError {
         let report =
             Report::build(ReportKind::Error, location.clone()).with_message(format!("{self}"));
         let report = match &self.kind {
-            crate::tokens::ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
+            crate::tokens::error::ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
                 c,
                 start_location,
             ) => {
diff --git a/src/tokens.rs b/src/tokens.rs
index e872030..074945a 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -1,455 +1,16 @@
-use crate::meta_commands::MetaCommand;
+//! Tokenization infrastructure for the database engine
+//!
+//! This module handles lexical analysis of input strings, converting them
+//! into structured tokens with location information for error reporting.
 
-/// Represents the core lexical elements of the SQL-like language.
-///
-/// Tokens are produced by the tokenizer and consumed by the parser to build
-/// abstract syntax trees. Each variant represents a distinct syntactic element
-/// with associated data when applicable.
-///
-/// # Examples
-///
-/// ```
-/// use osdb::tokens::TokenData;
-/// use osdb::meta_commands::MetaCommand;
-///
-/// // Keyword tokens
-/// let insert = TokenData::Insert;
-/// let select = TokenData::Select;
-///
-/// // Meta command with parameter
-/// let exit_cmd = TokenData::MetaCommand(MetaCommand::Exit);
-///
-/// // Literal values
-/// let number = TokenData::Int(42);
-/// let text = TokenData::String("hello".to_string());
-/// ```
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub enum TokenData {
-    /// INSERT statement keyword
-    Insert,
-    /// SELECT statement keyword
-    Select,
-    /// Meta command (commands starting with '.')
-    MetaCommand(MetaCommand),
-    /// End of file marker
-    EndOfFile,
-    /// Integer literal value
-    Int(i64),
-    /// String literal value
-    String(String),
-    /// Semicolon statement terminator
-    Semicolon,
-}
+pub mod data;
+pub mod error;
+pub mod location;
+mod tokenizer;
 
-/// Represents a location in the source input
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub struct Location {
-    /// Source file name
-    pub file: String,
-    /// Offset from the start of the file in characters
-    pub offset: usize,
-    /// Length of the token in characters
-    pub length: usize,
-}
-
-impl From<&Location> for std::ops::Range<usize> {
-    fn from(val: &Location) -> Self {
-        std::ops::Range {
-            start: val.offset,
-            end: val.offset + val.length,
-        }
-    }
-}
-
-impl Default for Location {
-    fn default() -> Self {
-        Self::new(String::from(""), 0, 0)
-    }
-}
-
-impl Location {
-    /// Creates a new Location with the given file, offset and length.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use osdb::tokens::Location;
-    ///
-    /// // Create a location for a token spanning bytes 5-15 in a file
-    /// let loc = Location::new("query.sql".into(), 5, 10);
-    /// assert_eq!(loc.offset, 5);
-    /// assert_eq!(loc.length, 10);
-    ///
-    /// // Zero-length location for EOF marker
-    /// let eof = Location::new("".into(), 20, 0);
-    /// assert_eq!(eof.length, 0);
-    /// ```
-    pub fn new(file: String, offset: usize, length: usize) -> Self {
-        Self {
-            file,
-            offset,
-            length,
-        }
-    }
-}
-
-/// Represents a token in the input source
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub struct Token {
-    /// Location of the token in the source input
-    pub location: Location,
-    /// The parsed token data
-    pub data: TokenData,
-    /// Original text representation in the source
-    pub lexeme: String,
-}
-
-/// A lexical token with location information and parsed data.
-///
-/// # Examples
-///
-/// ```
-/// use osdb::tokens::{Token, TokenData, Location};
-/// use osdb::meta_commands::MetaCommand;
-///
-/// let token = Token {
-///     location: Location::new("input.sql".into(), 0, 6),
-///     data: TokenData::Select,
-///     lexeme: "SELECT".to_string(),
-/// };
-///
-/// assert_eq!(format!("{token}"), "select statement \"SELECT\"");
-/// ```
-impl std::fmt::Display for Token {
-    /// Formats the token for display, showing both the semantic meaning
-    /// and original lexeme.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # use osdb::tokens::{Token, TokenData, Location};
-    /// # let location = Location::default();
-    /// let int_token = Token {
-    ///     location: location.clone(),
-    ///     data: TokenData::Int(42),
-    ///     lexeme: "42".to_string(),
-    /// };
-    /// assert_eq!(format!("{int_token}"), "integer 42 \"42\"");
-    ///
-    /// let string_token = Token {
-    ///     location,
-    ///     data: TokenData::String("hello".into()),
-    ///     lexeme: "\"hello\"".to_string(),
-    /// };
-    /// assert_eq!(format!("{string_token}"), "string \"hello\" \"\\\"hello\\\"\"");
-    /// ```
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match &self.data {
-            TokenData::Insert => write!(f, "insert statement"),
-            TokenData::Select => write!(f, "select statement"),
-            TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
-            TokenData::EndOfFile => write!(f, "end of file"),
-            TokenData::Int(x) => write!(f, "integer {x}"),
-            TokenData::String(x) => write!(f, "string {x:?}"),
-            TokenData::Semicolon => write!(f, "semicolon"),
-        }?;
-        let lexeme = &self.lexeme;
-        write!(f, " {lexeme:?}")
-    }
-}
-
-struct Tokenizer {
-    input: String,
-    file: String,
-    tokens: Vec<Token>,
-    offset: usize,
-}
-
-/// Represents different kinds of errors that can occur during tokenization
-#[derive(Debug, Eq, PartialEq)]
-pub enum ScanErrorKind {
-    /// Encountered an unexpected character
-    UnexpectedChar(char),
-    /// Reached the end of input unexpectedly
-    UnexpectedEndOfInput,
-    /// Encountered an unknown keyword
-    UnknownKeyword(String),
-    /// Encountered an unknown meta command
-    UnknownMetaCommand(String),
-    /// Failed to parse an integer value
-    ParseIntError(std::num::ParseIntError),
-    /// Reached the end of input while looking for a matching character
-    UnexpectedEndOfInputWhileLookingForMatching(char, Location),
-}
-
-impl std::fmt::Display for ScanErrorKind {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match self {
-            ScanErrorKind::UnexpectedChar(c) => write!(f, "unexpected char: {c:?}"),
-            ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
-            ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
-            ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
-            ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
-            ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
-                f,
-                "unexpected end of input while looking for matching {c:?}"
-            ),
-        }
-    }
-}
-
-/// Error that occurred during tokenization, with location information
-#[derive(Debug, Eq, PartialEq)]
-pub struct ScanError {
-    /// Location where the error occurred
-    pub location: Location,
-    /// Type of scanning error
-    pub kind: ScanErrorKind,
-}
-
-impl std::fmt::Display for ScanError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let kind = &self.kind;
-        write!(f, "{kind}")
-    }
-}
-
-impl Tokenizer {
-    fn new(input: String, file: String) -> Self {
-        Self {
-            input,
-            file,
-            tokens: Vec::new(),
-            offset: 0,
-        }
-    }
-
-    fn current_location(&self, length: usize) -> Location {
-        Location::new(self.file.clone(), self.offset, length)
-    }
-
-    fn previous_location(&self, length: usize) -> Location {
-        Location::new(self.file.clone(), self.offset - 1, length)
-    }
-
-    fn is_at_end(&self) -> bool {
-        self.offset >= self.input.len()
-    }
-
-    fn peek(&self) -> Option<char> {
-        self.input.chars().nth(self.offset)
-    }
-
-    fn advance(&mut self) -> Option<char> {
-        let c = self.input.chars().nth(self.offset);
-        self.offset += 1;
-        c
-    }
-
-    fn recognize_keyword(word: &str) -> Option<TokenData> {
-        match word.to_lowercase().as_str() {
-            "insert" => Some(TokenData::Insert),
-            "select" => Some(TokenData::Select),
-            _ => None,
-        }
-    }
-
-    fn recognize_metacommand(word: &str) -> Option<TokenData> {
-        match word.to_lowercase().as_str() {
-            ".exit" => Some(TokenData::MetaCommand(MetaCommand::Exit)),
-            ".about" => Some(TokenData::MetaCommand(MetaCommand::About)),
-            ".version" => Some(TokenData::MetaCommand(MetaCommand::Version)),
-            _ => None,
-        }
-    }
-
-    fn scan_meta_command(&mut self) -> Result<Token, ScanError> {
-        let start_offset = self.offset;
-        let mut word = String::new();
-        let mut length = 0;
-        if let Some(c) = self.advance() {
-            word.push(c);
-            length += 1;
-        }
-        while let Some(c) = self.peek() {
-            if c.is_alphabetic() || c == '_' {
-                word.push(c);
-                self.advance();
-            } else {
-                break;
-            }
-            length += 1;
-        }
-        if let Some(meta) = Self::recognize_metacommand(&word) {
-            Ok(Token {
-                location: Location::new(self.file.clone(), start_offset, length),
-                data: meta,
-                lexeme: word,
-            })
-        } else {
-            Err(ScanError {
-                location: Location::new(self.file.clone(), start_offset, length),
-                kind: ScanErrorKind::UnknownMetaCommand(word),
-            })
-        }
-    }
-
-    fn scan_identifier_or_keyword(&mut self) -> Result<Token, ScanError> {
-        let start_offset = self.offset;
-        let mut word = String::new();
-        let mut length = 0;
-        if let Some(c) = self.advance() {
-            word.push(c);
-            length += 1;
-        }
-        while let Some(c) = self.peek() {
-            if Self::ident_or_keyword_inner(c) {
-                word.push(c);
-                self.advance();
-            } else {
-                break;
-            }
-            length += 1;
-        }
-        if let Some(keyword) = Self::recognize_keyword(&word) {
-            Ok(Token {
-                location: Location::new(self.file.clone(), start_offset, length),
-                data: keyword,
-                lexeme: word,
-            })
-        } else {
-            Err(ScanError {
-                location: Location::new(self.file.clone(), start_offset, length),
-                kind: ScanErrorKind::UnknownKeyword(word),
-            })
-        }
-    }
-
-    fn ident_or_keyword_start(c: char) -> bool {
-        c.is_alphabetic() || c == '_'
-    }
-
-    fn ident_or_keyword_inner(c: char) -> bool {
-        c.is_alphanumeric() || c == '_'
-    }
-
-    fn digit(c: char) -> bool {
-        c.is_ascii_digit() || c == '-' || c == '+'
-    }
-
-    fn scan_integer(&mut self) -> Result<Token, ScanError> {
-        let start_offset = self.offset;
-        let mut word = String::new();
-        let mut length = 0;
-        if let Some(c) = self.advance() {
-            word.push(c);
-            length += 1;
-        }
-        while let Some(c) = self.peek() {
-            if Self::digit(c) {
-                word.push(c);
-                self.advance();
-            } else {
-                break;
-            }
-            length += 1;
-        }
-        match word.parse::<i64>() {
-            Ok(int) => Ok(Token {
-                location: Location::new(self.file.clone(), start_offset, length),
-                data: TokenData::Int(int),
-                lexeme: word,
-            }),
-            Err(e) => Err(ScanError {
-                location: Location::new(self.file.clone(), start_offset, length),
-                kind: ScanErrorKind::ParseIntError(e),
-            }),
-        }
-    }
-
-    fn scan_string(&mut self) -> Result<Token, ScanError> {
-        let start_offset = self.offset;
-        let mut word = String::new();
-        let mut lexeme = String::new();
-        let mut length = 0;
-        let mut valid = false;
-        if let Some(c) = self.advance() {
-            lexeme.push(c);
-            length += 1;
-        }
-        while let Some(c) = self.advance() {
-            lexeme.push(c);
-            length += 1;
-            if c == '"' {
-                valid = true;
-                break;
-            } else {
-                word.push(c);
-            }
-        }
-        if valid {
-            Ok(Token {
-                location: Location::new(self.file.clone(), start_offset, length),
-                data: TokenData::String(word),
-                lexeme,
-            })
-        } else {
-            Err(ScanError {
-                location: self.previous_location(0),
-                kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
-                    '"',
-                    Location::new(self.file.clone(), start_offset, 1),
-                ),
-            })
-        }
-    }
-
-    fn scan_semicolon(&mut self) -> Result<Token, ScanError> {
-        self.advance();
-        Ok(Token {
-            location: self.previous_location(1),
-            data: TokenData::Semicolon,
-            lexeme: String::from(";"),
-        })
-    }
-
-    fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
-        loop {
-            if let Some(c) = self.peek() {
-                if Self::ident_or_keyword_start(c) {
-                    return self.scan_identifier_or_keyword().map(Some);
-                } else if c == '.' {
-                    return self.scan_meta_command().map(Some);
-                } else if Self::digit(c) {
-                    return self.scan_integer().map(Some);
-                } else if c == '"' {
-                    return self.scan_string().map(Some);
-                } else if c == ';' {
-                    return self.scan_semicolon().map(Some);
-                } else if c.is_whitespace() {
-                    self.advance();
-                } else {
-                    let result = Err(ScanError {
-                        location: self.current_location(1),
-                        kind: ScanErrorKind::UnexpectedChar(c),
-                    });
-                    self.advance();
-                    return result;
-                }
-            } else {
-                return Ok(None);
-            }
-        }
-    }
-
-    fn finalize(&mut self) {
-        self.tokens.push(Token {
-            location: self.current_location(0),
-            data: TokenData::EndOfFile,
-            lexeme: String::new(),
-        });
-    }
-}
+pub use data::{Token, TokenData};
+pub use error::ScanError;
+pub use location::Location;
 
 /// Converts a string input into a sequence of tokens
 ///
@@ -491,7 +52,7 @@ impl Tokenizer {
 /// assert_debug_snapshot!("invalid meta-command", err);
 /// ```
 pub fn tokenize(input: String, file: String) -> Result<Vec<Token>, Vec<ScanError>> {
-    let mut tokenizer = Tokenizer::new(input, file);
+    let mut tokenizer = tokenizer::Tokenizer::new(input, file);
     let mut errors = Vec::new();
     while !tokenizer.is_at_end() {
         let token = tokenizer.scan_token();
@@ -524,39 +85,6 @@ mod tests {
         assert_debug_snapshot!(tokenize(".halp".to_string(), "".to_string()));
     }
 
-    #[test]
-    fn test_tokenizer() {
-        let mut scanresult =
-            tokenize("INSERT Select".to_string(), "src/statement.sql".to_string()).unwrap();
-        scanresult.reverse();
-        assert_eq!(
-            scanresult.pop(),
-            Some(Token {
-                location: Location::new(String::from("src/statement.sql"), 0, 6),
-                data: TokenData::Insert,
-                lexeme: String::from("INSERT"),
-            })
-        );
-        assert_eq!(
-            scanresult.pop(),
-            Some(Token {
-                location: Location::new(String::from("src/statement.sql"), 7, 6),
-                data: TokenData::Select,
-                lexeme: String::from("Select"),
-            })
-        );
-        assert_eq!(
-            scanresult.pop(),
-            Some(Token {
-                location: Location::new(String::from("src/statement.sql"), 13, 0),
-                data: TokenData::EndOfFile,
-                lexeme: String::from(""),
-            })
-        );
-        assert_eq!(scanresult.pop(), None);
-        assert!(scanresult.is_empty());
-    }
-
     #[test]
     fn test_tokenizer_errors() {
         assert_debug_snapshot!(tokenize(
diff --git a/src/tokens/data.rs b/src/tokens/data.rs
new file mode 100644
index 0000000..36b9f27
--- /dev/null
+++ b/src/tokens/data.rs
@@ -0,0 +1,36 @@
+use crate::meta_commands::MetaCommand;
+
+/// Represents the core lexical elements of the SQL-like language
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum TokenData {
+    Insert,
+    Select,
+    MetaCommand(MetaCommand),
+    EndOfFile,
+    Int(i64),
+    String(String),
+    Semicolon,
+}
+
+/// Represents a token in the input source
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub struct Token {
+    pub location: super::Location,
+    pub data: TokenData,
+    pub lexeme: String,
+}
+
+impl std::fmt::Display for Token {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match &self.data {
+            TokenData::Insert => write!(f, "insert statement"),
+            TokenData::Select => write!(f, "select statement"),
+            TokenData::MetaCommand(x) => write!(f, "meta-command {x}"),
+            TokenData::EndOfFile => write!(f, "end of file"),
+            TokenData::Int(x) => write!(f, "integer {x}"),
+            TokenData::String(x) => write!(f, "string {x:?}"),
+            TokenData::Semicolon => write!(f, "semicolon"),
+        }?;
+        write!(f, " {:?}", &self.lexeme)
+    }
+}
diff --git a/src/tokens/error.rs b/src/tokens/error.rs
new file mode 100644
index 0000000..c71df1a
--- /dev/null
+++ b/src/tokens/error.rs
@@ -0,0 +1,39 @@
+use super::Location;
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum ScanErrorKind {
+    UnexpectedChar(char),
+    UnexpectedEndOfInput,
+    UnknownKeyword(String),
+    UnknownMetaCommand(String),
+    ParseIntError(std::num::ParseIntError),
+    UnexpectedEndOfInputWhileLookingForMatching(char, Location),
+}
+
+impl std::fmt::Display for ScanErrorKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            ScanErrorKind::UnexpectedChar(c) => write!(f, "unexpected char: {c:?}"),
+            ScanErrorKind::UnexpectedEndOfInput => write!(f, "unexpected end of input"),
+            ScanErrorKind::UnknownKeyword(x) => write!(f, "unknown keyword: {x:?}"),
+            ScanErrorKind::UnknownMetaCommand(x) => write!(f, "unknown meta-command: {x:?}"),
+            ScanErrorKind::ParseIntError(x) => write!(f, "failed to parse integer: {x}"),
+            ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(c, _) => write!(
+                f,
+                "unexpected end of input while looking for matching {c:?}"
+            ),
+        }
+    }
+}
+
+#[derive(Debug, Eq, PartialEq)]
+pub struct ScanError {
+    pub location: Location,
+    pub kind: ScanErrorKind,
+}
+
+impl std::fmt::Display for ScanError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", &self.kind)
+    }
+}
diff --git a/src/tokens/location.rs b/src/tokens/location.rs
new file mode 100644
index 0000000..ff11c9d
--- /dev/null
+++ b/src/tokens/location.rs
@@ -0,0 +1,32 @@
+/// Represents a location in the source input
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub struct Location {
+    pub file: String,
+    pub offset: usize,
+    pub length: usize,
+}
+
+impl From<&Location> for std::ops::Range<usize> {
+    fn from(val: &Location) -> Self {
+        std::ops::Range {
+            start: val.offset,
+            end: val.offset + val.length,
+        }
+    }
+}
+
+impl Default for Location {
+    fn default() -> Self {
+        Self::new(String::from(""), 0, 0)
+    }
+}
+
+impl Location {
+    pub fn new(file: String, offset: usize, length: usize) -> Self {
+        Self {
+            file,
+            offset,
+            length,
+        }
+    }
+}
diff --git a/src/tokens/tokenizer.rs b/src/tokens/tokenizer.rs
new file mode 100644
index 0000000..50f78c3
--- /dev/null
+++ b/src/tokens/tokenizer.rs
@@ -0,0 +1,296 @@
+use super::data::{Token, TokenData};
+use super::error::{ScanError, ScanErrorKind};
+use super::location::Location;
+use crate::meta_commands::MetaCommand;
+
+pub(super) struct Tokenizer {
+    input: String,
+    file: String,
+    pub(super) tokens: Vec<Token>,
+    offset: usize,
+}
+
+impl Tokenizer {
+    pub(super) fn new(input: String, file: String) -> Self {
+        Self {
+            input,
+            file,
+            tokens: Vec::new(),
+            offset: 0,
+        }
+    }
+
+    fn current_location(&self, length: usize) -> Location {
+        Location::new(self.file.clone(), self.offset, length)
+    }
+
+    fn previous_location(&self, length: usize) -> Location {
+        Location::new(self.file.clone(), self.offset - 1, length)
+    }
+
+    pub(super) fn is_at_end(&self) -> bool {
+        self.offset >= self.input.len()
+    }
+
+    fn peek(&self) -> Option<char> {
+        self.input.chars().nth(self.offset)
+    }
+
+    fn advance(&mut self) -> Option<char> {
+        let c = self.input.chars().nth(self.offset);
+        self.offset += 1;
+        c
+    }
+
+    fn recognize_keyword(word: &str) -> Option<TokenData> {
+        match word.to_lowercase().as_str() {
+            "insert" => Some(TokenData::Insert),
+            "select" => Some(TokenData::Select),
+            _ => None,
+        }
+    }
+
+    fn recognize_metacommand(word: &str) -> Option<TokenData> {
+        match word.to_lowercase().as_str() {
+            ".exit" => Some(TokenData::MetaCommand(MetaCommand::Exit)),
+            ".about" => Some(TokenData::MetaCommand(MetaCommand::About)),
+            ".version" => Some(TokenData::MetaCommand(MetaCommand::Version)),
+            _ => None,
+        }
+    }
+
+    fn scan_meta_command(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut length = 0;
+        if let Some(c) = self.advance() {
+            word.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.peek() {
+            if c.is_alphabetic() || c == '_' {
+                word.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+            length += 1;
+        }
+        if let Some(meta) = Self::recognize_metacommand(&word) {
+            Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: meta,
+                lexeme: word,
+            })
+        } else {
+            Err(ScanError {
+                location: Location::new(self.file.clone(), start_offset, length),
+                kind: ScanErrorKind::UnknownMetaCommand(word),
+            })
+        }
+    }
+
+    fn scan_identifier_or_keyword(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut length = 0;
+        if let Some(c) = self.advance() {
+            word.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.peek() {
+            if Self::ident_or_keyword_inner(c) {
+                word.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+            length += 1;
+        }
+        if let Some(keyword) = Self::recognize_keyword(&word) {
+            Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: keyword,
+                lexeme: word,
+            })
+        } else {
+            Err(ScanError {
+                location: Location::new(self.file.clone(), start_offset, length),
+                kind: ScanErrorKind::UnknownKeyword(word),
+            })
+        }
+    }
+
+    fn ident_or_keyword_start(c: char) -> bool {
+        c.is_alphabetic() || c == '_'
+    }
+
+    fn ident_or_keyword_inner(c: char) -> bool {
+        c.is_alphanumeric() || c == '_'
+    }
+
+    fn digit(c: char) -> bool {
+        c.is_ascii_digit() || c == '-' || c == '+'
+    }
+
+    fn scan_integer(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut length = 0;
+        if let Some(c) = self.advance() {
+            word.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.peek() {
+            if Self::digit(c) {
+                word.push(c);
+                self.advance();
+            } else {
+                break;
+            }
+            length += 1;
+        }
+        match word.parse::<i64>() {
+            Ok(int) => Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::Int(int),
+                lexeme: word,
+            }),
+            Err(e) => Err(ScanError {
+                location: Location::new(self.file.clone(), start_offset, length),
+                kind: ScanErrorKind::ParseIntError(e),
+            }),
+        }
+    }
+
+    fn scan_string(&mut self) -> Result<Token, ScanError> {
+        let start_offset = self.offset;
+        let mut word = String::new();
+        let mut lexeme = String::new();
+        let mut length = 0;
+        let mut valid = false;
+        if let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+        }
+        while let Some(c) = self.advance() {
+            lexeme.push(c);
+            length += 1;
+            if c == '"' {
+                valid = true;
+                break;
+            } else {
+                word.push(c);
+            }
+        }
+        if valid {
+            Ok(Token {
+                location: Location::new(self.file.clone(), start_offset, length),
+                data: TokenData::String(word),
+                lexeme,
+            })
+        } else {
+            Err(ScanError {
+                location: self.previous_location(0),
+                kind: ScanErrorKind::UnexpectedEndOfInputWhileLookingForMatching(
+                    '"',
+                    Location::new(self.file.clone(), start_offset, 1),
+                ),
+            })
+        }
+    }
+
+    fn scan_semicolon(&mut self) -> Result<Token, ScanError> {
+        self.advance();
+        Ok(Token {
+            location: self.previous_location(1),
+            data: TokenData::Semicolon,
+            lexeme: String::from(";"),
+        })
+    }
+
+    pub(super) fn scan_token(&mut self) -> Result<Option<Token>, ScanError> {
+        loop {
+            if let Some(c) = self.peek() {
+                if Self::ident_or_keyword_start(c) {
+                    return self.scan_identifier_or_keyword().map(Some);
+                } else if c == '.' {
+                    return self.scan_meta_command().map(Some);
+                } else if Self::digit(c) {
+                    return self.scan_integer().map(Some);
+                } else if c == '"' {
+                    return self.scan_string().map(Some);
+                } else if c == ';' {
+                    return self.scan_semicolon().map(Some);
+                } else if c.is_whitespace() {
+                    self.advance();
+                } else {
+                    let result = Err(ScanError {
+                        location: self.current_location(1),
+                        kind: ScanErrorKind::UnexpectedChar(c),
+                    });
+                    self.advance();
+                    return result;
+                }
+            } else {
+                return Ok(None);
+            }
+        }
+    }
+
+    pub(super) fn finalize(&mut self) {
+        self.tokens.push(Token {
+            location: self.current_location(0),
+            data: TokenData::EndOfFile,
+            lexeme: String::new(),
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::tokens::{Token, TokenData};
+
+    #[test]
+    fn test_tokenizer() {
+        let mut tokenizer = Tokenizer::new("INSERT Select".into(), "src/statement.sql".into());
+        let mut tokens = Vec::new();
+
+        while !tokenizer.is_at_end() {
+            if let Ok(Some(token)) = tokenizer.scan_token() {
+                tokens.push(token);
+            }
+        }
+        tokenizer.finalize();
+        tokens.extend(tokenizer.tokens);
+
+        let mut scanresult = tokens;
+        scanresult.reverse();
+        assert_eq!(
+            scanresult.pop(),
+            Some(Token {
+                location: Location::new(String::from("src/statement.sql"), 0, 6),
+                data: TokenData::Insert,
+                lexeme: String::from("INSERT"),
+            })
+        );
+        assert_eq!(
+            scanresult.pop(),
+            Some(Token {
+                location: Location::new(String::from("src/statement.sql"), 7, 6),
+                data: TokenData::Select,
+                lexeme: String::from("Select"),
+            })
+        );
+        assert_eq!(
+            scanresult.pop(),
+            Some(Token {
+                location: Location::new(String::from("src/statement.sql"), 13, 0),
+                data: TokenData::EndOfFile,
+                lexeme: String::from(""),
+            })
+        );
+        assert_eq!(scanresult.pop(), None);
+        assert!(scanresult.is_empty());
+    }
+}
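
Reviewer note (not part of the patch): a minimal usage sketch of the API after the split, assuming the crate is named `osdb` as in the doc examples above. Because `tokenize`, `Token`, `TokenData`, `Location`, and `ScanError` are all re-exported from `crate::tokens`, the submodule layout stays an internal detail for callers; only `ScanErrorKind`, which is not re-exported, now needs the `error::` path, which is exactly what the `error_display.rs` hunk adjusts.

```
// Usage sketch (hypothetical caller, assuming the crate name `osdb`).
use osdb::tokens::{tokenize, Location, TokenData};

fn main() {
    // `tokenize` still lives at the `tokens` module root after the refactor.
    match tokenize("INSERT 42;".to_string(), "example.sql".to_string()) {
        Ok(tokens) => {
            // `finalize` always appends an end-of-file marker as the last token.
            assert!(matches!(
                tokens.last().map(|t| &t.data),
                Some(TokenData::EndOfFile)
            ));
            for token in &tokens {
                // Uses Token's Display impl, e.g. `insert statement "INSERT"`.
                println!("{token}");
            }
        }
        Err(errors) => {
            for error in &errors {
                eprintln!("scan error: {error}");
            }
        }
    }

    // `Location` converts to a `Range<usize>`, which is what the ariadne-style
    // report building in error_display.rs relies on.
    let loc = Location::new("example.sql".into(), 5, 2);
    let span: std::ops::Range<usize> = (&loc).into();
    assert_eq!(span, 5..7);
}
```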