From b23eb0f244d437da223703dc29b9bc828c0e59c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Wed, 19 Feb 2025 18:48:20 +0100 Subject: [PATCH] wordsplit: increment index + rename rules --- src/parser/wordsplit/tokenizing_1_5.c | 16 +++++++++------- src/parser/wordsplit/tokenizing_6_10.c | 14 +++++++++----- src/parser/wordsplit/wordsplit.c | 25 +++++++++++++++---------- src/parser/wordsplit/wordsplit.h | 20 ++++++++++---------- 4 files changed, 43 insertions(+), 32 deletions(-) diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c index f80ed56..9c654ea 100644 --- a/src/parser/wordsplit/tokenizing_1_5.c +++ b/src/parser/wordsplit/tokenizing_1_5.c @@ -6,7 +6,7 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ -/* Updated: 2025/02/19 18:38:33 by khais ### ########.fr */ +/* Updated: 2025/02/19 18:46:49 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -21,7 +21,7 @@ ** 1. If the end of input is recognized, the current token (if any) shall be ** delimited. */ -bool token_rule_1(t_token_build *token_build, char *original) +bool rule_eof(t_token_build *token_build, char *original) { if (original[token_build->current_index] == '\0') { @@ -36,12 +36,13 @@ bool token_rule_1(t_token_build *token_build, char *original) ** character is not quoted and can be used with the previous characters to form ** an operator, it shall be used as part of that (operator) token. 
*/ -bool token_rule_2(t_token_build *token_build, char *original) +bool rule_combine_operator(t_token_build *token_build, char *original) { if (token_build->currently_in_operator && token_build->quote == '\0' && is_operator_combo(token_build->cur_token->buffer, original[token_build->current_index])) { push_char(token_build, original[token_build->current_index]); + token_build->current_index++; return (true); } return (false); @@ -52,7 +53,7 @@ bool token_rule_2(t_token_build *token_build, char *original) ** character cannot be used with the previous characters to form an operator, ** the operator containing the previous character shall be delimited. */ -bool token_rule_3(t_token_build *token_build, char *original) +bool rule_operator_end(t_token_build *token_build, char *original) { if (token_build->currently_in_operator && token_build->quote == '\0' && !is_operator_combo(token_build->cur_token->buffer, original[token_build->current_index])) @@ -71,12 +72,13 @@ bool token_rule_3(t_token_build *token_build, char *original) ** operators, between the <quotation-mark> and the end of the quoted text. The ** token shall not be delimited by the end of the quoted field. 
*/ -bool token_rule_4(t_token_build *token_build, char *original) +bool rule_quote(t_token_build *token_build, char *original) { if (original[token_build->current_index] == '\'' || original[token_build->current_index] == '"') { - quote_flip(token_build, original[token_build->current_index]); - return (true); + quote_flip(token_build, original[token_build->current_index]); + token_build->current_index++; + return (true); } return (false); diff --git a/src/parser/wordsplit/tokenizing_6_10.c b/src/parser/wordsplit/tokenizing_6_10.c index 1b96421..615ba0e 100644 --- a/src/parser/wordsplit/tokenizing_6_10.c +++ b/src/parser/wordsplit/tokenizing_6_10.c @@ -6,7 +6,7 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */ -/* Updated: 2025/02/19 18:37:05 by khais ### ########.fr */ +/* Updated: 2025/02/19 18:48:03 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -23,11 +23,12 @@ ** The current character shall be used as the beginning of the next (operator) ** token. */ -bool token_rule_6(t_token_build *token_build, char *original) +bool rule_new_operator(t_token_build *token_build, char *original) { if (token_build->quote == '\0' && is_operator_start(original[token_build->current_index])) { operator_start(token_build, original[token_build->current_index]); + token_build->current_index++; return (true); } return (false); @@ -37,11 +38,12 @@ bool token_rule_6(t_token_build *token_build, char *original) ** 7. If the current character is an unquoted <blank>, any token containing the ** previous character is delimited and the current character shall be discarded. 
*/ -bool token_rule_7(t_token_build *token_build, char *original) +bool rule_delimit_blank(t_token_build *token_build, char *original) { if (is_blank(original[token_build->current_index]) && token_build->quote == '\0') { delimit(token_build); + token_build->current_index++; return (true); } return (false); @@ -51,11 +53,12 @@ bool token_rule_7(t_token_build *token_build, char *original) ** 8. If the previous character was part of a word, the current character shall ** be appended to that word. */ -bool token_rule_8(t_token_build *token_build, char *original) +bool rule_combine_word(t_token_build *token_build, char *original) { if (token_build->currently_in_word) { push_char(token_build, original[token_build->current_index]); + token_build->current_index++; return (true); } return (false); @@ -64,8 +67,9 @@ bool token_rule_8(t_token_build *token_build, char *original) /* ** 10. The current character is used as the start of a new word. */ -bool token_rule_10(t_token_build *token_build, char *original) +bool rule_new_word(t_token_build *token_build, char *original) { new_word(token_build, original[token_build->current_index]); + token_build->current_index++; return (true); } diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c index 21e3c13..129c930 100644 --- a/src/parser/wordsplit/wordsplit.c +++ b/src/parser/wordsplit/wordsplit.c @@ -5,14 +5,19 @@ /* +:+ +:+ +:+ */ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/02/19 18:41/00 by khais #+# #+# */ -/* Updated: 2025/02/19 18:41:00 by khais ### ########.fr */ +/* Created: 2025/02/19 18:53/51 by khais #+# #+# */ +/* Updated: 2025/02/19 18:53:51 by khais ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" #include "libft.h" +/* +** TODO: set flags +** TODO: rule 5 ($) +*/ + /* ** split a string into words, respecting quotes etc. 
** @@ -28,23 +33,23 @@ t_wordlist *minishell_wordsplit(char *original) t_token_build token_build; ft_bzero(&token_build, sizeof(t_token_build)); - while (!token_rule_1(&token_build, original)) + while (!rule_eof(&token_build, original)) { - if (token_rule_2(&token_build, original)) + if (rule_combine_operator(&token_build, original)) continue ; - if (token_rule_3(&token_build, original)) + if (rule_operator_end(&token_build, original)) continue ; - if (token_rule_4(&token_build, original)) + if (rule_quote(&token_build, original)) continue ; /* if (token_rule_5(&token_build, original)) */ /* continue ; */ - if (token_rule_6(&token_build, original)) + if (rule_new_operator(&token_build, original)) continue ; - if (token_rule_7(&token_build, original)) + if (rule_delimit_blank(&token_build, original)) continue ; - if (token_rule_8(&token_build, original)) + if (rule_combine_word(&token_build, original)) continue ; - token_rule_10(&token_build, original); + rule_new_word(&token_build, original); } if (token_build.quote != '\0') return (wordlist_destroy(token_build.wordlist), NULL); diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h index 269a3fa..161bfb9 100644 --- a/src/parser/wordsplit/wordsplit.h +++ b/src/parser/wordsplit/wordsplit.h @@ -5,8 +5,8 @@ /* +:+ +:+ +:+ */ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/02/19 18:37/59 by khais #+# #+# */ -/* Updated: 2025/02/19 18:37:59 by khais ### ########.fr */ +/* Created: 2025/02/19 18:48/03 by khais #+# #+# */ +/* Updated: 2025/02/19 18:48:03 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -36,15 +36,15 @@ void push_char(t_token_build *token_build, char c); void new_word(t_token_build *token_build, char c); void quote_flip(t_token_build *token_build, char c); void operator_start(t_token_build *token_build, char c); -bool token_rule_1(t_token_build *token_build, char *original); -bool 
token_rule_2(t_token_build *token_build, char *original); -bool token_rule_3(t_token_build *token_build, char *original); -bool token_rule_4(t_token_build *token_build, char *original); +bool rule_eof(t_token_build *token_build, char *original); +bool rule_combine_operator(t_token_build *token_build, char *original); +bool rule_operator_end(t_token_build *token_build, char *original); +bool rule_quote(t_token_build *token_build, char *original); bool token_rule_5(t_token_build *token_build, char *original); -bool token_rule_6(t_token_build *token_build, char *original); -bool token_rule_7(t_token_build *token_build, char *original); -bool token_rule_8(t_token_build *token_build, char *original); -bool token_rule_10(t_token_build *token_build, char *original); +bool rule_new_operator(t_token_build *token_build, char *original); +bool rule_delimit_blank(t_token_build *token_build, char *original); +bool rule_combine_word(t_token_build *token_build, char *original); +bool rule_new_word(t_token_build *token_build, char *original); t_wordlist *minishell_wordsplit(char *original);