From edf8946fe382b80d0a44890a22aef5ca48ce0e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Thu, 20 Feb 2025 12:00:04 +0100 Subject: [PATCH] wordsplit rules: fix basic norm problems also make notes for future refactorings --- src/parser/wordsplit/tokenizing_1_5.c | 19 +++++++++++-------- src/parser/wordsplit/tokenizing_6_10.c | 12 +++++++----- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c index c44d921..8eab918 100644 --- a/src/parser/wordsplit/tokenizing_1_5.c +++ b/src/parser/wordsplit/tokenizing_1_5.c @@ -6,7 +6,7 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ -/* Updated: 2025/02/20 11:52:28 by khais ### ########.fr */ +/* Updated: 2025/02/20 12:22:28 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -40,7 +40,8 @@ bool rule_eof(t_token_build *builder, char *original) bool rule_combine_operator(t_token_build *builder, char *original) { if (builder->currently_in_operator && builder->quote == '\0' - && is_operator_combo(builder->cur_token->buffer, original[builder->idx])) + && is_operator_combo(builder->cur_token->buffer, + original[builder->idx])) { push_char(builder, original[builder->idx]); builder->idx++; @@ -54,16 +55,18 @@ bool rule_combine_operator(t_token_build *builder, char *original) ** character cannot be used with the previous characters to form an operator, ** the operator containing the previous character shall be delimited. 
*/ -bool rule_operator_end(t_token_build *builder, char *original) +bool rule_operator_end(t_token_build *builder, char *original) { - if (builder->currently_in_operator && builder->quote == '\0' - && !is_operator_combo(builder->cur_token->buffer, original[builder->idx])) + if (builder->currently_in_operator && builder->quote == '\0' // FIXME + && !is_operator_combo(builder->cur_token->buffer, + original[builder->idx])) { delimit(builder); return (true); } return (false); } + /* ** 4. If the current character is single-quote, or double-quote and it is not ** quoted, it shall affect quoting for subsequent characters up to the end of @@ -73,8 +76,9 @@ bool rule_operator_end(t_token_build *builder, char *original) ** operators, between the <quotation-mark> and the end of the quoted text. The ** token shall not be delimited by the end of the quoted field. */ -bool rule_quote(t_token_build *builder, char *original) +bool rule_quote(t_token_build *builder, char *original) { + // FIXME if (original[builder->idx] == '\'' || original[builder->idx] == '"') { quote_flip(builder, original[builder->idx]); @@ -82,7 +86,6 @@ bool rule_quote(t_token_build *builder, char *original) return (true); } return (false); - } -bool token_rule_5(t_token_build *builder, char *original); +bool token_rule_5(t_token_build *builder, char *original); diff --git a/src/parser/wordsplit/tokenizing_6_10.c b/src/parser/wordsplit/tokenizing_6_10.c index e098fa1..a8b0228 100644 --- a/src/parser/wordsplit/tokenizing_6_10.c +++ b/src/parser/wordsplit/tokenizing_6_10.c @@ -6,7 +6,7 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */ -/* Updated: 2025/02/20 11:52:28 by khais ### ########.fr */ +/* Updated: 2025/02/20 11:59:57 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -25,8 +25,9 @@ ** The current character shall be used as the beginning of the next (operator) ** token. 
*/ -bool rule_new_operator(t_token_build *builder, char *original) +bool rule_new_operator(t_token_build *builder, char *original) { + // FIXME: unquoted if (builder->quote == '\0' && is_operator_start(original[builder->idx])) { operator_start(builder, original[builder->idx]); @@ -40,8 +41,9 @@ bool rule_new_operator(t_token_build *builder, char *original) ** 7. If the current character is an unquoted <blank>, any token containing the ** previous character is delimited and the current character shall be discarded. */ -bool rule_delimit_blank(t_token_build *builder, char *original) +bool rule_delimit_blank(t_token_build *builder, char *original) { + // FIXME: unquoted if (is_blank(original[builder->idx]) && builder->quote == '\0') { delimit(builder); @@ -55,7 +57,7 @@ bool rule_delimit_blank(t_token_build *builder, char *original) ** 8. If the previous character was part of a word, the current character shall ** be appended to that word. */ -bool rule_combine_word(t_token_build *builder, char *original) +bool rule_combine_word(t_token_build *builder, char *original) { if (builder->currently_in_word) { @@ -69,7 +71,7 @@ bool rule_combine_word(t_token_build *builder, char *original) /* ** 10. The current character is used as the start of a new word. */ -bool rule_new_word(t_token_build *builder, char *original) +bool rule_new_word(t_token_build *builder, char *original) { new_word(builder, original[builder->idx]); builder->idx++;