From edf8946fe382b80d0a44890a22aef5ca48ce0e91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= <khais.colin@gmail.com>
Date: Thu, 20 Feb 2025 12:00:04 +0100
Subject: [PATCH] wordsplit rules: fix basic norm problems

Also add FIXME comments marking code to revisit in future refactorings.
---
 src/parser/wordsplit/tokenizing_1_5.c  | 19 +++++++++++--------
 src/parser/wordsplit/tokenizing_6_10.c | 12 +++++++-----
 2 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c
index c44d921..8eab918 100644
--- a/src/parser/wordsplit/tokenizing_1_5.c
+++ b/src/parser/wordsplit/tokenizing_1_5.c
@@ -6,7 +6,7 @@
 /*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/19 13:20:01 by jguelen           #+#    #+#             */
-/*   Updated: 2025/02/20 11:52:28 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/20 12:22:28 by khais            ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -40,7 +40,8 @@ bool	rule_eof(t_token_build *builder, char *original)
 bool	rule_combine_operator(t_token_build *builder, char *original)
 {
 	if (builder->currently_in_operator && builder->quote == '\0'
-		&& is_operator_combo(builder->cur_token->buffer, original[builder->idx]))
+		&& is_operator_combo(builder->cur_token->buffer,
+			original[builder->idx]))
 	{
 		push_char(builder, original[builder->idx]);
 		builder->idx++;
@@ -54,16 +55,18 @@ bool	rule_combine_operator(t_token_build *builder, char *original)
 ** character cannot be used with the previous characters to form an operator,
 ** the operator containing the previous character shall be delimited.
 */
-bool		rule_operator_end(t_token_build *builder, char *original)
+bool	rule_operator_end(t_token_build *builder, char *original)
 {
-	if (builder->currently_in_operator && builder->quote == '\0'
-			&& !is_operator_combo(builder->cur_token->buffer, original[builder->idx]))
+	if (builder->currently_in_operator && builder->quote == '\0' // FIXME
+		&& !is_operator_combo(builder->cur_token->buffer,
+			original[builder->idx]))
 	{
 		delimit(builder);
 		return (true);
 	}
 	return (false);
 }
+
 /*
 ** 4. If the current character is single-quote, or double-quote and it is not
 ** quoted, it shall affect quoting for subsequent characters up to the end of
@@ -73,8 +76,9 @@ bool		rule_operator_end(t_token_build *builder, char *original)
 ** operators, between the <quotation-mark> and the end of the quoted text. The
 ** token shall not be delimited by the end of the quoted field.
 */
-bool		rule_quote(t_token_build *builder, char *original)
+bool	rule_quote(t_token_build *builder, char *original)
 {
+	// FIXME
 	if (original[builder->idx] == '\'' || original[builder->idx] == '"')
 	{
 		quote_flip(builder, original[builder->idx]);
@@ -82,7 +86,6 @@ bool		rule_quote(t_token_build *builder, char *original)
 		return (true);
 	}
 	return (false);
-
 }
 
-bool		token_rule_5(t_token_build *builder, char *original);
+bool	token_rule_5(t_token_build *builder, char *original);
diff --git a/src/parser/wordsplit/tokenizing_6_10.c b/src/parser/wordsplit/tokenizing_6_10.c
index e098fa1..a8b0228 100644
--- a/src/parser/wordsplit/tokenizing_6_10.c
+++ b/src/parser/wordsplit/tokenizing_6_10.c
@@ -6,7 +6,7 @@
 /*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/19 13:21:18 by jguelen           #+#    #+#             */
-/*   Updated: 2025/02/20 11:52:28 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/20 11:59:57 by khais            ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -25,8 +25,9 @@
 ** The current character shall be used as the beginning of the next (operator)
 ** token.
 */
-bool		rule_new_operator(t_token_build *builder, char *original)
+bool	rule_new_operator(t_token_build *builder, char *original)
 {
+	// FIXME: unquoted
 	if (builder->quote == '\0' && is_operator_start(original[builder->idx]))
 	{
 		operator_start(builder, original[builder->idx]);
@@ -40,8 +41,9 @@ bool		rule_new_operator(t_token_build *builder, char *original)
 ** 7. If the current character is an unquoted <blank>, any token containing the
 ** previous character is delimited and the current character shall be discarded.
 */
-bool		rule_delimit_blank(t_token_build *builder, char *original)
+bool	rule_delimit_blank(t_token_build *builder, char *original)
 {
+	// FIXME: unquoted
 	if (is_blank(original[builder->idx]) && builder->quote == '\0')
 	{
 		delimit(builder);
@@ -55,7 +57,7 @@ bool		rule_delimit_blank(t_token_build *builder, char *original)
 ** 8. If the previous character was part of a word, the current character shall
 ** be appended to that word.
 */
-bool		rule_combine_word(t_token_build *builder, char *original)
+bool	rule_combine_word(t_token_build *builder, char *original)
 {
 	if (builder->currently_in_word)
 	{
@@ -69,7 +71,7 @@ bool		rule_combine_word(t_token_build *builder, char *original)
 /*
 ** 10. The current character is used as the start of a new word.
 */
-bool		rule_new_word(t_token_build *builder, char *original)
+bool	rule_new_word(t_token_build *builder, char *original)
 {
 	new_word(builder, original[builder->idx]);
 	builder->idx++;