From 58be71725b8c48eaf1e7cc42fdf7005418619861 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Gu=C3=A9len?=
 <jguelen@student.42lehavre.fr>
Date: Wed, 19 Feb 2025 18:03:30 +0100
Subject: [PATCH] Wordsplitting: the preparation for a reswitch.

---
 src/parser/wordsplit/tokenizing_1_5.c  | 16 +++++++-
 src/parser/wordsplit/wordsplit.c       | 25 +++--------
 src/parser/wordsplit/wordsplit.h       | 18 ++++----
 src/parser/wordsplit/wordsplit_utils.c | 57 +++++++++++++++-----------
 4 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c
index 2b3d379..0ca64de 100644
--- a/src/parser/wordsplit/tokenizing_1_5.c
+++ b/src/parser/wordsplit/tokenizing_1_5.c
@@ -6,10 +6,22 @@
 /*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/19 13:20:01 by jguelen           #+#    #+#             */
-/*   Updated: 2025/02/19 13:20:49 by jguelen          ###   ########.fr       */
+/*   Updated: 2025/02/19 18:01:39 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "wordsplit.h"
 
-
+/*
+** cf. Token Recognition section at
+** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
+*/
+/*
+** 1. If the end of input is recognized (original[current_index] == '\0'),
+** the current token (if any) shall be delimited into token_build->wordlist.
+*/
+void	token_rule_1(t_token_build *token_build, char *original)
+{
+	if (original[token_build->current_index] == '\0')
+		token_build->wordlist = delimit(token_build);
+}
diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c
index e37379f..59c8f6d 100644
--- a/src/parser/wordsplit/wordsplit.c
+++ b/src/parser/wordsplit/wordsplit.c
@@ -6,25 +6,12 @@
 /*   By: jguelen <jguelen@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 17:02:32 by khais             #+#    #+#             */
-/*   Updated: 2025/02/19 15:17:22 by jguelen          ###   ########.fr       */
+/*   Updated: 2025/02/19 16:58:47 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "wordsplit.h"
 
-static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token,
-	bool *currently_in_word, bool *currently_in_operator)
-{
-	if ((*token) == NULL)
-		return (wordlist);
-	wordlist = wordlist_push(wordlist, worddesc_create((*token)->buffer));
-	free(*token);
-	(*token) = NULL;
-	(*currently_in_word) = false;
-	(*currently_in_operator) = false;
-	return (wordlist);
-}
-
 /*
 ** split a string into words, respecting quotes etc.
 **
@@ -44,16 +31,16 @@ t_wordlist	*minishell_wordsplit(char *original)
 	{
 		// 1. If the end of input is recognized, the current token (if any)
 		// shall be delimited.
-		if (original[idx] == '\0')
-			wordlist = delimit(wordlist, &token, &currently_in_word,
+		if (original[token_build.current_index] == '\0')
+			token_build.wordlist = delimit(wordlist, &token, &currently_in_word,
 					&currently_in_operator);
 		// 2. If the previous character was used as part of an operator and the
 		// current character is not quoted and can be used with the previous
 		// characters to form an operator, it shall be used as part of that
 		// (operator) token.
-		else if (currently_in_operator && quote == '\0'
-			&& is_operator_combo(token->buffer, original[idx]))
-			token = push_char(token, original[idx]);
+		else if (token_build.currently_in_operator && token_build.quote == '\0'
+			&& is_operator_combo(token_build.cur_token->buffer, original[token_build.current_index]))
+			token_build.cur_token = push_char(token, original[idx]);
 		// 3. If the previous character was used as part of an operator and the
 		// current character cannot be used with the previous characters to form
 		// an operator, the operator containing the previous character shall be
diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h
index c00a61b..c58656d 100644
--- a/src/parser/wordsplit/wordsplit.h
+++ b/src/parser/wordsplit/wordsplit.h
@@ -3,10 +3,10 @@
 /*                                                        :::      ::::::::   */
 /*   wordsplit.h                                        :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
-/*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
+/*   By: jguelen <jguelen@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 15:52:48 by khais             #+#    #+#             */
-/*   Updated: 2025/02/19 15:17:02 by jguelen          ###   ########.fr       */
+/*   Updated: 2025/02/19 18:02:50 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -31,13 +31,13 @@ typedef struct s_token_build
 	size_t		current_index;
 }	t_token_build;
 
-
-t_buffer	*push_char(t_buffer *token, char c);
-t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word);
-char		quote_flip(t_buffer **token, char c, char quote);
-void		operator_start(t_wordlist **wordlist, t_buffer **token, char c,
-	bool *currently_in_word, bool *currently_in_operator);
+t_wordlist	*delimit(t_token_build *token_build);
+t_buffer	*push_char(t_token_build *token_build, char c);
+t_buffer	*new_word(t_token_build *token_build, char c);
+char		quote_flip(t_token_build *token_build, char c);
+void		operator_start(t_token_build *token_build, char c);
+void		token_rule_1(t_token_build *token_build, char *original);
 
 t_wordlist	*minishell_wordsplit(char *original);
 
-#endif
+#endif
\ No newline at end of file
diff --git a/src/parser/wordsplit/wordsplit_utils.c b/src/parser/wordsplit/wordsplit_utils.c
index 714ec46..569d371 100644
--- a/src/parser/wordsplit/wordsplit_utils.c
+++ b/src/parser/wordsplit/wordsplit_utils.c
@@ -3,44 +3,55 @@
 /*                                                        :::      ::::::::   */
 /*   wordsplit_utils.c                                  :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
-/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*   By: jguelen <jguelen@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/19 14:27:47 by jguelen           #+#    #+#             */
-/*   Updated: 2025/02/19 14:56:36 by jguelen          ###   ########.fr       */
+/*   Updated: 2025/02/19 18:02:33 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "wordsplit.h"
 
-t_buffer	*push_char(t_buffer *token, char c)
+t_wordlist	*delimit(t_token_build *token_build)
 {
-	if (token == NULL)
-		token = ft_buffer_new();
-	return (ft_buffer_pushchar(token, c));
+	if (token_build->cur_token == NULL) /* no token in progress */
+		return (token_build->wordlist); /* list is returned unchanged */
+	token_build->wordlist = wordlist_push(token_build->wordlist,
+			worddesc_create(token_build->cur_token->buffer));
+	free(token_build->cur_token); /* NOTE(review): ->buffer not freed here */
+	token_build->cur_token = NULL; /* the next push_char starts afresh */
+	token_build->currently_in_word = false;
+	token_build->currently_in_operator = false;
+	return (token_build->wordlist); /* same value as stored in the builder */
 }
 
-t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
+t_buffer	*push_char(t_token_build *token_build, char c)
 {
-	token = push_char(token, c);
-	(*currently_in_word) = true;
-	return (token);
+	if (token_build->cur_token == NULL) /* first char of a new token */
+		token_build->cur_token = ft_buffer_new();
+	return (ft_buffer_pushchar(token_build->cur_token, c)); /* not stored */
 }
 
-char	quote_flip(t_buffer **token, char c, char quote)
+t_buffer	*new_word(t_token_build *token_build, char c)
 {
-	if (quote == '\0')
-		quote = c;
-	else if (quote == c)
-		quote = '\0';
-	(*token) = push_char((*token), c);
-	return (quote);
+	token_build->cur_token = push_char(token_build, c); /* takes builder */
+	token_build->currently_in_word = true; /* a word is now in progress */
+	return (token_build->cur_token);
 }
 
-void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
-	bool *currently_in_word, bool *currently_in_operator)
+char	quote_flip(t_token_build *token_build, char c)
 {
-	(*wordlist) = delimit(*wordlist, token, currently_in_word,
-			currently_in_operator);
-	(*token) = new_word(*token, c, currently_in_word);
-	(*currently_in_operator) = true;
+	if (token_build->quote == '\0') /* not quoted: c opens a quote */
+		token_build->quote = c;
+	else if (token_build->quote == c) /* same quote char: c closes it */
+		token_build->quote = '\0';
+	token_build->cur_token = push_char(token_build, c); /* takes builder */
+	return (token_build->quote); /* quote state after handling c */
+}
+
+void	operator_start(t_token_build *token_build, char c)
+{
+	token_build->wordlist = delimit(token_build); /* close current token */
+	token_build->cur_token = new_word(token_build, c); /* c starts a token */
+	token_build->currently_in_operator = true; /* that token is an operator */
 }