From 0d0a14d21a0b3d6bdc1df37eebe7237cc76bbdd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Gu=C3=A9len?=
 <jguelen@student.42lehavre.fr>
Date: Wed, 19 Feb 2025 15:54:38 +0100
Subject: [PATCH] Word splitting refactor: Prototypes to be modified

---
 src/parser/matchers/operator_start.c   |  3 +-
 src/parser/worddesc/worddesc.h         | 12 +++-
 src/parser/wordsplit/tokenizing_1_5.c  | 15 +++++
 src/parser/wordsplit/tokenizing_6_10.c | 15 +++++
 src/parser/wordsplit/wordsplit.c       | 82 +++++++-------------------
 src/parser/wordsplit/wordsplit.h       | 25 +++++++-
 src/parser/wordsplit/wordsplit_utils.c | 46 +++++++++++++++
 7 files changed, 133 insertions(+), 65 deletions(-)
 create mode 100644 src/parser/wordsplit/tokenizing_1_5.c
 create mode 100644 src/parser/wordsplit/tokenizing_6_10.c
 create mode 100644 src/parser/wordsplit/wordsplit_utils.c

diff --git a/src/parser/matchers/operator_start.c b/src/parser/matchers/operator_start.c
index 4184964..afe5481 100644
--- a/src/parser/matchers/operator_start.c
+++ b/src/parser/matchers/operator_start.c
@@ -6,7 +6,7 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/17 16:21:03 by khais             #+#    #+#             */
-/*   Updated: 2025/02/17 16:22:40 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/18 17:53:13 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -22,5 +22,4 @@ bool	is_operator_start(char c)
 		return (true);
 	else
 		return (false);
-
 }
diff --git a/src/parser/worddesc/worddesc.h b/src/parser/worddesc/worddesc.h
index 3fbf19b..c50bc40 100644
--- a/src/parser/worddesc/worddesc.h
+++ b/src/parser/worddesc/worddesc.h
@@ -6,13 +6,22 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 15:47:58 by khais             #+#    #+#             */
-/*   Updated: 2025/02/14 13:57:10 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/18 17:44:57 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #ifndef WORDDESC_H
 # define WORDDESC_H
 
+/* Bit values for the `flags' field of a t_worddesc (one bit per flag). */
+# define W_HASDOLLAR	0x01	/* Dollar sign present. */
+# define W_QUOTED	0x02	/* Some form of quote character is present. */
+# define W_ASSNBLTIN	0x04	/* Word is a builtin command that takes
+								assignments. */
+# define W_ASSIGNARG	0x08	/* Word is assignment argument to command. */
+# define W_HASQUOTEDNULL	0x10	/* Word contains a quoted null character. */
+# define W_DQUOTE	0x20	/* Word should be treated as if double-quoted. */
+
 /*
 ** A logical word for the parser.
 **
@@ -26,6 +35,7 @@ typedef struct s_worddesc
 	** The word itself
 	*/
 	char	*word;
+	char	flags;
 }	t_worddesc;
 
 t_worddesc	*worddesc_create(char *word);
diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c
new file mode 100644
index 0000000..2b3d379
--- /dev/null
+++ b/src/parser/wordsplit/tokenizing_1_5.c
@@ -0,0 +1,15 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizing_1_5.c                                   :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 13:20:01 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 13:20:49 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+
diff --git a/src/parser/wordsplit/tokenizing_6_10.c b/src/parser/wordsplit/tokenizing_6_10.c
new file mode 100644
index 0000000..cc316fe
--- /dev/null
+++ b/src/parser/wordsplit/tokenizing_6_10.c
@@ -0,0 +1,15 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizing_6_10.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 13:21:18 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 13:21:36 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+
diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c
index c0756c6..e37379f 100644
--- a/src/parser/wordsplit/wordsplit.c
+++ b/src/parser/wordsplit/wordsplit.c
@@ -3,21 +3,17 @@
 /*                                                        :::      ::::::::   */
 /*   wordsplit.c                                        :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
-/*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
+/*   By: jguelen <jguelen@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 17:02:32 by khais             #+#    #+#             */
-/*   Updated: 2025/02/17 16:41:00 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/19 15:17:22 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "wordsplit.h"
-#include "../../buffer/buffer.h"
-#include "../matchers/blank.h"
-#include "../matchers/operator_start.h"
-#include "../matchers/operator_combo.h"
-#include <stdlib.h>
 
-static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
+static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token,
+	bool *currently_in_word, bool *currently_in_operator)
 {
 	if ((*token) == NULL)
 		return (wordlist);
@@ -29,38 +25,6 @@ static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *current
 	return (wordlist);
 }
 
-static t_buffer	*push_char(t_buffer *token, char c)
-{
-	if (token == NULL)
-		token = ft_buffer_new();
-	return (ft_buffer_pushchar(token, c));
-}
-
-
-static t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
-{
-	token = push_char(token, c);
-	(*currently_in_word) = true;
-	return (token);
-}
-
-static char	quote_flip(t_buffer **token, char c, char quote)
-{
-	if (quote == '\0')
-		quote = c;
-	else if (quote == c)
-		quote = '\0';
-	(*token) = push_char((*token), c);
-	return (quote);
-}
-
-static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator)
-{
-	(*wordlist) = delimit(*wordlist, token, currently_in_word, currently_in_operator);
-	(*token) = new_word(*token, c, currently_in_word);
-	(*currently_in_operator) = true;
-}
-
 /*
 ** split a string into words, respecting quotes etc.
 **
@@ -73,38 +37,32 @@ static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool
 */
 t_wordlist	*minishell_wordsplit(char *original)
 {
-	size_t		idx;
-	t_wordlist	*wordlist;
-	t_buffer	*token;
-	bool		currently_in_word;
-	bool		currently_in_operator;
-	char		quote;
+	t_token_build	token_build;
 
-	idx = 0;
-	wordlist = NULL;
-	token = NULL;
-	currently_in_word = false;
-	currently_in_operator = false;
-	quote = '\0';
+	ft_bzero(&token_build);
 	while (true)
 	{
 		// 1. If the end of input is recognized, the current token (if any)
 		// shall be delimited.
 		if (original[idx] == '\0')
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 		// 2. If the previous character was used as part of an operator and the
 		// current character is not quoted and can be used with the previous
 		// characters to form an operator, it shall be used as part of that
 		// (operator) token.
-		else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
+		else if (currently_in_operator && quote == '\0'
+			&& is_operator_combo(token->buffer, original[idx]))
 			token = push_char(token, original[idx]);
 		// 3. If the previous character was used as part of an operator and the
 		// current character cannot be used with the previous characters to form
 		// an operator, the operator containing the previous character shall be
 		// delimited.
-		else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
+		else if (currently_in_operator && quote == '\0'
+			&& !is_operator_combo(token->buffer, original[idx]))
 		{
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 			continue ;
 		}
 		// 4. If the current character is single-quote, or double-quote and it
@@ -122,12 +80,14 @@ t_wordlist	*minishell_wordsplit(char *original)
 		// be delimited. The current character shall be used as the beginning of
 		// the next (operator) token.
 		else if (quote == '\0' && is_operator_start(original[idx]))
-			operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
+			operator_start(&wordlist, &token, original[idx],
+				&currently_in_word, &currently_in_operator);
 		// If the current character is an unquoted <blank>, any token containing
 		// the previous character is delimited and the current character shall
 		// be discarded.
 		else if (is_blank(original[idx]) && quote == '\0')
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 		// If the previous character was part of a word, the current character
 		// shall be appended to that word.
 		else if (currently_in_word)
@@ -139,7 +99,7 @@ t_wordlist	*minishell_wordsplit(char *original)
 			break ;
 		idx++;
 	}
-	if (quote != '\0')
-		return (wordlist_destroy(wordlist), NULL);
-	return (wordlist);
+	if (token_build.quote != '\0')
+		return (wordlist_destroy(token_build.wordlist), NULL);
+	return (token_build.wordlist);
 }
diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h
index 58e5d80..c00a61b 100644
--- a/src/parser/wordsplit/wordsplit.h
+++ b/src/parser/wordsplit/wordsplit.h
@@ -6,7 +6,7 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 15:52:48 by khais             #+#    #+#             */
-/*   Updated: 2025/02/13 15:54:30 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/19 15:17:02 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -14,6 +14,29 @@
 # define WORDSPLIT_H
 
 # include "../wordlist/wordlist.h"
+# include <stdbool.h>
+# include "../../buffer/buffer.h"
+# include "../matchers/blank.h"
+# include "../matchers/operator_start.h"
+# include "../matchers/operator_combo.h"
+# include <stdlib.h>
+
+typedef struct s_token_build	/* word-splitting state, grouped in one place */
+{
+	t_wordlist	*wordlist;	/* list that minishell_wordsplit returns */
+	t_buffer	*cur_token;	/* presumably the token being built; confirm */
+	bool		currently_in_word;	/* presumably: inside a word token */
+	bool		currently_in_operator;	/* presumably: inside an operator */
+	char		quote;	/* not '\0' after the loop => wordsplit returns NULL */
+	size_t		current_index;	/* presumably index into the input; confirm */
+}	t_token_build;
+
+
+t_buffer	*push_char(t_buffer *token, char c);
+t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word);
+char		quote_flip(t_buffer **token, char c, char quote);
+void		operator_start(t_wordlist **wordlist, t_buffer **token, char c,
+	bool *currently_in_word, bool *currently_in_operator);
 
 t_wordlist	*minishell_wordsplit(char *original);
 
diff --git a/src/parser/wordsplit/wordsplit_utils.c b/src/parser/wordsplit/wordsplit_utils.c
new file mode 100644
index 0000000..714ec46
--- /dev/null
+++ b/src/parser/wordsplit/wordsplit_utils.c
@@ -0,0 +1,46 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   wordsplit_utils.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 14:27:47 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 14:56:36 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+t_buffer	*push_char(t_buffer *token, char c)
+{
+	if (token != NULL)	/* existing buffer: append to it directly */
+		return (ft_buffer_pushchar(token, c));
+	return (ft_buffer_pushchar(ft_buffer_new(), c));
+}
+
+t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
+{
+	/* Flag the word state, then hand back the token with c appended. */
+	*currently_in_word = true;
+	return (push_char(token, c));
+}
+
+char	quote_flip(t_buffer **token, char c, char quote)
+{
+	*token = push_char(*token, c);	/* c is appended in every case */
+	if (quote == c)
+		return ('\0');
+	if (quote == '\0')
+		return (c);
+	return (quote);
+}
+
+void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
+	bool *currently_in_word, bool *currently_in_operator)
+{
+	*wordlist = delimit(*wordlist, token, currently_in_word,
+			currently_in_operator);
+	*currently_in_operator = true;	/* raised only after delimit() ran */
+	*token = new_word(*token, c, currently_in_word);
+}