Word splitting refactor: Prototypes to be modified

2025-12-06 07:28:09 +01:00 · 2025-02-19 15:54:38 +01:00 · 2025-02-19 15:54:38 +01:00 · 0d0a14d21a
commit 0d0a14d21a
parent cba6fba845
7 changed files with 133 additions and 65 deletions
--- a/src/parser/matchers/operator_start.c
+++ b/src/parser/matchers/operator_start.c
@ -6,7 +6,7 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/17 16:21:03 by khais             #+#    #+#             */
-/*   Updated: 2025/02/17 16:22:40 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/18 17:53:13 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

@ -22,5 +22,4 @@ bool	is_operator_start(char c)
 		return (true);
 	else
 		return (false);
-
 }
--- a/src/parser/worddesc/worddesc.h
+++ b/src/parser/worddesc/worddesc.h
@ -6,13 +6,22 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 15:47:58 by khais             #+#    #+#             */
-/*   Updated: 2025/02/14 13:57:10 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/18 17:44:57 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

 #ifndef WORDDESC_H
 # define WORDDESC_H

+/*
+** Possible values for the `flags' field of a t_worddesc.
+** Every value below sets a single, distinct bit.
+** NOTE(review): the 0b binary-literal prefix is a compiler extension before
+** C23 -- confirm the project's compiler flags accept it.
+*/
+# define W_HASDOLLAR	0b1	/* Dollar sign present. */
+# define W_QUOTED	0b10	/* Some form of quote character is present. */
+# define W_ASSNBLTIN	0b100	/* word is a builtin command that takes
+								assignments */
+# define W_ASSIGNARG	0b1000	/* word is assignment argument to command */
+# define W_HASQUOTEDNULL	0b10000	/* word contains a quoted null character */
+# define W_DQUOTE	0b100000	/* word should be treated as if double-quoted */
+
 /*
 ** A logical word for the parser.
 **
@ -26,6 +35,7 @@ typedef struct s_worddesc
 	** The word itself
 	*/
 	char	*word;
+	char	flags;
 }	t_worddesc;

 t_worddesc	*worddesc_create(char *word);
--- a/src/parser/wordsplit/tokenizing_1_5.c
+++ b/src/parser/wordsplit/tokenizing_1_5.c
@ -0,0 +1,15 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizing_1_5.c                                   :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 13:20:01 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 13:20:49 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+
--- a/src/parser/wordsplit/tokenizing_6_10.c
+++ b/src/parser/wordsplit/tokenizing_6_10.c
@ -0,0 +1,15 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   tokenizing_6_10.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 13:21:18 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 13:21:36 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+
--- a/src/parser/wordsplit/wordsplit.c
+++ b/src/parser/wordsplit/wordsplit.c
@ -3,21 +3,17 @@
 /*                                                        :::      ::::::::   */
 /*   wordsplit.c                                        :+:      :+:    :+:   */
 /*                                                    +:+ +:+         +:+     */
-/*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
+/*   By: jguelen <jguelen@student.42.fr>            +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 17:02:32 by khais             #+#    #+#             */
-/*   Updated: 2025/02/17 16:41:00 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/19 15:17:22 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

 #include "wordsplit.h"
-#include "../../buffer/buffer.h"
-#include "../matchers/blank.h"
-#include "../matchers/operator_start.h"
-#include "../matchers/operator_combo.h"
-#include <stdlib.h>

-static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
+static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token,
+	bool *currently_in_word, bool *currently_in_operator)
 {
 	if ((*token) == NULL)
 		return (wordlist);
@ -29,38 +25,6 @@ static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *current
 	return (wordlist);
 }

-static t_buffer	*push_char(t_buffer *token, char c)
-{
-	if (token == NULL)
-		token = ft_buffer_new();
-	return (ft_buffer_pushchar(token, c));
-}
-
-
-static t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
-{
-	token = push_char(token, c);
-	(*currently_in_word) = true;
-	return (token);
-}
-
-static char	quote_flip(t_buffer **token, char c, char quote)
-{
-	if (quote == '\0')
-		quote = c;
-	else if (quote == c)
-		quote = '\0';
-	(*token) = push_char((*token), c);
-	return (quote);
-}
-
-static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator)
-{
-	(*wordlist) = delimit(*wordlist, token, currently_in_word, currently_in_operator);
-	(*token) = new_word(*token, c, currently_in_word);
-	(*currently_in_operator) = true;
-}
-
 /*
 ** split a string into words, respecting quotes etc.
 **
@ -73,38 +37,32 @@ static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool
 */
 t_wordlist	*minishell_wordsplit(char *original)
 {
-	size_t		idx;
-	t_wordlist	*wordlist;
-	t_buffer	*token;
-	bool		currently_in_word;
-	bool		currently_in_operator;
-	char		quote;
+	t_token_build	token_build;

-	idx = 0;
-	wordlist = NULL;
-	token = NULL;
-	currently_in_word = false;
-	currently_in_operator = false;
-	quote = '\0';
+	ft_bzero(&token_build);
 	while (true)
 	{
 		// 1. If the end of input is recognized, the current token (if any)
 		// shall be delimited.
 		if (original[idx] == '\0')
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 		// 2. If the previous character was used as part of an operator and the
 		// current character is not quoted and can be used with the previous
 		// characters to form an operator, it shall be used as part of that
 		// (operator) token.
-		else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
+		else if (currently_in_operator && quote == '\0'
+			&& is_operator_combo(token->buffer, original[idx]))
 			token = push_char(token, original[idx]);
 		// 3. If the previous character was used as part of an operator and the
 		// current character cannot be used with the previous characters to form
 		// an operator, the operator containing the previous character shall be
 		// delimited.
-		else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
+		else if (currently_in_operator && quote == '\0'
+			&& !is_operator_combo(token->buffer, original[idx]))
 		{
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 			continue ;
 		}
 		// 4. If the current character is single-quote, or double-quote and it
@ -122,12 +80,14 @@ t_wordlist	*minishell_wordsplit(char *original)
 		// be delimited. The current character shall be used as the beginning of
 		// the next (operator) token.
 		else if (quote == '\0' && is_operator_start(original[idx]))
-			operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
+			operator_start(&wordlist, &token, original[idx],
+				&currently_in_word, &currently_in_operator);
 		// If the current character is an unquoted <blank>, any token containing
 		// the previous character is delimited and the current character shall
 		// be discarded.
 		else if (is_blank(original[idx]) && quote == '\0')
-			wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
+			wordlist = delimit(wordlist, &token, &currently_in_word,
+					&currently_in_operator);
 		// If the previous character was part of a word, the current character
 		// shall be appended to that word.
 		else if (currently_in_word)
@ -139,7 +99,7 @@ t_wordlist	*minishell_wordsplit(char *original)
 			break ;
 		idx++;
 	}
-	if (quote != '\0')
-		return (wordlist_destroy(wordlist), NULL);
-	return (wordlist);
+	if (token_build.quote != '\0')
+		return (wordlist_destroy(token_build.wordlist), NULL);
+	return (token_build.wordlist);
 }
--- a/src/parser/wordsplit/wordsplit.h
+++ b/src/parser/wordsplit/wordsplit.h
@ -6,7 +6,7 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 15:52:48 by khais             #+#    #+#             */
-/*   Updated: 2025/02/13 15:54:30 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/19 15:17:02 by jguelen          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

@ -14,6 +14,29 @@
 # define WORDSPLIT_H

 # include "../wordlist/wordlist.h"
+# include <stdbool.h>
+# include "../../buffer/buffer.h"
+# include "../matchers/blank.h"
+# include "../matchers/operator_start.h"
+# include "../matchers/operator_combo.h"
+# include <stdlib.h>
+
+/*
+** Working state of the word splitter, gathered in a single structure.
+**
+** wordlist: the word list that minishell_wordsplit returns
+** cur_token: presumably the buffer of the token being built (TODO confirm)
+** currently_in_word: presumably true while a word token is open
+** currently_in_operator: presumably true while an operator token is open
+** quote: '\0' when no quote is open; minishell_wordsplit treats any other
+**        value at the end of the input as an error
+** current_index: presumably the position reached in the input string
+**                (TODO confirm)
+*/
+typedef struct s_token_build
+{
+	t_wordlist	*wordlist;
+	t_buffer	*cur_token;
+	bool		currently_in_word;
+	bool		currently_in_operator;
+	char		quote;
+	size_t		current_index;
+}	t_token_build;
+
+
+/*
+** Token-building helpers, defined in wordsplit_utils.c.
+*/
+t_buffer	*push_char(t_buffer *token, char c);
+t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word);
+char		quote_flip(t_buffer **token, char c, char quote);
+void		operator_start(t_wordlist **wordlist, t_buffer **token, char c,
+	bool *currently_in_word, bool *currently_in_operator);

 t_wordlist	*minishell_wordsplit(char *original);

--- a/src/parser/wordsplit/wordsplit_utils.c
+++ b/src/parser/wordsplit/wordsplit_utils.c
@ -0,0 +1,46 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   wordsplit_utils.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: jguelen <marvin@42.fr>                     +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2025/02/19 14:27:47 by jguelen           #+#    #+#             */
+/*   Updated: 2025/02/19 14:56:36 by jguelen          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "wordsplit.h"
+
+/*
+** Append the character c to token.
+**
+** If token is NULL, a buffer is first obtained from ft_buffer_new.
+** Returns the value returned by ft_buffer_pushchar.
+**
+** NOTE(review): the result of ft_buffer_new is not checked here; whether
+** ft_buffer_pushchar accepts a NULL buffer is not visible from this file --
+** confirm.
+*/
+t_buffer	*push_char(t_buffer *token, char c)
+{
+	if (token == NULL)
+		token = ft_buffer_new();
+	return (ft_buffer_pushchar(token, c));
+}
+
+/*
+** Push the character c onto token with push_char and set
+** *currently_in_word to true.
+**
+** Returns the token as returned by push_char.
+*/
+t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
+{
+	token = push_char(token, c);
+	(*currently_in_word) = true;
+	return (token);
+}
+
+/*
+** Update the quoting state for the character c and push c onto *token.
+**
+** - quote is '\0': the returned state is c
+** - quote equals c: the returned state is '\0'
+** - otherwise: the state is returned unchanged
+**
+** In all three cases c is appended to *token with push_char.
+** Returns the new quoting state.
+*/
+char	quote_flip(t_buffer **token, char c, char quote)
+{
+	if (quote == '\0')
+		quote = c;
+	else if (quote == c)
+		quote = '\0';
+	(*token) = push_char((*token), c);
+	return (quote);
+}
+
+/*
+** Begin an operator token with the character c.
+**
+** Calls delimit on the current token and stores its result in *wordlist,
+** then passes *token and c to new_word and stores the result in *token,
+** and finally sets *currently_in_operator to true.
+**
+** NOTE(review): delimit is defined static in wordsplit.c and wordsplit.h
+** declares no prototype for it, so this call looks unresolved from this
+** file -- confirm and export delimit if needed.
+*/
+void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
+	bool *currently_in_word, bool *currently_in_operator)
+{
+	(*wordlist) = delimit(*wordlist, token, currently_in_word,
+			currently_in_operator);
+	(*token) = new_word(*token, c, currently_in_word);
+	(*currently_in_operator) = true;
+}