From 0d0a14d21a0b3d6bdc1df37eebe7237cc76bbdd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Gu=C3=A9len?= Date: Wed, 19 Feb 2025 15:54:38 +0100 Subject: [PATCH] Word splitting refactor: Prototypes to be modified --- src/parser/matchers/operator_start.c | 3 +- src/parser/worddesc/worddesc.h | 12 +++- src/parser/wordsplit/tokenizing_1_5.c | 15 +++++ src/parser/wordsplit/tokenizing_6_10.c | 15 +++++ src/parser/wordsplit/wordsplit.c | 82 +++++++------------------- src/parser/wordsplit/wordsplit.h | 25 +++++++- src/parser/wordsplit/wordsplit_utils.c | 46 +++++++++++++++ 7 files changed, 133 insertions(+), 65 deletions(-) create mode 100644 src/parser/wordsplit/tokenizing_1_5.c create mode 100644 src/parser/wordsplit/tokenizing_6_10.c create mode 100644 src/parser/wordsplit/wordsplit_utils.c diff --git a/src/parser/matchers/operator_start.c b/src/parser/matchers/operator_start.c index 4184964..afe5481 100644 --- a/src/parser/matchers/operator_start.c +++ b/src/parser/matchers/operator_start.c @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/17 16:21:03 by khais #+# #+# */ -/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */ +/* Updated: 2025/02/18 17:53:13 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ @@ -22,5 +22,4 @@ bool is_operator_start(char c) return (true); else return (false); - } diff --git a/src/parser/worddesc/worddesc.h b/src/parser/worddesc/worddesc.h index 3fbf19b..c50bc40 100644 --- a/src/parser/worddesc/worddesc.h +++ b/src/parser/worddesc/worddesc.h @@ -6,13 +6,22 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:47:58 by khais #+# #+# */ -/* Updated: 2025/02/14 13:57:10 by khais ### ########.fr */ +/* Updated: 2025/02/18 17:44:57 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ #ifndef WORDDESC_H # define WORDDESC_H +/* Possible 
values for the `flags' field of a WORD_DESC. */ +# define W_HASDOLLAR 0b1 /* Dollar sign present. */ +# define W_QUOTED 0b10 /* Some form of quote character is present. */ +# define W_ASSNBLTIN 0b100 /* word is a builtin command that takes + assignments */ +# define W_ASSIGNARG 0b1000 /* word is assignment argument to command */ +# define W_HASQUOTEDNULL 0b10000 /* word contains a quoted null character */ +# define W_DQUOTE 0b100000 /* word should be treated as if double-quoted */ + /* ** A logical word for the parser. ** @@ -26,6 +35,7 @@ typedef struct s_worddesc ** The word itself */ char *word; + char flags; } t_worddesc; t_worddesc *worddesc_create(char *word); diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c new file mode 100644 index 0000000..2b3d379 --- /dev/null +++ b/src/parser/wordsplit/tokenizing_1_5.c @@ -0,0 +1,15 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* tokenizing_1_5.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: jguelen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ +/* Updated: 2025/02/19 13:20:49 by jguelen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "wordsplit.h" + + diff --git a/src/parser/wordsplit/tokenizing_6_10.c b/src/parser/wordsplit/tokenizing_6_10.c new file mode 100644 index 0000000..cc316fe --- /dev/null +++ b/src/parser/wordsplit/tokenizing_6_10.c @@ -0,0 +1,15 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* tokenizing_6_10.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: jguelen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */ +/* Updated: 2025/02/19 13:21:36 by jguelen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include 
"wordsplit.h" + + diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c index c0756c6..e37379f 100644 --- a/src/parser/wordsplit/wordsplit.c +++ b/src/parser/wordsplit/wordsplit.c @@ -3,21 +3,17 @@ /* ::: :::::::: */ /* wordsplit.c :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: khais +#+ +:+ +#+ */ +/* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 17:02:32 by khais #+# #+# */ -/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */ +/* Updated: 2025/02/19 15:17:22 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" -#include "../../buffer/buffer.h" -#include "../matchers/blank.h" -#include "../matchers/operator_start.h" -#include "../matchers/operator_combo.h" -#include -static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator) +static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, + bool *currently_in_word, bool *currently_in_operator) { if ((*token) == NULL) return (wordlist); @@ -29,38 +25,6 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current return (wordlist); } -static t_buffer *push_char(t_buffer *token, char c) -{ - if (token == NULL) - token = ft_buffer_new(); - return (ft_buffer_pushchar(token, c)); -} - - -static t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word) -{ - token = push_char(token, c); - (*currently_in_word) = true; - return (token); -} - -static char quote_flip(t_buffer **token, char c, char quote) -{ - if (quote == '\0') - quote = c; - else if (quote == c) - quote = '\0'; - (*token) = push_char((*token), c); - return (quote); -} - -static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator) -{ - (*wordlist) = delimit(*wordlist, token, currently_in_word, currently_in_operator); - (*token) = new_word(*token, c, 
currently_in_word); - (*currently_in_operator) = true; -} - /* ** split a string into words, respecting quotes etc. ** @@ -73,38 +37,32 @@ static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool */ t_wordlist *minishell_wordsplit(char *original) { - size_t idx; - t_wordlist *wordlist; - t_buffer *token; - bool currently_in_word; - bool currently_in_operator; - char quote; + t_token_build token_build; - idx = 0; - wordlist = NULL; - token = NULL; - currently_in_word = false; - currently_in_operator = false; - quote = '\0'; + ft_bzero(&token_build); while (true) { // 1. If the end of input is recognized, the current token (if any) // shall be delimited. if (original[idx] == '\0') - wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); + wordlist = delimit(wordlist, &token, &currently_in_word, + &currently_in_operator); // 2. If the previous character was used as part of an operator and the // current character is not quoted and can be used with the previous // characters to form an operator, it shall be used as part of that // (operator) token. - else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx])) + else if (currently_in_operator && quote == '\0' + && is_operator_combo(token->buffer, original[idx])) token = push_char(token, original[idx]); // 3. If the previous character was used as part of an operator and the // current character cannot be used with the previous characters to form // an operator, the operator containing the previous character shall be // delimited. - else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx])) + else if (currently_in_operator && quote == '\0' + && !is_operator_combo(token->buffer, original[idx])) { - wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); + wordlist = delimit(wordlist, &token, &currently_in_word, + &currently_in_operator); continue ; } // 4.
If the current character is single-quote, or double-quote and it @@ -122,12 +80,14 @@ t_wordlist *minishell_wordsplit(char *original) // be delimited. The current character shall be used as the beginning of // the next (operator) token. else if (quote == '\0' && is_operator_start(original[idx])) - operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator); + operator_start(&wordlist, &token, original[idx], + &currently_in_word, &currently_in_operator); // If the current character is an unquoted <blank>, any token containing // the previous character is delimited and the current character shall // be discarded. else if (is_blank(original[idx]) && quote == '\0') - wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); + wordlist = delimit(wordlist, &token, &currently_in_word, + &currently_in_operator); // If the previous character was part of a word, the current character // shall be appended to that word. else if (currently_in_word) @@ -139,7 +99,7 @@ t_wordlist *minishell_wordsplit(char *original) break ; idx++; } - if (quote != '\0') - return (wordlist_destroy(wordlist), NULL); - return (wordlist); + if (token_build.quote != '\0') + return (wordlist_destroy(token_build.wordlist), NULL); + return (token_build.wordlist); } diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h index 58e5d80..c00a61b 100644 --- a/src/parser/wordsplit/wordsplit.h +++ b/src/parser/wordsplit/wordsplit.h @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:52:48 by khais #+# #+# */ -/* Updated: 2025/02/13 15:54:30 by khais ### ########.fr */ +/* Updated: 2025/02/19 15:17:02 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ @@ -14,6 +14,29 @@ # define WORDSPLIT_H # include "../wordlist/wordlist.h" +# include +# include "../../buffer/buffer.h" +# include "../matchers/blank.h" +# include "../matchers/operator_start.h" +# include
"../matchers/operator_combo.h" +# include + +typedef struct s_token_build +{ + t_wordlist *wordlist; + t_buffer *cur_token; + bool currently_in_word; + bool currently_in_operator; + char quote; + size_t current_index; +} t_token_build; + + +t_buffer *push_char(t_buffer *token, char c); +t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word); +char quote_flip(t_buffer **token, char c, char quote); +void operator_start(t_wordlist **wordlist, t_buffer **token, char c, + bool *currently_in_word, bool *currently_in_operator); t_wordlist *minishell_wordsplit(char *original); diff --git a/src/parser/wordsplit/wordsplit_utils.c b/src/parser/wordsplit/wordsplit_utils.c new file mode 100644 index 0000000..714ec46 --- /dev/null +++ b/src/parser/wordsplit/wordsplit_utils.c @@ -0,0 +1,46 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* wordsplit_utils.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: jguelen +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/19 14:27:47 by jguelen #+# #+# */ +/* Updated: 2025/02/19 14:56:36 by jguelen ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "wordsplit.h" + +t_buffer *push_char(t_buffer *token, char c) +{ + if (token == NULL) + token = ft_buffer_new(); + return (ft_buffer_pushchar(token, c)); +} + +t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word) +{ + token = push_char(token, c); + (*currently_in_word) = true; + return (token); +} + +char quote_flip(t_buffer **token, char c, char quote) +{ + if (quote == '\0') + quote = c; + else if (quote == c) + quote = '\0'; + (*token) = push_char((*token), c); + return (quote); +} + +void operator_start(t_wordlist **wordlist, t_buffer **token, char c, + bool *currently_in_word, bool *currently_in_operator) +{ + (*wordlist) = delimit(*wordlist, token, currently_in_word, + currently_in_operator); + (*token) = new_word(*token, c, 
currently_in_word); + (*currently_in_operator) = true; +}