From 58be71725b8c48eaf1e7cc42fdf7005418619861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Gu=C3=A9len?= Date: Wed, 19 Feb 2025 18:03:30 +0100 Subject: [PATCH] Wordsplitting: the preparation for a reswitch. --- src/parser/wordsplit/tokenizing_1_5.c | 16 +++++++- src/parser/wordsplit/wordsplit.c | 25 +++-------- src/parser/wordsplit/wordsplit.h | 18 ++++---- src/parser/wordsplit/wordsplit_utils.c | 57 +++++++++++++++----------- 4 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c index 2b3d379..0ca64de 100644 --- a/src/parser/wordsplit/tokenizing_1_5.c +++ b/src/parser/wordsplit/tokenizing_1_5.c @@ -6,10 +6,22 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ -/* Updated: 2025/02/19 13:20:49 by jguelen ### ########.fr */ +/* Updated: 2025/02/19 18:01:39 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" - +/* +** cf. Token Recognition section at +** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html +*/ +/* +** 1. If the end of input is recognized, the current token (if any) shall be +** delimited. 
+*/ +void token_rule_1(t_token_build *token_build, char *original) +{ + if (original[token_build->current_index] == '\0') + token_build->wordlist = delimit(token_build); +} diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c index e37379f..59c8f6d 100644 --- a/src/parser/wordsplit/wordsplit.c +++ b/src/parser/wordsplit/wordsplit.c @@ -6,25 +6,12 @@ /* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 17:02:32 by khais #+# #+# */ -/* Updated: 2025/02/19 15:17:22 by jguelen ### ########.fr */ +/* Updated: 2025/02/19 16:58:47 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" -static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, - bool *currently_in_word, bool *currently_in_operator) -{ - if ((*token) == NULL) - return (wordlist); - wordlist = wordlist_push(wordlist, worddesc_create((*token)->buffer)); - free(*token); - (*token) = NULL; - (*currently_in_word) = false; - (*currently_in_operator) = false; - return (wordlist); -} - /* ** split a string into words, respecting quotes etc. ** @@ -44,16 +31,16 @@ t_wordlist *minishell_wordsplit(char *original) { // 1. If the end of input is recognized, the current token (if any) // shall be delimited. - if (original[idx] == '\0') - wordlist = delimit(wordlist, &token, &currently_in_word, + if (original[token_build.current_index] == '\0') + token_build.wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); // 2. If the previous character was used as part of an operator and the // current character is not quoted and can be used with the previous // characters to form an operator, it shall be used as part of that // (operator) token.
- else if (currently_in_operator && quote == '\0' - && is_operator_combo(token->buffer, original[idx])) - token = push_char(token, original[idx]); + else if (token_build.currently_in_operator && token_build.quote == '\0' + && is_operator_combo(token_build.cur_token->buffer, original[token_build.current_index])) + token_build.cur_token = push_char(&token_build, original[token_build.current_index]); // 3. If the previous character was used as part of an operator and the // current character cannot be used with the previous characters to form // an operator, the operator containing the previous character shall be diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h index c00a61b..c58656d 100644 --- a/src/parser/wordsplit/wordsplit.h +++ b/src/parser/wordsplit/wordsplit.h @@ -3,10 +3,10 @@ /* ::: :::::::: */ /* wordsplit.h :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: khais +#+ +:+ +#+ */ +/* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:52:48 by khais #+# #+# */ -/* Updated: 2025/02/19 15:17:02 by jguelen ### ########.fr */ +/* Updated: 2025/02/19 18:02:50 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ @@ -31,13 +31,13 @@ typedef struct s_token_build size_t current_index; } t_token_build; - -t_buffer *push_char(t_buffer *token, char c); -t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word); -char quote_flip(t_buffer **token, char c, char quote); -void operator_start(t_wordlist **wordlist, t_buffer **token, char c, - bool *currently_in_word, bool *currently_in_operator); +t_wordlist *delimit(t_token_build *token_build); +t_buffer *push_char(t_token_build *token_build, char c); +t_buffer *new_word(t_token_build *token_build, char c); +char quote_flip(t_token_build *token_build, char c); +void operator_start(t_token_build *token_build, char c); +void token_rule_1(t_token_build *token_build, char *original); t_wordlist *minishell_wordsplit(char *original); -#endif
+#endif \ No newline at end of file diff --git a/src/parser/wordsplit/wordsplit_utils.c b/src/parser/wordsplit/wordsplit_utils.c index 714ec46..569d371 100644 --- a/src/parser/wordsplit/wordsplit_utils.c +++ b/src/parser/wordsplit/wordsplit_utils.c @@ -3,44 +3,55 @@ /* ::: :::::::: */ /* wordsplit_utils.c :+: :+: :+: */ /* +:+ +:+ +:+ */ -/* By: jguelen +#+ +:+ +#+ */ +/* By: jguelen +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 14:27:47 by jguelen #+# #+# */ -/* Updated: 2025/02/19 14:56:36 by jguelen ### ########.fr */ +/* Updated: 2025/02/19 18:02:33 by jguelen ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" -t_buffer *push_char(t_buffer *token, char c) +t_wordlist *delimit(t_token_build *token_build) { - if (token == NULL) - token = ft_buffer_new(); - return (ft_buffer_pushchar(token, c)); + if (token_build->cur_token == NULL) + return (token_build->wordlist); + token_build->wordlist = wordlist_push(token_build->wordlist, + worddesc_create(token_build->cur_token->buffer)); + free(token_build->cur_token); + token_build->cur_token = NULL; + token_build->currently_in_word = false; + token_build->currently_in_operator = false; + return (token_build->wordlist); } -t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word) +t_buffer *push_char(t_token_build *token_build, char c) { - token = push_char(token, c); - (*currently_in_word) = true; - return (token); + if (token_build->cur_token == NULL) + token_build->cur_token = ft_buffer_new(); + return (ft_buffer_pushchar(token_build->cur_token, c)); } -char quote_flip(t_buffer **token, char c, char quote) +t_buffer *new_word(t_token_build *token_build, char c) { - if (quote == '\0') - quote = c; - else if (quote == c) - quote = '\0'; - (*token) = push_char((*token), c); - return (quote); + token_build->cur_token = push_char(token_build, c); + token_build->currently_in_word = true; + return
(token_build->cur_token); } -void operator_start(t_wordlist **wordlist, t_buffer **token, char c, - bool *currently_in_word, bool *currently_in_operator) +char quote_flip(t_token_build *token_build, char c) { - (*wordlist) = delimit(*wordlist, token, currently_in_word, - currently_in_operator); - (*token) = new_word(*token, c, currently_in_word); - (*currently_in_operator) = true; + if (token_build->quote == '\0') + token_build->quote = c; + else if (token_build->quote == c) + token_build->quote = '\0'; + token_build->cur_token = push_char(token_build, c); + return (token_build->quote); +} + +void operator_start(t_token_build *token_build, char c) +{ + token_build->wordlist = delimit(token_build); + token_build->cur_token = new_word(token_build, c); + token_build->currently_in_operator = true; }