From ac8475c71d529304f9a9e4da1155de7716c4aaa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Thu, 20 Feb 2025 14:06:28 +0100 Subject: [PATCH] wordsplit: implement rule for variable substitution detection (rule 5) --- src/parser/worddesc/worddesc.c | 7 +++--- src/parser/worddesc/worddesc.h | 9 ++++++-- src/parser/wordsplit/tokenizing_1_5.c | 30 ++++++++++++++++++++++++-- src/parser/wordsplit/wordsplit.c | 9 ++++---- src/parser/wordsplit/wordsplit.h | 7 +++--- src/parser/wordsplit/wordsplit_utils.c | 7 +++--- tests/word_splitting.c | 18 +++++++++++++++- 7 files changed, 68 insertions(+), 19 deletions(-) diff --git a/src/parser/worddesc/worddesc.c b/src/parser/worddesc/worddesc.c index f7f7081..ce4facf 100644 --- a/src/parser/worddesc/worddesc.c +++ b/src/parser/worddesc/worddesc.c @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 17:20:36 by khais #+# #+# */ -/* Updated: 2025/02/14 15:23:44 by khais ### ########.fr */ +/* Updated: 2025/02/20 14:02:11 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,11 +15,11 @@ #include /* -** allocate a new worddesc with zeroed flags and the given word as word. +** allocate a new worddesc with given flags and the given word as word. 
** ** return null in case of error, or if word is null */ -t_worddesc *worddesc_create(char *word) +t_worddesc *worddesc_create(char *word, char flags) { t_worddesc *retvalue; @@ -29,6 +29,7 @@ t_worddesc *worddesc_create(char *word) if (retvalue == NULL) return (NULL); retvalue->word = word; + retvalue->flags = flags; return (retvalue); } diff --git a/src/parser/worddesc/worddesc.h b/src/parser/worddesc/worddesc.h index c50bc40..c5d4fbf 100644 --- a/src/parser/worddesc/worddesc.h +++ b/src/parser/worddesc/worddesc.h @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:47:58 by khais #+# #+# */ -/* Updated: 2025/02/18 17:44:57 by jguelen ### ########.fr */ +/* Updated: 2025/02/20 14:02:21 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -35,10 +35,15 @@ typedef struct s_worddesc ** The word itself */ char *word; + /* + ** flags for this word + ** + ** See above for flag definitions + */ char flags; } t_worddesc; -t_worddesc *worddesc_create(char *word); +t_worddesc *worddesc_create(char *word, char flags); void worddesc_destroy(t_worddesc *worddesc); #endif diff --git a/src/parser/wordsplit/tokenizing_1_5.c b/src/parser/wordsplit/tokenizing_1_5.c index faa4c97..5c8ae0e 100644 --- a/src/parser/wordsplit/tokenizing_1_5.c +++ b/src/parser/wordsplit/tokenizing_1_5.c @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ -/* Updated: 2025/02/20 13:22:57 by khais ### ########.fr */ +/* Updated: 2025/02/20 14:10:57 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -84,4 +84,30 @@ bool rule_quote(t_token_build *builder, char *original) return (false); } -bool token_rule_5(t_token_build *builder, char *original); +/* +** 5. 
If the current character is an unquoted '$' or '`', the shell shall +** identify the start of any candidates for parameter expansion (Parameter +** Expansion), command substitution (Command Substitution), or arithmetic +** expansion (Arithmetic Expansion) from their introductory unquoted character +** sequences: '$' or "${", "$(" or '`', and "$((", respectively. The shell shall +** read sufficient input to determine the end of the unit to be expanded (as +** explained in the cited sections). While processing the characters, if +** instances of expansions or quoting are found nested within the substitution, +** the shell shall recursively process them in the manner specified for the +** construct that is found. The characters found from the beginning of the +** substitution to its end, allowing for any recursion necessary to recognize +** embedded constructs, shall be included unmodified in the result token, +** including any embedded or enclosing substitution operators or quotes. The +** token shall not be delimited by the end of the substitution. 
+*/ +bool rule_var_substitution(t_token_build *builder, char *original) +{ + if (unquoted(builder) && original[builder->idx] == '$') + { + push_char(builder, original[builder->idx]); + builder->cur_flags |= W_HASDOLLAR; + builder->idx++; + return (true); + } + return (false); +} diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c index 129c930..78d5e3b 100644 --- a/src/parser/wordsplit/wordsplit.c +++ b/src/parser/wordsplit/wordsplit.c @@ -5,8 +5,8 @@ /* +:+ +:+ +:+ */ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/02/19 18:53/51 by khais #+# #+# */ -/* Updated: 2025/02/19 18:53:51 by khais ### ########.fr */ +/* Created: 2025/02/20 14:10/48 by khais #+# #+# */ +/* Updated: 2025/02/20 14:10:48 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,7 +15,6 @@ /* ** TODO: set flags -** TODO: rule 5 ($) */ /* @@ -41,8 +40,8 @@ t_wordlist *minishell_wordsplit(char *original) continue ; if (rule_quote(&token_build, original)) continue ; - /* if (token_rule_5(&token_build, original)) */ - /* continue ; */ + if (rule_var_substitution(&token_build, original)) + continue ; if (rule_new_operator(&token_build, original)) continue ; if (rule_delimit_blank(&token_build, original)) diff --git a/src/parser/wordsplit/wordsplit.h b/src/parser/wordsplit/wordsplit.h index a066622..79397cb 100644 --- a/src/parser/wordsplit/wordsplit.h +++ b/src/parser/wordsplit/wordsplit.h @@ -5,8 +5,8 @@ /* +:+ +:+ +:+ */ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/02/20 11:52/28 by khais #+# #+# */ -/* Updated: 2025/02/20 11:52:28 by khais ### ########.fr */ +/* Created: 2025/02/20 14:00/45 by khais #+# #+# */ +/* Updated: 2025/02/20 14:00:45 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -21,6 +21,7 @@ typedef struct s_token_build { t_wordlist *wordlist; t_buffer *cur_token; + char 
cur_flags; bool currently_in_word; bool currently_in_operator; char quote; @@ -36,7 +37,7 @@ bool rule_eof(t_token_build *builder, char *original); bool rule_combine_operator(t_token_build *builder, char *original); bool rule_operator_end(t_token_build *builder, char *original); bool rule_quote(t_token_build *builder, char *original); -bool token_rule_5(t_token_build *builder, char *original); +bool rule_var_substitution(t_token_build *builder, char *original); bool rule_new_operator(t_token_build *builder, char *original); bool rule_delimit_blank(t_token_build *builder, char *original); bool rule_combine_word(t_token_build *builder, char *original); diff --git a/src/parser/wordsplit/wordsplit_utils.c b/src/parser/wordsplit/wordsplit_utils.c index ab99bd7..2bc1926 100644 --- a/src/parser/wordsplit/wordsplit_utils.c +++ b/src/parser/wordsplit/wordsplit_utils.c @@ -5,8 +5,8 @@ /* +:+ +:+ +:+ */ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ -/* Created: 2025/02/20 11:51/27 by khais #+# #+# */ -/* Updated: 2025/02/20 11:51:27 by khais ### ########.fr */ +/* Created: 2025/02/20 14:02/29 by khais #+# #+# */ +/* Updated: 2025/02/20 14:02:29 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -18,8 +18,9 @@ void delimit(t_token_build *token_build) if (token_build->cur_token == NULL) return ; token_build->wordlist = wordlist_push(token_build->wordlist, - worddesc_create(token_build->cur_token->buffer)); + worddesc_create(token_build->cur_token->buffer, token_build->cur_flags)); free(token_build->cur_token); + token_build->cur_flags = 0; token_build->cur_token = NULL; token_build->currently_in_word = false; token_build->currently_in_operator = false; diff --git a/tests/word_splitting.c b/tests/word_splitting.c index 17a33c0..8b9fdb2 100644 --- a/tests/word_splitting.c +++ b/tests/word_splitting.c @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:17:56 by khais #+# 
#+# */ -/* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */ +/* Updated: 2025/02/20 14:05:21 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -175,6 +175,21 @@ static void test_wordsplit_operator_combining(void) wordlist_destroy(words); } +static void test_wordsplit_var_substitution(void) +{ + t_wordlist *words; + + words = minishell_wordsplit("echo VAR=$VAR here"); + assert_strequal("echo", wordlist_get(words, 0)->word); + assert(0 == wordlist_get(words, 0)->flags); + assert_strequal("VAR=$VAR", wordlist_get(words, 1)->word); + assert(W_HASDOLLAR == wordlist_get(words, 1)->flags); + assert_strequal("here", wordlist_get(words, 2)->word); + assert(0 == wordlist_get(words, 2)->flags); + wordlist_destroy(words); + +} + int main(void) { test_wordsplit_singleword(); test_wordsplit_singleword_with_blanks(); @@ -189,5 +204,6 @@ int main(void) { test_wordsplit_operator_word(); test_wordsplit_all_operators(); test_wordsplit_operator_combining(); + test_wordsplit_var_substitution(); return (0); }