From f92763e479c21da189c7162d7da8e9d3c3908a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kha=C3=AFs=20COLIN?= Date: Mon, 17 Feb 2025 16:14:14 +0100 Subject: [PATCH] wordsplit: handle operators --- Makefile | 2 + src/parser/matchers/operator_combo.c | 35 ++++++++++++++++ src/parser/matchers/operator_combo.h | 20 +++++++++ src/parser/matchers/operator_start.c | 26 ++++++++++++ src/parser/matchers/operator_start.h | 20 +++++++++ src/parser/wordsplit/wordsplit.c | 61 +++++++++++++++++++++------- tests/word_splitting.c | 51 ++++++++++++++++++++++- 7 files changed, 200 insertions(+), 15 deletions(-) create mode 100644 src/parser/matchers/operator_combo.c create mode 100644 src/parser/matchers/operator_combo.h create mode 100644 src/parser/matchers/operator_start.c create mode 100644 src/parser/matchers/operator_start.h diff --git a/Makefile b/Makefile index f0abf7c..125caa5 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,8 @@ srcs = \ src/parser/matchers/blank.c \ src/parser/matchers/identifier.c \ src/parser/matchers/metacharacter.c \ + src/parser/matchers/operator_combo.c \ + src/parser/matchers/operator_start.c \ src/parser/worddesc/worddesc.c \ src/parser/wordlist/wordlist.c \ src/parser/wordsplit/wordsplit.c \ diff --git a/src/parser/matchers/operator_combo.c b/src/parser/matchers/operator_combo.c new file mode 100644 index 0000000..ea914ad --- /dev/null +++ b/src/parser/matchers/operator_combo.c @@ -0,0 +1,35 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* operator_combo.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: khais +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/17 16:30:54 by khais #+# #+# */ +/* Updated: 2025/02/17 16:34:34 by khais ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include <stdbool.h> +#include "libft.h" + +/* +** return true if c can be used as the next character for an operator in start +** +** recognized 
operators are: +** || +** >> +** << +** && +*/ +bool is_operator_combo(char *start, char c) +{ + if (ft_strlen(start) != 1) + return (false); + if ((start[0] == '>' && c == '>') + || (start[0] == '<' && c == '<') + || (start[0] == '|' && c == '|') + || (start[0] == '&' && c == '&')) + return (true); + return (false); +} diff --git a/src/parser/matchers/operator_combo.h b/src/parser/matchers/operator_combo.h new file mode 100644 index 0000000..cf5ff2d --- /dev/null +++ b/src/parser/matchers/operator_combo.h @@ -0,0 +1,20 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* operator_combo.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: khais +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/17 16:30:29 by khais #+# #+# */ +/* Updated: 2025/02/17 16:30:45 by khais ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef OPERATOR_COMBO_H +# define OPERATOR_COMBO_H + +# include <stdbool.h> + +bool is_operator_combo(char *start, char c); + +#endif diff --git a/src/parser/matchers/operator_start.c b/src/parser/matchers/operator_start.c new file mode 100644 index 0000000..4184964 --- /dev/null +++ b/src/parser/matchers/operator_start.c @@ -0,0 +1,26 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* operator_start.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: khais +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/17 16:21:03 by khais #+# #+# */ +/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "libft.h" +#include <stdbool.h> + +/* +** Is the character the start of an operator? 
+*/ +bool is_operator_start(char c) +{ + if (ft_strchr("<>|&()", c) != NULL) + return (true); + else + return (false); + +} diff --git a/src/parser/matchers/operator_start.h b/src/parser/matchers/operator_start.h new file mode 100644 index 0000000..0fcd151 --- /dev/null +++ b/src/parser/matchers/operator_start.h @@ -0,0 +1,20 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* operator_start.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: khais +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2025/02/17 16:22:48 by khais #+# #+# */ +/* Updated: 2025/02/17 16:23:12 by khais ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef OPERATOR_START_H +# define OPERATOR_START_H + +# include <stdbool.h> + +bool is_operator_start(char c); + +#endif diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c index 2ecaefd..c0756c6 100644 --- a/src/parser/wordsplit/wordsplit.c +++ b/src/parser/wordsplit/wordsplit.c @@ -6,16 +6,18 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 17:02:32 by khais #+# #+# */ -/* Updated: 2025/02/17 14:54:11 by khais ### ########.fr */ +/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */ /* */ /* ************************************************************************** */ #include "wordsplit.h" #include "../../buffer/buffer.h" #include "../matchers/blank.h" +#include "../matchers/operator_start.h" +#include "../matchers/operator_combo.h" #include <stdlib.h> -static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word) +static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator) { if ((*token) == NULL) return (wordlist); @@ -23,6 +25,7 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current free(*token); (*token) = NULL; (*currently_in_word) = false; + 
(*currently_in_operator) = false; return (wordlist); } @@ -51,6 +54,13 @@ static char quote_flip(t_buffer **token, char c, char quote) return (quote); } +static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator) +{ + (*wordlist) = delimit(*wordlist, token, currently_in_word, currently_in_operator); + (*token) = new_word(*token, c, currently_in_word); + (*currently_in_operator) = true; +} + /* ** split a string into words, respecting quotes etc. ** @@ -67,34 +77,57 @@ t_wordlist *minishell_wordsplit(char *original) t_wordlist *wordlist; t_buffer *token; bool currently_in_word; + bool currently_in_operator; char quote; idx = 0; wordlist = NULL; token = NULL; currently_in_word = false; + currently_in_operator = false; quote = '\0'; while (true) { - // If the end of input is recognized, the current token (if any) shall - // be delimited. + // 1. If the end of input is recognized, the current token (if any) + // shall be delimited. if (original[idx] == '\0') - wordlist = delimit(wordlist, &token, &currently_in_word); - // If the current character is single-quote, or double-quote and it is - // not quoted, it shall affect quoting for subsequent characters up to - // the end of the quoted text. The rules for quoting are as described in - // Quoting . The result token shall contain exactly the characters that - // appear in the input, unmodified, including any embedded or enclosing - // quotes or substitution operators, between the <quotation-mark> and - // the end of the quoted text. The token shall not be delimited by the - // end of the quoted field. + wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); + // 2. If the previous character was used as part of an operator and the + // current character is not quoted and can be used with the previous + // characters to form an operator, it shall be used as part of that + // (operator) token. 
+ else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx])) + token = push_char(token, original[idx]); + // 3. If the previous character was used as part of an operator and the + // current character cannot be used with the previous characters to form + // an operator, the operator containing the previous character shall be + // delimited. + else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx])) + { + wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); + continue ; + } + // 4. If the current character is single-quote, or double-quote and it + // is not quoted, it shall affect quoting for subsequent characters up + // to the end of the quoted text. The rules for quoting are as described + // in Quoting . The result token shall contain exactly the characters + // that appear in the input, unmodified, including any embedded or + // enclosing quotes or substitution operators, between the + // <quotation-mark> and the end of the quoted text. The token shall not + // be delimited by the end of the quoted field. else if (original[idx] == '\'' || original[idx] == '"') quote = quote_flip(&token, original[idx], quote); + // 6. If the current character is not quoted and can be used as the + // first character of a new operator, the current token (if any) shall + // be delimited. The current character shall be used as the beginning of + // the next (operator) token. + else if (quote == '\0' && is_operator_start(original[idx])) + operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator); // If the current character is an unquoted <blank>, any token containing // the previous character is delimited and the current character shall // be discarded. 
else if (is_blank(original[idx]) && quote == '\0') - wordlist = delimit(wordlist, &token, &currently_in_word); + wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator); // If the previous character was part of a word, the current character // shall be appended to that word. else if (currently_in_word) diff --git a/tests/word_splitting.c b/tests/word_splitting.c index 80e8484..17a33c0 100644 --- a/tests/word_splitting.c +++ b/tests/word_splitting.c @@ -6,7 +6,7 @@ /* By: khais +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2025/02/13 15:17:56 by khais #+# #+# */ -/* Updated: 2025/02/17 14:55:31 by khais ### ########.fr */ +/* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */ /* */ /* ************************************************************************** */ @@ -129,6 +129,52 @@ static void test_wordsplit_unclosed_double(void) assert(words == NULL); } +static void test_wordsplit_operator_word(void) +{ + t_wordlist *words; + + words = minishell_wordsplit(">test"); + assert_strequal(">", wordlist_get(words, 0)->word); + assert_strequal("test", wordlist_get(words, 1)->word); + assert(NULL == wordlist_get(words, 2)); + wordlist_destroy(words); +} + +static void test_wordsplit_all_operators(void) +{ + t_wordlist *words; + + words = minishell_wordsplit("|&&||()<>><<>"); + assert_strequal("|", wordlist_get(words, 0)->word); + assert_strequal("&&", wordlist_get(words, 1)->word); + assert_strequal("||", wordlist_get(words, 2)->word); + assert_strequal("(", wordlist_get(words, 3)->word); + assert_strequal(")", wordlist_get(words, 4)->word); + assert_strequal("<", wordlist_get(words, 5)->word); + assert_strequal(">>", wordlist_get(words, 6)->word); + assert_strequal("<<", wordlist_get(words, 7)->word); + assert_strequal(">", wordlist_get(words, 8)->word); + assert(NULL == wordlist_get(words, 9)); + wordlist_destroy(words); +} + +static void test_wordsplit_operator_combining(void) +{ + t_wordlist *words; + + words = minishell_wordsplit("|||>>><<<&&&"); + 
assert_strequal("||", wordlist_get(words, 0)->word); + assert_strequal("|", wordlist_get(words, 1)->word); + assert_strequal(">>", wordlist_get(words, 2)->word); + assert_strequal(">", wordlist_get(words, 3)->word); + assert_strequal("<<", wordlist_get(words, 4)->word); + assert_strequal("<", wordlist_get(words, 5)->word); + assert_strequal("&&", wordlist_get(words, 6)->word); + assert_strequal("&", wordlist_get(words, 7)->word); + assert(NULL == wordlist_get(words, 8)); + wordlist_destroy(words); +} + int main(void) { test_wordsplit_singleword(); test_wordsplit_singleword_with_blanks(); @@ -140,5 +186,8 @@ int main(void) { test_wordsplit_mixed_broken(); test_wordsplit_unclosed_single(); test_wordsplit_unclosed_double(); + test_wordsplit_operator_word(); + test_wordsplit_all_operators(); + test_wordsplit_operator_combining(); return (0); }