wordsplit: handle operators

This commit is contained in:
Khaïs COLIN 2025-02-17 16:14:14 +01:00
parent 558ddb4096
commit f92763e479
Signed by: logistic-bot
SSH key fingerprint: SHA256:RlpiqKeXpcPFZZ4y9Ou4xi2M8OhRJovIwDlbCaMsuAo
7 changed files with 200 additions and 15 deletions

View file

@ -29,6 +29,8 @@ srcs = \
src/parser/matchers/blank.c \ src/parser/matchers/blank.c \
src/parser/matchers/identifier.c \ src/parser/matchers/identifier.c \
src/parser/matchers/metacharacter.c \ src/parser/matchers/metacharacter.c \
src/parser/matchers/operator_combo.c \
src/parser/matchers/operator_start.c \
src/parser/worddesc/worddesc.c \ src/parser/worddesc/worddesc.c \
src/parser/wordlist/wordlist.c \ src/parser/wordlist/wordlist.c \
src/parser/wordsplit/wordsplit.c \ src/parser/wordsplit/wordsplit.c \

View file

@ -0,0 +1,35 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_combo.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:30:54 by khais #+# #+# */
/* Updated: 2025/02/17 16:34:34 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#include <stdbool.h>
#include "libft.h"
/*
** Tell whether c can extend the operator token held in start.
**
** start: NUL-terminated text of the operator token accumulated so far
** c:     candidate next character
**
** Only a one-character token can be extended, and only by repeating its own
** character, which yields one of the recognized two-character operators:
** ||
** >>
** <<
** &&
**
** Returns false when start is NULL, empty, or longer than one character.
** The length test reads at most the first two bytes of start instead of
** measuring the whole token.
*/
bool	is_operator_combo(char *start, char c)
{
	if (!start || start[0] == '\0' || start[1] != '\0')
		return (false);
	if (c != '>' && c != '<' && c != '|' && c != '&')
		return (false);
	return (start[0] == c);
}

View file

@ -0,0 +1,20 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_combo.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:30:29 by khais #+# #+# */
/* Updated: 2025/02/17 16:30:45 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef OPERATOR_COMBO_H
# define OPERATOR_COMBO_H
# include <stdbool.h>
/*
** Return true when start holds exactly one character and c repeats that
** character to form one of the two-character operators ||, >>, << or &&.
** Return false in every other case.
*/
bool is_operator_combo(char *start, char c);
#endif

View file

@ -0,0 +1,26 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_start.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:21:03 by khais #+# #+# */
/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#include "libft.h"
#include <stdbool.h>
/*
** Is the character the start of an operator?
**
** c: character to classify
**
** Returns true for '<', '>', '|', '&', '(' and ')', false for anything else.
** The end-of-string character '\0' is never an operator start: the test is
** done with explicit comparisons because a strchr-style lookup in the
** operator set also matches the set's own terminator and would report '\0'
** as an operator.
*/
bool	is_operator_start(char c)
{
	if (c == '<' || c == '>' || c == '|')
		return (true);
	if (c == '&' || c == '(' || c == ')')
		return (true);
	return (false);
}

View file

@ -0,0 +1,20 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_start.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:22:48 by khais #+# #+# */
/* Updated: 2025/02/17 16:23:12 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef OPERATOR_START_H
# define OPERATOR_START_H
# include <stdbool.h>
/*
** Return true when c is a character that can begin an operator token,
** i.e. one of the characters in the set "<>|&()".
*/
bool is_operator_start(char c);
#endif

View file

@ -6,16 +6,18 @@
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */ /* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */ /* Created: 2025/02/13 17:02:32 by khais #+# #+# */
/* Updated: 2025/02/17 14:54:11 by khais ### ########.fr */ /* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
#include "wordsplit.h" #include "wordsplit.h"
#include "../../buffer/buffer.h" #include "../../buffer/buffer.h"
#include "../matchers/blank.h" #include "../matchers/blank.h"
#include "../matchers/operator_start.h"
#include "../matchers/operator_combo.h"
#include <stdlib.h> #include <stdlib.h>
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word) static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
{ {
if ((*token) == NULL) if ((*token) == NULL)
return (wordlist); return (wordlist);
@ -23,6 +25,7 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current
free(*token); free(*token);
(*token) = NULL; (*token) = NULL;
(*currently_in_word) = false; (*currently_in_word) = false;
(*currently_in_operator) = false;
return (wordlist); return (wordlist);
} }
@ -51,6 +54,13 @@ static char quote_flip(t_buffer **token, char c, char quote)
return (quote); return (quote);
} }
/*
** Begin a new operator token with the character c.
**
** The token currently being built (if any) is first handed to delimit(),
** whose returned list replaces *wordlist. A fresh token is then obtained
** from new_word() for c, and *currently_in_operator is raised so that the
** following characters are tested as operator continuations.
**
** NOTE(review): new_word() is not visible from here; it presumably also
** updates *currently_in_word -- confirm against its definition.
*/
static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
		bool *currently_in_word, bool *currently_in_operator)
{
	t_wordlist	*delimited;

	delimited = delimit(*wordlist, token, currently_in_word,
			currently_in_operator);
	*wordlist = delimited;
	*token = new_word(*token, c, currently_in_word);
	*currently_in_operator = true;
}
/* /*
** split a string into words, respecting quotes etc. ** split a string into words, respecting quotes etc.
** **
@ -67,34 +77,57 @@ t_wordlist *minishell_wordsplit(char *original)
t_wordlist *wordlist; t_wordlist *wordlist;
t_buffer *token; t_buffer *token;
bool currently_in_word; bool currently_in_word;
bool currently_in_operator;
char quote; char quote;
idx = 0; idx = 0;
wordlist = NULL; wordlist = NULL;
token = NULL; token = NULL;
currently_in_word = false; currently_in_word = false;
currently_in_operator = false;
quote = '\0'; quote = '\0';
while (true) while (true)
{ {
// If the end of input is recognized, the current token (if any) shall // 1. If the end of input is recognized, the current token (if any)
// be delimited. // shall be delimited.
if (original[idx] == '\0') if (original[idx] == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word); wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
// If the current character is single-quote, or double-quote and it is // 2. If the previous character was used as part of an operator and the
// not quoted, it shall affect quoting for subsequent characters up to // current character is not quoted and can be used with the previous
// the end of the quoted text. The rules for quoting are as described in // characters to form an operator, it shall be used as part of that
// Quoting . The result token shall contain exactly the characters that // (operator) token.
// appear in the input, unmodified, including any embedded or enclosing else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
// quotes or substitution operators, between the <quotation-mark> and token = push_char(token, original[idx]);
// the end of the quoted text. The token shall not be delimited by the // 3. If the previous character was used as part of an operator and the
// end of the quoted field. // current character cannot be used with the previous characters to form
// an operator, the operator containing the previous character shall be
// delimited.
else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
{
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
continue ;
}
// 4. If the current character is single-quote, or double-quote and it
// is not quoted, it shall affect quoting for subsequent characters up
// to the end of the quoted text. The rules for quoting are as described
// in Quoting. The result token shall contain exactly the characters
// that appear in the input, unmodified, including any embedded or
// enclosing quotes or substitution operators, between the
// <quotation-mark> and the end of the quoted text. The token shall not
// be delimited by the end of the quoted field.
else if (original[idx] == '\'' || original[idx] == '"') else if (original[idx] == '\'' || original[idx] == '"')
quote = quote_flip(&token, original[idx], quote); quote = quote_flip(&token, original[idx], quote);
// 6. If the current character is not quoted and can be used as the
// first character of a new operator, the current token (if any) shall
// be delimited. The current character shall be used as the beginning of
// the next (operator) token.
else if (quote == '\0' && is_operator_start(original[idx]))
operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
// If the current character is an unquoted <blank>, any token containing // If the current character is an unquoted <blank>, any token containing
// the previous character is delimited and the current character shall // the previous character is delimited and the current character shall
// be discarded. // be discarded.
else if (is_blank(original[idx]) && quote == '\0') else if (is_blank(original[idx]) && quote == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word); wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
// If the previous character was part of a word, the current character // If the previous character was part of a word, the current character
// shall be appended to that word. // shall be appended to that word.
else if (currently_in_word) else if (currently_in_word)

View file

@ -6,7 +6,7 @@
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */ /* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 15:17:56 by khais #+# #+# */ /* Created: 2025/02/13 15:17:56 by khais #+# #+# */
/* Updated: 2025/02/17 14:55:31 by khais ### ########.fr */ /* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
@ -129,6 +129,52 @@ static void test_wordsplit_unclosed_double(void)
assert(words == NULL); assert(words == NULL);
} }
/*
** An operator glued to a word must come out as two separate tokens:
** the operator first, then the word.
*/
static void	test_wordsplit_operator_word(void)
{
	t_wordlist	*tokens;

	tokens = minishell_wordsplit(">test");
	assert_strequal(">", wordlist_get(tokens, 0)->word);
	assert_strequal("test", wordlist_get(tokens, 1)->word);
	assert(wordlist_get(tokens, 2) == NULL);
	wordlist_destroy(tokens);
}
/*
** Every recognized operator, written back to back without blanks, must be
** split into its own token: nine tokens are expected, in input order.
*/
static void	test_wordsplit_all_operators(void)
{
	t_wordlist	*tokens;

	tokens = minishell_wordsplit("|&&||()<>><<>");
	assert_strequal("|", wordlist_get(tokens, 0)->word);
	assert_strequal("&&", wordlist_get(tokens, 1)->word);
	assert_strequal("||", wordlist_get(tokens, 2)->word);
	assert_strequal("(", wordlist_get(tokens, 3)->word);
	assert_strequal(")", wordlist_get(tokens, 4)->word);
	assert_strequal("<", wordlist_get(tokens, 5)->word);
	assert_strequal(">>", wordlist_get(tokens, 6)->word);
	assert_strequal("<<", wordlist_get(tokens, 7)->word);
	assert_strequal(">", wordlist_get(tokens, 8)->word);
	assert(wordlist_get(tokens, 9) == NULL);
	wordlist_destroy(tokens);
}
/*
** Three identical operator characters in a row must combine greedily:
** the first two form the double operator, the third stands alone.
*/
static void	test_wordsplit_operator_combining(void)
{
	t_wordlist	*tokens;

	tokens = minishell_wordsplit("|||>>><<<&&&");
	assert_strequal("||", wordlist_get(tokens, 0)->word);
	assert_strequal("|", wordlist_get(tokens, 1)->word);
	assert_strequal(">>", wordlist_get(tokens, 2)->word);
	assert_strequal(">", wordlist_get(tokens, 3)->word);
	assert_strequal("<<", wordlist_get(tokens, 4)->word);
	assert_strequal("<", wordlist_get(tokens, 5)->word);
	assert_strequal("&&", wordlist_get(tokens, 6)->word);
	assert_strequal("&", wordlist_get(tokens, 7)->word);
	assert(wordlist_get(tokens, 8) == NULL);
	wordlist_destroy(tokens);
}
int main(void) { int main(void) {
test_wordsplit_singleword(); test_wordsplit_singleword();
test_wordsplit_singleword_with_blanks(); test_wordsplit_singleword_with_blanks();
@ -140,5 +186,8 @@ int main(void) {
test_wordsplit_mixed_broken(); test_wordsplit_mixed_broken();
test_wordsplit_unclosed_single(); test_wordsplit_unclosed_single();
test_wordsplit_unclosed_double(); test_wordsplit_unclosed_double();
test_wordsplit_operator_word();
test_wordsplit_all_operators();
test_wordsplit_operator_combining();
return (0); return (0);
} }