wordsplit: handle operators

This commit is contained in:
Khaïs COLIN 2025-02-17 16:14:14 +01:00
parent 558ddb4096
commit f92763e479
Signed by: logistic-bot
SSH key fingerprint: SHA256:RlpiqKeXpcPFZZ4y9Ou4xi2M8OhRJovIwDlbCaMsuAo
7 changed files with 200 additions and 15 deletions

View file

@ -29,6 +29,8 @@ srcs = \
src/parser/matchers/blank.c \
src/parser/matchers/identifier.c \
src/parser/matchers/metacharacter.c \
src/parser/matchers/operator_combo.c \
src/parser/matchers/operator_start.c \
src/parser/worddesc/worddesc.c \
src/parser/wordlist/wordlist.c \
src/parser/wordsplit/wordsplit.c \

View file

@ -0,0 +1,35 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_combo.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:30:54 by khais #+# #+# */
/* Updated: 2025/02/17 16:34:34 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#include <stdbool.h>
#include "libft.h"
/*
** Tell whether appending c to the operator text held in start still yields a
** recognized operator.
**
** The only multi-character operators are doubled characters:
**   ||   >>   <<   &&
** so the answer is true only when start is exactly one of | > < & and c is
** that very same character.
**
** start: NUL-terminated text of the operator collected so far. A NULL pointer
**        is treated like an empty string.
** c:     candidate next character.
**
** Returns false when start is NULL, empty or longer than one character, and
** for every pair that is not listed above.
*/
bool is_operator_combo(char *start, char c)
{
	if (!start || start[0] == '\0' || start[1] != '\0')
		return (false);
	if (start[0] != c)
		return (false);
	return (c == '>' || c == '<' || c == '|' || c == '&');
}

View file

@ -0,0 +1,20 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_combo.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:30:29 by khais #+# #+# */
/* Updated: 2025/02/17 16:30:45 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef OPERATOR_COMBO_H
# define OPERATOR_COMBO_H
# include <stdbool.h>
/*
** True when start holds exactly one operator character and c repeats it,
** forming one of the two-character operators || >> << &&.
*/
bool is_operator_combo(char *start, char c);
#endif

View file

@ -0,0 +1,26 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_start.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:21:03 by khais #+# #+# */
/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#include "libft.h"
#include <stdbool.h>
/*
** Is the character the start of an operator?
**
** Operator tokens begin with one of these characters: < > | & ( )
**
** The characters are compared one by one instead of being searched for in a
** string set: a strchr-style search also matches the terminator of the set,
** which would report '\0' as an operator start. Here '\0' and every other
** character yield false.
*/
bool is_operator_start(char c)
{
	return (c == '<' || c == '>' || c == '|'
		|| c == '&' || c == '(' || c == ')');
}

View file

@ -0,0 +1,20 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* operator_start.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/17 16:22:48 by khais #+# #+# */
/* Updated: 2025/02/17 16:23:12 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef OPERATOR_START_H
# define OPERATOR_START_H
# include <stdbool.h>
/*
** True when c is one of the characters < > | & ( ) that can begin an
** operator token.
*/
bool is_operator_start(char c);
#endif

View file

@ -6,16 +6,18 @@
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
/* Updated: 2025/02/17 14:54:11 by khais ### ########.fr */
/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */
/* */
/* ************************************************************************** */
#include "wordsplit.h"
#include "../../buffer/buffer.h"
#include "../matchers/blank.h"
#include "../matchers/operator_start.h"
#include "../matchers/operator_combo.h"
#include <stdlib.h>
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
{
if ((*token) == NULL)
return (wordlist);
@ -23,6 +25,7 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current
free(*token);
(*token) = NULL;
(*currently_in_word) = false;
(*currently_in_operator) = false;
return (wordlist);
}
@ -51,6 +54,13 @@ static char quote_flip(t_buffer **token, char c, char quote)
return (quote);
}
/*
** Begin a new operator token with the character c.
**
** The token in progress (if any) is first handed to delimit(); the result of
** new_word() called with c then becomes the current token (presumably a fresh
** token holding c -- new_word is defined elsewhere, confirm there).
** The operator flag is raised last, since delimit() resets it when it
** finishes a token.
*/
static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator)
{
	t_wordlist	*updated;

	updated = delimit(*wordlist, token, currently_in_word,
			currently_in_operator);
	*wordlist = updated;
	*token = new_word(*token, c, currently_in_word);
	*currently_in_operator = true;
}
/*
** split a string into words, respecting quotes etc.
**
@ -67,34 +77,57 @@ t_wordlist *minishell_wordsplit(char *original)
t_wordlist *wordlist;
t_buffer *token;
bool currently_in_word;
bool currently_in_operator;
char quote;
idx = 0;
wordlist = NULL;
token = NULL;
currently_in_word = false;
currently_in_operator = false;
quote = '\0';
while (true)
{
// If the end of input is recognized, the current token (if any) shall
// be delimited.
// 1. If the end of input is recognized, the current token (if any)
// shall be delimited.
if (original[idx] == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word);
// If the current character is single-quote, or double-quote and it is
// not quoted, it shall affect quoting for subsequent characters up to
// the end of the quoted text. The rules for quoting are as described in
// Quoting . The result token shall contain exactly the characters that
// appear in the input, unmodified, including any embedded or enclosing
// quotes or substitution operators, between the <quotation-mark> and
// the end of the quoted text. The token shall not be delimited by the
// end of the quoted field.
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
// 2. If the previous character was used as part of an operator and the
// current character is not quoted and can be used with the previous
// characters to form an operator, it shall be used as part of that
// (operator) token.
else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
token = push_char(token, original[idx]);
// 3. If the previous character was used as part of an operator and the
// current character cannot be used with the previous characters to form
// an operator, the operator containing the previous character shall be
// delimited.
else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
{
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
continue ;
}
// 4. If the current character is single-quote, or double-quote and it
// is not quoted, it shall affect quoting for subsequent characters up
// to the end of the quoted text. The rules for quoting are as described
// in Quoting . The result token shall contain exactly the characters
// that appear in the input, unmodified, including any embedded or
// enclosing quotes or substitution operators, between the
// <quotation-mark> and the end of the quoted text. The token shall not
// be delimited by the end of the quoted field.
else if (original[idx] == '\'' || original[idx] == '"')
quote = quote_flip(&token, original[idx], quote);
// 6. If the current character is not quoted and can be used as the
// first character of a new operator, the current token (if any) shall
// be delimited. The current character shall be used as the beginning of
// the next (operator) token.
else if (quote == '\0' && is_operator_start(original[idx]))
operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
// If the current character is an unquoted <blank>, any token containing
// the previous character is delimited and the current character shall
// be discarded.
else if (is_blank(original[idx]) && quote == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word);
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
// If the previous character was part of a word, the current character
// shall be appended to that word.
else if (currently_in_word)

View file

@ -6,7 +6,7 @@
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 15:17:56 by khais #+# #+# */
/* Updated: 2025/02/17 14:55:31 by khais ### ########.fr */
/* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */
/* */
/* ************************************************************************** */
@ -129,6 +129,52 @@ static void test_wordsplit_unclosed_double(void)
assert(words == NULL);
}
/*
** An operator directly followed by a word must be split into two tokens:
** ">test" gives ">" then "test", and nothing after that.
**
** The list and each entry are checked for NULL before a word is read, so a
** result with too few tokens fails an assertion instead of dereferencing a
** NULL pointer.
*/
static void test_wordsplit_operator_word(void)
{
	t_wordlist *words;

	words = minishell_wordsplit(">test");
	assert(words != NULL);
	assert(wordlist_get(words, 0) != NULL);
	assert_strequal(">", wordlist_get(words, 0)->word);
	assert(wordlist_get(words, 1) != NULL);
	assert_strequal("test", wordlist_get(words, 1)->word);
	assert(NULL == wordlist_get(words, 2));
	wordlist_destroy(words);
}
/*
** Every recognized operator, written back to back without blanks, must come
** out as its own token, in input order.
**
** The expected tokens live in a table that is walked in a loop. Each entry is
** checked for NULL before its word is read, so a result with too few tokens
** fails an assertion instead of dereferencing a NULL pointer. The final check
** makes sure no extra token follows.
*/
static void test_wordsplit_all_operators(void)
{
	char *const expected[] = {
		"|", "&&", "||", "(", ")", "<", ">>", "<<", ">"
	};
	const int count = (int)(sizeof(expected) / sizeof(expected[0]));
	t_wordlist *words;
	int i;

	words = minishell_wordsplit("|&&||()<>><<>");
	assert(words != NULL);
	i = 0;
	while (i < count)
	{
		assert(wordlist_get(words, i) != NULL);
		assert_strequal(expected[i], wordlist_get(words, i)->word);
		i++;
	}
	assert(NULL == wordlist_get(words, count));
	wordlist_destroy(words);
}
/*
** Three identical operator characters in a row must combine greedily: the
** first two form the doubled operator, the third stands alone.
** "|||>>><<<&&&" gives "||" "|" ">>" ">" "<<" "<" "&&" "&".
**
** The expected tokens live in a table that is walked in a loop. Each entry is
** checked for NULL before its word is read, so a result with too few tokens
** fails an assertion instead of dereferencing a NULL pointer. The final check
** makes sure no extra token follows.
*/
static void test_wordsplit_operator_combining(void)
{
	char *const expected[] = {
		"||", "|", ">>", ">", "<<", "<", "&&", "&"
	};
	const int count = (int)(sizeof(expected) / sizeof(expected[0]));
	t_wordlist *words;
	int i;

	words = minishell_wordsplit("|||>>><<<&&&");
	assert(words != NULL);
	i = 0;
	while (i < count)
	{
		assert(wordlist_get(words, i) != NULL);
		assert_strequal(expected[i], wordlist_get(words, i)->word);
		i++;
	}
	assert(NULL == wordlist_get(words, count));
	wordlist_destroy(words);
}
int main(void) {
test_wordsplit_singleword();
test_wordsplit_singleword_with_blanks();
@ -140,5 +186,8 @@ int main(void) {
test_wordsplit_mixed_broken();
test_wordsplit_unclosed_single();
test_wordsplit_unclosed_double();
test_wordsplit_operator_word();
test_wordsplit_all_operators();
test_wordsplit_operator_combining();
return (0);
}