mirror of
https://codeberg.org/la-chouette/minishell.git
synced 2025-12-06 07:28:09 +01:00
wordsplit: handle operators
This commit is contained in:
parent
558ddb4096
commit
f92763e479
7 changed files with 200 additions and 15 deletions
2
Makefile
2
Makefile
|
|
@ -29,6 +29,8 @@ srcs = \
|
|||
src/parser/matchers/blank.c \
|
||||
src/parser/matchers/identifier.c \
|
||||
src/parser/matchers/metacharacter.c \
|
||||
src/parser/matchers/operator_combo.c \
|
||||
src/parser/matchers/operator_start.c \
|
||||
src/parser/worddesc/worddesc.c \
|
||||
src/parser/wordlist/wordlist.c \
|
||||
src/parser/wordsplit/wordsplit.c \
|
||||
|
|
|
|||
35
src/parser/matchers/operator_combo.c
Normal file
35
src/parser/matchers/operator_combo.c
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* operator_combo.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/17 16:30:54 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:34:34 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "libft.h"
|
||||
|
||||
/*
** Report whether c extends the operator held in start into a two-character
** operator.
**
** start: NUL-terminated text of the operator token built so far
** c:     candidate next character
**
** Returns true only when start is exactly one character long and c repeats
** that character to form one of the recognized operators:
**   ||
**   >>
**   <<
**   &&
** Returns false for a NULL or empty start, for a start that is already longer
** than one character, and for every other pairing.
*/
bool	is_operator_combo(char *start, char c)
{
	if (!start)
		return (false);
	// exactly one character long: start[0] is set, start[1] is the terminator
	if (start[0] == '\0' || start[1] != '\0')
		return (false);
	// every recognized combo simply doubles its first character
	if (c != start[0])
		return (false);
	return (c == '>' || c == '<' || c == '|' || c == '&');
}
|
||||
20
src/parser/matchers/operator_combo.h
Normal file
20
src/parser/matchers/operator_combo.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* operator_combo.h :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/17 16:30:29 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:30:45 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#ifndef OPERATOR_COMBO_H
|
||||
# define OPERATOR_COMBO_H
|
||||
|
||||
# include <stdbool.h>
|
||||
|
||||
/*
** Return true when start is a one-character operator and c repeats that
** character to form ||, >>, << or &&; return false otherwise.
*/
bool is_operator_combo(char *start, char c);
|
||||
|
||||
#endif
|
||||
26
src/parser/matchers/operator_start.c
Normal file
26
src/parser/matchers/operator_start.c
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* operator_start.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/17 16:21:03 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "libft.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
** Is the character the start of an operator?
**
** c: character to classify
**
** Returns true when c is one of < > | & ( ) and false for anything else.
** The characters are compared directly rather than searched for in a string
** literal: a strchr-style search treats the terminating NUL as part of the
** string, so it would report '\0' as an operator start.
*/
bool	is_operator_start(char c)
{
	return (c == '<' || c == '>' || c == '|' || c == '&'
		|| c == '(' || c == ')');
}
|
||||
20
src/parser/matchers/operator_start.h
Normal file
20
src/parser/matchers/operator_start.h
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* operator_start.h :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/17 16:22:48 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:23:12 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#ifndef OPERATOR_START_H
|
||||
# define OPERATOR_START_H
|
||||
|
||||
# include <stdbool.h>
|
||||
|
||||
/*
** Is the character the start of an operator?
** True when c is one of < > | & ( ).
*/
bool is_operator_start(char c);
|
||||
|
||||
#endif
|
||||
|
|
@ -6,16 +6,18 @@
|
|||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 14:54:11 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
#include "../../buffer/buffer.h"
|
||||
#include "../matchers/blank.h"
|
||||
#include "../matchers/operator_start.h"
|
||||
#include "../matchers/operator_combo.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
|
||||
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
|
||||
{
|
||||
if ((*token) == NULL)
|
||||
return (wordlist);
|
||||
|
|
@ -23,6 +25,7 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current
|
|||
free(*token);
|
||||
(*token) = NULL;
|
||||
(*currently_in_word) = false;
|
||||
(*currently_in_operator) = false;
|
||||
return (wordlist);
|
||||
}
|
||||
|
||||
|
|
@ -51,6 +54,13 @@ static char quote_flip(t_buffer **token, char c, char quote)
|
|||
return (quote);
|
||||
}
|
||||
|
||||
/*
** Begin a new operator token with character c.
**
** The token currently being built (if any) is first handed to delimit(), and
** the list it returns replaces *wordlist. A fresh token for c is then obtained
** from new_word() and stored in *token, and *currently_in_operator is set.
*/
static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator)
{
	t_wordlist	*updated;

	updated = delimit(*wordlist, token, currently_in_word,
			currently_in_operator);
	*wordlist = updated;
	*token = new_word(*token, c, currently_in_word);
	*currently_in_operator = true;
}
|
||||
|
||||
/*
|
||||
** split a string into words, respecting quotes etc.
|
||||
**
|
||||
|
|
@ -67,34 +77,57 @@ t_wordlist *minishell_wordsplit(char *original)
|
|||
t_wordlist *wordlist;
|
||||
t_buffer *token;
|
||||
bool currently_in_word;
|
||||
bool currently_in_operator;
|
||||
char quote;
|
||||
|
||||
idx = 0;
|
||||
wordlist = NULL;
|
||||
token = NULL;
|
||||
currently_in_word = false;
|
||||
currently_in_operator = false;
|
||||
quote = '\0';
|
||||
while (true)
|
||||
{
|
||||
// If the end of input is recognized, the current token (if any) shall
|
||||
// be delimited.
|
||||
// 1. If the end of input is recognized, the current token (if any)
|
||||
// shall be delimited.
|
||||
if (original[idx] == '\0')
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word);
|
||||
// If the current character is single-quote, or double-quote and it is
|
||||
// not quoted, it shall affect quoting for subsequent characters up to
|
||||
// the end of the quoted text. The rules for quoting are as described in
|
||||
// Quoting. The result token shall contain exactly the characters that
|
||||
// appear in the input, unmodified, including any embedded or enclosing
|
||||
// quotes or substitution operators, between the <quotation-mark> and
|
||||
// the end of the quoted text. The token shall not be delimited by the
|
||||
// end of the quoted field.
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
// 2. If the previous character was used as part of an operator and the
|
||||
// current character is not quoted and can be used with the previous
|
||||
// characters to form an operator, it shall be used as part of that
|
||||
// (operator) token.
|
||||
else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
|
||||
token = push_char(token, original[idx]);
|
||||
// 3. If the previous character was used as part of an operator and the
|
||||
// current character cannot be used with the previous characters to form
|
||||
// an operator, the operator containing the previous character shall be
|
||||
// delimited.
|
||||
else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
|
||||
{
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
continue ;
|
||||
}
|
||||
// 4. If the current character is single-quote, or double-quote and it
|
||||
// is not quoted, it shall affect quoting for subsequent characters up
|
||||
// to the end of the quoted text. The rules for quoting are as described
|
||||
// in Quoting. The result token shall contain exactly the characters
|
||||
// that appear in the input, unmodified, including any embedded or
|
||||
// enclosing quotes or substitution operators, between the
|
||||
// <quotation-mark> and the end of the quoted text. The token shall not
|
||||
// be delimited by the end of the quoted field.
|
||||
else if (original[idx] == '\'' || original[idx] == '"')
|
||||
quote = quote_flip(&token, original[idx], quote);
|
||||
// 6. If the current character is not quoted and can be used as the
|
||||
// first character of a new operator, the current token (if any) shall
|
||||
// be delimited. The current character shall be used as the beginning of
|
||||
// the next (operator) token.
|
||||
else if (quote == '\0' && is_operator_start(original[idx]))
|
||||
operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
|
||||
// If the current character is an unquoted <blank>, any token containing
|
||||
// the previous character is delimited and the current character shall
|
||||
// be discarded.
|
||||
else if (is_blank(original[idx]) && quote == '\0')
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word);
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
// If the previous character was part of a word, the current character
|
||||
// shall be appended to that word.
|
||||
else if (currently_in_word)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 15:17:56 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 14:55:31 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -129,6 +129,52 @@ static void test_wordsplit_unclosed_double(void)
|
|||
assert(words == NULL);
|
||||
}
|
||||
|
||||
static void test_wordsplit_operator_word(void)
|
||||
{
|
||||
t_wordlist *words;
|
||||
|
||||
words = minishell_wordsplit(">test");
|
||||
assert_strequal(">", wordlist_get(words, 0)->word);
|
||||
assert_strequal("test", wordlist_get(words, 1)->word);
|
||||
assert(NULL == wordlist_get(words, 2));
|
||||
wordlist_destroy(words);
|
||||
}
|
||||
|
||||
static void test_wordsplit_all_operators(void)
|
||||
{
|
||||
t_wordlist *words;
|
||||
|
||||
words = minishell_wordsplit("|&&||()<>><<>");
|
||||
assert_strequal("|", wordlist_get(words, 0)->word);
|
||||
assert_strequal("&&", wordlist_get(words, 1)->word);
|
||||
assert_strequal("||", wordlist_get(words, 2)->word);
|
||||
assert_strequal("(", wordlist_get(words, 3)->word);
|
||||
assert_strequal(")", wordlist_get(words, 4)->word);
|
||||
assert_strequal("<", wordlist_get(words, 5)->word);
|
||||
assert_strequal(">>", wordlist_get(words, 6)->word);
|
||||
assert_strequal("<<", wordlist_get(words, 7)->word);
|
||||
assert_strequal(">", wordlist_get(words, 8)->word);
|
||||
assert(NULL == wordlist_get(words, 9));
|
||||
wordlist_destroy(words);
|
||||
}
|
||||
|
||||
static void test_wordsplit_operator_combining(void)
|
||||
{
|
||||
t_wordlist *words;
|
||||
|
||||
words = minishell_wordsplit("|||>>><<<&&&");
|
||||
assert_strequal("||", wordlist_get(words, 0)->word);
|
||||
assert_strequal("|", wordlist_get(words, 1)->word);
|
||||
assert_strequal(">>", wordlist_get(words, 2)->word);
|
||||
assert_strequal(">", wordlist_get(words, 3)->word);
|
||||
assert_strequal("<<", wordlist_get(words, 4)->word);
|
||||
assert_strequal("<", wordlist_get(words, 5)->word);
|
||||
assert_strequal("&&", wordlist_get(words, 6)->word);
|
||||
assert_strequal("&", wordlist_get(words, 7)->word);
|
||||
assert(NULL == wordlist_get(words, 8));
|
||||
wordlist_destroy(words);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_wordsplit_singleword();
|
||||
test_wordsplit_singleword_with_blanks();
|
||||
|
|
@ -140,5 +186,8 @@ int main(void) {
|
|||
test_wordsplit_mixed_broken();
|
||||
test_wordsplit_unclosed_single();
|
||||
test_wordsplit_unclosed_double();
|
||||
test_wordsplit_operator_word();
|
||||
test_wordsplit_all_operators();
|
||||
test_wordsplit_operator_combining();
|
||||
return (0);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue