mirror of
https://codeberg.org/la-chouette/minishell.git
synced 2025-12-06 07:28:09 +01:00
wordsplit: handle operators
This commit is contained in:
parent
558ddb4096
commit
f92763e479
7 changed files with 200 additions and 15 deletions
2
Makefile
2
Makefile
|
|
@ -29,6 +29,8 @@ srcs = \
|
||||||
src/parser/matchers/blank.c \
|
src/parser/matchers/blank.c \
|
||||||
src/parser/matchers/identifier.c \
|
src/parser/matchers/identifier.c \
|
||||||
src/parser/matchers/metacharacter.c \
|
src/parser/matchers/metacharacter.c \
|
||||||
|
src/parser/matchers/operator_combo.c \
|
||||||
|
src/parser/matchers/operator_start.c \
|
||||||
src/parser/worddesc/worddesc.c \
|
src/parser/worddesc/worddesc.c \
|
||||||
src/parser/wordlist/wordlist.c \
|
src/parser/wordlist/wordlist.c \
|
||||||
src/parser/wordsplit/wordsplit.c \
|
src/parser/wordsplit/wordsplit.c \
|
||||||
|
|
|
||||||
35
src/parser/matchers/operator_combo.c
Normal file
35
src/parser/matchers/operator_combo.c
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
/* ************************************************************************** */
|
||||||
|
/* */
|
||||||
|
/* ::: :::::::: */
|
||||||
|
/* operator_combo.c :+: :+: :+: */
|
||||||
|
/* +:+ +:+ +:+ */
|
||||||
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
|
/* +#+#+#+#+#+ +#+ */
|
||||||
|
/* Created: 2025/02/17 16:30:54 by khais #+# #+# */
|
||||||
|
/* Updated: 2025/02/17 16:34:34 by khais ### ########.fr */
|
||||||
|
/* */
|
||||||
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "libft.h"
|
||||||
|
|
||||||
|
/*
** Tell whether appending c to the operator text held in start yields one of
** the recognized two-character operators:
**   ||   >>   <<   &&
**
** start: NUL-terminated text of the operator token collected so far
** c:     the next input character
**
** Returns true only when start is exactly one character long and c repeats
** that character to form one of the operators listed above.
** Returns false for a NULL start, an empty start, or a start that already
** holds two or more characters, so an operator never grows past two
** characters.
*/
bool	is_operator_combo(char *start, char c)
{
	char	first;

	if (!start)
		return (false);
	first = start[0];
	// only the first two bytes decide whether the length is exactly one,
	// so there is no need to measure the whole string
	if (first == '\0' || start[1] != '\0')
		return (false);
	// every recognized combination is the same character written twice
	if (c != first)
		return (false);
	return (first == '>' || first == '<' || first == '|' || first == '&');
}
|
||||||
20
src/parser/matchers/operator_combo.h
Normal file
20
src/parser/matchers/operator_combo.h
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* ************************************************************************** */
|
||||||
|
/* */
|
||||||
|
/* ::: :::::::: */
|
||||||
|
/* operator_combo.h :+: :+: :+: */
|
||||||
|
/* +:+ +:+ +:+ */
|
||||||
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
|
/* +#+#+#+#+#+ +#+ */
|
||||||
|
/* Created: 2025/02/17 16:30:29 by khais #+# #+# */
|
||||||
|
/* Updated: 2025/02/17 16:30:45 by khais ### ########.fr */
|
||||||
|
/* */
|
||||||
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
#ifndef OPERATOR_COMBO_H
|
||||||
|
# define OPERATOR_COMBO_H
|
||||||
|
|
||||||
|
# include <stdbool.h>
|
||||||
|
|
||||||
|
bool is_operator_combo(char *start, char c);
|
||||||
|
|
||||||
|
#endif
|
||||||
26
src/parser/matchers/operator_start.c
Normal file
26
src/parser/matchers/operator_start.c
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
/* ************************************************************************** */
|
||||||
|
/* */
|
||||||
|
/* ::: :::::::: */
|
||||||
|
/* operator_start.c :+: :+: :+: */
|
||||||
|
/* +:+ +:+ +:+ */
|
||||||
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
|
/* +#+#+#+#+#+ +#+ */
|
||||||
|
/* Created: 2025/02/17 16:21:03 by khais #+# #+# */
|
||||||
|
/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */
|
||||||
|
/* */
|
||||||
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
#include "libft.h"
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
/*
** Tell whether c can be the first character of an operator token.
**
** The operator-starting characters are: < > | & ( )
**
** Returns true for those six characters and false for everything else.
** The end-of-string character '\0' is explicitly not an operator start: the
** characters are compared one by one because a strchr-style lookup in the
** operator set would accept '\0', the terminator being considered part of
** the searched string.
*/
bool	is_operator_start(char c)
{
	if (c == '<' || c == '>' || c == '|')
		return (true);
	if (c == '&' || c == '(' || c == ')')
		return (true);
	return (false);
}
|
||||||
20
src/parser/matchers/operator_start.h
Normal file
20
src/parser/matchers/operator_start.h
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* ************************************************************************** */
|
||||||
|
/* */
|
||||||
|
/* ::: :::::::: */
|
||||||
|
/* operator_start.h :+: :+: :+: */
|
||||||
|
/* +:+ +:+ +:+ */
|
||||||
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
|
/* +#+#+#+#+#+ +#+ */
|
||||||
|
/* Created: 2025/02/17 16:22:48 by khais #+# #+# */
|
||||||
|
/* Updated: 2025/02/17 16:23:12 by khais ### ########.fr */
|
||||||
|
/* */
|
||||||
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
#ifndef OPERATOR_START_H
|
||||||
|
# define OPERATOR_START_H
|
||||||
|
|
||||||
|
# include <stdbool.h>
|
||||||
|
|
||||||
|
bool is_operator_start(char c);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -6,16 +6,18 @@
|
||||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
|
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
|
||||||
/* Updated: 2025/02/17 14:54:11 by khais ### ########.fr */
|
/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
#include "wordsplit.h"
|
#include "wordsplit.h"
|
||||||
#include "../../buffer/buffer.h"
|
#include "../../buffer/buffer.h"
|
||||||
#include "../matchers/blank.h"
|
#include "../matchers/blank.h"
|
||||||
|
#include "../matchers/operator_start.h"
|
||||||
|
#include "../matchers/operator_combo.h"
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
|
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
|
||||||
{
|
{
|
||||||
if ((*token) == NULL)
|
if ((*token) == NULL)
|
||||||
return (wordlist);
|
return (wordlist);
|
||||||
|
|
@ -23,6 +25,7 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current
|
||||||
free(*token);
|
free(*token);
|
||||||
(*token) = NULL;
|
(*token) = NULL;
|
||||||
(*currently_in_word) = false;
|
(*currently_in_word) = false;
|
||||||
|
(*currently_in_operator) = false;
|
||||||
return (wordlist);
|
return (wordlist);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,6 +54,13 @@ static char quote_flip(t_buffer **token, char c, char quote)
|
||||||
return (quote);
|
return (quote);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
** Begin a new operator token whose first character is c.
**
** The pending token (if any) is first handed to delimit(); the token slot is
** then refilled through new_word() with c, and finally the caller's
** currently_in_operator flag is raised.
**
** wordlist and token are updated in place through their pointers.
*/
static void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
		bool *currently_in_word, bool *currently_in_operator)
{
	t_wordlist	*delimited;
	t_buffer	*fresh;

	delimited = delimit(*wordlist, token, currently_in_word,
			currently_in_operator);
	*wordlist = delimited;
	fresh = new_word(*token, c, currently_in_word);
	*token = fresh;
	// set after delimit(), which clears this flag when it closes a token
	*currently_in_operator = true;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** split a string into words, respecting quotes etc.
|
** split a string into words, respecting quotes etc.
|
||||||
**
|
**
|
||||||
|
|
@ -67,34 +77,57 @@ t_wordlist *minishell_wordsplit(char *original)
|
||||||
t_wordlist *wordlist;
|
t_wordlist *wordlist;
|
||||||
t_buffer *token;
|
t_buffer *token;
|
||||||
bool currently_in_word;
|
bool currently_in_word;
|
||||||
|
bool currently_in_operator;
|
||||||
char quote;
|
char quote;
|
||||||
|
|
||||||
idx = 0;
|
idx = 0;
|
||||||
wordlist = NULL;
|
wordlist = NULL;
|
||||||
token = NULL;
|
token = NULL;
|
||||||
currently_in_word = false;
|
currently_in_word = false;
|
||||||
|
currently_in_operator = false;
|
||||||
quote = '\0';
|
quote = '\0';
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
// If the end of input is recognized, the current token (if any) shall
|
// 1. If the end of input is recognized, the current token (if any)
|
||||||
// be delimited.
|
// shall be delimited.
|
||||||
if (original[idx] == '\0')
|
if (original[idx] == '\0')
|
||||||
wordlist = delimit(wordlist, &token, ¤tly_in_word);
|
wordlist = delimit(wordlist, &token, ¤tly_in_word, ¤tly_in_operator);
|
||||||
// If the current character is single-quote, or double-quote and it is
|
// 2. If the previous character was used as part of an operator and the
|
||||||
// not quoted, it shall affect quoting for subsequent characters up to
|
// current character is not quoted and can be used with the previous
|
||||||
// the end of the quoted text. The rules for quoting are as described in
|
// characters to form an operator, it shall be used as part of that
|
||||||
// Quoting . The result token shall contain exactly the characters that
|
// (operator) token.
|
||||||
// appear in the input, unmodified, including any embedded or enclosing
|
else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
|
||||||
// quotes or substitution operators, between the <quotation-mark> and
|
token = push_char(token, original[idx]);
|
||||||
// the end of the quoted text. The token shall not be delimited by the
|
// 3. If the previous character was used as part of an operator and the
|
||||||
// end of the quoted field.
|
// current character cannot be used with the previous characters to form
|
||||||
|
// an operator, the operator containing the previous character shall be
|
||||||
|
// delimited.
|
||||||
|
else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
|
||||||
|
{
|
||||||
|
wordlist = delimit(wordlist, &token, ¤tly_in_word, ¤tly_in_operator);
|
||||||
|
continue ;
|
||||||
|
}
|
||||||
|
// 4. If the current character is single-quote, or double-quote and it
|
||||||
|
// is not quoted, it shall affect quoting for subsequent characters up
|
||||||
|
// to the end of the quoted text. The rules for quoting are as described
|
||||||
|
// in Quoting . The result token shall contain exactly the characters
|
||||||
|
// that appear in the input, unmodified, including any embedded or
|
||||||
|
// enclosing quotes or substitution operators, between the
|
||||||
|
// <quotation-mark> and the end of the quoted text. The token shall not
|
||||||
|
// be delimited by the end of the quoted field.
|
||||||
else if (original[idx] == '\'' || original[idx] == '"')
|
else if (original[idx] == '\'' || original[idx] == '"')
|
||||||
quote = quote_flip(&token, original[idx], quote);
|
quote = quote_flip(&token, original[idx], quote);
|
||||||
|
// 6. If the current character is not quoted and can be used as the
|
||||||
|
// first character of a new operator, the current token (if any) shall
|
||||||
|
// be delimited. The current character shall be used as the beginning of
|
||||||
|
// the next (operator) token.
|
||||||
|
else if (quote == '\0' && is_operator_start(original[idx]))
|
||||||
|
operator_start(&wordlist, &token, original[idx], ¤tly_in_word, ¤tly_in_operator);
|
||||||
// If the current character is an unquoted <blank>, any token containing
|
// If the current character is an unquoted <blank>, any token containing
|
||||||
// the previous character is delimited and the current character shall
|
// the previous character is delimited and the current character shall
|
||||||
// be discarded.
|
// be discarded.
|
||||||
else if (is_blank(original[idx]) && quote == '\0')
|
else if (is_blank(original[idx]) && quote == '\0')
|
||||||
wordlist = delimit(wordlist, &token, ¤tly_in_word);
|
wordlist = delimit(wordlist, &token, ¤tly_in_word, ¤tly_in_operator);
|
||||||
// If the previous character was part of a word, the current character
|
// If the previous character was part of a word, the current character
|
||||||
// shall be appended to that word.
|
// shall be appended to that word.
|
||||||
else if (currently_in_word)
|
else if (currently_in_word)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||||
/* +#+#+#+#+#+ +#+ */
|
/* +#+#+#+#+#+ +#+ */
|
||||||
/* Created: 2025/02/13 15:17:56 by khais #+# #+# */
|
/* Created: 2025/02/13 15:17:56 by khais #+# #+# */
|
||||||
/* Updated: 2025/02/17 14:55:31 by khais ### ########.fr */
|
/* Updated: 2025/02/17 16:47:31 by khais ### ########.fr */
|
||||||
/* */
|
/* */
|
||||||
/* ************************************************************************** */
|
/* ************************************************************************** */
|
||||||
|
|
||||||
|
|
@ -129,6 +129,52 @@ static void test_wordsplit_unclosed_double(void)
|
||||||
assert(words == NULL);
|
assert(words == NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_wordsplit_operator_word(void)
|
||||||
|
{
|
||||||
|
t_wordlist *words;
|
||||||
|
|
||||||
|
words = minishell_wordsplit(">test");
|
||||||
|
assert_strequal(">", wordlist_get(words, 0)->word);
|
||||||
|
assert_strequal("test", wordlist_get(words, 1)->word);
|
||||||
|
assert(NULL == wordlist_get(words, 2));
|
||||||
|
wordlist_destroy(words);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_wordsplit_all_operators(void)
|
||||||
|
{
|
||||||
|
t_wordlist *words;
|
||||||
|
|
||||||
|
words = minishell_wordsplit("|&&||()<>><<>");
|
||||||
|
assert_strequal("|", wordlist_get(words, 0)->word);
|
||||||
|
assert_strequal("&&", wordlist_get(words, 1)->word);
|
||||||
|
assert_strequal("||", wordlist_get(words, 2)->word);
|
||||||
|
assert_strequal("(", wordlist_get(words, 3)->word);
|
||||||
|
assert_strequal(")", wordlist_get(words, 4)->word);
|
||||||
|
assert_strequal("<", wordlist_get(words, 5)->word);
|
||||||
|
assert_strequal(">>", wordlist_get(words, 6)->word);
|
||||||
|
assert_strequal("<<", wordlist_get(words, 7)->word);
|
||||||
|
assert_strequal(">", wordlist_get(words, 8)->word);
|
||||||
|
assert(NULL == wordlist_get(words, 9));
|
||||||
|
wordlist_destroy(words);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_wordsplit_operator_combining(void)
|
||||||
|
{
|
||||||
|
t_wordlist *words;
|
||||||
|
|
||||||
|
words = minishell_wordsplit("|||>>><<<&&&");
|
||||||
|
assert_strequal("||", wordlist_get(words, 0)->word);
|
||||||
|
assert_strequal("|", wordlist_get(words, 1)->word);
|
||||||
|
assert_strequal(">>", wordlist_get(words, 2)->word);
|
||||||
|
assert_strequal(">", wordlist_get(words, 3)->word);
|
||||||
|
assert_strequal("<<", wordlist_get(words, 4)->word);
|
||||||
|
assert_strequal("<", wordlist_get(words, 5)->word);
|
||||||
|
assert_strequal("&&", wordlist_get(words, 6)->word);
|
||||||
|
assert_strequal("&", wordlist_get(words, 7)->word);
|
||||||
|
assert(NULL == wordlist_get(words, 8));
|
||||||
|
wordlist_destroy(words);
|
||||||
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
test_wordsplit_singleword();
|
test_wordsplit_singleword();
|
||||||
test_wordsplit_singleword_with_blanks();
|
test_wordsplit_singleword_with_blanks();
|
||||||
|
|
@ -140,5 +186,8 @@ int main(void) {
|
||||||
test_wordsplit_mixed_broken();
|
test_wordsplit_mixed_broken();
|
||||||
test_wordsplit_unclosed_single();
|
test_wordsplit_unclosed_single();
|
||||||
test_wordsplit_unclosed_double();
|
test_wordsplit_unclosed_double();
|
||||||
|
test_wordsplit_operator_word();
|
||||||
|
test_wordsplit_all_operators();
|
||||||
|
test_wordsplit_operator_combining();
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue