mirror of
https://codeberg.org/la-chouette/minishell.git
synced 2025-12-06 07:28:09 +01:00
wordsplit: use individual rule funcs
This commit is contained in:
parent
58be71725b
commit
3e64ac3769
4 changed files with 153 additions and 69 deletions
|
|
@ -6,7 +6,7 @@
|
|||
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */
|
||||
/* Updated: 2025/02/19 18:01:39 by jguelen ### ########.fr */
|
||||
/* Updated: 2025/02/19 18:27:37 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -16,12 +16,70 @@
|
|||
** cf. Token Recognition section at
|
||||
** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
|
||||
*/
|
||||
|
||||
/*
|
||||
** 1. If the end of input is recognized, the current token (if any) shall be
|
||||
** delimited.
|
||||
*/
|
||||
void token_rule_1(t_token_build *token_build, char *original)
|
||||
bool token_rule_1(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (original[token_build.current_index] == '\0')
|
||||
if (original[token_build->current_index] == '\0')
|
||||
{
|
||||
token_build->wordlist = delimit(token_build);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
** 2. If the previous character was used as part of an operator and the current
|
||||
** character is not quoted and can be used with the previous characters to form
|
||||
** an operator, it shall be used as part of that (operator) token.
|
||||
*/
|
||||
bool token_rule_2(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (token_build->currently_in_operator && token_build->quote == '\0'
|
||||
&& is_operator_combo(token_build->cur_token->buffer, original[token_build->current_index]))
|
||||
{
|
||||
token_build->cur_token = push_char(token_build->cur_token, original[token_build->current_index]);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
** 3. If the previous character was used as part of an operator and the current
|
||||
** character cannot be used with the previous characters to form an operator,
|
||||
** the operator containing the previous character shall be delimited.
|
||||
*/
|
||||
bool token_rule_3(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (token_build->currently_in_operator && token_build->quote == '\0'
|
||||
&& !is_operator_combo(token_build->cur_token->buffer, original[token_build->current_index]))
|
||||
{
|
||||
delimit(token_build);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
/*
|
||||
** 4. If the current character is single-quote, or double-quote and it is not
|
||||
** quoted, it shall affect quoting for subsequent characters up to the end of
|
||||
** the quoted text. The rules for quoting are as described in Quoting . The
|
||||
** result token shall contain exactly the characters that appear in the input,
|
||||
** unmodified, including any embedded or enclosing quotes or substitution
|
||||
** operators, between the <quotation-mark> and the end of the quoted text. The
|
||||
** token shall not be delimited by the end of the quoted field.
|
||||
*/
|
||||
bool token_rule_4(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (original[idx] == '\'' || original[idx] == '"')
|
||||
{
|
||||
quote = quote_flip(&token, original[idx], quote);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
|
||||
}
|
||||
|
||||
bool token_rule_5(t_token_build *token_build, char *original);
|
||||
|
|
|
|||
|
|
@ -6,10 +6,68 @@
|
|||
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */
|
||||
/* Updated: 2025/02/19 13:21:36 by jguelen ### ########.fr */
|
||||
/* Updated: 2025/02/19 18:25:55 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
|
||||
/*
|
||||
** cf. Token Recognition section at
|
||||
** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
|
||||
*/
|
||||
|
||||
/*
|
||||
** 6. If the current character is not quoted and can be used as the first
|
||||
** character of a new operator, the current token (if any) shall be delimited.
|
||||
** The current character shall be used as the beginning of the next (operator)
|
||||
** token.
|
||||
*/
|
||||
bool token_rule_6(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (quote == '\0' && is_operator_start(original[idx]))
|
||||
{
|
||||
operator_start(&wordlist, &token, original[idx],
|
||||
&currently_in_word, &currently_in_operator);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
** 7. If the current character is an unquoted <blank>, any token containing the
|
||||
** previous character is delimited and the current character shall be discarded.
|
||||
*/
|
||||
bool token_rule_7(t_token_build *token_build, char *original);
|
||||
{
|
||||
if (is_blank(original[idx]) && quote == '\0')
|
||||
{
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
** 8. If the previous character was part of a word, the current character shall
|
||||
** be appended to that word.
|
||||
*/
|
||||
bool token_rule_8(t_token_build *token_build, char *original)
|
||||
{
|
||||
if (currently_in_word)
|
||||
{
|
||||
token = push_char(token, original[idx]);
|
||||
return (true);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
/*
|
||||
** 10. The current character is used as the start of a new word.
|
||||
*/
|
||||
bool token_rule_10(t_token_build *token_build, char *original)
|
||||
{
|
||||
token = new_word(token, original[idx], &currently_in_word);
|
||||
return (true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,14 +3,15 @@
|
|||
/* ::: :::::::: */
|
||||
/* wordsplit.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
|
||||
/* Updated: 2025/02/19 16:58:47 by jguelen ### ########.fr */
|
||||
/* Created: 2025/02/19 18:26/57 by khais #+# #+# */
|
||||
/* Updated: 2025/02/19 18:26:57 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
#include "libft.h"
|
||||
|
||||
/*
|
||||
** split a string into words, respecting quotes etc.
|
||||
|
|
@ -26,65 +27,24 @@ t_wordlist *minishell_wordsplit(char *original)
|
|||
{
|
||||
t_token_build token_build;
|
||||
|
||||
ft_bzero(&token_build);
|
||||
while (true)
|
||||
ft_bzero(&token_build, sizeof(t_token_build));
|
||||
while (!token_rule_1(&token_build, original))
|
||||
{
|
||||
// 1. If the end of input is recognized, the current token (if any)
|
||||
// shall be delimited.
|
||||
if (original[token_build.current_index] == '\0')
|
||||
token_build.wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
// 2. If the previous character was used as part of an operator and the
|
||||
// current character is not quoted and can be used with the previous
|
||||
// characters to form an operator, it shall be used as part of that
|
||||
// (operator) token.
|
||||
else if (token_build.currently_in_operator && token_build.quote == '\0'
|
||||
&& is_operator_combo(token_build.cur_token->buffer, original[token_build.current_index]))
|
||||
token_build.cur_token = push_char(token, original[idx]);
|
||||
// 3. If the previous character was used as part of an operator and the
|
||||
// current character cannot be used with the previous characters to form
|
||||
// an operator, the operator containing the previous character shall be
|
||||
// delimited.
|
||||
else if (currently_in_operator && quote == '\0'
|
||||
&& !is_operator_combo(token->buffer, original[idx]))
|
||||
{
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
if (token_rule_2(&token_build, original))
|
||||
continue ;
|
||||
}
|
||||
// 4. If the current character is single-quote, or double-quote and it
|
||||
// is not quoted, it shall affect quoting for subsequent characters up
|
||||
// to the end of the quoted text. The rules for quoting are as described
|
||||
// in Quoting . The result token shall contain exactly the characters
|
||||
// that appear in the input, unmodified, including any embedded or
|
||||
// enclosing quotes or substitution operators, between the
|
||||
// <quotation-mark> and the end of the quoted text. The token shall not
|
||||
// be delimited by the end of the quoted field.
|
||||
else if (original[idx] == '\'' || original[idx] == '"')
|
||||
quote = quote_flip(&token, original[idx], quote);
|
||||
// 6. If the current character is not quoted and can be used as the
|
||||
// first character of a new operator, the current token (if any) shall
|
||||
// be delimited. The current character shall be used as the beginning of
|
||||
// the next (operator) token.
|
||||
else if (quote == '\0' && is_operator_start(original[idx]))
|
||||
operator_start(&wordlist, &token, original[idx],
|
||||
&currently_in_word, &currently_in_operator);
|
||||
// If the current character is an unquoted <blank>, any token containing
|
||||
// the previous character is delimited and the current character shall
|
||||
// be discarded.
|
||||
else if (is_blank(original[idx]) && quote == '\0')
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
// If the previous character was part of a word, the current character
|
||||
// shall be appended to that word.
|
||||
else if (currently_in_word)
|
||||
token = push_char(token, original[idx]);
|
||||
// The current character is used as the start of a new word.
|
||||
else
|
||||
token = new_word(token, original[idx], &currently_in_word);
|
||||
if (original[idx] == '\0')
|
||||
break ;
|
||||
idx++;
|
||||
if (token_rule_3(&token_build, original))
|
||||
continue ;
|
||||
if (token_rule_4(&token_build, original))
|
||||
continue ;
|
||||
if (token_rule_5(&token_build, original))
|
||||
continue ;
|
||||
if (token_rule_6(&token_build, original))
|
||||
continue ;
|
||||
if (token_rule_7(&token_build, original))
|
||||
continue ;
|
||||
if (token_rule_8(&token_build, original))
|
||||
continue ;
|
||||
token_rule_10(&token_build, original);
|
||||
}
|
||||
if (token_build.quote != '\0')
|
||||
return (wordlist_destroy(token_build.wordlist), NULL);
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@
|
|||
/* ::: :::::::: */
|
||||
/* wordsplit.h :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 15:52:48 by khais #+# #+# */
|
||||
/* Updated: 2025/02/19 18:02:50 by jguelen ### ########.fr */
|
||||
/* Created: 2025/02/19 18:22/52 by khais #+# #+# */
|
||||
/* Updated: 2025/02/19 18:22:52 by khais ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -36,7 +36,15 @@ t_buffer *push_char(t_token_build *token_build, char c);
|
|||
t_buffer *new_word(t_token_build *token_build, char c);
|
||||
char quote_flip(t_token_build *token_build, char c);
|
||||
void operator_start(t_token_build *token_build, char c);
|
||||
void token_rule_1(t_token_build *token_build, char *original);
|
||||
bool token_rule_1(t_token_build *token_build, char *original);
|
||||
bool token_rule_2(t_token_build *token_build, char *original);
|
||||
bool token_rule_3(t_token_build *token_build, char *original);
|
||||
bool token_rule_4(t_token_build *token_build, char *original);
|
||||
bool token_rule_5(t_token_build *token_build, char *original);
|
||||
bool token_rule_6(t_token_build *token_build, char *original);
|
||||
bool token_rule_7(t_token_build *token_build, char *original);
|
||||
bool token_rule_8(t_token_build *token_build, char *original);
|
||||
bool token_rule_10(t_token_build *token_build, char *original);
|
||||
|
||||
t_wordlist *minishell_wordsplit(char *original);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue