Wordsplitting: the preparation for a reswitch.

This commit is contained in:
Jérôme Guélen 2025-02-19 18:03:30 +01:00 committed by Khaïs COLIN
parent 0d0a14d21a
commit 58be71725b
4 changed files with 63 additions and 53 deletions

View file

@ -6,10 +6,22 @@
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */
/* Updated: 2025/02/19 13:20:49 by jguelen ### ########.fr */
/* Updated: 2025/02/19 18:01:39 by jguelen ### ########.fr */
/* */
/* ************************************************************************** */
#include "wordsplit.h"
/*
** cf. Token Recognition section at
** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
*/
/*
** Token recognition rule 1:
** "If the end of input is recognized, the current token (if any) shall be
** delimited."
**
** token_build: tokenizer state; its current_index selects the character of
**              `original` being examined.
** original:    the NUL-terminated input line being split.
**
** When the character at current_index is the terminating '\0', the token
** under construction is handed to delimit() and the resulting word list is
** stored back into token_build. Any other character leaves the state as is.
*/
void	token_rule_1(t_token_build *token_build, char *original)
{
	/* token_build is a pointer: its members must be reached through '->'. */
	if (original[token_build->current_index] == '\0')
		token_build->wordlist = delimit(token_build);
}

View file

@ -6,25 +6,12 @@
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
/* Updated: 2025/02/19 15:17:22 by jguelen ### ########.fr */
/* Updated: 2025/02/19 16:58:47 by jguelen ### ########.fr */
/* */
/* ************************************************************************** */
#include "wordsplit.h"
/*
** Finish the token currently being built and append it to the word list.
**
** wordlist:              list receiving the finished word.
** token:                 address of the token under construction; *token may
**                        be NULL when nothing is being built.
** currently_in_word:     cleared when a token is delimited.
** currently_in_operator: cleared when a token is delimited.
**
** Returns the word list: unchanged when *token is NULL, otherwise the value
** returned by wordlist_push().
*/
static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token,
		bool *currently_in_word, bool *currently_in_operator)
{
	t_buffer	*finished;

	finished = *token;
	if (finished != NULL)
	{
		wordlist = wordlist_push(wordlist, worddesc_create(finished->buffer));
		/* Only the t_buffer object itself is released here. */
		free(finished);
		*token = NULL;
		*currently_in_word = false;
		*currently_in_operator = false;
	}
	return (wordlist);
}
/*
** split a string into words, respecting quotes etc.
**
@ -44,16 +31,16 @@ t_wordlist *minishell_wordsplit(char *original)
{
// 1. If the end of input is recognized, the current token (if any)
// shall be delimited.
if (original[idx] == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word,
if (original[token_build.current_index] == '\0')
token_build.wordlist = delimit(wordlist, &token, &currently_in_word,
&currently_in_operator);
// 2. If the previous character was used as part of an operator and the
// current character is not quoted and can be used with the previous
// characters to form an operator, it shall be used as part of that
// (operator) token.
else if (currently_in_operator && quote == '\0'
&& is_operator_combo(token->buffer, original[idx]))
token = push_char(token, original[idx]);
else if (token_build.currently_in_operator && token_build.quote == '\0'
&& is_operator_combo(token_build.cur_token->buffer, original[token_build.current_index]))
token_build.cur_token = push_char(token, original[idx]);
// 3. If the previous character was used as part of an operator and the
// current character cannot be used with the previous characters to form
// an operator, the operator containing the previous character shall be

View file

@ -3,10 +3,10 @@
/* ::: :::::::: */
/* wordsplit.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 15:52:48 by khais #+# #+# */
/* Updated: 2025/02/19 15:17:02 by jguelen ### ########.fr */
/* Updated: 2025/02/19 18:02:50 by jguelen ### ########.fr */
/* */
/* ************************************************************************** */
@ -31,13 +31,13 @@ typedef struct s_token_build
size_t current_index;
} t_token_build;
/*
** Token-building helpers. Every helper receives the tokenizer state through
** a single t_token_build pointer; the earlier prototypes that took the
** individual fields are dropped because they conflict with these
** declarations.
*/
t_wordlist	*delimit(t_token_build *token_build);
t_buffer	*push_char(t_token_build *token_build, char c);
t_buffer	*new_word(t_token_build *token_build, char c);
char		quote_flip(t_token_build *token_build, char c);
void		operator_start(t_token_build *token_build, char c);
void		token_rule_1(t_token_build *token_build, char *original);
/* Entry point: split `original` into a list of words. */
t_wordlist	*minishell_wordsplit(char *original);
#endif
#endif

View file

@ -3,44 +3,55 @@
/* ::: :::::::: */
/* wordsplit_utils.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/19 14:27:47 by jguelen #+# #+# */
/* Updated: 2025/02/19 14:56:36 by jguelen ### ########.fr */
/* Updated: 2025/02/19 18:02:33 by jguelen ### ########.fr */
/* */
/* ************************************************************************** */
#include "wordsplit.h"
t_buffer *push_char(t_buffer *token, char c)
t_wordlist *delimit(t_token_build *token_build)
{
if (token == NULL)
token = ft_buffer_new();
return (ft_buffer_pushchar(token, c));
if (token_build->cur_token == NULL)
return (token_build->wordlist);
token_build->wordlist = wordlist_push(token_build->wordlist,
worddesc_create(token_build->cur_token->buffer));
free(token_build->cur_token);
token_build->cur_token = NULL;
token_build->currently_in_word = false;
token_build->currently_in_operator = false;
return (token_build->wordlist);
}
t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word)
t_buffer *push_char(t_token_build *token_build, char c)
{
token = push_char(token, c);
(*currently_in_word) = true;
return (token);
if (token_build->cur_token == NULL)
token_build->cur_token = ft_buffer_new();
return (ft_buffer_pushchar(token_build->cur_token, c));
}
char quote_flip(t_buffer **token, char c, char quote)
t_buffer *new_word(t_token_build *token_build, char c)
{
if (quote == '\0')
quote = c;
else if (quote == c)
quote = '\0';
(*token) = push_char((*token), c);
return (quote);
token_build->cur_token = push_char(token_build->cur_token, c);
token_build->currently_in_word = true;
return (token_build->cur_token);
}
void operator_start(t_wordlist **wordlist, t_buffer **token, char c,
bool *currently_in_word, bool *currently_in_operator)
char quote_flip(t_token_build *token_build, char c)
{
(*wordlist) = delimit(*wordlist, token, currently_in_word,
currently_in_operator);
(*token) = new_word(*token, c, currently_in_word);
(*currently_in_operator) = true;
if (token_build->quote == '\0')
token_build->quote = c;
else if (token_build->quote == c)
token_build->quote = '\0';
token_build->cur_token = push_char(token_build->cur_token, c);
return (token_build->quote);
}
/*
** Begin an operator token with character c.
**
** token_build: tokenizer state.
** c:           first character of the operator.
**
** The token that was being built (if any) is delimited first; c then starts
** a fresh token through new_word() and the state is flagged as being inside
** an operator.
*/
void	operator_start(t_token_build *token_build, char c)
{
	token_build->wordlist = delimit(token_build);
	/* Must follow delimit(), which clears currently_in_operator. */
	token_build->currently_in_operator = true;
	token_build->cur_token = new_word(token_build, c);
}