wordsplit refactor: follow the specification more closely

This will make it easier to implement the rest of the specification.

I probably should have started like this..
This commit is contained in:
Khaïs COLIN 2025-02-14 17:50:56 +01:00
parent db0abe82cc
commit 81d28c15d4
Signed by: logistic-bot
SSH key fingerprint: SHA256:RlpiqKeXpcPFZZ4y9Ou4xi2M8OhRJovIwDlbCaMsuAo

View file

@ -6,14 +6,42 @@
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */ /* By: khais <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */ /* Created: 2025/02/13 17:02:32 by khais #+# #+# */
/* Updated: 2025/02/14 16:46:27 by khais ### ########.fr */ /* Updated: 2025/02/14 18:06:44 by khais ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
#include "wordsplit.h" #include "wordsplit.h"
#include "libft.h" #include "libft.h"
#include "../../buffer/buffer.h"
#include "../matchers/blank.h" #include "../matchers/blank.h"
/*
** Delimit the token currently being built, if there is one.
**
** When *token is NULL nothing happens and wordlist is returned unchanged
** (note that *currently_in_word is left untouched in that case).
** Otherwise a word description is created from the token's character buffer
** and pushed onto wordlist, the token struct is freed, *token is reset to
** NULL and *currently_in_word is set to false.
**
** Returns whatever wordlist_push returned, or the unchanged wordlist when
** there was no token.
**
** NOTE(review): only the t_buffer struct itself is freed here; presumably
** worddesc_create takes ownership of token->buffer - confirm.
*/
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
{
	t_buffer	*finished;

	finished = *token;
	if (finished != NULL)
	{
		wordlist = wordlist_push(wordlist, worddesc_create(finished->buffer));
		free(finished);
		*token = NULL;
		*currently_in_word = false;
	}
	return (wordlist);
}
/*
** Append the character c to the token being built.
**
** Thin wrapper around ft_buffer_pushchar: its return value is handed back
** unchanged, and the caller is expected to keep using that pointer as the
** token.
*/
static t_buffer *push_char(t_buffer *token, char c)
{
	t_buffer	*extended;

	extended = ft_buffer_pushchar(token, c);
	return (extended);
}
/*
** Start a new word whose first character is c.
**
** A fresh buffer is created with ft_buffer_new, c is pushed into it, and
** *currently_in_word is set to true. The returned pointer is the token the
** caller should keep appending to.
**
** The result of ft_buffer_pushchar is kept rather than discarded, so that
** this function agrees with push_char, which treats that return value as the
** buffer to continue with.
**
** NOTE(review): behaviour when ft_buffer_new or ft_buffer_pushchar fails is
** not visible from here; a NULL result is returned as-is with
** *currently_in_word still set to true - confirm callers cope with that.
*/
static t_buffer *new_word(char c, bool *currently_in_word)
{
	t_buffer	*token;

	token = ft_buffer_new();
	token = ft_buffer_pushchar(token, c);
	(*currently_in_word) = true;
	return (token);
}
/* /*
** split a string into words, respecting quotes etc. ** split a string into words, respecting quotes etc.
** **
@ -26,27 +54,36 @@
*/ */
/*
** NOTE(review): this span is a side-by-side diff rendering, not a single
** function body. Where a line carries two columns, the left one is the
** removed implementation (scan with ft_strnfchridx / ft_strfchridx, then
** ft_substr) and the right one is the new character-by-character loop.
**
** New loop, for each character original[idx]:
**   - '\0'  : delimit the pending token (if any), then break out of the loop;
**   - blank : delimit the pending token (if any); the blank itself is dropped;
**   - else, if a word is in progress: append the character via push_char;
**   - else  : start a new token with this character via new_word.
** The accumulated wordlist is returned.
**
** NOTE(review): the removed code returned NULL when wordlist_push returned
** NULL; the new loop has no equivalent check - confirm this is intended.
*/
t_wordlist *minishell_wordsplit(char *original) t_wordlist *minishell_wordsplit(char *original)
{ {
size_t start;
size_t idx; size_t idx;
size_t length;
char *word;
t_wordlist *wordlist; t_wordlist *wordlist;
t_buffer *token;
bool currently_in_word;
start = 0;
idx = 0; idx = 0;
wordlist = NULL; wordlist = NULL;
length = 1; token = NULL;
while (length != 0) currently_in_word = false;
while (true)
{ {
start = ft_strnfchridx(original + idx, is_blank); // If the end of input is recognized, the current token (if any) shall
length = ft_strfchridx(original + idx + start, is_blank); // be delimited.
if (length == 0) if (original[idx] == '\0')
wordlist = delimit(wordlist, &token, &currently_in_word);
// If the current character is an unquoted <blank>, any token containing
// the previous character is delimited and the current character shall
// be discarded.
else if (is_blank(original[idx]))
wordlist = delimit(wordlist, &token, &currently_in_word);
// If the previous character was part of a word, the current character
// shall be appended to that word.
else if (currently_in_word)
token = push_char(token, original[idx]);
// The current character is used as the start of a new word.
else
token = new_word(original[idx], &currently_in_word);
if (original[idx] == '\0')
break ; break ;
word = ft_substr(original + idx, start, length); idx++;
wordlist = wordlist_push(wordlist, worddesc_create(word));
if (wordlist == NULL)
return (NULL);
idx += start + length;
} }
return (wordlist); return (wordlist);
} }