From 81d28c15d468e2f94e7a3c052dcf8ca00f8cd7e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kha=C3=AFs=20COLIN?=
Date: Fri, 14 Feb 2025 17:50:56 +0100
Subject: [PATCH] wordsplit refactor: follow the specification more closely

This will make it easier to implement the rest of the specification.
I probably should have started like this..
---
 src/parser/wordsplit/wordsplit.c | 87 +++++++++++++++++++++++++++------
 1 file changed, 72 insertions(+), 15 deletions(-)

diff --git a/src/parser/wordsplit/wordsplit.c b/src/parser/wordsplit/wordsplit.c
index ce297b7..eb32315 100644
--- a/src/parser/wordsplit/wordsplit.c
+++ b/src/parser/wordsplit/wordsplit.c
@@ -6,14 +6,62 @@
 /* By: khais +#+ +:+ +#+ */
 /* +#+#+#+#+#+ +#+ */
 /* Created: 2025/02/13 17:02:32 by khais #+# #+# */
-/* Updated: 2025/02/14 16:46:27 by khais ### ########.fr */
+/* Updated: 2025/02/14 18:06:44 by khais ### ########.fr */
 /* */
 /* ************************************************************************** */
 
 #include "wordsplit.h"
 #include "libft.h"
+#include "../../buffer/buffer.h"
 #include "../matchers/blank.h"
 
+/*
+** delimit the current token, if there is one: push its contents onto wordlist
+** as a new word, release the token and leave the "in a word" state.
+**
+** when *token is NULL there is nothing to delimit and wordlist is returned
+** unchanged.
+**
+** NOTE(review): only the t_buffer itself is freed; this assumes
+** worddesc_create() keeps the (*token)->buffer pointer - confirm.
+*/
+static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
+{
+	if ((*token) == NULL)
+		return (wordlist);
+	wordlist = wordlist_push(wordlist, worddesc_create((*token)->buffer));
+	free(*token);
+	(*token) = NULL;
+	(*currently_in_word) = false;
+	return (wordlist);
+}
+
+/*
+** append c to the token under construction and return the token as handed
+** back by ft_buffer_pushchar().
+*/
+static t_buffer	*push_char(t_buffer *token, char c)
+{
+	return (ft_buffer_pushchar(token, c));
+}
+
+/*
+** start a new token whose first character is c and enter the "in a word"
+** state.
+**
+** the buffer returned by ft_buffer_pushchar() is the one kept, exactly as
+** push_char() does, instead of the pointer that was passed in.
+*/
+static t_buffer	*new_word(char c, bool *currently_in_word)
+{
+	t_buffer	*token;
+
+	token = ft_buffer_new();
+	token = ft_buffer_pushchar(token, c);
+	(*currently_in_word) = true;
+	return (token);
+}
+
 /*
 ** split a string into words, respecting quotes etc.
 **
@@ -26,27 +74,36 @@
 */
 t_wordlist	*minishell_wordsplit(char *original)
 {
-	size_t		start;
 	size_t		idx;
-	size_t		length;
-	char		*word;
 	t_wordlist	*wordlist;
+	t_buffer	*token;
+	bool		currently_in_word;
 
-	start = 0;
 	idx = 0;
 	wordlist = NULL;
-	length = 1;
-	while (length != 0)
+	token = NULL;
+	currently_in_word = false;
+	while (true)
 	{
-		start = ft_strnfchridx(original + idx, is_blank);
-		length = ft_strfchridx(original + idx + start, is_blank);
-		if (length == 0)
+		// If the end of input is recognized, the current token (if any) shall
+		// be delimited.
+		if (original[idx] == '\0')
+			wordlist = delimit(wordlist, &token, &currently_in_word);
+		// If the current character is an unquoted <blank>, any token containing
+		// the previous character is delimited and the current character shall
+		// be discarded.
+		else if (is_blank(original[idx]))
+			wordlist = delimit(wordlist, &token, &currently_in_word);
+		// If the previous character was part of a word, the current character
+		// shall be appended to that word.
+		else if (currently_in_word)
+			token = push_char(token, original[idx]);
+		// The current character is used as the start of a new word.
+		else
+			token = new_word(original[idx], &currently_in_word);
+		if (original[idx] == '\0')
 			break ;
-		word = ft_substr(original + idx, start, length);
-		wordlist = wordlist_push(wordlist, worddesc_create(word));
-		if (wordlist == NULL)
-			return (NULL);
-		idx += start + length;
+		idx++;
 	}
 	return (wordlist);
 }