wordsplit refactor: follow the specification more closely

This will make it easier to implement the rest of the specification. I probably should have started like this.
2025-12-06 07:28:09 +01:00 · 2025-02-14 17:50:56 +01:00 · 2025-02-14 17:50:56 +01:00 · 81d28c15d4
commit 81d28c15d4
parent db0abe82cc
1 changed files with 52 additions and 15 deletions
--- a/src/parser/wordsplit/wordsplit.c
+++ b/src/parser/wordsplit/wordsplit.c
@ -6,14 +6,42 @@
 /*   By: khais <marvin@42.fr>                       +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2025/02/13 17:02:32 by khais             #+#    #+#             */
-/*   Updated: 2025/02/14 16:46:27 by khais            ###   ########.fr       */
+/*   Updated: 2025/02/14 18:06:44 by khais            ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

 #include "wordsplit.h"
 #include "libft.h"
+#include "../../buffer/buffer.h"
 #include "../matchers/blank.h"

+/*
+** Finish the token currently being built, if there is one.
+**
+** When *token is NULL the wordlist is returned untouched and
+** *currently_in_word is left as it was. Otherwise the token's character
+** data is wrapped with worddesc_create and pushed onto wordlist, the
+** t_buffer struct is freed, *token is reset to NULL and *currently_in_word
+** is cleared.
+**
+** NOTE(review): only the t_buffer struct is freed, not (*token)->buffer, so
+** this presumably relies on worddesc_create taking ownership of that
+** string -- confirm. The results of worddesc_create and wordlist_push are
+** not checked here.
+*/
+static t_wordlist	*delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word)
+{
+	if ((*token) == NULL)
+		return (wordlist);
+	wordlist = wordlist_push(wordlist, worddesc_create((*token)->buffer));
+	free(*token);
+	(*token) = NULL;
+	(*currently_in_word) = false;
+	return (wordlist);
+}
+
+/*
+** Append c to the token being built and hand back whatever
+** ft_buffer_pushchar returns for it.
+*/
+static t_buffer	*push_char(t_buffer *token, char c)
+{
+	return (ft_buffer_pushchar(token, c));
+}
+
+
+/*
+** Start a new token whose first character is c.
+**
+** Creates a fresh buffer, appends c to it and sets *currently_in_word.
+** The pointer returned by ft_buffer_pushchar is the one handed back to the
+** caller (as push_char does) rather than being discarded, so the caller
+** always holds the pointer the buffer API considers current.
+*/
+static t_buffer	*new_word(char c, bool *currently_in_word)
+{
+	t_buffer	*token;
+
+	token = ft_buffer_new();
+	token = ft_buffer_pushchar(token, c);
+	(*currently_in_word) = true;
+	return (token);
+}
+
 /*
 ** split a string into words, respecting quotes etc.
 **
@ -26,27 +54,36 @@
 */
 t_wordlist	*minishell_wordsplit(char *original)
 {
-	size_t		start;
 	size_t		idx;
-	size_t		length;
-	char		*word;
 	t_wordlist	*wordlist;
+	t_buffer	*token;
+	bool		currently_in_word;

-	start = 0;
 	idx = 0;
 	wordlist = NULL;
-	length = 1;
-	while (length != 0)
+	token = NULL;
+	currently_in_word = false;
+	// Walk the input one character at a time, including the terminating NUL,
+	// applying the first rule below that matches the current character.
+	// NOTE(review): the previous version returned NULL when wordlist_push
+	// yielded NULL; this version no longer checks that result -- confirm
+	// that this is intended.
+	while (true)
 	{
-		start = ft_strnfchridx(original + idx, is_blank);
-		length = ft_strfchridx(original + idx + start, is_blank);
-		if (length == 0)
+		// If the end of input is recognized, the current token (if any) shall
+		// be delimited.
+		if (original[idx] == '\0')
+			wordlist = delimit(wordlist, &token, &currently_in_word);
+		// If the current character is an unquoted <blank>, any token containing
+		// the previous character is delimited and the current character shall
+		// be discarded.
+		else if (is_blank(original[idx]))
+			wordlist = delimit(wordlist, &token, &currently_in_word);
+		// If the previous character was part of a word, the current character
+		// shall be appended to that word.
+		else if (currently_in_word)
+			token = push_char(token, original[idx]);
+		// The current character is used as the start of a new word.
+		else
+			token = new_word(original[idx], &currently_in_word);
+		// The terminating NUL has already been handled by the first rule
+		// above, so there is nothing left to scan.
+		if (original[idx] == '\0')
 			break ;
-		word = ft_substr(original + idx, start, length);
-		wordlist = wordlist_push(wordlist, worddesc_create(word));
-		if (wordlist == NULL)
-			return (NULL);
-		idx += start + length;
+		idx++;
 	}
 	return (wordlist);
 }