mirror of
https://codeberg.org/la-chouette/minishell.git
synced 2025-12-06 07:28:09 +01:00
Word splitting refactor: Prototypes to be modified
This commit is contained in:
parent
cba6fba845
commit
0d0a14d21a
7 changed files with 133 additions and 65 deletions
|
|
@ -6,7 +6,7 @@
|
|||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/17 16:21:03 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:22:40 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/18 17:53:13 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -22,5 +22,4 @@ bool is_operator_start(char c)
|
|||
return (true);
|
||||
else
|
||||
return (false);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,13 +6,22 @@
|
|||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 15:47:58 by khais #+# #+# */
|
||||
/* Updated: 2025/02/14 13:57:10 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/18 17:44:57 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#ifndef WORDDESC_H
|
||||
# define WORDDESC_H
|
||||
|
||||
/* Possible values for the `flags' field of a WORD_DESC. */
|
||||
/*
** Every flag is a distinct single bit so that several of them can be OR-ed
** together. The values are spelled in hexadecimal because the `0b' binary
** prefix is only standard from C23 on (a compiler extension before that).
*/
/* Dollar sign present. */
# define W_HASDOLLAR 0x01
/* Some form of quote character is present. */
# define W_QUOTED 0x02
/* Word is a builtin command that takes assignments. */
# define W_ASSNBLTIN 0x04
/* Word is assignment argument to command. */
# define W_ASSIGNARG 0x08
/* Word contains a quoted null character. */
# define W_HASQUOTEDNULL 0x10
/* Word should be treated as if double-quoted. */
# define W_DQUOTE 0x20
|
||||
|
||||
/*
|
||||
** A logical word for the parser.
|
||||
**
|
||||
|
|
@ -26,6 +35,7 @@ typedef struct s_worddesc
|
|||
** The word itself
|
||||
*/
|
||||
char *word;
|
||||
char flags;
|
||||
} t_worddesc;
|
||||
|
||||
t_worddesc *worddesc_create(char *word);
|
||||
|
|
|
|||
15
src/parser/wordsplit/tokenizing_1_5.c
Normal file
15
src/parser/wordsplit/tokenizing_1_5.c
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* tokenizing_1_5.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */
|
||||
/* Updated: 2025/02/19 13:20:49 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
|
||||
|
||||
15
src/parser/wordsplit/tokenizing_6_10.c
Normal file
15
src/parser/wordsplit/tokenizing_6_10.c
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* tokenizing_6_10.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */
|
||||
/* Updated: 2025/02/19 13:21:36 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
|
||||
|
||||
|
|
@ -3,21 +3,17 @@
|
|||
/* ::: :::::::: */
|
||||
/* wordsplit.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* By: jguelen <jguelen@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 17:02:32 by khais #+# #+# */
|
||||
/* Updated: 2025/02/17 16:41:00 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/19 15:17:22 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
#include "../../buffer/buffer.h"
|
||||
#include "../matchers/blank.h"
|
||||
#include "../matchers/operator_start.h"
|
||||
#include "../matchers/operator_combo.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *currently_in_word, bool *currently_in_operator)
|
||||
static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token,
|
||||
bool *currently_in_word, bool *currently_in_operator)
|
||||
{
|
||||
if ((*token) == NULL)
|
||||
return (wordlist);
|
||||
|
|
@ -29,38 +25,6 @@ static t_wordlist *delimit(t_wordlist *wordlist, t_buffer **token, bool *current
|
|||
return (wordlist);
|
||||
}
|
||||
|
||||
/*
** Append the character `c' to `token'.
** When `token' is NULL, a buffer is first obtained from ft_buffer_new().
** Returns whatever ft_buffer_pushchar() returns.
*/
static t_buffer *push_char(t_buffer *token, char c)
|
||||
{
|
||||
if (token == NULL)
|
||||
token = ft_buffer_new();
|
||||
return (ft_buffer_pushchar(token, c));
|
||||
}
|
||||
|
||||
|
||||
/*
** Push `c' onto `token' through push_char() and set `*currently_in_word'
** to true. Returns the buffer handed back by push_char().
*/
static t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word)
|
||||
{
|
||||
token = push_char(token, c);
|
||||
/* The flag is raised regardless of what push_char() returned. */
(*currently_in_word) = true;
|
||||
return (token);
|
||||
}
|
||||
|
||||
/*
** Compute the quoting state after reading the character `c'.
** - `quote' is '\0': `c' becomes the new state.
** - `quote' equals `c': the state goes back to '\0'.
** - otherwise: the state is left unchanged.
** In every case `c' is appended to `*token'. Returns the new state.
*/
static char quote_flip(t_buffer **token, char c, char quote)
|
||||
{
|
||||
if (quote == '\0')
|
||||
quote = c;
|
||||
else if (quote == c)
|
||||
quote = '\0';
|
||||
/* The character is kept in the token whatever the state transition. */
(*token) = push_char((*token), c);
|
||||
return (quote);
|
||||
}
|
||||
|
||||
/*
** Begin a token with the character `c' and mark it as an operator.
** The pending token is first passed to delimit(), whose result replaces
** `*wordlist'. Then `c' starts the next token through new_word() (which
** sets `*currently_in_word') and `*currently_in_operator' is set to true.
*/
static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool *currently_in_word, bool *currently_in_operator)
|
||||
{
|
||||
(*wordlist) = delimit(*wordlist, token, currently_in_word, currently_in_operator);
|
||||
(*token) = new_word(*token, c, currently_in_word);
|
||||
(*currently_in_operator) = true;
|
||||
}
|
||||
|
||||
/*
|
||||
** split a string into words, respecting quotes etc.
|
||||
**
|
||||
|
|
@ -73,38 +37,32 @@ static void operator_start(t_wordlist **wordlist, t_buffer **token, char c, bool
|
|||
*/
|
||||
t_wordlist *minishell_wordsplit(char *original)
|
||||
{
|
||||
size_t idx;
|
||||
t_wordlist *wordlist;
|
||||
t_buffer *token;
|
||||
bool currently_in_word;
|
||||
bool currently_in_operator;
|
||||
char quote;
|
||||
t_token_build token_build;
|
||||
|
||||
idx = 0;
|
||||
wordlist = NULL;
|
||||
token = NULL;
|
||||
currently_in_word = false;
|
||||
currently_in_operator = false;
|
||||
quote = '\0';
|
||||
ft_bzero(&token_build);
|
||||
while (true)
|
||||
{
|
||||
// 1. If the end of input is recognized, the current token (if any)
|
||||
// shall be delimited.
|
||||
if (original[idx] == '\0')
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
// 2. If the previous character was used as part of an operator and the
|
||||
// current character is not quoted and can be used with the previous
|
||||
// characters to form an operator, it shall be used as part of that
|
||||
// (operator) token.
|
||||
else if (currently_in_operator && quote == '\0' && is_operator_combo(token->buffer, original[idx]))
|
||||
else if (currently_in_operator && quote == '\0'
|
||||
&& is_operator_combo(token->buffer, original[idx]))
|
||||
token = push_char(token, original[idx]);
|
||||
// 3. If the previous character was used as part of an operator and the
|
||||
// current character cannot be used with the previous characters to form
|
||||
// an operator, the operator containing the previous character shall be
|
||||
// delimited.
|
||||
else if (currently_in_operator && quote == '\0' && !is_operator_combo(token->buffer, original[idx]))
|
||||
else if (currently_in_operator && quote == '\0'
|
||||
&& !is_operator_combo(token->buffer, original[idx]))
|
||||
{
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
continue ;
|
||||
}
|
||||
// 4. If the current character is single-quote, or double-quote and it
|
||||
|
|
@ -122,12 +80,14 @@ t_wordlist *minishell_wordsplit(char *original)
|
|||
// be delimited. The current character shall be used as the beginning of
|
||||
// the next (operator) token.
|
||||
else if (quote == '\0' && is_operator_start(original[idx]))
|
||||
operator_start(&wordlist, &token, original[idx], &currently_in_word, &currently_in_operator);
|
||||
operator_start(&wordlist, &token, original[idx],
|
||||
&currently_in_word, &currently_in_operator);
|
||||
// If the current character is an unquoted <blank>, any token containing
|
||||
// the previous character is delimited and the current character shall
|
||||
// be discarded.
|
||||
else if (is_blank(original[idx]) && quote == '\0')
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word, &currently_in_operator);
|
||||
wordlist = delimit(wordlist, &token, &currently_in_word,
|
||||
&currently_in_operator);
|
||||
// If the previous character was part of a word, the current character
|
||||
// shall be appended to that word.
|
||||
else if (currently_in_word)
|
||||
|
|
@ -139,7 +99,7 @@ t_wordlist *minishell_wordsplit(char *original)
|
|||
break ;
|
||||
idx++;
|
||||
}
|
||||
if (quote != '\0')
|
||||
return (wordlist_destroy(wordlist), NULL);
|
||||
return (wordlist);
|
||||
if (token_build.quote != '\0')
|
||||
return (wordlist_destroy(token_build.wordlist), NULL);
|
||||
return (token_build.wordlist);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
/* By: khais <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/13 15:52:48 by khais #+# #+# */
|
||||
/* Updated: 2025/02/13 15:54:30 by khais ### ########.fr */
|
||||
/* Updated: 2025/02/19 15:17:02 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
|
|
@ -14,6 +14,29 @@
|
|||
# define WORDSPLIT_H
|
||||
|
||||
# include "../wordlist/wordlist.h"
|
||||
# include <stdbool.h>
|
||||
# include "../../buffer/buffer.h"
|
||||
# include "../matchers/blank.h"
|
||||
# include "../matchers/operator_start.h"
|
||||
# include "../matchers/operator_combo.h"
|
||||
# include <stdlib.h>
|
||||
|
||||
/*
** State of a word-splitting pass gathered in one struct.
** The fields mirror the local variables that minishell_wordsplit() used
** to declare one by one.
*/
typedef struct s_token_build
|
||||
{
|
||||
/* Word list; minishell_wordsplit() returns it on success. */
t_wordlist *wordlist;
|
||||
/* Presumably the buffer of the token being accumulated - TODO confirm. */
t_buffer *cur_token;
|
||||
/* Counterpart of the former `currently_in_word' local. */
bool currently_in_word;
|
||||
/* Counterpart of the former `currently_in_operator' local. */
bool currently_in_operator;
|
||||
/* '\0' or a quote state; minishell_wordsplit() returns NULL if it is */
/* still non-zero when the loop ends. */
char quote;
|
||||
/* Presumably the read position in the input string - TODO confirm. */
size_t current_index;
|
||||
} t_token_build;
|
||||
|
||||
|
||||
t_buffer *push_char(t_buffer *token, char c);
|
||||
t_buffer *new_word(t_buffer *token, char c, bool *currently_in_word);
|
||||
char quote_flip(t_buffer **token, char c, char quote);
|
||||
void operator_start(t_wordlist **wordlist, t_buffer **token, char c,
|
||||
bool *currently_in_word, bool *currently_in_operator);
|
||||
|
||||
t_wordlist *minishell_wordsplit(char *original);
|
||||
|
||||
|
|
|
|||
46
src/parser/wordsplit/wordsplit_utils.c
Normal file
46
src/parser/wordsplit/wordsplit_utils.c
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* wordsplit_utils.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2025/02/19 14:27:47 by jguelen #+# #+# */
|
||||
/* Updated: 2025/02/19 14:56:36 by jguelen ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "wordsplit.h"
|
||||
|
||||
/*
** Append the character `c' to `token' and return the resulting buffer.
**
** When `token' is NULL a buffer is first requested from ft_buffer_new().
** If that call yields NULL (presumably an allocation failure), NULL is
** returned at once rather than forwarding a NULL buffer to
** ft_buffer_pushchar(). Otherwise the return value is whatever
** ft_buffer_pushchar() returns.
*/
t_buffer	*push_char(t_buffer *token, char c)
{
	if (token == NULL)
	{
		token = ft_buffer_new();
		if (token == NULL)
			return (NULL);
	}
	return (ft_buffer_pushchar(token, c));
}
|
||||
|
||||
/*
** Start (or continue) a word with the character `c'.
** `c' is pushed onto `token' through push_char(), `*currently_in_word' is
** set to true, and the buffer returned by push_char() is handed back.
*/
t_buffer	*new_word(t_buffer *token, char c, bool *currently_in_word)
{
	t_buffer	*grown;

	grown = push_char(token, c);
	*currently_in_word = true;
	return (grown);
}
|
||||
|
||||
/*
** Return the quoting state that follows reading the character `c'.
** - state is '\0': `c' becomes the state;
** - state equals `c': the state returns to '\0';
** - anything else: the state is kept as is.
** `c' is always appended to `*token' through push_char().
*/
char	quote_flip(t_buffer **token, char c, char quote)
{
	char	next_state;

	next_state = quote;
	if (quote == c)
		next_state = '\0';
	else if (quote == '\0')
		next_state = c;
	*token = push_char(*token, c);
	return (next_state);
}
|
||||
|
||||
/*
** Begin an operator token with the character `c'.
** The pending token is passed to delimit() and the list it returns is
** stored in `*wordlist'. `c' then starts the next token through new_word()
** (which sets `*currently_in_word'), and `*currently_in_operator' is set
** to true.
**
** NOTE(review): delimit() appears to be `static' in wordsplit.c with no
** prototype in wordsplit.h, so this call may not build from this file -
** confirm.
*/
void	operator_start(t_wordlist **wordlist, t_buffer **token, char c,
			bool *currently_in_word, bool *currently_in_operator)
{
	t_wordlist	*flushed;

	flushed = delimit(*wordlist, token, currently_in_word,
			currently_in_operator);
	*wordlist = flushed;
	*token = new_word(*token, c, currently_in_word);
	*currently_in_operator = true;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue