wordsplit rules: fix basic norm problems

also make notes for future refactorings
This commit is contained in:
Khaïs COLIN 2025-02-20 12:00:04 +01:00
parent f2469f1600
commit edf8946fe3
Signed by: logistic-bot
SSH key fingerprint: SHA256:RlpiqKeXpcPFZZ4y9Ou4xi2M8OhRJovIwDlbCaMsuAo
2 changed files with 18 additions and 13 deletions

View file

@ -6,7 +6,7 @@
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */ /* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */ /* Created: 2025/02/19 13:20:01 by jguelen #+# #+# */
/* Updated: 2025/02/20 11:52:28 by khais ### ########.fr */ /* Updated: 2025/02/20 12:22:28 by khais ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
@ -40,7 +40,8 @@ bool rule_eof(t_token_build *builder, char *original)
bool rule_combine_operator(t_token_build *builder, char *original) bool rule_combine_operator(t_token_build *builder, char *original)
{ {
if (builder->currently_in_operator && builder->quote == '\0' if (builder->currently_in_operator && builder->quote == '\0'
&& is_operator_combo(builder->cur_token->buffer, original[builder->idx])) && is_operator_combo(builder->cur_token->buffer,
original[builder->idx]))
{ {
push_char(builder, original[builder->idx]); push_char(builder, original[builder->idx]);
builder->idx++; builder->idx++;
@ -54,16 +55,18 @@ bool rule_combine_operator(t_token_build *builder, char *original)
** character cannot be used with the previous characters to form an operator, ** character cannot be used with the previous characters to form an operator,
** the operator containing the previous character shall be delimited. ** the operator containing the previous character shall be delimited.
*/ */
/*
** rule_operator_end: token-recognition rule 3 (operator termination).
**
** The rule fires when all of the following hold, checked in this order:
**   - builder->currently_in_operator is set,
**   - no quote is open (builder->quote == '\0'),
**   - is_operator_combo() reports that original[builder->idx] cannot be
**     combined with the text in builder->cur_token->buffer.
** When it fires, delimit(builder) is called to close the current token.
**
** builder:  tokenizer state; read here and handed to delimit().
** original: the input string being split; only original[builder->idx]
**           is read.
**
** Returns true when the rule fired, false otherwise. This function does
** not itself advance builder->idx.
**
** FIXME: the condition was flagged for a later refactoring by an inline
** note that gave no further detail.
*/
bool	rule_operator_end(t_token_build *builder, char *original)
{
	if (!builder->currently_in_operator || builder->quote != '\0')
		return (false);
	if (is_operator_combo(builder->cur_token->buffer, original[builder->idx]))
		return (false);
	delimit(builder);
	return (true);
}
/* /*
** 4. If the current character is single-quote, or double-quote and it is not ** 4. If the current character is single-quote, or double-quote and it is not
** quoted, it shall affect quoting for subsequent characters up to the end of ** quoted, it shall affect quoting for subsequent characters up to the end of
@ -73,8 +76,9 @@ bool rule_operator_end(t_token_build *builder, char *original)
** operators, between the <quotation-mark> and the end of the quoted text. The ** operators, between the <quotation-mark> and the end of the quoted text. The
** token shall not be delimited by the end of the quoted field. ** token shall not be delimited by the end of the quoted field.
*/ */
bool rule_quote(t_token_build *builder, char *original) bool rule_quote(t_token_build *builder, char *original)
{ {
// FIXME
if (original[builder->idx] == '\'' || original[builder->idx] == '"') if (original[builder->idx] == '\'' || original[builder->idx] == '"')
{ {
quote_flip(builder, original[builder->idx]); quote_flip(builder, original[builder->idx]);
@ -82,7 +86,6 @@ bool rule_quote(t_token_build *builder, char *original)
return (true); return (true);
} }
return (false); return (false);
} }
bool token_rule_5(t_token_build *builder, char *original); bool token_rule_5(t_token_build *builder, char *original);

View file

@ -6,7 +6,7 @@
/* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */ /* By: jguelen <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */ /* Created: 2025/02/19 13:21:18 by jguelen #+# #+# */
/* Updated: 2025/02/20 11:52:28 by khais ### ########.fr */ /* Updated: 2025/02/20 11:59:57 by khais ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
@ -25,8 +25,9 @@
** The current character shall be used as the beginning of the next (operator) ** The current character shall be used as the beginning of the next (operator)
** token. ** token.
*/ */
bool rule_new_operator(t_token_build *builder, char *original) bool rule_new_operator(t_token_build *builder, char *original)
{ {
// FIXME: unquoted
if (builder->quote == '\0' && is_operator_start(original[builder->idx])) if (builder->quote == '\0' && is_operator_start(original[builder->idx]))
{ {
operator_start(builder, original[builder->idx]); operator_start(builder, original[builder->idx]);
@ -40,8 +41,9 @@ bool rule_new_operator(t_token_build *builder, char *original)
** 7. If the current character is an unquoted <blank>, any token containing the ** 7. If the current character is an unquoted <blank>, any token containing the
** previous character is delimited and the current character shall be discarded. ** previous character is delimited and the current character shall be discarded.
*/ */
bool rule_delimit_blank(t_token_build *builder, char *original) bool rule_delimit_blank(t_token_build *builder, char *original)
{ {
// FIXME: unquoted
if (is_blank(original[builder->idx]) && builder->quote == '\0') if (is_blank(original[builder->idx]) && builder->quote == '\0')
{ {
delimit(builder); delimit(builder);
@ -55,7 +57,7 @@ bool rule_delimit_blank(t_token_build *builder, char *original)
** 8. If the previous character was part of a word, the current character shall ** 8. If the previous character was part of a word, the current character shall
** be appended to that word. ** be appended to that word.
*/ */
bool rule_combine_word(t_token_build *builder, char *original) bool rule_combine_word(t_token_build *builder, char *original)
{ {
if (builder->currently_in_word) if (builder->currently_in_word)
{ {
@ -69,7 +71,7 @@ bool rule_combine_word(t_token_build *builder, char *original)
/* /*
** 10. The current character is used as the start of a new word. ** 10. The current character is used as the start of a new word.
*/ */
bool rule_new_word(t_token_build *builder, char *original) bool rule_new_word(t_token_build *builder, char *original)
{ {
new_word(builder, original[builder->idx]); new_word(builder, original[builder->idx]);
builder->idx++; builder->idx++;