summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Pádraig Brady <P@draigBrady.com> 2026-03-12 18:58:46 +0000
committer Pádraig Brady <P@draigBrady.com> 2026-04-05 13:15:56 +0100
commit f644b4ca53d875e6ff62a50b8d02dc2b4e421b76 (patch)
tree 4493794a01a334d0fff4aded75b4bbbcb41f3c83 /src
parent 57110d8bae0637baab050466fad2946b645b51a0 (diff)
downloadcoreutils-f644b4ca53d875e6ff62a50b8d02dc2b4e421b76.tar.gz
coreutils-f644b4ca53d875e6ff62a50b8d02dc2b4e421b76.zip
cut: refactor multi-byte updates
* src/cut.c: 160 fewer lines.

Helpers extracted (replacing repeated inline patterns):
- write_line_delim(), write_pending_line_delim(), reset_item_line() - line boundary code used by cut_bytes{,_no_split}, cut_characters
- write_selected_item() - output-delimiter + write logic used by all three byte/char functions
- reset_field_line() - field line reset used by cut_fields_mb_any

Field functions unified via cut_fields_mb_any(stream, whitespace_mode):
- struct mbfield_parser encapsulates the whitespace vs. fixed-delimiter state (saved char, mode flag)
- mbfield_get_char() - dispatches to saved-char or direct read
- mbfield_terminator() - returns FIELD_{DATA,DELIMITER,LINE_DELIMITER} based on mode
- read_mb_field_to_buffer() - replaces the two duplicated first-field buffering loops
- scan_mb_field(mbbuf, parser, pending, write_field) - replaces the four duplicated field scan loops (print+skip × two modes) with a single function and a write_field bool
- cut_fields_mb and cut_fields_ws are now trivial wrappers
Diffstat (limited to 'src')
-rw-r--r-- src/cut.c 535
1 files changed, 188 insertions, 347 deletions
diff --git a/src/cut.c b/src/cut.c
index 613e2a9ea..507d1ba07 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -273,11 +273,19 @@ field_delim_eq (mcel_t g)
enum field_terminator
{
+ FIELD_DATA,
FIELD_DELIMITER,
FIELD_LINE_DELIMITER,
FIELD_EOF
};
+struct mbfield_parser
+{
+ bool whitespace_delimited;
+ bool have_saved;
+ mcel_t saved_g;
+};
+
static inline mcel_t
mbbuf_get_saved_char (mbbuf_t *mbbuf, bool *have_saved, mcel_t *saved_g)
{
@@ -315,6 +323,133 @@ write_bytes (char const *buf, size_t n_bytes)
write_error ();
}
+static inline void
+write_line_delim (void)
+{
+ if (putchar (line_delim) < 0)
+ write_error ();
+}
+
+static inline void
+reset_item_line (uintmax_t *item_idx, bool *print_delimiter)
+{
+ write_line_delim ();
+ *item_idx = 0;
+ *print_delimiter = false;
+ current_rp = frp;
+}
+
+static inline void
+write_pending_line_delim (uintmax_t item_idx)
+{
+ if (item_idx > 0)
+ write_line_delim ();
+}
+
+static inline void
+write_selected_item (bool *print_delimiter, bool range_start,
+ char const *buf, size_t n_bytes)
+{
+ if (output_delimiter_string != output_delimiter_default)
+ {
+ if (*print_delimiter && range_start)
+ write_bytes (output_delimiter_string, output_delimiter_length);
+ *print_delimiter = true;
+ }
+
+ write_bytes (buf, n_bytes);
+}
+
+static inline mcel_t
+mbfield_get_char (mbbuf_t *mbbuf, struct mbfield_parser *parser)
+{
+ return (parser->whitespace_delimited
+ ? mbbuf_get_saved_char (mbbuf, &parser->have_saved, &parser->saved_g)
+ : mbbuf_get_char (mbbuf));
+}
+
+static inline enum field_terminator
+mbfield_terminator (mbbuf_t *mbbuf, struct mbfield_parser *parser, mcel_t g,
+ bool *have_pending_line)
+{
+ if (g.ch == line_delim)
+ return FIELD_LINE_DELIMITER;
+
+ if (parser->whitespace_delimited)
+ return (c32issep (g.ch)
+ ? skip_whitespace_delim (mbbuf, &parser->have_saved,
+ &parser->saved_g, have_pending_line)
+ : FIELD_DATA);
+
+ return field_delim_eq (g) ? FIELD_DELIMITER : FIELD_DATA;
+}
+
+static inline void
+append_field_1_bytes (mbbuf_t *mbbuf, mcel_t g, size_t *n_bytes)
+{
+ if (field_1_bufsize - *n_bytes < g.len)
+ {
+ field_1_buffer = xpalloc (field_1_buffer, &field_1_bufsize,
+ g.len, -1, sizeof *field_1_buffer);
+ }
+
+ memcpy (field_1_buffer + *n_bytes, mbbuf_char_offset (mbbuf, g), g.len);
+ *n_bytes += g.len;
+}
+
+static enum field_terminator
+read_mb_field_to_buffer (mbbuf_t *mbbuf, struct mbfield_parser *parser,
+ bool *have_pending_line, size_t *n_bytes)
+{
+ while (true)
+ {
+ mcel_t g = mbfield_get_char (mbbuf, parser);
+ if (g.ch == MBBUF_EOF)
+ return FIELD_EOF;
+
+ *have_pending_line = true;
+
+ enum field_terminator terminator
+ = mbfield_terminator (mbbuf, parser, g, have_pending_line);
+ if (terminator != FIELD_DATA)
+ return terminator;
+
+ append_field_1_bytes (mbbuf, g, n_bytes);
+ }
+}
+
+static enum field_terminator
+scan_mb_field (mbbuf_t *mbbuf, struct mbfield_parser *parser,
+ bool *have_pending_line, bool write_field)
+{
+ while (true)
+ {
+ mcel_t g = mbfield_get_char (mbbuf, parser);
+ if (g.ch == MBBUF_EOF)
+ return FIELD_EOF;
+
+ *have_pending_line = true;
+
+ enum field_terminator terminator
+ = mbfield_terminator (mbbuf, parser, g, have_pending_line);
+ if (terminator != FIELD_DATA)
+ return terminator;
+
+ if (write_field)
+ write_bytes (mbbuf_char_offset (mbbuf, g), g.len);
+ }
+}
+
+static inline void
+reset_field_line (uintmax_t *field_idx, bool *found_any_selected_field,
+ bool *have_pending_line)
+{
+ *field_idx = 1;
+ current_rp = frp;
+ *found_any_selected_field = false;
+ *have_pending_line = false;
+}
+
/* Read from stream STREAM, printing to standard output any selected bytes. */
static void
@@ -335,20 +470,10 @@ cut_bytes (FILE *stream)
c = getc (stream);
if (c == line_delim)
- {
- if (putchar (c) < 0)
- write_error ();
- byte_idx = 0;
- print_delimiter = false;
- current_rp = frp;
- }
+ reset_item_line (&byte_idx, &print_delimiter);
else if (c == EOF)
{
- if (byte_idx > 0)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
+ write_pending_line_delim (byte_idx);
break;
}
else
@@ -356,20 +481,9 @@ cut_bytes (FILE *stream)
next_item (&byte_idx);
if (print_kth (byte_idx))
{
- if (output_delimiter_string != output_delimiter_default)
- {
- if (print_delimiter && is_range_start_index (byte_idx))
- {
- if (fwrite (output_delimiter_string, sizeof (char),
- output_delimiter_length, stdout)
- != output_delimiter_length)
- write_error ();
- }
- print_delimiter = true;
- }
-
- if (putchar (c) < 0)
- write_error ();
+ char ch = c;
+ write_selected_item (&print_delimiter,
+ is_range_start_index (byte_idx), &ch, 1);
}
}
}
@@ -394,20 +508,10 @@ cut_bytes_no_split (FILE *stream)
mcel_t g = mbbuf_get_char (&mbbuf);
if (g.ch == line_delim)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- byte_idx = 0;
- print_delimiter = false;
- current_rp = frp;
- }
+ reset_item_line (&byte_idx, &print_delimiter);
else if (g.ch == MBBUF_EOF)
{
- if (byte_idx > 0)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
+ write_pending_line_delim (byte_idx);
break;
}
else
@@ -433,17 +537,8 @@ cut_bytes_no_split (FILE *stream)
}
if (seen_selected && suffix_selected)
- {
- if (output_delimiter_string != output_delimiter_default)
- {
- if (print_delimiter && first_selected_is_range_start)
- write_bytes (output_delimiter_string,
- output_delimiter_length);
- print_delimiter = true;
- }
-
- write_bytes (mbbuf_char_offset (&mbbuf, g), g.len);
- }
+ write_selected_item (&print_delimiter,first_selected_is_range_start,
+ mbbuf_char_offset (&mbbuf, g), g.len);
}
}
}
@@ -466,207 +561,40 @@ cut_characters (FILE *stream)
mcel_t g = mbbuf_get_char (&mbbuf);
if (g.ch == line_delim)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- char_idx = 0;
- print_delimiter = false;
- current_rp = frp;
- }
+ reset_item_line (&char_idx, &print_delimiter);
else if (g.ch == MBBUF_EOF)
{
- if (char_idx > 0)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
+ write_pending_line_delim (char_idx);
break;
}
else
{
next_item (&char_idx);
if (print_kth (char_idx))
- {
- if (output_delimiter_string != output_delimiter_default)
- {
- if (print_delimiter && is_range_start_index (char_idx))
- {
- if (fwrite (output_delimiter_string, sizeof (char),
- output_delimiter_length, stdout)
- != output_delimiter_length)
- write_error ();
- }
- print_delimiter = true;
- }
-
- if (fwrite (mbbuf_char_offset (&mbbuf, g), sizeof (char), g.len,
- stdout)
- != g.len)
- write_error ();
- }
+ write_selected_item (&print_delimiter,
+ is_range_start_index (char_idx),
+ mbbuf_char_offset (&mbbuf, g), g.len);
}
}
}
/* Read from STREAM, printing to standard output any selected fields,
- using a multibyte field delimiter. */
+ using a multibyte-aware field delimiter parser. */
static void
-cut_fields_mb (FILE *stream)
+cut_fields_mb_any (FILE *stream, bool whitespace_mode)
{
static char line_in[IO_BUFSIZE];
mbbuf_t mbbuf;
- uintmax_t field_idx = 1;
- bool found_any_selected_field = false;
- bool buffer_first_field;
- bool have_pending_line = false;
-
- current_rp = frp;
- mbbuf_init (&mbbuf, line_in, sizeof line_in, stream);
-
- buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
-
- while (true)
+ struct mbfield_parser parser =
{
- if (field_idx == 1 && buffer_first_field)
- {
- size_t n_bytes = 0;
- enum field_terminator terminator;
-
- while (true)
- {
- mcel_t g = mbbuf_get_char (&mbbuf);
-
- if (g.ch == MBBUF_EOF)
- {
- if (n_bytes == 0)
- return;
- terminator = FIELD_EOF;
- break;
- }
-
- if (field_1_bufsize - n_bytes < g.len)
- {
- field_1_buffer = xpalloc (field_1_buffer, &field_1_bufsize,
- g.len, -1,
- sizeof *field_1_buffer);
- }
-
- memcpy (field_1_buffer + n_bytes, mbbuf_char_offset (&mbbuf, g),
- g.len);
- n_bytes += g.len;
- have_pending_line = true;
-
- if (g.ch == line_delim)
- {
- terminator = FIELD_LINE_DELIMITER;
- break;
- }
-
- if (field_delim_eq (g))
- {
- terminator = FIELD_DELIMITER;
- break;
- }
- }
-
- if (terminator != FIELD_DELIMITER)
- {
- if (!suppress_non_delimited)
- {
- write_bytes (field_1_buffer, n_bytes);
- if (terminator == FIELD_EOF)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
- }
-
- if (terminator == FIELD_EOF)
- break;
-
- field_idx = 1;
- current_rp = frp;
- found_any_selected_field = false;
- have_pending_line = false;
- continue;
- }
-
- if (print_kth (1))
- {
- write_bytes (field_1_buffer, n_bytes - delim_length);
- found_any_selected_field = true;
- }
- next_item (&field_idx);
- }
-
- mcel_t g;
-
- if (print_kth (field_idx))
- {
- if (found_any_selected_field)
- write_bytes (output_delimiter_string, output_delimiter_length);
- found_any_selected_field = true;
-
- while (true)
- {
- g = mbbuf_get_char (&mbbuf);
- if (g.ch != MBBUF_EOF)
- have_pending_line = true;
- if (g.ch == MBBUF_EOF || g.ch == line_delim || field_delim_eq (g))
- break;
- write_bytes (mbbuf_char_offset (&mbbuf, g), g.len);
- }
- }
- else
- {
- while (true)
- {
- g = mbbuf_get_char (&mbbuf);
- if (g.ch != MBBUF_EOF)
- have_pending_line = true;
- if (g.ch == MBBUF_EOF || g.ch == line_delim || field_delim_eq (g))
- break;
- }
- }
-
- if (field_delim_eq (g))
- next_item (&field_idx);
- else if (g.ch == line_delim || g.ch == MBBUF_EOF)
- {
- if (g.ch == MBBUF_EOF && !have_pending_line)
- break;
- if (found_any_selected_field
- || !(suppress_non_delimited && field_idx == 1))
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
- if (g.ch == MBBUF_EOF)
- break;
-
- field_idx = 1;
- current_rp = frp;
- found_any_selected_field = false;
- have_pending_line = false;
- }
- }
-}
-
-/* Read from STREAM, printing to standard output any selected fields,
- using runs of whitespace as the field delimiter. */
-
-static void
-cut_fields_ws (FILE *stream)
-{
- static char line_in[IO_BUFSIZE];
- mbbuf_t mbbuf;
+ .whitespace_delimited = whitespace_mode,
+ .saved_g = { .ch = MBBUF_EOF }
+ };
uintmax_t field_idx = 1;
bool found_any_selected_field = false;
bool buffer_first_field;
bool have_pending_line = false;
- bool have_saved = false;
- mcel_t saved_g = { .ch = MBBUF_EOF };
current_rp = frp;
mbbuf_init (&mbbuf, line_in, sizeof line_in, stream);
@@ -678,71 +606,25 @@ cut_fields_ws (FILE *stream)
if (field_idx == 1 && buffer_first_field)
{
size_t n_bytes = 0;
- enum field_terminator terminator;
-
- while (true)
- {
- mcel_t g = mbbuf_get_saved_char (&mbbuf, &have_saved, &saved_g);
-
- if (g.ch == MBBUF_EOF)
- {
- if (n_bytes == 0)
- return;
- terminator = FIELD_EOF;
- break;
- }
-
- have_pending_line = true;
-
- if (g.ch == line_delim)
- {
- if (field_1_bufsize - n_bytes < g.len)
- field_1_buffer = xpalloc (field_1_buffer, &field_1_bufsize,
- g.len, -1,
- sizeof *field_1_buffer);
- memcpy (field_1_buffer + n_bytes,
- mbbuf_char_offset (&mbbuf, g), g.len);
- n_bytes += g.len;
- terminator = FIELD_LINE_DELIMITER;
- break;
- }
-
- if (c32issep (g.ch))
- {
- terminator = skip_whitespace_delim (&mbbuf, &have_saved,
- &saved_g,
- &have_pending_line);
- break;
- }
-
- if (field_1_bufsize - n_bytes < g.len)
- field_1_buffer = xpalloc (field_1_buffer, &field_1_bufsize,
- g.len, -1,
- sizeof *field_1_buffer);
- memcpy (field_1_buffer + n_bytes, mbbuf_char_offset (&mbbuf, g),
- g.len);
- n_bytes += g.len;
- }
+ enum field_terminator terminator
+ = read_mb_field_to_buffer (&mbbuf, &parser, &have_pending_line,
+ &n_bytes);
+ if (terminator == FIELD_EOF && n_bytes == 0)
+ return;
if (terminator != FIELD_DELIMITER)
{
if (!suppress_non_delimited)
{
write_bytes (field_1_buffer, n_bytes);
- if (terminator == FIELD_EOF)
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
+ write_line_delim ();
}
if (terminator == FIELD_EOF)
break;
- field_idx = 1;
- current_rp = frp;
- found_any_selected_field = false;
- have_pending_line = false;
+ reset_field_line (&field_idx, &found_any_selected_field,
+ &have_pending_line);
continue;
}
@@ -755,71 +637,17 @@ cut_fields_ws (FILE *stream)
}
enum field_terminator terminator;
+ bool write_field = print_kth (field_idx);
- if (print_kth (field_idx))
+ if (write_field)
{
if (found_any_selected_field)
write_bytes (output_delimiter_string, output_delimiter_length);
found_any_selected_field = true;
-
- while (true)
- {
- mcel_t g = mbbuf_get_saved_char (&mbbuf, &have_saved, &saved_g);
-
- if (g.ch == MBBUF_EOF)
- {
- terminator = FIELD_EOF;
- break;
- }
-
- have_pending_line = true;
-
- if (g.ch == line_delim)
- {
- terminator = FIELD_LINE_DELIMITER;
- break;
- }
-
- if (c32issep (g.ch))
- {
- terminator = skip_whitespace_delim (&mbbuf, &have_saved,
- &saved_g,
- &have_pending_line);
- break;
- }
-
- write_bytes (mbbuf_char_offset (&mbbuf, g), g.len);
- }
}
- else
- {
- while (true)
- {
- mcel_t g = mbbuf_get_saved_char (&mbbuf, &have_saved, &saved_g);
- if (g.ch == MBBUF_EOF)
- {
- terminator = FIELD_EOF;
- break;
- }
-
- have_pending_line = true;
-
- if (g.ch == line_delim)
- {
- terminator = FIELD_LINE_DELIMITER;
- break;
- }
-
- if (c32issep (g.ch))
- {
- terminator = skip_whitespace_delim (&mbbuf, &have_saved,
- &saved_g,
- &have_pending_line);
- break;
- }
- }
- }
+ terminator = scan_mb_field (&mbbuf, &parser, &have_pending_line,
+ write_field);
if (terminator == FIELD_DELIMITER)
next_item (&field_idx);
@@ -829,21 +657,34 @@ cut_fields_ws (FILE *stream)
break;
if (found_any_selected_field
|| !(suppress_non_delimited && field_idx == 1))
- {
- if (putchar (line_delim) < 0)
- write_error ();
- }
+ write_line_delim ();
if (terminator == FIELD_EOF)
break;
- field_idx = 1;
- current_rp = frp;
- found_any_selected_field = false;
- have_pending_line = false;
+ reset_field_line (&field_idx, &found_any_selected_field,
+ &have_pending_line);
}
}
}
+/* Read from STREAM, printing to standard output any selected fields,
+ using a multibyte field delimiter. */
+
+static void
+cut_fields_mb (FILE *stream)
+{
+ cut_fields_mb_any (stream, false);
+}
+
+/* Read from STREAM, printing to standard output any selected fields,
+ using runs of whitespace as the field delimiter. */
+
+static void
+cut_fields_ws (FILE *stream)
+{
+ cut_fields_mb_any (stream, true);
+}
+
/* Read from stream STREAM, printing to standard output any selected fields. */
static void