summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2026-04-02 21:56:23 +0100
committer: Pádraig Brady <P@draigBrady.com> 2026-04-06 15:52:58 +0100
commit: 1a44a25808f9bda727ffbda753ff2eeab3bf79cc (patch)
tree: 08a40465d92b7a3f47ebd02b46cb043758769da1 /src
parent: 57c87043f6cdb6aeb043b78d607a43a9ae615430 (diff)
download: coreutils-1a44a25808f9bda727ffbda753ff2eeab3bf79cc.tar.gz
download: coreutils-1a44a25808f9bda727ffbda753ff2eeab3bf79cc.zip
cut: -f: fix handling of multi-byte delimiters that span buffers
* src/cut.c (cut_fields_bytesearch): Ensure up to delim_length - 1 bytes are left for the next refill.
* tests/cut/cut.pl: Add a test case.
Diffstat (limited to 'src')
-rw-r--r-- src/cut.c 25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/cut.c b/src/cut.c
index 80e34cc09..b11a8c4e5 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -628,6 +628,25 @@ find_field_delim (char *buf, size_t len)
#endif
}
+/* Return the length of the longest tail of the LEN bytes at BUF that is a
+ proper prefix of delim_bytes (a delimiter split across buffers), else 0. */

+ATTRIBUTE_PURE
+static idx_t
+field_delim_overlap (char const *buf, idx_t len)
+{
+ idx_t overlap = MIN (len, delim_length - 1); /* Proper prefixes only. */

+ while (0 < overlap) /* Longest candidate first. */
+ {
+ if (memcmp (buf + len - overlap, delim_bytes, overlap) == 0)
+ return overlap;
+ overlap--; /* Retry with the next shorter tail. */
+ }

+ return 0;
+}
+
/* Byte search for line end or delimiter in BUF,
returning results in CTX. */
@@ -1142,6 +1161,12 @@ cut_fields_bytesearch (FILE *stream)
idx_t field_len = terminator ? terminator - (chunk + processed)
: n_avail - processed;
+ if (terminator_kind == FIELD_DATA
+ && !search.at_eof
+ && !whitespace_delimited
+ && !field_delim_is_line_delim ())
+ field_len -= field_delim_overlap (chunk + processed, field_len);
+
if (field_len || terminator)
have_pending_line = true;