diff options
| author | Pádraig Brady <P@draigBrady.com> | 2026-04-02 21:56:23 +0100 |
|---|---|---|
| committer | Pádraig Brady <P@draigBrady.com> | 2026-04-06 15:52:58 +0100 |
| commit | 1a44a25808f9bda727ffbda753ff2eeab3bf79cc (patch) | |
| tree | 08a40465d92b7a3f47ebd02b46cb043758769da1 /src | |
| parent | 57c87043f6cdb6aeb043b78d607a43a9ae615430 (diff) | |
| download | coreutils-1a44a25808f9bda727ffbda753ff2eeab3bf79cc.tar.gz coreutils-1a44a25808f9bda727ffbda753ff2eeab3bf79cc.zip | |
cut: -f: fix handling of multi-byte delimiters that span buffers
* src/cut.c (cut_fields_bytesearch): Ensure up to delim_length - 1
bytes are left for the next refill.
* tests/cut/cut.pl: Add a test case.
Diffstat (limited to 'src')
| -rw-r--r-- | src/cut.c | 25 |
1 file changed, 25 insertions, 0 deletions
```diff
@@ -628,6 +628,25 @@ find_field_delim (char *buf, size_t len)
 #endif
 }
 
+/* Return the number of trailing bytes in BUF that could be the initial
+   bytes of a delimiter split across buffers. */
+
+ATTRIBUTE_PURE
+static idx_t
+field_delim_overlap (char const *buf, idx_t len)
+{
+  idx_t overlap = MIN (len, delim_length - 1);
+
+  while (0 < overlap)
+    {
+      if (memcmp (buf + len - overlap, delim_bytes, overlap) == 0)
+        return overlap;
+      overlap--;
+    }
+
+  return 0;
+}
+
 /* Byte search for line end or delimiter in BUF,
    returning results in CTX. */
 
@@ -1142,6 +1161,12 @@ cut_fields_bytesearch (FILE *stream)
           idx_t field_len = terminator
                             ? terminator - (chunk + processed)
                             : n_avail - processed;
+          if (terminator_kind == FIELD_DATA
+              && !search.at_eof
+              && !whitespace_delimited
+              && !field_delim_is_line_delim ())
+            field_len -= field_delim_overlap (chunk + processed, field_len);
+
           if (field_len || terminator)
             have_pending_line = true;
 
```
