diff options
| author | Pádraig Brady <P@draigBrady.com> | 2025-10-18 17:44:49 +0100 |
|---|---|---|
| committer | Pádraig Brady <P@draigBrady.com> | 2025-10-20 13:13:24 +0100 |
| commit | 09af58f4f96daf9ac469742fb7d061dab55cdf14 (patch) | |
| tree | 65f44c5e1b7228878fefaf894ede818168b46a74 /src | |
| parent | numfmt: use multi-byte aware suffix matching (diff) | |
| download | coreutils-09af58f4f96daf9ac469742fb7d061dab55cdf14.tar.gz coreutils-09af58f4f96daf9ac469742fb7d061dab55cdf14.zip | |
numfmt: support multi-byte --delimiter
* bootstrap.conf: Depend on mbsstr() to robustly search for a
multi-byte delimiter character (string) within a multi-byte string.
* src/numfmt.c (main): Accept a valid multi-byte delimiter character.
(next_field): Adjust delimiter search from single byte
to multi-byte aware. Use mbsstr to find the first match.
* tests/misc/numfmt.pl: Add test case.
* NEWS: Mention the improvement.
Diffstat (limited to 'src')
| -rw-r--r-- | src/numfmt.c | 46 |
1 file changed, 24 insertions, 22 deletions
diff --git a/src/numfmt.c b/src/numfmt.c index cc80ccc5d..0f0a8770b 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -156,9 +156,6 @@ static struct option const longopts[] = {nullptr, 0, nullptr, 0} }; -/* If delimiter has this value, blanks separate fields. */ -enum { DELIMITER_DEFAULT = CHAR_MAX + 1 }; - /* Maximum number of digits we can safely handle without precision loss, if scaling is 'none'. */ enum { MAX_UNSCALED_DIGITS = LDBL_DIG }; @@ -194,8 +191,8 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS; /* auto-pad each line based on skipped whitespace. */ static int auto_padding = 0; -/* field delimiter */ -static int delimiter = DELIMITER_DEFAULT; +/* field delimiter - if nullptr, blanks separate fields. */ +static char const *delimiter = nullptr; /* line delimiter. */ static unsigned char line_delim = '\n'; @@ -1374,14 +1371,10 @@ next_field (char **line) char *field_start = *line; char *field_end = field_start; - if (delimiter != DELIMITER_DEFAULT) + if (delimiter) { - if (*field_start != delimiter) - { - while (*field_end && *field_end != delimiter) - ++field_end; - } - /* else empty field */ + if (! *delimiter || ! (field_end = mbsstr (field_start, delimiter))) + field_end = strchr (field_start, '\0'); } else { @@ -1462,11 +1455,13 @@ process_line (char *line, bool newline) if (! process_field (next, field)) valid_number = false; - fputc ((delimiter == DELIMITER_DEFAULT) ? - ' ' : delimiter, stdout); + if (delimiter != nullptr) + fputs (delimiter, stdout); + else + fputc (' ', stdout); - if (delimiter != DELIMITER_DEFAULT) - line++; + if (delimiter) + line += MAX (strlen (delimiter), 1); else { *line = end_field; @@ -1573,10 +1568,17 @@ main (int argc, char **argv) case 'd': /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' 
*/ - if (optarg[0] != '\0' && optarg[1] != '\0') - error (EXIT_FAILURE, 0, - _("the delimiter must be a single character")); - delimiter = optarg[0]; + if (optarg[0] != '\0') + { + mcel_t g = mcel_scanz (optarg); + /* Note we always allow single bytes, especially since mcel + explicitly does not avoid https://sourceware.org/PR29511 + I.e., we ignore g.err, and rely on g.len==1 with g.err. */ + if (optarg[g.len] != '\0') + error (EXIT_FAILURE, 0, + _("the delimiter must be a single character")); + } + delimiter = optarg; break; case 'z': @@ -1642,7 +1644,7 @@ main (int argc, char **argv) && !grouping && (padding_width == 0) && (format_str == nullptr)) error (0, 0, _("no conversion option specified")); - if (debug && unit_separator && delimiter == DELIMITER_DEFAULT) + if (debug && unit_separator && delimiter == nullptr) error (0, 0, _("field delimiters have higher precedence than unit separators")); @@ -1657,7 +1659,7 @@ main (int argc, char **argv) error (0, 0, _("grouping has no effect in this locale")); } - auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT); + auto_padding = (padding_width == 0 && delimiter == nullptr); if (inval_style != inval_abort) conv_exit_code = 0; |
