diff options
| author | Pádraig Brady <P@draigBrady.com> | 2026-03-09 22:23:12 +0000 |
|---|---|---|
| committer | Pádraig Brady <P@draigBrady.com> | 2026-03-10 16:30:52 +0000 |
| commit | 5ca27c1929d41bee3740add87cce459fb969ef97 (patch) | |
| tree | c2332ae50e5ac9b0b031422f38d64831c8a038e8 /src | |
| parent | 3ef107fa1296b7d32182a4a3b36767ea963a1450 (diff) | |
| download | coreutils-5ca27c1929d41bee3740add87cce459fb969ef97.tar.gz coreutils-5ca27c1929d41bee3740add87cce459fb969ef97.zip | |

all: use more consistent blank character determination

* src/system.h (c32issep): A new function that is essentially
iswblank() on GLIBC platforms, and iswspace() with exceptions elsewhere.
* src/expand.c: Use it instead of c32isblank().
* src/fold.c: Likewise.
* src/join.c: Likewise.
* src/numfmt.c: Likewise.
* src/unexpand.c: Likewise.
* src/uniq.c: Likewise.
* NEWS: Mention the improvement.

Diffstat (limited to 'src')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/expand.c | 3 |
| -rw-r--r-- | src/fold.c | 2 |
| -rw-r--r-- | src/join.c | 4 |
| -rw-r--r-- | src/numfmt.c | 5 |
| -rw-r--r-- | src/system.h | 23 |
| -rw-r--r-- | src/unexpand.c | 2 |
| -rw-r--r-- | src/uniq.c | 2 |

7 files changed, 31 insertions, 10 deletions

```diff
diff --git a/src/expand.c b/src/expand.c
index 6d4223c9b..1d0759079 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -140,8 +140,7 @@ expand (void)
 
           if (convert)
             {
-              convert &= convert_entire_line
-                         || !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+              convert &= convert_entire_line || c32issep (g.ch);
 
               if (g.ch == '\t')
                 {
diff --git a/src/fold.c b/src/fold.c
index 666490f95..f49078f01 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -210,7 +210,7 @@ fold_file (char const *filename, size_t width)
                       for (mcel_t g2; logical_p < logical_lim; logical_p += g2.len)
                         {
                           g2 = mcel_scan (logical_p, logical_lim);
-                          if (c32isblank (g2.ch) && ! c32isnbspace (g2.ch))
+                          if (c32issep (g2.ch))
                             {
                               space_length = g2.len;
                               logical_end = logical_p - line_out;
diff --git a/src/join.c b/src/join.c
index 883a42005..4346758a6 100644
--- a/src/join.c
+++ b/src/join.c
@@ -308,7 +308,7 @@ eq_tab (mcel_t g)
 static bool
 newline_or_blank (mcel_t g)
 {
-  return g.ch == '\n' || c32isblank (g.ch);
+  return g.ch == '\n' || c32issep (g.ch);
 }
 
 /* Fill in the 'fields' structure in LINE. */
@@ -918,7 +918,7 @@ decode_field_spec (char const *s, int *file_index, idx_t *field_index)
 static bool
 comma_or_blank (mcel_t g)
 {
-  return g.ch == ',' || c32isblank (g.ch);
+  return g.ch == ',' || c32issep (g.ch);
 }
 
 /* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
diff --git a/src/numfmt.c b/src/numfmt.c
index fb6cb3396..2436c5487 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -215,8 +215,7 @@ static bool dev_debug = false;
 static bool
 newline_or_blank (mcel_t g)
 {
-  return g.ch == '\n'
-         || (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+  return g.ch == '\n' || c32issep (g.ch);
 }
 
 static inline int
@@ -673,7 +672,7 @@ simple_strtod_human (char const *input_str,
       if (!matched_unit_sep)
         {
           mcel_t g = mcel_scanz (*endptr);
-          if (c32isblank (g.ch) || c32isnbspace (g.ch))
+          if (c32issep (g.ch) || c32isnbspace (g.ch))
             (*endptr) += g.len;
         }
 
diff --git a/src/system.h b/src/system.h
index 988c7cd9e..79b3e6069 100644
--- a/src/system.h
+++ b/src/system.h
@@ -160,6 +160,29 @@ c32isnbspace (char32_t wc)
   return wc == 0x00A0 || wc == 0x2007 || wc == 0x202F || wc == 0x2060;
 }
 
+ATTRIBUTE_PURE
+static inline int
+c32isvertspace (char32_t wc)
+{
+  return wc == 0x000A || wc == 0x000B || wc == 0x000C || wc == 0x000D
+         || wc == 0x2028 || wc == 0x2029;
+}
+
+
+/* c32isblank() is too variable on non GLIBC platforms.
+   E.g., does not include \u3000 ideographic space on musl.
+   E.g., does include non-breaking space on Solaris and NetBSD.
+   This equivalent is more consistent across systems. */
+ATTRIBUTE_PURE
+static inline bool
+c32issep (char32_t wc)
+{
+#if defined __GLIBC__
+  return !! c32isblank (wc);
+#endif
+  return !! (c32isspace (wc) && ! c32isvertspace (wc) && ! c32isnbspace (wc));
+}
+
 #include <locale.h>
 
 /* Take care of NLS matters. */
diff --git a/src/unexpand.c b/src/unexpand.c
index 16d0f0031..4fbf9d3f8 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -176,7 +176,7 @@ unexpand (void)
 
           if (convert)
             {
-              bool blank = !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+              bool blank = c32issep (g.ch);
 
               if (blank)
                 {
diff --git a/src/uniq.c b/src/uniq.c
index eebff4b7b..30463598a 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -254,7 +254,7 @@ size_opt (char const *opt, char const *msgid)
 static bool
 newline_or_blank (mcel_t g)
 {
-  return g.ch == '\n' || c32isblank (g.ch);
+  return g.ch == '\n' || c32issep (g.ch);
 }
 
 /* Given a linebuffer LINE,
```
