diff options
Diffstat (limited to 'tests/paste')
| -rwxr-xr-x | tests/paste/multi-byte.sh | 103 | ||||
| -rwxr-xr-x | tests/paste/paste.pl | 89 |
2 files changed, 192 insertions, 0 deletions
diff --git a/tests/paste/multi-byte.sh b/tests/paste/multi-byte.sh
new file mode 100755
index 000000000..d0749d47d
--- /dev/null
+++ b/tests/paste/multi-byte.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# Test multi-byte delimiter handling in paste
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ paste printf
+
+test "$LOCALE_FR_UTF8" != none || skip_ 'French UTF-8 locale not available'
+
+LC_ALL=$LOCALE_FR_UTF8
+export LC_ALL
+
+# UTF-8 test: 2-byte character (e.g., cent sign)
+delim_cent=$(env printf '\xc2\xa2')
+# UTF-8 test: 3-byte character (e.g., euro sign)
+delim_euro=$(env printf '\xe2\x82\xac')
+# UTF-8 test: 4-byte character (e.g., emoji: U+1F600)
+delim_emoji=$(env printf '\xf0\x9f\x98\x80')
+
+printf '1\n2\n' > f1 || framework_failure_
+printf 'a\nb\n' > f2 || framework_failure_
+
+# Test parallel mode with multi-byte delimiters
+for delim in "$delim_cent" "$delim_euro" "$delim_emoji"; do
+  paste -d "$delim" f1 f2 > out || fail=1
+  printf "1${delim}a\n2${delim}b\n" > exp || framework_failure_
+  compare exp out || fail=1
+done
+
+# Test serial mode with multi-byte delimiters
+printf '1\n2\n3\n' > f3 || framework_failure_
+for delim in "$delim_cent" "$delim_euro" "$delim_emoji"; do
+  paste -s -d "$delim" f3 > out || fail=1
+  printf "1${delim}2${delim}3\n" > exp || framework_failure_
+  compare exp out || fail=1
+done
+
+# Test multiple multi-byte delimiters cycling
+printf 'a\nb\nc\n' > f4 || framework_failure_
+printf '1\n2\n3\n' > f5 || framework_failure_
+printf 'x\ny\nz\n' > f6 || framework_failure_
+paste -d "${delim_cent}${delim_euro}" f4 f5 f6 > out || fail=1
+printf "a${delim_cent}1${delim_euro}x\n" > exp || framework_failure_
+printf "b${delim_cent}2${delim_euro}y\n" >> exp || framework_failure_
+printf "c${delim_cent}3${delim_euro}z\n" >> exp || framework_failure_
+compare exp out || fail=1
+
+# Test multi-byte delimiters mixed with empty delimiter (\0)
+paste -s -d "${delim_euro}\\0" f3 > out || fail=1
+printf "1${delim_euro}23\n" > exp || framework_failure_
+compare exp out || fail=1
+
+# Test invalid UTF-8 sequences are still passed through
+delims_invalid=$(bad_unicode)
+delim_invalid=$(env printf '%s' "$delims_invalid" | cut -b1)
+paste -d "$delims_invalid" f1 f2 > out || fail=1
+printf "1${delim_invalid}a\n2${delim_invalid}b\n" > exp || framework_failure_
+compare exp out || fail=1
+
+# Test that \<multi-byte char> is treated like <multi-byte char>
+# (unknown escapes pass through the escaped character)
+paste -d "\\${delim_euro}" f1 f2 > out || fail=1
+paste -d "$delim_euro" f1 f2 > exp || fail=1
+compare exp out || fail=1
+
+
+# Test GB18030 encoding if available
+export LC_ALL=zh_CN.gb18030
+
+if test "$(locale charmap 2>/dev/null | sed 's/gb/GB/')" = GB18030; then
+  # GB18030 2-byte character (e.g., 0xA2 0xE3 is a valid GB18030 char)
+  delim_gb18030=$(env printf '\xa2\xe3')
+
+  paste -d "$delim_gb18030" f1 f2 > out || fail=1
+  printf "1${delim_gb18030}a\n2${delim_gb18030}b\n" > exp || framework_failure_
+  compare exp out || fail=1
+
+  paste -s -d "$delim_gb18030" f3 > out || fail=1
+  printf "1${delim_gb18030}2${delim_gb18030}3\n" > exp || framework_failure_
+  compare exp out || fail=1
+
+  # Note 0xFF is invalid in GB18030, but we support all single byte delimiters
+  delim_ff=$(env printf '\xff')
+  paste -d "$delim_ff" f1 f2 > out || fail=1
+  printf "1${delim_ff}a\n2${delim_ff}b\n" > exp || framework_failure_
+  compare exp out || fail=1
+fi
+
+Exit $fail
diff --git a/tests/paste/paste.pl b/tests/paste/paste.pl
new file mode 100755
index 000000000..c890a2a56
--- /dev/null
+++ b/tests/paste/paste.pl
@@ -0,0 +1,89 @@
+#!/usr/bin/perl
+# Test paste.
+
+# Copyright (C) 2003-2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $prog = 'paste';
+my $msg = "$prog: delimiter list ends with an unescaped backslash: ";
+
+my @Tests =
+  (
+   # Ensure that paste properly handles files lacking a final newline.
+   ['no-nl-1', {IN=>"a"}, {IN=>"b"}, {OUT=>"a\tb\n"}],
+   ['no-nl-2', {IN=>"a\n"}, {IN=>"b"}, {OUT=>"a\tb\n"}],
+   ['no-nl-3', {IN=>"a"}, {IN=>"b\n"}, {OUT=>"a\tb\n"}],
+   ['no-nl-4', {IN=>"a\n"}, {IN=>"b\n"}, {OUT=>"a\tb\n"}],
+
+   ['zno-nl-1', '-z', {IN=>"a"}, {IN=>"b"}, {OUT=>"a\tb\0"}],
+   ['zno-nl-2', '-z', {IN=>"a\0"}, {IN=>"b"}, {OUT=>"a\tb\0"}],
+   ['zno-nl-3', '-z', {IN=>"a"}, {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+   ['zno-nl-4', '-z', {IN=>"a\0"}, {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+
+   # Same as above, but with two lines in each input file and
+   # the addition of the -d option to make SPACE be the output delimiter.
+   ['no-nla1', '-d" "', {IN=>"1\na"}, {IN=>"2\nb"}, {OUT=>"1 2\na b\n"}],
+   ['no-nla2', '-d" "', {IN=>"1\na\n"}, {IN=>"2\nb"}, {OUT=>"1 2\na b\n"}],
+   ['no-nla3', '-d" "', {IN=>"1\na"}, {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
+   ['no-nla4', '-d" "', {IN=>"1\na\n"}, {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
+
+   ['zno-nla1', '-zd" "', {IN=>"1\0a"}, {IN=>"2\0b"}, {OUT=>"1 2\0a b\0"}],
+   ['zno-nla2', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b"}, {OUT=>"1 2\0a b\0"}],
+   ['zno-nla3', '-zd" "', {IN=>"1\0a"}, {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+   ['zno-nla4', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+
+   # Specifying a delimiter with a trailing backslash would overrun a
+   # malloc'd buffer.
+   ['delim-bs1', q!-d'\'!, {IN=>{'a'x50=>''}}, {EXIT => 1},
+    # We print a single backslash into the expected output
+    {ERR => $msg . q!\\! . "\n"} ],
+
+   # Prior to coreutils-5.1.2, this sort of abuse would make paste
+   # scribble on command-line arguments. With paste from coreutils-5.1.0,
+   # this example would mangle the first file name argument, if it contains
+   # accepted backslash-escapes:
+   # $ paste -d\\ '123\b\b\b.....@' 2>&1 |cat -A
+   # paste: 23^H^H^H.....@...@: No such file or directory$
+   ['delim-bs2', q!-d'\'!, {IN=>{'123\b\b\b.....@'=>''}}, {EXIT => 1},
+    {ERR => $msg . q!\\! . "\n"} ],
+
+   # \0 allows cycling through an empty delimiter (while "-d ''" does not)
+   ['delim-empty-1', q{-s -d '\0,'}, {IN=>"1\n2\n3\n"}, {OUT=>"12,3\n"}],
+
+   # POSIX escapes
+   ['delim-esc-0', q{-s -d '\0'}, {IN=>"1\n2\n"}, {OUT=>"12\n"}],
+   ['delim-esc-n', q{-s -d '\n'}, {IN=>"1\n2\n"}, {OUT=>"1\n2\n"}],
+   ['delim-esc-t', q{-s -d '\t'}, {IN=>"1\n2\n"}, {OUT=>"1\t2\n"}],
+   ['delim-esc-s', q{-s -d '\\\\'}, {IN=>"1\n2\n"}, {OUT=>"1\\2\n"}],
+   # GNU escapes
+   ['delim-esc-b', q{-s -d '\b'}, {IN=>"1\n2\n"}, {OUT=>"1\b2\n"}],
+   ['delim-esc-f', q{-s -d '\f'}, {IN=>"1\n2\n"}, {OUT=>"1\f2\n"}],
+   ['delim-esc-r', q{-s -d '\r'}, {IN=>"1\n2\n"}, {OUT=>"1\r2\n"}],
+   ['delim-esc-v', q{-s -d '\v'}, {IN=>"1\n2\n"}, {OUT=>"1\0132\n"}],
+   ['delim-esc-foo', q{-s -d '\q'}, {IN=>"1\n2\n"}, {OUT=>"1q2\n"}],
+  );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
