summary refs log tree commit diff stats
path: root/tests/paste
diff options
context:
space:
mode:
Diffstat (limited to 'tests/paste')
-rwxr-xr-x tests/paste/multi-byte.sh 103
-rwxr-xr-x tests/paste/paste.pl 89
2 files changed, 192 insertions, 0 deletions
diff --git a/tests/paste/multi-byte.sh b/tests/paste/multi-byte.sh
new file mode 100755
index 000000000..d0749d47d
--- /dev/null
+++ b/tests/paste/multi-byte.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# Test multi-byte delimiter handling in paste
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ paste printf
+
+test "$LOCALE_FR_UTF8" != none || skip_ 'French UTF-8 locale not available'
+
+LC_ALL=$LOCALE_FR_UTF8
+export LC_ALL
+
+# UTF-8 test: 2-byte character (e.g., cent sign)
+delim_cent=$(env printf '\xc2\xa2')
+# UTF-8 test: 3-byte character (e.g., euro sign)
+delim_euro=$(env printf '\xe2\x82\xac')
+# UTF-8 test: 4-byte character (e.g., emoji: U+1F600)
+delim_emoji=$(env printf '\xf0\x9f\x98\x80')
+
+printf '1\n2\n' > f1 || framework_failure_
+printf 'a\nb\n' > f2 || framework_failure_
+
+# Test parallel mode with multi-byte delimiters
+for delim in "$delim_cent" "$delim_euro" "$delim_emoji"; do
+ paste -d "$delim" f1 f2 > out || fail=1
+ printf "1${delim}a\n2${delim}b\n" > exp || framework_failure_
+ compare exp out || fail=1
+done
+
+# Test serial mode with multi-byte delimiters
+printf '1\n2\n3\n' > f3 || framework_failure_
+for delim in "$delim_cent" "$delim_euro"; do
+ paste -s -d "$delim" f3 > out || fail=1
+ printf "1${delim}2${delim}3\n" > exp || framework_failure_
+ compare exp out || fail=1
+done
+
+# Test multiple multi-byte delimiters cycling
+printf 'a\nb\nc\n' > f4 || framework_failure_
+printf '1\n2\n3\n' > f5 || framework_failure_
+printf 'x\ny\nz\n' > f6 || framework_failure_
+paste -d "${delim_cent}${delim_euro}" f4 f5 f6 > out || fail=1
+printf "a${delim_cent}1${delim_euro}x\n" > exp || framework_failure_
+printf "b${delim_cent}2${delim_euro}y\n" >> exp || framework_failure_
+printf "c${delim_cent}3${delim_euro}z\n" >> exp || framework_failure_
+compare exp out || fail=1
+
+# Test multi-byte delimiters mixed with empty delimiter (\0)
+paste -s -d "${delim_euro}\\0" f3 > out || fail=1
+printf "1${delim_euro}23\n" > exp || framework_failure_
+compare exp out || fail=1
+
+# Test invalid UTF-8 sequences are still passed through
+delims_invalid=$(bad_unicode)
+delim_invalid=$(env printf '%s' "$delims_invalid" | cut -b1)
+paste -d "$delims_invalid" f1 f2 > out || fail=1
+printf "1${delim_invalid}a\n2${delim_invalid}b\n" > exp || framework_failure_
+compare exp out || fail=1
+
+# Test that \<multi-byte char> is treated like <multi-byte char>
+# (unknown escapes pass through the escaped character)
+paste -d "\\${delim_euro}" f1 f2 > out || fail=1
+paste -d "$delim_euro" f1 f2 > exp || fail=1
+compare exp out || fail=1
+
+
+# Test GB18030 encoding if available
+export LC_ALL=zh_CN.gb18030
+
+if test "$(locale charmap 2>/dev/null | sed 's/gb/GB/')" = GB18030; then
+ # GB18030 2-byte character (e.g., 0xA2 0xE3 is a valid GB18030 char)
+ delim_gb18030=$(env printf '\xa2\xe3')
+
+ paste -d "$delim_gb18030" f1 f2 > out || fail=1
+ printf "1${delim_gb18030}a\n2${delim_gb18030}b\n" > exp || framework_failure_
+ compare exp out || fail=1
+
+ paste -s -d "$delim_gb18030" f3 > out || fail=1
+ printf "1${delim_gb18030}2${delim_gb18030}3\n" > exp || framework_failure_
+ compare exp out || fail=1
+
+ # Note 0xFF is invalid in GB18030, but we support all single byte delimiters
+ delim_ff=$(env printf '\xff')
+ paste -d "$delim_ff" f1 f2 > out || fail=1
+ printf "1${delim_ff}a\n2${delim_ff}b\n" > exp || framework_failure_
+ compare exp out || fail=1
+fi
+
+Exit $fail
diff --git a/tests/paste/paste.pl b/tests/paste/paste.pl
new file mode 100755
index 000000000..c890a2a56
--- /dev/null
+++ b/tests/paste/paste.pl
@@ -0,0 +1,89 @@
+#!/usr/bin/perl
+# Test paste.
+
+# Copyright (C) 2003-2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $prog = 'paste';
+my $msg = "$prog: delimiter list ends with an unescaped backslash: ";
+
+my @Tests =
+ (
+ # Ensure that paste properly handles files lacking a final newline.
+ ['no-nl-1', {IN=>"a"}, {IN=>"b"}, {OUT=>"a\tb\n"}],
+ ['no-nl-2', {IN=>"a\n"}, {IN=>"b"}, {OUT=>"a\tb\n"}],
+ ['no-nl-3', {IN=>"a"}, {IN=>"b\n"}, {OUT=>"a\tb\n"}],
+ ['no-nl-4', {IN=>"a\n"}, {IN=>"b\n"}, {OUT=>"a\tb\n"}],
+
+ ['zno-nl-1', '-z', {IN=>"a"}, {IN=>"b"}, {OUT=>"a\tb\0"}],
+ ['zno-nl-2', '-z', {IN=>"a\0"}, {IN=>"b"}, {OUT=>"a\tb\0"}],
+ ['zno-nl-3', '-z', {IN=>"a"}, {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+ ['zno-nl-4', '-z', {IN=>"a\0"}, {IN=>"b\0"}, {OUT=>"a\tb\0"}],
+
+ # Same as above, but with two lines in each input file and
+ # the addition of the -d option to make SPACE be the output delimiter.
+ ['no-nla1', '-d" "', {IN=>"1\na"}, {IN=>"2\nb"}, {OUT=>"1 2\na b\n"}],
+ ['no-nla2', '-d" "', {IN=>"1\na\n"}, {IN=>"2\nb"}, {OUT=>"1 2\na b\n"}],
+ ['no-nla3', '-d" "', {IN=>"1\na"}, {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
+ ['no-nla4', '-d" "', {IN=>"1\na\n"}, {IN=>"2\nb\n"}, {OUT=>"1 2\na b\n"}],
+
+ ['zno-nla1', '-zd" "', {IN=>"1\0a"}, {IN=>"2\0b"}, {OUT=>"1 2\0a b\0"}],
+ ['zno-nla2', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b"}, {OUT=>"1 2\0a b\0"}],
+ ['zno-nla3', '-zd" "', {IN=>"1\0a"}, {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+ ['zno-nla4', '-zd" "', {IN=>"1\0a\0"}, {IN=>"2\0b\0"}, {OUT=>"1 2\0a b\0"}],
+
+ # Specifying a delimiter with a trailing backslash would overrun a
+ # malloc'd buffer.
+ ['delim-bs1', q!-d'\'!, {IN=>{'a'x50=>''}}, {EXIT => 1},
+ # We print a single backslash into the expected output
+ {ERR => $msg . q!\\! . "\n"} ],
+
+ # Prior to coreutils-5.1.2, this sort of abuse would make paste
+ # scribble on command-line arguments. With paste from coreutils-5.1.0,
+ # this example would mangle the first file name argument, if it contains
+ # accepted backslash-escapes:
+ # $ paste -d\\ '123\b\b\b.....@' 2>&1 |cat -A
+ # paste: 23^H^H^H.....@...@: No such file or directory$
+ ['delim-bs2', q!-d'\'!, {IN=>{'123\b\b\b.....@'=>''}}, {EXIT => 1},
+ {ERR => $msg . q!\\! . "\n"} ],
+
+ # \0 allows cycling through an empty delimiter (while "-d ''" does not)
+ ['delim-empty-1', q{-s -d '\0,'}, {IN=>"1\n2\n3\n"}, {OUT=>"12,3\n"}],
+
+ # POSIX escapes
+ ['delim-esc-0', q{-s -d '\0'}, {IN=>"1\n2\n"}, {OUT=>"12\n"}],
+ ['delim-esc-n', q{-s -d '\n'}, {IN=>"1\n2\n"}, {OUT=>"1\n2\n"}],
+ ['delim-esc-t', q{-s -d '\t'}, {IN=>"1\n2\n"}, {OUT=>"1\t2\n"}],
+ ['delim-esc-s', q{-s -d '\\\\'}, {IN=>"1\n2\n"}, {OUT=>"1\\2\n"}],
+ # GNU escapes
+ ['delim-esc-b', q{-s -d '\b'}, {IN=>"1\n2\n"}, {OUT=>"1\b2\n"}],
+ ['delim-esc-f', q{-s -d '\f'}, {IN=>"1\n2\n"}, {OUT=>"1\f2\n"}],
+ ['delim-esc-r', q{-s -d '\r'}, {IN=>"1\n2\n"}, {OUT=>"1\r2\n"}],
+ ['delim-esc-v', q{-s -d '\v'}, {IN=>"1\n2\n"}, {OUT=>"1\0132\n"}],
+ ['delim-esc-foo', q{-s -d '\q'}, {IN=>"1\n2\n"}, {OUT=>"1q2\n"}],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;