From f3bcb59ebdb4705c8034c01e25758f6eff71df51 Mon Sep 17 00:00:00 2001
From: Pádraig Brady <P@draigBrady.com>
Date: Tue, 27 Feb 2024 14:12:27 +0000
Subject: tests: move join tests to their own directory

* tests/misc/join-utf8.sh: Move to tests/join
since there are now multiple join tests.
* tests/misc/join.pl: Likewise.
---
 tests/join/join-utf8.sh |  53 +++++++
 tests/join/join.pl      | 377 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/local.mk          |   4 +-
 tests/misc/join-utf8.sh |  53 -------
 tests/misc/join.pl      | 377 ------------------------------------------------
 5 files changed, 432 insertions(+), 432 deletions(-)
 create mode 100755 tests/join/join-utf8.sh
 create mode 100755 tests/join/join.pl
 delete mode 100755 tests/misc/join-utf8.sh
 delete mode 100755 tests/misc/join.pl

(limited to 'tests')

diff --git a/tests/join/join-utf8.sh b/tests/join/join-utf8.sh
new file mode 100755
index 000000000..9af9e55ce
--- /dev/null
+++ b/tests/join/join-utf8.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+# Test join in a UTF-8 locale.
+
+# Copyright 2023-2024 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ join
+
+test "$LOCALE_FR_UTF8" != none || skip_ "French UTF-8 locale not available"
+
+LC_ALL=$LOCALE_FR_UTF8
+export LC_ALL
+
+vertical_line='|'
+multiplication_sign='×'
+en_dash='–'
+old_Persian_word_divider='𐏐'
+
+tflag=
+
+for s in \
+    ' ' \
+    "$vertical_line" \
+    "$multiplication_sign" \
+    "$en_dash" \
+    "$old_Persian_word_divider"
+do
+  printf '0%sA\n1%sa\n2%sb\n4%sc\n' "$s" "$s" "$s" "$s" >a ||
+    framework_failure_
+  printf '0%sB\n1%sd\n3%se\n4%s\0f\n' "$s" "$s" "$s" "$s" >b ||
+    framework_failure_
+  join $tflag$s -a1 -a2 -eouch -o0,1.2,2.2 a b >out || fail=1
+  tflag=-t
+  printf '0%sA%sB\n1%sa%sd\n2%sb%souch\n3%souch%se\n4%sc%s\0f\n' \
+         "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" >exp ||
+    framework_failure_
+  compare exp out || fail=1
+done
+
+Exit $fail
diff --git a/tests/join/join.pl b/tests/join/join.pl
new file mode 100755
index 000000000..39044fcd7
--- /dev/null
+++ b/tests/join/join.pl
@@ -0,0 +1,377 @@
+#!/usr/bin/perl
+# Test join.
+
+# Copyright (C) 2008-2024 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+
+my $limits = getlimits ();
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $prog = 'join';
+
+my $try = "Try '$prog --help' for more information.\n";
+
+my $mb_locale;
+#Comment out next line to disable multibyte tests
+$mb_locale = $ENV{LOCALE_FR_UTF8};
+! defined $mb_locale || $mb_locale eq 'none'
+  and $mb_locale = 'C';
+
+my $delim = chr 0247;
+sub t_subst ($)
+{
+  (my $s = $_[0]) =~ s/:/$delim/g;
+  return $s;
+}
+
+my @tv = (
+# test name
+#     flags       file-1 file-2    expected output   expected return code
+#
+['1a', '-a1',      ["a 1\n", "b\n"],   "a 1\n",          0],
+['1b', '-a2',      ["a 1\n", "b\n"],   "b\n",            0], # Got "\n"
+['1c', '-a1 -a2',  ["a 1\n", "b\n"],   "a 1\nb\n",       0], # Got "a 1\n\n"
+['1d', '-a1',      ["a 1\nb\n", "b\n"],   "a 1\nb\n",    0],
+['1e', '-a2',      ["a 1\nb\n", "b\n"],   "b\n",         0],
+['1f', '-a2',      ["b\n", "a\nb\n"], "a\nb\n",          0],
+
+['2a', '-a1 -e .',  ["a\nb\nc\n", "a x y\nb\nc\n"], "a x y\nb\nc\n", 0],
+['2b', '-a1 -e . -o 2.1,2.2,2.3',  ["a\nb\nc\n", "a x y\nb\nc\n"],
+ "a x y\nb . .\nc . .\n", 0],
+['2c', '-a1 -e . -o 2.1,2.2,2.3',  ["a\nb\nc\nd\n", "a x y\nb\nc\n"],
+ "a x y\nb . .\nc . .\n. . .\n", 0],
+
+['3a', '-t:', ["a:1\nb:1\n", "a:2:\nb:2:\n"], "a:1:2:\nb:1:2:\n", 0],
+
+# operate on whole line (as sort does by default)
+['3b', '-t ""', ["a 1\nb 1\n", "a 1\nb 2\n"], "a 1\n", 0],
+# use NUL as the field delimiter
+['3c', '-t "\\0"', ["a\0a\n", "a\0b\n"], "a\0a\0b\n", 0],
+
+# Just like -a1 and -a2 when there are no pairable lines
+['4a', '-v 1', ["a 1\n", "b\n"],   "a 1\n",          0],
+['4b', '-v 2', ["a 1\n", "b\n"],   "b\n",            0],
+
+['4c', '-v 1', ["a 1\nb\n", "b\n"],        "a 1\n",       0],
+['4d', '-v 2', ["a 1\nb\n", "b\n"],        "",            0],
+['4e', '-v 2', ["b\n",      "a 1\nb\n"],   "a 1\n",       0],
+['5a', '-a1 -e - -o 1.1,2.2',
+ ["a 1\nb 2\n",  "a 11\nb\n"], "a 11\nb -\n", 0],
+['5b', '-a1 -e - -o 1.1,2.2',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15"],
+ "apr 06\naug 14\ndec -\nfeb 15\n", 0],
+['5c', '-a1 -e - -o 1.1,2.2',
+ ["aug 20\ndec 18\n", "aug 14\ndate\nfeb 15"],
+ "aug 14\ndec -\n", 0],
+['5d', '-a1 -e - -o 1.1,2.2',
+ ["dec 18\n",  ""], "dec -\n", 0],
+['5e', '-a2 -e - -o 1.1,2.2',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
+ "apr 06\naug 14\n- -\nfeb 15\n", 0],
+['5f', '-a2 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
+ "06 apr\n14 aug\n- -\n15 feb\n", 0],
+['5g', '-a1 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
+ "06 apr\n14 aug\n- dec\n15 feb\n", 0],
+
+['5h', '-a1 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
+ "06 apr\n14 aug\n- dec\n- feb\n", 0],
+['5i', '-a1 -e - -o 1.1,2.2',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
+ "apr 06\naug 14\ndec -\nfeb -\n", 0],
+
+['5j', '-a2 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
+ "06 apr\n14 aug\n- -\n", 0],
+['5k', '-a2 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
+  "06 apr\n14 aug\n- -\n", 0],
+
+['5l', '-a1 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"],
+  "06 apr\n14 aug\n- dec\n", 0],
+['5m', '-a2 -e - -o 2.2,1.1',
+ ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"],
+  "06 apr\n14 aug\n- -\n15 -\n", 0],
+
+['6a', '-e -',
+ ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"],
+ "a 1 21\nb 2 22\n", 0],
+['6b', '-a1 -e -',
+ ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"],
+ "a 1 21\nb 2 22\nd 4\n", 0],
+['6c', '-a1 -e -',
+ ["a 21\nb 22\nc 23\nf 26\n", "a 1\nb 2\nd 4\n"],
+ "a 21 1\nb 22 2\nc 23\nf 26\n", 0],
+
+['7a', '-a1 -e . -o 2.7',
+ ["a\nb\nc\n", "a x y\nb\nc\n"], ".\n.\n.\n", 0],
+
+['8a', '-a1 -e . -o 0,1.2',
+ ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"],
+ "a .\nb .\nc .\nd G\n", 0],
+['8b', '-a1 -a2 -e . -o 0,1.2',
+ ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"],
+ "a .\nb .\nc .\nd G\ne .\n", 0],
+
+# From David Dyck
+['9a', '', [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
+
+# -o 'auto'
+['10a', '-a1 -a2 -e . -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "a 1 2 3 4\nb 1 . 3 4\nc . . 3 4\nd 1 2 . .\n", 0],
+['10b', '-a1 -a2 -j3 -e . -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "2 a 1 . .\n. b 1 . .\n2 d 1 . .\n4 . . a 3\n4 . . b 3\n4 . . c 3\n"],
+['10c', '-a1 -1 1 -2 4 -e. -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "a 1 2 . . .\nb 1 . . . .\nd 1 2 . . .\n"],
+['10d', '-a2 -1 1 -2 4 -e. -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ ". . . a 3 4\n. . . b 3 4\n. . . c 3 4\n"],
+['10e', '-o auto',
+ ["a 1 2\nb 1 2 discard\n", "a 3 4\nb 3 4 discard\n"],
+ "a 1 2 3 4\nb 1 2 3 4\n"],
+['10f', '-t, -o auto',
+ ["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"],
+ "a,1,,2,3,4\nb,1,2,,3,4\n"],
+
+# For -v2, print the match field correctly with the default output format,
+# when that match field is different between file 1 and file 2.  Fixed in 8.10
+['v2-order', '-v2 -2 2', ["", "2 1\n"], "1 2\n", 0],
+
+# From Tim Smithers: fixed in 1.22l
+['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0],
+
+# From Paul Eggert: fixed in 1.22n
+['sp-vs-blank', '', ["\f 1\n", "\f 2\n"], "\f 1 2\n", 0],
+
+# From Paul Eggert: fixed in 1.22n (this would fail on Solaris7,
+# with LC_ALL set to en_US).
+# Unfortunately, that Solaris7's en_US locale folds case (making
+# the first input file sorted) is not portable, so this test would
+# fail on e.g. Linux systems, because the input to join isn't sorted.
+# ['lc-collate', '', ["a 1a\nB 1B\n", "B 2B\n"], "B 1B 2B\n", 0],
+
+# Based on a report from Antonio Rendas.  Fixed in 2.0.9.
+['8-bit-t', t_subst "-t:",
+ [t_subst "a:1\nb:1\n", t_subst "a:2:\nb:2:\n"],
+ t_subst "a:1:2:\nb:1:2:\n", 0],
+
+# fields > SIZE_MAX are silently interpreted as SIZE_MAX
+['bigfield1', "-1 $limits->{UINTMAX_OFLOW} -2 2",
+ ["a\n", "b\n"], " a b\n", 0],
+['bigfield2', "-1 $limits->{SIZE_OFLOW} -2 2",
+ ["a\n", "b\n"], " a b\n", 0],
+
+# FIXME: change this to ensure the diagnostic makes sense
+['invalid-j', '-j x', ["", ""], "", 1,
+ "$prog: invalid field number: 'x'\n"],
+
+# With ordering check, inputs in order
+['chkodr-1', '--check-order',
+  [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
+
+# Without check, inputs in order
+['chkodr-2', '--nocheck-order',
+ [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
+
+# Without check, both inputs out of order (in fact, in reverse order)
+# but all pairable.  Support for this is a GNU extension.
+['chkodr-3', '--nocheck-order',
+ [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0],
+
+# The extension should work without --nocheck-order, since that is the
+# default.
+['chkodr-4', '',
+ [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0],
+
+# With check, both inputs out of order (in fact, in reverse order)
+['chkodr-5', '--check-order',
+ [" b 1\n a 2\n", " b Y\n a Z\n"], "", 1,
+ "$prog: chkodr-5.1:2: is not sorted:  a 2\n"],
+
+# Similar, but with only file 2 not sorted.
+['chkodr-5b', '--check-order',
+ [" a 2\n b 1\n", " b Y\n a Z\n"], "", 1,
+ "$prog: chkodr-5b.2:2: is not sorted:  a Z\n"],
+
+# Similar, but with the offending line having length 0 (excluding newline).
+['chkodr-5c', '--check-order',
+ [" a 2\n b 1\n", " b Y\n\n"], "", 1,
+ "$prog: chkodr-5c.2:2: is not sorted: \n"],
+
+# Similar, but elicit a warning for each input file (without --check-order).
+['chkodr-5d', '',
+ ["a\nx\n\n", "b\ny\n\n"], "", 1,
+ "$prog: chkodr-5d.1:3: is not sorted: \n" .
+ "$prog: chkodr-5d.2:3: is not sorted: \n" .
+ "$prog: input is not in sorted order\n"
+ ],
+
+# Similar, but make it so each offending line has no newline.
+['chkodr-5e', '',
+ ["a\nx\no", "b\ny\np"], "", 1,
+ "$prog: chkodr-5e.1:3: is not sorted: o\n" .
+ "$prog: chkodr-5e.2:3: is not sorted: p\n" .
+ "$prog: input is not in sorted order\n"
+ ],
+
+# Without order check, both inputs out of order and some lines
+# unpairable.  This is NOT supported by the GNU extension.  All that
+# we really care about for this test is that the return status is
+# zero, since that is the only way to actually verify that the
+# --nocheck-order option had any effect.   We don't actually want to
+# guarantee that join produces this output on stdout.
+['chkodr-6', '--nocheck-order',
+ [" b 1\n a 2\n", " b Y\n c Z\n"], "b 1 Y\n", 0],
+
+# Before 6.10.143, this would mistakenly fail with the diagnostic:
+# join: File 1 is not in sorted order
+['chkodr-7', '-12', ["2 a\n1 b\n", "2 c\n1 d"], "", 0],
+
+# After 8.9, join doesn't report disorder by default
+# when comparing against an empty input file.
+['chkodr-8', '', ["2 a\n1 b\n", ""], "", 0],
+
+# Test '--header' feature
+['header-1', '--header',
+ [ "ID Name\n1 A\n2 B\n", "ID Color\n1 red\n"], "ID Name Color\n1 A red\n", 0],
+
+# '--header' with '--check-order' : The header line is out-of-order but the
+# actual data is in order. This join should succeed.
+['header-2', '--header --check-order',
+ ["ID Name\n1 A\n2 B\n", "ID Color\n2 green\n"],
+ "ID Name Color\n2 B green\n", 0],
+
+# '--header' with '--check-order' : The header line is out-of-order AND the
+# actual data out-of-order. This join should fail.
+['header-3', '--header --check-order',
+ ["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1,
+ "$prog: header-3.1:3: is not sorted: 1 A\n"],
+
+# '--header' with specific output format '-o'.
+# output header line should respect the requested format
+['header-4', '--header -o "0,1.3,2.2"',
+ ["ID Group Name\n1 Foo A\n2 Bar B\n", "ID Color\n2 blue\n"],
+ "ID Name Color\n2 B blue\n", 0],
+
+# '--header' always outputs headers from the first file
+# even if the headers from the second file don't match
+['header-5', '--header',
+ [ "ID1 Name\n1 A\n2 B\n", "ID2 Color\n1 red\n"],
+   "ID1 Name Color\n1 A red\n", 0],
+
+# '--header' doesn't check order of a header
+# even if there is no header in the second file
+['header-6', '--header -a1',
+ [ "ID1 Name\n1 A\n", ""],
+   "ID1 Name\n1 A\n", 0],
+
+# Zero-terminated lines
+['z1', '-z',
+ ["a\0c\0e\0", "a\0b\0c\0"], "a\0c\0", 0],
+
+# not zero-terminated, but related to the code change:
+#  the old readlinebuffer() auto-added '\n' to the last line.
+#  the new readlinebuffer_delim() does not.
+#  Ensure it doesn't matter.
+['z2', '',
+ ["a\nc\ne\n", "a\nb\nc"], "a\nc\n", 0],
+['z3', '',
+ ["a\nc\ne", "a\nb\nc"], "a\nc\n", 0],
+# missing last NUL at the end of the last line (=end of file)
+['z4', '-z',
+ ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0],
+# With -z, embedded newlines are treated as field separators.
+# Note '\n' are converted to ' ' in this case.
+['z5', '-z -a1 -a2',
+ ["a\n\n1\0c 3\0", "a 2\0b\n8\0c 9\0"], "a 1 2\0b 8\0c 3 9\0"],
+# One can avoid field processing like:
+['z6', '-z -t ""',
+ ["a\n1\n\0", "a\n1\n\0"], "a\n1\n\0"],
+
+);
+
+# Convert the above old-style test vectors to the newer
+# format used by Coreutils.pm.
+
+my @Tests;
+foreach my $t (@tv)
+  {
+    my ($test_name, $flags, $in, $exp, $ret, $err_msg) = @$t;
+    my $new_ent = [$test_name, $flags];
+    if (!ref $in)
+      {
+        push @$new_ent, {IN=>$in};
+      }
+    elsif (ref $in eq 'HASH')
+      {
+        # ignore
+      }
+    else
+      {
+        foreach my $e (@$in)
+          {
+            push @$new_ent, {IN=>$e};
+          }
+      }
+    push @$new_ent, {OUT=>$exp};
+    $ret
+      and push @$new_ent, {EXIT=>$ret}, {ERR=>$err_msg};
+    push @Tests, $new_ent;
+  }
+
+if ($mb_locale ne 'C')
+  {
+    # Duplicate each test vector, appending "-mb" to the test name and
+    # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we
+    # provide coverage for multi-byte code paths.
+    my @new;
+    foreach my $t (@Tests)
+      {
+        my @new_t = @$t;
+        my $test_name = shift @new_t;
+
+        #Adjust the output some error messages including test_name for mb
+        if (grep {ref $_ eq 'HASH' && exists $_->{ERR}}
+             (@new_t))
+          {
+            my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"};
+            push @new_t, $sub2;
+            push @$t, $sub2;
+          }
+        push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
+      }
+    push @Tests, @new;
+  }
+
+@Tests = triple_test \@Tests;
+
+#skip invalid-j-mb test, it is failing because of the format
+@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests;
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
diff --git a/tests/local.mk b/tests/local.mk
index 2f6fa5b98..7cd1ef7b5 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -272,8 +272,6 @@ all_tests =					\
   tests/od/od-float.sh				\
   tests/misc/mktemp.pl				\
   tests/misc/arch.sh				\
-  tests/misc/join.pl				\
-  tests/misc/join-utf8.sh			\
   tests/pr/pr-tests.pl				\
   tests/pwd/pwd-option.sh			\
   tests/chcon/chcon-fail.sh			\
@@ -334,6 +332,8 @@ all_tests =					\
   tests/cksum/md5sum-bsd.sh			\
   tests/cksum/md5sum-newline.pl			\
   tests/cksum/md5sum-parallel.sh		\
+  tests/join/join.pl				\
+  tests/join/join-utf8.sh			\
   tests/misc/mknod.sh				\
   tests/nice/nice.sh				\
   tests/nice/nice-fail.sh			\
diff --git a/tests/misc/join-utf8.sh b/tests/misc/join-utf8.sh
deleted file mode 100755
index 9af9e55ce..000000000
--- a/tests/misc/join-utf8.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/sh
-# Test join in a UTF-8 locale.
-
-# Copyright 2023-2024 Free Software Foundation, Inc.
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
-print_ver_ join
-
-test "$LOCALE_FR_UTF8" != none || skip_ "French UTF-8 locale not available"
-
-LC_ALL=$LOCALE_FR_UTF8
-export LC_ALL
-
-vertical_line='|'
-multiplication_sign='×'
-en_dash='–'
-old_Persian_word_divider='𐏐'
-
-tflag=
-
-for s in \
-    ' ' \
-    "$vertical_line" \
-    "$multiplication_sign" \
-    "$en_dash" \
-    "$old_Persian_word_divider"
-do
-  printf '0%sA\n1%sa\n2%sb\n4%sc\n' "$s" "$s" "$s" "$s" >a ||
-    framework_failure_
-  printf '0%sB\n1%sd\n3%se\n4%s\0f\n' "$s" "$s" "$s" "$s" >b ||
-    framework_failure_
-  join $tflag$s -a1 -a2 -eouch -o0,1.2,2.2 a b >out || fail=1
-  tflag=-t
-  printf '0%sA%sB\n1%sa%sd\n2%sb%souch\n3%souch%se\n4%sc%s\0f\n' \
-         "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" "$s" >exp ||
-    framework_failure_
-  compare exp out || fail=1
-done
-
-Exit $fail
diff --git a/tests/misc/join.pl b/tests/misc/join.pl
deleted file mode 100755
index 39044fcd7..000000000
--- a/tests/misc/join.pl
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/perl
-# Test join.
-
-# Copyright (C) 2008-2024 Free Software Foundation, Inc.
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-use strict;
-
-my $limits = getlimits ();
-
-# Turn off localization of executable's output.
-@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
-
-my $prog = 'join';
-
-my $try = "Try '$prog --help' for more information.\n";
-
-my $mb_locale;
-#Comment out next line to disable multibyte tests
-$mb_locale = $ENV{LOCALE_FR_UTF8};
-! defined $mb_locale || $mb_locale eq 'none'
-  and $mb_locale = 'C';
-
-my $delim = chr 0247;
-sub t_subst ($)
-{
-  (my $s = $_[0]) =~ s/:/$delim/g;
-  return $s;
-}
-
-my @tv = (
-# test name
-#     flags       file-1 file-2    expected output   expected return code
-#
-['1a', '-a1',      ["a 1\n", "b\n"],   "a 1\n",          0],
-['1b', '-a2',      ["a 1\n", "b\n"],   "b\n",            0], # Got "\n"
-['1c', '-a1 -a2',  ["a 1\n", "b\n"],   "a 1\nb\n",       0], # Got "a 1\n\n"
-['1d', '-a1',      ["a 1\nb\n", "b\n"],   "a 1\nb\n",    0],
-['1e', '-a2',      ["a 1\nb\n", "b\n"],   "b\n",         0],
-['1f', '-a2',      ["b\n", "a\nb\n"], "a\nb\n",          0],
-
-['2a', '-a1 -e .',  ["a\nb\nc\n", "a x y\nb\nc\n"], "a x y\nb\nc\n", 0],
-['2b', '-a1 -e . -o 2.1,2.2,2.3',  ["a\nb\nc\n", "a x y\nb\nc\n"],
- "a x y\nb . .\nc . .\n", 0],
-['2c', '-a1 -e . -o 2.1,2.2,2.3',  ["a\nb\nc\nd\n", "a x y\nb\nc\n"],
- "a x y\nb . .\nc . .\n. . .\n", 0],
-
-['3a', '-t:', ["a:1\nb:1\n", "a:2:\nb:2:\n"], "a:1:2:\nb:1:2:\n", 0],
-
-# operate on whole line (as sort does by default)
-['3b', '-t ""', ["a 1\nb 1\n", "a 1\nb 2\n"], "a 1\n", 0],
-# use NUL as the field delimiter
-['3c', '-t "\\0"', ["a\0a\n", "a\0b\n"], "a\0a\0b\n", 0],
-
-# Just like -a1 and -a2 when there are no pairable lines
-['4a', '-v 1', ["a 1\n", "b\n"],   "a 1\n",          0],
-['4b', '-v 2', ["a 1\n", "b\n"],   "b\n",            0],
-
-['4c', '-v 1', ["a 1\nb\n", "b\n"],        "a 1\n",       0],
-['4d', '-v 2', ["a 1\nb\n", "b\n"],        "",            0],
-['4e', '-v 2', ["b\n",      "a 1\nb\n"],   "a 1\n",       0],
-['5a', '-a1 -e - -o 1.1,2.2',
- ["a 1\nb 2\n",  "a 11\nb\n"], "a 11\nb -\n", 0],
-['5b', '-a1 -e - -o 1.1,2.2',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15"],
- "apr 06\naug 14\ndec -\nfeb 15\n", 0],
-['5c', '-a1 -e - -o 1.1,2.2',
- ["aug 20\ndec 18\n", "aug 14\ndate\nfeb 15"],
- "aug 14\ndec -\n", 0],
-['5d', '-a1 -e - -o 1.1,2.2',
- ["dec 18\n",  ""], "dec -\n", 0],
-['5e', '-a2 -e - -o 1.1,2.2',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
- "apr 06\naug 14\n- -\nfeb 15\n", 0],
-['5f', '-a2 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
- "06 apr\n14 aug\n- -\n15 feb\n", 0],
-['5g', '-a1 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\nfeb 15\n"],
- "06 apr\n14 aug\n- dec\n15 feb\n", 0],
-
-['5h', '-a1 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
- "06 apr\n14 aug\n- dec\n- feb\n", 0],
-['5i', '-a1 -e - -o 1.1,2.2',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
- "apr 06\naug 14\ndec -\nfeb -\n", 0],
-
-['5j', '-a2 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
- "06 apr\n14 aug\n- -\n", 0],
-['5k', '-a2 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\nfeb 05\n", "apr 06\naug 14\ndate\n"],
-  "06 apr\n14 aug\n- -\n", 0],
-
-['5l', '-a1 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"],
-  "06 apr\n14 aug\n- dec\n", 0],
-['5m', '-a2 -e - -o 2.2,1.1',
- ["apr 15\naug 20\ndec 18\n", "apr 06\naug 14\ndate\nfeb 15\n"],
-  "06 apr\n14 aug\n- -\n15 -\n", 0],
-
-['6a', '-e -',
- ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"],
- "a 1 21\nb 2 22\n", 0],
-['6b', '-a1 -e -',
- ["a 1\nb 2\nd 4\n", "a 21\nb 22\nc 23\nf 26\n"],
- "a 1 21\nb 2 22\nd 4\n", 0],
-['6c', '-a1 -e -',
- ["a 21\nb 22\nc 23\nf 26\n", "a 1\nb 2\nd 4\n"],
- "a 21 1\nb 22 2\nc 23\nf 26\n", 0],
-
-['7a', '-a1 -e . -o 2.7',
- ["a\nb\nc\n", "a x y\nb\nc\n"], ".\n.\n.\n", 0],
-
-['8a', '-a1 -e . -o 0,1.2',
- ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"],
- "a .\nb .\nc .\nd G\n", 0],
-['8b', '-a1 -a2 -e . -o 0,1.2',
- ["a\nb\nc\nd G\n", "a x y\nb\nc\ne\n"],
- "a .\nb .\nc .\nd G\ne .\n", 0],
-
-# From David Dyck
-['9a', '', [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
-
-# -o 'auto'
-['10a', '-a1 -a2 -e . -o auto',
- ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
- "a 1 2 3 4\nb 1 . 3 4\nc . . 3 4\nd 1 2 . .\n", 0],
-['10b', '-a1 -a2 -j3 -e . -o auto',
- ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
- "2 a 1 . .\n. b 1 . .\n2 d 1 . .\n4 . . a 3\n4 . . b 3\n4 . . c 3\n"],
-['10c', '-a1 -1 1 -2 4 -e. -o auto',
- ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
- "a 1 2 . . .\nb 1 . . . .\nd 1 2 . . .\n"],
-['10d', '-a2 -1 1 -2 4 -e. -o auto',
- ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
- ". . . a 3 4\n. . . b 3 4\n. . . c 3 4\n"],
-['10e', '-o auto',
- ["a 1 2\nb 1 2 discard\n", "a 3 4\nb 3 4 discard\n"],
- "a 1 2 3 4\nb 1 2 3 4\n"],
-['10f', '-t, -o auto',
- ["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"],
- "a,1,,2,3,4\nb,1,2,,3,4\n"],
-
-# For -v2, print the match field correctly with the default output format,
-# when that match field is different between file 1 and file 2.  Fixed in 8.10
-['v2-order', '-v2 -2 2', ["", "2 1\n"], "1 2\n", 0],
-
-# From Tim Smithers: fixed in 1.22l
-['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0],
-
-# From Paul Eggert: fixed in 1.22n
-['sp-vs-blank', '', ["\f 1\n", "\f 2\n"], "\f 1 2\n", 0],
-
-# From Paul Eggert: fixed in 1.22n (this would fail on Solaris7,
-# with LC_ALL set to en_US).
-# Unfortunately, that Solaris7's en_US locale folds case (making
-# the first input file sorted) is not portable, so this test would
-# fail on e.g. Linux systems, because the input to join isn't sorted.
-# ['lc-collate', '', ["a 1a\nB 1B\n", "B 2B\n"], "B 1B 2B\n", 0],
-
-# Based on a report from Antonio Rendas.  Fixed in 2.0.9.
-['8-bit-t', t_subst "-t:",
- [t_subst "a:1\nb:1\n", t_subst "a:2:\nb:2:\n"],
- t_subst "a:1:2:\nb:1:2:\n", 0],
-
-# fields > SIZE_MAX are silently interpreted as SIZE_MAX
-['bigfield1', "-1 $limits->{UINTMAX_OFLOW} -2 2",
- ["a\n", "b\n"], " a b\n", 0],
-['bigfield2', "-1 $limits->{SIZE_OFLOW} -2 2",
- ["a\n", "b\n"], " a b\n", 0],
-
-# FIXME: change this to ensure the diagnostic makes sense
-['invalid-j', '-j x', ["", ""], "", 1,
- "$prog: invalid field number: 'x'\n"],
-
-# With ordering check, inputs in order
-['chkodr-1', '--check-order',
-  [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
-
-# Without check, inputs in order
-['chkodr-2', '--nocheck-order',
- [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
-
-# Without check, both inputs out of order (in fact, in reverse order)
-# but all pairable.  Support for this is a GNU extension.
-['chkodr-3', '--nocheck-order',
- [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0],
-
-# The extension should work without --nocheck-order, since that is the
-# default.
-['chkodr-4', '',
- [" b 1\n a 2\n", " b Y\n a Z\n"], "b 1 Y\na 2 Z\n", 0],
-
-# With check, both inputs out of order (in fact, in reverse order)
-['chkodr-5', '--check-order',
- [" b 1\n a 2\n", " b Y\n a Z\n"], "", 1,
- "$prog: chkodr-5.1:2: is not sorted:  a 2\n"],
-
-# Similar, but with only file 2 not sorted.
-['chkodr-5b', '--check-order',
- [" a 2\n b 1\n", " b Y\n a Z\n"], "", 1,
- "$prog: chkodr-5b.2:2: is not sorted:  a Z\n"],
-
-# Similar, but with the offending line having length 0 (excluding newline).
-['chkodr-5c', '--check-order',
- [" a 2\n b 1\n", " b Y\n\n"], "", 1,
- "$prog: chkodr-5c.2:2: is not sorted: \n"],
-
-# Similar, but elicit a warning for each input file (without --check-order).
-['chkodr-5d', '',
- ["a\nx\n\n", "b\ny\n\n"], "", 1,
- "$prog: chkodr-5d.1:3: is not sorted: \n" .
- "$prog: chkodr-5d.2:3: is not sorted: \n" .
- "$prog: input is not in sorted order\n"
- ],
-
-# Similar, but make it so each offending line has no newline.
-['chkodr-5e', '',
- ["a\nx\no", "b\ny\np"], "", 1,
- "$prog: chkodr-5e.1:3: is not sorted: o\n" .
- "$prog: chkodr-5e.2:3: is not sorted: p\n" .
- "$prog: input is not in sorted order\n"
- ],
-
-# Without order check, both inputs out of order and some lines
-# unpairable.  This is NOT supported by the GNU extension.  All that
-# we really care about for this test is that the return status is
-# zero, since that is the only way to actually verify that the
-# --nocheck-order option had any effect.   We don't actually want to
-# guarantee that join produces this output on stdout.
-['chkodr-6', '--nocheck-order',
- [" b 1\n a 2\n", " b Y\n c Z\n"], "b 1 Y\n", 0],
-
-# Before 6.10.143, this would mistakenly fail with the diagnostic:
-# join: File 1 is not in sorted order
-['chkodr-7', '-12', ["2 a\n1 b\n", "2 c\n1 d"], "", 0],
-
-# After 8.9, join doesn't report disorder by default
-# when comparing against an empty input file.
-['chkodr-8', '', ["2 a\n1 b\n", ""], "", 0],
-
-# Test '--header' feature
-['header-1', '--header',
- [ "ID Name\n1 A\n2 B\n", "ID Color\n1 red\n"], "ID Name Color\n1 A red\n", 0],
-
-# '--header' with '--check-order' : The header line is out-of-order but the
-# actual data is in order. This join should succeed.
-['header-2', '--header --check-order',
- ["ID Name\n1 A\n2 B\n", "ID Color\n2 green\n"],
- "ID Name Color\n2 B green\n", 0],
-
-# '--header' with '--check-order' : The header line is out-of-order AND the
-# actual data out-of-order. This join should fail.
-['header-3', '--header --check-order',
- ["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1,
- "$prog: header-3.1:3: is not sorted: 1 A\n"],
-
-# '--header' with specific output format '-o'.
-# output header line should respect the requested format
-['header-4', '--header -o "0,1.3,2.2"',
- ["ID Group Name\n1 Foo A\n2 Bar B\n", "ID Color\n2 blue\n"],
- "ID Name Color\n2 B blue\n", 0],
-
-# '--header' always outputs headers from the first file
-# even if the headers from the second file don't match
-['header-5', '--header',
- [ "ID1 Name\n1 A\n2 B\n", "ID2 Color\n1 red\n"],
-   "ID1 Name Color\n1 A red\n", 0],
-
-# '--header' doesn't check order of a header
-# even if there is no header in the second file
-['header-6', '--header -a1',
- [ "ID1 Name\n1 A\n", ""],
-   "ID1 Name\n1 A\n", 0],
-
-# Zero-terminated lines
-['z1', '-z',
- ["a\0c\0e\0", "a\0b\0c\0"], "a\0c\0", 0],
-
-# not zero-terminated, but related to the code change:
-#  the old readlinebuffer() auto-added '\n' to the last line.
-#  the new readlinebuffer_delim() does not.
-#  Ensure it doesn't matter.
-['z2', '',
- ["a\nc\ne\n", "a\nb\nc"], "a\nc\n", 0],
-['z3', '',
- ["a\nc\ne", "a\nb\nc"], "a\nc\n", 0],
-# missing last NUL at the end of the last line (=end of file)
-['z4', '-z',
- ["a\0c\0e", "a\0b\0c"], "a\0c\0", 0],
-# With -z, embedded newlines are treated as field separators.
-# Note '\n' are converted to ' ' in this case.
-['z5', '-z -a1 -a2',
- ["a\n\n1\0c 3\0", "a 2\0b\n8\0c 9\0"], "a 1 2\0b 8\0c 3 9\0"],
-# One can avoid field processing like:
-['z6', '-z -t ""',
- ["a\n1\n\0", "a\n1\n\0"], "a\n1\n\0"],
-
-);
-
-# Convert the above old-style test vectors to the newer
-# format used by Coreutils.pm.
-
-my @Tests;
-foreach my $t (@tv)
-  {
-    my ($test_name, $flags, $in, $exp, $ret, $err_msg) = @$t;
-    my $new_ent = [$test_name, $flags];
-    if (!ref $in)
-      {
-        push @$new_ent, {IN=>$in};
-      }
-    elsif (ref $in eq 'HASH')
-      {
-        # ignore
-      }
-    else
-      {
-        foreach my $e (@$in)
-          {
-            push @$new_ent, {IN=>$e};
-          }
-      }
-    push @$new_ent, {OUT=>$exp};
-    $ret
-      and push @$new_ent, {EXIT=>$ret}, {ERR=>$err_msg};
-    push @Tests, $new_ent;
-  }
-
-if ($mb_locale ne 'C')
-  {
-    # Duplicate each test vector, appending "-mb" to the test name and
-    # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we
-    # provide coverage for multi-byte code paths.
-    my @new;
-    foreach my $t (@Tests)
-      {
-        my @new_t = @$t;
-        my $test_name = shift @new_t;
-
-        #Adjust the output some error messages including test_name for mb
-        if (grep {ref $_ eq 'HASH' && exists $_->{ERR}}
-             (@new_t))
-          {
-            my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"};
-            push @new_t, $sub2;
-            push @$t, $sub2;
-          }
-        push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
-      }
-    push @Tests, @new;
-  }
-
-@Tests = triple_test \@Tests;
-
-#skip invalid-j-mb test, it is failing because of the format
-@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests;
-
-my $save_temps = $ENV{DEBUG};
-my $verbose = $ENV{VERBOSE};
-
-my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
-exit $fail;
-- 
cgit v1.2.3