summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2026-04-17 17:58:45 -0700
committerJunio C Hamano <gitster@pobox.com>2026-04-17 17:58:46 -0700
commitb7e4cbb83467914adc5505d5de5a99b320c7bd46 (patch)
treef78e22f7356ce518f88e958cbfc95b5b398927d7
parent8dfa4d6d2a28d783555f6ebbc90fd3c2ba2edc3e (diff)
parent41f1ee06d5d6163ae2dcbca1fbd97ef39d8a76bc (diff)
downloadgit-seen.tar.gz
git-seen.zip
Merge branch 'en/diffstat-utf8-truncation-fix' into seen
The computation that shortens the filenames shown in diffstat added up the display widths of individual UTF-8 characters, but forgot to take error cases into account (e.g., an invalid UTF-8 sequence, or a control character).

* en/diffstat-utf8-truncation-fix:
  diff: fix out-of-bounds reads and NULL deref in diffstat UTF-8 truncation
-rw-r--r-- diff.c | 26
-rwxr-xr-x t/t4052-stat-output.sh | 25
2 files changed, 49 insertions, 2 deletions
diff --git a/diff.c b/diff.c
index 397e38b41c..1a3b19f71f 100644
--- a/diff.c
+++ b/diff.c
@@ -2927,6 +2927,28 @@ void print_stat_summary(FILE *fp, int files,
print_stat_summary_inserts_deletes(&o, files, insertions, deletions);
}
+/*
+ * Like utf8_width(), but safe for use in loops that walk a NUL-terminated
+ * string while subtracting per-character widths:
+ *
+ * - At the terminating NUL we return 0 and leave *start untouched, so
+ *   this helper never steps over the end of the string by itself.
+ *   NOTE(review): utf8_width() is assumed to consume a NUL byte like
+ *   any other ASCII byte -- confirm against utf8.c.
+ *
+ * - utf8_width() sets *start to NULL on invalid UTF-8 and returns 0;
+ *   we restore the pointer and advance by one byte, returning width 1
+ *   (matching the strlen()-based fallback in utf8_strwidth()).
+ *
+ * - utf8_width() returns -1 for control characters; we return 0
+ *   (matching utf8_strnwidth() which skips them).
+ *
+ * On return *start is never NULL and the result is never negative.
+ */
+static int utf8_ish_width(const char **start)
+{
+	const char *old = *start;
+	int w;
+
+	/* Nothing left to measure; do not step over the terminator. */
+	if (!*old)
+		return 0;
+
+	w = utf8_width(start, NULL);
+	if (!*start) {
+		/* Invalid sequence: resume one byte after where we began. */
+		*start = old + 1;
+		return 1;
+	}
+	return (w < 0) ? 0 : w;
+}
+
static void show_stats(struct diffstat_t *data, struct diff_options *options)
{
int i, len, add, del, adds = 0, dels = 0;
@@ -3093,8 +3115,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
if (len < 0)
len = 0;
- while (name_len > len)
- name_len -= utf8_width((const char**)&name, NULL);
+ while (name_len > len && *name)
+ name_len -= utf8_ish_width((const char**)&name);
slash = strchr(name, '/');
if (slash)
diff --git a/t/t4052-stat-output.sh b/t/t4052-stat-output.sh
index 7c749062e2..84c53c1a51 100755
--- a/t/t4052-stat-output.sh
+++ b/t/t4052-stat-output.sh
@@ -445,4 +445,29 @@ test_expect_success 'diffstat where line_prefix contains ANSI escape codes is co
test_grep "<RED>|<RESET> ${FILENAME_TRIMMED} | 0" out
'
+test_expect_success 'diffstat truncation with invalid UTF-8 does not crash' '
+	# The byte \300 followed by "-" is not a valid UTF-8 sequence.
+	blob=$(git hash-object -w --stdin </dev/null) &&
+	printf "100644 blob $blob\taaa-\300-aaa\n" |
+	git mktree >tree_file &&
+	new_tree=$(cat tree_file) &&
+	old_tree=$(git mktree </dev/null) &&
+	before=$(git commit-tree -m before $old_tree) &&
+	after=$(git commit-tree -m after -p $before $new_tree) &&
+	git -c core.quotepath=false diff --stat --stat-name-width=5 $before..$after >output &&
+	test_grep "| 0" output
+'
+
+test_expect_success FUNNYNAMES 'diffstat truncation with control chars does not crash' '
+	# Use an octal escape: POSIX printf does not define "\x" escapes,
+	# so "\x01" may be emitted literally instead of as a control byte.
+	FNAME=$(printf "aaa-\001-aaa") &&
+	git commit --allow-empty -m setup &&
+	>"$FNAME" &&
+	git add -- "$FNAME" &&
+	git commit -m "add file with control char name" &&
+	git -c core.quotepath=false diff --stat --stat-name-width=5 HEAD~1..HEAD >output &&
+	test_grep "| 0" output &&
+	rm -- "$FNAME" &&
+	git rm -- "$FNAME" &&
+	git commit -m "remove test file"
+'
+
test_done