diff options
| author | Junio C Hamano <gitster@pobox.com> | 2026-04-17 17:58:45 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2026-04-17 17:58:46 -0700 |
| commit | b7e4cbb83467914adc5505d5de5a99b320c7bd46 (patch) | |
| tree | f78e22f7356ce518f88e958cbfc95b5b398927d7 | |
| parent | 8dfa4d6d2a28d783555f6ebbc90fd3c2ba2edc3e (diff) | |
| parent | 41f1ee06d5d6163ae2dcbca1fbd97ef39d8a76bc (diff) | |
| download | git-seen.tar.gz git-seen.zip | |
Merge branch 'en/diffstat-utf8-truncation-fix' into seen
The computation to shorten the filenames shown in diffstat measured
width of individual UTF-8 characters to add up, but forgot to take
into account error cases (e.g., an invalid UTF-8 sequence, or a
control character).
* en/diffstat-utf8-truncation-fix:
diff: fix out-of-bounds reads and NULL deref in diffstat UTF-8 truncation
| -rw-r--r-- | diff.c | 26 | ||||
| -rwxr-xr-x | t/t4052-stat-output.sh | 25 |
2 files changed, 49 insertions, 2 deletions
@@ -2927,6 +2927,28 @@ void print_stat_summary(FILE *fp, int files,
 	print_stat_summary_inserts_deletes(&o, files, insertions, deletions);
 }
 
+/*
+ * Like utf8_width(), but guaranteed safe for use in loops that subtract
+ * per-character widths:
+ *
+ * - utf8_width() sets *start to NULL on invalid UTF-8 and returns 0;
+ *   we restore the pointer and advance by one byte, returning width 1
+ *   (matching the strlen()-based fallback in utf8_strwidth()).
+ *
+ * - utf8_width() returns -1 for control characters; we return 0
+ *   (matching utf8_strnwidth() which skips them).
+ */
+static int utf8_ish_width(const char **start)
+{
+	const char *old = *start;
+	int w = utf8_width(start, NULL);
+	if (!*start) {
+		*start = old + 1;
+		return 1;
+	}
+	return (w < 0) ? 0 : w;
+}
+
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
 {
 	int i, len, add, del, adds = 0, dels = 0;
@@ -3093,8 +3115,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			if (len < 0)
 				len = 0;
 
-			while (name_len > len)
-				name_len -= utf8_width((const char**)&name, NULL);
+			while (name_len > len && *name)
+				name_len -= utf8_ish_width((const char**)&name);
 
 			slash = strchr(name, '/');
 			if (slash)
diff --git a/t/t4052-stat-output.sh b/t/t4052-stat-output.sh
index 7c749062e2..84c53c1a51 100755
--- a/t/t4052-stat-output.sh
+++ b/t/t4052-stat-output.sh
@@ -445,4 +445,29 @@ test_expect_success 'diffstat where line_prefix contains ANSI escape codes is co
 	test_grep "<RED>|<RESET> ${FILENAME_TRIMMED} | 0" out
 '
 
+test_expect_success 'diffstat truncation with invalid UTF-8 does not crash' '
+	empty_blob=$(git hash-object -w --stdin </dev/null) &&
+	printf "100644 blob $empty_blob\taaa-\300-aaa\n" |
+	git mktree >tree_file &&
+	tree=$(cat tree_file) &&
+	empty_tree=$(git mktree </dev/null) &&
+	c1=$(git commit-tree -m before $empty_tree) &&
+	c2=$(git commit-tree -m after -p $c1 $tree) &&
+	git -c core.quotepath=false diff --stat --stat-name-width=5 $c1..$c2 >output &&
+	test_grep "| 0" output
+'
+
+test_expect_success FUNNYNAMES 'diffstat truncation with control chars does not crash' '
+	FNAME=$(printf "aaa-\x01-aaa") &&
+	git commit --allow-empty -m setup &&
+	>$FNAME &&
+	git add -- $FNAME &&
+	git commit -m "add file with control char name" &&
+	git -c core.quotepath=false diff --stat --stat-name-width=5 HEAD~1..HEAD >output &&
+	test_grep "| 0" output &&
+	rm -- $FNAME &&
+	git rm -- $FNAME &&
+	git commit -m "remove test file"
+'
+
 test_done
