From 9622610e55c7d4f81a924387947884b2ac268934 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:38 -0400 Subject: commit-graph: detect out-of-bounds extra-edges pointers If an entry in a commit-graph file has more than 2 parents, the fixed-size parent fields instead point to an offset within an "extra edges" chunk. We blindly follow these, assuming that the chunk is present and sufficiently large; this can lead to an out-of-bounds read for a corrupt or malicious file. We can fix this by recording the size of the chunk and adding a bounds-check in fill_commit_in_graph(). There are a few tricky bits: 1. We'll switch from working with a pointer to an offset. This makes some corner cases just fall out naturally: a. If we did not find an EDGE chunk at all, our size will correctly be zero (so everything is "out of bounds"). b. Comparing "size / 4" lets us make sure we have at least 4 bytes to read, and we never compute a pointer more than one element past the end of the array (computing a larger pointer is probably OK in practice, but is technically undefined behavior). c. The current code casts to "uint32_t *". Replacing it with an offset avoids any comparison between different types of pointer (since the chunk is stored as "unsigned char *"). 2. This is the first case in which fill_commit_in_graph() may return anything but success. We need to make sure to roll back the "parsed" flag (and any parents we might have added before running out of buffer) so that the caller can cleanly fall back to loading the commit object itself. It's a little non-trivial to do this, and we might benefit from factoring it out. But we can wait on that until we actually see a second case where we return an error. As a bonus, this lets us drop the st_mult() call. Since we've already done a bounds check, we know there won't be any integer overflow (it would imply our buffer is larger than a size_t can hold). The included test does not actually segfault before this patch (though you could construct a case where it does). Instead, it reads garbage from the next chunk which results in it complaining about a bogus parent id. This is sufficient for our needs, though (we care that the fallback succeeds, and that stderr mentions the out-of-bounds read). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'commit-graph.h') diff --git a/commit-graph.h b/commit-graph.h index 20ada7e891..1f8a9de4fb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -95,6 +95,7 @@ struct commit_graph { const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data_overflow; const unsigned char *chunk_extra_edges; + size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; -- cgit v1.2.3 From 6cf61d0db55291c3b8406a6ba8f20fdfb9a4a344 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:41 -0400 Subject: commit-graph: bounds-check base graphs chunk When we are loading a commit-graph chain, we check that each slice of the chain points to the appropriate set of base graphs via its BASE chunk. But since we don't record the size of the chunk, we may access out-of-bounds memory if the file is corrupted. Since we know the number of entries we expect to find (based on the position within the commit-graph-chain file), we can just check the size up front. In theory this would also let us drop the st_mult() call a few lines later when we actually access the memory, since we know that the computed offset will fit in a size_t. But because the operands "g->hash_len" and "n" have types "unsigned char" and "int", we'd have to cast to size_t first. Leaving the st_mult() does that cast, and makes it more obvious that we don't have an overflow problem. Note that the test does not actually segfault before this patch, since it just reads garbage from the chunk after BASE (and indeed, it even rejects the file because that garbage does not have the expected hash value). You could construct a file with BASE at the end that did segfault, but corrupting the existing one is easy, and we can check stderr for the expected message. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 8 +++++++- commit-graph.h | 1 + t/t5324-split-commit-graph.sh | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'commit-graph.h') diff --git a/commit-graph.c b/commit-graph.c index e4860841fc..4377b547c8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -435,7 +435,8 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph); pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges, &graph->chunk_extra_edges_size); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs); + pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs, + &graph->chunk_base_graphs_size); if (s->commit_graph_generation_version >= 2) { pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA, @@ -546,6 +547,11 @@ static int add_graph_to_chain(struct commit_graph *g, return 0; } + if (g->chunk_base_graphs_size / g->hash_len < n) { + warning(_("commit-graph base graphs chunk is too small")); + return 0; + } + while (n) { n--; diff --git a/commit-graph.h b/commit-graph.h index 1f8a9de4fb..e4248ea05d 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -97,6 +97,7 @@ struct commit_graph { const unsigned char *chunk_extra_edges; size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; + size_t chunk_base_graphs_size; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 55b5765e2d..3c8482d073 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -2,6 +2,7 @@ test_description='split commit graph' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -398,6 +399,19 @@ test_expect_success 'verify across alternates' ' ) ' +test_expect_success 'reader bounds-checks base-graph chunk' ' + git clone --no-hardlinks . corrupt-base-chunk && + ( + cd corrupt-base-chunk && + tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_chunk_file "$tip_file" BASE clear 01020304 && + git -c core.commitGraph=false log >expect.out && + git -c core.commitGraph=true log >out 2>err && + test_cmp expect.out out && + grep "commit-graph base graphs chunk is too small" err + ) +' + test_expect_success 'add octopus merge' ' git reset --hard commits/10 && git merge commits/3 commits/4 && -- cgit v1.2.3 From ee6a7924124b2a9030da55c94a27829e410b3b64 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:47 -0400 Subject: commit-graph: bounds-check generation overflow chunk If the generation entry in a commit-graph doesn't fit, we instead insert an offset into a generation overflow chunk. But since we don't record the size of the chunk, we may read outside the chunk if the offset we find on disk is malicious or corrupted. We can't check the size of the chunk up-front; it will vary based on how many entries need overflow. So instead, we'll do a bounds-check before accessing the chunk memory. Unfortunately there is no error-return from this function, so we'll just have to die(), which is what it does for other forms of corruption. As with other cases, we can drop the st_mult() call, since we know our bounds-checked value will fit within a size_t. Before this patch, the test here actually "works" because we read garbage data from the next chunk. And since that garbage data happens not to provide a generation number which changes the output, it appears to work. We could construct a case that actually segfaults or produces wrong output, but it would be a bit tricky. For our purposes its sufficient to check that we've detected the bounds error. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 10 +++++++--- commit-graph.h | 1 + t/t5328-commit-graph-64bit-time.sh | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'commit-graph.h') diff --git a/commit-graph.c b/commit-graph.c index ca26870d1b..f446e76c28 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -451,8 +451,9 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, if (s->commit_graph_generation_version >= 2) { read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, graph_read_generation_data, graph); - pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, - &graph->chunk_generation_data_overflow); + pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, + &graph->chunk_generation_data_overflow, + &graph->chunk_generation_data_overflow_size); if (graph->chunk_generation_data) graph->read_generation_data = 1; @@ -896,7 +897,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, die(_("commit-graph requires overflow generation data but has none")); offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; - graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos)); + if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos) + die(_("commit-graph overflow generation data is too small")); + graph_data->generation = item->date + + get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos); } else graph_data->generation = item->date + offset; } else diff --git a/commit-graph.h b/commit-graph.h index e4248ea05d..b373f15802 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -94,6 +94,7 @@ struct commit_graph { const unsigned char *chunk_commit_data; const unsigned char *chunk_generation_data; const unsigned char *chunk_generation_data_overflow; + size_t chunk_generation_data_overflow_size; const unsigned char *chunk_extra_edges; size_t chunk_extra_edges_size; const unsigned char *chunk_base_graphs; diff --git a/t/t5328-commit-graph-64bit-time.sh b/t/t5328-commit-graph-64bit-time.sh index e9c521c061..e5ff3e07ad 100755 --- a/t/t5328-commit-graph-64bit-time.sh +++ b/t/t5328-commit-graph-64bit-time.sh @@ -10,6 +10,7 @@ then fi . "$TEST_DIRECTORY"/lib-commit-graph.sh +. "$TEST_DIRECTORY/lib-chunk.sh" UNIX_EPOCH_ZERO="@0 +0000" FUTURE_DATE="@4147483646 +0000" @@ -72,4 +73,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' ' git -C repo-uint32-max commit-graph verify ' +test_expect_success 'reader notices out-of-bounds generation overflow' ' + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable && + corrupt_chunk_file $graph GDO2 clear && + test_must_fail git log 2>err && + grep "commit-graph overflow generation data is too small" err +' + test_done -- cgit v1.2.3 From 920f400e919c7c51f81adc6989cdd52630220783 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:50 -0400 Subject: commit-graph: check bounds when accessing BDAT chunk When loading Bloom filters from a commit-graph file, we use the offset values in the BIDX chunk to index into the memory mapped for the BDAT chunk. But since we don't record how big the BDAT chunk is, we just trust that the BIDX offsets won't cause us to read outside of the chunk memory. A corrupted or malicious commit-graph file will cause us to segfault (in practice this isn't a very interesting attack, since commit-graph files are local-only, and the worst case is an out-of-bounds read). We can't fix this by checking the chunk size during parsing, since the data in the BDAT chunk doesn't have a fixed size (that's why we need the BIDX in the first place). So we'll fix it in two parts: 1. Record the BDAT chunk size during parsing, and then later check that the BIDX offsets we look up are within bounds. 2. Because the offsets are relative to the end of the BDAT header, we must also make sure that the BDAT chunk is at least as large as the expected header size. Otherwise, we overflow when trying to move past the header, even for an offset of "0". We can check this early, during the parsing stage. The error messages are rather verbose, but since this is not something you'd expect to see outside of severe bugs or corruption, it makes sense to err on the side of too many details. Sadly we can't mention the filename during the chunk-parsing stage, as we haven't set g->filename at this point, nor passed it down through the stack. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bloom.c | 24 ++++++++++++++++++++++++ commit-graph.c | 10 ++++++++++ commit-graph.h | 1 + t/t4216-log-bloom.sh | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'commit-graph.h') diff --git a/bloom.c b/bloom.c index aef6b5fea2..61abad7f8c 100644 --- a/bloom.c +++ b/bloom.c @@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int check_bloom_offset(struct commit_graph *g, uint32_t pos, + uint32_t offset) +{ + /* + * Note that we allow offsets equal to the data size, which would set + * our pointers at one past the end of the chunk memory. This is + * necessary because the on-disk index points to the end of the + * entries (so we can compute size by comparing adjacent ones). And + * naturally the final entry's end is one-past-the-end of the chunk. + */ + if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE) + return 0; + + warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path" + " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")", + (uintmax_t)offset, (uintmax_t)pos, + g->filename, (uintmax_t)g->chunk_bloom_data_size); + return -1; +} + static int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos) @@ -51,6 +71,10 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, else start_index = 0; + if (check_bloom_offset(g, lex_pos, end_index) < 0 || + check_bloom_offset(g, lex_pos - 1, start_index) < 0) + return 0; + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + diff --git a/commit-graph.c b/commit-graph.c index f446e76c28..f7a42be6d0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -365,7 +365,17 @@ static int graph_read_bloom_data(const unsigned char *chunk_start, { struct commit_graph *g = data; uint32_t hash_version; + + if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) { + warning("ignoring too-small changed-path chunk" + " (%"PRIuMAX" < %"PRIuMAX") in commit-graph file", + (uintmax_t)chunk_size, + (uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE); + return -1; + } + g->chunk_bloom_data = chunk_start; + g->chunk_bloom_data_size = chunk_size; hash_version = get_be32(chunk_start); if (hash_version != 1) diff --git a/commit-graph.h b/commit-graph.h index b373f15802..c6870274c5 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -101,6 +101,7 @@ struct commit_graph { size_t chunk_base_graphs_size; const unsigned char *chunk_bloom_indexes; const unsigned char *chunk_bloom_data; + size_t chunk_bloom_data_size; struct topo_level_slab *topo_levels; struct bloom_filter_settings *bloom_filter_settings; diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index fa9d32facf..7a727bcddd 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_COMMIT_GRAPH=0 GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 @@ -404,4 +405,31 @@ test_expect_success 'Bloom generation backfills empty commits' ' ) ' +corrupt_graph () { + graph=.git/objects/info/commit-graph && + test_when_finished "rm -rf $graph" && + git commit-graph write --reachable --changed-paths && + corrupt_chunk_file $graph "$@" +} + +check_corrupt_graph () { + corrupt_graph "$@" && + git -c core.commitGraph=false log -- A/B/file2 >expect.out && + git -c core.commitGraph=true log -- A/B/file2 >out 2>err && + test_cmp expect.out out +} + +test_expect_success 'Bloom reader notices too-small data chunk' ' + check_corrupt_graph BDAT clear 00000000 && + echo "warning: ignoring too-small changed-path chunk" \ + "(4 < 12) in commit-graph file" >expect.err && + test_cmp expect.err err +' + +test_expect_success 'Bloom reader notices out-of-bounds filter offsets' ' + check_corrupt_graph BIDX 12 FFFFFFFF && + # use grep to avoid depending on exact chunk size + grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err +' + test_done -- cgit v1.2.3