From e103f7276f0d809c2935ebc1a3d68c6bbfaed23d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:37 -0700 Subject: commit-graph: return with errors during write The write_commit_graph() method uses die() to report failure and exit when confronted with an unexpected condition. This use of die() in a library function is incorrect and is now replaced by error() statements and an int return type. Return zero on success and a negative value on failure. Now that we use 'goto cleanup' to jump to the terminal condition on an error, we have new paths that could lead to uninitialized values. New initializers are added to correct for this. The builtins 'commit-graph', 'gc', and 'commit' call these methods, so update them to check the return value. Test that 'git commit-graph write' returns a proper error code when hitting a failure condition in write_commit_graph(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 60 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 66865acbd7..1b58d1da14 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -851,27 +851,30 @@ static int add_ref_to_list(const char *refname, return 0; } -void write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, int append, + int report_progress) { struct string_list list = STRING_LIST_INIT_DUP; + int result; for_each_ref(add_ref_to_list, &list); - write_commit_graph(obj_dir, NULL, &list, append, report_progress); + result = write_commit_graph(obj_dir, NULL, &list, + append, report_progress); string_list_clear(&list, 0); + return result; } -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress) +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int append, int report_progress) { struct packed_oid_list oids; struct packed_commit_list commits; struct hashfile *f; uint32_t i, count_distinct = 0; - char *graph_name; + char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -883,15 +886,17 @@ void write_commit_graph(const char *obj_dir, uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; + int res = 0; if (!commit_graph_compatible(the_repository)) - return; + return 0; oids.nr = 0; approx_nr_objects = approximate_object_count(); oids.alloc = approx_nr_objects / 32; oids.progress = NULL; oids.progress_done = 0; + commits.list = NULL; if (append) { prepare_commit_graph_one(the_repository, obj_dir); @@ -932,10 +937,16 @@ void write_commit_graph(const char *obj_dir, strbuf_setlen(&packname, dirlen); strbuf_addstr(&packname, pack_indexes->items[i].string); p = add_packed_git(packname.buf, packname.len, 1); - if (!p) - die(_("error adding pack %s"), packname.buf); - if (open_pack_index(p)) - die(_("error opening index for %s"), packname.buf); + if (!p) { + error(_("error adding pack %s"), packname.buf); + res = -1; + goto cleanup; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + res = -1; + goto cleanup; + } for_each_object_in_pack(p, add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); @@ -1006,8 +1017,11 @@ void write_commit_graph(const char *obj_dir, } stop_progress(&progress); - if (count_distinct >= GRAPH_EDGE_LAST_MASK) - die(_("the commit graph format cannot write %d commits"), count_distinct); + if (count_distinct >= GRAPH_EDGE_LAST_MASK) { + error(_("the commit graph format cannot write %d commits"), count_distinct); + res = -1; + goto cleanup; + } commits.nr = 0; commits.alloc = count_distinct; @@ -1039,16 +1053,21 @@ void write_commit_graph(const char *obj_dir, num_chunks = num_extra_edges ? 4 : 3; stop_progress(&progress); - if (commits.nr >= GRAPH_EDGE_LAST_MASK) - die(_("too many commits to write graph")); + if (commits.nr >= GRAPH_EDGE_LAST_MASK) { + error(_("too many commits to write graph")); + res = -1; + goto cleanup; + } compute_generation_numbers(&commits, report_progress); graph_name = get_commit_graph_filename(obj_dir); if (safe_create_leading_directories(graph_name)) { UNLEAK(graph_name); - die_errno(_("unable to create leading directories of %s"), - graph_name); + error(_("unable to create leading directories of %s"), + graph_name); + res = -1; + goto cleanup; } hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); @@ -1107,9 +1126,12 @@ void write_commit_graph(const char *obj_dir, finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); +cleanup: free(graph_name); free(commits.list); free(oids.list); + + return res; } #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2 -- cgit v1.2.3 From 5af803945212af875670582ff153ee05ec368b83 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:38 -0700 Subject: commit-graph: collapse parameters into flags The write_commit_graph() and write_commit_graph_reachable() methods currently take two boolean parameters: 'append' and 'report_progress'. As we update these methods, adding more parameters this way becomes cluttered and hard to maintain. Collapse these parameters into a 'flags' parameter, and adjust the callers to provide flags as necessary. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 8 +++++--- builtin/commit.c | 2 +- builtin/gc.c | 4 ++-- commit-graph.c | 9 +++++---- commit-graph.h | 8 +++++--- 5 files changed, 18 insertions(+), 13 deletions(-) (limited to 'commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 2a1c4d701f..d8efa5bab2 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -142,6 +142,7 @@ static int graph_write(int argc, const char **argv) struct string_list *commit_hex = NULL; struct string_list lines; int result = 0; + unsigned int flags = COMMIT_GRAPH_PROGRESS; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -166,11 +167,13 @@ static int graph_write(int argc, const char **argv) die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.append) + flags |= COMMIT_GRAPH_APPEND; read_replace_refs = 0; if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, opts.append, 1); + return write_commit_graph_reachable(opts.obj_dir, flags); string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -190,8 +193,7 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - opts.append, - 1)) + flags)) result = 1; UNLEAK(lines); diff --git a/builtin/commit.c b/builtin/commit.c index b9ea7222fa..b001ef565d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0, 0)) + write_commit_graph_reachable(get_object_directory(), 0)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 3984addf73..df2573f124 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -665,8 +665,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } if (gc_write_commit_graph && - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized)) + write_commit_graph_reachable(get_object_directory(), + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 1b58d1da14..fc40b531af 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -851,15 +851,14 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - append, report_progress); + flags); string_list_clear(&list, 0); return result; @@ -868,7 +867,7 @@ int write_commit_graph_reachable(const char *obj_dir, int append, int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress) + unsigned int flags) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -887,6 +886,8 @@ int write_commit_graph(const char *obj_dir, struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; int res = 0; + int append = flags & COMMIT_GRAPH_APPEND; + int report_progress = flags & COMMIT_GRAPH_PROGRESS; if (!commit_graph_compatible(the_repository)) return 0; diff --git a/commit-graph.h b/commit-graph.h index 869717ca19..01538b5cf5 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -65,18 +65,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, */ int generation_numbers_enabled(struct repository *r); +#define COMMIT_GRAPH_APPEND (1 << 0) +#define COMMIT_GRAPH_PROGRESS (1 << 1) + /* * The write_commit_graph* methods return zero on success * and a negative value on failure. Note that if the repository * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress); +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress); + unsigned int flags); int verify_commit_graph(struct repository *r, struct commit_graph *g); -- cgit v1.2.3 From c9905beade13efff6be9c15ebe03d07fe5278ccc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:40 -0700 Subject: commit-graph: create write_commit_graph_context The write_commit_graph() method is too large and complex. To simplify it, we should extract several helper functions. However, we will risk repeating a lot of declarations related to progress incidators and object id or commit lists. Create a new write_commit_graph_context struct that contains the core data structures used in this process. Replace the other local variables with the values inside the context object. Following this change, we will start to lift code segments wholesale out of the write_commit_graph() method and into helper functions. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 390 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 194 insertions(+), 196 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index fc40b531af..6d7e83cfe8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -518,14 +518,38 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c); } +struct packed_commit_list { + struct commit **list; + int nr; + int alloc; +}; + +struct packed_oid_list { + struct object_id *list; + int nr; + int alloc; +}; + +struct write_commit_graph_context { + struct repository *r; + const char *obj_dir; + char *graph_name; + struct packed_oid_list oids; + struct packed_commit_list commits; + int num_extra_edges; + unsigned long approx_nr_objects; + struct progress *progress; + int progress_done; + uint64_t progress_cnt; + unsigned append:1, + report_progress:1; +}; + static void write_graph_chunk_fanout(struct hashfile *f, - struct commit **commits, - int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { int i, count = 0; - struct commit **list = commits; + struct commit **list = ctx->commits.list; /* * Write the first-level table (the list is sorted, @@ -533,10 +557,10 @@ static void write_graph_chunk_fanout(struct hashfile *f, * having to do eight extra binary search iterations). */ for (i = 0; i < 256; i++) { - while (count < nr_commits) { + while (count < ctx->commits.nr) { if ((*list)->object.oid.hash[0] != i) break; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); count++; list++; } @@ -546,14 +570,12 @@ static void write_graph_chunk_fanout(struct hashfile *f, } static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; + struct commit **list = ctx->commits.list; int count; - for (count = 0; count < nr_commits; count++, list++) { - display_progress(progress, ++*progress_cnt); + for (count = 0; count < ctx->commits.nr; count++, list++) { + display_progress(ctx->progress, ++ctx->progress_cnt); hashwrite(f, (*list)->object.oid.hash, (int)hash_len); } } @@ -565,19 +587,17 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) } static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; - struct commit **last = commits + nr_commits; + struct commit **list = ctx->commits.list; + struct commit **last = ctx->commits.list + ctx->commits.nr; uint32_t num_extra_edges = 0; while (list < last) { struct commit_list *parent; int edge_value; uint32_t packedDate[2]; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); parse_commit_no_graph(*list); hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); @@ -588,8 +608,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, edge_value = GRAPH_PARENT_NONE; else { edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) @@ -609,8 +629,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges; else { edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) BUG("missing parent %s for commit %s", @@ -642,19 +662,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, } static void write_graph_chunk_extra_edges(struct hashfile *f, - struct commit **commits, - int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; - struct commit **last = commits + nr_commits; + struct commit **list = ctx->commits.list; + struct commit **last = ctx->commits.list + ctx->commits.nr; struct commit_list *parent; while (list < last) { int num_parents = 0; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); for (parent = (*list)->parents; num_parents < 3 && parent; parent = parent->next) @@ -668,8 +685,8 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, /* Since num_parents > 2, this initializer is safe. */ for (parent = (*list)->parents->next; parent; parent = parent->next) { int edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) @@ -693,125 +710,111 @@ static int commit_compare(const void *_a, const void *_b) return oidcmp(a, b); } -struct packed_commit_list { - struct commit **list; - int nr; - int alloc; -}; - -struct packed_oid_list { - struct object_id *list; - int nr; - int alloc; - struct progress *progress; - int progress_done; -}; - static int add_packed_commits(const struct object_id *oid, struct packed_git *pack, uint32_t pos, void *data) { - struct packed_oid_list *list = (struct packed_oid_list*)data; + struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; enum object_type type; off_t offset = nth_packed_object_offset(pack, pos); struct object_info oi = OBJECT_INFO_INIT; - if (list->progress) - display_progress(list->progress, ++list->progress_done); + if (ctx->progress) + display_progress(ctx->progress, ++ctx->progress_done); oi.typep = &type; - if (packed_object_info(the_repository, pack, offset, &oi) < 0) + if (packed_object_info(ctx->r, pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); if (type != OBJ_COMMIT) return 0; - ALLOC_GROW(list->list, list->nr + 1, list->alloc); - oidcpy(&(list->list[list->nr]), oid); - list->nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid); + ctx->oids.nr++; return 0; } -static void add_missing_parents(struct packed_oid_list *oids, struct commit *commit) +static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) { struct commit_list *parent; for (parent = commit->parents; parent; parent = parent->next) { if (!(parent->item->object.flags & UNINTERESTING)) { - ALLOC_GROW(oids->list, oids->nr + 1, oids->alloc); - oidcpy(&oids->list[oids->nr], &(parent->item->object.oid)); - oids->nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid)); + ctx->oids.nr++; parent->item->object.flags |= UNINTERESTING; } } } -static void close_reachable(struct packed_oid_list *oids, int report_progress) +static void close_reachable(struct write_commit_graph_context *ctx) { int i; struct commit *commit; - struct progress *progress = NULL; - if (report_progress) - progress = start_delayed_progress( - _("Loading known commits in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Loading known commits in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit) commit->object.flags |= UNINTERESTING; } - stop_progress(&progress); + stop_progress(&ctx->progress); /* - * As this loop runs, oids->nr may grow, but not more + * As this loop runs, ctx->oids.nr may grow, but not more * than the number of missing commits in the reachable * closure. */ - if (report_progress) - progress = start_delayed_progress( - _("Expanding reachable commits in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Expanding reachable commits in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit && !parse_commit_no_graph(commit)) - add_missing_parents(oids, commit); + add_missing_parents(ctx, commit); } - stop_progress(&progress); + stop_progress(&ctx->progress); - if (report_progress) - progress = start_delayed_progress( - _("Clearing commit marks in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Clearing commit marks in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit) commit->object.flags &= ~UNINTERESTING; } - stop_progress(&progress); + stop_progress(&ctx->progress); } -static void compute_generation_numbers(struct packed_commit_list* commits, - int report_progress) +static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; struct commit_list *list = NULL; - struct progress *progress = NULL; - if (report_progress) - progress = start_progress( - _("Computing commit graph generation numbers"), - commits->nr); - for (i = 0; i < commits->nr; i++) { - display_progress(progress, i + 1); - if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY && - commits->list[i]->generation != GENERATION_NUMBER_ZERO) + if (ctx->report_progress) + ctx->progress = start_progress( + _("Computing commit graph generation numbers"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + display_progress(ctx->progress, i + 1); + if (ctx->commits.list[i]->generation != GENERATION_NUMBER_INFINITY && + ctx->commits.list[i]->generation != GENERATION_NUMBER_ZERO) continue; - commit_list_insert(commits->list[i], &list); + commit_list_insert(ctx->commits.list[i], &list); while (list) { struct commit *current = list->item; struct commit_list *parent; @@ -838,7 +841,7 @@ static void compute_generation_numbers(struct packed_commit_list* commits, } } } - stop_progress(&progress); + stop_progress(&ctx->progress); } static int add_ref_to_list(const char *refname, @@ -869,8 +872,7 @@ int write_commit_graph(const char *obj_dir, struct string_list *commit_hex, unsigned int flags) { - struct packed_oid_list oids; - struct packed_commit_list commits; + struct write_commit_graph_context *ctx; struct hashfile *f; uint32_t i, count_distinct = 0; char *graph_name = NULL; @@ -878,44 +880,38 @@ int write_commit_graph(const char *obj_dir, uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; int num_chunks; - int num_extra_edges; struct commit_list *parent; - struct progress *progress = NULL; const unsigned hashsz = the_hash_algo->rawsz; - uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; - unsigned long approx_nr_objects; int res = 0; - int append = flags & COMMIT_GRAPH_APPEND; - int report_progress = flags & COMMIT_GRAPH_PROGRESS; if (!commit_graph_compatible(the_repository)) return 0; - oids.nr = 0; - approx_nr_objects = approximate_object_count(); - oids.alloc = approx_nr_objects / 32; - oids.progress = NULL; - oids.progress_done = 0; - commits.list = NULL; - - if (append) { - prepare_commit_graph_one(the_repository, obj_dir); - if (the_repository->objects->commit_graph) - oids.alloc += the_repository->objects->commit_graph->num_commits; + ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); + ctx->r = the_repository; + ctx->obj_dir = obj_dir; + ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; + ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; + + ctx->approx_nr_objects = approximate_object_count(); + ctx->oids.alloc = ctx->approx_nr_objects / 32; + + if (ctx->append) { + prepare_commit_graph_one(ctx->r, ctx->obj_dir); + if (ctx->r->objects->commit_graph) + ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits; } - if (oids.alloc < 1024) - oids.alloc = 1024; - ALLOC_ARRAY(oids.list, oids.alloc); - - if (append && the_repository->objects->commit_graph) { - struct commit_graph *commit_graph = - the_repository->objects->commit_graph; - for (i = 0; i < commit_graph->num_commits; i++) { - const unsigned char *hash = commit_graph->chunk_oid_lookup + - commit_graph->hash_len * i; - hashcpy(oids.list[oids.nr++].hash, hash); + if (ctx->oids.alloc < 1024) + ctx->oids.alloc = 1024; + ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc); + + if (ctx->append && ctx->r->objects->commit_graph) { + struct commit_graph *g = ctx->r->objects->commit_graph; + for (i = 0; i < g->num_commits; i++) { + const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i; + hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash); } } @@ -924,14 +920,14 @@ int write_commit_graph(const char *obj_dir, int dirlen; strbuf_addf(&packname, "%s/pack/", obj_dir); dirlen = packname.len; - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph in %d pack", "Finding commits for commit graph in %d packs", pack_indexes->nr), pack_indexes->nr); - oids.progress = start_delayed_progress(progress_title.buf, 0); - oids.progress_done = 0; + ctx->progress = start_delayed_progress(progress_title.buf, 0); + ctx->progress_done = 0; } for (i = 0; i < pack_indexes->nr; i++) { struct packed_git *p; @@ -948,75 +944,76 @@ int write_commit_graph(const char *obj_dir, res = -1; goto cleanup; } - for_each_object_in_pack(p, add_packed_commits, &oids, + for_each_object_in_pack(p, add_packed_commits, ctx, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } - stop_progress(&oids.progress); + stop_progress(&ctx->progress); strbuf_reset(&progress_title); strbuf_release(&packname); } if (commit_hex) { - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph from %d ref", "Finding commits for commit graph from %d refs", commit_hex->nr), commit_hex->nr); - progress = start_delayed_progress(progress_title.buf, - commit_hex->nr); + ctx->progress = start_delayed_progress( + progress_title.buf, + commit_hex->nr); } for (i = 0; i < commit_hex->nr; i++) { const char *end; struct object_id oid; struct commit *result; - display_progress(progress, i + 1); + display_progress(ctx->progress, i + 1); if (commit_hex->items[i].string && parse_oid_hex(commit_hex->items[i].string, &oid, &end)) continue; - result = lookup_commit_reference_gently(the_repository, &oid, 1); + result = lookup_commit_reference_gently(ctx->r, &oid, 1); if (result) { - ALLOC_GROW(oids.list, oids.nr + 1, oids.alloc); - oidcpy(&oids.list[oids.nr], &(result->object.oid)); - oids.nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); + ctx->oids.nr++; } } - stop_progress(&progress); + stop_progress(&ctx->progress); strbuf_reset(&progress_title); } if (!pack_indexes && !commit_hex) { - if (report_progress) - oids.progress = start_delayed_progress( + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Finding commits for commit graph among packed objects"), - approx_nr_objects); - for_each_packed_object(add_packed_commits, &oids, + ctx->approx_nr_objects); + for_each_packed_object(add_packed_commits, ctx, FOR_EACH_OBJECT_PACK_ORDER); - if (oids.progress_done < approx_nr_objects) - display_progress(oids.progress, approx_nr_objects); - stop_progress(&oids.progress); + if (ctx->progress_done < ctx->approx_nr_objects) + display_progress(ctx->progress, ctx->approx_nr_objects); + stop_progress(&ctx->progress); } - close_reachable(&oids, report_progress); + close_reachable(ctx); - if (report_progress) - progress = start_delayed_progress( + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Counting distinct commits in commit graph"), - oids.nr); - display_progress(progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(oids.list, oids.nr, commit_compare); + ctx->oids.nr); + display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); count_distinct = 1; - for (i = 1; i < oids.nr; i++) { - display_progress(progress, i + 1); - if (!oideq(&oids.list[i - 1], &oids.list[i])) + for (i = 1; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) count_distinct++; } - stop_progress(&progress); + stop_progress(&ctx->progress); if (count_distinct >= GRAPH_EDGE_LAST_MASK) { error(_("the commit graph format cannot write %d commits"), count_distinct); @@ -1024,54 +1021,54 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } - commits.nr = 0; - commits.alloc = count_distinct; - ALLOC_ARRAY(commits.list, commits.alloc); + ctx->commits.alloc = count_distinct; + ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc); - num_extra_edges = 0; - if (report_progress) - progress = start_delayed_progress( + ctx->num_extra_edges = 0; + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Finding extra edges in commit graph"), - oids.nr); - for (i = 0; i < oids.nr; i++) { + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { int num_parents = 0; - display_progress(progress, i + 1); - if (i > 0 && oideq(&oids.list[i - 1], &oids.list[i])) + display_progress(ctx->progress, i + 1); + if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) continue; - commits.list[commits.nr] = lookup_commit(the_repository, &oids.list[i]); - parse_commit_no_graph(commits.list[commits.nr]); + ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); - for (parent = commits.list[commits.nr]->parents; + for (parent = ctx->commits.list[ctx->commits.nr]->parents; parent; parent = parent->next) num_parents++; if (num_parents > 2) - num_extra_edges += num_parents - 1; + ctx->num_extra_edges += num_parents - 1; - commits.nr++; + ctx->commits.nr++; } - num_chunks = num_extra_edges ? 4 : 3; - stop_progress(&progress); + stop_progress(&ctx->progress); - if (commits.nr >= GRAPH_EDGE_LAST_MASK) { + if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) { error(_("too many commits to write graph")); res = -1; goto cleanup; } - compute_generation_numbers(&commits, report_progress); + compute_generation_numbers(ctx); - graph_name = get_commit_graph_filename(obj_dir); - if (safe_create_leading_directories(graph_name)) { - UNLEAK(graph_name); + num_chunks = ctx->num_extra_edges ? 4 : 3; + + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + if (safe_create_leading_directories(ctx->graph_name)) { + UNLEAK(ctx->graph_name); error(_("unable to create leading directories of %s"), - graph_name); + ctx->graph_name); res = -1; goto cleanup; } - hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1084,7 +1081,7 @@ int write_commit_graph(const char *obj_dir, chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (num_extra_edges) + if (ctx->num_extra_edges) chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; else chunk_ids[3] = 0; @@ -1092,9 +1089,9 @@ int write_commit_graph(const char *obj_dir, chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges; + chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; + chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; + chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; @@ -1105,32 +1102,33 @@ int write_commit_graph(const char *obj_dir, hashwrite(f, chunk_write, 12); } - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Writing out commit graph in %d pass", "Writing out commit graph in %d passes", num_chunks), num_chunks); - progress = start_delayed_progress( + ctx->progress = start_delayed_progress( progress_title.buf, - num_chunks * commits.nr); + num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, commits.list, commits.nr, progress, &progress_cnt); - write_graph_chunk_oids(f, hashsz, commits.list, commits.nr, progress, &progress_cnt); - write_graph_chunk_data(f, hashsz, commits.list, commits.nr, progress, &progress_cnt); - if (num_extra_edges) - write_graph_chunk_extra_edges(f, commits.list, commits.nr, progress, &progress_cnt); - stop_progress(&progress); + write_graph_chunk_fanout(f, ctx); + write_graph_chunk_oids(f, hashsz, ctx); + write_graph_chunk_data(f, hashsz, ctx); + if (ctx->num_extra_edges) + write_graph_chunk_extra_edges(f, ctx); + stop_progress(&ctx->progress); strbuf_release(&progress_title); - close_commit_graph(the_repository); + close_commit_graph(ctx->r); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); cleanup: free(graph_name); - free(commits.list); - free(oids.list); + free(ctx->commits.list); + free(ctx->oids.list); + free(ctx); return res; } -- cgit v1.2.3 From ef5b83f2cfe11c2feb98b962881e9a8e6281e3ff Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:41 -0700 Subject: commit-graph: extract fill_oids_from_packs() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. This extracts fill_oids_from_packs() that reads the given pack-file list and fills the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 83 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 36 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 6d7e83cfe8..02e5f8c651 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -867,6 +867,51 @@ int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) return result; } +static int fill_oids_from_packs(struct write_commit_graph_context *ctx, + struct string_list *pack_indexes) +{ + uint32_t i; + struct strbuf progress_title = STRBUF_INIT; + struct strbuf packname = STRBUF_INIT; + int dirlen; + + strbuf_addf(&packname, "%s/pack/", ctx->obj_dir); + dirlen = packname.len; + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph in %d pack", + "Finding commits for commit graph in %d packs", + pack_indexes->nr), + pack_indexes->nr); + ctx->progress = start_delayed_progress(progress_title.buf, 0); + ctx->progress_done = 0; + } + for (i = 0; i < pack_indexes->nr; i++) { + struct packed_git *p; + strbuf_setlen(&packname, dirlen); + strbuf_addstr(&packname, pack_indexes->items[i].string); + p = add_packed_git(packname.buf, packname.len, 1); + if (!p) { + error(_("error adding pack %s"), packname.buf); + return -1; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + return -1; + } + for_each_object_in_pack(p, add_packed_commits, ctx, + FOR_EACH_OBJECT_PACK_ORDER); + close_pack(p); + free(p); + } + + stop_progress(&ctx->progress); + strbuf_reset(&progress_title); + strbuf_release(&packname); + + return 0; +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -916,42 +961,8 @@ int write_commit_graph(const char *obj_dir, } if (pack_indexes) { - struct strbuf packname = STRBUF_INIT; - int dirlen; - strbuf_addf(&packname, "%s/pack/", obj_dir); - dirlen = packname.len; - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Finding commits for commit graph in %d pack", - "Finding commits for commit graph in %d packs", - pack_indexes->nr), - pack_indexes->nr); - ctx->progress = start_delayed_progress(progress_title.buf, 0); - ctx->progress_done = 0; - } - for (i = 0; i < pack_indexes->nr; i++) { - struct packed_git *p; - strbuf_setlen(&packname, dirlen); - strbuf_addstr(&packname, pack_indexes->items[i].string); - p = add_packed_git(packname.buf, packname.len, 1); - if (!p) { - error(_("error adding pack %s"), packname.buf); - res = -1; - goto cleanup; - } - if (open_pack_index(p)) { - error(_("error opening index for %s"), packname.buf); - res = -1; - goto cleanup; - } - for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); - close_pack(p); - free(p); - } - stop_progress(&ctx->progress); - strbuf_reset(&progress_title); - strbuf_release(&packname); + if ((res = fill_oids_from_packs(ctx, pack_indexes))) + goto cleanup; } if (commit_hex) { -- cgit v1.2.3 From 4c9efe850d75115a6deedd57072f1d7383bc03da Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:42 -0700 Subject: commit-graph: extract fill_oids_from_commit_hex() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract fill_oids_from_commit_hex() that reads the given commit id list and fille the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 72 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 32 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 02e5f8c651..4fae1fcdb2 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -912,6 +912,44 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, return 0; } +static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, + struct string_list *commit_hex) +{ + uint32_t i; + struct strbuf progress_title = STRBUF_INIT; + + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph from %d ref", + "Finding commits for commit graph from %d refs", + commit_hex->nr), + commit_hex->nr); + ctx->progress = start_delayed_progress( + progress_title.buf, + commit_hex->nr); + } + for (i = 0; i < commit_hex->nr; i++) { + const char *end; + struct object_id oid; + struct commit *result; + + display_progress(ctx->progress, i + 1); + if (commit_hex->items[i].string && + parse_oid_hex(commit_hex->items[i].string, &oid, &end)) + continue; + + result = lookup_commit_reference_gently(ctx->r, &oid, 1); + + if (result) { + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); + ctx->oids.nr++; + } + } + stop_progress(&ctx->progress); + strbuf_release(&progress_title); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -965,38 +1003,8 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } - if (commit_hex) { - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Finding commits for commit graph from %d ref", - "Finding commits for commit graph from %d refs", - commit_hex->nr), - commit_hex->nr); - ctx->progress = start_delayed_progress( - progress_title.buf, - commit_hex->nr); - } - for (i = 0; i < commit_hex->nr; i++) { - const char *end; - struct object_id oid; - struct commit *result; - - display_progress(ctx->progress, i + 1); - if (commit_hex->items[i].string && - parse_oid_hex(commit_hex->items[i].string, &oid, &end)) - continue; - - result = lookup_commit_reference_gently(ctx->r, &oid, 1); - - if (result) { - ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); - oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); - ctx->oids.nr++; - } - } - stop_progress(&ctx->progress); - strbuf_reset(&progress_title); - } + if (commit_hex) + fill_oids_from_commit_hex(ctx, commit_hex); if (!pack_indexes && !commit_hex) { if (ctx->report_progress) -- cgit v1.2.3 From b2c83060525e69f628fb32836e311978e3899e6e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:42 -0700 Subject: commit-graph: extract fill_oids_from_all_packs() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract fill_oids_from_all_packs() that reads all pack-files for commits and fills the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 4fae1fcdb2..61cb43ddf8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -950,6 +950,19 @@ static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, strbuf_release(&progress_title); } +static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) +{ + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Finding commits for commit graph among packed objects"), + ctx->approx_nr_objects); + for_each_packed_object(add_packed_commits, ctx, + FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) + display_progress(ctx->progress, ctx->approx_nr_objects); + stop_progress(&ctx->progress); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1006,17 +1019,8 @@ int write_commit_graph(const char *obj_dir, if (commit_hex) fill_oids_from_commit_hex(ctx, commit_hex); - if (!pack_indexes && !commit_hex) { - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Finding commits for commit graph among packed objects"), - ctx->approx_nr_objects); - for_each_packed_object(add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); - if (ctx->progress_done < ctx->approx_nr_objects) - display_progress(ctx->progress, ctx->approx_nr_objects); - stop_progress(&ctx->progress); - } + if (!pack_indexes && !commit_hex) + fill_oids_from_all_packs(ctx); close_reachable(ctx); -- cgit v1.2.3 From 014e3440f524a620fd8aba92f1efeac737d9c23d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:43 -0700 Subject: commit-graph: extract count_distinct_commits() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract count_distinct_commits(), which sorts the oids list, then iterates through to find duplicates. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 61cb43ddf8..1a0a875a7b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -963,6 +963,27 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i, count_distinct = 1; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Counting distinct commits in commit graph"), + ctx->oids.nr); + display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); + + for (i = 1; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + count_distinct++; + } + stop_progress(&ctx->progress); + + return count_distinct; +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1024,19 +1045,7 @@ int write_commit_graph(const char *obj_dir, close_reachable(ctx); - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Counting distinct commits in commit graph"), - ctx->oids.nr); - display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); - count_distinct = 1; - for (i = 1; i < ctx->oids.nr; i++) { - display_progress(ctx->progress, i + 1); - if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) - count_distinct++; - } - stop_progress(&ctx->progress); + count_distinct = count_distinct_commits(ctx); if (count_distinct >= GRAPH_EDGE_LAST_MASK) { error(_("the commit graph format cannot write %d commits"), count_distinct); -- cgit v1.2.3 From f998d542260ec643875e7f0fa860dbb43c2388ca Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:44 -0700 Subject: commit-graph: extract copy_oids_to_commits() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract copy_oids_to_commits(), which fills the commits list with the distinct commits from the oids list. During this loop, it also counts the number of "extra" edges from octopus merges. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 57 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 25 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 1a0a875a7b..72f9c5c7e2 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -984,6 +984,37 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) return count_distinct; } +static void copy_oids_to_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i; + struct commit_list *parent; + + ctx->num_extra_edges = 0; + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Finding extra edges in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + int num_parents = 0; + display_progress(ctx->progress, i + 1); + if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + continue; + + ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + + for (parent = ctx->commits.list[ctx->commits.nr]->parents; + parent; parent = parent->next) + num_parents++; + + if (num_parents > 2) + ctx->num_extra_edges += num_parents - 1; + + ctx->commits.nr++; + } + stop_progress(&ctx->progress); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -997,7 +1028,6 @@ int write_commit_graph(const char *obj_dir, uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; int num_chunks; - struct commit_list *parent; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int res = 0; @@ -1056,30 +1086,7 @@ int write_commit_graph(const char *obj_dir, ctx->commits.alloc = count_distinct; ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc); - ctx->num_extra_edges = 0; - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Finding extra edges in commit graph"), - ctx->oids.nr); - for (i = 0; i < ctx->oids.nr; i++) { - int num_parents = 0; - display_progress(ctx->progress, i + 1); - if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) - continue; - - ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); - - for (parent = ctx->commits.list[ctx->commits.nr]->parents; - parent; parent = parent->next) - num_parents++; - - if (num_parents > 2) - ctx->num_extra_edges += num_parents - 1; - - ctx->commits.nr++; - } - stop_progress(&ctx->progress); + copy_oids_to_commits(ctx); if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) { error(_("too many commits to write graph")); -- cgit v1.2.3 From 238def57fe7ba219c9e50094512fff1068ac5413 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:45 -0700 Subject: commit-graph: extract write_commit_graph_file() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract write_commit_graph_file() that takes all of the information in the context struct and writes the data to a commit-graph file. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 155 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 75 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 72f9c5c7e2..9d2c72f5b4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1015,21 +1015,91 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -int write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - unsigned int flags) +static int write_commit_graph_file(struct write_commit_graph_context *ctx) { - struct write_commit_graph_context *ctx; + uint32_t i; struct hashfile *f; - uint32_t i, count_distinct = 0; - char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; - int num_chunks; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; + int num_chunks = ctx->num_extra_edges ? 4 : 3; + + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + if (safe_create_leading_directories(ctx->graph_name)) { + UNLEAK(ctx->graph_name); + error(_("unable to create leading directories of %s"), + ctx->graph_name); + return -1; + } + + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + + hashwrite_be32(f, GRAPH_SIGNATURE); + + hashwrite_u8(f, GRAPH_VERSION); + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); /* unused padding byte */ + + chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; + chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; + chunk_ids[2] = GRAPH_CHUNKID_DATA; + if (ctx->num_extra_edges) + chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; + else + chunk_ids[3] = 0; + chunk_ids[4] = 0; + + chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; + chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; + chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; + chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; + chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; + + for (i = 0; i <= num_chunks; i++) { + uint32_t chunk_write[3]; + + chunk_write[0] = htonl(chunk_ids[i]); + chunk_write[1] = htonl(chunk_offsets[i] >> 32); + chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + hashwrite(f, chunk_write, 12); + } + + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Writing out commit graph in %d pass", + "Writing out commit graph in %d passes", + num_chunks), + num_chunks); + ctx->progress = start_delayed_progress( + progress_title.buf, + num_chunks * ctx->commits.nr); + } + write_graph_chunk_fanout(f, ctx); + write_graph_chunk_oids(f, hashsz, ctx); + write_graph_chunk_data(f, hashsz, ctx); + if (ctx->num_extra_edges) + write_graph_chunk_extra_edges(f, ctx); + stop_progress(&ctx->progress); + strbuf_release(&progress_title); + + close_commit_graph(ctx->r); + finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + commit_lock_file(&lk); + + return 0; +} + +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + unsigned int flags) +{ + struct write_commit_graph_context *ctx; + uint32_t i, count_distinct = 0; int res = 0; if (!commit_graph_compatible(the_repository)) @@ -1096,75 +1166,10 @@ int write_commit_graph(const char *obj_dir, compute_generation_numbers(ctx); - num_chunks = ctx->num_extra_edges ? 4 : 3; - - ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); - if (safe_create_leading_directories(ctx->graph_name)) { - UNLEAK(ctx->graph_name); - error(_("unable to create leading directories of %s"), - ctx->graph_name); - res = -1; - goto cleanup; - } - - hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); - f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); - - hashwrite_be32(f, GRAPH_SIGNATURE); - - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ - - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; - chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; - chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (ctx->num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; - else - chunk_ids[3] = 0; - chunk_ids[4] = 0; - - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; - - for (i = 0; i <= num_chunks; i++) { - uint32_t chunk_write[3]; - - chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); - hashwrite(f, chunk_write, 12); - } - - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Writing out commit graph in %d pass", - "Writing out commit graph in %d passes", - num_chunks), - num_chunks); - ctx->progress = start_delayed_progress( - progress_title.buf, - num_chunks * ctx->commits.nr); - } - write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, hashsz, ctx); - write_graph_chunk_data(f, hashsz, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - stop_progress(&ctx->progress); - strbuf_release(&progress_title); - - close_commit_graph(ctx->r); - finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); - commit_lock_file(&lk); + res = write_commit_graph_file(ctx); cleanup: - free(graph_name); + free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); free(ctx); -- cgit v1.2.3 From c3a3a964b29221a9b5fa305a08037c90b9f74be0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 17 May 2019 11:41:47 -0700 Subject: commit-graph: use raw_object_store when closing The close_commit_graph() method took a repository struct, but then only uses the raw_object_store within. Change the function prototype to make the method more flexible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 8 ++++---- commit-graph.h | 2 +- upload-pack.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 9d2c72f5b4..76d189de45 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -359,10 +359,10 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } -void close_commit_graph(struct repository *r) +void close_commit_graph(struct raw_object_store *o) { - free_commit_graph(r->objects->commit_graph); - r->objects->commit_graph = NULL; + free_commit_graph(o->commit_graph); + o->commit_graph = NULL; } static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) @@ -1086,7 +1086,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); strbuf_release(&progress_title); - close_commit_graph(ctx->r); + close_commit_graph(ctx->r->objects); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); diff --git a/commit-graph.h b/commit-graph.h index 01538b5cf5..390c7f6961 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -82,7 +82,7 @@ int write_commit_graph(const char *obj_dir, int verify_commit_graph(struct repository *r, struct commit_graph *g); -void close_commit_graph(struct repository *); +void close_commit_graph(struct raw_object_store *); void free_commit_graph(struct commit_graph *); #endif diff --git a/upload-pack.c b/upload-pack.c index d098ef5982..b51bed21e4 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -717,7 +717,7 @@ static void deepen_by_rev_list(struct packet_writer *writer, int ac, { struct commit_list *result; - close_commit_graph(the_repository); + close_commit_graph(the_repository->objects); result = get_shallow_commits_by_rev_list(ac, av, SHALLOW, NOT_SHALLOW); send_shallow(writer, result); free_commit_list(result); -- cgit v1.2.3 From d4f4d60f6da8b93f768b2b4958c04cc1b3cea443 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: commit-graph: prepare for commit-graph chains To prepare for a chain of commit-graph files, augment the commit_graph struct to point to a base commit_graph. As we load commits from the graph, we may actually want to read from a base file according to the graph position. The "graph position" of a commit is given by concatenating the lexicographic commit orders from each of the commit-graph files in the chain. This means that we must distinguish two values: * lexicographic index : the position within the lexicographic order in a single commit-graph file. * graph position: the position within the concatenated order of multiple commit-graph files Given the lexicographic index of a commit in a graph, we can compute the graph position by adding the number of commits in the lower-level graphs. To find the lexicographic index of a commit, we subtract the number of commits in lower-level graphs. While here, change insert_parent_or_die() to take a uint32_t position, as that is the type used by its only caller and that makes more sense with the limits in the commit-graph format. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- commit-graph.h | 3 ++ 2 files changed, 81 insertions(+), 11 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 76d189de45..8f5c09363c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -359,9 +359,18 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } +static void close_commit_graph_one(struct commit_graph *g) +{ + if (!g) + return; + + close_commit_graph_one(g->base_graph); + free_commit_graph(g); +} + void close_commit_graph(struct raw_object_store *o) { - free_commit_graph(o->commit_graph); + close_commit_graph_one(o->commit_graph); o->commit_graph = NULL; } @@ -371,18 +380,38 @@ static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t g->chunk_oid_lookup, g->hash_len, pos); } +static void load_oid_from_graph(struct commit_graph *g, + uint32_t pos, + struct object_id *oid) +{ + uint32_t lex_index; + + while (g && pos < g->num_commits_in_base) + g = g->base_graph; + + if (!g) + BUG("NULL commit-graph"); + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + lex_index = pos - g->num_commits_in_base; + + hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index); +} + static struct commit_list **insert_parent_or_die(struct repository *r, struct commit_graph *g, - uint64_t pos, + uint32_t pos, struct commit_list **pptr) { struct commit *c; struct object_id oid; - if (pos >= g->num_commits) - die("invalid parent position %"PRIu64, pos); + if (pos >= g->num_commits + g->num_commits_in_base) + die("invalid parent position %"PRIu32, pos); - hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); + load_oid_from_graph(g, pos, &oid); c = lookup_commit(r, &oid); if (!c) die(_("could not find commit %s"), oid_to_hex(&oid)); @@ -392,7 +421,14 @@ static struct commit_list **insert_parent_or_die(struct repository *r, static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) { - const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + const unsigned char *commit_data; + uint32_t lex_index; + + while (pos < g->num_commits_in_base) + g = g->base_graph; + + lex_index = pos - g->num_commits_in_base; + commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; item->graph_pos = pos; item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; } @@ -405,10 +441,25 @@ static int fill_commit_in_graph(struct repository *r, uint32_t *parent_data_ptr; uint64_t date_low, date_high; struct commit_list **pptr; - const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos; + const unsigned char *commit_data; + uint32_t lex_index; - item->object.parsed = 1; + while (pos < g->num_commits_in_base) + g = g->base_graph; + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + /* + * Store the "full" position, but then use the + * "local" position for the rest of the calculation. + */ item->graph_pos = pos; + lex_index = pos - g->num_commits_in_base; + + commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; + + item->object.parsed = 1; item->maybe_tree = NULL; @@ -452,7 +503,18 @@ static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uin *pos = item->graph_pos; return 1; } else { - return bsearch_graph(g, &(item->object.oid), pos); + struct commit_graph *cur_g = g; + uint32_t lex_index; + + while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index)) + cur_g = cur_g->base_graph; + + if (cur_g) { + *pos = lex_index + cur_g->num_commits_in_base; + return 1; + } + + return 0; } } @@ -492,8 +554,13 @@ static struct tree *load_tree_for_commit(struct repository *r, struct commit *c) { struct object_id oid; - const unsigned char *commit_data = g->chunk_commit_data + - GRAPH_DATA_WIDTH * (c->graph_pos); + const unsigned char *commit_data; + + while (c->graph_pos < g->num_commits_in_base) + g = g->base_graph; + + commit_data = g->chunk_commit_data + + GRAPH_DATA_WIDTH * (c->graph_pos - g->num_commits_in_base); hashcpy(oid.hash, commit_data); c->maybe_tree = lookup_tree(r, &oid); diff --git a/commit-graph.h b/commit-graph.h index 390c7f6961..5819910a5b 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,9 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; + uint32_t num_commits_in_base; + struct commit_graph *base_graph; + const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; -- cgit v1.2.3 From 3cbc6ed3ee02ab7c8254d51528ca75644c52d52d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: commit-graph: rename commit_compare to oid_compare The helper function commit_compare() actually compares object_id structs, not commits. A future change to commit-graph.c will need to sort commit structs, so rename this function in advance. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 8f5c09363c..96b07a674e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -770,7 +770,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, } } -static int commit_compare(const void *_a, const void *_b) +static int oid_compare(const void *_a, const void *_b) { const struct object_id *a = (const struct object_id *)_a; const struct object_id *b = (const struct object_id *)_b; @@ -1039,7 +1039,7 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) _("Counting distinct commits in commit graph"), ctx->oids.nr); display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); + QSORT(ctx->oids.list, ctx->oids.nr, oid_compare); for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); -- cgit v1.2.3 From 5c84b3396c73382eb49285b4e4bbb1bf82be3a9c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:25 -0700 Subject: commit-graph: load commit-graph chains Prepare the logic for reading a chain of commit-graphs. First, look for a file at $OBJDIR/info/commit-graph. If it exists, then use that file and stop. Next, look for the chain file at $OBJDIR/info/commit-graphs/commit-graph-chain. If this file exists, then load the hash values as line-separated values in that file and load $OBJDIR/info/commit-graphs/graph-{hash[i]}.graph for each hash[i] in that file. The file is given in order, so the first hash corresponds to the "base" file and the final hash corresponds to the "tip" file. This implementation assumes that all of the graph-{hash}.graph files are in the same object directory as the commit-graph-chain file. This will be updated in a future change. This change is purposefully simple so we can isolate the different concerns. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 106 insertions(+), 6 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 96b07a674e..f7dfc6aecd 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -45,6 +45,19 @@ char *get_commit_graph_filename(const char *obj_dir) return xstrfmt("%s/info/commit-graph", obj_dir); } +static char *get_split_graph_filename(const char *obj_dir, + const char *oid_hex) +{ + return xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); +} + +static char *get_chain_filename(const char *obj_dir) +{ + return xstrfmt("%s/info/commit-graphs/commit-graph-chain", obj_dir); +} + static uint8_t oid_version(void) { return 1; @@ -286,18 +299,105 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) return load_commit_graph_one_fd_st(fd, &st); } +static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) +{ + char *graph_name = get_commit_graph_filename(obj_dir); + struct commit_graph *g = load_commit_graph_one(graph_name); + free(graph_name); + + return g; +} + +static int add_graph_to_chain(struct commit_graph *g, + struct commit_graph *chain, + struct object_id *oids, + int n) +{ + struct commit_graph *cur_g = chain; + + while (n) { + n--; + cur_g = cur_g->base_graph; + } + + g->base_graph = chain; + + if (chain) + g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base; + + return 1; +} + +static struct commit_graph *load_commit_graph_chain(struct repository *r, const char *obj_dir) +{ + struct commit_graph *graph_chain = NULL; + struct strbuf line = STRBUF_INIT; + struct stat st; + struct object_id *oids; + int i = 0, valid = 1, count; + char *chain_name = get_chain_filename(obj_dir); + FILE *fp; + int stat_res; + + fp = fopen(chain_name, "r"); + stat_res = stat(chain_name, &st); + free(chain_name); + + if (!fp || + stat_res || + st.st_size <= the_hash_algo->hexsz) + return NULL; + + count = st.st_size / (the_hash_algo->hexsz + 1); + oids = xcalloc(count, sizeof(struct object_id)); + + for (i = 0; i < count && valid; i++) { + char *graph_name; + struct commit_graph *g; + + if (strbuf_getline_lf(&line, fp) == EOF) + break; + + if (get_oid_hex(line.buf, &oids[i])) { + warning(_("invalid commit-graph chain: line '%s' not a hash"), + line.buf); + valid = 0; + break; + } + + graph_name = get_split_graph_filename(obj_dir, line.buf); + g = load_commit_graph_one(graph_name); + free(graph_name); + + if (g && add_graph_to_chain(g, graph_chain, oids, i)) + graph_chain = g; + else + valid = 0; + } + + free(oids); + fclose(fp); + + return graph_chain; +} + +static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +{ + struct commit_graph *g = load_commit_graph_v1(r, obj_dir); + + if (!g) + g = load_commit_graph_chain(r, obj_dir); + + return g; +} + static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) { - char *graph_name; if (r->objects->commit_graph) return; - graph_name = get_commit_graph_filename(obj_dir); - r->objects->commit_graph = - load_commit_graph_one(graph_name); - - FREE_AND_NULL(graph_name); + r->objects->commit_graph = read_commit_graph_one(r, obj_dir); } /* -- cgit v1.2.3 From 118bd570029f5610abdbf1a220e87d3a5c241f5f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:26 -0700 Subject: commit-graph: add base graphs chunk To quickly verify a commit-graph chain is valid on load, we will read from the new "Base Graphs Chunk" of each file in the chain. This will prevent accidentally loading incorrect data from manually editing the commit-graph-chain file or renaming graph-{hash}.graph files. The commit_graph struct already had an object_id struct "oid", but it was never initialized or used. Add a line to read the hash from the end of the commit-graph file and into the oid member. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph-format.txt | 11 +++++++++-- commit-graph.c | 22 ++++++++++++++++++++++ commit-graph.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 16452a0504..a4f17441ae 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -44,8 +44,9 @@ HEADER: 1-byte number (C) of "chunks" - 1-byte (reserved for later use) - Current clients should ignore this value. + 1-byte number (B) of base commit-graphs + We infer the length (H*B) of the Base Graphs chunk + from this value. CHUNK LOOKUP: @@ -92,6 +93,12 @@ CHUNK DATA: positions for the parents until reaching a value with the most-significant bit on. The other bits correspond to the position of the last parent. + Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional] + This list of H-byte hashes describe a set of B commit-graph files that + form a commit-graph chain. The graph position for the ith commit in this + file's OID Lookup chunk is equal to i plus the number of commits in all + base graphs. If B is non-zero, this chunk must exist. + TRAILER: H-byte HASH-checksum of all of the above. diff --git a/commit-graph.c b/commit-graph.c index f7dfc6aecd..6f7961d071 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -22,6 +22,7 @@ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ +#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -262,6 +263,12 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, else graph->chunk_extra_edges = data + chunk_offset; break; + + case GRAPH_CHUNKID_BASE: + if (graph->chunk_base_graphs) + chunk_repeated = 1; + else + graph->chunk_base_graphs = data + chunk_offset; } if (chunk_repeated) { @@ -280,6 +287,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, last_chunk_offset = chunk_offset; } + hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len); + if (verify_commit_graph_lite(graph)) return NULL; @@ -315,8 +324,21 @@ static int add_graph_to_chain(struct commit_graph *g, { struct commit_graph *cur_g = chain; + if (n && !g->chunk_base_graphs) { + warning(_("commit-graph has no base graphs chunk")); + return 0; + } + while (n) { n--; + + if (!cur_g || + !oideq(&oids[n], &cur_g->oid) || + !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) { + warning(_("commit-graph chain does not match")); + return 0; + } + cur_g = cur_g->base_graph; } diff --git a/commit-graph.h b/commit-graph.h index 5819910a5b..6e7d42cf32 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -55,6 +55,7 @@ struct commit_graph { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; const unsigned char *chunk_extra_edges; + const unsigned char *chunk_base_graphs; }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); -- cgit v1.2.3 From 144354b054c99b2231be6bae517eed5f8da55268 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: commit-graph: rearrange chunk count logic The number of chunks in a commit-graph file can change depending on whether we need the Extra Edges Chunk. We are going to add more optional chunks, and it will be helpful to rearrange this logic around the chunk count before doing so. Specifically, we need to finalize the number of chunks before writing the commit-graph header. Further, we also need to fill out the chunk lookup table dynamically and using "num_chunks" as we add optional chunks is useful for adding optional chunks in the future. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 6f7961d071..f2163e109f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1213,7 +1213,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) uint64_t chunk_offsets[5]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; - int num_chunks = ctx->num_extra_edges ? 4 : 3; + int num_chunks = 3; ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { @@ -1226,27 +1226,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); - hashwrite_be32(f, GRAPH_SIGNATURE); - - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (ctx->num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; - else - chunk_ids[3] = 0; - chunk_ids[4] = 0; + if (ctx->num_extra_edges) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + num_chunks++; + } + + chunk_ids[num_chunks] = 0; chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; + + num_chunks = 3; + if (ctx->num_extra_edges) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + 4 * ctx->num_extra_edges; + num_chunks++; + } + + hashwrite_be32(f, GRAPH_SIGNATURE); + + hashwrite_u8(f, GRAPH_VERSION); + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; -- cgit v1.2.3 From 6c622f9f0bbb38a23341dc4294f56d0d909b3d50 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: commit-graph: write commit-graph chains Extend write_commit_graph() to write a commit-graph chain when given the COMMIT_GRAPH_SPLIT flag. This implementation is purposefully simplistic in how it creates a new chain. The commits not already in the chain are added to a new tip commit-graph file. Much of the logic around writing a graph-{hash}.graph file and updating the commit-graph-chain file is the same as the commit-graph file case. However, there are several places where we need to do some extra logic in the split case. Track the list of graph filenames before and after the planned write. This will be more important when we start merging graph files, but it also allows us to upgrade our commit-graph file to the appropriate graph-{hash}.graph file when we upgrade to a chain of commit-graphs. Note that we use the eighth byte of the commit-graph header to store the number of base graph files. This determines the length of the base graphs chunk. A subtle change of behavior with the new logic is that we do not write a commit-graph if we our commit list is empty. This extends to the typical case, which is reflected in t5318-commit-graph.sh. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++-- commit-graph.h | 2 + t/t5318-commit-graph.sh | 2 +- 3 files changed, 278 insertions(+), 12 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index f2163e109f..f0698b0599 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -300,12 +300,18 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) struct stat st; int fd; + struct commit_graph *g; int open_ok = open_commit_graph(graph_file, &fd, &st); if (!open_ok) return NULL; - return load_commit_graph_one_fd_st(fd, &st); + g = load_commit_graph_one_fd_st(fd, &st); + + if (g) + g->filename = xstrdup(graph_file); + + return g; } static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) @@ -730,8 +736,19 @@ struct write_commit_graph_context { struct progress *progress; int progress_done; uint64_t progress_cnt; + + char *base_graph_name; + int num_commit_graphs_before; + int num_commit_graphs_after; + char **commit_graph_filenames_before; + char **commit_graph_filenames_after; + char **commit_graph_hash_after; + uint32_t new_num_commits_in_base; + struct commit_graph *new_base_graph; + unsigned append:1, - report_progress:1; + report_progress:1, + split:1; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -801,6 +818,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -821,6 +848,17 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.list, ctx->commits.nr, commit_to_sha1); + + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -878,6 +916,16 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -969,7 +1017,13 @@ static void close_reachable(struct write_commit_graph_context *ctx) display_progress(ctx->progress, i + 1); commit = lookup_commit(ctx->r, &ctx->oids.list[i]); - if (commit && !parse_commit_no_graph(commit)) + if (!commit) + continue; + if (ctx->split) { + if (!parse_commit(commit) && + commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + add_missing_parents(ctx, commit); + } else if (!parse_commit_no_graph(commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1165,8 +1219,16 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); - if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) { + if (ctx->split) { + struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + } + count_distinct++; + } } stop_progress(&ctx->progress); @@ -1189,7 +1251,13 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) continue; + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc); ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (ctx->split && + ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); for (parent = ctx->commits.list[ctx->commits.nr]->parents; @@ -1204,18 +1272,86 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static int write_graph_chunk_base_1(struct hashfile *f, + struct commit_graph *g) +{ + int num = 0; + + if (!g) + return 0; + + num = write_graph_chunk_base_1(f, g->base_graph); + hashwrite(f, g->oid.hash, the_hash_algo->rawsz); + return num + 1; +} + +static int write_graph_chunk_base(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + int num = write_graph_chunk_base_1(f, ctx->new_base_graph); + + if (num != ctx->num_commit_graphs_after - 1) { + error(_("failed to write correct number of base graph ids")); + return -1; + } + + return 0; +} + +static void init_commit_graph_chain(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t i; + + ctx->new_base_graph = g; + ctx->base_graph_name = xstrdup(g->filename); + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + if (ctx->num_commit_graphs_before) + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = + get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); + + i = ctx->num_commit_graphs_before - 1; + + while (g) { + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + i--; + g = g->base_graph; + } +} + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; + int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[5]; - uint64_t chunk_offsets[5]; + uint32_t chunk_ids[6]; + uint64_t chunk_offsets[6]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + struct object_id file_hash; + + if (ctx->split) { + struct strbuf tmp_file = STRBUF_INIT; + + strbuf_addf(&tmp_file, + "%s/info/commit-graphs/tmp_graph_XXXXXX", + ctx->obj_dir); + ctx->graph_name = strbuf_detach(&tmp_file, NULL); + } else { + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + } - ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { UNLEAK(ctx->graph_name); error(_("unable to create leading directories of %s"), @@ -1223,8 +1359,23 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return -1; } - hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); - f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + if (ctx->split) { + char *lock_name = get_chain_filename(ctx->obj_dir); + + hold_lock_file_for_update(&lk, lock_name, LOCK_DIE_ON_ERROR); + + fd = git_mkstemp_mode(ctx->graph_name, 0444); + if (fd < 0) { + error(_("unable to create '%s'"), ctx->graph_name); + return -1; + } + + f = hashfd(fd, ctx->graph_name); + } else { + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); + fd = lk.tempfile->fd; + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; @@ -1233,6 +1384,10 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + num_chunks++; + } chunk_ids[num_chunks] = 0; @@ -1247,13 +1402,18 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) 4 * ctx->num_extra_edges; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + hashsz * (ctx->num_commit_graphs_after - 1); + num_chunks++; + } hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_u8(f, GRAPH_VERSION); hashwrite_u8(f, oid_version()); hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); + hashwrite_u8(f, ctx->num_commit_graphs_after - 1); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; @@ -1279,11 +1439,67 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_data(f, hashsz, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); + if (ctx->num_commit_graphs_after > 1 && + write_graph_chunk_base(f, ctx)) { + return -1; + } stop_progress(&ctx->progress); strbuf_release(&progress_title); + if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { + char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); + char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + + free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name; + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash; + } + close_commit_graph(ctx->r->objects); - finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + + if (ctx->split) { + FILE *chainf = fdopen_lock_file(&lk, "w"); + char *final_graph_name; + int result; + + close(fd); + + if (!chainf) { + error(_("unable to open commit-graph chain file")); + return -1; + } + + if (ctx->base_graph_name) { + result = rename(ctx->base_graph_name, + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } + } else { + char *graph_name = get_commit_graph_filename(ctx->obj_dir); + unlink(graph_name); + } + + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash)); + final_graph_name = get_split_graph_filename(ctx->obj_dir, + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name; + + result = rename(ctx->graph_name, final_graph_name); + + for (i = 0; i < ctx->num_commit_graphs_after; i++) + fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]); + + if (result) { + error(_("failed to rename temporary commit-graph file")); + return -1; + } + } + commit_lock_file(&lk); return 0; @@ -1306,6 +1522,30 @@ int write_commit_graph(const char *obj_dir, ctx->obj_dir = obj_dir; ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; + ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + + if (ctx->split) { + struct commit_graph *g; + prepare_commit_graph(ctx->r); + + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->num_commit_graphs_before++; + g = g->base_graph; + } + + if (ctx->num_commit_graphs_before) { + ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before); + i = ctx->num_commit_graphs_before; + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename); + g = g->base_graph; + } + } + } ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; @@ -1360,6 +1600,14 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } + if (!ctx->commits.nr) + goto cleanup; + + if (ctx->split) + init_commit_graph_chain(ctx); + else + ctx->num_commit_graphs_after = 1; + compute_generation_numbers(ctx); res = write_commit_graph_file(ctx); @@ -1368,6 +1616,21 @@ cleanup: free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + + if (ctx->commit_graph_filenames_after) { + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + free(ctx->commit_graph_filenames_after[i]); + free(ctx->commit_graph_hash_after[i]); + } + + for (i = 0; i < ctx->num_commit_graphs_before; i++) + free(ctx->commit_graph_filenames_before[i]); + + free(ctx->commit_graph_filenames_after); + free(ctx->commit_graph_filenames_before); + free(ctx->commit_graph_hash_after); + } + free(ctx); return res; @@ -1555,5 +1818,6 @@ void free_commit_graph(struct commit_graph *g) g->data = NULL; close(g->graph_fd); } + free(g->filename); free(g); } diff --git a/commit-graph.h b/commit-graph.h index 6e7d42cf32..c321834533 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -47,6 +47,7 @@ struct commit_graph { unsigned char num_chunks; uint32_t num_commits; struct object_id oid; + char *filename; uint32_t num_commits_in_base; struct commit_graph *base_graph; @@ -71,6 +72,7 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_APPEND (1 << 0) #define COMMIT_GRAPH_PROGRESS (1 << 1) +#define COMMIT_GRAPH_SPLIT (1 << 2) /* * The write_commit_graph* methods return zero on success diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3b6fd0d728..063f906b3e 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' ' test_expect_success 'write graph with no packs' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write --object-dir . && - test_path_is_file info/commit-graph + test_path_is_missing info/commit-graph ' test_expect_success 'close with correct error on bad input' ' -- cgit v1.2.3 From 135a7123755bfdde05da18012bebd2776f82b26c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:28 -0700 Subject: commit-graph: add --split option to builtin Add a new "--split" option to the 'git commit-graph write' subcommand. This option allows the optional behavior of writing a commit-graph chain. The current behavior will add a tip commit-graph containing any commits that are not in the existing commit-graph or commit-graph chain. Later changes will allow merging the chain and expiring out-dated files. Add a new test script (t5324-split-commit-graph.sh) that demonstrates this behavior. Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 10 ++-- commit-graph.c | 14 +++-- t/t5324-split-commit-graph.sh | 122 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 8 deletions(-) create mode 100755 t/t5324-split-commit-graph.sh (limited to 'commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index d8efa5bab2..ff8bc3c8b9 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -35,9 +35,9 @@ static struct opts_commit_graph { int stdin_packs; int stdin_commits; int append; + int split; } opts; - static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; @@ -156,6 +156,8 @@ static int graph_write(int argc, const char **argv) N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_BOOL(0, "split", &opts.split, + N_("allow writing an incremental commit-graph file")), OPT_END(), }; @@ -169,6 +171,8 @@ static int graph_write(int argc, const char **argv) opts.obj_dir = get_object_directory(); if (opts.append) flags |= COMMIT_GRAPH_APPEND; + if (opts.split) + flags |= COMMIT_GRAPH_SPLIT; read_replace_refs = 0; diff --git a/commit-graph.c b/commit-graph.c index f0698b0599..1224309e5f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1472,12 +1472,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } if (ctx->base_graph_name) { - result = rename(ctx->base_graph_name, - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + const char *dest = ctx->commit_graph_filenames_after[ + ctx->num_commit_graphs_after - 2]; - if (result) { - error(_("failed to rename base commit-graph file")); - return -1; + if (strcmp(ctx->base_graph_name, dest)) { + result = rename(ctx->base_graph_name, dest); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } } } else { char *graph_name = get_commit_graph_filename(ctx->obj_dir); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh new file mode 100755 index 0000000000..ccd24bd22b --- /dev/null +++ b/t/t5324-split-commit-graph.sh @@ -0,0 +1,122 @@ +#!/bin/sh + +test_description='split commit graph' +. ./test-lib.sh + +GIT_TEST_COMMIT_GRAPH=0 + +test_expect_success 'setup repo' ' + git init && + git config core.commitGraph true && + infodir=".git/objects/info" && + graphdir="$infodir/commit-graphs" && + test_oid_init +' + +graph_read_expect() { + NUM_BASE=0 + if test ! -z $2 + then + NUM_BASE=$2 + fi + cat >expect <<- EOF + header: 43475048 1 1 3 $NUM_BASE + num_commits: $1 + chunks: oid_fanout oid_lookup commit_metadata + EOF + git commit-graph read >output && + test_cmp expect output +} + +test_expect_success 'create commits and write commit-graph' ' + for i in $(test_seq 3) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git commit-graph write --reachable && + test_path_is_file $infodir/commit-graph && + graph_read_expect 3 +' + +graph_git_two_modes() { + git -c core.commitGraph=true $1 >output + git -c core.commitGraph=false $1 >expect + test_cmp expect output +} + +graph_git_behavior() { + MSG=$1 + BRANCH=$2 + COMPARE=$3 + test_expect_success "check normal git operations: $MSG" ' + graph_git_two_modes "log --oneline $BRANCH" && + graph_git_two_modes "log --topo-order $BRANCH" && + graph_git_two_modes "log --graph $COMPARE..$BRANCH" && + graph_git_two_modes "branch -vv" && + graph_git_two_modes "merge-base -a $BRANCH $COMPARE" + ' +} + +graph_git_behavior 'graph exists' commits/3 commits/1 + +verify_chain_files_exist() { + for hash in $(cat $1/commit-graph-chain) + do + test_path_is_file $1/graph-$hash.graph || return 1 + done +} + +test_expect_success 'add more commits, and write a new base graph' ' + git reset --hard commits/1 && + for i in $(test_seq 4 5) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + for i in $(test_seq 6 10) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + git merge commits/4 && + git branch merge/1 && + git reset --hard commits/4 && + git merge commits/6 && + git branch merge/2 && + git commit-graph write --reachable && + graph_read_expect 12 +' + +test_expect_success 'add three more commits, write a tip graph' ' + git reset --hard commits/3 && + git merge merge/1 && + git merge commits/5 && + git merge merge/2 && + git branch merge/3 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 2 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'split commit-graph: merge 3 vs 2' merge/3 merge/2 + +test_expect_success 'add one commit, write a tip graph' ' + test_commit 11 && + git branch commits/11 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 + +test_done -- cgit v1.2.3 From 1771be90c8e4797c2466296d1d570dbfa39d9743 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:29 -0700 Subject: commit-graph: merge commit-graph chains When searching for a commit in a commit-graph chain of G graphs with N commits, the search takes O(G log N) time. If we always add a new tip graph with every write, the linear G term will start to dominate and slow the lookup process. To keep lookups fast, but also keep most incremental writes fast, create a strategy for merging levels of the commit-graph chain. The strategy is detailed in the commit-graph design document, but is summarized by these two conditions: 1. If the number of commits we are adding is more than half the number of commits in the graph below, then merge with that graph. 2. If we are writing more than 64,000 commits into a single graph, then merge with all lower graphs. The numeric values in the conditions above are currently constant, but can become config options in a future update. As we merge levels of the commit-graph chain, check that the commits still exist in the repository. A garbage-collection operation may have removed those commits from the object store and we do not want to persist them in the commit-graph chain. This is a non-issue if the 'git gc' process wrote a new, single-level commit-graph file. After we merge levels, the old graph-{hash}.graph files are no longer referenced by the commit-graph-chain file. We will expire these files in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 80 ++++++++++++++ commit-graph.c | 180 +++++++++++++++++++++++++------ t/t5324-split-commit-graph.sh | 13 +++ 3 files changed, 240 insertions(+), 33 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 1dca3bd8fe..d9c6253b0a 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -186,6 +186,86 @@ positions to refer to their parents, which may be in `graph-{hash1}.graph` or its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 + X2). +Each commit-graph file (except the base, `graph-{hash0}.graph`) contains data +specifying the hashes of all files in the lower layers. In the above example, +`graph-{hash1}.graph` contains `{hash0}` while `graph-{hash2}.graph` contains +`{hash0}` and `{hash1}`. + +## Merging commit-graph files + +If we only added a new commit-graph file on every write, we would run into a +linear search problem through many commit-graph files. Instead, we use a merge +strategy to decide when the stack should collapse some number of levels. + +The diagram below shows such a collapse. As a set of new commits are added, it +is determined by the merge strategy that the files should collapse to +`graph-{hash1}`. Thus, the new commits, the commits in `graph-{hash2}` and +the commits in `graph-{hash1}` should be combined into a new `graph-{hash3}` +file. + + +---------------------+ + | | + | (new commits) | + | | + +---------------------+ + | | + +-----------------------+ +---------------------+ + | graph-{hash2} |->| | + +-----------------------+ +---------------------+ + | | | + +-----------------------+ +---------------------+ + | | | | + | graph-{hash1} |->| | + | | | | + +-----------------------+ +---------------------+ + | tmp_graphXXX + +-----------------------+ + | | + | | + | | + | graph-{hash0} | + | | + | | + | | + +-----------------------+ + +During this process, the commits to write are combined, sorted and we write the +contents to a temporary file, all while holding a `commit-graph-chain.lock` +lock-file. When the file is flushed, we rename it to `graph-{hash3}` +according to the computed `{hash3}`. Finally, we write the new chain data to +`commit-graph-chain.lock`: + +``` + {hash3} + {hash0} +``` + +We then close the lock-file. + +## Merge Strategy + +When writing a set of commits that do not exist in the commit-graph stack of +height N, we default to creating a new file at level N + 1. We then decide to +merge with the Nth level if one of two conditions hold: + + 1. The expected file size for level N + 1 is at least half the file size for + level N. + + 2. Level N + 1 contains more than 64,0000 commits. + +This decision cascades down the levels: when we merge a level we create a new +set of commits that then compares to the next level. + +The first condition bounds the number of levels to be logarithmic in the total +number of commits. The second condition bounds the total number of commits in +a `graph-{hashN}` file and not in the `commit-graph` file, preventing +significant performance issues when the stack merges and another process only +partially reads the previous stack. + +The merge strategy values (2 for the size multiple, 64,000 for the maximum +number of commits) could be extracted into config settings for full +flexibility. + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 diff --git a/commit-graph.c b/commit-graph.c index 1224309e5f..fb3100921c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1298,36 +1298,6 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } -static void init_commit_graph_chain(struct write_commit_graph_context *ctx) -{ - struct commit_graph *g = ctx->r->objects->commit_graph; - uint32_t i; - - ctx->new_base_graph = g; - ctx->base_graph_name = xstrdup(g->filename); - ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; - - ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - - ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); - ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); - - for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) - ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); - - if (ctx->num_commit_graphs_before) - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = - get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); - - i = ctx->num_commit_graphs_before - 1; - - while (g) { - ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); - i--; - g = g->base_graph; - } -} - static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; @@ -1509,6 +1479,145 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } +static int split_strategy_max_commits = 64000; +static float split_strategy_size_mult = 2.0f; + +static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t num_commits = ctx->commits.nr; + uint32_t i; + + g = ctx->r->objects->commit_graph; + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + while (g && (g->num_commits <= split_strategy_size_mult * num_commits || + num_commits > split_strategy_max_commits)) { + num_commits += g->num_commits; + g = g->base_graph; + + ctx->num_commit_graphs_after--; + } + + ctx->new_base_graph = g; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_after && + i < ctx->num_commit_graphs_before; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + i = ctx->num_commit_graphs_before - 1; + g = ctx->r->objects->commit_graph; + + while (g) { + if (i < ctx->num_commit_graphs_after) + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + + i--; + g = g->base_graph; + } +} + +static void merge_commit_graph(struct write_commit_graph_context *ctx, + struct commit_graph *g) +{ + uint32_t i; + uint32_t offset = g->num_commits_in_base; + + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc); + + for (i = 0; i < g->num_commits; i++) { + struct object_id oid; + struct commit *result; + + display_progress(ctx->progress, i + 1); + + load_oid_from_graph(g, i + offset, &oid); + + /* only add commits if they still exist in the repo */ + result = lookup_commit_reference_gently(ctx->r, &oid, 1); + + if (result) { + ctx->commits.list[ctx->commits.nr] = result; + ctx->commits.nr++; + } + } +} + +static int commit_compare(const void *_a, const void *_b) +{ + const struct commit *a = *(const struct commit **)_a; + const struct commit *b = *(const struct commit **)_b; + return oidcmp(&a->object.oid, &b->object.oid); +} + +static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i, num_parents; + struct commit_list *parent; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Scanning merged commits"), + ctx->commits.nr); + + QSORT(ctx->commits.list, ctx->commits.nr, commit_compare); + + ctx->num_extra_edges = 0; + for (i = 0; i < ctx->commits.nr; i++) { + display_progress(ctx->progress, i); + + if (i && oideq(&ctx->commits.list[i - 1]->object.oid, + &ctx->commits.list[i]->object.oid)) { + die(_("unexpected duplicate commit id %s"), + oid_to_hex(&ctx->commits.list[i]->object.oid)); + } else { + num_parents = 0; + for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next) + num_parents++; + + if (num_parents > 2) + ctx->num_extra_edges += num_parents - 2; + } + } + + stop_progress(&ctx->progress); +} + +static void merge_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t current_graph_number = ctx->num_commit_graphs_before; + struct strbuf progress_title = STRBUF_INIT; + + while (g && current_graph_number >= ctx->num_commit_graphs_after) { + current_graph_number--; + + if (ctx->report_progress) { + strbuf_addstr(&progress_title, _("Merging commit-graph")); + ctx->progress = start_delayed_progress(progress_title.buf, 0); + } + + merge_commit_graph(ctx, g); + stop_progress(&ctx->progress); + strbuf_release(&progress_title); + + g = g->base_graph; + } + + if (g) { + ctx->new_base_graph = g; + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + } + + if (ctx->new_base_graph) + ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename); + + sort_and_scan_merged_commits(ctx); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1554,6 +1663,9 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; + if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) + ctx->oids.alloc = split_strategy_max_commits; + if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); if (ctx->r->objects->commit_graph) @@ -1607,9 +1719,11 @@ int write_commit_graph(const char *obj_dir, if (!ctx->commits.nr) goto cleanup; - if (ctx->split) - init_commit_graph_chain(ctx); - else + if (ctx->split) { + split_graph_merge_strategy(ctx); + + merge_commit_graphs(ctx); + } else ctx->num_commit_graphs_after = 1; compute_generation_numbers(ctx); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index ccd24bd22b..5cb5663a30 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -119,4 +119,17 @@ test_expect_success 'add one commit, write a tip graph' ' graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 +test_expect_success 'add one commit, write a merged graph' ' + test_commit 12 && + git branch commits/12 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 2 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 4 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 + test_done -- cgit v1.2.3 From c523035cbd8515d095dc15e9661fd896733bedbc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:30 -0700 Subject: commit-graph: allow cross-alternate chains In an environment like a fork network, it is helpful to have a commit-graph chain that spans both the base repo and the fork repo. The fork is usually a small set of data on top of the large repo, but sometimes the fork is much larger. For example, git-for-windows/git has almost double the number of commits as git/git because it rebases its commits on every major version update. To allow cross-alternate commit-graph chains, we need a few pieces: 1. When looking for a graph-{hash}.graph file, check all alternates. 2. When merging commit-graph chains, do not merge across alternates. 3. When writing a new commit-graph chain based on a commit-graph file in another object directory, do not allow success if the base file has of the name "commit-graph" instead of "commit-graphs/graph-{hash}.graph". Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 40 +++++++++++++++++++++++ commit-graph.c | 56 +++++++++++++++++++++++++------- commit-graph.h | 1 + t/t5324-split-commit-graph.sh | 37 +++++++++++++++++++++ 4 files changed, 123 insertions(+), 11 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index d9c6253b0a..473032e476 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,42 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Chains across multiple object directories + +In a repo with alternates, we look for the `commit-graph-chain` file starting +in the local object directory and then in each alternate. The first file that +exists defines our chain. As we look for the `graph-{hash}` files for +each `{hash}` in the chain file, we follow the same pattern for the host +directories. + +This allows commit-graphs to be split across multiple forks in a fork network. +The typical case is a large "base" repo with many smaller forks. + +As the base repo advances, it will likely update and merge its commit-graph +chain more frequently than the forks. If a fork updates their commit-graph after +the base repo, then it should "reparent" the commit-graph chain onto the new +chain in the base repo. When reading each `graph-{hash}` file, we track +the object directory containing it. During a write of a new commit-graph file, +we check for any changes in the source object directory and read the +`commit-graph-chain` file for that source and create a new file based on those +files. During this "reparent" operation, we necessarily need to collapse all +levels in the fork, as all of the files are invalid against the new base file. + +It is crucial to be careful when cleaning up "unreferenced" `graph-{hash}.graph` +files in this scenario. It falls to the user to define the proper settings for +their custom environment: + + 1. When merging levels in the base repo, the unreferenced files may still be + referenced by chains from fork repos. + + 2. The expiry time should be set to a length of time such that every fork has + time to recompute their commit-graph chain to "reparent" onto the new base + file(s). + + 3. If the commit-graph chain is updated in the base, the fork will not have + access to the new chain until its chain is updated to reference those files. + (This may change in the future [5].) + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 @@ -292,3 +328,7 @@ Related Links [4] https://public-inbox.org/git/20180108154822.54829-1-git@jeffhostetler.com/T/#u A patch to remove the ahead-behind calculation from 'status'. + +[5] https://public-inbox.org/git/f27db281-abad-5043-6d71-cbb083b1c877@gmail.com/ + A discussion of a "two-dimensional graph position" that can allow reading + multiple commit-graph chains at the same time. diff --git a/commit-graph.c b/commit-graph.c index fb3100921c..fba705bc51 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -320,6 +320,9 @@ static struct commit_graph *load_commit_graph_v1(struct repository *r, const cha struct commit_graph *g = load_commit_graph_one(graph_name); free(graph_name); + if (g) + g->obj_dir = obj_dir; + return g; } @@ -379,9 +382,10 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const count = st.st_size / (the_hash_algo->hexsz + 1); oids = xcalloc(count, sizeof(struct object_id)); - for (i = 0; i < count && valid; i++) { - char *graph_name; - struct commit_graph *g; + prepare_alt_odb(r); + + for (i = 0; i < count; i++) { + struct object_directory *odb; if (strbuf_getline_lf(&line, fp) == EOF) break; @@ -393,14 +397,29 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const break; } - graph_name = get_split_graph_filename(obj_dir, line.buf); - g = load_commit_graph_one(graph_name); - free(graph_name); + valid = 0; + for (odb = r->objects->odb; odb; odb = odb->next) { + char *graph_name = get_split_graph_filename(odb->path, line.buf); + struct commit_graph *g = load_commit_graph_one(graph_name); - if (g && add_graph_to_chain(g, graph_chain, oids, i)) - graph_chain = g; - else - valid = 0; + free(graph_name); + + if (g) { + g->obj_dir = odb->path; + + if (add_graph_to_chain(g, graph_chain, oids, i)) { + graph_chain = g; + valid = 1; + } + + break; + } + } + + if (!valid) { + warning(_("unable to find all commit-graph files")); + break; + } } free(oids); @@ -1418,7 +1437,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); - char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + char *new_base_name = get_split_graph_filename(ctx->new_base_graph->obj_dir, new_base_hash); free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); @@ -1493,6 +1512,9 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) while (g && (g->num_commits <= split_strategy_size_mult * num_commits || num_commits > split_strategy_max_commits)) { + if (strcmp(g->obj_dir, ctx->obj_dir)) + break; + num_commits += g->num_commits; g = g->base_graph; @@ -1501,6 +1523,18 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) ctx->new_base_graph = g; + if (ctx->num_commit_graphs_after == 2) { + char *old_graph_name = get_commit_graph_filename(g->obj_dir); + + if (!strcmp(g->filename, old_graph_name) && + strcmp(g->obj_dir, ctx->obj_dir)) { + ctx->num_commit_graphs_after = 1; + ctx->new_base_graph = NULL; + } + + free(old_graph_name); + } + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); diff --git a/commit-graph.h b/commit-graph.h index c321834533..802d35254f 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,7 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; char *filename; + const char *obj_dir; uint32_t num_commits_in_base; struct commit_graph *base_graph; diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 5cb5663a30..46f0832f68 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -90,6 +90,21 @@ test_expect_success 'add more commits, and write a new base graph' ' graph_read_expect 12 ' +test_expect_success 'fork and fail to base a chain on a commit-graph file' ' + test_when_finished rm -rf fork && + git clone . fork && + ( + cd fork && + rm .git/objects/info/commit-graph && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit new-commit && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 1 $graphdir/commit-graph-chain && + verify_chain_files_exist $graphdir + ) +' + test_expect_success 'add three more commits, write a tip graph' ' git reset --hard commits/3 && git merge merge/1 && @@ -132,4 +147,26 @@ test_expect_success 'add one commit, write a merged graph' ' graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 +test_expect_success 'create fork and chain across alternate' ' + git clone . fork && + ( + cd fork && + git config core.commitGraph true && + rm -rf $graphdir && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit 13 && + git branch commits/13 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files && + git -c core.commitGraph=true rev-list HEAD >expect && + git -c core.commitGraph=false rev-list HEAD >actual && + test_cmp expect actual + ) +' + +graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 + test_done -- cgit v1.2.3 From 8d84097f9655a954ea7e94ba08d93e5940ccb2ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:31 -0700 Subject: commit-graph: expire commit-graph files As we merge commit-graph files in a commit-graph chain, we should clean up the files that are no longer used. This change introduces an 'expiry_window' value to the context, which is always zero (for now). We then check the modified time of each graph-{hash}.graph file in the $OBJDIR/info/commit-graphs folder and unlink the files that are older than the expiry_window. Since this is always zero, this immediately clears all unused graph files. We will update the value to match a config setting in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 15 +++++++ commit-graph.c | 69 ++++++++++++++++++++++++++++++++ t/t5324-split-commit-graph.sh | 2 +- 3 files changed, 85 insertions(+), 1 deletion(-) (limited to 'commit-graph.c') diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 473032e476..aed4350a59 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,21 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Deleting graph-{hash} files + +After a new tip file is written, some `graph-{hash}` files may no longer +be part of a chain. It is important to remove these files from disk, eventually. +The main reason to delay removal is that another process could read the +`commit-graph-chain` file before it is rewritten, but then look for the +`graph-{hash}` files after they are deleted. + +To allow holding old split commit-graphs for a while after they are unreferenced, +we update the modified times of the files when they become unreferenced. Then, +we scan the `$OBJDIR/info/commit-graphs/` directory for `graph-{hash}` +files whose modified times are older than a given expiry window. This window +defaults to zero, but can be changed using command-line arguments or a config +setting. + ## Chains across multiple object directories In a repo with alternates, we look for the `commit-graph-chain` file starting diff --git a/commit-graph.c b/commit-graph.c index fba705bc51..0cc2ceb349 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1652,6 +1652,70 @@ static void merge_commit_graphs(struct write_commit_graph_context *ctx) sort_and_scan_merged_commits(ctx); } +static void mark_commit_graphs(struct write_commit_graph_context *ctx) +{ + uint32_t i; + time_t now = time(NULL); + + for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) { + struct stat st; + struct utimbuf updated_time; + + stat(ctx->commit_graph_filenames_before[i], &st); + + updated_time.actime = st.st_atime; + updated_time.modtime = now; + utime(ctx->commit_graph_filenames_before[i], &updated_time); + } +} + +static void expire_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct strbuf path = STRBUF_INIT; + DIR *dir; + struct dirent *de; + size_t dirnamelen; + time_t expire_time = time(NULL); + + strbuf_addstr(&path, ctx->obj_dir); + strbuf_addstr(&path, "/info/commit-graphs"); + dir = opendir(path.buf); + + if (!dir) { + strbuf_release(&path); + return; + } + + strbuf_addch(&path, '/'); + dirnamelen = path.len; + while ((de = readdir(dir)) != NULL) { + struct stat st; + uint32_t i, found = 0; + + strbuf_setlen(&path, dirnamelen); + strbuf_addstr(&path, de->d_name); + + stat(path.buf, &st); + + if (st.st_mtime > expire_time) + continue; + if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph")) + continue; + + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + if (!strcmp(ctx->commit_graph_filenames_after[i], + path.buf)) { + found = 1; + break; + } + } + + if (!found) + unlink(path.buf); + + } +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1764,6 +1828,11 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); + if (ctx->split) { + mark_commit_graphs(ctx); + expire_commit_graphs(ctx); + } + cleanup: free(ctx->graph_name); free(ctx->commits.list); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 46f0832f68..76068ee407 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -141,7 +141,7 @@ test_expect_success 'add one commit, write a merged graph' ' test_path_is_file $graphdir/commit-graph-chain && test_line_count = 2 $graphdir/commit-graph-chain && ls $graphdir/graph-*.graph >graph-files && - test_line_count = 4 graph-files && + test_line_count = 2 graph-files && verify_chain_files_exist $graphdir ' -- cgit v1.2.3 From c2bc6e6ab0ade70c475a73d06326e677e70840d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: commit-graph: create options for split files The split commit-graph feature is now fully implemented, but needs some more run-time configurability. Allow direct callers to 'git commit-graph write --split' to specify the values used in the merge strategy and the expire time. Update the documentation to specify these values. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 21 +++++++++++++- Documentation/technical/commit-graph.txt | 7 +++-- builtin/commit-graph.c | 25 +++++++++++++---- builtin/commit.c | 2 +- builtin/gc.c | 3 +- commit-graph.c | 35 ++++++++++++++++-------- commit-graph.h | 12 ++++++-- t/t5324-split-commit-graph.sh | 47 ++++++++++++++++++++++++++++++++ 8 files changed, 128 insertions(+), 24 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 624470e198..365e145e82 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -26,7 +26,7 @@ OPTIONS Use given directory for the location of packfiles and commit-graph file. This parameter exists to specify the location of an alternate that only has the objects directory, not a full `.git` directory. The - commit-graph file is expected to be at `/info/commit-graph` and + commit-graph file is expected to be in the `/info` directory and the packfiles are expected to be in `/pack`. @@ -51,6 +51,25 @@ or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. ++ +With the `--split` option, write the commit-graph as a chain of multiple +commit-graph files stored in `/info/commit-graphs`. The new commits +not already in the commit-graph are added in a new "tip" file. This file +is merged with the existing file if the following merge conditions are +met: ++ +* If `--size-multiple=` is not specified, let `X` equal 2. If the new +tip file would have `N` commits and the previous tip has `M` commits and +`X` times `N` is greater than `M`, instead merge the two files into a +single file. ++ +* If `--max-commits=` is specified with `M` a positive integer, and the +new tip file would have more than `M` commits, then instead merge the new +tip with the previous tip. ++ +Finally, if `--expire-time=` is not specified, let `datetime` +be the current time. After writing the split commit-graph, delete all +unused commit-graph whose modified times are older than `datetime`. 'read':: diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index aed4350a59..729fbcb32f 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -248,10 +248,11 @@ When writing a set of commits that do not exist in the commit-graph stack of height N, we default to creating a new file at level N + 1. We then decide to merge with the Nth level if one of two conditions hold: - 1. The expected file size for level N + 1 is at least half the file size for - level N. + 1. `--size-multiple=` is specified or X = 2, and the number of commits in + level N is less than X times the number of commits in level N + 1. - 2. Level N + 1 contains more than 64,0000 commits. + 2. `--max-commits=` is specified with non-zero C and the number of commits + in level N + 1 is more than C commits. This decision cascades down the levels: when we merge a level we create a new set of commits that then compares to the next level. diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index ff8bc3c8b9..6c0b5b17e0 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -135,6 +135,7 @@ static int graph_read(int argc, const char **argv) } extern int read_replace_refs; +static struct split_commit_graph_opts split_opts; static int graph_write(int argc, const char **argv) { @@ -158,9 +159,19 @@ static int graph_write(int argc, const char **argv) N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "split", &opts.split, N_("allow writing an incremental commit-graph file")), + OPT_INTEGER(0, "max-commits", &split_opts.max_commits, + N_("maximum number of commits in a non-base split commit-graph")), + OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, + N_("maximum ratio between two levels of a split commit-graph")), + OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time, + N_("maximum number of commits in a non-base split commit-graph")), OPT_END(), }; + split_opts.size_multiple = 2; + split_opts.max_commits = 0; + split_opts.expire_time = 0; + argc = parse_options(argc, argv, NULL, builtin_commit_graph_write_options, builtin_commit_graph_write_usage, 0); @@ -176,8 +187,11 @@ static int graph_write(int argc, const char **argv) read_replace_refs = 0; - if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, flags); + if (opts.reachable) { + if (write_commit_graph_reachable(opts.obj_dir, flags, &split_opts)) + return 1; + return 0; + } string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -197,7 +211,8 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - flags)) + flags, + &split_opts)) result = 1; UNLEAK(lines); diff --git a/builtin/commit.c b/builtin/commit.c index b001ef565d..9216e9c043 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0)) + write_commit_graph_reachable(get_object_directory(), 0, NULL)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 20c8f1bfe8..0aa8eac747 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -666,7 +666,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (gc_write_commit_graph && write_commit_graph_reachable(get_object_directory(), - !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0, + NULL)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 0cc2ceb349..315088d205 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -768,6 +768,8 @@ struct write_commit_graph_context { unsigned append:1, report_progress:1, split:1; + + const struct split_commit_graph_opts *split_opts; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -1116,14 +1118,15 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - flags); + flags, split_opts); string_list_clear(&list, 0); return result; @@ -1498,20 +1501,25 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } -static int split_strategy_max_commits = 64000; -static float split_strategy_size_mult = 2.0f; - static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) { struct commit_graph *g = ctx->r->objects->commit_graph; uint32_t num_commits = ctx->commits.nr; uint32_t i; + int max_commits = 0; + int size_mult = 2; + + if (ctx->split_opts) { + max_commits = ctx->split_opts->max_commits; + size_mult = ctx->split_opts->size_multiple; + } + g = ctx->r->objects->commit_graph; ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - while (g && (g->num_commits <= split_strategy_size_mult * num_commits || - num_commits > split_strategy_max_commits)) { + while (g && (g->num_commits <= size_mult * num_commits || + (max_commits && num_commits > max_commits))) { if (strcmp(g->obj_dir, ctx->obj_dir)) break; @@ -1675,7 +1683,10 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) DIR *dir; struct dirent *de; size_t dirnamelen; - time_t expire_time = time(NULL); + timestamp_t expire_time = time(NULL); + + if (ctx->split_opts && ctx->split_opts->expire_time) + expire_time -= ctx->split_opts->expire_time; strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1719,7 +1730,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags) + unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; @@ -1734,6 +1746,7 @@ int write_commit_graph(const char *obj_dir, ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + ctx->split_opts = split_opts; if (ctx->split) { struct commit_graph *g; @@ -1761,8 +1774,8 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; - if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) - ctx->oids.alloc = split_strategy_max_commits; + if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits) + ctx->oids.alloc = split_opts->max_commits; if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); diff --git a/commit-graph.h b/commit-graph.h index 802d35254f..a84c22a560 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -75,17 +75,25 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_PROGRESS (1 << 1) #define COMMIT_GRAPH_SPLIT (1 << 2) +struct split_commit_graph_opts { + int size_multiple; + int max_commits; + timestamp_t expire_time; +}; + /* * The write_commit_graph* methods return zero on success * and a negative value on failure. Note that if the repository * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags); +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags); + unsigned int flags, + const struct split_commit_graph_opts *split_opts); int verify_commit_graph(struct repository *r, struct commit_graph *g); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 76068ee407..1b699a543c 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -169,4 +169,51 @@ test_expect_success 'create fork and chain across alternate' ' graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 +test_expect_success 'test merge stragety constants' ' + git clone . merge-2 && + ( + cd merge-2 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=2 && + test_line_count = 3 $graphdir/commit-graph-chain + + ) && + git clone . merge-10 && + ( + cd merge-10 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=10 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) && + git clone . merge-10-expire && + ( + cd merge-10-expire && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 15 && + git commit-graph write --reachable --split --size-multiple=10 --expire-time=1980-01-01 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files + ) && + git clone --no-hardlinks . max-commits && + ( + cd max-commits && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 16 && + test_commit 17 && + git commit-graph write --reachable --split --max-commits=1 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) +' + test_done -- cgit v1.2.3 From 3da4b609bb14b13672f64af908706462617f53cb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: commit-graph: verify chains with --shallow mode If we wrote a commit-graph chain, we only modified the tip file in the chain. It is valuable to verify what we wrote, but not waste time checking files we did not write. Add a '--shallow' option to the 'git commit-graph verify' subcommand and check that it does not read the base graph in a two-file chain. Making the verify subcommand read from a chain of commit-graphs takes some rearranging of the builtin code. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 ++- builtin/commit-graph.c | 27 ++++++++++++----- commit-graph.c | 15 +++++++-- commit-graph.h | 6 ++-- t/t5324-split-commit-graph.sh | 62 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 14 deletions(-) (limited to 'commit-graph.c') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 365e145e82..eb5e7865f0 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git commit-graph read' [--object-dir ] -'git commit-graph verify' [--object-dir ] +'git commit-graph verify' [--object-dir ] [--shallow] 'git commit-graph write' [--object-dir ] @@ -80,6 +80,9 @@ Used for debugging purposes. Read the commit-graph file and verify its contents against the object database. Used to check for corrupted data. ++ +With the `--shallow` option, only check the tip commit-graph file in +a chain of split commit-graphs. EXAMPLES diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 6c0b5b17e0..38027b83d9 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -5,17 +5,18 @@ #include "parse-options.h" #include "repository.h" #include "commit-graph.h" +#include "object-store.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; static const char * const builtin_commit_graph_verify_usage[] = { - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), NULL }; @@ -36,6 +37,7 @@ static struct opts_commit_graph { int stdin_commits; int append; int split; + int shallow; } opts; static int graph_verify(int argc, const char **argv) @@ -45,11 +47,14 @@ static int graph_verify(int argc, const char **argv) int open_ok; int fd; struct stat st; + int flags = 0; static struct option builtin_commit_graph_verify_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"), N_("The object directory to store the graph")), + OPT_BOOL(0, "shallow", &opts.shallow, + N_("if the commit-graph is split, only verify the tip file")), OPT_END(), }; @@ -59,21 +64,27 @@ static int graph_verify(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.shallow) + flags |= COMMIT_GRAPH_VERIFY_SHALLOW; graph_name = get_commit_graph_filename(opts.obj_dir); open_ok = open_commit_graph(graph_name, &fd, &st); - if (!open_ok && errno == ENOENT) - return 0; - if (!open_ok) + if (!open_ok && errno != ENOENT) die_errno(_("Could not open commit-graph '%s'"), graph_name); - graph = load_commit_graph_one_fd_st(fd, &st); + FREE_AND_NULL(graph_name); + if (open_ok) + graph = load_commit_graph_one_fd_st(fd, &st); + else + graph = read_commit_graph_one(the_repository, opts.obj_dir); + + /* Return failure if open_ok predicted success */ if (!graph) - return 1; + return !!open_ok; UNLEAK(graph); - return verify_commit_graph(the_repository, graph); + return verify_commit_graph(the_repository, graph, flags); } static int graph_read(int argc, const char **argv) diff --git a/commit-graph.c b/commit-graph.c index 315088d205..f33f4fe009 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -428,7 +428,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const return graph_chain; } -static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) { struct commit_graph *g = load_commit_graph_v1(r, obj_dir); @@ -1887,7 +1887,7 @@ static void graph_report(const char *fmt, ...) #define GENERATION_ZERO_EXISTS 1 #define GENERATION_NUMBER_EXISTS 2 -int verify_commit_graph(struct repository *r, struct commit_graph *g) +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) { uint32_t i, cur_fanout_pos = 0; struct object_id prev_oid, cur_oid, checksum; @@ -1895,6 +1895,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) struct hashfile *f; int devnull; struct progress *progress = NULL; + int local_error = 0; if (!g) { graph_report("no commit-graph file loaded"); @@ -1989,6 +1990,9 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) break; } + /* parse parent in case it is in a base graph */ + parse_commit_in_graph_one(r, g, graph_parents->item); + if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid)) graph_report(_("commit-graph parent for %s is %s != %s"), oid_to_hex(&cur_oid), @@ -2040,7 +2044,12 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) } stop_progress(&progress); - return verify_commit_graph_error; + local_error = verify_commit_graph_error; + + if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph) + local_error |= verify_commit_graph(r, g->base_graph, flags); + + return local_error; } void free_commit_graph(struct commit_graph *g) diff --git a/commit-graph.h b/commit-graph.h index a84c22a560..df9a3b20e4 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -61,7 +61,7 @@ struct commit_graph { }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); - +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir); struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size); @@ -95,7 +95,9 @@ int write_commit_graph(const char *obj_dir, unsigned int flags, const struct split_commit_graph_opts *split_opts); -int verify_commit_graph(struct repository *r, struct commit_graph *g); +#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0) + +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags); void close_commit_graph(struct raw_object_store *); void free_commit_graph(struct commit_graph *); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 1b699a543c..3df90ae58f 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,4 +216,66 @@ test_expect_success 'test merge stragety constants' ' ) ' +corrupt_file() { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc +} + +test_expect_success 'verify hashes along chain, even in shallow' ' + git clone --no-hardlinks . verify && + ( + cd verify && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1760 "\01" && + test_must_fail git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'verify --shallow does not check base contents' ' + git clone --no-hardlinks . verify-shallow && + ( + cd verify-shallow && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1000 "\01" && + git commit-graph verify --shallow && + test_must_fail git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'warn on base graph chunk incorrect' ' + git clone --no-hardlinks . base-chunk && + ( + cd base-chunk && + git commit-graph verify && + base_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1376 "\01" && + git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "commit-graph chain does not match" err + ) +' + +test_expect_success 'verify after commit-graph-chain corruption' ' + git clone --no-hardlinks . verify-chain && + ( + cd verify-chain && + corrupt_file "$graphdir/commit-graph-chain" 60 "G" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "invalid commit-graph chain" err && + corrupt_file "$graphdir/commit-graph-chain" 60 "A" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "unable to find all commit-graph files" err + ) +' + test_done -- cgit v1.2.3 From ba41112a6331e8b454cac6cfd3ee67ae93957a2c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:33 -0700 Subject: commit-graph: clean up chains after flattened write If we write a commit-graph file without the split option, then we write to $OBJDIR/info/commit-graph and start to ignore the chains in $OBJDIR/info/commit-graphs/. Unlink the commit-graph-chain file and expire the graph-{hash}.graph files in $OBJDIR/info/commit-graphs/ during every write. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 12 +++++++++--- t/t5324-split-commit-graph.sh | 12 ++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index f33f4fe009..3599ae664d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1687,6 +1687,12 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (ctx->split_opts && ctx->split_opts->expire_time) expire_time -= ctx->split_opts->expire_time; + if (!ctx->split) { + char *chain_file_name = get_chain_filename(ctx->obj_dir); + unlink(chain_file_name); + free(chain_file_name); + ctx->num_commit_graphs_after = 0; + } strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1841,10 +1847,10 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); - if (ctx->split) { + if (ctx->split) mark_commit_graphs(ctx); - expire_commit_graphs(ctx); - } + + expire_commit_graphs(ctx); cleanup: free(ctx->graph_name); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 3df90ae58f..e8df35c30b 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,6 +216,18 @@ test_expect_success 'test merge stragety constants' ' ) ' +test_expect_success 'remove commit-graph-chain file after flattening' ' + git clone . flatten && + ( + cd flatten && + test_line_count = 2 $graphdir/commit-graph-chain && + git commit-graph write --reachable && + test_path_is_missing $graphdir/commit-graph-chain && + ls $graphdir >graph-files && + test_line_count = 0 graph-files + ) +' + corrupt_file() { file=$1 pos=$2 -- cgit v1.2.3 From 16110c9348fc2022e78f2581968cf428c056dc5b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:36 -0700 Subject: commit-graph: normalize commit-graph filenames When writing commit-graph files, we append path data to an object directory, which may be specified by the user via the '--object-dir' option. If the user supplies a trailing slash, or some other alternative path format, the resulting path may be usable for writing to the correct location. However, when expiring graph files from the /info/commit-graphs directory during a write, we need to compare paths with exact string matches. Normalize the commit-graph filenames to avoid ambiguity. This creates extra allocations, but this is a constant multiple of the number of commit-graph files, which should be a number in the single digits. Further normalize the object directory in the context. Due to a comparison between g->obj_dir and ctx->obj_dir in split_graph_merge_strategy(), a trailing slash would prevent any merging of layers within the same object directory. The check is there to ensure we do not merge across alternates. Update the tests to include a case with this trailing slash problem. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 30 +++++++++++++++++++++++------- t/t5324-split-commit-graph.sh | 7 ++++++- 2 files changed, 29 insertions(+), 8 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 3599ae664d..c91e6f0fb8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -43,15 +43,23 @@ char *get_commit_graph_filename(const char *obj_dir) { - return xstrfmt("%s/info/commit-graph", obj_dir); + char *filename = xstrfmt("%s/info/commit-graph", obj_dir); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_split_graph_filename(const char *obj_dir, const char *oid_hex) { - return xstrfmt("%s/info/commit-graphs/graph-%s.graph", - obj_dir, - oid_hex); + char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_chain_filename(const char *obj_dir) @@ -746,7 +754,7 @@ struct packed_oid_list { struct write_commit_graph_context { struct repository *r; - const char *obj_dir; + char *obj_dir; char *graph_name; struct packed_oid_list oids; struct packed_commit_list commits; @@ -1729,7 +1737,6 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (!found) unlink(path.buf); - } } @@ -1741,6 +1748,7 @@ int write_commit_graph(const char *obj_dir, { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; + size_t len; int res = 0; if (!commit_graph_compatible(the_repository)) @@ -1748,7 +1756,14 @@ int write_commit_graph(const char *obj_dir, ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); ctx->r = the_repository; - ctx->obj_dir = obj_dir; + + /* normalize object dir with no trailing slash */ + ctx->obj_dir = xmallocz(strlen(obj_dir) + 1); + normalize_path_copy(ctx->obj_dir, obj_dir); + len = strlen(ctx->obj_dir); + if (len && ctx->obj_dir[len - 1] == '/') + ctx->obj_dir[len - 1] = 0; + ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; @@ -1856,6 +1871,7 @@ cleanup: free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + free(ctx->obj_dir); if (ctx->commit_graph_filenames_after) { for (i = 0; i < ctx->num_commit_graphs_after; i++) { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 130f2baf44..fc0d00751c 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -163,7 +163,12 @@ test_expect_success 'create fork and chain across alternate' ' test_line_count = 1 graph-files && git -c core.commitGraph=true rev-list HEAD >expect && git -c core.commitGraph=false rev-list HEAD >actual && - test_cmp expect actual + test_cmp expect actual && + test_commit 14 && + git commit-graph write --reachable --split --object-dir=.git/objects/ && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files ) ' -- cgit v1.2.3 From a378509e1c8d817b3abe42bd8b3c8aa2a6f9af8a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 20 Jun 2019 03:41:21 -0400 Subject: object: convert create_object() to use object_id There are no callers left of create_object() that aren't just passing us the "hash" member of a "struct object_id". Let's take the whole struct, which gets us closer to removing all raw sha1 variables. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- blob.c | 3 +-- commit-graph.c | 2 +- commit.c | 3 +-- object.c | 6 +++--- object.h | 2 +- tag.c | 3 +-- tree.c | 3 +-- 7 files changed, 9 insertions(+), 13 deletions(-) (limited to 'commit-graph.c') diff --git a/blob.c b/blob.c index b9c7180b7c..36f9abda19 100644 --- a/blob.c +++ b/blob.c @@ -9,8 +9,7 @@ struct blob *lookup_blob(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_blob_node(r)); + return create_object(r, oid, alloc_blob_node(r)); return object_as_type(r, obj, OBJ_BLOB, 0); } diff --git a/commit-graph.c b/commit-graph.c index 7c5e54875f..5a62131d68 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1214,7 +1214,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); graph_commit = lookup_commit(r, &cur_oid); - odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r)); + odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); if (parse_commit_internal(odb_commit, 0, 0)) { graph_report(_("failed to parse commit %s from object database for commit-graph"), oid_to_hex(&cur_oid)); diff --git a/commit.c b/commit.c index f47c75afae..b71ac195d4 100644 --- a/commit.c +++ b/commit.c @@ -59,8 +59,7 @@ struct commit *lookup_commit(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_commit_node(r)); + return create_object(r, oid, alloc_commit_node(r)); return object_as_type(r, obj, OBJ_COMMIT, 0); } diff --git a/object.c b/object.c index dbfdbe504d..317647da3e 100644 --- a/object.c +++ b/object.c @@ -141,13 +141,13 @@ static void grow_object_hash(struct repository *r) r->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object(struct repository *r, const unsigned char *sha1, void *o) +void *create_object(struct repository *r, const struct object_id *oid, void *o) { struct object *obj = o; obj->parsed = 0; obj->flags = 0; - hashcpy(obj->oid.hash, sha1); + oidcpy(&obj->oid, oid); if (r->parsed_objects->obj_hash_size - 1 <= r->parsed_objects->nr_objs * 2) grow_object_hash(r); @@ -182,7 +182,7 @@ struct object *lookup_unknown_object(const struct object_id *oid) { struct object *obj = lookup_object(the_repository, oid); if (!obj) - obj = create_object(the_repository, oid->hash, + obj = create_object(the_repository, oid, alloc_object_node(the_repository)); return obj; } diff --git a/object.h b/object.h index 47301186a4..0120892bbd 100644 --- a/object.h +++ b/object.h @@ -118,7 +118,7 @@ struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(struct repository *r, const struct object_id *oid); -void *create_object(struct repository *r, const unsigned char *sha1, void *obj); +void *create_object(struct repository *r, const struct object_id *oid, void *obj); void *object_as_type(struct repository *r, struct object *obj, enum object_type type, int quiet); diff --git a/tag.c b/tag.c index 3ae00ba1ab..5db870edb9 100644 --- a/tag.c +++ b/tag.c @@ -102,8 +102,7 @@ struct tag *lookup_tag(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_tag_node(r)); + return create_object(r, oid, alloc_tag_node(r)); return object_as_type(r, obj, OBJ_TAG, 0); } diff --git a/tree.c b/tree.c index 0ebb8c5b02..4720945e6a 100644 --- a/tree.c +++ b/tree.c @@ -199,8 +199,7 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid) { struct object *obj = lookup_object(r, oid); if (!obj) - return create_object(r, oid->hash, - alloc_tree_node(r)); + return create_object(r, oid, alloc_tree_node(r)); return object_as_type(r, obj, OBJ_TREE, 0); } -- cgit v1.2.3