From 891435d55da80ca3654b19834481205be6bdfe33 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 6 Apr 2018 19:09:32 +0000 Subject: treewide: rename tree to maybe_tree Using the commit-graph file to walk commit history removes the large cost of parsing commits during the walk. This exposes a performance issue: lookup_tree() takes a large portion of the computation time, even when Git never uses those trees. In anticipation of lazy-loading these trees, rename the 'tree' member of struct commit to 'maybe_tree'. This serves two purposes: it hints at the future role of possibly being NULL even if the commit has a valid tree, and it allows for unambiguous transformation from simple member access (i.e. commit->maybe_tree) to method access. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 3ff8c84c0e..49c622c556 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -258,7 +258,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin item->graph_pos = pos; hashcpy(oid.hash, commit_data); - item->tree = lookup_tree(&oid); + item->maybe_tree = lookup_tree(&oid); date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; date_low = get_be32(commit_data + g->hash_len + 12); @@ -369,7 +369,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, uint32_t packedDate[2]; parse_commit(*list); - hashwrite(f, (*list)->tree->object.oid.hash, hash_len); + hashwrite(f, (*list)->maybe_tree->object.oid.hash, hash_len); parent = (*list)->parents; -- cgit v1.2.3 From 2e27bd7731d47b374c1c14b53af07b3bfa39fcac Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 6 Apr 2018 19:09:38 +0000 Subject: treewide: replace maybe_tree with accessor methods In anticipation of making trees load lazily, create a Coccinelle script (contrib/coccinelle/commit.cocci) to ensure that all references to the 'maybe_tree' member of struct commit are either mutations or accesses through get_commit_tree() or get_commit_tree_oid(). Apply the Coccinelle script to create the rest of the patch. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- blame.c | 18 +++++++++--------- builtin/checkout.c | 18 ++++++++++++------ builtin/diff.c | 2 +- builtin/fast-export.c | 6 +++--- builtin/log.c | 4 ++-- builtin/reflog.c | 2 +- commit-graph.c | 2 +- contrib/coccinelle/commit.cocci | 30 ++++++++++++++++++++++++++++++ fsck.c | 8 +++++--- http-push.c | 2 +- line-log.c | 4 ++-- list-objects.c | 10 +++++----- log-tree.c | 6 +++--- merge-recursive.c | 4 ++-- notes-merge.c | 9 +++++---- packfile.c | 2 +- pretty.c | 5 +++-- ref-filter.c | 2 +- revision.c | 8 ++++---- sequencer.c | 12 ++++++------ sha1_name.c | 2 +- tree.c | 4 ++-- walker.c | 2 +- 23 files changed, 101 insertions(+), 61 deletions(-) create mode 100644 contrib/coccinelle/commit.cocci (limited to 'commit-graph.c') diff --git a/blame.c b/blame.c index b78e649cac..7f5700b324 100644 --- a/blame.c +++ b/blame.c @@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit *parent, diff_setup_done(&diff_opts); if (is_null_oid(&origin->commit->object.oid)) - do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->maybe_tree->object.oid, - &origin->commit->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), "", &diff_opts); diffcore_std(&diff_opts); @@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit *parent, diff_setup_done(&diff_opts); if (is_null_oid(&origin->commit->object.oid)) - do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->maybe_tree->object.oid, - &origin->commit->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), "", &diff_opts); diffcore_std(&diff_opts); @@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard *sb, diff_opts.flags.find_copies_harder = 1; if (is_null_oid(&target->commit->object.oid)) - do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->maybe_tree->object.oid, - &target->commit->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(target->commit), "", &diff_opts); if (!diff_opts.flags.find_copies_harder) diff --git a/builtin/checkout.c b/builtin/checkout.c index ca6aa29794..4e803ae092 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -484,7 +484,8 @@ static int merge_working_tree(const struct checkout_opts *opts, resolve_undo_clear(); if (opts->force) { - ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1, writeout_error); + ret = reset_tree(get_commit_tree(new_branch_info->commit), + opts, 1, writeout_error); if (ret) return ret; } else { @@ -570,18 +571,23 @@ static int merge_working_tree(const struct checkout_opts *opts, o.verbosity = 0; work = write_tree_from_memory(&o); - ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1, + ret = reset_tree(get_commit_tree(new_branch_info->commit), + opts, 1, writeout_error); if (ret) return ret; o.ancestor = old_branch_info->name; o.branch1 = new_branch_info->name; o.branch2 = "local"; - ret = merge_trees(&o, new_branch_info->commit->maybe_tree, work, - old_branch_info->commit->maybe_tree, &result); + ret = merge_trees(&o, + get_commit_tree(new_branch_info->commit), + work, + get_commit_tree(old_branch_info->commit), + &result); if (ret < 0) exit(128); - ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 0, + ret = reset_tree(get_commit_tree(new_branch_info->commit), + opts, 0, writeout_error); strbuf_release(&o.obuf); if (ret) @@ -1002,7 +1008,7 @@ static int parse_branchname_arg(int argc, const char **argv, *source_tree = parse_tree_indirect(rev); } else { parse_commit_or_die(new_branch_info->commit); - *source_tree = new_branch_info->commit->maybe_tree; + *source_tree = get_commit_tree(new_branch_info->commit); } if (!*source_tree) /* case (1): want a tree */ diff --git a/builtin/diff.c b/builtin/diff.c index 34f18a5f3f..bfefff3a84 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -398,7 +398,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) if (!obj) die(_("invalid object '%s' given."), name); if (obj->type == OBJ_COMMIT) - obj = &((struct commit *)obj)->maybe_tree->object; + obj = &get_commit_tree(((struct commit *)obj))->object; if (obj->type == OBJ_TREE) { obj->flags |= flags; diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 91e526b30d..c1304234cd 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -578,11 +578,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, get_object_mark(&commit->parents->item->object) != 0 && !full_tree) { parse_commit_or_die(commit->parents->item); - diff_tree_oid(&commit->parents->item->maybe_tree->object.oid, - &commit->maybe_tree->object.oid, "", &rev->diffopt); + diff_tree_oid(get_commit_tree_oid(commit->parents->item), + get_commit_tree_oid(commit), "", &rev->diffopt); } else - diff_root_tree_oid(&commit->maybe_tree->object.oid, + diff_root_tree_oid(get_commit_tree_oid(commit), "", &rev->diffopt); /* Export the referenced blobs, and remember the marks. */ diff --git a/builtin/log.c b/builtin/log.c index a1fbc608bc..f4d30d2f3d 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1064,8 +1064,8 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, diff_setup_done(&opts); - diff_tree_oid(&origin->maybe_tree->object.oid, - &head->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(origin), + get_commit_tree_oid(head), "", &opts); diffcore_std(&opts); diff_flush(&opts); diff --git a/builtin/reflog.c b/builtin/reflog.c index 29d80a35a1..1ce4276d31 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -153,7 +153,7 @@ static int commit_is_complete(struct commit *commit) for (i = 0; i < found.nr; i++) { struct commit *c = (struct commit *)found.objects[i].item; - if (!tree_is_complete(&c->maybe_tree->object.oid)) { + if (!tree_is_complete(get_commit_tree_oid(c))) { is_incomplete = 1; c->object.flags |= INCOMPLETE; } diff --git a/commit-graph.c b/commit-graph.c index 49c622c556..264aa6d919 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -369,7 +369,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, uint32_t packedDate[2]; parse_commit(*list); - hashwrite(f, (*list)->maybe_tree->object.oid.hash, hash_len); + hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); parent = (*list)->parents; diff --git a/contrib/coccinelle/commit.cocci b/contrib/coccinelle/commit.cocci new file mode 100644 index 0000000000..ac38525941 --- /dev/null +++ b/contrib/coccinelle/commit.cocci @@ -0,0 +1,30 @@ +@@ +expression c; +@@ +- &c->maybe_tree->object.oid ++ get_commit_tree_oid(c) + +@@ +expression c; +@@ +- c->maybe_tree->object.oid.hash ++ get_commit_tree_oid(c)->hash + +@@ +expression c; +@@ +- c->maybe_tree ++ get_commit_tree(c) + +@@ +expression c; +expression s; +@@ +- get_commit_tree(c) = s ++ c->maybe_tree = s + +@@ +expression c; +@@ +- return get_commit_tree(c); ++ return c->maybe_tree; diff --git a/fsck.c b/fsck.c index 3228ca5bee..695fd71b13 100644 --- a/fsck.c +++ b/fsck.c @@ -396,9 +396,11 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio name = get_object_name(options, &commit->object); if (name) - put_object_name(options, &commit->maybe_tree->object, "%s:", name); + put_object_name(options, &get_commit_tree(commit)->object, + "%s:", name); - result = options->walk((struct object *)commit->maybe_tree, OBJ_TREE, data, options); + result = options->walk((struct object *)get_commit_tree(commit), + OBJ_TREE, data, options); if (result < 0) return result; res = result; @@ -772,7 +774,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, err = fsck_ident(&buffer, &commit->object, options); if (err) return err; - if (!commit->maybe_tree) { + if (!get_commit_tree(commit)) { err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1)); if (err) return err; diff --git a/http-push.c b/http-push.c index d83479f32f..53a217291e 100644 --- a/http-push.c +++ b/http-push.c @@ -1330,7 +1330,7 @@ static int get_delta(struct rev_info *revs, struct remote_lock *lock) int count = 0; while ((commit = get_revision(revs)) != NULL) { - p = process_tree(commit->maybe_tree, p); + p = process_tree(get_commit_tree(commit), p); commit->object.flags |= LOCAL; if (!(commit->object.flags & UNINTERESTING)) count += add_send_request(&commit->object, lock); diff --git a/line-log.c b/line-log.c index e714969ca2..437d44c00a 100644 --- a/line-log.c +++ b/line-log.c @@ -817,8 +817,8 @@ static void queue_diffs(struct line_log_data *range, assert(commit); DIFF_QUEUE_CLEAR(&diff_queued_diff); - diff_tree_oid(parent ? &parent->maybe_tree->object.oid : NULL, - &commit->maybe_tree->object.oid, "", opt); + diff_tree_oid(parent ? get_commit_tree_oid(parent) : NULL, + get_commit_tree_oid(commit), "", opt); if (opt->detect_rename) { filter_diffs_for_paths(range, 1); if (diff_might_be_rename()) diff --git a/list-objects.c b/list-objects.c index bfd09f545c..3eec510357 100644 --- a/list-objects.c +++ b/list-objects.c @@ -195,7 +195,7 @@ static void mark_edge_parents_uninteresting(struct commit *commit, struct commit *parent = parents->item; if (!(parent->object.flags & UNINTERESTING)) continue; - mark_tree_uninteresting(parent->maybe_tree); + mark_tree_uninteresting(get_commit_tree(parent)); if (revs->edge_hint && !(parent->object.flags & SHOWN)) { parent->object.flags |= SHOWN; show_edge(parent); @@ -212,7 +212,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge) struct commit *commit = list->item; if (commit->object.flags & UNINTERESTING) { - mark_tree_uninteresting(commit->maybe_tree); + mark_tree_uninteresting(get_commit_tree(commit)); if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) { commit->object.flags |= SHOWN; show_edge(commit); @@ -227,7 +227,7 @@ void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge) struct commit *commit = (struct commit *)obj; if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING)) continue; - mark_tree_uninteresting(commit->maybe_tree); + mark_tree_uninteresting(get_commit_tree(commit)); if (!(obj->flags & SHOWN)) { obj->flags |= SHOWN; show_edge(commit); @@ -300,8 +300,8 @@ static void do_traverse(struct rev_info *revs, * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway */ - if (commit->maybe_tree) - add_pending_tree(revs, commit->maybe_tree); + if (get_commit_tree(commit)) + add_pending_tree(revs, get_commit_tree(commit)); show_commit(commit, show_data); if (revs->tree_blobs_in_commit_order) diff --git a/log-tree.c b/log-tree.c index 99499af57c..c106bd70df 100644 --- a/log-tree.c +++ b/log-tree.c @@ -806,7 +806,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log return 0; parse_commit_or_die(commit); - oid = &commit->maybe_tree->object.oid; + oid = get_commit_tree_oid(commit); /* Root commit? */ parents = get_saved_parents(opt, commit); @@ -831,7 +831,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log * we merged _in_. */ parse_commit_or_die(parents->item); - diff_tree_oid(&parents->item->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parents->item), oid, "", &opt->diffopt); log_tree_diff_flush(opt); return !opt->loginfo; @@ -846,7 +846,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log struct commit *parent = parents->item; parse_commit_or_die(parent); - diff_tree_oid(&parent->maybe_tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), oid, "", &opt->diffopt); log_tree_diff_flush(opt); diff --git a/merge-recursive.c b/merge-recursive.c index 0f75d75d17..8de6e36527 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -2154,8 +2154,8 @@ int merge_recursive(struct merge_options *o, read_cache(); o->ancestor = "merged common ancestors"; - clean = merge_trees(o, h1->maybe_tree, h2->maybe_tree, - merged_common_ancestors->maybe_tree, + clean = merge_trees(o, get_commit_tree(h1), get_commit_tree(h2), + get_commit_tree(merged_common_ancestors), &mrtree); if (clean < 0) { flush_output(o); diff --git a/notes-merge.c b/notes-merge.c index 1d3edc8942..4a73a2169b 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -600,14 +600,14 @@ int notes_merge(struct notes_merge_options *o, printf("No merge base found; doing history-less merge\n"); } else if (!bases->next) { base_oid = &bases->item->object.oid; - base_tree_oid = &bases->item->maybe_tree->object.oid; + base_tree_oid = get_commit_tree_oid(bases->item); if (o->verbosity >= 4) printf("One merge base found (%.7s)\n", oid_to_hex(base_oid)); } else { /* TODO: How to handle multiple merge-bases? */ base_oid = &bases->item->object.oid; - base_tree_oid = &bases->item->maybe_tree->object.oid; + base_tree_oid = get_commit_tree_oid(bases->item); if (o->verbosity >= 3) printf("Multiple merge bases found. Using the first " "(%.7s)\n", oid_to_hex(base_oid)); @@ -634,8 +634,9 @@ int notes_merge(struct notes_merge_options *o, goto found_result; } - result = merge_from_diffs(o, base_tree_oid, &local->maybe_tree->object.oid, - &remote->maybe_tree->object.oid, local_tree); + result = merge_from_diffs(o, base_tree_oid, + get_commit_tree_oid(local), + get_commit_tree_oid(remote), local_tree); if (result != 0) { /* non-trivial merge (with or without conflicts) */ /* Commit (partial) result */ diff --git a/packfile.c b/packfile.c index 3eb9c4a36e..88ba819151 100644 --- a/packfile.c +++ b/packfile.c @@ -1925,7 +1925,7 @@ static int add_promisor_object(const struct object_id *oid, struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; - oidset_insert(set, &commit->maybe_tree->object.oid); + oidset_insert(set, get_commit_tree_oid(commit)); for (; parents; parents = parents->next) oidset_insert(set, &parents->item->object.oid); } else if (obj->type == OBJ_TAG) { diff --git a/pretty.c b/pretty.c index 42095ea495..15997f5e01 100644 --- a/pretty.c +++ b/pretty.c @@ -1161,10 +1161,11 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET)); return 1; case 'T': /* tree hash */ - strbuf_addstr(sb, oid_to_hex(&commit->maybe_tree->object.oid)); + strbuf_addstr(sb, oid_to_hex(get_commit_tree_oid(commit))); return 1; case 't': /* abbreviated tree hash */ - strbuf_add_unique_abbrev(sb, commit->maybe_tree->object.oid.hash, + strbuf_add_unique_abbrev(sb, + get_commit_tree_oid(commit)->hash, c->pretty_ctx->abbrev); return 1; case 'P': /* parent hashes */ diff --git a/ref-filter.c b/ref-filter.c index a59654cfd3..cffd8bf3ce 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -815,7 +815,7 @@ static void grab_commit_values(struct atom_value *val, int deref, struct object if (deref) name++; if (!strcmp(name, "tree")) { - v->s = xstrdup(oid_to_hex(&commit->maybe_tree->object.oid)); + v->s = xstrdup(oid_to_hex(get_commit_tree_oid(commit))); } else if (!strcmp(name, "numparent")) { v->value = commit_list_count(commit->parents); diff --git a/revision.c b/revision.c index f61d1c9307..d64d2a859e 100644 --- a/revision.c +++ b/revision.c @@ -439,8 +439,8 @@ static void file_change(struct diff_options *options, static int rev_compare_tree(struct rev_info *revs, struct commit *parent, struct commit *commit) { - struct tree *t1 = parent->maybe_tree; - struct tree *t2 = commit->maybe_tree; + struct tree *t1 = get_commit_tree(parent); + struct tree *t2 = get_commit_tree(commit); if (!t1) return REV_TREE_NEW; @@ -476,7 +476,7 @@ static int rev_compare_tree(struct rev_info *revs, static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) { int retval; - struct tree *t1 = commit->maybe_tree; + struct tree *t1 = get_commit_tree(commit); if (!t1) return 0; @@ -614,7 +614,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) if (!revs->prune) return; - if (!commit->maybe_tree) + if (!get_commit_tree(commit)) return; if (!commit->parents) { diff --git a/sequencer.c b/sequencer.c index 9269607a48..abebf6988f 100644 --- a/sequencer.c +++ b/sequencer.c @@ -501,8 +501,8 @@ static int do_recursive_merge(struct commit *base, struct commit *next, o.show_rename_progress = 1; head_tree = parse_tree_indirect(head); - next_tree = next ? next->maybe_tree : empty_tree(); - base_tree = base ? base->maybe_tree : empty_tree(); + next_tree = next ? get_commit_tree(next) : empty_tree(); + base_tree = base ? get_commit_tree(base) : empty_tree(); for (xopt = opts->xopts; xopt != opts->xopts + opts->xopts_nr; xopt++) parse_merge_opt(&o, *xopt); @@ -562,7 +562,7 @@ static int is_index_unchanged(void) return error(_("unable to update cache tree")); return !oidcmp(&active_cache_tree->oid, - &head_commit->maybe_tree->object.oid); + get_commit_tree_oid(head_commit)); } static int write_author_script(const char *message) @@ -1119,7 +1119,7 @@ static int try_to_commit(struct strbuf *msg, const char *author, } if (!(flags & ALLOW_EMPTY) && !oidcmp(current_head ? - ¤t_head->maybe_tree->object.oid : + get_commit_tree_oid(current_head) : &empty_tree_oid, &tree)) { res = 1; /* run 'git commit' to display error message */ goto out; @@ -1217,12 +1217,12 @@ static int is_original_commit_empty(struct commit *commit) if (parse_commit(parent)) return error(_("could not parse parent commit %s"), oid_to_hex(&parent->object.oid)); - ptree_oid = &parent->maybe_tree->object.oid; + ptree_oid = get_commit_tree_oid(parent); } else { ptree_oid = the_hash_algo->empty_tree; /* commit is root */ } - return !oidcmp(ptree_oid, &commit->maybe_tree->object.oid); + return !oidcmp(ptree_oid, get_commit_tree_oid(commit)); } /* diff --git a/sha1_name.c b/sha1_name.c index 6ca2a2de37..c9dead6b80 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -896,7 +896,7 @@ struct object *peel_to_type(const char *name, int namelen, if (o->type == OBJ_TAG) o = ((struct tag*) o)->tagged; else if (o->type == OBJ_COMMIT) - o = &(((struct commit *) o)->maybe_tree->object); + o = &(get_commit_tree(((struct commit *)o))->object); else { if (name) error("%.*s: expected %s type, but the object " diff --git a/tree.c b/tree.c index dbc5e0be54..dec53f3eca 100644 --- a/tree.c +++ b/tree.c @@ -109,7 +109,7 @@ static int read_tree_1(struct tree *tree, struct strbuf *base, oid_to_hex(entry.oid), base->buf, entry.path); - oidcpy(&oid, &commit->maybe_tree->object.oid); + oidcpy(&oid, get_commit_tree_oid(commit)); } else continue; @@ -248,7 +248,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid) if (obj->type == OBJ_TREE) return (struct tree *) obj; else if (obj->type == OBJ_COMMIT) - obj = &(((struct commit *) obj)->maybe_tree->object); + obj = &(get_commit_tree(((struct commit *)obj))->object); else if (obj->type == OBJ_TAG) obj = ((struct tag *) obj)->tagged; else diff --git a/walker.c b/walker.c index 1d5f3059a2..f51b855872 100644 --- a/walker.c +++ b/walker.c @@ -87,7 +87,7 @@ static int process_commit(struct walker *walker, struct commit *commit) walker_say(walker, "walk %s\n", oid_to_hex(&commit->object.oid)); if (walker->get_tree) { - if (process(walker, &commit->maybe_tree->object)) + if (process(walker, &get_commit_tree(commit)->object)) return -1; if (!walker->get_all) walker->get_tree = 0; -- cgit v1.2.3 From 7b8a21dba1bce44d64bd86427d3d92437adc4707 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 6 Apr 2018 19:09:46 +0000 Subject: commit-graph: lazy-load trees for commits The commit-graph file provides quick access to commit data, including the OID of the root tree for each commit in the graph. When performing a deep commit-graph walk, we may not need to load most of the trees for these commits. Delay loading the tree object for a commit loaded from the graph until requested via get_commit_tree(). Do not lazy-load trees for commits not in the graph, since that requires duplicate parsing and the relative peformance improvement when trees are not needed is small. On the Linux repository, performance tests were run for the following command: git log --graph --oneline -1000 Before: 0.92s After: 0.66s Rel %: -28.3% Adding '-- kernel/' to the command requires loading the root tree for every commit that is walked. There was no measureable performance change as a result of this patch. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 26 +++++++++++++++++++++++--- commit-graph.h | 2 ++ commit.c | 8 +++++++- commit.h | 6 ++++++ 4 files changed, 38 insertions(+), 4 deletions(-) (limited to 'commit-graph.c') diff --git a/commit-graph.c b/commit-graph.c index 264aa6d919..70fa1b25fd 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) { - struct object_id oid; uint32_t edge_value; uint32_t *parent_data_ptr; uint64_t date_low, date_high; @@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin item->object.parsed = 1; item->graph_pos = pos; - hashcpy(oid.hash, commit_data); - item->maybe_tree = lookup_tree(&oid); + item->maybe_tree = NULL; date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; date_low = get_be32(commit_data + g->hash_len + 12); @@ -317,6 +315,28 @@ int parse_commit_in_graph(struct commit *item) return 0; } +static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) +{ + struct object_id oid; + const unsigned char *commit_data = g->chunk_commit_data + + GRAPH_DATA_WIDTH * (c->graph_pos); + + hashcpy(oid.hash, commit_data); + c->maybe_tree = lookup_tree(&oid); + + return c->maybe_tree; +} + +struct tree *get_commit_tree_in_graph(const struct commit *c) +{ + if (c->maybe_tree) + return c->maybe_tree; + if (c->graph_pos == COMMIT_NOT_FROM_GRAPH) + BUG("get_commit_tree_in_graph called from non-commit-graph commit"); + + return load_tree_for_commit(commit_graph, (struct commit *)c); +} + static void write_graph_chunk_fanout(struct hashfile *f, struct commit **commits, int nr_commits) diff --git a/commit-graph.h b/commit-graph.h index e1d8580c98..260a468e73 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -17,6 +17,8 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +struct tree *get_commit_tree_in_graph(const struct commit *c); + struct commit_graph { int graph_fd; diff --git a/commit.c b/commit.c index aea2ca1f8b..711f674c18 100644 --- a/commit.c +++ b/commit.c @@ -298,7 +298,13 @@ void free_commit_buffer(struct commit *commit) struct tree *get_commit_tree(const struct commit *commit) { - return commit->maybe_tree; + if (commit->maybe_tree || !commit->object.parsed) + return commit->maybe_tree; + + if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + BUG("commit has NULL tree, but was not loaded from commit-graph"); + + return get_commit_tree_in_graph(commit); } struct object_id *get_commit_tree_oid(const struct commit *commit) diff --git a/commit.h b/commit.h index dc4bf97d9f..23a3f364ed 100644 --- a/commit.h +++ b/commit.h @@ -22,6 +22,12 @@ struct commit { unsigned int index; timestamp_t date; struct commit_list *parents; + + /* + * If the commit is loaded from the commit-graph file, then this + * member may be NULL. Only access it through get_commit_tree() + * or get_commit_tree_oid(). + */ struct tree *maybe_tree; uint32_t graph_pos; }; -- cgit v1.2.3