From f905a855b1d1e172ba1e51e03fc5ec531445575e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:39 +0100 Subject: packfile: move the MRU list into the packfile store Packfiles have two lists associated to them: - A list that keeps track of packfiles in the order that they were added to a packfile store. - A list that keeps track of packfiles in most-recently-used order so that packfiles that are more likely to contain a specific object are ordered towards the front. Both of these lists are hosted by `struct packed_git` itself, So to identify all packfiles in a repository you simply need to grab the first packfile and then iterate the `->next` pointers or the MRU list. This pattern has the problem that all packfiles are part of the same list, regardless of whether or not they belong to the same object source. With the upcoming pluggable object database effort this needs to change: packfiles should be contained by a single object source, and reading an object from any such packfile should use that source to look up the object. Consequently, we need to break up the global lists of packfiles into per-object-source lists. A first step towards this goal is to move those lists out of `struct packed_git` and into the packfile store. While the packfile store is currently sitting on the `struct object_database` level, the intent is to push it down one level into the `struct odb_source` in a subsequent patch series. Introduce a new `struct packfile_list` that is used to manage lists of packfiles and use it to store the list of most-recently-used packfiles in `struct packfile_store`. For now, the new list type is only used in a single spot, but we'll expand its usage in subsequent patches. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df1..5348aebbe9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1706,8 +1706,8 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; + struct packfile_list_entry *e; struct odb_source *source; - struct list_head *pos; if (!exclude && local) { /* @@ -1748,12 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - list_for_each(pos, packfile_store_get_packs_mru(the_repository->objects->packfiles)) { - struct packed_git *p = list_entry(pos, struct packed_git, mru); + for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - list_move(&p->mru, - packfile_store_get_packs_mru(the_repository->objects->packfiles)); + packfile_list_prepend(&the_repository->objects->packfiles->mru, p); if (want != -1) return want; } -- cgit v1.2.3 From 0d0e4b5954e97fcdfd0a4120e17e2c570497981a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:42 +0100 Subject: builtin/pack-objects: simplify logic to find kept or nonlocal objects The function `has_sha1_pack_kept_or_nonlocal()` takes an object ID and then searches through packed objects to figure out whether the object exists in a kept or non-local pack. As a performance optimization we remember the packfile that contains a given object ID so that the next call to the function first checks that same packfile again. The way this is written is rather hard to follow though, as the caching mechanism is intertwined with the loop that iterates through the packs. Consequently, we need to do some gymnastics to re-start the iteration if the cached pack does not contain the objects. Refactor this so that we check the cached packfile at the beginning. We don't have to re-verify whether the packfile meets the properties as we have already verified those when storing the pack in `last_found` in the first place. So all we need to do is to use `find_pack_entry_one()` to check whether the pack contains the object ID, and to skip the cached pack in the loop so that we don't search it twice. Furthermore, stop using the `(void *)1` sentinel value and instead use a simple `NULL` pointer to indicate that we don't have a last-found pack yet. This refactoring significantly simplifies the logic and makes it much easier to follow. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5348aebbe9..b83eb8ead1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4388,27 +4388,27 @@ static void add_unreachable_loose_objects(struct rev_info *revs) static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) { - struct packfile_store *packs = the_repository->objects->packfiles; - static struct packed_git *last_found = (void *)1; + static struct packed_git *last_found = NULL; struct packed_git *p; - p = (last_found != (void *)1) ? last_found : - packfile_store_get_packs(packs); + if (last_found && find_pack_entry_one(oid, last_found)) + return 1; - while (p) { - if ((!p->pack_local || p->pack_keep || - p->pack_keep_in_core) && - find_pack_entry_one(oid, p)) { + repo_for_each_pack(the_repository, p) { + /* + * We have already checked `last_found`, so there is no need to + * re-check here. + */ + if (p == last_found) + continue; + + if ((!p->pack_local || p->pack_keep || p->pack_keep_in_core) && + find_pack_entry_one(oid, p)) { last_found = p; return 1; } - if (p == last_found) - p = packfile_store_get_packs(packs); - else - p = p->next; - if (p == last_found) - p = p->next; } + return 0; } -- cgit v1.2.3 From c31bad4f7dcf3e04ae22e7d4a1059fd628acf1a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 30 Oct 2025 11:38:45 +0100 Subject: packfile: track packs via the MRU list exclusively We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- packfile.c | 27 +++++++-------------------- packfile.h | 27 +++++++++++++++++---------- 3 files changed, 26 insertions(+), 32 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b83eb8ead1..0e4e9f8068 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, } } - for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) { + for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) { struct packed_git *p = e->pack; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); if (!exclude && want > 0) - packfile_list_prepend(&the_repository->objects->packfiles->mru, p); + packfile_list_prepend(&the_repository->objects->packfiles->packs, p); if (want != -1) return want; } diff --git a/packfile.c b/packfile.c index 60f2e42876..378b0b1920 100644 --- a/packfile.c +++ b/packfile.c @@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store, if (pack->pack_fd != -1) pack_open_fds++; - packfile_list_prepend(&store->packs, pack); - packfile_list_append(&store->mru, pack); - + packfile_list_append(&store->packs, pack); strmap_put(&store->packs_by_path, pack->pack_name, pack); } @@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a, return -1; } -static void packfile_store_prepare_mru(struct packfile_store *store) -{ - packfile_list_clear(&store->mru); - - for (struct packfile_list_entry *e = store->packs.head; e; e = e->next) - packfile_list_append(&store->mru, e->pack); -} - void packfile_store_prepare(struct packfile_store *store) { struct odb_source *source; @@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store) if (!e->next) store->packs.tail = e; - packfile_store_prepare_mru(store); store->initialized = true; } @@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store) -{ - packfile_store_prepare(store); - return store->mru.head; -} - /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa if (!r->objects->packfiles->packs.head) return 0; - for (l = r->objects->packfiles->mru.head; l; l = l->next) { + for (l = r->objects->packfiles->packs.head; l; l = l->next) { struct packed_git *p = l->pack; if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) { - packfile_list_prepend(&r->objects->packfiles->mru, p); + if (!r->objects->packfiles->skip_mru_updates) + packfile_list_prepend(&r->objects->packfiles->packs, p); return 1; } } @@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, int r = 0; int pack_errors = 0; + repo->objects->packfiles->skip_mru_updates = true; repo_for_each_pack(repo, p) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; @@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb, if (r) break; } + repo->objects->packfiles->skip_mru_updates = false; + return r ? r : pack_errors; } diff --git a/packfile.h b/packfile.h index d95275e666..27ba607e7c 100644 --- a/packfile.h +++ b/packfile.h @@ -79,8 +79,8 @@ struct packfile_store { struct object_database *odb; /* - * The list of packfiles in the order in which they are being added to - * the store. + * The list of packfiles in the order in which they have been most + * recently used. */ struct packfile_list packs; @@ -98,9 +98,6 @@ struct packfile_store { unsigned flags; } kept_cache; - /* A most-recently-used ordered version of the packs list. */ - struct packfile_list mru; - /* * A map of packfile names to packed_git structs for tracking which * packs have been loaded already. @@ -112,6 +109,21 @@ struct packfile_store { * packs. */ bool initialized; + + /* + * Usually, packfiles will be reordered to the front of the `packs` + * list whenever an object is looked up via them. This has the effect + * that packs that contain a lot of accessed objects will be located + * towards the front. + * + * This is usually desireable, but there are exceptions. One exception + * is when the looking up multiple objects in a loop for each packfile. + * In that case, we may easily end up with an infinite loop as the + * packfiles get reordered to the front repeatedly. + * + * Setting this field to `true` thus disables these reorderings. + */ + bool skip_mru_updates; }; /* @@ -171,11 +183,6 @@ void packfile_store_add_pack(struct packfile_store *store, */ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store); -/* - * Get all packs in most-recently-used order. - */ -struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store); - /* * Open the packfile and add it to the store if it isn't yet known. Returns * either the newly opened packfile or the preexisting packfile. Returns a -- cgit v1.2.3