aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-cat-file.txt10
-rw-r--r--builtin/cat-file.c109
-rw-r--r--builtin/prune-packed.c1
-rw-r--r--cache.h56
-rw-r--r--commit-graph.c2
-rw-r--r--object-store.h90
-rw-r--r--packfile.c24
-rw-r--r--packfile.h17
-rw-r--r--sha1-file.c3
-rwxr-xr-xt/t1006-cat-file.sh17
10 files changed, 218 insertions, 111 deletions
diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index f90f09b03f..74013335a1 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -104,6 +104,16 @@ OPTIONS
buffering; this is much more efficient when invoking
`--batch-check` on a large number of objects.
+--unordered::
+ When `--batch-all-objects` is in use, visit objects in an
+ order which may be more efficient for accessing the object
+ contents than hash order. The exact details of the order are
+ unspecified, but if you do not require a specific order, this
+ should generally result in faster output, especially with
+ `--batch`. Note that `cat-file` will still show each object
+ only once, even if it is stored multiple times in the
+ repository.
+
--allow-unknown-type::
Allow -s or -t to query broken/corrupt objects of unknown type.
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 4a44b2404f..08dced2614 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -21,6 +21,7 @@ struct batch_options {
int print_contents;
int buffer_output;
int all_objects;
+ int unordered;
int cmdmode; /* may be 'w' or 'c' for --filters or --textconv */
const char *format;
};
@@ -337,11 +338,11 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
}
}
-static void batch_object_write(const char *obj_name, struct batch_options *opt,
+static void batch_object_write(const char *obj_name,
+ struct strbuf *scratch,
+ struct batch_options *opt,
struct expand_data *data)
{
- struct strbuf buf = STRBUF_INIT;
-
if (!data->skip_object_info &&
oid_object_info_extended(the_repository, &data->oid, &data->info,
OBJECT_INFO_LOOKUP_REPLACE) < 0) {
@@ -351,10 +352,10 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
return;
}
- strbuf_expand(&buf, opt->format, expand_format, data);
- strbuf_addch(&buf, '\n');
- batch_write(opt, buf.buf, buf.len);
- strbuf_release(&buf);
+ strbuf_reset(scratch);
+ strbuf_expand(scratch, opt->format, expand_format, data);
+ strbuf_addch(scratch, '\n');
+ batch_write(opt, scratch->buf, scratch->len);
if (opt->print_contents) {
print_object_or_die(opt, data);
@@ -362,7 +363,9 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
}
}
-static void batch_one_object(const char *obj_name, struct batch_options *opt,
+static void batch_one_object(const char *obj_name,
+ struct strbuf *scratch,
+ struct batch_options *opt,
struct expand_data *data)
{
struct object_context ctx;
@@ -404,42 +407,70 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt,
return;
}
- batch_object_write(obj_name, opt, data);
+ batch_object_write(obj_name, scratch, opt, data);
}
struct object_cb_data {
struct batch_options *opt;
struct expand_data *expand;
+ struct oidset *seen;
+ struct strbuf *scratch;
};
static int batch_object_cb(const struct object_id *oid, void *vdata)
{
struct object_cb_data *data = vdata;
oidcpy(&data->expand->oid, oid);
- batch_object_write(NULL, data->opt, data->expand);
+ batch_object_write(NULL, data->scratch, data->opt, data->expand);
return 0;
}
-static int batch_loose_object(const struct object_id *oid,
- const char *path,
- void *data)
+static int collect_loose_object(const struct object_id *oid,
+ const char *path,
+ void *data)
{
oid_array_append(data, oid);
return 0;
}
-static int batch_packed_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data)
+static int collect_packed_object(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
{
oid_array_append(data, oid);
return 0;
}
+static int batch_unordered_object(const struct object_id *oid, void *vdata)
+{
+ struct object_cb_data *data = vdata;
+
+ if (oidset_insert(data->seen, oid))
+ return 0;
+
+ return batch_object_cb(oid, data);
+}
+
+static int batch_unordered_loose(const struct object_id *oid,
+ const char *path,
+ void *data)
+{
+ return batch_unordered_object(oid, data);
+}
+
+static int batch_unordered_packed(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
+{
+ return batch_unordered_object(oid, data);
+}
+
static int batch_objects(struct batch_options *opt)
{
- struct strbuf buf = STRBUF_INIT;
+ struct strbuf input = STRBUF_INIT;
+ struct strbuf output = STRBUF_INIT;
struct expand_data data;
int save_warning;
int retval = 0;
@@ -454,8 +485,9 @@ static int batch_objects(struct batch_options *opt)
*/
memset(&data, 0, sizeof(data));
data.mark_query = 1;
- strbuf_expand(&buf, opt->format, expand_format, &data);
+ strbuf_expand(&output, opt->format, expand_format, &data);
data.mark_query = 0;
+ strbuf_release(&output);
if (opt->cmdmode)
data.split_on_whitespace = 1;
@@ -473,19 +505,37 @@ static int batch_objects(struct batch_options *opt)
data.info.typep = &data.type;
if (opt->all_objects) {
- struct oid_array sa = OID_ARRAY_INIT;
struct object_cb_data cb;
- for_each_loose_object(batch_loose_object, &sa, 0);
- for_each_packed_object(batch_packed_object, &sa, 0);
if (repository_format_partial_clone)
warning("This repository has extensions.partialClone set. Some objects may not be loaded.");
cb.opt = opt;
cb.expand = &data;
- oid_array_for_each_unique(&sa, batch_object_cb, &cb);
+ cb.scratch = &output;
+
+ if (opt->unordered) {
+ struct oidset seen = OIDSET_INIT;
+
+ cb.seen = &seen;
+
+ for_each_loose_object(batch_unordered_loose, &cb, 0);
+ for_each_packed_object(batch_unordered_packed, &cb,
+ FOR_EACH_OBJECT_PACK_ORDER);
+
+ oidset_clear(&seen);
+ } else {
+ struct oid_array sa = OID_ARRAY_INIT;
+
+ for_each_loose_object(collect_loose_object, &sa, 0);
+ for_each_packed_object(collect_packed_object, &sa, 0);
+
+ oid_array_for_each_unique(&sa, batch_object_cb, &cb);
+
+ oid_array_clear(&sa);
+ }
- oid_array_clear(&sa);
+ strbuf_release(&output);
return 0;
}
@@ -499,14 +549,14 @@ static int batch_objects(struct batch_options *opt)
save_warning = warn_on_object_refname_ambiguity;
warn_on_object_refname_ambiguity = 0;
- while (strbuf_getline(&buf, stdin) != EOF) {
+ while (strbuf_getline(&input, stdin) != EOF) {
if (data.split_on_whitespace) {
/*
* Split at first whitespace, tying off the beginning
* of the string and saving the remainder (or NULL) in
* data.rest.
*/
- char *p = strpbrk(buf.buf, " \t");
+ char *p = strpbrk(input.buf, " \t");
if (p) {
while (*p && strchr(" \t", *p))
*p++ = '\0';
@@ -514,10 +564,11 @@ static int batch_objects(struct batch_options *opt)
data.rest = p;
}
- batch_one_object(buf.buf, opt, &data);
+ batch_one_object(input.buf, &output, opt, &data);
}
- strbuf_release(&buf);
+ strbuf_release(&input);
+ strbuf_release(&output);
warn_on_object_refname_ambiguity = save_warning;
return retval;
}
@@ -586,6 +637,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
N_("show all objects with --batch or --batch-check")),
+ OPT_BOOL(0, "unordered", &batch.unordered,
+ N_("do not order --batch-all-objects output")),
OPT_END()
};
diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c
index 4ff525e50f..a9e7b552b9 100644
--- a/builtin/prune-packed.c
+++ b/builtin/prune-packed.c
@@ -3,6 +3,7 @@
#include "progress.h"
#include "parse-options.h"
#include "packfile.h"
+#include "object-store.h"
static const char * const prune_packed_usage[] = {
N_("git prune-packed [-n | --dry-run] [-q | --quiet]"),
diff --git a/cache.h b/cache.h
index 1398b2a4e4..66d3b91cdb 100644
--- a/cache.h
+++ b/cache.h
@@ -1576,62 +1576,6 @@ extern int odb_mkstemp(struct strbuf *temp_filename, const char *pattern);
extern int odb_pack_keep(const char *name);
/*
- * Iterate over the files in the loose-object parts of the object
- * directory "path", triggering the following callbacks:
- *
- * - loose_object is called for each loose object we find.
- *
- * - loose_cruft is called for any files that do not appear to be
- * loose objects. Note that we only look in the loose object
- * directories "objects/[0-9a-f]{2}/", so we will not report
- * "objects/foobar" as cruft.
- *
- * - loose_subdir is called for each top-level hashed subdirectory
- * of the object directory (e.g., "$OBJDIR/f0"). It is called
- * after the objects in the directory are processed.
- *
- * Any callback that is NULL will be ignored. Callbacks returning non-zero
- * will end the iteration.
- *
- * In the "buf" variant, "path" is a strbuf which will also be used as a
- * scratch buffer, but restored to its original contents before
- * the function returns.
- */
-typedef int each_loose_object_fn(const struct object_id *oid,
- const char *path,
- void *data);
-typedef int each_loose_cruft_fn(const char *basename,
- const char *path,
- void *data);
-typedef int each_loose_subdir_fn(unsigned int nr,
- const char *path,
- void *data);
-int for_each_file_in_obj_subdir(unsigned int subdir_nr,
- struct strbuf *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-int for_each_loose_file_in_objdir(const char *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-int for_each_loose_file_in_objdir_buf(struct strbuf *path,
- each_loose_object_fn obj_cb,
- each_loose_cruft_fn cruft_cb,
- each_loose_subdir_fn subdir_cb,
- void *data);
-
-/*
- * Iterate over loose objects in both the local
- * repository and any alternates repositories (unless the
- * LOCAL_ONLY flag is set).
- */
-#define FOR_EACH_OBJECT_LOCAL_ONLY 0x1
-extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags);
-
-/*
* Set this to 0 to prevent sha1_object_info_extended() from fetching missing
* blobs. This has a difference only if extensions.partialClone is set.
*
diff --git a/commit-graph.c b/commit-graph.c
index 0034740c26..8a1bec7b8a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -730,7 +730,7 @@ void write_commit_graph(const char *obj_dir,
die(_("error adding pack %s"), packname.buf);
if (open_pack_index(p))
die(_("error opening index for %s"), packname.buf);
- for_each_object_in_pack(p, add_packed_commits, &oids);
+ for_each_object_in_pack(p, add_packed_commits, &oids, 0);
close_pack(p);
}
strbuf_release(&packname);
diff --git a/object-store.h b/object-store.h
index e481f7ad41..162156f5dc 100644
--- a/object-store.h
+++ b/object-store.h
@@ -262,4 +262,94 @@ int oid_object_info_extended(struct repository *r,
const struct object_id *,
struct object_info *, unsigned flags);
+/*
+ * Iterate over the files in the loose-object parts of the object
+ * directory "path", triggering the following callbacks:
+ *
+ * - loose_object is called for each loose object we find.
+ *
+ * - loose_cruft is called for any files that do not appear to be
+ * loose objects. Note that we only look in the loose object
+ * directories "objects/[0-9a-f]{2}/", so we will not report
+ * "objects/foobar" as cruft.
+ *
+ * - loose_subdir is called for each top-level hashed subdirectory
+ * of the object directory (e.g., "$OBJDIR/f0"). It is called
+ * after the objects in the directory are processed.
+ *
+ * Any callback that is NULL will be ignored. Callbacks returning non-zero
+ * will end the iteration.
+ *
+ * In the "buf" variant, "path" is a strbuf which will also be used as a
+ * scratch buffer, but restored to its original contents before
+ * the function returns.
+ */
+typedef int each_loose_object_fn(const struct object_id *oid,
+ const char *path,
+ void *data);
+typedef int each_loose_cruft_fn(const char *basename,
+ const char *path,
+ void *data);
+typedef int each_loose_subdir_fn(unsigned int nr,
+ const char *path,
+ void *data);
+int for_each_file_in_obj_subdir(unsigned int subdir_nr,
+ struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+int for_each_loose_file_in_objdir(const char *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+int for_each_loose_file_in_objdir_buf(struct strbuf *path,
+ each_loose_object_fn obj_cb,
+ each_loose_cruft_fn cruft_cb,
+ each_loose_subdir_fn subdir_cb,
+ void *data);
+
+/* Flags for for_each_*_object() below. */
+enum for_each_object_flags {
+ /* Iterate only over local objects, not alternates. */
+ FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+
+ /* Only iterate over packs obtained from the promisor remote. */
+ FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+
+ /*
+ * Visit objects within a pack in packfile order rather than .idx order
+ */
+ FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+};
+
+/*
+ * Iterate over all accessible loose objects without respect to
+ * reachability. By default, this includes both local and alternate objects.
+ * The order in which objects are visited is unspecified.
+ *
+ * Any flags specific to packs are ignored.
+ */
+int for_each_loose_object(each_loose_object_fn, void *,
+ enum for_each_object_flags flags);
+
+/*
+ * Iterate over all accessible packed objects without respect to reachability.
+ * By default, this includes both local and alternate packs.
+ *
+ * Note that some objects may appear twice if they are found in multiple packs.
+ * Each pack is visited in an unspecified order. By default, objects within a
+ * pack are visited in pack-idx order (i.e., sorted by oid).
+ */
+typedef int each_packed_object_fn(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data);
+int for_each_object_in_pack(struct packed_git *p,
+ each_packed_object_fn, void *data,
+ enum for_each_object_flags flags);
+int for_each_packed_object(each_packed_object_fn, void *,
+ enum for_each_object_flags flags);
+
#endif /* OBJECT_STORE_H */
diff --git a/packfile.c b/packfile.c
index 6974903e58..ebcb5742ec 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1885,26 +1885,38 @@ int has_pack_index(const unsigned char *sha1)
return 1;
}
-int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
+int for_each_object_in_pack(struct packed_git *p,
+ each_packed_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
uint32_t i;
int r = 0;
+ if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ load_pack_revindex(p);
+
for (i = 0; i < p->num_objects; i++) {
+ uint32_t pos;
struct object_id oid;
- if (!nth_packed_object_oid(&oid, p, i))
+ if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ pos = p->revindex[i].nr;
+ else
+ pos = i;
+
+ if (!nth_packed_object_oid(&oid, p, pos))
return error("unable to get sha1 of object %u in %s",
- i, p->pack_name);
+ pos, p->pack_name);
- r = cb(&oid, p, i, data);
+ r = cb(&oid, p, pos, data);
if (r)
break;
}
return r;
}
-int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
+int for_each_packed_object(each_packed_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
struct packed_git *p;
int r = 0;
@@ -1921,7 +1933,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
pack_errors = 1;
continue;
}
- r = for_each_object_in_pack(p, cb, data);
+ r = for_each_object_in_pack(p, cb, data, flags);
if (r)
break;
}
diff --git a/packfile.h b/packfile.h
index fa36c473ad..630f35cf31 100644
--- a/packfile.h
+++ b/packfile.h
@@ -149,23 +149,6 @@ extern int has_object_pack(const struct object_id *oid);
extern int has_pack_index(const unsigned char *sha1);
/*
- * Only iterate over packs obtained from the promisor remote.
- */
-#define FOR_EACH_OBJECT_PROMISOR_ONLY 2
-
-/*
- * Iterate over packed objects in both the local
- * repository and any alternates repositories (unless the
- * FOR_EACH_OBJECT_LOCAL_ONLY flag, defined in cache.h, is set).
- */
-typedef int each_packed_object_fn(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
- void *data);
-extern int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data);
-extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags);
-
-/*
* Return 1 if an object in a promisor packfile is or refers to the given
* object, 0 otherwise.
*/
diff --git a/sha1-file.c b/sha1-file.c
index c6ca960eb2..56e5329caf 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -2146,7 +2146,8 @@ static int loose_from_alt_odb(struct alternate_object_database *alt,
return r;
}
-int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
+int for_each_loose_object(each_loose_object_fn cb, void *data,
+ enum for_each_object_flags flags)
{
struct loose_alt_odb_data alt;
int r;
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 13dd510b2e..7f19d591f2 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -550,8 +550,8 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
test_expect_success 'cat-file --batch-all-objects shows all objects' '
# make new repos so we know the full set of objects; we will
# also make sure that there are some packed and some loose
- # objects, some referenced and some not, and that there are
- # some available only via alternates.
+ # objects, some referenced and some not, some duplicates, and that
+ # there are some available only via alternates.
git init all-one &&
(
cd all-one &&
@@ -567,10 +567,23 @@ test_expect_success 'cat-file --batch-all-objects shows all objects' '
cd all-two &&
echo local-unref | git hash-object -w --stdin
) >>expect.unsorted &&
+ git -C all-two rev-parse HEAD:file |
+ git -C all-two pack-objects .git/objects/pack/pack &&
sort <expect.unsorted >expect &&
git -C all-two cat-file --batch-all-objects \
--batch-check="%(objectname)" >actual &&
test_cmp expect actual
'
+# The only user-visible difference is that the objects are no longer sorted,
+# and the resulting sort order is undefined. So we can only check that it
+# produces the same objects as the ordered case, but that at least exercises
+# the code.
+test_expect_success 'cat-file --unordered works' '
+ git -C all-two cat-file --batch-all-objects --unordered \
+ --batch-check="%(objectname)" >actual.unsorted &&
+ sort <actual.unsorted >actual &&
+ test_cmp expect actual
+'
+
test_done