1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
#include "builtin.h"
#include "git-compat-util.h"
#include "config.h"
#include "parse-options.h"
#include "repository.h"
#include "commit.h"
#include "hex.h"
#include "tree.h"
#include "tree-walk.h"
#include "object.h"
#include "object-store-ll.h"
#include "oid-array.h"
#include "oidset.h"
#include "promisor-remote.h"
#include "strmap.h"
#include "string-list.h"
#include "revision.h"
#include "trace2.h"
#include "progress.h"
#include "packfile.h"
#include "path-walk.h"
/*
 * Usage synopsis for "git backfill". The array is NULL-terminated;
 * cmd_backfill() passes its first entry to show_usage_if_asked() and the
 * whole array to parse_options().
 */
static const char * const builtin_backfill_usage[] = {
N_("git backfill [--min-batch-size=<n>]"),
NULL
};
/*
 * State shared by cmd_backfill(), the path-walk callback
 * fill_missing_blobs() and download_batch().
 */
struct backfill_context {
/* Repository used for the object-existence checks and the downloads. */
struct repository *repo;
/* Blob OIDs found to be missing that have not been requested yet. */
struct oid_array current_batch;
/*
 * Threshold checked after each path: once current_batch holds at least
 * this many OIDs, the batch is downloaded.
 */
size_t min_batch_size;
};
/*
 * Release the pending-OID array held by the context. The context itself
 * and its repository pointer are left untouched.
 */
static void backfill_context_clear(struct backfill_context *ctx)
{
	struct oid_array *pending = &ctx->current_batch;

	oid_array_clear(pending);
}
static void download_batch(struct backfill_context *ctx)
{
promisor_remote_get_direct(ctx->repo,
ctx->current_batch.oid,
ctx->current_batch.nr);
oid_array_clear(&ctx->current_batch);
/*
* We likely have a new packfile. Add it to the packed list to
* avoid possible duplicate downloads of the same objects.
*/
reprepare_packed_git(ctx->repo);
}
/*
 * Path-walk callback (installed as path_fn by do_backfill()).
 *
 * For a list of blob OIDs, queue each one that has_object() reports as
 * absent onto ctx->current_batch, and download the batch once it has
 * reached ctx->min_batch_size entries. Lists of any other object type
 * are ignored.
 *
 * `data` is the struct backfill_context supplied as path_fn_data.
 * Always returns 0.
 */
static int fill_missing_blobs(const char *path UNUSED,
			      struct oid_array *list,
			      enum object_type type,
			      void *data)
{
	struct backfill_context *ctx = data;
	size_t idx = 0;

	if (type != OBJ_BLOB)
		return 0;

	while (idx < list->nr) {
		const struct object_id *candidate = &list->oid[idx++];

		/*
		 * NOTE(review): OBJECT_INFO_FOR_PREFETCH is an OBJECT_INFO_*
		 * constant; confirm that has_object() interprets flags from
		 * that namespace as intended.
		 */
		if (has_object(ctx->repo, candidate,
			       OBJECT_INFO_FOR_PREFETCH))
			continue;

		oid_array_append(&ctx->current_batch, candidate);
	}

	/* Keep accumulating until the batch is large enough to be worth a request. */
	if (ctx->current_batch.nr < ctx->min_batch_size)
		return 0;

	download_batch(ctx);
	return 0;
}
static int do_backfill(struct backfill_context *ctx)
{
struct rev_info revs;
struct path_walk_info info = PATH_WALK_INFO_INIT;
int ret;
repo_init_revisions(ctx->repo, &revs, "");
handle_revision_arg("HEAD", &revs, 0, 0);
info.blobs = 1;
info.tags = info.commits = info.trees = 0;
info.revs = &revs;
info.path_fn = fill_missing_blobs;
info.path_fn_data = ctx;
ret = walk_objects_by_path(&info);
/* Download the objects that did not fill a batch. */
if (!ret)
download_batch(ctx);
path_walk_info_clear(&info);
release_revisions(&revs);
return ret;
}
/*
 * Entry point for "git backfill".
 *
 * Parses --min-batch-size=<n> (default 50000), loads the default
 * configuration for `repo`, and runs do_backfill(). Returns the value
 * of do_backfill(); dies if a negative batch size is given.
 */
int cmd_backfill(int argc, const char **argv, const char *prefix, struct repository *repo)
{
	int result;
	/*
	 * OPT_INTEGER() is documented to store into an "int" variable, while
	 * the context field is a size_t. Parse into a real int and convert
	 * afterwards, so the option never writes only part of the wider
	 * field and a negative value cannot turn into a huge threshold.
	 */
	int min_batch_size = 50000;
	struct backfill_context ctx = {
		.repo = repo,
		.current_batch = OID_ARRAY_INIT,
	};
	struct option options[] = {
		OPT_INTEGER(0, "min-batch-size", &min_batch_size,
			    N_("Minimum number of objects to request at a time")),
		OPT_END(),
	};

	show_usage_if_asked(argc, argv, builtin_backfill_usage[0]);

	argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
			     0);

	if (min_batch_size < 0)
		die(_("--min-batch-size must be non-negative"));
	ctx.min_batch_size = (size_t)min_batch_size;

	repo_config(repo, git_default_config, NULL);

	result = do_backfill(&ctx);
	backfill_context_clear(&ctx);
	return result;
}
|