/*
 * path: root/odb/source.h
 * blob: caac5581495ecead5b14d95acfe95329a47c6418
 */
#ifndef ODB_SOURCE_H
#define ODB_SOURCE_H

#include "object.h"

enum odb_source_type {
	/*
	 * Placeholder type that no real source should ever carry. It is
	 * pinned to zero so that a type field which was merely zeroed out
	 * and never assigned can be recognized as such.
	 */
	ODB_SOURCE_UNKNOWN = 0,

	/* Backend that stores objects as loose objects and in packfiles. */
	ODB_SOURCE_FILES = 1,
};

/*
 * Bit flags accepted by `odb_read_object_info_extended()`. Flags may be
 * combined with a bitwise OR.
 */
enum object_info_flags {
	/* Call lookup_replace_object() on the given hash. */
	OBJECT_INFO_LOOKUP_REPLACE = 1 << 0,

	/* When the first lookup fails, do not reprepare the object sources. */
	OBJECT_INFO_QUICK = 1 << 1,

	/*
	 * Never try to fetch a missing object, not even when
	 * fetch_is_missing is nonzero.
	 */
	OBJECT_INFO_SKIP_FETCH_OBJECT = 1 << 2,

	/*
	 * Die when object corruption was detected, as opposed to the object
	 * merely being missing.
	 */
	OBJECT_INFO_DIE_IF_CORRUPT = 1 << 3,

	/*
	 * Marks the second attempt at reading an object: a first read has
	 * already been done, but none of the attached sources had the
	 * object. During this second read the individual sources are asked
	 * to additionally check whether on-disk state has changed in a way
	 * that may have made the object appear.
	 *
	 * For internal use only. No second read happens when
	 * `OBJECT_INFO_QUICK` was passed.
	 */
	OBJECT_INFO_SECOND_READ = 1 << 4,

	/*
	 * Intended for bulk prefetching of missing blobs in a partial clone.
	 * Implies both OBJECT_INFO_QUICK and OBJECT_INFO_SKIP_FETCH_OBJECT.
	 */
	OBJECT_INFO_FOR_PREFETCH = OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT,
};

/*
 * Forward declarations. This header only ever refers to these structures
 * through pointers, so their full definitions are not needed here.
 */
struct object_id;
struct object_info;
struct odb_read_stream;
struct odb_transaction;
struct odb_write_stream;
struct strvec;

/*
 * Signature of the function that gets invoked for every object visited while
 * iterating through objects. The `oi` parameter is optional; if given, it is
 * populated in the same way a call to `odb_read_object_info()` would
 * populate it.
 *
 * A non-zero return value aborts the iteration, and that value is propagated
 * to the caller.
 */
typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
				      struct object_info *oi, void *cb_data);

/*
 * The source is the part of the object database that stores the actual
 * objects. It thus encapsulates the logic to read and write the specific
 * on-disk format. An object database can have multiple sources:
 *
 *   - The primary source, which is typically located in "$GIT_DIR/objects".
 *     This is where new objects are usually written to.
 *
 *   - Alternate sources, which are configured via "objects/info/alternates" or
 *     via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These
 *     alternate sources are only used to read objects.
 *
 * Besides a handful of data members, the structure consists of callbacks that
 * every backend provides. The `odb_source_*()` inline wrappers declared in
 * this header dispatch to these callbacks.
 */
struct odb_source {
	/*
	 * NOTE(review): presumably the next source in the list of sources of
	 * the owning object database -- inferred from the name, confirm
	 * against the code that links sources together.
	 */
	struct odb_source *next;

	/* Object database that owns this object source. */
	struct object_database *odb;

	/* The type used by this source. */
	enum odb_source_type type;

	/*
	 * Whether this is the local source of the owning repository, which
	 * would typically be its ".git/objects" directory. This local object
	 * directory is usually where objects would be written to.
	 */
	bool local;

	/*
	 * This object store is ephemeral, so there is no need to fsync.
	 */
	int will_destroy;

	/*
	 * Path to the source. If this is a relative path, it is relative to
	 * the current working directory.
	 */
	char *path;

	/*
	 * This callback is expected to free the underlying object database source and
	 * all associated resources. The function will never be called with a NULL pointer.
	 */
	void (*free)(struct odb_source *source);

	/*
	 * This callback is expected to close any open resources, like for
	 * example file descriptors or connections. The source is expected to
	 * still be usable after it has been closed. Closed resources may need
	 * to be reopened in that case.
	 */
	void (*close)(struct odb_source *source);

	/*
	 * This callback is expected to clear underlying caches of the object
	 * database source. The function is called when the repository has for
	 * example just been repacked so that new objects will become visible.
	 */
	void (*reprepare)(struct odb_source *source);

	/*
	 * This callback is expected to read object information from the object
	 * database source. The object info will be partially populated with
	 * pointers for each bit of information that was requested by the
	 * caller.
	 *
	 * The flags field is a combination of `OBJECT_INFO` flags. Only the
	 * following flags need to be handled by the backend:
	 *
	 *   - `OBJECT_INFO_QUICK` indicates it is fine to use caches without
	 *     re-verifying the data.
	 *
	 *   - `OBJECT_INFO_SECOND_READ` indicates that the initial object
	 *     lookup has failed and that the object sources should check
	 *     whether any of its on-disk state has changed that may have
	 *     caused the object to appear. Sources are free to ignore the
	 *     second read in case they know that the first read would have
	 *     already surfaced the object without reloading any on-disk state.
	 *
	 * The callback is expected to return a negative error code in case
	 * reading the object has failed, 0 otherwise.
	 */
	int (*read_object_info)(struct odb_source *source,
				const struct object_id *oid,
				struct object_info *oi,
				enum object_info_flags flags);

	/*
	 * This callback is expected to create a new read stream that can be
	 * used to stream the object identified by the given ID.
	 *
	 * The callback is expected to return a negative error code in case
	 * creating the object stream has failed, 0 otherwise.
	 */
	int (*read_object_stream)(struct odb_read_stream **out,
				  struct odb_source *source,
				  const struct object_id *oid);

	/*
	 * This callback is expected to iterate over all objects stored in this
	 * source and invoke the callback function for each of them. It is
	 * valid to yield the same object multiple times. A non-zero exit code
	 * from the object callback shall abort iteration.
	 *
	 * The optional `request` structure should serve as a template for
	 * looking up object info for every individual iterated object. It
	 * should not be modified directly and should instead be copied into a
	 * separate `struct object_info` that gets passed to the callback. If
	 * the caller passes a `NULL` pointer then the object itself shall not
	 * be read.
	 *
	 * The flags field is a bitfield of `ODB_FOR_EACH_OBJECT_*` flags.
	 * Whether or not a given flag is honored is defined by the backend.
	 *
	 * The callback is expected to return a negative error code in case the
	 * iteration has failed to read all objects, 0 otherwise. When the
	 * callback function returns a non-zero error code then that error code
	 * should be returned.
	 */
	int (*for_each_object)(struct odb_source *source,
			       const struct object_info *request,
			       odb_for_each_object_cb cb,
			       void *cb_data,
			       unsigned flags);

	/*
	 * This callback is expected to freshen the given object so that its
	 * last access time is set to the current time. This is used to ensure
	 * that objects that are recent will not get garbage collected even if
	 * they were unreachable.
	 *
	 * Returns 0 in case the object does not exist, 1 in case the object
	 * has been freshened.
	 */
	int (*freshen_object)(struct odb_source *source,
			      const struct object_id *oid);

	/*
	 * This callback is expected to persist the given object into the
	 * object source. In case the object already exists it shall be
	 * freshened.
	 *
	 * The flags field is a combination of `WRITE_OBJECT` flags.
	 *
	 * The resulting object ID (and optionally the compatibility object ID)
	 * shall be written into the out pointers. The callback is expected to
	 * return 0 on success, a negative error code otherwise.
	 */
	int (*write_object)(struct odb_source *source,
			    const void *buf, unsigned long len,
			    enum object_type type,
			    struct object_id *oid,
			    struct object_id *compat_oid,
			    unsigned flags);

	/*
	 * This callback is expected to persist the given object stream into
	 * the object source.
	 *
	 * The resulting object ID shall be written into the out pointer. The
	 * callback is expected to return 0 on success, a negative error code
	 * otherwise.
	 */
	int (*write_object_stream)(struct odb_source *source,
				   struct odb_write_stream *stream, size_t len,
				   struct object_id *oid);

	/*
	 * This callback is expected to create a new transaction that can be
	 * used to write objects to. The objects shall only be persisted into
	 * the object database when the transaction's commit function is
	 * called. Otherwise, the objects shall be discarded.
	 *
	 * Returns 0 on success, in which case the `*out` pointer will have
	 * been populated with the object database transaction. Returns a
	 * negative error code otherwise.
	 */
	int (*begin_transaction)(struct odb_source *source,
				 struct odb_transaction **out);

	/*
	 * This callback is expected to read the list of alternate object
	 * database sources connected to it and write them into the `strvec`.
	 *
	 * The result is expected to be paths to the alternates. All paths must
	 * be resolved to absolute paths.
	 *
	 * The callback is expected to return 0 on success, a negative error
	 * code otherwise.
	 */
	int (*read_alternates)(struct odb_source *source,
			       struct strvec *out);

	/*
	 * This callback is expected to persist the singular alternate passed
	 * to it into its list of alternates. Any pre-existing alternates are
	 * expected to remain active. Subsequent calls to `read_alternates` are
	 * thus expected to yield the pre-existing list of alternates plus the
	 * newly added alternate appended to its end.
	 *
	 * The callback is expected to return 0 on success, a negative error
	 * code otherwise.
	 */
	int (*write_alternate)(struct odb_source *source,
			       const char *alternate);
};

/*
 * Allocate and initialize a new source for the given object database located
 * at `path`. `local` indicates whether or not the source is the local and thus
 * primary object source of the object database.
 *
 * Returns the newly allocated source.
 *
 * NOTE(review): presumably the returned source is to be disposed of via
 * `odb_source_free()` -- confirm against the implementation.
 */
struct odb_source *odb_source_new(struct object_database *odb,
				  const char *path,
				  bool local);

/*
 * Initialize the source for the given object database located at `path`.
 * `local` indicates whether or not the source is the local and thus primary
 * object source of the object database.
 *
 * `type` names the backend the source belongs to.
 * NOTE(review): presumably `type`, `path` and `local` end up in the
 * identically-named members of `struct odb_source` -- confirm against the
 * implementation.
 *
 * This function is only supposed to be called by specific object source
 * implementations.
 */
void odb_source_init(struct odb_source *source,
		     struct object_database *odb,
		     enum odb_source_type type,
		     const char *path,
		     bool local);

/*
 * Free the object database source, releasing all associated resources and
 * freeing the structure itself.
 *
 * NOTE(review): presumably dispatches to the source's `free` callback, which
 * is documented to never be called with a NULL pointer -- confirm whether a
 * NULL `source` is tolerated here.
 */
void odb_source_free(struct odb_source *source);

/*
 * Release the object database source, releasing all associated resources.
 * In contrast to `odb_source_free()`, this function is not documented to
 * free the structure itself.
 *
 * This function is only supposed to be called by specific object source
 * implementations.
 */
void odb_source_release(struct odb_source *source);

/*
 * Close the object database source without releasing the underlying data.
 * The source remains usable afterwards, but it has to be reopened first.
 * Closing a source can be useful to reduce resource usage.
 *
 * Dispatches to the `close` callback of the source.
 */
static inline void odb_source_close(struct odb_source *source)
{
	(*source->close)(source);
}

/*
 * Reprepare the object database source, clearing any of its caches. Whether
 * this makes concurrently-written objects visible depends on the backend in
 * use.
 *
 * Dispatches to the `reprepare` callback of the source.
 */
static inline void odb_source_reprepare(struct odb_source *source)
{
	(*source->reprepare)(source);
}

/*
 * Read information about the object identified by `oid` from the object
 * database source into `oi`. `flags` is a combination of `OBJECT_INFO` flags.
 *
 * Dispatches to the `read_object_info` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_read_object_info(struct odb_source *source,
					      const struct object_id *oid,
					      struct object_info *oi,
					      enum object_info_flags flags)
{
	int ret = source->read_object_info(source, oid, oi, flags);

	return ret;
}

/*
 * Create a new read stream for the object identified by `oid`, handing the
 * stream back via `out`.
 *
 * Dispatches to the `read_object_stream` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_read_object_stream(struct odb_read_stream **out,
						struct odb_source *source,
						const struct object_id *oid)
{
	int ret = source->read_object_stream(out, source, oid);

	return ret;
}

/*
 * Walk all objects contained in the given source, invoking the callback
 * function `cb` for each of them. The iteration is aborted as soon as the
 * callback returns a non-zero code. Objects are not guaranteed to be visited
 * only once.
 *
 * The optional `request` structure is a template for retrieving the object
 * info of each individual iterated object, which gets populated as if
 * `odb_source_read_object_info()` had been called on that object. The
 * template itself is not modified; the callback is instead invoked with a
 * separate `struct object_info` for every object. Passing a `NULL` pointer
 * means that no object info is read.
 *
 * `flags` is a bitfield of `ODB_FOR_EACH_OBJECT_*` flags. Not every flag may
 * apply to a specific backend, so whether or not a flag is honored is defined
 * by the implementation.
 *
 * Returns 0 when all objects have been iterated over, a negative error code
 * in case iteration has failed, or the non-zero value returned from the
 * callback.
 */
static inline int odb_source_for_each_object(struct odb_source *source,
					     const struct object_info *request,
					     odb_for_each_object_cb cb,
					     void *cb_data,
					     unsigned flags)
{
	int ret = source->for_each_object(source, request, cb, cb_data, flags);

	return ret;
}

/*
 * Freshen the object identified by `oid` in the object database source by
 * updating its timestamp.
 *
 * Dispatches to the `freshen_object` callback of the source. Returns 0 in
 * case the object does not exist, 1 in case it has been freshened.
 */
static inline int odb_source_freshen_object(struct odb_source *source,
					    const struct object_id *oid)
{
	int freshened = source->freshen_object(source, oid);

	return freshened;
}

/*
 * Write the object of the given `type`, whose content is the `len` bytes at
 * `buf`, into the object database source. The out pointers for the object ID
 * and the compatibility object ID are populated if non-NULL.
 *
 * Dispatches to the `write_object` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_write_object(struct odb_source *source,
					  const void *buf, unsigned long len,
					  enum object_type type,
					  struct object_id *oid,
					  struct object_id *compat_oid,
					  unsigned flags)
{
	int ret;

	ret = source->write_object(source, buf, len, type,
				   oid, compat_oid, flags);
	return ret;
}

/*
 * Write an object into the object database source by reading it from a
 * stream. The overall length of the object has to be known in advance and is
 * passed as `len`. The out pointer for the object ID gets populated.
 *
 * Dispatches to the `write_object_stream` callback of the source. Returns 0
 * on success, a negative error code otherwise.
 */
static inline int odb_source_write_object_stream(struct odb_source *source,
						 struct odb_write_stream *stream,
						 size_t len,
						 struct object_id *oid)
{
	int ret = source->write_object_stream(source, stream, len, oid);

	return ret;
}

/*
 * Read the list of alternate object database sources from the given backend
 * and populate the `strvec` with them. The listing is not recursive: when a
 * yielded alternate source has alternates of its own, those are not yielded
 * by this function call.
 *
 * Dispatches to the `read_alternates` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_read_alternates(struct odb_source *source,
					     struct strvec *out)
{
	int ret = source->read_alternates(source, out);

	return ret;
}

/*
 * Write and persist a new alternate object database source for the given
 * source. Preexisting alternates are expected to stay valid, with the new
 * alternate being appended to the end of the list.
 *
 * Dispatches to the `write_alternate` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_write_alternate(struct odb_source *source,
					      const char *alternate)
{
	int ret = source->write_alternate(source, alternate);

	return ret;
}

/*
 * Create a new transaction through which objects can be written into a
 * temporary staging area. Objects only get persisted once the transaction is
 * committed.
 *
 * Dispatches to the `begin_transaction` callback of the source. Returns 0 on
 * success, a negative error code otherwise.
 */
static inline int odb_source_begin_transaction(struct odb_source *source,
					       struct odb_transaction **out)
{
	int ret = source->begin_transaction(source, out);

	return ret;
}

#endif