2
#include "bulk-checkin.h"
4
#include "environment.h"
8
#include "object-store-ll.h"
13
#include "replace-object.h"
19
/*
 * File-scope state shared by the whole unpacker.
 *
 * dry_run is consulted by get_data() and unpack_one() in this file; the
 * other flags are declared here but the lines that set or read them are
 * not all visible in this view.
 */
static int dry_run, quiet, recover, has_errors, strict;
20
static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]";
22
/* We always read in 4kB chunks. */
23
static unsigned char buffer[4096];
24
/*
 * buffer + offset is the first unconsumed input byte and len is the number
 * of unconsumed bytes that follow it (see fill() and the final
 * write_in_full() call in cmd_unpack_objects()).
 */
static unsigned int offset, len;
25
/* Running total of input bytes accounted for by use(). */
static off_t consumed_bytes;
26
/* Upper bound on consumed_bytes enforced in use(); 0 disables the check. */
static off_t max_input_size;
27
/* Hash context fed with input that has been consumed out of buffer. */
static git_hash_ctx ctx;
28
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
29
/* Progress meter; started in unpack_all() and updated from use(). */
static struct progress *progress;
32
* When running under --strict mode, objects whose reachability is
33
* suspect are kept in core without getting written in the object
41
/*
 * Side table associating a struct object with its obj_buffer; accessed
 * through lookup_decoration() and add_decoration() below.
 */
static struct decoration obj_decorate;
43
/*
 * Return the obj_buffer registered for "base" in obj_decorate, i.e.
 * whatever lookup_decoration() yields for that object.
 */
static struct obj_buffer *lookup_object_buffer(struct object *base)
45
return lookup_decoration(&obj_decorate, base);
48
/*
 * Register an obj_buffer for "object" in obj_decorate.
 *
 * Dies if add_decoration() returns non-zero, which the error message
 * treats as "this object already had a buffer".
 *
 * Note: the "buffer" parameter shadows the file-scope input buffer of the
 * same name within this function.
 */
static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
50
struct obj_buffer *obj;
54
if (add_decoration(&obj_decorate, object, obj))
55
die("object %s tried to add buffer twice!", oid_to_hex(&object->oid));
59
* Make sure at least "min" bytes are available in the buffer, and
60
* return the pointer to the buffer.
62
/*
 * Provide access to unconsumed input, reading more from file descriptor 0
 * when needed.
 *
 * min: number of bytes the caller wants available; must not exceed
 *      sizeof(buffer), otherwise the function dies.
 *
 * The guard on the early return and the read loop are only partly visible
 * in this view.
 */
static void *fill(int min)
65
/* Fast path: hand back the current read position. */
return buffer + offset;
66
/*
 * "min" is an int compared against a size_t, so a negative value is
 * converted to a large unsigned number and also ends up in die().
 */
if (min > sizeof(buffer))
67
die("cannot fill %d bytes", min);
69
/*
 * Feed the already-consumed prefix of the buffer to the hash, then slide
 * the "len" unconsumed bytes down to the start of the buffer.
 */
the_hash_algo->update_fn(&ctx, buffer, offset);
70
memmove(buffer, buffer + offset, len);
74
/* Append new input after the bytes that are still pending. */
ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
78
die_errno("read error on input");
85
/*
 * Account for "bytes" bytes of input having been consumed.
 *
 * Adds to consumed_bytes (dying if the addition would overflow off_t),
 * enforces max_input_size when it is non-zero, and reports the new total
 * to the progress meter.
 *
 * The condition guarding the first die() is not visible here; presumably
 * it fires when bytes exceeds len -- TODO confirm.
 */
static void use(int bytes)
88
die("used more bytes than were available");
92
/* make sure off_t is sufficiently large not to wrap */
93
if (signed_add_overflows(consumed_bytes, bytes))
94
die("pack too large for current definition of off_t");
95
consumed_bytes += bytes;
96
/* max_input_size == 0 means "no limit". */
if (max_input_size && consumed_bytes > max_input_size)
97
die(_("pack exceeds maximum allowed size"));
98
display_throughput(progress, consumed_bytes);
102
* Decompress zstream from the standard input into a newly
103
* allocated buffer of specified size and return the buffer.
104
* The caller is responsible for freeing the returned buffer.
106
* But for dry_run mode, "get_data()" is only used to check the
107
* integrity of data, and the returned buffer is not used at all.
108
* Therefore, in dry_run mode, "get_data()" will release the small
109
* allocated buffer which is reused to hold temporary zstream output
110
* and return NULL instead of returning garbage data.
112
/*
 * Inflate one zlib stream from the input; "size" is the expected number
 * of inflated bytes.
 *
 * Input is pulled with fill() and accounted with use(). The output buffer
 * holds "size" bytes, except in dry_run mode where it is capped at 8192
 * bytes and reused for successive chunks of output.
 *
 * The declaration of "stream", the loop structure and the return
 * statements are not visible in this view.
 */
static void *get_data(unsigned long size)
115
/* In dry_run mode the output is discarded, so a small scratch buffer suffices. */
unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
116
void *buf = xmallocz(bufsize);
118
memset(&stream, 0, sizeof(stream));
120
stream.next_out = buf;
121
stream.avail_out = bufsize;
122
stream.next_in = fill(1);
123
stream.avail_in = len;
124
git_inflate_init(&stream);
127
int ret = git_inflate(&stream, 0);
128
/* Consume exactly the input bytes that inflate took from the buffer. */
use(len - stream.avail_in);
129
/* Completion test: all "size" bytes produced and the stream has ended. */
if (stream.total_out == size && ret == Z_STREAM_END)
132
error("inflate returned %d", ret);
139
/* Point the stream at more input for the next round. */
stream.next_in = fill(1);
140
stream.avail_in = len;
142
/* reuse the buffer in dry_run mode */
143
stream.next_out = buf;
144
/* Never offer more output space than the bytes still expected. */
stream.avail_out = bufsize > size - stream.total_out ?
145
size - stream.total_out :
149
git_inflate_end(&stream);
156
struct object_id base_oid;
161
struct delta_info *next;
164
/*
 * Head of the singly linked list of delta_info entries; add_delta_to_list()
 * links new entries in and added_object() walks the list.
 */
static struct delta_info *delta_list;
166
/*
 * Allocate a delta_info for a delta and link it into delta_list.
 *
 * The entry records the base object's id (copied from base_oid) and
 * base_offset. base_offset is not declared on any line visible here;
 * callers pass it as the third argument, so it is presumably a parameter
 * on a line missing from this view.
 */
static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
168
void *delta, unsigned long size)
170
struct delta_info *info = xmalloc(sizeof(*info));
172
oidcpy(&info->base_oid, base_oid);
173
info->base_offset = base_offset;
177
/* The new entry points at the current head of the list. */
info->next = delta_list;
183
struct object_id oid;
187
/* Remember to update object flag allocation in object.h */
188
/* Set by write_object() on objects it parses and attaches a buffer to. */
#define FLAG_OPEN (1u<<20)
189
/* Set once an object has been written, or verified by check_object(). */
#define FLAG_WRITTEN (1u<<21)
191
/* Per-pack-entry table, indexed by position in the pack; allocated in unpack_all(). */
static struct obj_info *obj_list;
192
/* Number of entries in obj_list, taken from the pack header. */
static unsigned nr_objects;
195
* Called only from check_object() after it verified this object
198
/*
 * Write the buffered contents of "obj" to the object store and mark the
 * object FLAG_WRITTEN. Dies if write_object_file() reports failure.
 *
 * obj:     object whose type is used for the write.
 * obj_buf: buffer and size holding the object's contents.
 */
static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
200
/* Receives the id computed by write_object_file(); not used further in the visible lines. */
struct object_id oid;
202
if (write_object_file(obj_buf->buffer, obj_buf->size,
203
obj->type, &oid) < 0)
204
die("failed to write object %s", oid_to_hex(&obj->oid));
205
obj->flags |= FLAG_WRITTEN;
209
* At the very end of the processing, write_rest() scans the objects
210
* that have reachability requirements and calls this function.
211
* Verify its reachability and validity recursively and write it out.
213
/*
 * Verify "obj" and write it out; also installed as the fsck walk callback
 * so that objects reached through fsck_walk() are checked the same way.
 *
 * obj:  object to check.
 * type: expected type, or OBJ_ANY to accept any type.
 *
 * Return statements and one parameter line are not visible in this view.
 */
static int check_object(struct object *obj, enum object_type type,
215
struct fsck_options *options UNUSED)
217
struct obj_buffer *obj_buf;
222
/* FLAG_WRITTEN is tested first, before any other check. */
if (obj->flags & FLAG_WRITTEN)
225
if (type != OBJ_ANY && obj->type != type)
226
die("object type mismatch");
228
/*
 * Objects without FLAG_OPEN have no in-core buffer from this pack; their
 * type is looked up via oid_object_info() instead.
 */
if (!(obj->flags & FLAG_OPEN)) {
230
/* Note: this local "type" shadows the function parameter. */
int type = oid_object_info(the_repository, &obj->oid, &size);
231
if (type != obj->type || type <= 0)
232
die("object of unexpected type");
233
obj->flags |= FLAG_WRITTEN;
237
obj_buf = lookup_object_buffer(obj);
239
die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
240
if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
241
die("fsck error in packed object");
242
/* Have fsck_walk() call back into this function for each object it visits. */
fsck_options.walk = check_object;
243
if (fsck_walk(obj, NULL, &fsck_options))
244
die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
245
write_cached_object(obj, obj_buf);
249
/*
 * Run check_object() over the "obj" member of every obj_list entry,
 * accepting any object type. Any guard on individual entries is not
 * visible in this view.
 */
static void write_rest(void)
252
for (i = 0; i < nr_objects; i++) {
254
check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL);
258
/*
 * Forward declaration: write_object() calls added_object(), whose
 * definition appears later in the file.
 */
static void added_object(unsigned nr, enum object_type type,
259
void *data, unsigned long size);
262
* Write out nr-th object from the list, now we know the contents
263
* of it. Under --strict, this buffers structured objects in-core,
264
* to be checked at the end.
266
/*
 * Handle the now-known contents of the nr-th pack entry.
 *
 * nr:   index into obj_list.
 * type: object type of the contents.
 * buf:  object contents; size: its length in bytes.
 *
 * Three branches are visible. The condition selecting the first one and
 * the statements that release "buf" are not visible in this view.
 */
static void write_object(unsigned nr, enum object_type type,
267
void *buf, unsigned long size)
270
/* First branch: write immediately and keep no in-core object. */
if (write_object_file(buf, size, type,
271
&obj_list[nr].oid) < 0)
272
die("failed to write object");
273
/* The contents are known now, so deltas waiting on this entry can be resolved. */
added_object(nr, type, buf, size);
275
obj_list[nr].obj = NULL;
276
} else if (type == OBJ_BLOB) {
278
/* Blobs are written right away as well, then flagged as written. */
if (write_object_file(buf, size, type,
279
&obj_list[nr].oid) < 0)
280
die("failed to write object");
281
added_object(nr, type, buf, size);
284
blob = lookup_blob(the_repository, &obj_list[nr].oid);
286
blob->object.flags |= FLAG_WRITTEN;
288
die("invalid blob object");
289
obj_list[nr].obj = NULL;
293
/*
 * Remaining types: only compute the object id here; the contents are
 * parsed and kept via add_object_buffer(), and the object is marked
 * FLAG_OPEN.
 */
hash_object_file(the_hash_algo, buf, size, type,
295
added_object(nr, type, buf, size);
296
obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
300
die("invalid %s", type_name(type));
301
add_object_buffer(obj, buf, size);
302
obj->flags |= FLAG_OPEN;
303
obj_list[nr].obj = obj;
307
/*
 * Apply a delta to its base and pass the result to write_object() as the
 * contents of the nr-th entry.
 *
 * nr:                index of the delta entry in obj_list.
 * type:              object type to record for the result.
 * base, base_size:   contents of the base object.
 * delta, delta_size: delta data to apply.
 *
 * Dies with "failed to apply delta"; the guarding condition is not
 * visible, presumably a NULL result from patch_delta() -- TODO confirm.
 */
static void resolve_delta(unsigned nr, enum object_type type,
308
void *base, unsigned long base_size,
309
void *delta, unsigned long delta_size)
312
unsigned long result_size;
314
result = patch_delta(base, base_size,
318
die("failed to apply delta");
320
write_object(nr, type, result, result_size);
324
* We now know the contents of an object (which is nr-th in the pack);
325
* resolve all the deltified objects that are based on it.
327
/*
 * Called once the contents of the nr-th entry are known: walk delta_list
 * and resolve every queued delta whose base is this entry.
 *
 * nr:         index of the entry whose contents just became available.
 * type:       its object type, passed on to resolve_delta().
 * data, size: its contents, used as the delta base.
 *
 * The list unlinking and pointer advance are not visible in this view.
 */
static void added_object(unsigned nr, enum object_type type,
328
void *data, unsigned long size)
330
/* Pointer-to-pointer cursor over the list. */
struct delta_info **p = &delta_list;
331
struct delta_info *info;
333
while ((info = *p) != NULL) {
334
/* A queued delta matches this entry by base object id or by base pack offset. */
if (oideq(&info->base_oid, &obj_list[nr].oid) ||
335
info->base_offset == obj_list[nr].offset) {
338
resolve_delta(info->nr, type, data, size,
339
info->delta, info->size);
347
/*
 * Inflate a non-delta entry of "size" bytes with get_data() and pass the
 * result to write_object().
 *
 * The rest of the parameter list (including "nr") and any handling of a
 * NULL buffer are not visible in this view.
 */
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
350
void *buf = get_data(size);
353
write_object(nr, type, buf, size);
356
/*
 * State used by feed_input_zstream(): the zlib stream being inflated and
 * the buffer that receives each chunk of inflated output. Further members
 * (such as "status", used in this file) are not visible in this view.
 */
struct input_zstream_data {
357
git_zstream *zstream;
358
/* Output buffer handed to the zlib stream on each feed_input_zstream() call. */
unsigned char buf[8192];
362
/*
 * Read callback for an input_stream: inflate the next chunk of pack input
 * into data->buf.
 *
 * in_stream: stream whose "data" member is a struct input_zstream_data.
 * readlen:   out-parameter; set to the number of bytes placed in data->buf.
 *
 * The return statements and the call that obtains "in" are not visible in
 * this view.
 */
static const void *feed_input_zstream(struct input_stream *in_stream,
363
unsigned long *readlen)
365
struct input_zstream_data *data = in_stream->data;
366
git_zstream *zstream = data->zstream;
369
if (in_stream->is_finished) {
374
/* Each call starts with the whole output buffer available. */
zstream->next_out = data->buf;
375
zstream->avail_out = sizeof(data->buf);
376
zstream->next_in = in;
377
zstream->avail_in = len;
379
data->status = git_inflate(zstream, 0);
381
/*
 * Any status other than Z_OK ends the stream, whether that is
 * Z_STREAM_END or an error; stream_blob() inspects data->status later.
 */
in_stream->is_finished = data->status != Z_OK;
382
/* Consume exactly the input bytes inflate took. */
use(len - zstream->avail_in);
383
*readlen = sizeof(data->buf) - zstream->avail_out;
388
/*
 * Write the nr-th entry to the object store via stream_loose_object(),
 * inflating input chunk by chunk through feed_input_zstream().
 *
 * size: expected inflated size of the object.
 * nr:   index into obj_list; the resulting object id is stored there.
 *
 * Dies if streaming fails or if inflation did not finish with
 * Z_STREAM_END.
 */
static void stream_blob(unsigned long size, unsigned nr)
390
git_zstream zstream = { 0 };
391
struct input_zstream_data data = { 0 };
392
struct input_stream in_stream = {
393
.read = feed_input_zstream,
396
struct obj_info *info = &obj_list[nr];
398
data.zstream = &zstream;
399
git_inflate_init(&zstream);
401
if (stream_loose_object(&in_stream, size, &info->oid))
402
die(_("failed to write object in stream"));
404
/* feed_input_zstream() recorded the last inflate status in data.status. */
if (data.status != Z_STREAM_END)
405
die(_("inflate returned (%d)"), data.status);
406
git_inflate_end(&zstream);
409
/* Mark the blob object for this id as already written. */
struct blob *blob = lookup_blob(the_repository, &info->oid);
412
die(_("invalid blob object from stream"));
413
blob->object.flags |= FLAG_WRITTEN;
418
/*
 * Try to resolve a delta against a base object whose contents are held in
 * core (registered via add_object_buffer()).
 *
 * nr:                     index of the delta entry in obj_list.
 * base:                   object id of the base.
 * delta_data, delta_size: the delta to apply.
 *
 * Callers treat a non-zero return as "resolved, nothing more to do". The
 * return statements and NULL checks are not visible in this view.
 */
static int resolve_against_held(unsigned nr, const struct object_id *base,
419
void *delta_data, unsigned long delta_size)
422
struct obj_buffer *obj_buffer;
423
obj = lookup_object(the_repository, base);
426
obj_buffer = lookup_object_buffer(obj);
429
resolve_delta(nr, obj->type, obj_buffer->buffer,
430
obj_buffer->size, delta_data, delta_size);
434
/*
 * Unpack one delta entry: identify its base, inflate the delta data, and
 * either resolve it now or queue it on delta_list.
 *
 * type:       delta kind; OBJ_REF_DELTA is handled by the first branch,
 *             any other value by the offset-based branch.
 * delta_size: inflated size of the delta data.
 *
 * The rest of the parameter list and several statements, including loop
 * headers and returns, are not visible in this view.
 */
static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
437
void *delta_data, *base;
438
unsigned long base_size;
439
struct object_id base_oid;
441
if (type == OBJ_REF_DELTA) {
442
/* The base is named by a raw object id stored ahead of the delta data. */
oidread(&base_oid, fill(the_hash_algo->rawsz), the_repository->hash_algo);
443
use(the_hash_algo->rawsz);
444
delta_data = get_data(delta_size);
447
if (repo_has_object_file(the_repository, &base_oid))
448
; /* Ok we have this one */
449
else if (resolve_against_held(nr, &base_oid,
450
delta_data, delta_size))
451
return; /* we are done */
453
/* cannot resolve yet --- queue it */
454
oidclr(&obj_list[nr].oid, the_repository->hash_algo);
455
add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size);
459
unsigned base_found = 0;
460
unsigned char *pack, c;
462
unsigned lo, mid, hi;
467
/* The base offset is assembled from the low 7 bits of successive bytes. */
base_offset = c & 127;
470
if (!base_offset || MSB(base_offset, 7))
471
die("offset value overflow for delta base object");
475
base_offset = (base_offset << 7) + (c & 127);
477
/* Turn the decoded distance into a position relative to this entry's own offset. */
base_offset = obj_list[nr].offset - base_offset;
478
if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
479
die("offset value out of bound for delta base object");
481
delta_data = get_data(delta_size);
487
/* Binary search of obj_list for the entry that starts at base_offset. */
mid = lo + (hi - lo) / 2;
488
if (base_offset < obj_list[mid].offset) {
490
} else if (base_offset > obj_list[mid].offset) {
493
oidcpy(&base_oid, &obj_list[mid].oid);
494
/* A null id means the base entry's contents are not yet known. */
base_found = !is_null_oid(&base_oid);
500
* The delta base object is itself a delta that
501
* has not been resolved yet.
503
oidclr(&obj_list[nr].oid, the_repository->hash_algo);
504
add_delta_to_list(nr, null_oid(), base_offset,
505
delta_data, delta_size);
510
if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
513
/* Otherwise read the base from the object store. */
base = repo_read_object_file(the_repository, &base_oid, &type,
516
error("failed to read delta-pack base object %s",
517
oid_to_hex(&base_oid));
523
resolve_delta(nr, type, base, base_size, delta_data, delta_size);
527
/*
 * Unpack the nr-th entry of the pack: record where it starts, decode its
 * header, and dispatch to the handler for its type.
 *
 * The header-reading loop and the switch/case structure are not visible
 * in this view.
 */
static void unpack_one(unsigned nr)
531
unsigned long size, c;
532
enum object_type type;
534
/* The entry's pack offset is the number of input bytes consumed so far. */
obj_list[nr].offset = consumed_bytes;
546
/* Size is accumulated 7 bits at a time from successive header bytes. */
size += (c & 0x7f) << shift;
552
/* Entries above big_file_threshold are streamed, except in dry_run mode. */
if (!dry_run && size > big_file_threshold) {
553
stream_blob(size, nr);
560
unpack_non_delta_entry(type, size, nr);
564
unpack_delta_entry(type, size, nr);
567
error("bad object type %d", type);
575
/*
 * Read and validate the pack header, then unpack every entry inside an
 * object-database transaction while updating the progress meter.
 *
 * Dies on a bad signature, an unsupported version, or (per the final
 * message) when unresolved deltas remain. The per-entry call inside the
 * loop and some guards are not visible in this view.
 */
static void unpack_all(void)
578
struct pack_header *hdr = fill(sizeof(struct pack_header));
580
/* Header fields are converted with ntohl(); the count is read before the header is validated. */
nr_objects = ntohl(hdr->hdr_entries);
582
if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
583
die("bad pack file");
584
if (!pack_version_ok(hdr->hdr_version))
585
die("unknown pack file version %"PRIu32,
586
ntohl(hdr->hdr_version));
587
/* The header has been examined in place; now mark it consumed. */
use(sizeof(struct pack_header));
590
progress = start_progress(_("Unpacking objects"), nr_objects);
591
/* One zero-initialized obj_info per entry announced by the header. */
CALLOC_ARRAY(obj_list, nr_objects);
592
begin_odb_transaction();
593
for (i = 0; i < nr_objects; i++) {
595
display_progress(progress, i + 1);
597
end_odb_transaction();
598
stop_progress(&progress);
601
die("unresolved deltas left after unpacking");
604
/*
 * Entry point of "git unpack-objects".
 *
 * Parses the command-line flags, then hashes the consumed pack input,
 * compares the result with the trailing checksum read from the input, and
 * copies any leftover buffered input to file descriptor 1.
 *
 * The call that drives the unpacking, the flag assignments and the return
 * statement are not visible in this view.
 */
int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
607
struct object_id oid;
608
git_hash_ctx tmp_ctx;
610
disable_replace_refs();
612
git_config(git_default_config, NULL);
616
for (i = 1 ; i < argc; i++) {
617
const char *arg = argv[i];
620
if (!strcmp(arg, "-n")) {
624
if (!strcmp(arg, "-q")) {
628
if (!strcmp(arg, "-r")) {
632
if (!strcmp(arg, "--strict")) {
636
if (skip_prefix(arg, "--strict=", &arg)) {
638
fsck_set_msg_types(&fsck_options, arg);
641
/*
 * --pack_header=: synthesize a pack header at the start of the input
 * buffer from numbers given on the command line.
 */
if (starts_with(arg, "--pack_header=")) {
642
struct pack_header *hdr;
645
hdr = (struct pack_header *)buffer;
646
hdr->hdr_signature = htonl(PACK_SIGNATURE);
647
/* arg + 14 skips the 14-character "--pack_header=" prefix. */
hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
650
/* c + 1 steps over the one character that follows the version number. */
hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
656
if (skip_prefix(arg, "--max-input-size=", &arg)) {
657
/* NOTE(review): endptr is NULL, so trailing non-digits are not detected here. */
max_input_size = strtoumax(arg, NULL, 10);
663
/* We don't take any non-flag arguments now.. Maybe some day */
666
the_hash_algo->init_fn(&ctx);
668
/* Hash the consumed part of the buffer that fill() has not yet fed to ctx. */
the_hash_algo->update_fn(&ctx, buffer, offset);
669
/* The final digest is computed on a clone of ctx. */
the_hash_algo->init_fn(&tmp_ctx);
670
the_hash_algo->clone_fn(&tmp_ctx, &ctx);
671
the_hash_algo->final_oid_fn(&oid, &tmp_ctx);
674
if (fsck_finish(&fsck_options))
675
die(_("fsck error in pack objects"));
677
/* The next rawsz bytes of input must equal the digest just computed. */
if (!hasheq(fill(the_hash_algo->rawsz), oid.hash,
678
the_repository->hash_algo))
679
die("final sha1 did not match");
680
use(the_hash_algo->rawsz);
682
/* Write the last part of the buffer to stdout */
683
/* NOTE(review): the return value of write_in_full() is not checked here. */
write_in_full(1, buffer + offset, len);