git

unpack-objects.c
687 строк · 16.4 Кб
Перенос по словам
1
#include "builtin.h"
2
#include "bulk-checkin.h"
3
#include "config.h"
4
#include "environment.h"
5
#include "gettext.h"
6
#include "git-zlib.h"
7
#include "hex.h"
8
#include "object-store-ll.h"
9
#include "object.h"
10
#include "delta.h"
11
#include "pack.h"
12
#include "blob.h"
13
#include "replace-object.h"
14
#include "strbuf.h"
15
#include "progress.h"
16
#include "decorate.h"
17
#include "fsck.h"
18

19
static int dry_run, quiet, recover, has_errors, strict;
20
static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]";
21

22
/* We always read in 4kB chunks. */
23
static unsigned char buffer[4096];
24
static unsigned int offset, len;
25
static off_t consumed_bytes;
26
static off_t max_input_size;
27
static git_hash_ctx ctx;
28
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
29
static struct progress *progress;
30

31
/*
 * In-core copy of an object's contents.
 *
 * When running under --strict mode, objects whose reachability is
 * suspect are kept like this instead of being written to the object
 * store right away.
 */
struct obj_buffer {
	char *buffer;		/* the object's data */
	unsigned long size;	/* number of bytes in "buffer" */
};
40

41
/* Associates a struct object with the obj_buffer holding its data. */
static struct decoration obj_decorate;

/*
 * Return whatever lookup_decoration() has on record for "base" in
 * obj_decorate.  Callers in this file treat a NULL result as "no
 * buffer is held for this object".
 */
static struct obj_buffer *lookup_object_buffer(struct object *base)
{
	struct obj_buffer *held = lookup_decoration(&obj_decorate, base);

	return held;
}
47

48
static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
49
{
50
	struct obj_buffer *obj;
51
	CALLOC_ARRAY(obj, 1);
52
	obj->buffer = buffer;
53
	obj->size = size;
54
	if (add_decoration(&obj_decorate, object, obj))
55
		die("object %s tried to add buffer twice!", oid_to_hex(&object->oid));
56
}
57

58
/*
59
 * Make sure at least "min" bytes are available in the buffer, and
60
 * return the pointer to the buffer.
61
 */
62
static void *fill(int min)
63
{
64
	if (min <= len)
65
		return buffer + offset;
66
	if (min > sizeof(buffer))
67
		die("cannot fill %d bytes", min);
68
	if (offset) {
69
		the_hash_algo->update_fn(&ctx, buffer, offset);
70
		memmove(buffer, buffer + offset, len);
71
		offset = 0;
72
	}
73
	do {
74
		ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
75
		if (ret <= 0) {
76
			if (!ret)
77
				die("early EOF");
78
			die_errno("read error on input");
79
		}
80
		len += ret;
81
	} while (len < min);
82
	return buffer;
83
}
84

85
static void use(int bytes)
86
{
87
	if (bytes > len)
88
		die("used more bytes than were available");
89
	len -= bytes;
90
	offset += bytes;
91

92
	/* make sure off_t is sufficiently large not to wrap */
93
	if (signed_add_overflows(consumed_bytes, bytes))
94
		die("pack too large for current definition of off_t");
95
	consumed_bytes += bytes;
96
	if (max_input_size && consumed_bytes > max_input_size)
97
		die(_("pack exceeds maximum allowed size"));
98
	display_throughput(progress, consumed_bytes);
99
}
100

101
/*
102
 * Decompress zstream from the standard input into a newly
103
 * allocated buffer of specified size and return the buffer.
104
 * The caller is responsible to free the returned buffer.
105
 *
106
 * But for dry_run mode, "get_data()" is only used to check the
107
 * integrity of data, and the returned buffer is not used at all.
108
 * Therefore, in dry_run mode, "get_data()" will release the small
109
 * allocated buffer which is reused to hold temporary zstream output
110
 * and return NULL instead of returning garbage data.
111
 */
112
static void *get_data(unsigned long size)
113
{
114
	git_zstream stream;
115
	unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
116
	void *buf = xmallocz(bufsize);
117

118
	memset(&stream, 0, sizeof(stream));
119

120
	stream.next_out = buf;
121
	stream.avail_out = bufsize;
122
	stream.next_in = fill(1);
123
	stream.avail_in = len;
124
	git_inflate_init(&stream);
125

126
	for (;;) {
127
		int ret = git_inflate(&stream, 0);
128
		use(len - stream.avail_in);
129
		if (stream.total_out == size && ret == Z_STREAM_END)
130
			break;
131
		if (ret != Z_OK) {
132
			error("inflate returned %d", ret);
133
			FREE_AND_NULL(buf);
134
			if (!recover)
135
				exit(1);
136
			has_errors = 1;
137
			break;
138
		}
139
		stream.next_in = fill(1);
140
		stream.avail_in = len;
141
		if (dry_run) {
142
			/* reuse the buffer in dry_run mode */
143
			stream.next_out = buf;
144
			stream.avail_out = bufsize > size - stream.total_out ?
145
						   size - stream.total_out :
146
						   bufsize;
147
		}
148
	}
149
	git_inflate_end(&stream);
150
	if (dry_run)
151
		FREE_AND_NULL(buf);
152
	return buf;
153
}
154

155
struct delta_info {
156
	struct object_id base_oid;
157
	unsigned nr;
158
	off_t base_offset;
159
	unsigned long size;
160
	void *delta;
161
	struct delta_info *next;
162
};
163

164
static struct delta_info *delta_list;
165

166
static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
167
			      off_t base_offset,
168
			      void *delta, unsigned long size)
169
{
170
	struct delta_info *info = xmalloc(sizeof(*info));
171

172
	oidcpy(&info->base_oid, base_oid);
173
	info->base_offset = base_offset;
174
	info->size = size;
175
	info->delta = delta;
176
	info->nr = nr;
177
	info->next = delta_list;
178
	delta_list = info;
179
}
180

181
struct obj_info {
182
	off_t offset;
183
	struct object_id oid;
184
	struct object *obj;
185
};
186

187
/* Remember to update object flag allocation in object.h */
188
#define FLAG_OPEN (1u<<20)
189
#define FLAG_WRITTEN (1u<<21)
190

191
static struct obj_info *obj_list;
192
static unsigned nr_objects;
193

194
/*
195
 * Called only from check_object() after it verified this object
196
 * is Ok.
197
 */
198
static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
199
{
200
	struct object_id oid;
201

202
	if (write_object_file(obj_buf->buffer, obj_buf->size,
203
			      obj->type, &oid) < 0)
204
		die("failed to write object %s", oid_to_hex(&obj->oid));
205
	obj->flags |= FLAG_WRITTEN;
206
}
207

208
/*
209
 * At the very end of the processing, write_rest() scans the objects
210
 * that have reachability requirements and calls this function.
211
 * Verify its reachability and validity recursively and write it out.
212
 */
213
static int check_object(struct object *obj, enum object_type type,
214
			void *data UNUSED,
215
			struct fsck_options *options UNUSED)
216
{
217
	struct obj_buffer *obj_buf;
218

219
	if (!obj)
220
		return 1;
221

222
	if (obj->flags & FLAG_WRITTEN)
223
		return 0;
224

225
	if (type != OBJ_ANY && obj->type != type)
226
		die("object type mismatch");
227

228
	if (!(obj->flags & FLAG_OPEN)) {
229
		unsigned long size;
230
		int type = oid_object_info(the_repository, &obj->oid, &size);
231
		if (type != obj->type || type <= 0)
232
			die("object of unexpected type");
233
		obj->flags |= FLAG_WRITTEN;
234
		return 0;
235
	}
236

237
	obj_buf = lookup_object_buffer(obj);
238
	if (!obj_buf)
239
		die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
240
	if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
241
		die("fsck error in packed object");
242
	fsck_options.walk = check_object;
243
	if (fsck_walk(obj, NULL, &fsck_options))
244
		die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
245
	write_cached_object(obj, obj_buf);
246
	return 0;
247
}
248

249
static void write_rest(void)
250
{
251
	unsigned i;
252
	for (i = 0; i < nr_objects; i++) {
253
		if (obj_list[i].obj)
254
			check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL);
255
	}
256
}
257

258
static void added_object(unsigned nr, enum object_type type,
259
			 void *data, unsigned long size);
260

261
/*
262
 * Write out nr-th object from the list, now we know the contents
263
 * of it.  Under --strict, this buffers structured objects in-core,
264
 * to be checked at the end.
265
 */
266
static void write_object(unsigned nr, enum object_type type,
267
			 void *buf, unsigned long size)
268
{
269
	if (!strict) {
270
		if (write_object_file(buf, size, type,
271
				      &obj_list[nr].oid) < 0)
272
			die("failed to write object");
273
		added_object(nr, type, buf, size);
274
		free(buf);
275
		obj_list[nr].obj = NULL;
276
	} else if (type == OBJ_BLOB) {
277
		struct blob *blob;
278
		if (write_object_file(buf, size, type,
279
				      &obj_list[nr].oid) < 0)
280
			die("failed to write object");
281
		added_object(nr, type, buf, size);
282
		free(buf);
283

284
		blob = lookup_blob(the_repository, &obj_list[nr].oid);
285
		if (blob)
286
			blob->object.flags |= FLAG_WRITTEN;
287
		else
288
			die("invalid blob object");
289
		obj_list[nr].obj = NULL;
290
	} else {
291
		struct object *obj;
292
		int eaten;
293
		hash_object_file(the_hash_algo, buf, size, type,
294
				 &obj_list[nr].oid);
295
		added_object(nr, type, buf, size);
296
		obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
297
					  type, size, buf,
298
					  &eaten);
299
		if (!obj)
300
			die("invalid %s", type_name(type));
301
		add_object_buffer(obj, buf, size);
302
		obj->flags |= FLAG_OPEN;
303
		obj_list[nr].obj = obj;
304
	}
305
}
306

307
static void resolve_delta(unsigned nr, enum object_type type,
308
			  void *base, unsigned long base_size,
309
			  void *delta, unsigned long delta_size)
310
{
311
	void *result;
312
	unsigned long result_size;
313

314
	result = patch_delta(base, base_size,
315
			     delta, delta_size,
316
			     &result_size);
317
	if (!result)
318
		die("failed to apply delta");
319
	free(delta);
320
	write_object(nr, type, result, result_size);
321
}
322

323
/*
324
 * We now know the contents of an object (which is nr-th in the pack);
325
 * resolve all the deltified objects that are based on it.
326
 */
327
static void added_object(unsigned nr, enum object_type type,
328
			 void *data, unsigned long size)
329
{
330
	struct delta_info **p = &delta_list;
331
	struct delta_info *info;
332

333
	while ((info = *p) != NULL) {
334
		if (oideq(&info->base_oid, &obj_list[nr].oid) ||
335
		    info->base_offset == obj_list[nr].offset) {
336
			*p = info->next;
337
			p = &delta_list;
338
			resolve_delta(info->nr, type, data, size,
339
				      info->delta, info->size);
340
			free(info);
341
			continue;
342
		}
343
		p = &info->next;
344
	}
345
}
346

347
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
348
				   unsigned nr)
349
{
350
	void *buf = get_data(size);
351

352
	if (buf)
353
		write_object(nr, type, buf, size);
354
}
355

356
struct input_zstream_data {
357
	git_zstream *zstream;
358
	unsigned char buf[8192];
359
	int status;
360
};
361

362
static const void *feed_input_zstream(struct input_stream *in_stream,
363
				      unsigned long *readlen)
364
{
365
	struct input_zstream_data *data = in_stream->data;
366
	git_zstream *zstream = data->zstream;
367
	void *in = fill(1);
368

369
	if (in_stream->is_finished) {
370
		*readlen = 0;
371
		return NULL;
372
	}
373

374
	zstream->next_out = data->buf;
375
	zstream->avail_out = sizeof(data->buf);
376
	zstream->next_in = in;
377
	zstream->avail_in = len;
378

379
	data->status = git_inflate(zstream, 0);
380

381
	in_stream->is_finished = data->status != Z_OK;
382
	use(len - zstream->avail_in);
383
	*readlen = sizeof(data->buf) - zstream->avail_out;
384

385
	return data->buf;
386
}
387

388
static void stream_blob(unsigned long size, unsigned nr)
389
{
390
	git_zstream zstream = { 0 };
391
	struct input_zstream_data data = { 0 };
392
	struct input_stream in_stream = {
393
		.read = feed_input_zstream,
394
		.data = &data,
395
	};
396
	struct obj_info *info = &obj_list[nr];
397

398
	data.zstream = &zstream;
399
	git_inflate_init(&zstream);
400

401
	if (stream_loose_object(&in_stream, size, &info->oid))
402
		die(_("failed to write object in stream"));
403

404
	if (data.status != Z_STREAM_END)
405
		die(_("inflate returned (%d)"), data.status);
406
	git_inflate_end(&zstream);
407

408
	if (strict) {
409
		struct blob *blob = lookup_blob(the_repository, &info->oid);
410

411
		if (!blob)
412
			die(_("invalid blob object from stream"));
413
		blob->object.flags |= FLAG_WRITTEN;
414
	}
415
	info->obj = NULL;
416
}
417

418
static int resolve_against_held(unsigned nr, const struct object_id *base,
419
				void *delta_data, unsigned long delta_size)
420
{
421
	struct object *obj;
422
	struct obj_buffer *obj_buffer;
423
	obj = lookup_object(the_repository, base);
424
	if (!obj)
425
		return 0;
426
	obj_buffer = lookup_object_buffer(obj);
427
	if (!obj_buffer)
428
		return 0;
429
	resolve_delta(nr, obj->type, obj_buffer->buffer,
430
		      obj_buffer->size, delta_data, delta_size);
431
	return 1;
432
}
433

434
static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
435
			       unsigned nr)
436
{
437
	void *delta_data, *base;
438
	unsigned long base_size;
439
	struct object_id base_oid;
440

441
	if (type == OBJ_REF_DELTA) {
442
		oidread(&base_oid, fill(the_hash_algo->rawsz), the_repository->hash_algo);
443
		use(the_hash_algo->rawsz);
444
		delta_data = get_data(delta_size);
445
		if (!delta_data)
446
			return;
447
		if (repo_has_object_file(the_repository, &base_oid))
448
			; /* Ok we have this one */
449
		else if (resolve_against_held(nr, &base_oid,
450
					      delta_data, delta_size))
451
			return; /* we are done */
452
		else {
453
			/* cannot resolve yet --- queue it */
454
			oidclr(&obj_list[nr].oid, the_repository->hash_algo);
455
			add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size);
456
			return;
457
		}
458
	} else {
459
		unsigned base_found = 0;
460
		unsigned char *pack, c;
461
		off_t base_offset;
462
		unsigned lo, mid, hi;
463

464
		pack = fill(1);
465
		c = *pack;
466
		use(1);
467
		base_offset = c & 127;
468
		while (c & 128) {
469
			base_offset += 1;
470
			if (!base_offset || MSB(base_offset, 7))
471
				die("offset value overflow for delta base object");
472
			pack = fill(1);
473
			c = *pack;
474
			use(1);
475
			base_offset = (base_offset << 7) + (c & 127);
476
		}
477
		base_offset = obj_list[nr].offset - base_offset;
478
		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
479
			die("offset value out of bound for delta base object");
480

481
		delta_data = get_data(delta_size);
482
		if (!delta_data)
483
			return;
484
		lo = 0;
485
		hi = nr;
486
		while (lo < hi) {
487
			mid = lo + (hi - lo) / 2;
488
			if (base_offset < obj_list[mid].offset) {
489
				hi = mid;
490
			} else if (base_offset > obj_list[mid].offset) {
491
				lo = mid + 1;
492
			} else {
493
				oidcpy(&base_oid, &obj_list[mid].oid);
494
				base_found = !is_null_oid(&base_oid);
495
				break;
496
			}
497
		}
498
		if (!base_found) {
499
			/*
500
			 * The delta base object is itself a delta that
501
			 * has not been resolved yet.
502
			 */
503
			oidclr(&obj_list[nr].oid, the_repository->hash_algo);
504
			add_delta_to_list(nr, null_oid(), base_offset,
505
					  delta_data, delta_size);
506
			return;
507
		}
508
	}
509

510
	if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
511
		return;
512

513
	base = repo_read_object_file(the_repository, &base_oid, &type,
514
				     &base_size);
515
	if (!base) {
516
		error("failed to read delta-pack base object %s",
517
		      oid_to_hex(&base_oid));
518
		if (!recover)
519
			exit(1);
520
		has_errors = 1;
521
		return;
522
	}
523
	resolve_delta(nr, type, base, base_size, delta_data, delta_size);
524
	free(base);
525
}
526

527
static void unpack_one(unsigned nr)
528
{
529
	unsigned shift;
530
	unsigned char *pack;
531
	unsigned long size, c;
532
	enum object_type type;
533

534
	obj_list[nr].offset = consumed_bytes;
535

536
	pack = fill(1);
537
	c = *pack;
538
	use(1);
539
	type = (c >> 4) & 7;
540
	size = (c & 15);
541
	shift = 4;
542
	while (c & 0x80) {
543
		pack = fill(1);
544
		c = *pack;
545
		use(1);
546
		size += (c & 0x7f) << shift;
547
		shift += 7;
548
	}
549

550
	switch (type) {
551
	case OBJ_BLOB:
552
		if (!dry_run && size > big_file_threshold) {
553
			stream_blob(size, nr);
554
			return;
555
		}
556
		/* fallthrough */
557
	case OBJ_COMMIT:
558
	case OBJ_TREE:
559
	case OBJ_TAG:
560
		unpack_non_delta_entry(type, size, nr);
561
		return;
562
	case OBJ_REF_DELTA:
563
	case OBJ_OFS_DELTA:
564
		unpack_delta_entry(type, size, nr);
565
		return;
566
	default:
567
		error("bad object type %d", type);
568
		has_errors = 1;
569
		if (recover)
570
			return;
571
		exit(1);
572
	}
573
}
574

575
static void unpack_all(void)
576
{
577
	int i;
578
	struct pack_header *hdr = fill(sizeof(struct pack_header));
579

580
	nr_objects = ntohl(hdr->hdr_entries);
581

582
	if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
583
		die("bad pack file");
584
	if (!pack_version_ok(hdr->hdr_version))
585
		die("unknown pack file version %"PRIu32,
586
			ntohl(hdr->hdr_version));
587
	use(sizeof(struct pack_header));
588

589
	if (!quiet)
590
		progress = start_progress(_("Unpacking objects"), nr_objects);
591
	CALLOC_ARRAY(obj_list, nr_objects);
592
	begin_odb_transaction();
593
	for (i = 0; i < nr_objects; i++) {
594
		unpack_one(i);
595
		display_progress(progress, i + 1);
596
	}
597
	end_odb_transaction();
598
	stop_progress(&progress);
599

600
	if (delta_list)
601
		die("unresolved deltas left after unpacking");
602
}
603

604
/*
 * Entry point of "git unpack-objects": read a pack from the standard
 * input and unpack its objects.
 *
 * Recognized options are -n, -q, -r, --strict[=<msg-types>],
 * --pack_header=<version>,<entries> (the header is synthesized at the
 * start of the input buffer rather than read from the input) and
 * --max-input-size=<bytes>.  Anything else prints the usage message.
 *
 * After unpacking, the hash of everything consumed is compared with
 * the trailer of the pack, and whatever input is left in the buffer
 * is copied to the standard output.  Returns has_errors.
 */
int cmd_unpack_objects(int argc, const char **argv, const char *prefix UNUSED)
{
	int i;
	struct object_id oid;
	git_hash_ctx tmp_ctx;
	const unsigned char *trailer;

	disable_replace_refs();

	git_config(git_default_config, NULL);

	quiet = !isatty(2);

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg != '-') {
			/* We don't take any non-flag arguments now.. Maybe some day */
			usage(unpack_usage);
		} else if (!strcmp(arg, "-n")) {
			dry_run = 1;
		} else if (!strcmp(arg, "-q")) {
			quiet = 1;
		} else if (!strcmp(arg, "-r")) {
			recover = 1;
		} else if (!strcmp(arg, "--strict")) {
			strict = 1;
		} else if (skip_prefix(arg, "--strict=", &arg)) {
			strict = 1;
			fsck_set_msg_types(&fsck_options, arg);
		} else if (starts_with(arg, "--pack_header=")) {
			struct pack_header *hdr = (struct pack_header *)buffer;
			char *end;

			hdr->hdr_signature = htonl(PACK_SIGNATURE);
			hdr->hdr_version = htonl(strtoul(arg + 14, &end, 10));
			if (*end != ',')
				die("bad %s", arg);
			hdr->hdr_entries = htonl(strtoul(end + 1, &end, 10));
			if (*end)
				die("bad %s", arg);
			/* pretend the header has already been read */
			len = sizeof(*hdr);
		} else if (skip_prefix(arg, "--max-input-size=", &arg)) {
			max_input_size = strtoumax(arg, NULL, 10);
		} else {
			usage(unpack_usage);
		}
	}

	the_hash_algo->init_fn(&ctx);
	unpack_all();

	/*
	 * Hash the consumed bytes still sitting in the buffer, then
	 * take the digest from a copy of the context.
	 */
	the_hash_algo->update_fn(&ctx, buffer, offset);
	the_hash_algo->init_fn(&tmp_ctx);
	the_hash_algo->clone_fn(&tmp_ctx, &ctx);
	the_hash_algo->final_oid_fn(&oid, &tmp_ctx);

	if (strict) {
		write_rest();
		if (fsck_finish(&fsck_options))
			die(_("fsck error in pack objects"));
	}

	trailer = fill(the_hash_algo->rawsz);
	if (!hasheq(trailer, oid.hash, the_repository->hash_algo))
		die("final sha1 did not match");
	use(the_hash_algo->rawsz);

	/* Write the last part of the buffer to stdout */
	write_in_full(1, buffer + offset, len);

	/* All done */
	return has_errors;
}
688
git

Использование cookies