git

fast-export.c
1308 строк · 34.0 Кб
Перенос по словам
1
/*
2
 * "git fast-export" builtin command
3
 *
4
 * Copyright (C) 2007 Johannes E. Schindelin
5
 */
6
#include "builtin.h"
7
#include "config.h"
8
#include "gettext.h"
9
#include "hex.h"
10
#include "refs.h"
11
#include "refspec.h"
12
#include "object-file.h"
13
#include "object-store-ll.h"
14
#include "commit.h"
15
#include "object.h"
16
#include "tag.h"
17
#include "diff.h"
18
#include "diffcore.h"
19
#include "log-tree.h"
20
#include "revision.h"
21
#include "decorate.h"
22
#include "string-list.h"
23
#include "utf8.h"
24
#include "parse-options.h"
25
#include "quote.h"
26
#include "remote.h"
27
#include "blob.h"
28

29
static const char *fast_export_usage[] = {
30
	N_("git fast-export [<rev-list-opts>]"),
31
	NULL
32
};
33

34
static int progress;
35
static enum signed_tag_mode { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
36
static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
37
static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
38
static int fake_missing_tagger;
39
static int use_done_feature;
40
static int no_data;
41
static int full_tree;
42
static int reference_excluded_commits;
43
static int show_original_ids;
44
static int mark_tags;
45
static struct string_list extra_refs = STRING_LIST_INIT_DUP;
46
static struct string_list tag_refs = STRING_LIST_INIT_DUP;
47
static struct refspec refspecs = REFSPEC_INIT_FETCH;
48
static int anonymize;
49
static struct hashmap anonymized_seeds;
50
static struct revision_sources revision_sources;
51

52
static int parse_opt_signed_tag_mode(const struct option *opt,
53
				     const char *arg, int unset)
54
{
55
	enum signed_tag_mode *val = opt->value;
56

57
	if (unset || !strcmp(arg, "abort"))
58
		*val = SIGNED_TAG_ABORT;
59
	else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
60
		*val = VERBATIM;
61
	else if (!strcmp(arg, "warn"))
62
		*val = WARN;
63
	else if (!strcmp(arg, "warn-strip"))
64
		*val = WARN_STRIP;
65
	else if (!strcmp(arg, "strip"))
66
		*val = STRIP;
67
	else
68
		return error("Unknown signed-tags mode: %s", arg);
69
	return 0;
70
}
71

72
static int parse_opt_tag_of_filtered_mode(const struct option *opt,
73
					  const char *arg, int unset)
74
{
75
	enum tag_of_filtered_mode *val = opt->value;
76

77
	if (unset || !strcmp(arg, "abort"))
78
		*val = TAG_FILTERING_ABORT;
79
	else if (!strcmp(arg, "drop"))
80
		*val = DROP;
81
	else if (!strcmp(arg, "rewrite"))
82
		*val = REWRITE;
83
	else
84
		return error("Unknown tag-of-filtered mode: %s", arg);
85
	return 0;
86
}
87

88
static int parse_opt_reencode_mode(const struct option *opt,
89
				   const char *arg, int unset)
90
{
91
	enum reencode_mode *val = opt->value;
92

93
	if (unset) {
94
		*val = REENCODE_ABORT;
95
		return 0;
96
	}
97

98
	switch (git_parse_maybe_bool(arg)) {
99
	case 0:
100
		*val = REENCODE_NO;
101
		break;
102
	case 1:
103
		*val = REENCODE_YES;
104
		break;
105
	default:
106
		if (!strcasecmp(arg, "abort"))
107
			*val = REENCODE_ABORT;
108
		else
109
			return error("Unknown reencoding mode: %s", arg);
110
	}
111

112
	return 0;
113
}
114

115
/* Maps an object to its mark; the mark is stored in the decoration pointer. */
static struct decoration idnums;
/* Highest mark handed out (or imported) so far; mark_next_object() pre-increments it. */
static uint32_t last_idnum;
117
struct anonymized_entry {
118
	struct hashmap_entry hash;
119
	char *anon;
120
	const char orig[FLEX_ARRAY];
121
};
122

123
struct anonymized_entry_key {
124
	struct hashmap_entry hash;
125
	const char *orig;
126
	size_t orig_len;
127
};
128

129
/*
 * hashmap comparison callback for struct anonymized_entry.
 *
 * When `keydata` is given it is a struct anonymized_entry_key and the
 * stored original string is compared against the length-bounded key
 * with xstrncmpz(); otherwise both arguments are full entries and their
 * NUL-terminated originals are compared with strcmp().
 *
 * Returns 0 when the two match and non-zero otherwise.
 */
static int anonymized_entry_cmp(const void *cmp_data UNUSED,
				const struct hashmap_entry *eptr,
				const struct hashmap_entry *entry_or_key,
				const void *keydata)
{
	const struct anonymized_entry *a, *b;

	a = container_of(eptr, const struct anonymized_entry, hash);
	if (keydata) {
		const struct anonymized_entry_key *key = keydata;
		int equal = !xstrncmpz(a->orig, key->orig, key->orig_len);
		return !equal;
	}

	b = container_of(entry_or_key, const struct anonymized_entry, hash);
	return strcmp(a->orig, b->orig);
}
146

147
static struct anonymized_entry *add_anonymized_entry(struct hashmap *map,
148
						     unsigned hash,
149
						     const char *orig, size_t len,
150
						     char *anon)
151
{
152
	struct anonymized_entry *ret, *old;
153

154
	if (!map->cmpfn)
155
		hashmap_init(map, anonymized_entry_cmp, NULL, 0);
156

157
	FLEX_ALLOC_MEM(ret, orig, orig, len);
158
	hashmap_entry_init(&ret->hash, hash);
159
	ret->anon = anon;
160
	old = hashmap_put_entry(map, ret, hash);
161

162
	if (old) {
163
		free(old->anon);
164
		free(old);
165
	}
166

167
	return ret;
168
}
169

170
/*
171
 * Basically keep a cache of X->Y so that we can repeatedly replace
172
 * the same anonymized string with another. The actual generation
173
 * is farmed out to the generate function.
174
 */
175
static const char *anonymize_str(struct hashmap *map,
176
				 char *(*generate)(void),
177
				 const char *orig, size_t len)
178
{
179
	struct anonymized_entry_key key;
180
	struct anonymized_entry *ret;
181

182
	hashmap_entry_init(&key.hash, memhash(orig, len));
183
	key.orig = orig;
184
	key.orig_len = len;
185

186
	/* First check if it's a token the user configured manually... */
187
	ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
188

189
	/* ...otherwise check if we've already seen it in this context... */
190
	if (!ret)
191
		ret = hashmap_get_entry(map, &key, hash, &key);
192

193
	/* ...and finally generate a new mapping if necessary */
194
	if (!ret)
195
		ret = add_anonymized_entry(map, key.hash.hash,
196
					   orig, len, generate());
197

198
	return ret->anon;
199
}
200

201
/*
 * We anonymize each component of a path individually,
 * so that paths a/b and a/c will share a common root.
 * The components are cached via anonymize_str() so that repeated
 * lookups for "a" will yield the same value.
 *
 * The anonymized path is appended to `out`; the '/' separators of
 * `path` are copied through unchanged.  `map` is the cache to use and
 * `generate` produces a fresh token for a component not seen before.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   char *(*generate)(void))
{
	while (*path) {
		const char *end_of_component = strchrnul(path, '/');
		size_t len = end_of_component - path;
		const char *c = anonymize_str(map, generate, path, len);
		strbuf_addstr(out, c);
		path = end_of_component;
		/* copy the separator, if any, and step past it */
		if (*path)
			strbuf_addch(out, *path++);
	}
}
221

222
/*
 * Encode a mark number as a pointer value so it can be stored in the
 * pointer slot of a decoration.  Mark 0 becomes a null pointer.
 */
static inline void *mark_to_ptr(uint32_t mark)
{
	return (void *)(uintptr_t)mark;
}
226

227
/*
 * Inverse of mark_to_ptr(): recover the mark number from a pointer
 * value that was produced by it.  A null pointer yields mark 0.
 */
static inline uint32_t ptr_to_mark(void * mark)
{
	return (uint32_t)(uintptr_t)mark;
}
231

232
static inline void mark_object(struct object *object, uint32_t mark)
233
{
234
	add_decoration(&idnums, object, mark_to_ptr(mark));
235
}
236

237
static inline void mark_next_object(struct object *object)
238
{
239
	mark_object(object, ++last_idnum);
240
}
241

242
static int get_object_mark(struct object *object)
243
{
244
	void *decoration = lookup_decoration(&idnums, object);
245
	if (!decoration)
246
		return 0;
247
	return ptr_to_mark(decoration);
248
}
249

250
static struct commit *rewrite_commit(struct commit *p)
251
{
252
	for (;;) {
253
		if (p->parents && p->parents->next)
254
			break;
255
		if (p->object.flags & UNINTERESTING)
256
			break;
257
		if (!(p->object.flags & TREESAME))
258
			break;
259
		if (!p->parents)
260
			return NULL;
261
		p = p->parents->item;
262
	}
263
	return p;
264
}
265

266
static void show_progress(void)
267
{
268
	static int counter = 0;
269
	if (!progress)
270
		return;
271
	if ((++counter % progress) == 0)
272
		printf("progress %d objects\n", counter);
273
}
274

275
/*
276
 * Ideally we would want some transformation of the blob data here
277
 * that is unreversible, but would still be the same size and have
278
 * the same data relationship to other blobs (so that we get the same
279
 * delta and packing behavior as the original). But the first and last
280
 * requirements there are probably mutually exclusive, so let's take
281
 * the easy way out for now, and just generate arbitrary content.
282
 *
283
 * There's no need to cache this result with anonymize_mem, since
284
 * we already handle blob content caching with marks.
285
 */
286
static char *anonymize_blob(unsigned long *size)
287
{
288
	static int counter;
289
	struct strbuf out = STRBUF_INIT;
290
	strbuf_addf(&out, "anonymous blob %d", counter++);
291
	*size = out.len;
292
	return strbuf_detach(&out, NULL);
293
}
294

295
static void export_blob(const struct object_id *oid)
296
{
297
	unsigned long size;
298
	enum object_type type;
299
	char *buf;
300
	struct object *object;
301
	int eaten;
302

303
	if (no_data)
304
		return;
305

306
	if (is_null_oid(oid))
307
		return;
308

309
	object = lookup_object(the_repository, oid);
310
	if (object && object->flags & SHOWN)
311
		return;
312

313
	if (anonymize) {
314
		buf = anonymize_blob(&size);
315
		object = (struct object *)lookup_blob(the_repository, oid);
316
		eaten = 0;
317
	} else {
318
		buf = repo_read_object_file(the_repository, oid, &type, &size);
319
		if (!buf)
320
			die("could not read blob %s", oid_to_hex(oid));
321
		if (check_object_signature(the_repository, oid, buf, size,
322
					   type) < 0)
323
			die("oid mismatch in blob %s", oid_to_hex(oid));
324
		object = parse_object_buffer(the_repository, oid, type,
325
					     size, buf, &eaten);
326
	}
327

328
	if (!object)
329
		die("Could not read blob %s", oid_to_hex(oid));
330

331
	mark_next_object(object);
332

333
	printf("blob\nmark :%"PRIu32"\n", last_idnum);
334
	if (show_original_ids)
335
		printf("original-oid %s\n", oid_to_hex(oid));
336
	printf("data %"PRIuMAX"\n", (uintmax_t)size);
337
	if (size && fwrite(buf, size, 1, stdout) != 1)
338
		die_errno("could not write blob '%s'", oid_to_hex(oid));
339
	printf("\n");
340

341
	show_progress();
342

343
	object->flags |= SHOWN;
344
	if (!eaten)
345
		free(buf);
346
}
347

348
static int depth_first(const void *a_, const void *b_)
349
{
350
	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
351
	const struct diff_filepair *b = *((const struct diff_filepair **)b_);
352
	const char *name_a, *name_b;
353
	int len_a, len_b, len;
354
	int cmp;
355

356
	name_a = a->one ? a->one->path : a->two->path;
357
	name_b = b->one ? b->one->path : b->two->path;
358

359
	len_a = strlen(name_a);
360
	len_b = strlen(name_b);
361
	len = (len_a < len_b) ? len_a : len_b;
362

363
	/* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
364
	cmp = memcmp(name_a, name_b, len);
365
	if (cmp)
366
		return cmp;
367
	cmp = len_b - len_a;
368
	if (cmp)
369
		return cmp;
370
	/*
371
	 * Move 'R'ename entries last so that all references of the file
372
	 * appear in the output before it is renamed (e.g., when a file
373
	 * was copied and renamed in the same commit).
374
	 */
375
	return (a->status == 'R') - (b->status == 'R');
376
}
377

378
/*
 * Write `path` to stdout in the form the output stream expects.
 *
 * If quote_c_style() reports that the path needs quoting, the C-style
 * quoted form is written.  Otherwise a path containing a space is
 * wrapped in double quotes, and any other path is written verbatim.
 */
static void print_path_1(const char *path)
{
	/* first call writes nothing (no output target); only its result is used */
	int need_quote = quote_c_style(path, NULL, NULL, 0);
	if (need_quote)
		quote_c_style(path, NULL, stdout, 0);
	else if (strchr(path, ' '))
		printf("\"%s\"", path);
	else
		printf("%s", path);
}
388

389
static char *anonymize_path_component(void)
390
{
391
	static int counter;
392
	struct strbuf out = STRBUF_INIT;
393
	strbuf_addf(&out, "path%d", counter++);
394
	return strbuf_detach(&out, NULL);
395
}
396

397
static void print_path(const char *path)
398
{
399
	if (!anonymize)
400
		print_path_1(path);
401
	else {
402
		static struct hashmap paths;
403
		static struct strbuf anon = STRBUF_INIT;
404

405
		anonymize_path(&anon, path, &paths, anonymize_path_component);
406
		print_path_1(anon.buf);
407
		strbuf_reset(&anon);
408
	}
409
}
410

411
static char *generate_fake_oid(void)
412
{
413
	static uint32_t counter = 1; /* avoid null oid */
414
	const unsigned hashsz = the_hash_algo->rawsz;
415
	struct object_id oid;
416
	char *hex = xmallocz(GIT_MAX_HEXSZ);
417

418
	oidclr(&oid, the_repository->hash_algo);
419
	put_be32(oid.hash + hashsz - 4, counter++);
420
	return oid_to_hex_r(hex, &oid);
421
}
422

423
static const char *anonymize_oid(const char *oid_hex)
424
{
425
	static struct hashmap objs;
426
	size_t len = strlen(oid_hex);
427
	return anonymize_str(&objs, generate_fake_oid, oid_hex, len);
428
}
429

430
static void show_filemodify(struct diff_queue_struct *q,
431
			    struct diff_options *options UNUSED, void *data)
432
{
433
	int i;
434
	struct string_list *changed = data;
435

436
	/*
437
	 * Handle files below a directory first, in case they are all deleted
438
	 * and the directory changes to a file or symlink.
439
	 */
440
	QSORT(q->queue, q->nr, depth_first);
441

442
	for (i = 0; i < q->nr; i++) {
443
		struct diff_filespec *ospec = q->queue[i]->one;
444
		struct diff_filespec *spec = q->queue[i]->two;
445

446
		switch (q->queue[i]->status) {
447
		case DIFF_STATUS_DELETED:
448
			printf("D ");
449
			print_path(spec->path);
450
			string_list_insert(changed, spec->path);
451
			putchar('\n');
452
			break;
453

454
		case DIFF_STATUS_COPIED:
455
		case DIFF_STATUS_RENAMED:
456
			/*
457
			 * If a change in the file corresponding to ospec->path
458
			 * has been observed, we cannot trust its contents
459
			 * because the diff is calculated based on the prior
460
			 * contents, not the current contents.  So, declare a
461
			 * copy or rename only if there was no change observed.
462
			 */
463
			if (!string_list_has_string(changed, ospec->path)) {
464
				printf("%c ", q->queue[i]->status);
465
				print_path(ospec->path);
466
				putchar(' ');
467
				print_path(spec->path);
468
				string_list_insert(changed, spec->path);
469
				putchar('\n');
470

471
				if (oideq(&ospec->oid, &spec->oid) &&
472
				    ospec->mode == spec->mode)
473
					break;
474
			}
475
			/* fallthrough */
476

477
		case DIFF_STATUS_TYPE_CHANGED:
478
		case DIFF_STATUS_MODIFIED:
479
		case DIFF_STATUS_ADDED:
480
			/*
481
			 * Links refer to objects in another repositories;
482
			 * output the SHA-1 verbatim.
483
			 */
484
			if (no_data || S_ISGITLINK(spec->mode))
485
				printf("M %06o %s ", spec->mode,
486
				       anonymize ?
487
				       anonymize_oid(oid_to_hex(&spec->oid)) :
488
				       oid_to_hex(&spec->oid));
489
			else {
490
				struct object *object = lookup_object(the_repository,
491
								      &spec->oid);
492
				printf("M %06o :%d ", spec->mode,
493
				       get_object_mark(object));
494
			}
495
			print_path(spec->path);
496
			string_list_insert(changed, spec->path);
497
			putchar('\n');
498
			break;
499

500
		default:
501
			die("Unexpected comparison status '%c' for %s, %s",
502
				q->queue[i]->status,
503
				ospec->path ? ospec->path : "none",
504
				spec->path ? spec->path : "none");
505
		}
506
	}
507
}
508

509
/*
 * Find the value of an "encoding" header in the region starting at
 * `begin`.  The search for "\nencoding " is limited to [begin, end)
 * when `end` is non-NULL, and to the NUL-terminated string at `begin`
 * otherwise.
 *
 * Returns a pointer to the encoding name inside the searched buffer,
 * or NULL if there is no such header.
 *
 * NOTE: the name is terminated in place -- the newline that ends the
 * header line (or the existing terminator) is overwritten with NUL, so
 * the caller's buffer is modified even though it is passed as const.
 */
static const char *find_encoding(const char *begin, const char *end)
{
	const char *needle = "\nencoding ";
	char *bol, *eol;

	bol = memmem(begin, end ? end - begin : strlen(begin),
		     needle, strlen(needle));
	if (!bol)
		return NULL;
	bol += strlen(needle);
	eol = strchrnul(bol, '\n');
	*eol = '\0';
	return bol;
}
523

524
static char *anonymize_ref_component(void)
525
{
526
	static int counter;
527
	struct strbuf out = STRBUF_INIT;
528
	strbuf_addf(&out, "ref%d", counter++);
529
	return strbuf_detach(&out, NULL);
530
}
531

532
static const char *anonymize_refname(const char *refname)
533
{
534
	/*
535
	 * If any of these prefixes is found, we will leave it intact
536
	 * so that tags remain tags and so forth.
537
	 */
538
	static const char *prefixes[] = {
539
		"refs/heads/",
540
		"refs/tags/",
541
		"refs/remotes/",
542
		"refs/"
543
	};
544
	static struct hashmap refs;
545
	static struct strbuf anon = STRBUF_INIT;
546
	int i;
547

548
	strbuf_reset(&anon);
549
	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
550
		if (skip_prefix(refname, prefixes[i], &refname)) {
551
			strbuf_addstr(&anon, prefixes[i]);
552
			break;
553
		}
554
	}
555

556
	anonymize_path(&anon, refname, &refs, anonymize_ref_component);
557
	return anon.buf;
558
}
559

560
/*
 * We do not even bother to cache commit messages, as they are unlikely
 * to be repeated verbatim, and it is not that interesting when they are.
 *
 * Returns a newly allocated placeholder message numbered by a counter
 * that starts at 0; the caller frees it.
 */
static char *anonymize_commit_message(void)
{
	static int counter;
	return xstrfmt("subject %d\n\nbody\n", counter++);
}
569

570
static char *anonymize_ident(void)
571
{
572
	static int counter;
573
	struct strbuf out = STRBUF_INIT;
574
	strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
575
	counter++;
576
	return strbuf_detach(&out, NULL);
577
}
578

579
/*
580
 * Our strategy here is to anonymize the names and email addresses,
581
 * but keep timestamps intact, as they influence things like traversal
582
 * order (and by themselves should not be too revealing).
583
 */
584
static void anonymize_ident_line(const char **beg, const char **end)
585
{
586
	static struct hashmap idents;
587
	static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
588
	static unsigned which_buffer;
589

590
	struct strbuf *out;
591
	struct ident_split split;
592
	const char *end_of_header;
593

594
	out = &buffers[which_buffer++];
595
	which_buffer %= ARRAY_SIZE(buffers);
596
	strbuf_reset(out);
597

598
	/* skip "committer", "author", "tagger", etc */
599
	end_of_header = strchr(*beg, ' ');
600
	if (!end_of_header)
601
		BUG("malformed line fed to anonymize_ident_line: %.*s",
602
		    (int)(*end - *beg), *beg);
603
	end_of_header++;
604
	strbuf_add(out, *beg, end_of_header - *beg);
605

606
	if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
607
	    split.date_begin) {
608
		const char *ident;
609
		size_t len;
610

611
		len = split.mail_end - split.name_begin;
612
		ident = anonymize_str(&idents, anonymize_ident,
613
				      split.name_begin, len);
614
		strbuf_addstr(out, ident);
615
		strbuf_addch(out, ' ');
616
		strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
617
	} else {
618
		strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
619
	}
620

621
	*beg = out->buf;
622
	*end = out->buf + out->len;
623
}
624

625
static void handle_commit(struct commit *commit, struct rev_info *rev,
626
			  struct string_list *paths_of_changed_objects)
627
{
628
	int saved_output_format = rev->diffopt.output_format;
629
	const char *commit_buffer;
630
	const char *author, *author_end, *committer, *committer_end;
631
	const char *encoding, *message;
632
	char *reencoded = NULL;
633
	struct commit_list *p;
634
	const char *refname;
635
	int i;
636

637
	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
638

639
	parse_commit_or_die(commit);
640
	commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
641
	author = strstr(commit_buffer, "\nauthor ");
642
	if (!author)
643
		die("could not find author in commit %s",
644
		    oid_to_hex(&commit->object.oid));
645
	author++;
646
	author_end = strchrnul(author, '\n');
647
	committer = strstr(author_end, "\ncommitter ");
648
	if (!committer)
649
		die("could not find committer in commit %s",
650
		    oid_to_hex(&commit->object.oid));
651
	committer++;
652
	committer_end = strchrnul(committer, '\n');
653
	message = strstr(committer_end, "\n\n");
654
	encoding = find_encoding(committer_end, message);
655
	if (message)
656
		message += 2;
657

658
	if (commit->parents &&
659
	    (get_object_mark(&commit->parents->item->object) != 0 ||
660
	     reference_excluded_commits) &&
661
	    !full_tree) {
662
		parse_commit_or_die(commit->parents->item);
663
		diff_tree_oid(get_commit_tree_oid(commit->parents->item),
664
			      get_commit_tree_oid(commit), "", &rev->diffopt);
665
	}
666
	else
667
		diff_root_tree_oid(get_commit_tree_oid(commit),
668
				   "", &rev->diffopt);
669

670
	/* Export the referenced blobs, and remember the marks. */
671
	for (i = 0; i < diff_queued_diff.nr; i++)
672
		if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
673
			export_blob(&diff_queued_diff.queue[i]->two->oid);
674

675
	refname = *revision_sources_at(&revision_sources, commit);
676
	/*
677
	 * FIXME: string_list_remove() below for each ref is overall
678
	 * O(N^2).  Compared to a history walk and diffing trees, this is
679
	 * just lost in the noise in practice.  However, theoretically a
680
	 * repo may have enough refs for this to become slow.
681
	 */
682
	string_list_remove(&extra_refs, refname, 0);
683
	if (anonymize) {
684
		refname = anonymize_refname(refname);
685
		anonymize_ident_line(&committer, &committer_end);
686
		anonymize_ident_line(&author, &author_end);
687
	}
688

689
	mark_next_object(&commit->object);
690
	if (anonymize) {
691
		reencoded = anonymize_commit_message();
692
	} else if (encoding) {
693
		switch(reencode_mode) {
694
		case REENCODE_YES:
695
			reencoded = reencode_string(message, "UTF-8", encoding);
696
			break;
697
		case REENCODE_NO:
698
			break;
699
		case REENCODE_ABORT:
700
			die("Encountered commit-specific encoding %s in commit "
701
			    "%s; use --reencode=[yes|no] to handle it",
702
			    encoding, oid_to_hex(&commit->object.oid));
703
		}
704
	}
705
	if (!commit->parents)
706
		printf("reset %s\n", refname);
707
	printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
708
	if (show_original_ids)
709
		printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
710
	printf("%.*s\n%.*s\n",
711
	       (int)(author_end - author), author,
712
	       (int)(committer_end - committer), committer);
713
	if (!reencoded && encoding)
714
		printf("encoding %s\n", encoding);
715
	printf("data %u\n%s",
716
	       (unsigned)(reencoded
717
			  ? strlen(reencoded) : message
718
			  ? strlen(message) : 0),
719
	       reencoded ? reencoded : message ? message : "");
720
	free(reencoded);
721
	repo_unuse_commit_buffer(the_repository, commit, commit_buffer);
722

723
	for (i = 0, p = commit->parents; p; p = p->next) {
724
		struct object *obj = &p->item->object;
725
		int mark = get_object_mark(obj);
726

727
		if (!mark && !reference_excluded_commits)
728
			continue;
729
		if (i == 0)
730
			printf("from ");
731
		else
732
			printf("merge ");
733
		if (mark)
734
			printf(":%d\n", mark);
735
		else
736
			printf("%s\n",
737
			       anonymize ?
738
			       anonymize_oid(oid_to_hex(&obj->oid)) :
739
			       oid_to_hex(&obj->oid));
740
		i++;
741
	}
742

743
	if (full_tree)
744
		printf("deleteall\n");
745
	log_tree_diff_flush(rev);
746
	string_list_clear(paths_of_changed_objects, 0);
747
	rev->diffopt.output_format = saved_output_format;
748

749
	printf("\n");
750

751
	show_progress();
752
}
753

754
static char *anonymize_tag(void)
755
{
756
	static int counter;
757
	struct strbuf out = STRBUF_INIT;
758
	strbuf_addf(&out, "tag message %d", counter++);
759
	return strbuf_detach(&out, NULL);
760
}
761

762

763
static void handle_tag(const char *name, struct tag *tag)
764
{
765
	unsigned long size;
766
	enum object_type type;
767
	char *buf;
768
	const char *tagger, *tagger_end, *message;
769
	size_t message_size = 0;
770
	struct object *tagged;
771
	int tagged_mark;
772
	struct commit *p;
773

774
	/* Trees have no identifier in fast-export output, thus we have no way
775
	 * to output tags of trees, tags of tags of trees, etc.  Simply omit
776
	 * such tags.
777
	 */
778
	tagged = tag->tagged;
779
	while (tagged->type == OBJ_TAG) {
780
		tagged = ((struct tag *)tagged)->tagged;
781
	}
782
	if (tagged->type == OBJ_TREE) {
783
		warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
784
			oid_to_hex(&tag->object.oid));
785
		return;
786
	}
787

788
	buf = repo_read_object_file(the_repository, &tag->object.oid, &type,
789
				    &size);
790
	if (!buf)
791
		die("could not read tag %s", oid_to_hex(&tag->object.oid));
792
	message = memmem(buf, size, "\n\n", 2);
793
	if (message) {
794
		message += 2;
795
		message_size = strlen(message);
796
	}
797
	tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
798
	if (!tagger) {
799
		if (fake_missing_tagger)
800
			tagger = "tagger Unspecified Tagger "
801
				"<unspecified-tagger> 0 +0000";
802
		else
803
			tagger = "";
804
		tagger_end = tagger + strlen(tagger);
805
	} else {
806
		tagger++;
807
		tagger_end = strchrnul(tagger, '\n');
808
		if (anonymize)
809
			anonymize_ident_line(&tagger, &tagger_end);
810
	}
811

812
	if (anonymize) {
813
		name = anonymize_refname(name);
814
		if (message) {
815
			static struct hashmap tags;
816
			message = anonymize_str(&tags, anonymize_tag,
817
						message, message_size);
818
			message_size = strlen(message);
819
		}
820
	}
821

822
	/* handle signed tags */
823
	if (message) {
824
		const char *signature = strstr(message,
825
					       "\n-----BEGIN PGP SIGNATURE-----\n");
826
		if (signature)
827
			switch(signed_tag_mode) {
828
			case SIGNED_TAG_ABORT:
829
				die("encountered signed tag %s; use "
830
				    "--signed-tags=<mode> to handle it",
831
				    oid_to_hex(&tag->object.oid));
832
			case WARN:
833
				warning("exporting signed tag %s",
834
					oid_to_hex(&tag->object.oid));
835
				/* fallthru */
836
			case VERBATIM:
837
				break;
838
			case WARN_STRIP:
839
				warning("stripping signature from tag %s",
840
					oid_to_hex(&tag->object.oid));
841
				/* fallthru */
842
			case STRIP:
843
				message_size = signature + 1 - message;
844
				break;
845
			}
846
	}
847

848
	/* handle tag->tagged having been filtered out due to paths specified */
849
	tagged = tag->tagged;
850
	tagged_mark = get_object_mark(tagged);
851
	if (!tagged_mark) {
852
		switch(tag_of_filtered_mode) {
853
		case TAG_FILTERING_ABORT:
854
			die("tag %s tags unexported object; use "
855
			    "--tag-of-filtered-object=<mode> to handle it",
856
			    oid_to_hex(&tag->object.oid));
857
		case DROP:
858
			/* Ignore this tag altogether */
859
			free(buf);
860
			return;
861
		case REWRITE:
862
			if (tagged->type == OBJ_TAG && !mark_tags) {
863
				die(_("Error: Cannot export nested tags unless --mark-tags is specified."));
864
			} else if (tagged->type == OBJ_COMMIT) {
865
				p = rewrite_commit((struct commit *)tagged);
866
				if (!p) {
867
					printf("reset %s\nfrom %s\n\n",
868
					       name, oid_to_hex(null_oid()));
869
					free(buf);
870
					return;
871
				}
872
				tagged_mark = get_object_mark(&p->object);
873
			} else {
874
				/* tagged->type is either OBJ_BLOB or OBJ_TAG */
875
				tagged_mark = get_object_mark(tagged);
876
			}
877
		}
878
	}
879

880
	if (tagged->type == OBJ_TAG) {
881
		printf("reset %s\nfrom %s\n\n",
882
		       name, oid_to_hex(null_oid()));
883
	}
884
	skip_prefix(name, "refs/tags/", &name);
885
	printf("tag %s\n", name);
886
	if (mark_tags) {
887
		mark_next_object(&tag->object);
888
		printf("mark :%"PRIu32"\n", last_idnum);
889
	}
890
	if (tagged_mark)
891
		printf("from :%d\n", tagged_mark);
892
	else
893
		printf("from %s\n", oid_to_hex(&tagged->oid));
894

895
	if (show_original_ids)
896
		printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
897
	printf("%.*s%sdata %d\n%.*s\n",
898
	       (int)(tagger_end - tagger), tagger,
899
	       tagger == tagger_end ? "" : "\n",
900
	       (int)message_size, (int)message_size, message ? message : "");
901
	free(buf);
902
}
903

904
static struct commit *get_commit(struct rev_cmdline_entry *e, const char *full_name)
905
{
906
	switch (e->item->type) {
907
	case OBJ_COMMIT:
908
		return (struct commit *)e->item;
909
	case OBJ_TAG: {
910
		struct tag *tag = (struct tag *)e->item;
911

912
		/* handle nested tags */
913
		while (tag && tag->object.type == OBJ_TAG) {
914
			parse_object(the_repository, &tag->object.oid);
915
			string_list_append(&tag_refs, full_name)->util = tag;
916
			tag = (struct tag *)tag->tagged;
917
		}
918
		if (!tag)
919
			die("Tag %s points nowhere?", e->name);
920
		return (struct commit *)tag;
921
	}
922
	default:
923
		return NULL;
924
	}
925
}
926

927
static void get_tags_and_duplicates(struct rev_cmdline_info *info)
928
{
929
	int i;
930

931
	for (i = 0; i < info->nr; i++) {
932
		struct rev_cmdline_entry *e = info->rev + i;
933
		struct object_id oid;
934
		struct commit *commit;
935
		char *full_name = NULL;
936

937
		if (e->flags & UNINTERESTING)
938
			continue;
939

940
		if (repo_dwim_ref(the_repository, e->name, strlen(e->name),
941
				  &oid, &full_name, 0) != 1) {
942
			free(full_name);
943
			continue;
944
		}
945

946
		if (refspecs.nr) {
947
			char *private;
948
			private = apply_refspecs(&refspecs, full_name);
949
			if (private) {
950
				free(full_name);
951
				full_name = private;
952
			}
953
		}
954

955
		commit = get_commit(e, full_name);
956
		if (!commit) {
957
			warning("%s: Unexpected object of type %s, skipping.",
958
				e->name,
959
				type_name(e->item->type));
960
			free(full_name);
961
			continue;
962
		}
963

964
		switch(commit->object.type) {
965
		case OBJ_COMMIT:
966
			break;
967
		case OBJ_BLOB:
968
			export_blob(&commit->object.oid);
969
			free(full_name);
970
			continue;
971
		default: /* OBJ_TAG (nested tags) is already handled */
972
			warning("Tag points to object of unexpected type %s, skipping.",
973
				type_name(commit->object.type));
974
			free(full_name);
975
			continue;
976
		}
977

978
		/*
979
		 * Make sure this ref gets properly updated eventually, whether
980
		 * through a commit or manually at the end.
981
		 */
982
		if (e->item->type != OBJ_TAG)
983
			string_list_append(&extra_refs, full_name)->util = commit;
984

985
		if (!*revision_sources_at(&revision_sources, commit))
986
			*revision_sources_at(&revision_sources, commit) = full_name;
987
		else
988
			free(full_name);
989
	}
990

991
	string_list_sort(&extra_refs);
992
	string_list_remove_duplicates(&extra_refs, 0);
993
}
994

995
static void handle_tags_and_duplicates(struct string_list *extras)
996
{
997
	struct commit *commit;
998
	int i;
999

1000
	for (i = extras->nr - 1; i >= 0; i--) {
1001
		const char *name = extras->items[i].string;
1002
		struct object *object = extras->items[i].util;
1003
		int mark;
1004

1005
		switch (object->type) {
1006
		case OBJ_TAG:
1007
			handle_tag(name, (struct tag *)object);
1008
			break;
1009
		case OBJ_COMMIT:
1010
			if (anonymize)
1011
				name = anonymize_refname(name);
1012
			/* create refs pointing to already seen commits */
1013
			commit = rewrite_commit((struct commit *)object);
1014
			if (!commit) {
1015
				/*
1016
				 * Neither this object nor any of its
1017
				 * ancestors touch any relevant paths, so
1018
				 * it has been filtered to nothing.  Delete
1019
				 * it.
1020
				 */
1021
				printf("reset %s\nfrom %s\n\n",
1022
				       name, oid_to_hex(null_oid()));
1023
				continue;
1024
			}
1025

1026
			mark = get_object_mark(&commit->object);
1027
			if (!mark) {
1028
				/*
1029
				 * Getting here means we have a commit which
1030
				 * was excluded by a negative refspec (e.g.
1031
				 * fast-export ^HEAD HEAD).  If we are
1032
				 * referencing excluded commits, set the ref
1033
				 * to the exact commit.  Otherwise, the user
1034
				 * wants the branch exported but every commit
1035
				 * in its history to be deleted, which basically
1036
				 * just means deletion of the ref.
1037
				 */
1038
				if (!reference_excluded_commits) {
1039
					/* delete the ref */
1040
					printf("reset %s\nfrom %s\n\n",
1041
					       name, oid_to_hex(null_oid()));
1042
					continue;
1043
				}
1044
				/* set ref to commit using oid, not mark */
1045
				printf("reset %s\nfrom %s\n\n", name,
1046
				       oid_to_hex(&commit->object.oid));
1047
				continue;
1048
			}
1049

1050
			printf("reset %s\nfrom :%d\n\n", name, mark
1051
			       );
1052
			show_progress();
1053
			break;
1054
		}
1055
	}
1056
}
1057

1058
static void export_marks(char *file)
1059
{
1060
	unsigned int i;
1061
	uint32_t mark;
1062
	struct decoration_entry *deco = idnums.entries;
1063
	FILE *f;
1064
	int e = 0;
1065

1066
	f = fopen_for_writing(file);
1067
	if (!f)
1068
		die_errno("Unable to open marks file %s for writing.", file);
1069

1070
	for (i = 0; i < idnums.size; i++) {
1071
		if (deco->base && deco->base->type == 1) {
1072
			mark = ptr_to_mark(deco->decoration);
1073
			if (fprintf(f, ":%"PRIu32" %s\n", mark,
1074
				oid_to_hex(&deco->base->oid)) < 0) {
1075
			    e = 1;
1076
			    break;
1077
			}
1078
		}
1079
		deco++;
1080
	}
1081

1082
	e |= ferror(f);
1083
	e |= fclose(f);
1084
	if (e)
1085
		error("Unable to write marks file %s.", file);
1086
}
1087

1088
static void import_marks(char *input_file, int check_exists)
1089
{
1090
	char line[512];
1091
	FILE *f;
1092
	struct stat sb;
1093

1094
	if (check_exists && stat(input_file, &sb))
1095
		return;
1096

1097
	f = xfopen(input_file, "r");
1098
	while (fgets(line, sizeof(line), f)) {
1099
		uint32_t mark;
1100
		char *line_end, *mark_end;
1101
		struct object_id oid;
1102
		struct object *object;
1103
		struct commit *commit;
1104
		enum object_type type;
1105

1106
		line_end = strchr(line, '\n');
1107
		if (line[0] != ':' || !line_end)
1108
			die("corrupt mark line: %s", line);
1109
		*line_end = '\0';
1110

1111
		mark = strtoumax(line + 1, &mark_end, 10);
1112
		if (!mark || mark_end == line + 1
1113
			|| *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
1114
			die("corrupt mark line: %s", line);
1115

1116
		if (last_idnum < mark)
1117
			last_idnum = mark;
1118

1119
		type = oid_object_info(the_repository, &oid, NULL);
1120
		if (type < 0)
1121
			die("object not found: %s", oid_to_hex(&oid));
1122

1123
		if (type != OBJ_COMMIT)
1124
			/* only commits */
1125
			continue;
1126

1127
		commit = lookup_commit(the_repository, &oid);
1128
		if (!commit)
1129
			die("not a commit? can't happen: %s", oid_to_hex(&oid));
1130

1131
		object = &commit->object;
1132

1133
		if (object->flags & SHOWN)
1134
			error("Object %s already has a mark", oid_to_hex(&oid));
1135

1136
		mark_object(object, mark);
1137

1138
		object->flags |= SHOWN;
1139
	}
1140
	fclose(f);
1141
}
1142

1143
static void handle_deletes(void)
1144
{
1145
	int i;
1146
	for (i = 0; i < refspecs.nr; i++) {
1147
		struct refspec_item *refspec = &refspecs.items[i];
1148
		if (*refspec->src)
1149
			continue;
1150

1151
		printf("reset %s\nfrom %s\n\n",
1152
				refspec->dst, oid_to_hex(null_oid()));
1153
	}
1154
}
1155

1156
static int parse_opt_anonymize_map(const struct option *opt,
1157
				   const char *arg, int unset)
1158
{
1159
	struct hashmap *map = opt->value;
1160
	const char *delim, *value;
1161
	size_t keylen;
1162

1163
	BUG_ON_OPT_NEG(unset);
1164

1165
	delim = strchr(arg, ':');
1166
	if (delim) {
1167
		keylen = delim - arg;
1168
		value = delim + 1;
1169
	} else {
1170
		keylen = strlen(arg);
1171
		value = arg;
1172
	}
1173

1174
	if (!keylen || !*value)
1175
		return error(_("--anonymize-map token cannot be empty"));
1176

1177
	add_anonymized_entry(map, memhash(arg, keylen), arg, keylen,
1178
			     xstrdup(value));
1179

1180
	return 0;
1181
}
1182

1183
int cmd_fast_export(int argc, const char **argv, const char *prefix)
1184
{
1185
	struct rev_info revs;
1186
	struct commit *commit;
1187
	char *export_filename = NULL,
1188
	     *import_filename = NULL,
1189
	     *import_filename_if_exists = NULL;
1190
	uint32_t lastimportid;
1191
	struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
1192
	struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
1193
	struct option options[] = {
1194
		OPT_INTEGER(0, "progress", &progress,
1195
			    N_("show progress after <n> objects")),
1196
		OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
1197
			     N_("select handling of signed tags"),
1198
			     parse_opt_signed_tag_mode),
1199
		OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1200
			     N_("select handling of tags that tag filtered objects"),
1201
			     parse_opt_tag_of_filtered_mode),
1202
		OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
1203
			     N_("select handling of commit messages in an alternate encoding"),
1204
			     parse_opt_reencode_mode),
1205
		OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1206
			     N_("dump marks to this file")),
1207
		OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1208
			     N_("import marks from this file")),
1209
		OPT_STRING(0, "import-marks-if-exists",
1210
			     &import_filename_if_exists,
1211
			     N_("file"),
1212
			     N_("import marks from this file if it exists")),
1213
		OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1214
			 N_("fake a tagger when tags lack one")),
1215
		OPT_BOOL(0, "full-tree", &full_tree,
1216
			 N_("output full tree for each commit")),
1217
		OPT_BOOL(0, "use-done-feature", &use_done_feature,
1218
			     N_("use the done feature to terminate the stream")),
1219
		OPT_BOOL(0, "no-data", &no_data, N_("skip output of blob data")),
1220
		OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1221
			     N_("apply refspec to exported refs")),
1222
		OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1223
		OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
1224
			       N_("convert <from> to <to> in anonymized output"),
1225
			       PARSE_OPT_NONEG, parse_opt_anonymize_map),
1226
		OPT_BOOL(0, "reference-excluded-parents",
1227
			 &reference_excluded_commits, N_("reference parents which are not in fast-export stream by object id")),
1228
		OPT_BOOL(0, "show-original-ids", &show_original_ids,
1229
			    N_("show original object ids of blobs/commits")),
1230
		OPT_BOOL(0, "mark-tags", &mark_tags,
1231
			    N_("label tags with mark ids")),
1232

1233
		OPT_END()
1234
	};
1235

1236
	if (argc == 1)
1237
		usage_with_options (fast_export_usage, options);
1238

1239
	/* we handle encodings */
1240
	git_config(git_default_config, NULL);
1241

1242
	repo_init_revisions(the_repository, &revs, prefix);
1243
	init_revision_sources(&revision_sources);
1244
	revs.topo_order = 1;
1245
	revs.sources = &revision_sources;
1246
	revs.rewrite_parents = 1;
1247
	argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1248
			PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT);
1249
	argc = setup_revisions(argc, argv, &revs, NULL);
1250
	if (argc > 1)
1251
		usage_with_options (fast_export_usage, options);
1252

1253
	if (anonymized_seeds.cmpfn && !anonymize)
1254
		die(_("the option '%s' requires '%s'"), "--anonymize-map", "--anonymize");
1255

1256
	if (refspecs_list.nr) {
1257
		int i;
1258

1259
		for (i = 0; i < refspecs_list.nr; i++)
1260
			refspec_append(&refspecs, refspecs_list.items[i].string);
1261

1262
		string_list_clear(&refspecs_list, 1);
1263
	}
1264

1265
	if (use_done_feature)
1266
		printf("feature done\n");
1267

1268
	if (import_filename && import_filename_if_exists)
1269
		die(_("options '%s' and '%s' cannot be used together"), "--import-marks", "--import-marks-if-exists");
1270
	if (import_filename)
1271
		import_marks(import_filename, 0);
1272
	else if (import_filename_if_exists)
1273
		import_marks(import_filename_if_exists, 1);
1274
	lastimportid = last_idnum;
1275

1276
	if (import_filename && revs.prune_data.nr)
1277
		full_tree = 1;
1278

1279
	get_tags_and_duplicates(&revs.cmdline);
1280

1281
	if (prepare_revision_walk(&revs))
1282
		die("revision walk setup failed");
1283

1284
	revs.reverse = 1;
1285
	revs.diffopt.format_callback = show_filemodify;
1286
	revs.diffopt.format_callback_data = &paths_of_changed_objects;
1287
	revs.diffopt.flags.recursive = 1;
1288

1289
	revs.diffopt.no_free = 1;
1290
	while ((commit = get_revision(&revs)))
1291
		handle_commit(commit, &revs, &paths_of_changed_objects);
1292
	revs.diffopt.no_free = 0;
1293

1294
	handle_tags_and_duplicates(&extra_refs);
1295
	handle_tags_and_duplicates(&tag_refs);
1296
	handle_deletes();
1297

1298
	if (export_filename && lastimportid != last_idnum)
1299
		export_marks(export_filename);
1300

1301
	if (use_done_feature)
1302
		printf("done\n");
1303

1304
	refspec_clear(&refspecs);
1305
	release_revisions(&revs);
1306

1307
	return 0;
1308
}
1309
git

Использование cookies