git

mailinfo.c
1304 строки · 28.8 Кб
Перенос по словам
1
#define USE_THE_REPOSITORY_VARIABLE
2

3
#include "git-compat-util.h"
4
#include "config.h"
5
#include "gettext.h"
6
#include "hex-ll.h"
7
#include "utf8.h"
8
#include "strbuf.h"
9
#include "mailinfo.h"
10

11
static void cleanup_space(struct strbuf *sb)
12
{
13
	size_t pos, cnt;
14
	for (pos = 0; pos < sb->len; pos++) {
15
		if (isspace(sb->buf[pos])) {
16
			sb->buf[pos] = ' ';
17
			for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
18
			strbuf_remove(sb, pos + 1, cnt);
19
		}
20
	}
21
}
22

23
static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
24
{
25
	struct strbuf *src = name;
26
	if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
27
		src = email;
28
	else if (name == out)
29
		return;
30
	strbuf_reset(out);
31
	strbuf_addbuf(out, src);
32
}
33

34
static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
35
{
36
	/* John Doe <johndoe> */
37

38
	char *bra, *ket;
39
	/* This is fallback, so do not bother if we already have an
40
	 * e-mail address.
41
	 */
42
	if (mi->email.len)
43
		return;
44

45
	bra = strchr(line->buf, '<');
46
	if (!bra)
47
		return;
48
	ket = strchr(bra, '>');
49
	if (!ket)
50
		return;
51

52
	strbuf_reset(&mi->email);
53
	strbuf_add(&mi->email, bra + 1, ket - bra - 1);
54

55
	strbuf_reset(&mi->name);
56
	strbuf_add(&mi->name, line->buf, bra - line->buf);
57
	strbuf_trim(&mi->name);
58
	get_sane_name(&mi->name, &mi->name, &mi->email);
59
}
60

61
/*
 * Copy a parenthesized comment to "outbuf" while dropping the
 * backslashes of quoted pairs.
 *
 * "in" points just past the opening '(' (which is emitted here).
 * Nested parentheses are tracked; an unescaped ')' that closes the
 * outermost level ends the comment.  Returns a pointer just past the
 * closing ')', or to the terminating NUL if the comment is not closed.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
	int escaped = 0;
	int nesting = 1;

	strbuf_addch(outbuf, '(');

	while (*in) {
		int ch = *in++;

		if (escaped) {
			/* character after a backslash is copied verbatim */
			escaped = 0;
			strbuf_addch(outbuf, ch);
			continue;
		}
		if (ch == '\\') {
			escaped = 1;
			continue;
		}

		strbuf_addch(outbuf, ch);
		if (ch == '(')
			nesting++;
		else if (ch == ')' && !--nesting)
			return in;
	}

	return in;
}
94

95
/*
 * Copy the contents of a double-quoted string to "outbuf", dropping
 * the surrounding quotes and the backslashes of quoted pairs.
 *
 * "in" points just past the opening '"'.  Returns a pointer just past
 * the closing unescaped '"', or to the terminating NUL if the string
 * is not closed.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
	int escaped = 0;
	int ch;

	while ((ch = *in) != 0) {
		in++;
		if (escaped) {
			/* character after a backslash is copied verbatim */
			escaped = 0;
		} else if (ch == '\\') {
			escaped = 1;
			continue;
		} else if (ch == '"') {
			return in;
		}
		strbuf_addch(outbuf, ch);
	}

	return in;
}
118

119
static void unquote_quoted_pair(struct strbuf *line)
120
{
121
	struct strbuf outbuf;
122
	const char *in = line->buf;
123
	int c;
124

125
	strbuf_init(&outbuf, line->len);
126

127
	while ((c = *in++) != 0) {
128
		switch (c) {
129
		case '"':
130
			in = unquote_quoted_string(&outbuf, in);
131
			continue;
132
		case '(':
133
			in = unquote_comment(&outbuf, in);
134
			continue;
135
		}
136

137
		strbuf_addch(&outbuf, c);
138
	}
139

140
	strbuf_swap(&outbuf, line);
141
	strbuf_release(&outbuf);
142

143
}
144

145
static void handle_from(struct mailinfo *mi, const struct strbuf *from)
146
{
147
	char *at;
148
	size_t el;
149
	struct strbuf f;
150

151
	strbuf_init(&f, from->len);
152
	strbuf_addbuf(&f, from);
153

154
	unquote_quoted_pair(&f);
155

156
	at = strchr(f.buf, '@');
157
	if (!at) {
158
		parse_bogus_from(mi, from);
159
		goto out;
160
	}
161

162
	/*
163
	 * If we already have one email, don't take any confusing lines
164
	 */
165
	if (mi->email.len && strchr(at + 1, '@'))
166
		goto out;
167

168
	/* Pick up the string around '@', possibly delimited with <>
169
	 * pair; that is the email part.
170
	 */
171
	while (at > f.buf) {
172
		char c = at[-1];
173
		if (isspace(c))
174
			break;
175
		if (c == '<') {
176
			at[-1] = ' ';
177
			break;
178
		}
179
		at--;
180
	}
181
	el = strcspn(at, " \n\t\r\v\f>");
182
	strbuf_reset(&mi->email);
183
	strbuf_add(&mi->email, at, el);
184
	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
185

186
	/* The remainder is name.  It could be
187
	 *
188
	 * - "John Doe <john.doe@xz>"			(a), or
189
	 * - "john.doe@xz (John Doe)"			(b), or
190
	 * - "John (zzz) Doe <john.doe@xz> (Comment)"	(c)
191
	 *
192
	 * but we have removed the email part, so
193
	 *
194
	 * - remove extra spaces which could stay after email (case 'c'), and
195
	 * - trim from both ends, possibly removing the () pair at the end
196
	 *   (cases 'a' and 'b').
197
	 */
198
	cleanup_space(&f);
199
	strbuf_trim(&f);
200
	if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
201
		strbuf_remove(&f, 0, 1);
202
		strbuf_setlen(&f, f.len - 1);
203
	}
204

205
	get_sane_name(&mi->name, &f, &mi->email);
206
out:
207
	strbuf_release(&f);
208
}
209

210
static void handle_header(struct strbuf **out, const struct strbuf *line)
211
{
212
	if (!*out) {
213
		*out = xmalloc(sizeof(struct strbuf));
214
		strbuf_init(*out, line->len);
215
	} else
216
		strbuf_reset(*out);
217

218
	strbuf_addbuf(*out, line);
219
}
220

221
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
222
 * to have enough heuristics to grok MIME encoded patches often found
223
 * on our mailing lists.  For example, we do not even treat header lines
224
 * case insensitively.
225
 */
226

227
/*
 * Extract the value of attribute "name" (e.g. "charset=") from "line"
 * into "attr".  The name is located case-insensitively anywhere in
 * the line.  A value starting with '"' runs up to the next '"';
 * otherwise it runs up to ';', space or tab.
 *
 * "attr" is always emptied first.  Returns 1 if the attribute was
 * found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";

	strbuf_setlen(attr, 0);
	if (!value)
		return 0;

	value += strlen(name);
	if (*value == '"') {
		value++;
		terminators = "\"";
	}
	strbuf_add(attr, value, strcspn(value, terminators));
	return 1;
}
246

247
static int has_attr_value(const char *line, const char *name, const char *value)
248
{
249
	struct strbuf sb = STRBUF_INIT;
250
	int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
251
	strbuf_release(&sb);
252
	return rc;
253
}
254

255
static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
256
{
257
	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
258
	strbuf_init(boundary, line->len);
259

260
	mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
261
	mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
262

263
	if (slurp_attr(line->buf, "boundary=", boundary)) {
264
		strbuf_insertstr(boundary, 0, "--");
265
		if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
266
			error("Too many boundaries to handle");
267
			mi->input_error = -1;
268
			mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
269
			return;
270
		}
271
		*(mi->content_top) = boundary;
272
		boundary = NULL;
273
	}
274
	slurp_attr(line->buf, "charset=", &mi->charset);
275

276
	if (boundary) {
277
		strbuf_release(boundary);
278
		free(boundary);
279
	}
280
}
281

282
static void handle_content_transfer_encoding(struct mailinfo *mi,
283
					     const struct strbuf *line)
284
{
285
	if (strcasestr(line->buf, "base64"))
286
		mi->transfer_encoding = TE_BASE64;
287
	else if (strcasestr(line->buf, "quoted-printable"))
288
		mi->transfer_encoding = TE_QP;
289
	else
290
		mi->transfer_encoding = TE_DONTCARE;
291
}
292

293
static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
294
{
295
	struct strbuf *content_top = *(mi->content_top);
296

297
	return ((content_top->len <= line->len) &&
298
		!memcmp(line->buf, content_top->buf, content_top->len));
299
}
300

301
static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
302
{
303
	size_t at = 0;
304

305
	while (at < subject->len) {
306
		char *pos;
307
		size_t remove;
308

309
		switch (subject->buf[at]) {
310
		case 'r': case 'R':
311
			if (subject->len <= at + 3)
312
				break;
313
			if ((subject->buf[at + 1] == 'e' ||
314
			     subject->buf[at + 1] == 'E') &&
315
			    subject->buf[at + 2] == ':') {
316
				strbuf_remove(subject, at, 3);
317
				continue;
318
			}
319
			at++;
320
			break;
321
		case ' ': case '\t': case ':':
322
			strbuf_remove(subject, at, 1);
323
			continue;
324
		case '[':
325
			pos = strchr(subject->buf + at, ']');
326
			if (!pos)
327
				break;
328
			remove = pos - (subject->buf + at) + 1;
329
			if (!mi->keep_non_patch_brackets_in_subject ||
330
			    (7 <= remove &&
331
			     memmem(subject->buf + at, remove, "PATCH", 5)))
332
				strbuf_remove(subject, at, remove);
333
			else {
334
				at += remove;
335
				/*
336
				 * If the input had a space after the ], keep
337
				 * it.  We don't bother with finding the end of
338
				 * the space, since we later normalize it
339
				 * anyway.
340
				 */
341
				if (isspace(subject->buf[at]))
342
					at += 1;
343
			}
344
			continue;
345
		}
346
		break;
347
	}
348
	strbuf_trim(subject);
349
}
350

351
/*
 * Names of the headers whose values are collected.  The table has
 * MAX_HDR_PARSED slots; the unused trailing slots are NULL, and the
 * loops over this table stop at the first NULL entry.
 */
#define MAX_HDR_PARSED 10
static const char *header[MAX_HDR_PARSED] = {
	"From",
	"Subject",
	"Date",
};
355

356
static inline int skip_header(const struct strbuf *line, const char *hdr,
357
			      const char **outval)
358
{
359
	const char *val;
360
	if (!skip_iprefix(line->buf, hdr, &val) ||
361
	    *val++ != ':')
362
		return 0;
363
	while (isspace(*val))
364
		val++;
365
	*outval = val;
366
	return 1;
367
}
368

369
/*
 * Returns non-zero when "line" (of length "len") looks like the
 * "From <40-hex-digit id> Mon Sep 17 00:00:00 2001\n" separator line:
 * same length as the sample, a "From " prefix, exactly 40 lowercase
 * hex digits, and a tail identical to the sample's.
 */
static int is_format_patch_separator(const char *line, int len)
{
	static const char SAMPLE[] =
		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
	const size_t sample_len = sizeof(SAMPLE) - 1;
	const char *cp;
	size_t hash_end;

	if (len != sample_len || !skip_prefix(line, "From ", &cp))
		return 0;
	if (strspn(cp, "0123456789abcdef") != 40)
		return 0;

	/* everything after the object name must match the sample */
	hash_end = (cp + 40) - line;
	return !memcmp(SAMPLE + hash_end, line + hash_end,
		       sample_len - hash_end);
}
384

385
static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
386
{
387
	const char *in = q_seg->buf;
388
	int c;
389
	struct strbuf *out = xmalloc(sizeof(struct strbuf));
390
	strbuf_init(out, q_seg->len);
391

392
	while ((c = *in++) != 0) {
393
		if (c == '=') {
394
			int ch, d = *in;
395
			if (d == '\n' || !d)
396
				break; /* drop trailing newline */
397
			ch = hex2chr(in);
398
			if (ch >= 0) {
399
				strbuf_addch(out, ch);
400
				in += 2;
401
				continue;
402
			}
403
			/* garbage -- fall through */
404
		}
405
		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
406
			c = 0x20;
407
		strbuf_addch(out, c);
408
	}
409
	return out;
410
}
411

412
static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
413
{
414
	/* Decode in..ep, possibly in-place to ot */
415
	int c, pos = 0, acc = 0;
416
	const char *in = b_seg->buf;
417
	struct strbuf *out = xmalloc(sizeof(struct strbuf));
418
	strbuf_init(out, b_seg->len);
419

420
	while ((c = *in++) != 0) {
421
		if (c == '+')
422
			c = 62;
423
		else if (c == '/')
424
			c = 63;
425
		else if ('A' <= c && c <= 'Z')
426
			c -= 'A';
427
		else if ('a' <= c && c <= 'z')
428
			c -= 'a' - 26;
429
		else if ('0' <= c && c <= '9')
430
			c -= '0' - 52;
431
		else
432
			continue; /* garbage */
433
		switch (pos++) {
434
		case 0:
435
			acc = (c << 2);
436
			break;
437
		case 1:
438
			strbuf_addch(out, (acc | (c >> 4)));
439
			acc = (c & 15) << 4;
440
			break;
441
		case 2:
442
			strbuf_addch(out, (acc | (c >> 2)));
443
			acc = (c & 3) << 6;
444
			break;
445
		case 3:
446
			strbuf_addch(out, (acc | c));
447
			acc = pos = 0;
448
			break;
449
		}
450
	}
451
	return out;
452
}
453

454
static int convert_to_utf8(struct mailinfo *mi,
455
			   struct strbuf *line, const char *charset)
456
{
457
	char *out;
458
	size_t out_len;
459

460
	if (!mi->metainfo_charset || !charset || !*charset)
461
		return 0;
462

463
	if (same_encoding(mi->metainfo_charset, charset))
464
		return 0;
465
	out = reencode_string_len(line->buf, line->len,
466
				  mi->metainfo_charset, charset, &out_len);
467
	if (!out) {
468
		mi->input_error = -1;
469
		return error("cannot convert from %s to %s",
470
			     charset, mi->metainfo_charset);
471
	}
472
	strbuf_attach(line, out, out_len, out_len);
473
	return 0;
474
}
475

476
static void decode_header(struct mailinfo *mi, struct strbuf *it)
477
{
478
	char *in, *ep, *cp;
479
	struct strbuf outbuf = STRBUF_INIT, *dec;
480
	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
481
	int found_error = 1; /* pessimism */
482

483
	in = it->buf;
484
	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
485
		int encoding;
486
		strbuf_reset(&charset_q);
487
		strbuf_reset(&piecebuf);
488

489
		if (in != ep) {
490
			/*
491
			 * We are about to process an encoded-word
492
			 * that begins at ep, but there is something
493
			 * before the encoded word.
494
			 */
495
			char *scan;
496
			for (scan = in; scan < ep; scan++)
497
				if (!isspace(*scan))
498
					break;
499

500
			if (scan != ep || in == it->buf) {
501
				/*
502
				 * We should not lose that "something",
503
				 * unless we have just processed an
504
				 * encoded-word, and there is only LWS
505
				 * before the one we are about to process.
506
				 */
507
				strbuf_add(&outbuf, in, ep - in);
508
			}
509
		}
510
		/* E.g.
511
		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
512
		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
513
		 */
514
		ep += 2;
515

516
		if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
517
			goto release_return;
518

519
		if (cp + 3 - it->buf > it->len)
520
			goto release_return;
521
		strbuf_add(&charset_q, ep, cp - ep);
522

523
		encoding = cp[1];
524
		if (!encoding || cp[2] != '?')
525
			goto release_return;
526
		ep = strstr(cp + 3, "?=");
527
		if (!ep)
528
			goto release_return;
529
		strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
530
		switch (tolower(encoding)) {
531
		default:
532
			goto release_return;
533
		case 'b':
534
			dec = decode_b_segment(&piecebuf);
535
			break;
536
		case 'q':
537
			dec = decode_q_segment(&piecebuf, 1);
538
			break;
539
		}
540
		if (convert_to_utf8(mi, dec, charset_q.buf))
541
			goto release_return;
542

543
		strbuf_addbuf(&outbuf, dec);
544
		strbuf_release(dec);
545
		free(dec);
546
		in = ep + 2;
547
	}
548
	strbuf_addstr(&outbuf, in);
549
	strbuf_reset(it);
550
	strbuf_addbuf(it, &outbuf);
551
	found_error = 0;
552
release_return:
553
	strbuf_release(&outbuf);
554
	strbuf_release(&charset_q);
555
	strbuf_release(&piecebuf);
556

557
	if (found_error)
558
		mi->input_error = -1;
559
}
560

561
/*
 * If "line" is a "hdr:" header line, append its value to "val", run
 * decode_header() on "val" (unwrapping RFC 2047 B and Q encoding and,
 * where configured, converting the charset) and return 1.
 * Otherwise return 0 and leave "val" untouched.
 */
static int parse_header(const struct strbuf *line,
			const char *hdr,
			struct mailinfo *mi,
			struct strbuf *val)
{
	const char *raw_value;

	if (skip_header(line, hdr, &raw_value)) {
		strbuf_addstr(val, raw_value);
		decode_header(mi, val);
		return 1;
	}
	return 0;
}
579

580
static int check_header(struct mailinfo *mi,
581
			const struct strbuf *line,
582
			struct strbuf *hdr_data[], int overwrite)
583
{
584
	int i, ret = 0;
585
	struct strbuf sb = STRBUF_INIT;
586

587
	/* search for the interesting parts */
588
	for (i = 0; header[i]; i++) {
589
		if ((!hdr_data[i] || overwrite) &&
590
		    parse_header(line, header[i], mi, &sb)) {
591
			handle_header(&hdr_data[i], &sb);
592
			ret = 1;
593
			goto check_header_out;
594
		}
595
	}
596

597
	/* Content stuff */
598
	if (parse_header(line, "Content-Type", mi, &sb)) {
599
		handle_content_type(mi, &sb);
600
		ret = 1;
601
		goto check_header_out;
602
	}
603
	if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
604
		handle_content_transfer_encoding(mi, &sb);
605
		ret = 1;
606
		goto check_header_out;
607
	}
608
	if (parse_header(line, "Message-ID", mi, &sb)) {
609
		if (mi->add_message_id)
610
			mi->message_id = strbuf_detach(&sb, NULL);
611
		ret = 1;
612
		goto check_header_out;
613
	}
614

615
check_header_out:
616
	strbuf_release(&sb);
617
	return ret;
618
}
619

620
/*
621
 * Returns 1 if the given line or any line beginning with the given line is an
622
 * in-body header (that is, check_header will succeed when passed
623
 * mi->s_hdr_data).
624
 */
625
static int is_inbody_header(const struct mailinfo *mi,
626
			    const struct strbuf *line)
627
{
628
	int i;
629
	const char *val;
630
	for (i = 0; header[i]; i++)
631
		if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
632
			return 1;
633
	return 0;
634
}
635

636
static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
637
{
638
	struct strbuf *ret;
639

640
	switch (mi->transfer_encoding) {
641
	case TE_QP:
642
		ret = decode_q_segment(line, 0);
643
		break;
644
	case TE_BASE64:
645
		ret = decode_b_segment(line);
646
		break;
647
	case TE_DONTCARE:
648
	default:
649
		return;
650
	}
651
	strbuf_reset(line);
652
	strbuf_addbuf(line, ret);
653
	strbuf_release(ret);
654
	free(ret);
655
}
656

657
static inline int patchbreak(const struct strbuf *line)
658
{
659
	size_t i;
660

661
	/* Beginning of a "diff -" header? */
662
	if (starts_with(line->buf, "diff -"))
663
		return 1;
664

665
	/* CVS "Index: " line? */
666
	if (starts_with(line->buf, "Index: "))
667
		return 1;
668

669
	/*
670
	 * "--- <filename>" starts patches without headers
671
	 * "---<sp>*" is a manual separator
672
	 */
673
	if (line->len < 4)
674
		return 0;
675

676
	if (starts_with(line->buf, "---")) {
677
		/* space followed by a filename? */
678
		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
679
			return 1;
680
		/* Just whitespace? */
681
		for (i = 3; i < line->len; i++) {
682
			unsigned char c = line->buf[i];
683
			if (c == '\n')
684
				return 1;
685
			if (!isspace(c))
686
				break;
687
		}
688
		return 0;
689
	}
690
	return 0;
691
}
692

693
/*
 * Returns non-zero when "line" is a scissors line such as
 * "-- >8 --".
 *
 * The perforation consists of dashes, the scissors marks ">8", "8<",
 * ">%" and "%<", and the whitespace that follows any of them.
 */
static int is_scissors_line(const char *line)
{
	const char *p = line;
	const char *first = NULL, *last = NULL;
	int scissors = 0, gap = 0;
	int perforation = 0, in_perforation = 0;
	int visible;

	while (*p) {
		if (isspace(*p)) {
			/* blanks count only while inside the perforation */
			if (in_perforation) {
				perforation++;
				gap++;
			}
		} else {
			last = p;
			if (!first)
				first = p;

			if (*p == '-') {
				in_perforation = 1;
				perforation++;
			} else if (starts_with(p, ">8") || starts_with(p, "8<") ||
				   starts_with(p, ">%") || starts_with(p, "%<")) {
				in_perforation = 1;
				perforation += 2;
				scissors += 2;
				p++; /* the mark is two characters wide */
			} else {
				in_perforation = 0;
			}
		}
		p++;
	}

	/*
	 * The mark must be at least 8 bytes long (e.g. "-- >8 --").
	 * Even though there can be arbitrary cruft on the same line
	 * (e.g. "cut here"), in order to avoid misidentification, the
	 * perforation must occupy more than a third of the visible
	 * width of the line, and dashes and scissors must occupy more
	 * than half of the perforation.
	 */
	visible = (first && last) ? (int)(last - first + 1) : 0;

	return (scissors && 8 <= visible &&
		visible < perforation * 3 &&
		gap * 2 < perforation);
}
744

745
static void flush_inbody_header_accum(struct mailinfo *mi)
746
{
747
	if (!mi->inbody_header_accum.len)
748
		return;
749
	if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
750
		BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
751
	strbuf_reset(&mi->inbody_header_accum);
752
}
753

754
static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
755
{
756
	if (mi->inbody_header_accum.len &&
757
	    (line->buf[0] == ' ' || line->buf[0] == '\t')) {
758
		if (mi->use_scissors && is_scissors_line(line->buf)) {
759
			/*
760
			 * This is a scissors line; do not consider this line
761
			 * as a header continuation line.
762
			 */
763
			flush_inbody_header_accum(mi);
764
			return 0;
765
		}
766
		strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
767
		strbuf_addbuf(&mi->inbody_header_accum, line);
768
		return 1;
769
	}
770

771
	flush_inbody_header_accum(mi);
772

773
	if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
774
		return is_format_patch_separator(line->buf + 1, line->len - 1);
775
	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
776
		int i;
777
		for (i = 0; header[i]; i++)
778
			if (!strcmp("Subject", header[i])) {
779
				handle_header(&mi->s_hdr_data[i], line);
780
				return 1;
781
			}
782
		return 0;
783
	}
784
	if (is_inbody_header(mi, line)) {
785
		strbuf_addbuf(&mi->inbody_header_accum, line);
786
		return 1;
787
	}
788
	return 0;
789
}
790

791
static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
792
{
793
	assert(!mi->filter_stage);
794

795
	if (mi->header_stage) {
796
		if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
797
			if (mi->inbody_header_accum.len) {
798
				flush_inbody_header_accum(mi);
799
				mi->header_stage = 0;
800
			}
801
			return 0;
802
		}
803
	}
804

805
	if (mi->use_inbody_headers && mi->header_stage) {
806
		mi->header_stage = check_inbody_header(mi, line);
807
		if (mi->header_stage)
808
			return 0;
809
	} else
810
		/* Only trim the first (blank) line of the commit message
811
		 * when ignoring in-body headers.
812
		 */
813
		mi->header_stage = 0;
814

815
	/* normalize the log message to UTF-8. */
816
	if (convert_to_utf8(mi, line, mi->charset.buf))
817
		return 0; /* mi->input_error already set */
818

819
	if (mi->use_scissors && is_scissors_line(line->buf)) {
820
		int i;
821

822
		strbuf_setlen(&mi->log_message, 0);
823
		mi->header_stage = 1;
824

825
		/*
826
		 * We may have already read "secondary headers"; purge
827
		 * them to give ourselves a clean restart.
828
		 */
829
		for (i = 0; header[i]; i++) {
830
			if (mi->s_hdr_data[i])
831
				strbuf_release(mi->s_hdr_data[i]);
832
			FREE_AND_NULL(mi->s_hdr_data[i]);
833
		}
834
		return 0;
835
	}
836

837
	if (patchbreak(line)) {
838
		if (mi->message_id)
839
			strbuf_addf(&mi->log_message,
840
				    "Message-ID: %s\n", mi->message_id);
841
		return 1;
842
	}
843

844
	strbuf_addbuf(&mi->log_message, line);
845
	return 0;
846
}
847

848
static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
849
{
850
	fwrite(line->buf, 1, line->len, mi->patchfile);
851
	mi->patch_lines++;
852
}
853

854
static void handle_filter(struct mailinfo *mi, struct strbuf *line)
855
{
856
	switch (mi->filter_stage) {
857
	case 0:
858
		if (!handle_commit_msg(mi, line))
859
			break;
860
		mi->filter_stage++;
861
		/* fallthrough */
862
	case 1:
863
		handle_patch(mi, line);
864
		break;
865
	}
866
}
867

868
static int is_rfc2822_header(const struct strbuf *line)
869
{
870
	/*
871
	 * The section that defines the loosest possible
872
	 * field name is "3.6.8 Optional fields".
873
	 *
874
	 * optional-field = field-name ":" unstructured CRLF
875
	 * field-name = 1*ftext
876
	 * ftext = %d33-57 / %59-126
877
	 */
878
	int ch;
879
	char *cp = line->buf;
880

881
	/* Count mbox From headers as headers */
882
	if (starts_with(cp, "From ") || starts_with(cp, ">From "))
883
		return 1;
884

885
	while ((ch = *cp++)) {
886
		if (ch == ':')
887
			return 1;
888
		if ((33 <= ch && ch <= 57) ||
889
		    (59 <= ch && ch <= 126))
890
			continue;
891
		break;
892
	}
893
	return 0;
894
}
895

896
static int read_one_header_line(struct strbuf *line, FILE *in)
897
{
898
	struct strbuf continuation = STRBUF_INIT;
899

900
	/* Get the first part of the line. */
901
	if (strbuf_getline_lf(line, in))
902
		return 0;
903

904
	/*
905
	 * Is it an empty line or not a valid rfc2822 header?
906
	 * If so, stop here, and return false ("not a header")
907
	 */
908
	strbuf_rtrim(line);
909
	if (!line->len || !is_rfc2822_header(line)) {
910
		/* Re-add the newline */
911
		strbuf_addch(line, '\n');
912
		return 0;
913
	}
914

915
	/*
916
	 * Now we need to eat all the continuation lines..
917
	 * Yuck, 2822 header "folding"
918
	 */
919
	for (;;) {
920
		int peek;
921

922
		peek = fgetc(in);
923
		if (peek == EOF)
924
			break;
925
		ungetc(peek, in);
926
		if (peek != ' ' && peek != '\t')
927
			break;
928
		if (strbuf_getline_lf(&continuation, in))
929
			break;
930
		continuation.buf[0] = ' ';
931
		strbuf_rtrim(&continuation);
932
		strbuf_addbuf(line, &continuation);
933
	}
934
	strbuf_release(&continuation);
935

936
	return 1;
937
}
938

939
static int find_boundary(struct mailinfo *mi, struct strbuf *line)
940
{
941
	while (!strbuf_getline_lf(line, mi->input)) {
942
		if (*(mi->content_top) && is_multipart_boundary(mi, line))
943
			return 1;
944
	}
945
	return 0;
946
}
947

948
static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
949
{
950
	struct strbuf newline = STRBUF_INIT;
951

952
	strbuf_addch(&newline, '\n');
953
again:
954
	if (line->len >= (*(mi->content_top))->len + 2 &&
955
	    !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
956
		/* we hit an end boundary */
957
		/* pop the current boundary off the stack */
958
		strbuf_release(*(mi->content_top));
959
		FREE_AND_NULL(*(mi->content_top));
960

961
		/* technically won't happen as is_multipart_boundary()
962
		   will fail first.  But just in case..
963
		 */
964
		if (--mi->content_top < mi->content) {
965
			error("Detected mismatched boundaries, can't recover");
966
			mi->input_error = -1;
967
			mi->content_top = mi->content;
968
			strbuf_release(&newline);
969
			return 0;
970
		}
971
		handle_filter(mi, &newline);
972
		strbuf_release(&newline);
973
		if (mi->input_error)
974
			return 0;
975

976
		/* skip to the next boundary */
977
		if (!find_boundary(mi, line))
978
			return 0;
979
		goto again;
980
	}
981

982
	/* set some defaults */
983
	mi->transfer_encoding = TE_DONTCARE;
984
	strbuf_reset(&mi->charset);
985

986
	/* slurp in this section's info */
987
	while (read_one_header_line(line, mi->input))
988
		check_header(mi, line, mi->p_hdr_data, 0);
989

990
	strbuf_release(&newline);
991
	/* replenish line */
992
	if (strbuf_getline_lf(line, mi->input))
993
		return 0;
994
	strbuf_addch(line, '\n');
995
	return 1;
996
}
997

998
static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
999
				 struct strbuf *prev)
1000
{
1001
	size_t len = line->len;
1002
	const char *rest;
1003

1004
	if (!mi->format_flowed) {
1005
		if (len >= 2 &&
1006
		    line->buf[len - 2] == '\r' &&
1007
		    line->buf[len - 1] == '\n') {
1008
			mi->have_quoted_cr = 1;
1009
			if (mi->quoted_cr == quoted_cr_strip) {
1010
				strbuf_setlen(line, len - 2);
1011
				strbuf_addch(line, '\n');
1012
				len--;
1013
			}
1014
		}
1015
		handle_filter(mi, line);
1016
		return;
1017
	}
1018

1019
	if (line->buf[len - 1] == '\n') {
1020
		len--;
1021
		if (len && line->buf[len - 1] == '\r')
1022
			len--;
1023
	}
1024

1025
	/* Keep signature separator as-is. */
1026
	if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1027
		if (prev->len) {
1028
			handle_filter(mi, prev);
1029
			strbuf_reset(prev);
1030
		}
1031
		handle_filter(mi, line);
1032
		return;
1033
	}
1034

1035
	/* Unstuff space-stuffed line. */
1036
	if (len && line->buf[0] == ' ') {
1037
		strbuf_remove(line, 0, 1);
1038
		len--;
1039
	}
1040

1041
	/* Save flowed line for later, but without the soft line break. */
1042
	if (len && line->buf[len - 1] == ' ') {
1043
		strbuf_add(prev, line->buf, len - !!mi->delsp);
1044
		return;
1045
	}
1046

1047
	/* Prepend any previous partial lines */
1048
	strbuf_insert(line, 0, prev->buf, prev->len);
1049
	strbuf_reset(prev);
1050

1051
	handle_filter(mi, line);
1052
}
1053

1054
static void summarize_quoted_cr(struct mailinfo *mi)
1055
{
1056
	if (mi->have_quoted_cr &&
1057
	    mi->quoted_cr == quoted_cr_warn)
1058
		warning(_("quoted CRLF detected"));
1059
}
1060

1061
static void handle_body(struct mailinfo *mi, struct strbuf *line)
1062
{
1063
	struct strbuf prev = STRBUF_INIT;
1064

1065
	/* Skip up to the first boundary */
1066
	if (*(mi->content_top)) {
1067
		if (!find_boundary(mi, line))
1068
			goto handle_body_out;
1069
	}
1070

1071
	do {
1072
		/* process any boundary lines */
1073
		if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1074
			/* flush any leftover */
1075
			if (prev.len) {
1076
				handle_filter(mi, &prev);
1077
				strbuf_reset(&prev);
1078
			}
1079
			summarize_quoted_cr(mi);
1080
			mi->have_quoted_cr = 0;
1081
			if (!handle_boundary(mi, line))
1082
				goto handle_body_out;
1083
		}
1084

1085
		/* Unwrap transfer encoding */
1086
		decode_transfer_encoding(mi, line);
1087

1088
		switch (mi->transfer_encoding) {
1089
		case TE_BASE64:
1090
		case TE_QP:
1091
		{
1092
			struct strbuf **lines, **it, *sb;
1093

1094
			/* Prepend any previous partial lines */
1095
			strbuf_insert(line, 0, prev.buf, prev.len);
1096
			strbuf_reset(&prev);
1097

1098
			/*
1099
			 * This is a decoded line that may contain
1100
			 * multiple new lines.  Pass only one chunk
1101
			 * at a time to handle_filter()
1102
			 */
1103
			lines = strbuf_split(line, '\n');
1104
			for (it = lines; (sb = *it); it++) {
1105
				if (!*(it + 1)) /* The last line */
1106
					if (sb->buf[sb->len - 1] != '\n') {
1107
						/* Partial line, save it for later. */
1108
						strbuf_addbuf(&prev, sb);
1109
						break;
1110
					}
1111
				handle_filter_flowed(mi, sb, &prev);
1112
			}
1113
			/*
1114
			 * The partial chunk is saved in "prev" and will be
1115
			 * appended by the next iteration of read_line_with_nul().
1116
			 */
1117
			strbuf_list_free(lines);
1118
			break;
1119
		}
1120
		default:
1121
			handle_filter_flowed(mi, line, &prev);
1122
		}
1123

1124
		if (mi->input_error)
1125
			break;
1126
	} while (!strbuf_getwholeline(line, mi->input, '\n'));
1127

1128
	if (prev.len)
1129
		handle_filter(mi, &prev);
1130
	summarize_quoted_cr(mi);
1131

1132
	flush_inbody_header_accum(mi);
1133

1134
handle_body_out:
1135
	strbuf_release(&prev);
1136
}
1137

1138
static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1139
{
1140
	const char *sp = data->buf;
1141
	while (1) {
1142
		char *ep = strchr(sp, '\n');
1143
		int len;
1144
		if (!ep)
1145
			len = strlen(sp);
1146
		else
1147
			len = ep - sp;
1148
		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1149
		if (!ep)
1150
			break;
1151
		sp = ep + 1;
1152
	}
1153
}
1154

1155
static void handle_info(struct mailinfo *mi)
1156
{
1157
	struct strbuf *hdr;
1158
	int i;
1159

1160
	for (i = 0; header[i]; i++) {
1161
		/* only print inbody headers if we output a patch file */
1162
		if (mi->patch_lines && mi->s_hdr_data[i])
1163
			hdr = mi->s_hdr_data[i];
1164
		else if (mi->p_hdr_data[i])
1165
			hdr = mi->p_hdr_data[i];
1166
		else
1167
			continue;
1168

1169
		if (memchr(hdr->buf, '\0', hdr->len)) {
1170
			error("a NUL byte in '%s' is not allowed.", header[i]);
1171
			mi->input_error = -1;
1172
		}
1173

1174
		if (!strcmp(header[i], "Subject")) {
1175
			if (!mi->keep_subject) {
1176
				cleanup_subject(mi, hdr);
1177
				cleanup_space(hdr);
1178
			}
1179
			output_header_lines(mi->output, "Subject", hdr);
1180
		} else if (!strcmp(header[i], "From")) {
1181
			cleanup_space(hdr);
1182
			handle_from(mi, hdr);
1183
			fprintf(mi->output, "Author: %s\n", mi->name.buf);
1184
			fprintf(mi->output, "Email: %s\n", mi->email.buf);
1185
		} else {
1186
			cleanup_space(hdr);
1187
			fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1188
		}
1189
	}
1190
	fprintf(mi->output, "\n");
1191
}
1192

1193
int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1194
{
1195
	FILE *cmitmsg;
1196
	int peek;
1197
	struct strbuf line = STRBUF_INIT;
1198

1199
	cmitmsg = fopen(msg, "w");
1200
	if (!cmitmsg) {
1201
		perror(msg);
1202
		return -1;
1203
	}
1204
	mi->patchfile = fopen(patch, "w");
1205
	if (!mi->patchfile) {
1206
		perror(patch);
1207
		fclose(cmitmsg);
1208
		return -1;
1209
	}
1210

1211
	mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
1212
	mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
1213

1214
	do {
1215
		peek = fgetc(mi->input);
1216
		if (peek == EOF) {
1217
			fclose(cmitmsg);
1218
			return error("empty patch: '%s'", patch);
1219
		}
1220
	} while (isspace(peek));
1221
	ungetc(peek, mi->input);
1222

1223
	/* process the email header */
1224
	while (read_one_header_line(&line, mi->input))
1225
		check_header(mi, &line, mi->p_hdr_data, 1);
1226

1227
	handle_body(mi, &line);
1228
	fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1229
	fclose(cmitmsg);
1230
	fclose(mi->patchfile);
1231

1232
	handle_info(mi);
1233
	strbuf_release(&line);
1234
	return mi->input_error;
1235
}
1236

1237
int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
1238
{
1239
	if (!strcmp(actionstr, "nowarn"))
1240
		*action = quoted_cr_nowarn;
1241
	else if (!strcmp(actionstr, "warn"))
1242
		*action = quoted_cr_warn;
1243
	else if (!strcmp(actionstr, "strip"))
1244
		*action = quoted_cr_strip;
1245
	else
1246
		return -1;
1247
	return 0;
1248
}
1249

1250
static int git_mailinfo_config(const char *var, const char *value,
1251
			       const struct config_context *ctx, void *mi_)
1252
{
1253
	struct mailinfo *mi = mi_;
1254

1255
	if (!starts_with(var, "mailinfo."))
1256
		return git_default_config(var, value, ctx, NULL);
1257
	if (!strcmp(var, "mailinfo.scissors")) {
1258
		mi->use_scissors = git_config_bool(var, value);
1259
		return 0;
1260
	}
1261
	if (!strcmp(var, "mailinfo.quotedcr")) {
1262
		if (!value)
1263
			return config_error_nonbool(var);
1264
		if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
1265
			return error(_("bad action '%s' for '%s'"), value, var);
1266
		return 0;
1267
	}
1268
	/* perhaps others here */
1269
	return 0;
1270
}
1271

1272
void setup_mailinfo(struct mailinfo *mi)
1273
{
1274
	memset(mi, 0, sizeof(*mi));
1275
	strbuf_init(&mi->name, 0);
1276
	strbuf_init(&mi->email, 0);
1277
	strbuf_init(&mi->charset, 0);
1278
	strbuf_init(&mi->log_message, 0);
1279
	strbuf_init(&mi->inbody_header_accum, 0);
1280
	mi->quoted_cr = quoted_cr_warn;
1281
	mi->header_stage = 1;
1282
	mi->use_inbody_headers = 1;
1283
	mi->content_top = mi->content;
1284
	git_config(git_mailinfo_config, mi);
1285
}
1286

1287
void clear_mailinfo(struct mailinfo *mi)
1288
{
1289
	strbuf_release(&mi->name);
1290
	strbuf_release(&mi->email);
1291
	strbuf_release(&mi->charset);
1292
	strbuf_release(&mi->inbody_header_accum);
1293
	free(mi->message_id);
1294

1295
	strbuf_list_free(mi->p_hdr_data);
1296
	strbuf_list_free(mi->s_hdr_data);
1297

1298
	while (mi->content < mi->content_top) {
1299
		free(*(mi->content_top));
1300
		mi->content_top--;
1301
	}
1302

1303
	strbuf_release(&mi->log_message);
1304
}
1305
git

Использование cookies