gitech

Форк
0
/
references.go 
595 строк · 18.6 Кб
1
// Copyright 2019 The Gitea Authors. All rights reserved.
2
// SPDX-License-Identifier: MIT
3

4
package references
5

6
import (
7
	"bytes"
8
	"net/url"
9
	"regexp"
10
	"strconv"
11
	"strings"
12
	"sync"
13

14
	"code.gitea.io/gitea/modules/log"
15
	"code.gitea.io/gitea/modules/markup/mdstripper"
16
	"code.gitea.io/gitea/modules/setting"
17

18
	"github.com/yuin/goldmark/util"
19
)
20

21
// Name validation.
var (
	// validNamePattern is a deliberately minimal check for user and repository names:
	// one or more lowercase alphanumeric, underscore ('_'), dot ('.') or dash ('-') characters.
	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)
)

// Reference patterns.
//
// NOTE: none of the patterns below validate what they find, so a match is
// reported even when the referenced entity does not exist. That keeps matching
// fast but produces false positives.
// TODO: fix invalid linking issue
var (
	// mentionPattern finds mentions written as "@user" or "@org/team".
	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)

	// issueNumericPattern finds numeric issue/pull references such as #1287 or !1287.
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\'|\")([#!][0-9]+)(?:\s|$|\)|\]|\'|\"|[:;,.?!]\s|[:;,.?!]$)`)

	// issueAlphanumericPattern finds alphanumeric issue references such as ABC-1234.
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\"|\')([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$)|\"|\')`)

	// crossReferenceIssueNumericPattern finds numeric issue/pull references that name
	// another repository, e.g. org/repo#12345.
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

	// crossReferenceCommitPattern finds commit references that name another repository,
	// e.g. go-gitea/gitea@d8a994ef or go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae.
	// The hash part accepts 7 to 64 lowercase hexadecimal characters.
	crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,64})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

	// spaceTrimmedPattern finds a whitespace character that directly follows a
	// word-like character (alphanumeric, '-' or '_').
	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)

	// timeLogPattern finds time-tracking entries such as @1h30m.
	timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
)

// State that is initialised on first use.
var (
	// Close/reopen keyword patterns; both are guarded by issueKeywordsOnce.
	issueCloseKeywordsPat  *regexp.Regexp
	issueReopenKeywordsPat *regexp.Regexp
	issueKeywordsOnce      sync.Once

	// Local host name and the pattern for full issue/pull URLs; guarded by giteaHostInit.
	giteaHostInit         sync.Once
	giteaHost             string
	giteaIssuePullPattern *regexp.Regexp
)

// actionStrings holds the textual name of each XRefAction, indexed by its numeric value.
var actionStrings = []string{
	"none",
	"closes",
	"reopens",
	"neutered",
}
62

63
// XRefAction represents the kind of effect a cross reference has once is resolved
64
type XRefAction int64
65

66
const (
67
	// XRefActionNone means the cross-reference is simply a comment
68
	XRefActionNone XRefAction = iota // 0
69
	// XRefActionCloses means the cross-reference should close an issue if it is resolved
70
	XRefActionCloses // 1
71
	// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
72
	XRefActionReopens // 2
73
	// XRefActionNeutered means the cross-reference will no longer affect the source
74
	XRefActionNeutered // 3
75
)
76

77
func (a XRefAction) String() string {
78
	return actionStrings[a]
79
}
80

81
// IssueReference contains an unverified cross-reference to a local issue or pull request
82
type IssueReference struct {
83
	Index   int64
84
	Owner   string
85
	Name    string
86
	Action  XRefAction
87
	TimeLog string
88
}
89

90
// RenderizableReference contains an unverified cross-reference to with rendering information
91
// The IsPull member means that a `!num` reference was used instead of `#num`.
92
// This kind of reference is used to make pulls available when an external issue tracker
93
// is used. Otherwise, `#` and `!` are completely interchangeable.
94
type RenderizableReference struct {
95
	Issue          string
96
	Owner          string
97
	Name           string
98
	CommitSha      string
99
	IsPull         bool
100
	RefLocation    *RefSpan
101
	Action         XRefAction
102
	ActionLocation *RefSpan
103
}
104

105
type rawReference struct {
106
	index          int64
107
	owner          string
108
	name           string
109
	isPull         bool
110
	action         XRefAction
111
	issue          string
112
	refLocation    *RefSpan
113
	actionLocation *RefSpan
114
	timeLog        string
115
}
116

117
func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
118
	refarr := make([]IssueReference, len(reflist))
119
	for i, r := range reflist {
120
		refarr[i] = IssueReference{
121
			Index:   r.index,
122
			Owner:   r.owner,
123
			Name:    r.name,
124
			Action:  r.action,
125
			TimeLog: r.timeLog,
126
		}
127
	}
128
	return refarr
129
}
130

131
// RefSpan describes where a reference was found within the parsed text.
// Callers in this file use it as a half-open range: text[Start:End].
type RefSpan struct {
	Start, End int
}
136

137
func makeKeywordsPat(words []string) *regexp.Regexp {
138
	acceptedWords := parseKeywords(words)
139
	if len(acceptedWords) == 0 {
140
		// Never match
141
		return nil
142
	}
143
	return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
144
}
145

146
func parseKeywords(words []string) []string {
147
	acceptedWords := make([]string, 0, 5)
148
	wordPat := regexp.MustCompile(`^[\pL]+$`)
149
	for _, word := range words {
150
		word = strings.ToLower(strings.TrimSpace(word))
151
		// Accept Unicode letter class runes (a-z, á, à, ä, )
152
		if wordPat.MatchString(word) {
153
			acceptedWords = append(acceptedWords, word)
154
		} else {
155
			log.Info("Invalid keyword: %s", word)
156
		}
157
	}
158
	return acceptedWords
159
}
160

161
func newKeywords() {
162
	issueKeywordsOnce.Do(func() {
163
		// Delay initialization until after the settings module is initialized
164
		doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
165
	})
166
}
167

168
func doNewKeywords(close, reopen []string) {
169
	issueCloseKeywordsPat = makeKeywordsPat(close)
170
	issueReopenKeywordsPat = makeKeywordsPat(reopen)
171
}
172

173
// getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
174
func getGiteaHostName() string {
175
	giteaHostInit.Do(func() {
176
		if uapp, err := url.Parse(setting.AppURL); err == nil {
177
			giteaHost = strings.ToLower(uapp.Host)
178
			giteaIssuePullPattern = regexp.MustCompile(
179
				`(\s|^|\(|\[)` +
180
					regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
181
					`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
182
					`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
183
		} else {
184
			giteaHost = ""
185
			giteaIssuePullPattern = nil
186
		}
187
	})
188
	return giteaHost
189
}
190

191
// getGiteaIssuePullPattern
192
func getGiteaIssuePullPattern() *regexp.Regexp {
193
	getGiteaHostName()
194
	return giteaIssuePullPattern
195
}
196

197
// FindAllMentionsMarkdown matches mention patterns in given content and
198
// returns a list of found unvalidated user names **not including** the @ prefix.
199
func FindAllMentionsMarkdown(content string) []string {
200
	bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
201
	locations := FindAllMentionsBytes(bcontent)
202
	mentions := make([]string, len(locations))
203
	for i, val := range locations {
204
		mentions[i] = string(bcontent[val.Start+1 : val.End])
205
	}
206
	return mentions
207
}
208

209
// FindAllMentionsBytes matches mention patterns in given content
210
// and returns a list of locations for the unvalidated user names, including the @ prefix.
211
func FindAllMentionsBytes(content []byte) []RefSpan {
212
	// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
213
	// trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
214
	// from the second reference will be "eaten" by the first one:
215
	// ...\s@mention1\s@mention2\s...	--> ...`\s@mention1\s`, (not) `@mention2,\s...`
216
	ret := make([]RefSpan, 0, 5)
217
	pos := 0
218
	for {
219
		match := mentionPattern.FindSubmatchIndex(content[pos:])
220
		if match == nil {
221
			break
222
		}
223
		ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
224
		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
225
		if notrail == nil {
226
			pos = match[3] + pos
227
		} else {
228
			pos = match[3] + pos + notrail[1] - notrail[3]
229
		}
230
	}
231
	return ret
232
}
233

234
// FindFirstMentionBytes matches the first mention in then given content
235
// and returns the location of the unvalidated user name, including the @ prefix.
236
func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
237
	mention := mentionPattern.FindSubmatchIndex(content)
238
	if mention == nil {
239
		return false, RefSpan{}
240
	}
241
	return true, RefSpan{Start: mention[2], End: mention[3]}
242
}
243

244
// FindAllIssueReferencesMarkdown strips content from markdown markup
245
// and returns a list of unvalidated references found in it.
246
func FindAllIssueReferencesMarkdown(content string) []IssueReference {
247
	return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
248
}
249

250
// findAllIssueReferencesMarkdown strips markdown from content and searches both
// the stripped text and the links reported by the stripper for references.
func findAllIssueReferencesMarkdown(content string) []*rawReference {
	stripped, links := mdstripper.StripMarkdownBytes([]byte(content))
	refs := findAllIssueReferencesBytes(stripped, links)
	return refs
}
254

255
// convertFullHTMLReferencesToShortRefs rewrites, in place, every full issue or
// pull request URL matched by re into its short form, shrinking *contentBytes
// accordingly.
//
// re is expected to have the shape of giteaIssuePullPattern, i.e. something like
//
//	(\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
//
// so that text such as
//
//	this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
//	https://ourgitea.com/git/owner/repo/pulls/123456789
//
// becomes
//
//	this is a owner/repo#123456789, foo
//	owner/repo!123456789
func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
	buf := *contentBytes
	cursor := 0
	for {
		loc := re.FindSubmatchIndex(buf[cursor:])
		if loc == nil {
			break
		}
		// The indices are relative to buf[cursor:]; make them absolute.
		for i := range loc {
			loc[i] += cursor
		}

		// Submatch layout:
		//   loc[2]:loc[3]  preamble
		//   loc[4]:loc[5]  owner/repo
		//   loc[6]:loc[7]  "issues" or "pulls"
		//   loc[8]:loc[9]  issue/pull number
		ownerRepo := buf[loc[4]:loc[5]]
		kind := buf[loc[6]:loc[7]]
		number := buf[loc[8]:loc[9]]
		oldEnd := loc[9]

		// Write the short reference right after the preamble: owner/repo first ...
		write := loc[3]
		next := write + len(ownerRepo)
		copy(buf[write:next], ownerRepo)
		write = next

		// ... then the marker: '#' for issues, '!' for pulls ...
		buf[write] = '#'
		if string(kind) == "pulls" {
			buf[write] = '!'
		}
		write++

		// ... then the number.
		next = write + len(number)
		copy(buf[write:next], number)
		write = next

		// Close the gap by moving everything after the old reference up,
		// and drop the now unused tail.
		copy(buf[write:], buf[oldEnd:])
		buf = buf[:len(buf)-oldEnd+write]

		// Continue right after the rewritten reference.
		cursor = write
	}
	*contentBytes = buf
}
320

321
// FindAllIssueReferences returns a list of unvalidated references found in a string.
322
func FindAllIssueReferences(content string) []IssueReference {
323
	// Need to convert fully qualified html references to local system to #/! short codes
324
	contentBytes := []byte(content)
325
	if re := getGiteaIssuePullPattern(); re != nil {
326
		convertFullHTMLReferencesToShortRefs(re, &contentBytes)
327
	} else {
328
		log.Debug("No GiteaIssuePullPattern pattern")
329
	}
330
	return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
331
}
332

333
// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
334
func FindRenderizableReferenceNumeric(content string, prOnly, crossLinkOnly bool) (bool, *RenderizableReference) {
335
	var match []int
336
	if !crossLinkOnly {
337
		match = issueNumericPattern.FindStringSubmatchIndex(content)
338
	}
339
	if match == nil {
340
		if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
341
			return false, nil
342
		}
343
	}
344
	r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)
345
	if r == nil {
346
		return false, nil
347
	}
348

349
	return true, &RenderizableReference{
350
		Issue:          r.issue,
351
		Owner:          r.owner,
352
		Name:           r.name,
353
		IsPull:         r.isPull,
354
		RefLocation:    r.refLocation,
355
		Action:         r.action,
356
		ActionLocation: r.actionLocation,
357
	}
358
}
359

360
// FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string.
361
func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) {
362
	m := crossReferenceCommitPattern.FindStringSubmatchIndex(content)
363
	if len(m) < 8 {
364
		return false, nil
365
	}
366

367
	return true, &RenderizableReference{
368
		Owner:       content[m[2]:m[3]],
369
		Name:        content[m[4]:m[5]],
370
		CommitSha:   content[m[6]:m[7]],
371
		RefLocation: &RefSpan{Start: m[2], End: m[7]},
372
	}
373
}
374

375
// FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
376
func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
377
	match := pattern.FindStringSubmatchIndex(content)
378
	if len(match) < 4 {
379
		return false, nil
380
	}
381

382
	action, location := findActionKeywords([]byte(content), match[2])
383

384
	return true, &RenderizableReference{
385
		Issue:          content[match[2]:match[3]],
386
		RefLocation:    &RefSpan{Start: match[0], End: match[1]},
387
		Action:         action,
388
		ActionLocation: location,
389
		IsPull:         false,
390
	}
391
}
392

393
// FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
394
func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
395
	match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
396
	if match == nil {
397
		return false, nil
398
	}
399

400
	action, location := findActionKeywords([]byte(content), match[2])
401

402
	return true, &RenderizableReference{
403
		Issue:          content[match[2]:match[3]],
404
		RefLocation:    &RefSpan{Start: match[2], End: match[3]},
405
		Action:         action,
406
		ActionLocation: location,
407
		IsPull:         false,
408
	}
409
}
410

411
// FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
412
func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
413
	ret := make([]*rawReference, 0, 10)
414
	pos := 0
415

416
	// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
417
	// trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
418
	// from the second reference will be "eaten" by the first one:
419
	// ...\s#ref1\s#ref2\s...	--> ...`\s#ref1\s`, (not) `#ref2,\s...`
420
	for {
421
		match := issueNumericPattern.FindSubmatchIndex(content[pos:])
422
		if match == nil {
423
			break
424
		}
425
		if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
426
			ret = append(ret, ref)
427
		}
428
		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
429
		if notrail == nil {
430
			pos = match[3] + pos
431
		} else {
432
			pos = match[3] + pos + notrail[1] - notrail[3]
433
		}
434
	}
435

436
	pos = 0
437

438
	for {
439
		match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
440
		if match == nil {
441
			break
442
		}
443
		if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
444
			ret = append(ret, ref)
445
		}
446
		notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
447
		if notrail == nil {
448
			pos = match[3] + pos
449
		} else {
450
			pos = match[3] + pos + notrail[1] - notrail[3]
451
		}
452
	}
453

454
	localhost := getGiteaHostName()
455
	for _, link := range links {
456
		if u, err := url.Parse(link); err == nil {
457
			// Note: we're not attempting to match the URL scheme (http/https)
458
			host := strings.ToLower(u.Host)
459
			if host != "" && host != localhost {
460
				continue
461
			}
462
			parts := strings.Split(u.EscapedPath(), "/")
463
			// /user/repo/issues/3
464
			if len(parts) != 5 || parts[0] != "" {
465
				continue
466
			}
467
			var sep string
468
			if parts[3] == "issues" {
469
				sep = "#"
470
			} else if parts[3] == "pulls" {
471
				sep = "!"
472
			} else {
473
				continue
474
			}
475
			// Note: closing/reopening keywords not supported with URLs
476
			bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
477
			if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
478
				ref.refLocation = nil
479
				ret = append(ret, ref)
480
			}
481
		}
482
	}
483

484
	if len(ret) == 0 {
485
		return ret
486
	}
487

488
	pos = 0
489

490
	for {
491
		match := timeLogPattern.FindSubmatchIndex(content[pos:])
492
		if match == nil {
493
			break
494
		}
495

496
		timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
497

498
		var f *rawReference
499
		for _, ref := range ret {
500
			if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
501
				f = ref
502
			}
503
		}
504

505
		pos = match[1] + pos
506

507
		if f == nil {
508
			f = ret[0]
509
		}
510

511
		if len(f.timeLog) == 0 {
512
			f.timeLog = timeLogEntry
513
		}
514
	}
515

516
	return ret
517
}
518

519
func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
520
	sep := bytes.IndexAny(content[start:end], "#!")
521
	if sep < 0 {
522
		return nil
523
	}
524
	isPull := content[start+sep] == '!'
525
	if prOnly && !isPull {
526
		return nil
527
	}
528
	repo := string(content[start : start+sep])
529
	issue := string(content[start+sep+1 : end])
530
	index, err := strconv.ParseInt(issue, 10, 64)
531
	if err != nil {
532
		return nil
533
	}
534
	if repo == "" {
535
		if fromLink {
536
			// Markdown links must specify owner/repo
537
			return nil
538
		}
539
		action, location := findActionKeywords(content, start)
540
		return &rawReference{
541
			index:          index,
542
			action:         action,
543
			issue:          issue,
544
			isPull:         isPull,
545
			refLocation:    &RefSpan{Start: start, End: end},
546
			actionLocation: location,
547
		}
548
	}
549
	parts := strings.Split(strings.ToLower(repo), "/")
550
	if len(parts) != 2 {
551
		return nil
552
	}
553
	owner, name := parts[0], parts[1]
554
	if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
555
		return nil
556
	}
557
	action, location := findActionKeywords(content, start)
558
	return &rawReference{
559
		index:          index,
560
		owner:          owner,
561
		name:           name,
562
		action:         action,
563
		issue:          issue,
564
		isPull:         isPull,
565
		refLocation:    &RefSpan{Start: start, End: end},
566
		actionLocation: location,
567
	}
568
}
569

570
// findActionKeywords checks whether the text in front of a reference, i.e.
// content[:start], ends with a closing or reopening keyword. Closing keywords
// are tried first. It returns the matching action together with the location
// of the keyword, or XRefActionNone and nil when neither pattern matches or is
// configured.
func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
	newKeywords()

	// locate returns the span of the keyword matched by pat, or nil.
	locate := func(pat *regexp.Regexp) *RefSpan {
		if pat == nil {
			return nil
		}
		loc := pat.FindSubmatchIndex(content[:start])
		if loc == nil {
			return nil
		}
		return &RefSpan{Start: loc[2], End: loc[3]}
	}

	if span := locate(issueCloseKeywordsPat); span != nil {
		return XRefActionCloses, span
	}
	if span := locate(issueReopenKeywordsPat); span != nil {
		return XRefActionReopens, span
	}
	return XRefActionNone, nil
}
587

588
// IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
589
func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
590
	if extTracker {
591
		// External issues cannot be automatically closed
592
		return false
593
	}
594
	return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
595
}
596

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.