gitech
595 строк · 18.6 Кб
1// Copyright 2019 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package references5
6import (7"bytes"8"net/url"9"regexp"10"strconv"11"strings"12"sync"13
14"code.gitea.io/gitea/modules/log"15"code.gitea.io/gitea/modules/markup/mdstripper"16"code.gitea.io/gitea/modules/setting"17
18"github.com/yuin/goldmark/util"19)
20
var (
	// validNamePattern performs only the most basic validation for user or repository names.
	// Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
	// The pattern only accepts lower-case letters, so callers lower-case the name before matching.
	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

	// NOTE: None of the regexps below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// mentionPattern matches all mentions in the form of "@user" or "@org/team".
	// Capture group 1 is the mention itself, including the '@' prefix.
	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
	// issueNumericPattern matches a string that references a numeric issue, e.g. #1287
	// ('!' is accepted instead of '#' as well).
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\'|\")([#!][0-9]+)(?:\s|$|\)|\]|\'|\"|[:;,.?!]\s|[:;,.?!]$)`)
	// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\"|\')([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$)|\"|\')`)
	// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository
	// e.g. org/repo#12345
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// crossReferenceCommitPattern matches a string that references a commit in a different repository
	// e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae
	// (the hash part is 7-64 lower-case hexadecimal characters).
	// Capture groups: 1 = owner, 2 = repository name, 3 = commit hash.
	crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,64})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// spaceTrimmedPattern lets us find the trailing space.
	// NOTE: it has no capturing group, so its submatch index slice has only two elements.
	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
	// timeLogPattern matches a time tracking entry: '@' followed by one or more
	// <number><unit> items, where unit is one of w, d, m or h (e.g. @1h, @1d2h).
	timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

	// Closing/reopening keyword patterns; built once by newKeywords and left nil
	// when no valid keyword is configured.
	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
	issueKeywordsOnce                             sync.Once

	// Host name and full issue/pull URL pattern, initialized once by getGiteaHostName.
	giteaHostInit         sync.Once
	giteaHost             string
	giteaIssuePullPattern *regexp.Regexp

	// actionStrings holds the names of the XRefAction values, indexed by the numeric value.
	actionStrings = []string{
		"none",
		"closes",
		"reopens",
		"neutered",
	}
)
62
// XRefAction represents the kind of effect a cross reference has once it is resolved.
type XRefAction int64
// The numeric values below are also used as indexes into actionStrings
// by XRefAction.String, so both lists must stay in the same order.
const (
	// XRefActionNone means the cross-reference is simply a comment
	XRefActionNone XRefAction = iota // 0
	// XRefActionCloses means the cross-reference should close an issue if it is resolved
	XRefActionCloses // 1
	// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
	XRefActionReopens // 2
	// XRefActionNeutered means the cross-reference will no longer affect the source
	XRefActionNeutered // 3
)
76
77func (a XRefAction) String() string {78return actionStrings[a]79}
80
// IssueReference contains an unverified cross-reference to a local issue or pull request
type IssueReference struct {
	// Index is the issue or pull request number parsed from the reference.
	Index int64
	// Owner and Name identify the referenced repository (lower-cased);
	// both are empty when the reference did not include an owner/repo part.
	Owner string
	Name  string
	// Action is the effect requested by a keyword found right before the reference, if any.
	Action XRefAction
	// TimeLog is the time tracking entry associated with the reference,
	// without the leading '@'; empty when none was found.
	TimeLog string
}
89
// RenderizableReference contains an unverified cross-reference with rendering information.
// The IsPull member means that a `!num` reference was used instead of `#num`.
// This kind of reference is used to make pulls available when an external issue tracker
// is used. Otherwise, `#` and `!` are completely interchangeable.
type RenderizableReference struct {
	// Issue is the issue identifier as it appears in the text (number or alphanumeric key).
	Issue string
	// Owner and Name identify the referenced repository, when the reference included one.
	Owner string
	Name  string
	// CommitSha is only set for commit cross references (owner/repo@sha).
	CommitSha string
	IsPull    bool
	// RefLocation is the position of the reference within the parsed text.
	RefLocation *RefSpan
	// Action is the effect requested by a keyword found right before the reference, if any.
	Action XRefAction
	// ActionLocation is the position of that keyword; nil when no keyword was found.
	ActionLocation *RefSpan
}
104
// rawReference is the internal representation of a reference found in a text.
// It carries the data needed to build both IssueReference and RenderizableReference values.
type rawReference struct {
	index  int64 // parsed issue/pull number
	owner  string
	name   string
	isPull bool // true when '!' was used as separator instead of '#'
	action XRefAction
	// issue is the textual form of the number, exactly as found in the content.
	issue string
	// refLocation is nil for references that were built from markdown links.
	refLocation    *RefSpan
	actionLocation *RefSpan
	// timeLog is the time tracking entry attached to this reference (without '@').
	timeLog string
}
116
117func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {118refarr := make([]IssueReference, len(reflist))119for i, r := range reflist {120refarr[i] = IssueReference{121Index: r.index,122Owner: r.owner,123Name: r.name,124Action: r.action,125TimeLog: r.timeLog,126}127}128return refarr129}
130
// RefSpan is the position where the reference was found within the parsed text.
// Start and End are byte offsets taken from regexp match indexes, so the span
// covers text[Start:End] (End is exclusive).
type RefSpan struct {
	Start int
	End   int
}
136
137func makeKeywordsPat(words []string) *regexp.Regexp {138acceptedWords := parseKeywords(words)139if len(acceptedWords) == 0 {140// Never match141return nil142}143return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)144}
145
146func parseKeywords(words []string) []string {147acceptedWords := make([]string, 0, 5)148wordPat := regexp.MustCompile(`^[\pL]+$`)149for _, word := range words {150word = strings.ToLower(strings.TrimSpace(word))151// Accept Unicode letter class runes (a-z, á, à, ä, )152if wordPat.MatchString(word) {153acceptedWords = append(acceptedWords, word)154} else {155log.Info("Invalid keyword: %s", word)156}157}158return acceptedWords159}
160
161func newKeywords() {162issueKeywordsOnce.Do(func() {163// Delay initialization until after the settings module is initialized164doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)165})166}
167
168func doNewKeywords(close, reopen []string) {169issueCloseKeywordsPat = makeKeywordsPat(close)170issueReopenKeywordsPat = makeKeywordsPat(reopen)171}
172
173// getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
174func getGiteaHostName() string {175giteaHostInit.Do(func() {176if uapp, err := url.Parse(setting.AppURL); err == nil {177giteaHost = strings.ToLower(uapp.Host)178giteaIssuePullPattern = regexp.MustCompile(179`(\s|^|\(|\[)` +180regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +181`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +182`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)183} else {184giteaHost = ""185giteaIssuePullPattern = nil186}187})188return giteaHost189}
190
191// getGiteaIssuePullPattern
192func getGiteaIssuePullPattern() *regexp.Regexp {193getGiteaHostName()194return giteaIssuePullPattern195}
196
197// FindAllMentionsMarkdown matches mention patterns in given content and
198// returns a list of found unvalidated user names **not including** the @ prefix.
199func FindAllMentionsMarkdown(content string) []string {200bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))201locations := FindAllMentionsBytes(bcontent)202mentions := make([]string, len(locations))203for i, val := range locations {204mentions[i] = string(bcontent[val.Start+1 : val.End])205}206return mentions207}
208
209// FindAllMentionsBytes matches mention patterns in given content
210// and returns a list of locations for the unvalidated user names, including the @ prefix.
211func FindAllMentionsBytes(content []byte) []RefSpan {212// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and213// trailing spaces (\s@mention,\s), so if we get two consecutive references, the space214// from the second reference will be "eaten" by the first one:215// ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`216ret := make([]RefSpan, 0, 5)217pos := 0218for {219match := mentionPattern.FindSubmatchIndex(content[pos:])220if match == nil {221break222}223ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})224notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])225if notrail == nil {226pos = match[3] + pos227} else {228pos = match[3] + pos + notrail[1] - notrail[3]229}230}231return ret232}
233
234// FindFirstMentionBytes matches the first mention in then given content
235// and returns the location of the unvalidated user name, including the @ prefix.
236func FindFirstMentionBytes(content []byte) (bool, RefSpan) {237mention := mentionPattern.FindSubmatchIndex(content)238if mention == nil {239return false, RefSpan{}240}241return true, RefSpan{Start: mention[2], End: mention[3]}242}
243
244// FindAllIssueReferencesMarkdown strips content from markdown markup
245// and returns a list of unvalidated references found in it.
246func FindAllIssueReferencesMarkdown(content string) []IssueReference {247return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))248}
249
250func findAllIssueReferencesMarkdown(content string) []*rawReference {251bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))252return findAllIssueReferencesBytes(bcontent, links)253}
254
// convertFullHTMLReferencesToShortRefs rewrites, in place, every full issue or
// pull request URL matched by re into its short "owner/repo#num" (issues) or
// "owner/repo!num" (pulls) form.
//
// re is expected to have the capture groups of giteaIssuePullPattern:
// 1 = preamble, 2 = owner/repo, 3 = "issues" or "pulls", 4 = number.
//
// The bytes are moved within the existing backing array of *contentBytes and
// the slice is then shortened, so the caller's buffer is modified.
func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
	// We will iterate through the content, rewrite and simplify full references.
	//
	// We want to transform something like:
	//
	// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
	// https://ourgitea.com/git/owner/repo/pulls/123456789
	//
	// Into something like:
	//
	// this is a owner/repo#123456789, foo
	// owner/repo!123456789

	pos := 0
	for {
		// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
		match := re.FindSubmatchIndex((*contentBytes)[pos:])
		if match == nil {
			break
		}
		// match is a bunch of indices into the content from pos onwards so
		// to simplify things let's just add pos to all of the indices in match
		for i := range match {
			match[i] += pos
		}

		// match[0]-match[1] is whole string
		// match[2]-match[3] is preamble

		// move the position to the end of the preamble
		pos = match[3]

		// match[4]-match[5] is owner/repo
		// now copy the owner/repo to end of the preamble
		// (this overwrites the beginning of the URL, which is no longer needed)
		endPos := pos + match[5] - match[4]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])

		// move the current position to the end of the newly copied owner/repo
		pos = endPos

		// Now set the issue/pull marker:
		//
		// match[6]-match[7] == 'issues'
		// The marker is written before match[6], so the "issues"/"pulls" text
		// is still intact when it is read below.
		(*contentBytes)[pos] = '#'
		if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
			(*contentBytes)[pos] = '!'
		}
		pos++

		// Then add the issue/pull number
		//
		// match[8]-match[9] is the number
		endPos = pos + match[9] - match[8]
		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])

		// Now copy what's left at the end of the string to the new end position
		copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
		// now we reset the length

		// our new section has length endPos - match[3]
		// our old section has length match[9] - match[3]
		*contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
		// Continue right after the rewritten reference; its trailing delimiter
		// can then act as the leading delimiter of the next URL.
		pos = endPos
	}
}
320
321// FindAllIssueReferences returns a list of unvalidated references found in a string.
322func FindAllIssueReferences(content string) []IssueReference {323// Need to convert fully qualified html references to local system to #/! short codes324contentBytes := []byte(content)325if re := getGiteaIssuePullPattern(); re != nil {326convertFullHTMLReferencesToShortRefs(re, &contentBytes)327} else {328log.Debug("No GiteaIssuePullPattern pattern")329}330return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))331}
332
333// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
334func FindRenderizableReferenceNumeric(content string, prOnly, crossLinkOnly bool) (bool, *RenderizableReference) {335var match []int336if !crossLinkOnly {337match = issueNumericPattern.FindStringSubmatchIndex(content)338}339if match == nil {340if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {341return false, nil342}343}344r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)345if r == nil {346return false, nil347}348
349return true, &RenderizableReference{350Issue: r.issue,351Owner: r.owner,352Name: r.name,353IsPull: r.isPull,354RefLocation: r.refLocation,355Action: r.action,356ActionLocation: r.actionLocation,357}358}
359
360// FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string.
361func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) {362m := crossReferenceCommitPattern.FindStringSubmatchIndex(content)363if len(m) < 8 {364return false, nil365}366
367return true, &RenderizableReference{368Owner: content[m[2]:m[3]],369Name: content[m[4]:m[5]],370CommitSha: content[m[6]:m[7]],371RefLocation: &RefSpan{Start: m[2], End: m[7]},372}373}
374
375// FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
376func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {377match := pattern.FindStringSubmatchIndex(content)378if len(match) < 4 {379return false, nil380}381
382action, location := findActionKeywords([]byte(content), match[2])383
384return true, &RenderizableReference{385Issue: content[match[2]:match[3]],386RefLocation: &RefSpan{Start: match[0], End: match[1]},387Action: action,388ActionLocation: location,389IsPull: false,390}391}
392
393// FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
394func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {395match := issueAlphanumericPattern.FindStringSubmatchIndex(content)396if match == nil {397return false, nil398}399
400action, location := findActionKeywords([]byte(content), match[2])401
402return true, &RenderizableReference{403Issue: content[match[2]:match[3]],404RefLocation: &RefSpan{Start: match[2], End: match[3]},405Action: action,406ActionLocation: location,407IsPull: false,408}409}
410
411// FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
412func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {413ret := make([]*rawReference, 0, 10)414pos := 0415
416// Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and417// trailing spaces (\s#ref,\s), so if we get two consecutive references, the space418// from the second reference will be "eaten" by the first one:419// ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`420for {421match := issueNumericPattern.FindSubmatchIndex(content[pos:])422if match == nil {423break424}425if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {426ret = append(ret, ref)427}428notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])429if notrail == nil {430pos = match[3] + pos431} else {432pos = match[3] + pos + notrail[1] - notrail[3]433}434}435
436pos = 0437
438for {439match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])440if match == nil {441break442}443if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {444ret = append(ret, ref)445}446notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])447if notrail == nil {448pos = match[3] + pos449} else {450pos = match[3] + pos + notrail[1] - notrail[3]451}452}453
454localhost := getGiteaHostName()455for _, link := range links {456if u, err := url.Parse(link); err == nil {457// Note: we're not attempting to match the URL scheme (http/https)458host := strings.ToLower(u.Host)459if host != "" && host != localhost {460continue461}462parts := strings.Split(u.EscapedPath(), "/")463// /user/repo/issues/3464if len(parts) != 5 || parts[0] != "" {465continue466}467var sep string468if parts[3] == "issues" {469sep = "#"470} else if parts[3] == "pulls" {471sep = "!"472} else {473continue474}475// Note: closing/reopening keywords not supported with URLs476bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])477if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {478ref.refLocation = nil479ret = append(ret, ref)480}481}482}483
484if len(ret) == 0 {485return ret486}487
488pos = 0489
490for {491match := timeLogPattern.FindSubmatchIndex(content[pos:])492if match == nil {493break494}495
496timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])497
498var f *rawReference499for _, ref := range ret {500if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {501f = ref502}503}504
505pos = match[1] + pos506
507if f == nil {508f = ret[0]509}510
511if len(f.timeLog) == 0 {512f.timeLog = timeLogEntry513}514}515
516return ret517}
518
519func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {520sep := bytes.IndexAny(content[start:end], "#!")521if sep < 0 {522return nil523}524isPull := content[start+sep] == '!'525if prOnly && !isPull {526return nil527}528repo := string(content[start : start+sep])529issue := string(content[start+sep+1 : end])530index, err := strconv.ParseInt(issue, 10, 64)531if err != nil {532return nil533}534if repo == "" {535if fromLink {536// Markdown links must specify owner/repo537return nil538}539action, location := findActionKeywords(content, start)540return &rawReference{541index: index,542action: action,543issue: issue,544isPull: isPull,545refLocation: &RefSpan{Start: start, End: end},546actionLocation: location,547}548}549parts := strings.Split(strings.ToLower(repo), "/")550if len(parts) != 2 {551return nil552}553owner, name := parts[0], parts[1]554if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {555return nil556}557action, location := findActionKeywords(content, start)558return &rawReference{559index: index,560owner: owner,561name: name,562action: action,563issue: issue,564isPull: isPull,565refLocation: &RefSpan{Start: start, End: end},566actionLocation: location,567}568}
569
570func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {571newKeywords()572var m []int573if issueCloseKeywordsPat != nil {574m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])575if m != nil {576return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}577}578}579if issueReopenKeywordsPat != nil {580m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])581if m != nil {582return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}583}584}585return XRefActionNone, nil586}
587
588// IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
589func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {590if extTracker {591// External issues cannot be automatically closed592return false593}594return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens595}
596