podman
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build go1.10

// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in https://www.unicode.org/reports/tr46.
// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
18package idna // import "golang.org/x/net/idna"19
20import (21"fmt"22"strings"23"unicode/utf8"24
25"golang.org/x/text/secure/bidirule"26"golang.org/x/text/unicode/bidi"27"golang.org/x/text/unicode/norm"28)
29
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string as lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
//    specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
//    string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
//    needed, and document that the return string may not be empty in case of
//    error in the future.
// I think Option 1 is best, but it is quite opinionated.
44
45// ToASCII is a wrapper for Punycode.ToASCII.
46func ToASCII(s string) (string, error) {47return Punycode.process(s, true)48}
49
50// ToUnicode is a wrapper for Punycode.ToUnicode.
51func ToUnicode(s string) (string, error) {52return Punycode.process(s, false)53}
54
55// An Option configures a Profile at creation time.
56type Option func(*options)57
58// Transitional sets a Profile to use the Transitional mapping as defined in UTS
59// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
60// transitional mapping provides a compromise between IDNA2003 and IDNA2008
61// compatibility. It is used by some browsers when resolving domain names. This
62// option is only meaningful if combined with MapForLookup.
63func Transitional(transitional bool) Option {64return func(o *options) { o.transitional = transitional }65}
66
67// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
68// are longer than allowed by the RFC.
69//
70// This option corresponds to the VerifyDnsLength flag in UTS #46.
71func VerifyDNSLength(verify bool) Option {72return func(o *options) { o.verifyDNSLength = verify }73}
74
75// RemoveLeadingDots removes leading label separators. Leading runes that map to
76// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
77func RemoveLeadingDots(remove bool) Option {78return func(o *options) { o.removeLeadingDots = remove }79}
80
81// ValidateLabels sets whether to check the mandatory label validation criteria
82// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
83// of hyphens ('-'), normalization, validity of runes, and the context rules.
84// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
85// in UTS #46.
86func ValidateLabels(enable bool) Option {87return func(o *options) {88// Don't override existing mappings, but set one that at least checks89// normalization if it is not set.90if o.mapping == nil && enable {91o.mapping = normalize92}93o.trie = trie94o.checkJoiners = enable95o.checkHyphens = enable96if enable {97o.fromPuny = validateFromPunycode98} else {99o.fromPuny = nil100}101}102}
103
104// CheckHyphens sets whether to check for correct use of hyphens ('-') in
105// labels. Most web browsers do not have this option set, since labels such as
106// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
107//
108// This option corresponds to the CheckHyphens flag in UTS #46.
109func CheckHyphens(enable bool) Option {110return func(o *options) { o.checkHyphens = enable }111}
112
113// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
114// A of RFC 5892, concerning the use of joiner runes.
115//
116// This option corresponds to the CheckJoiners flag in UTS #46.
117func CheckJoiners(enable bool) Option {118return func(o *options) {119o.trie = trie120o.checkJoiners = enable121}122}
123
124// StrictDomainName limits the set of permissible ASCII characters to those
125// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
126// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
127// but is only useful if ValidateLabels is set.
128//
129// This option is useful, for instance, for browsers that allow characters
130// outside this range, for example a '_' (U+005F LOW LINE). See
131// http://www.rfc-editor.org/std/std3.txt for more details.
132//
133// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
134func StrictDomainName(use bool) Option {135return func(o *options) { o.useSTD3Rules = use }136}
137
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
140
141// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
142// that relies on proper validation of labels should include this rule.
143//
144// This option corresponds to the CheckBidi flag in UTS #46.
145func BidiRule() Option {146return func(o *options) { o.bidirule = bidirule.ValidString }147}
148
149// ValidateForRegistration sets validation options to verify that a given IDN is
150// properly formatted for registration as defined by Section 4 of RFC 5891.
151func ValidateForRegistration() Option {152return func(o *options) {153o.mapping = validateRegistration154StrictDomainName(true)(o)155ValidateLabels(true)(o)156VerifyDNSLength(true)(o)157BidiRule()(o)158}159}
160
161// MapForLookup sets validation and mapping options such that a given IDN is
162// transformed for domain name lookup according to the requirements set out in
163// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
164// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
165// to add this check.
166//
167// The mappings include normalization and mapping case, width and other
168// compatibility mappings.
169func MapForLookup() Option {170return func(o *options) {171o.mapping = validateAndMap172StrictDomainName(true)(o)173ValidateLabels(true)(o)174}175}
176
177type options struct {178transitional bool179useSTD3Rules bool180checkHyphens bool181checkJoiners bool182verifyDNSLength bool183removeLeadingDots bool184
185trie *idnaTrie186
187// fromPuny calls validation rules when converting A-labels to U-labels.188fromPuny func(p *Profile, s string) error189
190// mapping implements a validation and mapping step as defined in RFC 5895191// or UTS 46, tailored to, for example, domain registration or lookup.192mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)193
194// bidirule, if specified, checks whether s conforms to the Bidi Rule195// defined in RFC 5893.196bidirule func(s string) bool197}
198
199// A Profile defines the configuration of an IDNA mapper.
200type Profile struct {201options
202}
203
204func apply(o *options, opts []Option) {205for _, f := range opts {206f(o)207}208}
209
210// New creates a new Profile.
211//
212// With no options, the returned Profile is the most permissive and equals the
213// Punycode Profile. Options can be passed to further restrict the Profile. The
214// MapForLookup and ValidateForRegistration options set a collection of options,
215// for lookup and registration purposes respectively, which can be tailored by
216// adding more fine-grained options, where later options override earlier
217// options.
218func New(o ...Option) *Profile {219p := &Profile{}220apply(&p.options, o)221return p222}
223
224// ToASCII converts a domain or domain label to its ASCII form. For example,
225// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
226// ToASCII("golang") is "golang". If an error is encountered it will return
227// an error and a (partially) processed result.
228func (p *Profile) ToASCII(s string) (string, error) {229return p.process(s, true)230}
231
232// ToUnicode converts a domain or domain label to its Unicode form. For example,
233// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
234// ToUnicode("golang") is "golang". If an error is encountered it will return
235// an error and a (partially) processed result.
236func (p *Profile) ToUnicode(s string) (string, error) {237pp := *p238pp.transitional = false239return pp.process(s, false)240}
241
242// String reports a string with a description of the profile for debugging
243// purposes. The string format may change with different versions.
244func (p *Profile) String() string {245s := ""246if p.transitional {247s = "Transitional"248} else {249s = "NonTransitional"250}251if p.useSTD3Rules {252s += ":UseSTD3Rules"253}254if p.checkHyphens {255s += ":CheckHyphens"256}257if p.checkJoiners {258s += ":CheckJoiners"259}260if p.verifyDNSLength {261s += ":VerifyDNSLength"262}263return s264}
265
266var (267// Punycode is a Profile that does raw punycode processing with a minimum268// of validation.269Punycode *Profile = punycode270
271// Lookup is the recommended profile for looking up domain names, according272// to Section 5 of RFC 5891. The exact configuration of this profile may273// change over time.274Lookup *Profile = lookup275
276// Display is the recommended profile for displaying domain names.277// The configuration of this profile may change over time.278Display *Profile = display279
280// Registration is the recommended profile for checking whether a given281// IDN is valid for registration, according to Section 4 of RFC 5891.282Registration *Profile = registration283
284punycode = &Profile{}285lookup = &Profile{options{286transitional: transitionalLookup,287useSTD3Rules: true,288checkHyphens: true,289checkJoiners: true,290trie: trie,291fromPuny: validateFromPunycode,292mapping: validateAndMap,293bidirule: bidirule.ValidString,294}}295display = &Profile{options{296useSTD3Rules: true,297checkHyphens: true,298checkJoiners: true,299trie: trie,300fromPuny: validateFromPunycode,301mapping: validateAndMap,302bidirule: bidirule.ValidString,303}}304registration = &Profile{options{305useSTD3Rules: true,306verifyDNSLength: true,307checkHyphens: true,308checkJoiners: true,309trie: trie,310fromPuny: validateFromPunycode,311mapping: validateRegistration,312bidirule: bidirule.ValidString,313}}314
315// TODO: profiles316// Register: recommended for approving domain names: don't do any mappings317// but rather reject on invalid input. Bundle or block deviation characters.318)
319
// labelError reports a label that failed validation. label is the offending
// text and code_ is the short identifier of the rule that was violated (for
// example "A4", "V1" or "B" as used elsewhere in this file).
type labelError struct{ label, code_ string }

// code returns the rule identifier stored in the error.
func (e labelError) code() string { return e.code_ }

// Error implements the error interface. The message quotes the label but
// does not include the rule identifier.
func (e labelError) Error() string {
	return fmt.Sprintf("idna: invalid label %q", e.label)
}
326
// runeError reports a rune that is not permitted in a domain name. The
// value of the error is the offending rune itself.
type runeError rune

// code returns the rule identifier for a disallowed rune, which is always
// "P1".
func (e runeError) code() string { return "P1" }

// Error implements the error interface, rendering the rune in U+XXXX form.
func (e runeError) Error() string {
	return fmt.Sprintf("idna: disallowed rune %U", e)
}
333
334// process implements the algorithm described in section 4 of UTS #46,
335// see https://www.unicode.org/reports/tr46.
336func (p *Profile) process(s string, toASCII bool) (string, error) {337var err error338var isBidi bool339if p.mapping != nil {340s, isBidi, err = p.mapping(p, s)341}342// Remove leading empty labels.343if p.removeLeadingDots {344for ; len(s) > 0 && s[0] == '.'; s = s[1:] {345}346}347// TODO: allow for a quick check of the tables data.348// It seems like we should only create this error on ToASCII, but the349// UTS 46 conformance tests suggests we should always check this.350if err == nil && p.verifyDNSLength && s == "" {351err = &labelError{s, "A4"}352}353labels := labelIter{orig: s}354for ; !labels.done(); labels.next() {355label := labels.label()356if label == "" {357// Empty labels are not okay. The label iterator skips the last358// label if it is empty.359if err == nil && p.verifyDNSLength {360err = &labelError{s, "A4"}361}362continue363}364if strings.HasPrefix(label, acePrefix) {365u, err2 := decode(label[len(acePrefix):])366if err2 != nil {367if err == nil {368err = err2369}370// Spec says keep the old label.371continue372}373isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight374labels.set(u)375if err == nil && p.fromPuny != nil {376err = p.fromPuny(p, u)377}378if err == nil {379// This should be called on NonTransitional, according to the380// spec, but that currently does not have any effect. 
Use the381// original profile to preserve options.382err = p.validateLabel(u)383}384} else if err == nil {385err = p.validateLabel(label)386}387}388if isBidi && p.bidirule != nil && err == nil {389for labels.reset(); !labels.done(); labels.next() {390if !p.bidirule(labels.label()) {391err = &labelError{s, "B"}392break393}394}395}396if toASCII {397for labels.reset(); !labels.done(); labels.next() {398label := labels.label()399if !ascii(label) {400a, err2 := encode(acePrefix, label)401if err == nil {402err = err2403}404label = a405labels.set(a)406}407n := len(label)408if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {409err = &labelError{label, "A4"}410}411}412}413s = labels.result()414if toASCII && p.verifyDNSLength && err == nil {415// Compute the length of the domain name minus the root label and its dot.416n := len(s)417if n > 0 && s[n-1] == '.' {418n--419}420if len(s) < 1 || n > 253 {421err = &labelError{s, "A4"}422}423}424return s, err425}
426
427func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {428// TODO: consider first doing a quick check to see if any of these checks429// need to be done. This will make it slower in the general case, but430// faster in the common case.431mapped = norm.NFC.String(s)432isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft433return mapped, isBidi, nil434}
435
436func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {437// TODO: filter need for normalization in loop below.438if !norm.NFC.IsNormalString(s) {439return s, false, &labelError{s, "V1"}440}441for i := 0; i < len(s); {442v, sz := trie.lookupString(s[i:])443if sz == 0 {444return s, bidi, runeError(utf8.RuneError)445}446bidi = bidi || info(v).isBidi(s[i:])447// Copy bytes not copied so far.448switch p.simplify(info(v).category()) {449// TODO: handle the NV8 defined in the Unicode idna data set to allow450// for strict conformance to IDNA2008.451case valid, deviation:452case disallowed, mapped, unknown, ignored:453r, _ := utf8.DecodeRuneInString(s[i:])454return s, bidi, runeError(r)455}456i += sz457}458return s, bidi, nil459}
460
461func (c info) isBidi(s string) bool {462if !c.isMapped() {463return c&attributesMask == rtl464}465// TODO: also store bidi info for mapped data. This is possible, but a bit466// cumbersome and not for the common case.467p, _ := bidi.LookupString(s)468switch p.Class() {469case bidi.R, bidi.AL, bidi.AN:470return true471}472return false473}
474
475func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {476var (477b []byte478k int479)480// combinedInfoBits contains the or-ed bits of all runes. We use this481// to derive the mayNeedNorm bit later. This may trigger normalization482// overeagerly, but it will not do so in the common case. The end result483// is another 10% saving on BenchmarkProfile for the common case.484var combinedInfoBits info485for i := 0; i < len(s); {486v, sz := trie.lookupString(s[i:])487if sz == 0 {488b = append(b, s[k:i]...)489b = append(b, "\ufffd"...)490k = len(s)491if err == nil {492err = runeError(utf8.RuneError)493}494break495}496combinedInfoBits |= info(v)497bidi = bidi || info(v).isBidi(s[i:])498start := i499i += sz500// Copy bytes not copied so far.501switch p.simplify(info(v).category()) {502case valid:503continue504case disallowed:505if err == nil {506r, _ := utf8.DecodeRuneInString(s[start:])507err = runeError(r)508}509continue510case mapped, deviation:511b = append(b, s[k:start]...)512b = info(v).appendMapping(b, s[start:i])513case ignored:514b = append(b, s[k:start]...)515// drop the rune516case unknown:517b = append(b, s[k:start]...)518b = append(b, "\ufffd"...)519}520k = i521}522if k == 0 {523// No changes so far.524if combinedInfoBits&mayNeedNorm != 0 {525s = norm.NFC.String(s)526}527} else {528b = append(b, s[k:]...)529if norm.NFC.QuickSpan(b) != len(b) {530b = norm.NFC.Bytes(b)531}532// TODO: the punycode converters require strings as input.533s = string(b)534}535return s, bidi, err536}
537
// A labelIter allows iterating over domain name labels.
//
// It starts out scanning orig directly. The first call to set splits orig
// into slice, after which labels are read from and written to slice.
type labelIter struct {
	orig     string   // the input domain name
	slice    []string // labels of orig; nil until set is first called
	curStart int      // byte offset in orig where the current label starts
	curEnd   int      // byte offset in orig where the current label ends
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. Labels replaced through set
// are kept.
func (l *labelIter) reset() {
	l.curStart = 0
	l.curEnd = 0
	l.i = 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return l.curStart >= len(l.orig)
}

// result returns the domain name with any replaced labels applied. If set
// was never called, this is the original string.
func (l *labelIter) result() string {
	if l.slice != nil {
		return strings.Join(l.slice, ".")
	}
	return l.orig
}

// label returns the current label. While scanning orig directly it also
// records where the label ends, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	p := strings.IndexByte(l.orig[l.curStart:], '.')
	l.curEnd = l.curStart + p
	if p == -1 {
		// No further separator: the label runs to the end of the input.
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice != nil {
		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
			// Setting curStart to the end of orig is what makes done
			// report true.
			l.curStart = len(l.orig)
		}
	} else {
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
	}
}

// set replaces the current label with s, splitting orig into labels on first
// use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}
597
// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"
601func (p *Profile) simplify(cat category) category {602switch cat {603case disallowedSTD3Mapped:604if p.useSTD3Rules {605cat = disallowed606} else {607cat = mapped608}609case disallowedSTD3Valid:610if p.useSTD3Rules {611cat = disallowed612} else {613cat = valid614}615case deviation:616if !p.transitional {617cat = valid618}619case validNV8, validXV8:620// TODO: handle V2008621cat = valid622}623return cat624}
625
626func validateFromPunycode(p *Profile, s string) error {627if !norm.NFC.IsNormalString(s) {628return &labelError{s, "V1"}629}630// TODO: detect whether string may have to be normalized in the following631// loop.632for i := 0; i < len(s); {633v, sz := trie.lookupString(s[i:])634if sz == 0 {635return runeError(utf8.RuneError)636}637if c := p.simplify(info(v).category()); c != valid && c != deviation {638return &labelError{s, "V6"}639}640i += sz641}642return nil643}
644
// UTF-8 encodings of the two joiner runes that validateLabel gives special
// treatment.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
649
// joinState is a state of the state machine that validateLabel runs over the
// runes of a label when checking joiners.
type joinState int8

// The states index the rows of joinStates. validateLabel starts in
// stateStart and rejects a label that finishes in stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
660
661var joinStates = [][numJoinTypes]joinState{662stateStart: {663joiningL: stateBefore,664joiningD: stateBefore,665joinZWNJ: stateFAIL,666joinZWJ: stateFAIL,667joinVirama: stateVirama,668},669stateVirama: {670joiningL: stateBefore,671joiningD: stateBefore,672},673stateBefore: {674joiningL: stateBefore,675joiningD: stateBefore,676joiningT: stateBefore,677joinZWNJ: stateAfter,678joinZWJ: stateFAIL,679joinVirama: stateBeforeVirama,680},681stateBeforeVirama: {682joiningL: stateBefore,683joiningD: stateBefore,684joiningT: stateBefore,685},686stateAfter: {687joiningL: stateFAIL,688joiningD: stateBefore,689joiningT: stateAfter,690joiningR: stateStart,691joinZWNJ: stateFAIL,692joinZWJ: stateFAIL,693joinVirama: stateAfter, // no-op as we can't accept joiners here694},695stateFAIL: {6960: stateFAIL,697joiningL: stateFAIL,698joiningD: stateFAIL,699joiningT: stateFAIL,700joiningR: stateFAIL,701joinZWNJ: stateFAIL,702joinZWJ: stateFAIL,703joinVirama: stateFAIL,704},705}
706
707// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
708// already implicitly satisfied by the overall implementation.
709func (p *Profile) validateLabel(s string) (err error) {710if s == "" {711if p.verifyDNSLength {712return &labelError{s, "A4"}713}714return nil715}716if p.checkHyphens {717if len(s) > 4 && s[2] == '-' && s[3] == '-' {718return &labelError{s, "V2"}719}720if s[0] == '-' || s[len(s)-1] == '-' {721return &labelError{s, "V3"}722}723}724if !p.checkJoiners {725return nil726}727trie := p.trie // p.checkJoiners is only set if trie is set.728// TODO: merge the use of this in the trie.729v, sz := trie.lookupString(s)730x := info(v)731if x.isModifier() {732return &labelError{s, "V5"}733}734// Quickly return in the absence of zero-width (non) joiners.735if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {736return nil737}738st := stateStart739for i := 0; ; {740jt := x.joinType()741if s[i:i+sz] == zwj {742jt = joinZWJ743} else if s[i:i+sz] == zwnj {744jt = joinZWNJ745}746st = joinStates[st][jt]747if x.isViramaModifier() {748st = joinStates[st][joinVirama]749}750if i += sz; i == len(s) {751break752}753v, sz = trie.lookupString(s[i:])754x = info(v)755}756if st == stateFAIL || st == stateAfter {757return &labelError{s, "C"}758}759return nil760}
761
// ascii reports whether s consists solely of ASCII bytes, that is, whether
// every byte is below utf8.RuneSelf. The empty string is ASCII.
func ascii(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
770