cubefs
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package httpguts6
7import (8"net"9"strings"10"unicode/utf8"11
12"golang.org/x/net/idna"13)
14
// isTokenTable is indexed by ASCII code and marks the characters that may
// appear in an HTTP token (the "tchar" set: a fixed list of punctuation
// plus DIGIT and ALPHA). Entries that are not listed are false.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true,
	'-': true, '.': true, '^': true, '_': true, '`': true, '|': true, '~': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	// Uppercase ALPHA.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// Lowercase ALPHA.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// IsTokenRune reports whether r is marked as a token character in
// isTokenTable. Any rune outside the table's index range, including a
// negative value, is reported as not being a token character.
func IsTokenRune(r rune) bool {
	// The lower bound is required: a negative rune would satisfy the
	// length check alone and then panic when used as an array index.
	return 0 <= r && int(r) < len(isTokenTable) && isTokenTable[r]
}
99
100func isNotToken(r rune) bool {101return !IsTokenRune(r)102}
103
104// HeaderValuesContainsToken reports whether any string in values
105// contains the provided token, ASCII case-insensitively.
106func HeaderValuesContainsToken(values []string, token string) bool {107for _, v := range values {108if headerValueContainsToken(v, token) {109return true110}111}112return false113}
114
// isOWS reports whether b is an optional whitespace byte (a space or a
// horizontal tab), as defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
119// trimOWS returns x with all optional whitespace removes from the
120// beginning and end.
121func trimOWS(x string) string {122// TODO: consider using strings.Trim(x, " \t") instead,123// if and when it's fast enough. See issue 10292.124// But this ASCII-only code will probably always beat UTF-8125// aware code.126for len(x) > 0 && isOWS(x[0]) {127x = x[1:]128}129for len(x) > 0 && isOWS(x[len(x)-1]) {130x = x[:len(x)-1]131}132return x133}
134
135// headerValueContainsToken reports whether v (assumed to be a
136// 0#element, in the ABNF extension described in RFC 7230 section 7)
137// contains token amongst its comma-separated tokens, ASCII
138// case-insensitively.
139func headerValueContainsToken(v string, token string) bool {140for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {141if tokenEqual(trimOWS(v[:comma]), token) {142return true143}144v = v[comma+1:]145}146return tokenEqual(trimOWS(v), token)147}
148
// lowerASCII returns the ASCII lowercase version of b. Bytes outside
// 'A'..'Z' are returned unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	return b + ('a' - 'A')
}
156
157// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
158func tokenEqual(t1, t2 string) bool {159if len(t1) != len(t2) {160return false161}162for i, b := range t1 {163if b >= utf8.RuneSelf {164// No UTF-8 or non-ASCII allowed in tokens.165return false166}167if lowerASCII(byte(b)) != lowerASCII(t2[i]) {168return false169}170}171return true172}
173
// isLWS reports whether b is a linear white space byte (a space or a
// horizontal tab), according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
func isLWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
// isCTL reports whether b is a control byte, according
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		firstPrintable = ' '  // every byte below SP is a CTL
		del            = 0x7f // DEL is a CTL as well
	)
	if b == del {
		return true
	}
	return b < firstPrintable
}
189
190// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
191// HTTP/2 imposes the additional restriction that uppercase ASCII
192// letters are not allowed.
193//
194// RFC 7230 says:
195//
196// header-field = field-name ":" OWS field-value OWS
197// field-name = token
198// token = 1*tchar
199// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
200// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
201func ValidHeaderFieldName(v string) bool {202if len(v) == 0 {203return false204}205for _, r := range v {206if !IsTokenRune(r) {207return false208}209}210return true211}
212
213// ValidHostHeader reports whether h is a valid host header.
214func ValidHostHeader(h string) bool {215// The latest spec is actually this:216//217// http://tools.ietf.org/html/rfc7230#section-5.4218// Host = uri-host [ ":" port ]219//220// Where uri-host is:221// http://tools.ietf.org/html/rfc3986#section-3.2.2222//223// But we're going to be much more lenient for now and just224// search for any byte that's not a valid byte in any of those225// expressions.226for i := 0; i < len(h); i++ {227if !validHostByte[h[i]] {228return false229}230}231return true232}
233
// validHostByte is indexed by byte value and marks the bytes that
// ValidHostHeader accepts. See the comment in ValidHostHeader for why
// the set is this lenient.
var validHostByte = [256]bool{
	'!':  true, // sub-delims
	'$':  true, // sub-delims
	'%':  true, // pct-encoded (and used in IPv6 zones)
	'&':  true, // sub-delims
	'\'': true, // sub-delims
	'(':  true, // sub-delims
	')':  true, // sub-delims
	'*':  true, // sub-delims
	'+':  true, // sub-delims
	',':  true, // sub-delims
	'-':  true, // unreserved
	'.':  true, // unreserved
	':':  true, // IPv6address + Host expression's optional port
	';':  true, // sub-delims
	'=':  true, // sub-delims
	'[':  true,
	']':  true,
	'_':  true, // unreserved
	'~':  true, // unreserved

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}
269
270// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
271// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
272//
273// message-header = field-name ":" [ field-value ]
274// field-value = *( field-content | LWS )
275// field-content = <the OCTETs making up the field-value
276// and consisting of either *TEXT or combinations
277// of token, separators, and quoted-string>
278//
279// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
280//
281// TEXT = <any OCTET except CTLs,
282// but including LWS>
283// LWS = [CRLF] 1*( SP | HT )
284// CTL = <any US-ASCII control character
285// (octets 0 - 31) and DEL (127)>
286//
287// RFC 7230 says:
288//
289// field-value = *( field-content / obs-fold )
290// obj-fold = N/A to http2, and deprecated
291// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
292// field-vchar = VCHAR / obs-text
293// obs-text = %x80-FF
294// VCHAR = "any visible [USASCII] character"
295//
296// http2 further says: "Similarly, HTTP/2 allows header field values
297// that are not valid. While most of the values that can be encoded
298// will not alter header field parsing, carriage return (CR, ASCII
299// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
300// 0x0) might be exploited by an attacker if they are translated
301// verbatim. Any request or response that contains a character not
302// permitted in a header field value MUST be treated as malformed
303// (Section 8.1.2.6). Valid characters are defined by the
304// field-content ABNF rule in Section 3.2 of [RFC7230]."
305//
306// This function does not (yet?) properly handle the rejection of
307// strings that begin or end with SP or HTAB.
308func ValidHeaderFieldValue(v string) bool {309for i := 0; i < len(v); i++ {310b := v[i]311if isCTL(b) && !isLWS(b) {312return false313}314}315return true316}
317
// isASCII reports whether every byte of s is below utf8.RuneSelf,
// i.e. whether s consists solely of ASCII bytes. The empty string
// is reported as ASCII.
func isASCII(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
326
327// PunycodeHostPort returns the IDNA Punycode version
328// of the provided "host" or "host:port" string.
329func PunycodeHostPort(v string) (string, error) {330if isASCII(v) {331return v, nil332}333
334host, port, err := net.SplitHostPort(v)335if err != nil {336// The input 'v' argument was just a "host" argument,337// without a port. This error should not be returned338// to the caller.339host = v340port = ""341}342host, err = idna.ToASCII(host)343if err != nil {344// Non-UTF-8? Not representable in Punycode, in any345// case.346return "", err347}348if port == "" {349return host, nil350}351return net.JoinHostPort(host, port), nil352}
353