cubefs
650 строк · 16.4 Кб
1package lexer
2
3import (
4"bytes"
5"fmt"
6"regexp"
7"strings"
8"unicode/utf8"
9
10"github.com/graphql-go/graphql/gqlerrors"
11"github.com/graphql-go/graphql/language/source"
12)
13
// TokenKind identifies the lexical category of a Token.
type TokenKind int

// Token kinds produced by the lexer. Numbering starts at 1, so the zero value
// of TokenKind does not correspond to any kind.
const (
	EOF TokenKind = iota + 1
	BANG
	DOLLAR
	PAREN_L
	PAREN_R
	SPREAD
	COLON
	EQUALS
	AT
	BRACKET_L
	BRACKET_R
	BRACE_L
	PIPE
	BRACE_R
	NAME
	INT
	FLOAT
	STRING
	BLOCK_STRING
	AMP
)

// tokenDescription holds the printable description of every token kind:
// the literal text for punctuators and a category name for everything else.
var tokenDescription = map[TokenKind]string{
	EOF:          "EOF",
	BANG:         "!",
	DOLLAR:       "$",
	PAREN_L:      "(",
	PAREN_R:      ")",
	SPREAD:       "...",
	COLON:        ":",
	EQUALS:       "=",
	AT:           "@",
	BRACKET_L:    "[",
	BRACKET_R:    "]",
	BRACE_L:      "{",
	PIPE:         "|",
	BRACE_R:      "}",
	NAME:         "Name",
	INT:          "Int",
	FLOAT:        "Float",
	STRING:       "String",
	BLOCK_STRING: "BlockString",
	AMP:          "&",
}

// String returns the description registered for kind in tokenDescription, or
// the empty string when kind has no entry there.
func (kind TokenKind) String() string {
	if desc, known := tokenDescription[kind]; known {
		return desc
	}
	return ""
}
65
// Keywords of the GraphQL language. The lexer itself emits every one of them
// as a plain NAME token; these constants give the NAME values their meaning.
const (
	FRAGMENT     = "fragment"
	QUERY        = "query"
	MUTATION     = "mutation"
	SUBSCRIPTION = "subscription"
	SCHEMA       = "schema"
	SCALAR       = "scalar"
	TYPE         = "type"
	INTERFACE    = "interface"
	UNION        = "union"
	ENUM         = "enum"
	INPUT        = "input"
	EXTEND       = "extend"
	DIRECTIVE    = "directive"
)
82
83// Token is a representation of a lexed Token. Value only appears for non-punctuation
84// tokens: NAME, INT, FLOAT, and STRING.
85type Token struct {
86Kind TokenKind
87Start int
88End int
89Value string
90}
91
92type Lexer func(resetPosition int) (Token, error)
93
94func Lex(s *source.Source) Lexer {
95var prevPosition int
96return func(resetPosition int) (Token, error) {
97if resetPosition == 0 {
98resetPosition = prevPosition
99}
100token, err := readToken(s, resetPosition)
101if err != nil {
102return token, err
103}
104prevPosition = token.End
105return token, nil
106}
107}
108
109// Reads an alphanumeric + underscore name from the source.
110// [_A-Za-z][_0-9A-Za-z]*
111// position: Points to the byte position in the byte array
112// runePosition: Points to the rune position in the byte array
113func readName(source *source.Source, position, runePosition int) Token {
114body := source.Body
115bodyLength := len(body)
116endByte := position + 1
117endRune := runePosition + 1
118for {
119code, _ := runeAt(body, endByte)
120if (endByte != bodyLength) &&
121(code == '_' || // _
122code >= '0' && code <= '9' || // 0-9
123code >= 'A' && code <= 'Z' || // A-Z
124code >= 'a' && code <= 'z') { // a-z
125endByte++
126endRune++
127continue
128} else {
129break
130}
131}
132return makeToken(NAME, runePosition, endRune, string(body[position:endByte]))
133}
134
135// Reads a number token from the source file, either a float
136// or an int depending on whether a decimal point appears.
137// Int: -?(0|[1-9][0-9]*)
138// Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
139func readNumber(s *source.Source, start int, firstCode rune, codeLength int) (Token, error) {
140code := firstCode
141body := s.Body
142position := start
143isFloat := false
144if code == '-' { // -
145position += codeLength
146code, codeLength = runeAt(body, position)
147}
148if code == '0' { // 0
149position += codeLength
150code, codeLength = runeAt(body, position)
151if code >= '0' && code <= '9' {
152description := fmt.Sprintf("Invalid number, unexpected digit after 0: %v.", printCharCode(code))
153return Token{}, gqlerrors.NewSyntaxError(s, position, description)
154}
155} else {
156p, err := readDigits(s, position, code, codeLength)
157if err != nil {
158return Token{}, err
159}
160position = p
161code, codeLength = runeAt(body, position)
162}
163if code == '.' { // .
164isFloat = true
165position += codeLength
166code, codeLength = runeAt(body, position)
167p, err := readDigits(s, position, code, codeLength)
168if err != nil {
169return Token{}, err
170}
171position = p
172code, codeLength = runeAt(body, position)
173}
174if code == 'E' || code == 'e' { // E e
175isFloat = true
176position += codeLength
177code, codeLength = runeAt(body, position)
178if code == '+' || code == '-' { // + -
179position += codeLength
180code, codeLength = runeAt(body, position)
181}
182p, err := readDigits(s, position, code, codeLength)
183if err != nil {
184return Token{}, err
185}
186position = p
187}
188kind := INT
189if isFloat {
190kind = FLOAT
191}
192
193return makeToken(kind, start, position, string(body[start:position])), nil
194}
195
196// Returns the new position in the source after reading digits.
197func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (int, error) {
198body := s.Body
199position := start
200code := firstCode
201if code >= '0' && code <= '9' { // 0 - 9
202for {
203if code >= '0' && code <= '9' { // 0 - 9
204position += codeLength
205code, codeLength = runeAt(body, position)
206continue
207} else {
208break
209}
210}
211return position, nil
212}
213var description string
214description = fmt.Sprintf("Invalid number, expected digit but got: %v.", printCharCode(code))
215return position, gqlerrors.NewSyntaxError(s, position, description)
216}
217
218func readString(s *source.Source, start int) (Token, error) {
219body := s.Body
220position := start + 1
221runePosition := start + 1
222chunkStart := position
223var code rune
224var n int
225var valueBuffer bytes.Buffer
226for {
227code, n = runeAt(body, position)
228if position < len(body) &&
229// not LineTerminator
230code != 0x000A && code != 0x000D &&
231// not Quote (")
232code != '"' {
233
234// SourceCharacter
235if code < 0x0020 && code != 0x0009 {
236return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
237}
238position += n
239runePosition++
240if code == '\\' { // \
241valueBuffer.Write(body[chunkStart : position-1])
242code, n = runeAt(body, position)
243switch code {
244case '"':
245valueBuffer.WriteRune('"')
246break
247case '/':
248valueBuffer.WriteRune('/')
249break
250case '\\':
251valueBuffer.WriteRune('\\')
252break
253case 'b':
254valueBuffer.WriteRune('\b')
255break
256case 'f':
257valueBuffer.WriteRune('\f')
258break
259case 'n':
260valueBuffer.WriteRune('\n')
261break
262case 'r':
263valueBuffer.WriteRune('\r')
264break
265case 't':
266valueBuffer.WriteRune('\t')
267break
268case 'u':
269// Check if there are at least 4 bytes available
270if len(body) <= position+4 {
271return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
272fmt.Sprintf("Invalid character escape sequence: "+
273"\\u%v", string(body[position+1:])))
274}
275charCode := uniCharCode(
276rune(body[position+1]),
277rune(body[position+2]),
278rune(body[position+3]),
279rune(body[position+4]),
280)
281if charCode < 0 {
282return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
283fmt.Sprintf("Invalid character escape sequence: "+
284"\\u%v", string(body[position+1:position+5])))
285}
286valueBuffer.WriteRune(charCode)
287position += 4
288runePosition += 4
289break
290default:
291return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
292fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code))
293}
294position += n
295runePosition++
296chunkStart = position
297}
298continue
299} else {
300break
301}
302}
303if code != '"' { // quote (")
304return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
305}
306stringContent := body[chunkStart:position]
307valueBuffer.Write(stringContent)
308value := valueBuffer.String()
309return makeToken(STRING, start, position+1, value), nil
310}
311
312// readBlockString reads a block string token from the source file.
313//
314// """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
315func readBlockString(s *source.Source, start int) (Token, error) {
316body := s.Body
317position := start + 3
318runePosition := start + 3
319chunkStart := position
320var valueBuffer bytes.Buffer
321
322for {
323// Stop if we've reached the end of the buffer
324if position >= len(body) {
325break
326}
327
328code, n := runeAt(body, position)
329
330// Closing Triple-Quote (""")
331if code == '"' {
332x, _ := runeAt(body, position+1)
333y, _ := runeAt(body, position+2)
334if x == '"' && y == '"' {
335stringContent := body[chunkStart:position]
336valueBuffer.Write(stringContent)
337value := blockStringValue(valueBuffer.String())
338return makeToken(BLOCK_STRING, start, position+3, value), nil
339}
340}
341
342// SourceCharacter
343if code < 0x0020 &&
344code != 0x0009 &&
345code != 0x000a &&
346code != 0x000d {
347return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
348}
349
350// Escape Triple-Quote (\""")
351if code == '\\' { // \
352x, _ := runeAt(body, position+1)
353y, _ := runeAt(body, position+2)
354z, _ := runeAt(body, position+3)
355if x == '"' && y == '"' && z == '"' {
356stringContent := append(body[chunkStart:position], []byte(`"""`)...)
357valueBuffer.Write(stringContent)
358position += 4 // account for `"""` characters
359runePosition += 4 // " " " "
360chunkStart = position
361continue
362}
363}
364
365position += n
366runePosition++
367}
368
369return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
370}
371
// splitLinesRegex matches one line terminator: CRLF, LF or CR.
var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")

// blockStringValue implements the GraphQL spec's BlockStringValue() static
// algorithm.
//
// It produces the value of a block string from its raw value, similar to
// Coffeescript's block string, Python's docstring trim or Ruby's
// strip_heredoc:
//
//  1. the raw value is split into lines;
//  2. the smallest indentation found on any non-blank line after the first is
//     removed from every line after the first (the first line is never
//     re-indented, since it shares its line with the opening delimiter);
//  3. leading and trailing blank lines are dropped;
//  4. the remaining lines are joined with U+000A.
//
// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
func blockStringValue(in string) string {
	// Expand a block string's raw value into independent lines.
	lines := splitLinesRegex.Split(in, -1)

	// Determine the common indentation of all lines but the first. Blank
	// lines (indent == len) do not take part.
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		line := lines[i]
		indent := leadingWhitespaceLen(line)
		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
			commonIndent = indent
			if commonIndent == 0 {
				break
			}
		}
	}

	// Remove the common indentation from all lines but the first.
	if commonIndent > 0 {
		for i := 1; i < len(lines); i++ {
			line := lines[i]
			if len(line) < commonIndent {
				// Only a whitespace-only line can be shorter than the common
				// indent; stripping the indent leaves nothing.
				lines[i] = ""
				continue
			}
			lines[i] = line[commonIndent:]
		}
	}

	// Remove leading blank lines.
	for len(lines) > 0 && lineIsBlank(lines[0]) {
		lines = lines[1:]
	}

	// Remove trailing blank lines.
	for len(lines) > 0 && lineIsBlank(lines[len(lines)-1]) {
		lines = lines[:len(lines)-1]
	}

	// Return a string of the lines joined with U+000A.
	return strings.Join(lines, "\n")
}

// leadingWhitespaceLen returns the number of space and tab characters at the
// start of the given line.
func leadingWhitespaceLen(in string) (n int) {
	for _, ch := range in {
		if ch != ' ' && ch != '\t' {
			break
		}
		n++
	}
	return n
}

// lineIsBlank reports whether the given line is empty or consists solely of
// spaces and tabs.
func lineIsBlank(in string) bool {
	return leadingWhitespaceLen(in) == len(in)
}
437
// uniCharCode combines four hexadecimal digit characters, most significant
// first, into the number they spell: uniCharCode('0','0','0','f') is 15 and
// uniCharCode('0','0','f','f') is 255.
//
// The result is negative when any of the characters is not a hex digit:
// char2hex reports such a character as -1, and OR-ing a (shifted) -1 into the
// result keeps it negative.
func uniCharCode(a, b, c, d rune) rune {
	code := char2hex(a) << 12
	code |= char2hex(b) << 8
	code |= char2hex(c) << 4
	code |= char2hex(d)
	return rune(code)
}

// char2hex returns the numeric value of a hexadecimal digit character:
// '0'-'9' map to 0-9, and both 'A'-'F' and 'a'-'f' map to 10-15.
// Any other character yields -1.
func char2hex(a rune) int {
	switch {
	case a >= '0' && a <= '9':
		return int(a - '0')
	case a >= 'A' && a <= 'F':
		return int(a-'A') + 10
	case a >= 'a' && a <= 'f':
		return int(a-'a') + 10
	default:
		return -1
	}
}
464
465func makeToken(kind TokenKind, start int, end int, value string) Token {
466return Token{Kind: kind, Start: start, End: end, Value: value}
467}
468
// printCharCode renders a character code for use in error messages.
//
// A negative code stands for a read beyond the end of the source and prints
// as <EOF>. Printable ASCII (U+0020 up to, but not including, U+007F) prints
// as the quoted character itself; every other code prints in quoted, escaped
// hexadecimal form such as "\\u0007".
func printCharCode(code rune) string {
	switch {
	case code < 0:
		return "<EOF>"
	case code >= 0x0020 && code < 0x007F:
		return fmt.Sprintf(`"%c"`, code)
	default:
		return fmt.Sprintf(`"\\u%04X"`, code)
	}
}
481
482func readToken(s *source.Source, fromPosition int) (Token, error) {
483body := s.Body
484bodyLength := len(body)
485position, runePosition := positionAfterWhitespace(body, fromPosition)
486if position >= bodyLength {
487return makeToken(EOF, position, position, ""), nil
488}
489code, codeLength := runeAt(body, position)
490
491// SourceCharacter
492if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D {
493return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
494}
495
496switch code {
497// !
498case '!':
499return makeToken(BANG, position, position+1, ""), nil
500// $
501case '$':
502return makeToken(DOLLAR, position, position+1, ""), nil
503// &
504case '&':
505return makeToken(AMP, position, position+1, ""), nil
506// (
507case '(':
508return makeToken(PAREN_L, position, position+1, ""), nil
509// )
510case ')':
511return makeToken(PAREN_R, position, position+1, ""), nil
512// .
513case '.':
514next1, _ := runeAt(body, position+1)
515next2, _ := runeAt(body, position+2)
516if next1 == '.' && next2 == '.' {
517return makeToken(SPREAD, position, position+3, ""), nil
518}
519break
520// :
521case ':':
522return makeToken(COLON, position, position+1, ""), nil
523// =
524case '=':
525return makeToken(EQUALS, position, position+1, ""), nil
526// @
527case '@':
528return makeToken(AT, position, position+1, ""), nil
529// [
530case '[':
531return makeToken(BRACKET_L, position, position+1, ""), nil
532// ]
533case ']':
534return makeToken(BRACKET_R, position, position+1, ""), nil
535// {
536case '{':
537return makeToken(BRACE_L, position, position+1, ""), nil
538// |
539case '|':
540return makeToken(PIPE, position, position+1, ""), nil
541// }
542case '}':
543return makeToken(BRACE_R, position, position+1, ""), nil
544// A-Z
545case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
546'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
547return readName(s, position, runePosition), nil
548// _
549// a-z
550case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
551'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
552return readName(s, position, runePosition), nil
553// -
554// 0-9
555case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
556token, err := readNumber(s, position, code, codeLength)
557if err != nil {
558return token, err
559}
560return token, nil
561// "
562case '"':
563var token Token
564var err error
565x, _ := runeAt(body, position+1)
566y, _ := runeAt(body, position+2)
567if x == '"' && y == '"' {
568token, err = readBlockString(s, position)
569} else {
570token, err = readString(s, position)
571}
572return token, err
573}
574description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
575return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
576}
577
// runeAt decodes the rune that starts at the given byte position of body and
// returns it together with its width in bytes.
//
// When position is at or beyond the end of body, it returns (-1, 0): -1 is
// the end-of-input marker callers test for, and nothing was consumed.
// Invalid UTF-8 decodes as utf8.RuneError with a width of 1, as reported by
// utf8.DecodeRune.
func runeAt(body []byte, position int) (code rune, charWidth int) {
	if len(body) <= position {
		// <EOF>: the width must be a byte count, not the utf8.RuneError rune
		// value (0xFFFD).
		return -1, 0
	}

	// Fast path for single-byte (ASCII) characters.
	if c := body[position]; c < utf8.RuneSelf {
		return rune(c), 1
	}

	return utf8.DecodeRune(body[position:])
}
593
594// Reads from body starting at startPosition until it finds a non-whitespace
595// or commented character, then returns the position of that character for lexing.
596// lexing.
597// Returns both byte positions and rune position
598func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) {
599bodyLength := len(body)
600position = startPosition
601runePosition = startPosition
602for {
603if position < bodyLength {
604code, n := runeAt(body, position)
605
606// Skip Ignored
607if code == 0xFEFF || // BOM
608// White Space
609code == 0x0009 || // tab
610code == 0x0020 || // space
611// Line Terminator
612code == 0x000A || // new line
613code == 0x000D || // carriage return
614// Comma
615code == 0x002C {
616position += n
617runePosition++
618} else if code == 35 { // #
619position += n
620runePosition++
621for {
622code, n := runeAt(body, position)
623if position < bodyLength &&
624code != 0 &&
625// SourceCharacter but not LineTerminator
626(code > 0x001F || code == 0x0009) && code != 0x000A && code != 0x000D {
627position += n
628runePosition++
629continue
630} else {
631break
632}
633}
634} else {
635break
636}
637continue
638} else {
639break
640}
641}
642return position, runePosition
643}
644
645func GetTokenDesc(token Token) string {
646if token.Value == "" {
647return token.Kind.String()
648}
649return fmt.Sprintf("%s \"%s\"", token.Kind.String(), token.Value)
650}
651