cubefs

Форк
0
650 строк · 16.4 Кб
1
package lexer
2

3
import (
4
	"bytes"
5
	"fmt"
6
	"regexp"
7
	"strings"
8
	"unicode/utf8"
9

10
	"github.com/graphql-go/graphql/gqlerrors"
11
	"github.com/graphql-go/graphql/language/source"
12
)
13

14
// TokenKind identifies the lexical category of a Token.
type TokenKind int

// Token kinds. Numbering starts at 1 (iota + 1), so the zero value of
// TokenKind is not a valid kind. The numeric values follow the declaration
// order, so entries must not be reordered.
const (
	EOF TokenKind = iota + 1
	BANG
	DOLLAR
	PAREN_L
	PAREN_R
	SPREAD
	COLON
	EQUALS
	AT
	BRACKET_L
	BRACKET_R
	BRACE_L
	PIPE
	BRACE_R
	NAME
	INT
	FLOAT
	STRING
	BLOCK_STRING
	AMP
)

// tokenDescription holds the text used to describe each token kind: the
// literal characters for punctuators, a category label for everything else.
var tokenDescription = map[TokenKind]string{
	// End of input.
	EOF: "EOF",

	// Punctuators.
	BANG:      "!",
	DOLLAR:    "$",
	AMP:       "&",
	PAREN_L:   "(",
	PAREN_R:   ")",
	SPREAD:    "...",
	COLON:     ":",
	EQUALS:    "=",
	AT:        "@",
	BRACKET_L: "[",
	BRACKET_R: "]",
	BRACE_L:   "{",
	PIPE:      "|",
	BRACE_R:   "}",

	// Tokens that carry a value.
	NAME:         "Name",
	INT:          "Int",
	FLOAT:        "Float",
	STRING:       "String",
	BLOCK_STRING: "BlockString",
}

// String returns the description registered for kind in tokenDescription, or
// the empty string when kind has no entry.
func (kind TokenKind) String() string {
	if description, known := tokenDescription[kind]; known {
		return description
	}
	return ""
}
65

66
// Keyword spellings. The lexer has no dedicated token kind for keywords; they
// are lexed as NAME tokens, and these constants are the values to compare a
// NAME token's Value against.
const (
	// Executable-document keywords.
	QUERY        = "query"
	MUTATION     = "mutation"
	SUBSCRIPTION = "subscription"
	FRAGMENT     = "fragment"

	// Type-system keywords.
	SCHEMA    = "schema"
	SCALAR    = "scalar"
	TYPE      = "type"
	INTERFACE = "interface"
	UNION     = "union"
	ENUM      = "enum"
	INPUT     = "input"
	EXTEND    = "extend"
	DIRECTIVE = "directive"
)
82

83
// Token is a representation of a lexed Token. Value only appears for non-punctuation
84
// tokens: NAME, INT, FLOAT, and STRING.
85
type Token struct {
86
	Kind  TokenKind
87
	Start int
88
	End   int
89
	Value string
90
}
91

92
type Lexer func(resetPosition int) (Token, error)
93

94
func Lex(s *source.Source) Lexer {
95
	var prevPosition int
96
	return func(resetPosition int) (Token, error) {
97
		if resetPosition == 0 {
98
			resetPosition = prevPosition
99
		}
100
		token, err := readToken(s, resetPosition)
101
		if err != nil {
102
			return token, err
103
		}
104
		prevPosition = token.End
105
		return token, nil
106
	}
107
}
108

109
// Reads an alphanumeric + underscore name from the source.
110
// [_A-Za-z][_0-9A-Za-z]*
111
// position: Points to the byte position in the byte array
112
// runePosition: Points to the rune position in the byte array
113
func readName(source *source.Source, position, runePosition int) Token {
114
	body := source.Body
115
	bodyLength := len(body)
116
	endByte := position + 1
117
	endRune := runePosition + 1
118
	for {
119
		code, _ := runeAt(body, endByte)
120
		if (endByte != bodyLength) &&
121
			(code == '_' || // _
122
				code >= '0' && code <= '9' || // 0-9
123
				code >= 'A' && code <= 'Z' || // A-Z
124
				code >= 'a' && code <= 'z') { // a-z
125
			endByte++
126
			endRune++
127
			continue
128
		} else {
129
			break
130
		}
131
	}
132
	return makeToken(NAME, runePosition, endRune, string(body[position:endByte]))
133
}
134

135
// Reads a number token from the source file, either a float
136
// or an int depending on whether a decimal point appears.
137
// Int:   -?(0|[1-9][0-9]*)
138
// Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
139
func readNumber(s *source.Source, start int, firstCode rune, codeLength int) (Token, error) {
140
	code := firstCode
141
	body := s.Body
142
	position := start
143
	isFloat := false
144
	if code == '-' { // -
145
		position += codeLength
146
		code, codeLength = runeAt(body, position)
147
	}
148
	if code == '0' { // 0
149
		position += codeLength
150
		code, codeLength = runeAt(body, position)
151
		if code >= '0' && code <= '9' {
152
			description := fmt.Sprintf("Invalid number, unexpected digit after 0: %v.", printCharCode(code))
153
			return Token{}, gqlerrors.NewSyntaxError(s, position, description)
154
		}
155
	} else {
156
		p, err := readDigits(s, position, code, codeLength)
157
		if err != nil {
158
			return Token{}, err
159
		}
160
		position = p
161
		code, codeLength = runeAt(body, position)
162
	}
163
	if code == '.' { // .
164
		isFloat = true
165
		position += codeLength
166
		code, codeLength = runeAt(body, position)
167
		p, err := readDigits(s, position, code, codeLength)
168
		if err != nil {
169
			return Token{}, err
170
		}
171
		position = p
172
		code, codeLength = runeAt(body, position)
173
	}
174
	if code == 'E' || code == 'e' { // E e
175
		isFloat = true
176
		position += codeLength
177
		code, codeLength = runeAt(body, position)
178
		if code == '+' || code == '-' { // + -
179
			position += codeLength
180
			code, codeLength = runeAt(body, position)
181
		}
182
		p, err := readDigits(s, position, code, codeLength)
183
		if err != nil {
184
			return Token{}, err
185
		}
186
		position = p
187
	}
188
	kind := INT
189
	if isFloat {
190
		kind = FLOAT
191
	}
192

193
	return makeToken(kind, start, position, string(body[start:position])), nil
194
}
195

196
// Returns the new position in the source after reading digits.
197
func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (int, error) {
198
	body := s.Body
199
	position := start
200
	code := firstCode
201
	if code >= '0' && code <= '9' { // 0 - 9
202
		for {
203
			if code >= '0' && code <= '9' { // 0 - 9
204
				position += codeLength
205
				code, codeLength = runeAt(body, position)
206
				continue
207
			} else {
208
				break
209
			}
210
		}
211
		return position, nil
212
	}
213
	var description string
214
	description = fmt.Sprintf("Invalid number, expected digit but got: %v.", printCharCode(code))
215
	return position, gqlerrors.NewSyntaxError(s, position, description)
216
}
217

218
func readString(s *source.Source, start int) (Token, error) {
219
	body := s.Body
220
	position := start + 1
221
	runePosition := start + 1
222
	chunkStart := position
223
	var code rune
224
	var n int
225
	var valueBuffer bytes.Buffer
226
	for {
227
		code, n = runeAt(body, position)
228
		if position < len(body) &&
229
			// not LineTerminator
230
			code != 0x000A && code != 0x000D &&
231
			// not Quote (")
232
			code != '"' {
233

234
			// SourceCharacter
235
			if code < 0x0020 && code != 0x0009 {
236
				return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
237
			}
238
			position += n
239
			runePosition++
240
			if code == '\\' { // \
241
				valueBuffer.Write(body[chunkStart : position-1])
242
				code, n = runeAt(body, position)
243
				switch code {
244
				case '"':
245
					valueBuffer.WriteRune('"')
246
					break
247
				case '/':
248
					valueBuffer.WriteRune('/')
249
					break
250
				case '\\':
251
					valueBuffer.WriteRune('\\')
252
					break
253
				case 'b':
254
					valueBuffer.WriteRune('\b')
255
					break
256
				case 'f':
257
					valueBuffer.WriteRune('\f')
258
					break
259
				case 'n':
260
					valueBuffer.WriteRune('\n')
261
					break
262
				case 'r':
263
					valueBuffer.WriteRune('\r')
264
					break
265
				case 't':
266
					valueBuffer.WriteRune('\t')
267
					break
268
				case 'u':
269
					// Check if there are at least 4 bytes available
270
					if len(body) <= position+4 {
271
						return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
272
							fmt.Sprintf("Invalid character escape sequence: "+
273
								"\\u%v", string(body[position+1:])))
274
					}
275
					charCode := uniCharCode(
276
						rune(body[position+1]),
277
						rune(body[position+2]),
278
						rune(body[position+3]),
279
						rune(body[position+4]),
280
					)
281
					if charCode < 0 {
282
						return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
283
							fmt.Sprintf("Invalid character escape sequence: "+
284
								"\\u%v", string(body[position+1:position+5])))
285
					}
286
					valueBuffer.WriteRune(charCode)
287
					position += 4
288
					runePosition += 4
289
					break
290
				default:
291
					return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
292
						fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code))
293
				}
294
				position += n
295
				runePosition++
296
				chunkStart = position
297
			}
298
			continue
299
		} else {
300
			break
301
		}
302
	}
303
	if code != '"' { // quote (")
304
		return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
305
	}
306
	stringContent := body[chunkStart:position]
307
	valueBuffer.Write(stringContent)
308
	value := valueBuffer.String()
309
	return makeToken(STRING, start, position+1, value), nil
310
}
311

312
// readBlockString reads a block string token from the source file.
313
//
314
// """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
315
func readBlockString(s *source.Source, start int) (Token, error) {
316
	body := s.Body
317
	position := start + 3
318
	runePosition := start + 3
319
	chunkStart := position
320
	var valueBuffer bytes.Buffer
321

322
	for {
323
		// Stop if we've reached the end of the buffer
324
		if position >= len(body) {
325
			break
326
		}
327

328
		code, n := runeAt(body, position)
329

330
		// Closing Triple-Quote (""")
331
		if code == '"' {
332
			x, _ := runeAt(body, position+1)
333
			y, _ := runeAt(body, position+2)
334
			if x == '"' && y == '"' {
335
				stringContent := body[chunkStart:position]
336
				valueBuffer.Write(stringContent)
337
				value := blockStringValue(valueBuffer.String())
338
				return makeToken(BLOCK_STRING, start, position+3, value), nil
339
			}
340
		}
341

342
		// SourceCharacter
343
		if code < 0x0020 &&
344
			code != 0x0009 &&
345
			code != 0x000a &&
346
			code != 0x000d {
347
			return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
348
		}
349

350
		// Escape Triple-Quote (\""")
351
		if code == '\\' { // \
352
			x, _ := runeAt(body, position+1)
353
			y, _ := runeAt(body, position+2)
354
			z, _ := runeAt(body, position+3)
355
			if x == '"' && y == '"' && z == '"' {
356
				stringContent := append(body[chunkStart:position], []byte(`"""`)...)
357
				valueBuffer.Write(stringContent)
358
				position += 4     // account for `"""` characters
359
				runePosition += 4 // "       "   "     "
360
				chunkStart = position
361
				continue
362
			}
363
		}
364

365
		position += n
366
		runePosition++
367
	}
368

369
	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
370
}
371

372
// splitLinesRegex matches a single line terminator: CRLF, LF or CR.
var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")

// blockStringValue implements the GraphQL spec's BlockStringValue() static
// algorithm.
//
// It produces the value of a block string from its raw text, similar to
// Coffeescript's block string, Python's docstring trim or Ruby's
// strip_heredoc:
//
//  1. the raw text is split into lines;
//  2. the smallest indentation found on any non-blank line other than the
//     first is removed from every line other than the first;
//  3. leading and trailing blank lines are dropped;
//  4. the remaining lines are joined with U+000A.
//
// The first line is never de-indented, because it shares a line with the
// opening quotes and its leading text is content, not indentation.
//
// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
func blockStringValue(in string) string {
	// Expand a block string's raw value into independent lines.
	lines := splitLinesRegex.Split(in, -1)

	// Find the common indentation of all non-blank lines but the first.
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		line := lines[i]
		indent := leadingWhitespaceLen(line)
		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
			commonIndent = indent
			if commonIndent == 0 {
				break
			}
		}
	}

	// Remove the common indentation from all lines but the first.
	if commonIndent > 0 {
		for i := 1; i < len(lines); i++ {
			if len(lines[i]) < commonIndent {
				// Only a whitespace-only line can be shorter than the common
				// indentation; removing the indentation leaves nothing.
				lines[i] = ""
				continue
			}
			lines[i] = lines[i][commonIndent:]
		}
	}

	// Remove leading blank lines.
	for len(lines) > 0 && lineIsBlank(lines[0]) {
		lines = lines[1:]
	}

	// Remove trailing blank lines.
	for len(lines) > 0 && lineIsBlank(lines[len(lines)-1]) {
		lines = lines[:len(lines)-1]
	}

	// Return a string of the lines joined with U+000A.
	return strings.Join(lines, "\n")
}

// leadingWhitespaceLen returns the number of space and tab characters at the
// start of in. Both are single-byte characters, so the result is also the
// byte offset of the first character that is neither.
func leadingWhitespaceLen(in string) (n int) {
	for n < len(in) && (in[n] == ' ' || in[n] == '\t') {
		n++
	}
	return n
}

// lineIsBlank reports whether in is empty or consists solely of spaces and
// tabs.
func lineIsBlank(in string) bool {
	return leadingWhitespaceLen(in) == len(in)
}
437

438
// Converts four hexadecimal chars to the integer that the
439
// string represents. For example, uniCharCode('0','0','0','f')
440
// will return 15, and uniCharCode('0','0','f','f') returns 255.
441
// Returns a negative number on error, if a char was invalid.
442
// This is implemented by noting that char2hex() returns -1 on error,
443
// which means the result of ORing the char2hex() will also be negative.
444
func uniCharCode(a, b, c, d rune) rune {
445
	return rune(char2hex(a)<<12 | char2hex(b)<<8 | char2hex(c)<<4 | char2hex(d))
446
}
447

448
// char2hex returns the numeric value of a hexadecimal digit:
//
//	'0'..'9' -> 0..9
//	'A'..'F' -> 10..15
//	'a'..'f' -> 10..15
//
// Any other character yields -1.
func char2hex(a rune) int {
	switch {
	case '0' <= a && a <= '9':
		return int(a - '0')
	case 'A' <= a && a <= 'F':
		return int(a-'A') + 10
	case 'a' <= a && a <= 'f':
		return int(a-'a') + 10
	default:
		return -1
	}
}
464

465
// makeToken assembles a Token from its kind, start and end positions, and
// value.
func makeToken(kind TokenKind, start int, end int, value string) Token {
	var tok Token
	tok.Kind = kind
	tok.Start = start
	tok.End = end
	tok.Value = value
	return tok
}
468

469
// printCharCode renders a character for use in an error message:
//
//   - a negative code, which marks a read past the end of the source, is
//     rendered as <EOF>;
//   - a printable ASCII character (0x20 up to, but excluding, 0x7F) is
//     rendered as itself in double quotes;
//   - anything else is rendered in escaped form with at least four uppercase
//     hex digits, in double quotes.
func printCharCode(code rune) string {
	switch {
	case code < 0:
		return "<EOF>"
	case code >= 0x0020 && code < 0x007F:
		return fmt.Sprintf(`"%c"`, code)
	default:
		return fmt.Sprintf(`"\\u%04X"`, code)
	}
}
481

482
func readToken(s *source.Source, fromPosition int) (Token, error) {
483
	body := s.Body
484
	bodyLength := len(body)
485
	position, runePosition := positionAfterWhitespace(body, fromPosition)
486
	if position >= bodyLength {
487
		return makeToken(EOF, position, position, ""), nil
488
	}
489
	code, codeLength := runeAt(body, position)
490

491
	// SourceCharacter
492
	if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D {
493
		return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
494
	}
495

496
	switch code {
497
	// !
498
	case '!':
499
		return makeToken(BANG, position, position+1, ""), nil
500
	// $
501
	case '$':
502
		return makeToken(DOLLAR, position, position+1, ""), nil
503
	// &
504
	case '&':
505
		return makeToken(AMP, position, position+1, ""), nil
506
	// (
507
	case '(':
508
		return makeToken(PAREN_L, position, position+1, ""), nil
509
	// )
510
	case ')':
511
		return makeToken(PAREN_R, position, position+1, ""), nil
512
	// .
513
	case '.':
514
		next1, _ := runeAt(body, position+1)
515
		next2, _ := runeAt(body, position+2)
516
		if next1 == '.' && next2 == '.' {
517
			return makeToken(SPREAD, position, position+3, ""), nil
518
		}
519
		break
520
	// :
521
	case ':':
522
		return makeToken(COLON, position, position+1, ""), nil
523
	// =
524
	case '=':
525
		return makeToken(EQUALS, position, position+1, ""), nil
526
	// @
527
	case '@':
528
		return makeToken(AT, position, position+1, ""), nil
529
	// [
530
	case '[':
531
		return makeToken(BRACKET_L, position, position+1, ""), nil
532
	// ]
533
	case ']':
534
		return makeToken(BRACKET_R, position, position+1, ""), nil
535
	// {
536
	case '{':
537
		return makeToken(BRACE_L, position, position+1, ""), nil
538
	// |
539
	case '|':
540
		return makeToken(PIPE, position, position+1, ""), nil
541
	// }
542
	case '}':
543
		return makeToken(BRACE_R, position, position+1, ""), nil
544
	// A-Z
545
	case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
546
		'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
547
		return readName(s, position, runePosition), nil
548
	// _
549
	// a-z
550
	case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
551
		'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
552
		return readName(s, position, runePosition), nil
553
	// -
554
	// 0-9
555
	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
556
		token, err := readNumber(s, position, code, codeLength)
557
		if err != nil {
558
			return token, err
559
		}
560
		return token, nil
561
	// "
562
	case '"':
563
		var token Token
564
		var err error
565
		x, _ := runeAt(body, position+1)
566
		y, _ := runeAt(body, position+2)
567
		if x == '"' && y == '"' {
568
			token, err = readBlockString(s, position)
569
		} else {
570
			token, err = readString(s, position)
571
		}
572
		return token, err
573
	}
574
	description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
575
	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
576
}
577

578
// runeAt decodes the rune that starts at byte offset position in body and
// returns it together with its width in bytes.
//
// A position outside body (negative, or at or past the end) stands for <EOF>:
// the rune is -1 and the width is 0, so adding the width to a position never
// moves it. Bytes that are not valid UTF-8 decode to utf8.RuneError with a
// width of 1.
func runeAt(body []byte, position int) (code rune, charWidth int) {
	if position < 0 || position >= len(body) {
		// <EOF>
		return -1, 0
	}
	// DecodeRune has its own fast path for single-byte (ASCII) characters.
	return utf8.DecodeRune(body[position:])
}
593

594
// Reads from body starting at startPosition until it finds a non-whitespace
595
// or commented character, then returns the position of that character for lexing.
596
// lexing.
597
// Returns both byte positions and rune position
598
func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) {
599
	bodyLength := len(body)
600
	position = startPosition
601
	runePosition = startPosition
602
	for {
603
		if position < bodyLength {
604
			code, n := runeAt(body, position)
605

606
			// Skip Ignored
607
			if code == 0xFEFF || // BOM
608
				// White Space
609
				code == 0x0009 || // tab
610
				code == 0x0020 || // space
611
				// Line Terminator
612
				code == 0x000A || // new line
613
				code == 0x000D || // carriage return
614
				// Comma
615
				code == 0x002C {
616
				position += n
617
				runePosition++
618
			} else if code == 35 { // #
619
				position += n
620
				runePosition++
621
				for {
622
					code, n := runeAt(body, position)
623
					if position < bodyLength &&
624
						code != 0 &&
625
						// SourceCharacter but not LineTerminator
626
						(code > 0x001F || code == 0x0009) && code != 0x000A && code != 0x000D {
627
						position += n
628
						runePosition++
629
						continue
630
					} else {
631
						break
632
					}
633
				}
634
			} else {
635
				break
636
			}
637
			continue
638
		} else {
639
			break
640
		}
641
	}
642
	return position, runePosition
643
}
644

645
// GetTokenDesc returns a human-readable description of token: the description
// of its kind, followed by its value in double quotes when the value is not
// empty.
func GetTokenDesc(token Token) string {
	desc := token.Kind.String()
	if token.Value != "" {
		desc = fmt.Sprintf("%s \"%s\"", desc, token.Value)
	}
	return desc
}
651

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.