podman

Форк
0
1283 строки · 30.2 Кб
1
package toml
2

3
import (
4
	"fmt"
5
	"reflect"
6
	"runtime"
7
	"strings"
8
	"unicode"
9
	"unicode/utf8"
10
)
11

12
// itemType identifies the kind of an item emitted by the lexer.
type itemType int

// The item kinds. Values are assigned sequentially by iota, so itemError is
// the zero value of itemType.
const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemKeyEnd
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)
39

40
// eof is the rune value returned by lexer.next once the input is exhausted.
const eof = 0
41

42
// stateFn is one state of the lexer's state machine: it is called with the
// lexer and returns the state to run next.
type stateFn func(lx *lexer) stateFn
43

44
// String renders the position as its line, start offset and length.
func (p Position) String() string {
	const layout = "at line %d; start %d; length %d"
	return fmt.Sprintf(layout, p.Line, p.Start, p.Len)
}
47

48
// lexer holds the state of the scanner: the input, the span of the item
// currently being read, and the state-machine bookkeeping.
type lexer struct {
	// input is the text being lexed.
	input string
	// start and pos are byte offsets into input; input[start:pos] is the
	// text of the pending item.
	start int
	pos   int
	// line is the current line number; lex() initializes it to 1.
	line int
	// state is the next state function to run.
	state stateFn
	// items is the buffered channel on which lexed items are queued.
	items chan item
	// tomlNext gates syntax that is rejected otherwise (for example the \e
	// and \x escapes, and newlines inside inline tables).
	tomlNext bool

	// Allow for backing up by up to 4 runes. This is necessary because TOML
	// contains 3-rune tokens (""" and ''').
	prevWidths [4]int
	nprev      int  // how many of prevWidths are in use
	atEOF      bool // If we emit an eof, we can still back up, but it is not OK to call next again.

	// A stack of state functions used to maintain context.
	//
	// The idea is to reuse parts of the state machine in various places. For
	// example, values can appear at the top level or within arbitrarily nested
	// arrays. The last state on the stack is used after a value has been lexed.
	// Similarly for comments.
	stack []stateFn
}
71

72
// item is a single unit of lexer output, queued on lexer.items.
type item struct {
	typ itemType // kind of item
	val string   // text of the item, as set by emit/emitTrim
	err error    // set on itemError items
	pos Position // location of the item in the input
}
78

79
func (lx *lexer) nextItem() item {
80
	for {
81
		select {
82
		case item := <-lx.items:
83
			return item
84
		default:
85
			lx.state = lx.state(lx)
86
			//fmt.Printf("     STATE %-24s  current: %-10s	stack: %s\n", lx.state, lx.current(), lx.stack)
87
		}
88
	}
89
}
90

91
func lex(input string, tomlNext bool) *lexer {
92
	lx := &lexer{
93
		input:    input,
94
		state:    lexTop,
95
		items:    make(chan item, 10),
96
		stack:    make([]stateFn, 0, 10),
97
		line:     1,
98
		tomlNext: tomlNext,
99
	}
100
	return lx
101
}
102

103
// push appends state to the top of the state stack; pop returns it later.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}
106

107
// pop removes and returns the state most recently pushed on the stack. An
// empty stack is a lexer bug: an error item is emitted and nil is returned.
func (lx *lexer) pop() stateFn {
	depth := len(lx.stack)
	if depth == 0 {
		return lx.errorf("BUG in lexer: no states to pop")
	}
	top := lx.stack[depth-1]
	lx.stack = lx.stack[:depth-1]
	return top
}
115

116
// current returns the pending input: the text between start and pos.
func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}
119

120
func (lx lexer) getPos() Position {
121
	p := Position{
122
		Line:  lx.line,
123
		Start: lx.start,
124
		Len:   lx.pos - lx.start,
125
	}
126
	if p.Len <= 0 {
127
		p.Len = 1
128
	}
129
	return p
130
}
131

132
// emit queues the pending input as an item of the given type and then marks
// that input as consumed.
func (lx *lexer) emit(typ itemType) {
	// Needed for multiline strings ending with an incomplete UTF-8 sequence.
	if lx.start > lx.pos {
		lx.error(errLexUTF8{lx.input[lx.pos]})
		return
	}
	out := item{typ: typ, pos: lx.getPos(), val: lx.current()}
	lx.items <- out
	lx.start = lx.pos
}
141

142
// emitTrim is like emit, but strips leading and trailing whitespace from the
// item's value before queuing it.
func (lx *lexer) emitTrim(typ itemType) {
	trimmed := strings.TrimSpace(lx.current())
	lx.items <- item{typ: typ, pos: lx.getPos(), val: trimmed}
	lx.start = lx.pos
}
146

147
// next consumes and returns the next rune of the input.
//
// At the end of the input it sets atEOF and returns eof; calling next again
// without an intervening backup panics. On invalid UTF-8, on a control
// character, or on a '\r' that is not followed by '\n', it emits an error
// item and returns utf8.RuneError without advancing pos.
func (lx *lexer) next() (r rune) {
	if lx.atEOF {
		panic("BUG in lexer: next called after EOF")
	}
	if lx.pos >= len(lx.input) {
		lx.atEOF = true
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	// Shift the width history so backup() can undo this read.
	lx.prevWidths[3] = lx.prevWidths[2]
	lx.prevWidths[2] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[0]
	if lx.nprev < 4 {
		lx.nprev++
	}

	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
	// A decoding failure is reported as RuneError with a width of at most 1.
	// A correctly encoded U+FFFD also decodes to RuneError, but with width 3,
	// and is valid input that must not be rejected.
	if r == utf8.RuneError && w <= 1 {
		lx.error(errLexUTF8{lx.input[lx.pos]})
		return utf8.RuneError
	}

	// Note: don't use peek() here, as this calls next().
	if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
		lx.errorControlChar(r)
		return utf8.RuneError
	}

	lx.prevWidths[0] = w
	lx.pos += w
	return r
}
182

183
// ignore discards the pending input by moving start up to pos.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}
187

188
// backup steps back one rune. Can be called 4 times between calls to next.
189
func (lx *lexer) backup() {
190
	if lx.atEOF {
191
		lx.atEOF = false
192
		return
193
	}
194
	if lx.nprev < 1 {
195
		panic("BUG in lexer: backed up too far")
196
	}
197
	w := lx.prevWidths[0]
198
	lx.prevWidths[0] = lx.prevWidths[1]
199
	lx.prevWidths[1] = lx.prevWidths[2]
200
	lx.prevWidths[2] = lx.prevWidths[3]
201
	lx.nprev--
202

203
	lx.pos -= w
204
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
205
		lx.line--
206
	}
207
}
208

209
// accept consumes the next rune if it's equal to `valid`.
210
func (lx *lexer) accept(valid rune) bool {
211
	if lx.next() == valid {
212
		return true
213
	}
214
	lx.backup()
215
	return false
216
}
217

218
// peek returns but does not consume the next rune in the input. It is
// implemented as next() followed by backup().
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}
224

225
// skip ignores all input that matches the given predicate.
226
func (lx *lexer) skip(pred func(rune) bool) {
227
	for {
228
		r := lx.next()
229
		if pred(r) {
230
			continue
231
		}
232
		lx.backup()
233
		lx.ignore()
234
		return
235
	}
236
}
237

238
// error stops all lexing by emitting an error and returning `nil`.
239
//
240
// Note that any value that is a character is escaped if it's a special
241
// character (newlines, tabs, etc.).
242
func (lx *lexer) error(err error) stateFn {
243
	if lx.atEOF {
244
		return lx.errorPrevLine(err)
245
	}
246
	lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
247
	return nil
248
}
249

250
// errorfPrevline is like error(), but sets the position to the last column of
251
// the previous line.
252
//
253
// This is so that unexpected EOF or NL errors don't show on a new blank line.
254
func (lx *lexer) errorPrevLine(err error) stateFn {
255
	pos := lx.getPos()
256
	pos.Line--
257
	pos.Len = 1
258
	pos.Start = lx.pos - 1
259
	lx.items <- item{typ: itemError, pos: pos, err: err}
260
	return nil
261
}
262

263
// errorPos is like error(), but allows explicitly setting the position.
264
func (lx *lexer) errorPos(start, length int, err error) stateFn {
265
	pos := lx.getPos()
266
	pos.Start = start
267
	pos.Len = length
268
	lx.items <- item{typ: itemError, pos: pos, err: err}
269
	return nil
270
}
271

272
// errorf is like error, and creates a new error.
273
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
274
	if lx.atEOF {
275
		pos := lx.getPos()
276
		pos.Line--
277
		pos.Len = 1
278
		pos.Start = lx.pos - 1
279
		lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
280
		return nil
281
	}
282
	lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
283
	return nil
284
}
285

286
// errorControlChar emits an errLexControl error for cc, positioned as a
// single character at offset pos-1, and returns nil.
func (lx *lexer) errorControlChar(cc rune) stateFn {
	return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
}
289

290
// lexTop consumes elements at the top level of TOML data.
291
func lexTop(lx *lexer) stateFn {
292
	r := lx.next()
293
	if isWhitespace(r) || isNL(r) {
294
		return lexSkip(lx, lexTop)
295
	}
296
	switch r {
297
	case '#':
298
		lx.push(lexTop)
299
		return lexCommentStart
300
	case '[':
301
		return lexTableStart
302
	case eof:
303
		if lx.pos > lx.start {
304
			return lx.errorf("unexpected EOF")
305
		}
306
		lx.emit(itemEOF)
307
		return nil
308
	}
309

310
	// At this point, the only valid item can be a key, so we back up
311
	// and let the key lexer do the rest.
312
	lx.backup()
313
	lx.push(lexTopEnd)
314
	return lexKeyStart
315
}
316

317
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
318
// or a table.) It must see only whitespace, and will turn back to lexTop
319
// upon a newline. If it sees EOF, it will quit the lexer successfully.
320
func lexTopEnd(lx *lexer) stateFn {
321
	r := lx.next()
322
	switch {
323
	case r == '#':
324
		// a comment will read to a newline for us.
325
		lx.push(lexTop)
326
		return lexCommentStart
327
	case isWhitespace(r):
328
		return lexTopEnd
329
	case isNL(r):
330
		lx.ignore()
331
		return lexTop
332
	case r == eof:
333
		lx.emit(itemEOF)
334
		return nil
335
	}
336
	return lx.errorf(
337
		"expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
338
		r)
339
}
340

341
// lexTable lexes the beginning of a table. Namely, it makes sure that
342
// it starts with a character other than '.' and ']'.
343
// It assumes that '[' has already been consumed.
344
// It also handles the case that this is an item in an array of tables.
345
// e.g., '[[name]]'.
346
func lexTableStart(lx *lexer) stateFn {
347
	if lx.peek() == '[' {
348
		lx.next()
349
		lx.emit(itemArrayTableStart)
350
		lx.push(lexArrayTableEnd)
351
	} else {
352
		lx.emit(itemTableStart)
353
		lx.push(lexTableEnd)
354
	}
355
	return lexTableNameStart
356
}
357

358
// lexTableEnd emits itemTableEnd and continues with lexTopEnd.
func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}
362

363
// lexArrayTableEnd expects the second ']' that closes an array-of-tables
// header, emits itemArrayTableEnd and continues with lexTopEnd.
func lexArrayTableEnd(lx *lexer) stateFn {
	r := lx.next()
	if r == ']' {
		lx.emit(itemArrayTableEnd)
		return lexTopEnd
	}
	return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
}
370

371
func lexTableNameStart(lx *lexer) stateFn {
372
	lx.skip(isWhitespace)
373
	switch r := lx.peek(); {
374
	case r == ']' || r == eof:
375
		return lx.errorf("unexpected end of table name (table names cannot be empty)")
376
	case r == '.':
377
		return lx.errorf("unexpected table separator (table names cannot be empty)")
378
	case r == '"' || r == '\'':
379
		lx.ignore()
380
		lx.push(lexTableNameEnd)
381
		return lexQuotedName
382
	default:
383
		lx.push(lexTableNameEnd)
384
		return lexBareName
385
	}
386
}
387

388
// lexTableNameEnd reads the end of a piece of a table name, optionally
389
// consuming whitespace.
390
func lexTableNameEnd(lx *lexer) stateFn {
391
	lx.skip(isWhitespace)
392
	switch r := lx.next(); {
393
	case isWhitespace(r):
394
		return lexTableNameEnd
395
	case r == '.':
396
		lx.ignore()
397
		return lexTableNameStart
398
	case r == ']':
399
		return lx.pop()
400
	default:
401
		return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
402
	}
403
}
404

405
// lexBareName lexes one part of a key or table.
406
//
407
// It assumes that at least one valid character for the table has already been
408
// read.
409
//
410
// Lexes only one part, e.g. only 'a' inside 'a.b'.
411
func lexBareName(lx *lexer) stateFn {
412
	r := lx.next()
413
	if isBareKeyChar(r, lx.tomlNext) {
414
		return lexBareName
415
	}
416
	lx.backup()
417
	lx.emit(itemText)
418
	return lx.pop()
419
}
420

421
// lexBareName lexes one part of a key or table.
422
//
423
// It assumes that at least one valid character for the table has already been
424
// read.
425
//
426
// Lexes only one part, e.g. only '"a"' inside '"a".b'.
427
func lexQuotedName(lx *lexer) stateFn {
428
	r := lx.next()
429
	switch {
430
	case isWhitespace(r):
431
		return lexSkip(lx, lexValue)
432
	case r == '"':
433
		lx.ignore() // ignore the '"'
434
		return lexString
435
	case r == '\'':
436
		lx.ignore() // ignore the "'"
437
		return lexRawString
438
	case r == eof:
439
		return lx.errorf("unexpected EOF; expected value")
440
	default:
441
		return lx.errorf("expected value but found %q instead", r)
442
	}
443
}
444

445
// lexKeyStart consumes all key parts until a '='.
446
func lexKeyStart(lx *lexer) stateFn {
447
	lx.skip(isWhitespace)
448
	switch r := lx.peek(); {
449
	case r == '=' || r == eof:
450
		return lx.errorf("unexpected '=': key name appears blank")
451
	case r == '.':
452
		return lx.errorf("unexpected '.': keys cannot start with a '.'")
453
	case r == '"' || r == '\'':
454
		lx.ignore()
455
		fallthrough
456
	default: // Bare key
457
		lx.emit(itemKeyStart)
458
		return lexKeyNameStart
459
	}
460
}
461

462
func lexKeyNameStart(lx *lexer) stateFn {
463
	lx.skip(isWhitespace)
464
	switch r := lx.peek(); {
465
	case r == '=' || r == eof:
466
		return lx.errorf("unexpected '='")
467
	case r == '.':
468
		return lx.errorf("unexpected '.'")
469
	case r == '"' || r == '\'':
470
		lx.ignore()
471
		lx.push(lexKeyEnd)
472
		return lexQuotedName
473
	default:
474
		lx.push(lexKeyEnd)
475
		return lexBareName
476
	}
477
}
478

479
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
480
// separator).
481
func lexKeyEnd(lx *lexer) stateFn {
482
	lx.skip(isWhitespace)
483
	switch r := lx.next(); {
484
	case isWhitespace(r):
485
		return lexSkip(lx, lexKeyEnd)
486
	case r == eof:
487
		return lx.errorf("unexpected EOF; expected key separator '='")
488
	case r == '.':
489
		lx.ignore()
490
		return lexKeyNameStart
491
	case r == '=':
492
		lx.emit(itemKeyEnd)
493
		return lexSkip(lx, lexValue)
494
	default:
495
		return lx.errorf("expected '.' or '=', but got %q instead", r)
496
	}
497
}
498

499
// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
//
// The first rune selects the kind of value: a digit starts a number or date,
// '[' an array, '{' an inline table, '"' or "'" a string (tripled for the
// multiline forms), '+' or '-' a signed number, and any other letter is
// handed to lexBool.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT newlines.
	// In array syntax, the array states are responsible for ignoring newlines.
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case isDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateStart
	}
	switch r {
	case '[':
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case '{':
		lx.ignore()
		lx.emit(itemInlineTableStart)
		return lexInlineTableValue
	case '"':
		if lx.accept('"') {
			if lx.accept('"') {
				lx.ignore() // Ignore """
				return lexMultilineString
			}
			// Only two quotes: put the second one back so lexString sees
			// it as the closing quote of an empty string.
			lx.backup()
		}
		lx.ignore() // ignore the '"'
		return lexString
	case '\'':
		if lx.accept('\'') {
			if lx.accept('\'') {
				lx.ignore() // Ignore '''
				return lexMultilineRawString
			}
			// Only two quotes: put the second one back so lexRawString
			// sees it as the closing quote of an empty string.
			lx.backup()
		}
		lx.ignore() // ignore the "'"
		return lexRawString
	case '.': // special error case, be kind to users
		return lx.errorf("floats must start with a digit, not '.'")
	case 'i', 'n':
		// "inf" or "nan". When neither matches, control leaves the switch
		// and continues with the letter check below.
		if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
			lx.emit(itemFloat)
			return lx.pop()
		}
	case '-', '+':
		return lexDecimalNumberStart
	}
	if unicode.IsLetter(r) {
		// Be permissive here; lexBool will give a nice error if the
		// user wrote something like
		//   x = foo
		// (i.e. not 'true' or 'false' but is something else word-like.)
		lx.backup()
		return lexBool
	}
	if r == eof {
		return lx.errorf("unexpected EOF; expected value")
	}
	return lx.errorf("expected value but found %q instead", r)
}
565

566
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
567
// have already been consumed. All whitespace and newlines are ignored.
568
func lexArrayValue(lx *lexer) stateFn {
569
	r := lx.next()
570
	switch {
571
	case isWhitespace(r) || isNL(r):
572
		return lexSkip(lx, lexArrayValue)
573
	case r == '#':
574
		lx.push(lexArrayValue)
575
		return lexCommentStart
576
	case r == ',':
577
		return lx.errorf("unexpected comma")
578
	case r == ']':
579
		return lexArrayEnd
580
	}
581

582
	lx.backup()
583
	lx.push(lexArrayValueEnd)
584
	return lexValue
585
}
586

587
// lexArrayValueEnd consumes everything between the end of an array value and
588
// the next value (or the end of the array): it ignores whitespace and newlines
589
// and expects either a ',' or a ']'.
590
func lexArrayValueEnd(lx *lexer) stateFn {
591
	switch r := lx.next(); {
592
	case isWhitespace(r) || isNL(r):
593
		return lexSkip(lx, lexArrayValueEnd)
594
	case r == '#':
595
		lx.push(lexArrayValueEnd)
596
		return lexCommentStart
597
	case r == ',':
598
		lx.ignore()
599
		return lexArrayValue // move on to the next value
600
	case r == ']':
601
		return lexArrayEnd
602
	default:
603
		return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
604
	}
605
}
606

607
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed. The ']' is discarded,
// itemArrayEnd is emitted, and the state on top of the stack takes over.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}
614

615
// lexInlineTableValue consumes one key/value pair in an inline table.
616
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
617
func lexInlineTableValue(lx *lexer) stateFn {
618
	r := lx.next()
619
	switch {
620
	case isWhitespace(r):
621
		return lexSkip(lx, lexInlineTableValue)
622
	case isNL(r):
623
		if lx.tomlNext {
624
			return lexSkip(lx, lexInlineTableValue)
625
		}
626
		return lx.errorPrevLine(errLexInlineTableNL{})
627
	case r == '#':
628
		lx.push(lexInlineTableValue)
629
		return lexCommentStart
630
	case r == ',':
631
		return lx.errorf("unexpected comma")
632
	case r == '}':
633
		return lexInlineTableEnd
634
	}
635
	lx.backup()
636
	lx.push(lexInlineTableValueEnd)
637
	return lexKeyStart
638
}
639

640
// lexInlineTableValueEnd consumes everything between the end of an inline table
641
// key/value pair and the next pair (or the end of the table):
642
// it ignores whitespace and expects either a ',' or a '}'.
643
func lexInlineTableValueEnd(lx *lexer) stateFn {
644
	switch r := lx.next(); {
645
	case isWhitespace(r):
646
		return lexSkip(lx, lexInlineTableValueEnd)
647
	case isNL(r):
648
		if lx.tomlNext {
649
			return lexSkip(lx, lexInlineTableValueEnd)
650
		}
651
		return lx.errorPrevLine(errLexInlineTableNL{})
652
	case r == '#':
653
		lx.push(lexInlineTableValueEnd)
654
		return lexCommentStart
655
	case r == ',':
656
		lx.ignore()
657
		lx.skip(isWhitespace)
658
		if lx.peek() == '}' {
659
			if lx.tomlNext {
660
				return lexInlineTableValueEnd
661
			}
662
			return lx.errorf("trailing comma not allowed in inline tables")
663
		}
664
		return lexInlineTableValue
665
	case r == '}':
666
		return lexInlineTableEnd
667
	default:
668
		return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
669
	}
670
}
671

672
func runeOrEOF(r rune) string {
673
	if r == eof {
674
		return "end of file"
675
	}
676
	return "'" + string(r) + "'"
677
}
678

679
// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed. The '}' is discarded,
// itemInlineTableEnd is emitted, and the state on top of the stack takes
// over.
func lexInlineTableEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemInlineTableEnd)
	return lx.pop()
}
686

687
// lexString consumes the inner contents of a string. It assumes that the
688
// beginning '"' has already been consumed and ignored.
689
func lexString(lx *lexer) stateFn {
690
	r := lx.next()
691
	switch {
692
	case r == eof:
693
		return lx.errorf(`unexpected EOF; expected '"'`)
694
	case isNL(r):
695
		return lx.errorPrevLine(errLexStringNL{})
696
	case r == '\\':
697
		lx.push(lexString)
698
		return lexStringEscape
699
	case r == '"':
700
		lx.backup()
701
		lx.emit(itemString)
702
		lx.next()
703
		lx.ignore()
704
		return lx.pop()
705
	}
706
	return lexString
707
}
708

709
// lexMultilineString consumes the inner contents of a multiline string. It
// assumes that the beginning '"""' has already been consumed and ignored.
// Each call consumes one rune, or one run of up to three quotes.
func lexMultilineString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineString
	case eof:
		return lx.errorf(`unexpected EOF; expected '"""'`)
	case '\\':
		return lexMultilineStringEscape
	case '"':
		// Found " → try to read two more "".
		if lx.accept('"') {
			if lx.accept('"') {
				// Peek ahead: the string can contain " and "", including at the
				// end: """str"""""
				// 6 or more at the end, however, is an error.
				if lx.peek() == '"' {
					// Check if we already lexed 5 "s; if so we have 6 now, and
					// that is too many.
					//
					// Second check is for the edge case:
					//
					//            two quotes allowed.
					//            vv
					//   """lol \""""""
					//          ^^  ^^^---- closing three
					//     escaped
					//
					// A bit ugly, but it works.
					if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
						return lx.errorf(`unexpected '""""""'`)
					}
					// Keep the first quote as content and re-lex the rest.
					lx.backup()
					lx.backup()
					return lexMultilineString
				}

				lx.backup() // backup: don't include the """ in the item.
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next() // Read over """ again and discard it.
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			// Only two quotes: keep the first as content, re-lex the second.
			lx.backup()
		}
		return lexMultilineString
	}
}
763

764
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
765
// It assumes that the beginning "'" has already been consumed and ignored.
766
func lexRawString(lx *lexer) stateFn {
767
	r := lx.next()
768
	switch {
769
	default:
770
		return lexRawString
771
	case r == eof:
772
		return lx.errorf(`unexpected EOF; expected "'"`)
773
	case isNL(r):
774
		return lx.errorPrevLine(errLexStringNL{})
775
	case r == '\'':
776
		lx.backup()
777
		lx.emit(itemRawString)
778
		lx.next()
779
		lx.ignore()
780
		return lx.pop()
781
	}
782
}
783

784
// lexMultilineRawString consumes a multiline raw string. Nothing can be
// escaped in such a string. It assumes that the beginning triple-' has
// already been consumed and ignored. Each call consumes one rune, or one run
// of up to three quotes.
func lexMultilineRawString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineRawString
	case eof:
		return lx.errorf(`unexpected EOF; expected "'''"`)
	case '\'':
		// Found ' → try to read two more ''.
		if lx.accept('\'') {
			if lx.accept('\'') {
				// Peek ahead: the string can contain ' and '', including at the
				// end: '''str'''''
				// 6 or more at the end, however, is an error.
				if lx.peek() == '\'' {
					// Check if we already lexed 5 's; if so we have 6 now, and
					// that is too many.
					if strings.HasSuffix(lx.current(), "'''''") {
						return lx.errorf(`unexpected "''''''"`)
					}
					// Keep the first quote as content and re-lex the rest.
					lx.backup()
					lx.backup()
					return lexMultilineRawString
				}

				lx.backup() // backup: don't include the ''' in the item.
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next() // Read over ''' again and discard it.
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			// Only two quotes: keep the first as content, re-lex the second.
			lx.backup()
		}
		return lexMultilineRawString
	}
}
827

828
// lexMultilineStringEscape consumes an escaped character. It assumes that the
829
// preceding '\\' has already been consumed.
830
func lexMultilineStringEscape(lx *lexer) stateFn {
831
	if isNL(lx.next()) { /// \ escaping newline.
832
		return lexMultilineString
833
	}
834
	lx.backup()
835
	lx.push(lexMultilineString)
836
	return lexStringEscape(lx)
837
}
838

839
func lexStringEscape(lx *lexer) stateFn {
840
	r := lx.next()
841
	switch r {
842
	case 'e':
843
		if !lx.tomlNext {
844
			return lx.error(errLexEscape{r})
845
		}
846
		fallthrough
847
	case 'b':
848
		fallthrough
849
	case 't':
850
		fallthrough
851
	case 'n':
852
		fallthrough
853
	case 'f':
854
		fallthrough
855
	case 'r':
856
		fallthrough
857
	case '"':
858
		fallthrough
859
	case ' ', '\t':
860
		// Inside """ .. """ strings you can use \ to escape newlines, and any
861
		// amount of whitespace can be between the \ and \n.
862
		fallthrough
863
	case '\\':
864
		return lx.pop()
865
	case 'x':
866
		if !lx.tomlNext {
867
			return lx.error(errLexEscape{r})
868
		}
869
		return lexHexEscape
870
	case 'u':
871
		return lexShortUnicodeEscape
872
	case 'U':
873
		return lexLongUnicodeEscape
874
	}
875
	return lx.error(errLexEscape{r})
876
}
877

878
func lexHexEscape(lx *lexer) stateFn {
879
	var r rune
880
	for i := 0; i < 2; i++ {
881
		r = lx.next()
882
		if !isHexadecimal(r) {
883
			return lx.errorf(
884
				`expected two hexadecimal digits after '\x', but got %q instead`,
885
				lx.current())
886
		}
887
	}
888
	return lx.pop()
889
}
890

891
func lexShortUnicodeEscape(lx *lexer) stateFn {
892
	var r rune
893
	for i := 0; i < 4; i++ {
894
		r = lx.next()
895
		if !isHexadecimal(r) {
896
			return lx.errorf(
897
				`expected four hexadecimal digits after '\u', but got %q instead`,
898
				lx.current())
899
		}
900
	}
901
	return lx.pop()
902
}
903

904
func lexLongUnicodeEscape(lx *lexer) stateFn {
905
	var r rune
906
	for i := 0; i < 8; i++ {
907
		r = lx.next()
908
		if !isHexadecimal(r) {
909
			return lx.errorf(
910
				`expected eight hexadecimal digits after '\U', but got %q instead`,
911
				lx.current())
912
		}
913
	}
914
	return lx.pop()
915
}
916

917
// lexNumberOrDateStart processes the first character of a value which begins
918
// with a digit. It exists to catch values starting with '0', so that
919
// lexBaseNumberOrDate can differentiate base prefixed integers from other
920
// types.
921
func lexNumberOrDateStart(lx *lexer) stateFn {
922
	r := lx.next()
923
	switch r {
924
	case '0':
925
		return lexBaseNumberOrDate
926
	}
927

928
	if !isDigit(r) {
929
		// The only way to reach this state is if the value starts
930
		// with a digit, so specifically treat anything else as an
931
		// error.
932
		return lx.errorf("expected a digit but got %q", r)
933
	}
934

935
	return lexNumberOrDate
936
}
937

938
// lexNumberOrDate consumes either an integer, float or datetime.
939
func lexNumberOrDate(lx *lexer) stateFn {
940
	r := lx.next()
941
	if isDigit(r) {
942
		return lexNumberOrDate
943
	}
944
	switch r {
945
	case '-', ':':
946
		return lexDatetime
947
	case '_':
948
		return lexDecimalNumber
949
	case '.', 'e', 'E':
950
		return lexFloat
951
	}
952

953
	lx.backup()
954
	lx.emit(itemInteger)
955
	return lx.pop()
956
}
957

958
// lexDatetime consumes a Datetime, to a first approximation.
959
// The parser validates that it matches one of the accepted formats.
960
func lexDatetime(lx *lexer) stateFn {
961
	r := lx.next()
962
	if isDigit(r) {
963
		return lexDatetime
964
	}
965
	switch r {
966
	case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
967
		return lexDatetime
968
	}
969

970
	lx.backup()
971
	lx.emitTrim(itemDatetime)
972
	return lx.pop()
973
}
974

975
// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
976
func lexHexInteger(lx *lexer) stateFn {
977
	r := lx.next()
978
	if isHexadecimal(r) {
979
		return lexHexInteger
980
	}
981
	switch r {
982
	case '_':
983
		return lexHexInteger
984
	}
985

986
	lx.backup()
987
	lx.emit(itemInteger)
988
	return lx.pop()
989
}
990

991
// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
992
func lexOctalInteger(lx *lexer) stateFn {
993
	r := lx.next()
994
	if isOctal(r) {
995
		return lexOctalInteger
996
	}
997
	switch r {
998
	case '_':
999
		return lexOctalInteger
1000
	}
1001

1002
	lx.backup()
1003
	lx.emit(itemInteger)
1004
	return lx.pop()
1005
}
1006

1007
// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
1008
func lexBinaryInteger(lx *lexer) stateFn {
1009
	r := lx.next()
1010
	if isBinary(r) {
1011
		return lexBinaryInteger
1012
	}
1013
	switch r {
1014
	case '_':
1015
		return lexBinaryInteger
1016
	}
1017

1018
	lx.backup()
1019
	lx.emit(itemInteger)
1020
	return lx.pop()
1021
}
1022

1023
// lexDecimalNumber consumes a decimal float or integer.
1024
func lexDecimalNumber(lx *lexer) stateFn {
1025
	r := lx.next()
1026
	if isDigit(r) {
1027
		return lexDecimalNumber
1028
	}
1029
	switch r {
1030
	case '.', 'e', 'E':
1031
		return lexFloat
1032
	case '_':
1033
		return lexDecimalNumber
1034
	}
1035

1036
	lx.backup()
1037
	lx.emit(itemInteger)
1038
	return lx.pop()
1039
}
1040

1041
// lexDecimalNumber consumes the first digit of a number beginning with a sign.
1042
// It assumes the sign has already been consumed. Values which start with a sign
1043
// are only allowed to be decimal integers or floats.
1044
//
1045
// The special "nan" and "inf" values are also recognized.
1046
func lexDecimalNumberStart(lx *lexer) stateFn {
1047
	r := lx.next()
1048

1049
	// Special error cases to give users better error messages
1050
	switch r {
1051
	case 'i':
1052
		if !lx.accept('n') || !lx.accept('f') {
1053
			return lx.errorf("invalid float: '%s'", lx.current())
1054
		}
1055
		lx.emit(itemFloat)
1056
		return lx.pop()
1057
	case 'n':
1058
		if !lx.accept('a') || !lx.accept('n') {
1059
			return lx.errorf("invalid float: '%s'", lx.current())
1060
		}
1061
		lx.emit(itemFloat)
1062
		return lx.pop()
1063
	case '0':
1064
		p := lx.peek()
1065
		switch p {
1066
		case 'b', 'o', 'x':
1067
			return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
1068
		}
1069
	case '.':
1070
		return lx.errorf("floats must start with a digit, not '.'")
1071
	}
1072

1073
	if isDigit(r) {
1074
		return lexDecimalNumber
1075
	}
1076

1077
	return lx.errorf("expected a digit but got %q", r)
1078
}
1079

1080
// lexBaseNumberOrDate differentiates between the possible values which
1081
// start with '0'. It assumes that before reaching this state, the initial '0'
1082
// has been consumed.
1083
func lexBaseNumberOrDate(lx *lexer) stateFn {
1084
	r := lx.next()
1085
	// Note: All datetimes start with at least two digits, so we don't
1086
	// handle date characters (':', '-', etc.) here.
1087
	if isDigit(r) {
1088
		return lexNumberOrDate
1089
	}
1090
	switch r {
1091
	case '_':
1092
		// Can only be decimal, because there can't be an underscore
1093
		// between the '0' and the base designator, and dates can't
1094
		// contain underscores.
1095
		return lexDecimalNumber
1096
	case '.', 'e', 'E':
1097
		return lexFloat
1098
	case 'b':
1099
		r = lx.peek()
1100
		if !isBinary(r) {
1101
			lx.errorf("not a binary number: '%s%c'", lx.current(), r)
1102
		}
1103
		return lexBinaryInteger
1104
	case 'o':
1105
		r = lx.peek()
1106
		if !isOctal(r) {
1107
			lx.errorf("not an octal number: '%s%c'", lx.current(), r)
1108
		}
1109
		return lexOctalInteger
1110
	case 'x':
1111
		r = lx.peek()
1112
		if !isHexadecimal(r) {
1113
			lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
1114
		}
1115
		return lexHexInteger
1116
	}
1117

1118
	lx.backup()
1119
	lx.emit(itemInteger)
1120
	return lx.pop()
1121
}
1122

1123
// lexFloat consumes the elements of a float. It allows any sequence of
1124
// float-like characters, so floats emitted by the lexer are only a first
1125
// approximation and must be validated by the parser.
1126
func lexFloat(lx *lexer) stateFn {
1127
	r := lx.next()
1128
	if isDigit(r) {
1129
		return lexFloat
1130
	}
1131
	switch r {
1132
	case '_', '.', '-', '+', 'e', 'E':
1133
		return lexFloat
1134
	}
1135

1136
	lx.backup()
1137
	lx.emit(itemFloat)
1138
	return lx.pop()
1139
}
1140

1141
// lexBool consumes a bool string: 'true' or 'false.
1142
func lexBool(lx *lexer) stateFn {
1143
	var rs []rune
1144
	for {
1145
		r := lx.next()
1146
		if !unicode.IsLetter(r) {
1147
			lx.backup()
1148
			break
1149
		}
1150
		rs = append(rs, r)
1151
	}
1152
	s := string(rs)
1153
	switch s {
1154
	case "true", "false":
1155
		lx.emit(itemBool)
1156
		return lx.pop()
1157
	}
1158
	return lx.errorf("expected value but found %q instead", s)
1159
}
1160

1161
// lexCommentStart begins the lexing of a comment. It will emit
1162
// itemCommentStart and consume no characters, passing control to lexComment.
1163
func lexCommentStart(lx *lexer) stateFn {
1164
	lx.ignore()
1165
	lx.emit(itemCommentStart)
1166
	return lexComment
1167
}
1168

1169
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
1170
// It will consume *up to* the first newline character, and pass control
1171
// back to the last state on the stack.
1172
func lexComment(lx *lexer) stateFn {
1173
	switch r := lx.next(); {
1174
	case isNL(r) || r == eof:
1175
		lx.backup()
1176
		lx.emit(itemText)
1177
		return lx.pop()
1178
	default:
1179
		return lexComment
1180
	}
1181
}
1182

1183
// lexSkip ignores all slurped input and moves on to the next state.
1184
func lexSkip(lx *lexer, nextState stateFn) stateFn {
1185
	lx.ignore()
1186
	return nextState
1187
}
1188

1189
// String returns the unqualified name of the state function followed by
// "()", e.g. "lexFloat()". A nil state is rendered as "<nil>()".
func (s stateFn) String() string {
	if s == nil {
		return "<nil>()"
	}

	// The runtime reports a qualified name ("pkg/path.lexFloat"); keep only
	// what follows the last dot.
	qualified := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
	short := qualified
	if dot := strings.LastIndexByte(qualified, '.'); dot >= 0 {
		short = qualified[dot+1:]
	}
	return short + "()"
}
1199

1200
func (itype itemType) String() string {
1201
	switch itype {
1202
	case itemError:
1203
		return "Error"
1204
	case itemNIL:
1205
		return "NIL"
1206
	case itemEOF:
1207
		return "EOF"
1208
	case itemText:
1209
		return "Text"
1210
	case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
1211
		return "String"
1212
	case itemBool:
1213
		return "Bool"
1214
	case itemInteger:
1215
		return "Integer"
1216
	case itemFloat:
1217
		return "Float"
1218
	case itemDatetime:
1219
		return "DateTime"
1220
	case itemTableStart:
1221
		return "TableStart"
1222
	case itemTableEnd:
1223
		return "TableEnd"
1224
	case itemKeyStart:
1225
		return "KeyStart"
1226
	case itemKeyEnd:
1227
		return "KeyEnd"
1228
	case itemArray:
1229
		return "Array"
1230
	case itemArrayEnd:
1231
		return "ArrayEnd"
1232
	case itemCommentStart:
1233
		return "CommentStart"
1234
	case itemInlineTableStart:
1235
		return "InlineTableStart"
1236
	case itemInlineTableEnd:
1237
		return "InlineTableEnd"
1238
	}
1239
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
1240
}
1241

1242
// String renders the item as "(Type, value)", using the item type's own
// String form for the first element.
func (it item) String() string {
	kind, text := it.typ.String(), it.val
	return fmt.Sprintf("(%s, %s)", kind, text)
}
1245

1246
// isWhitespace reports whether r is a tab or a space.
func isWhitespace(r rune) bool {
	switch r {
	case '\t', ' ':
		return true
	}
	return false
}
1247
// isNL reports whether r is a line feed or a carriage return.
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
1248
// isControl reports whether r is a control character: 0x00 through 0x1f or
// 0x7f, with tab, carriage return and line feed excluded.
func isControl(r rune) bool {
	if r == '\t' || r == '\r' || r == '\n' {
		return false
	}
	return r == 0x7f || (0x00 <= r && r <= 0x1f)
}
1256
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
1257
// isBinary reports whether r is a binary digit, '0' or '1'.
func isBinary(r rune) bool {
	switch r {
	case '0', '1':
		return true
	}
	return false
}
1258
// isOctal reports whether r is an octal digit, '0' through '7'.
func isOctal(r rune) bool {
	return '0' <= r && r <= '7'
}
1259
// isHexadecimal reports whether r is an ASCII hexadecimal digit in either
// letter case.
func isHexadecimal(r rune) bool {
	switch {
	case '0' <= r && r <= '9',
		'a' <= r && r <= 'f',
		'A' <= r && r <= 'F':
		return true
	}
	return false
}
1262

1263
// isBareKeyChar reports whether r may appear in a bare (unquoted) key.
//
// ASCII letters, ASCII digits, '_' and '-' are always accepted. When
// tomlNext is set, the additional non-ASCII code point ranges listed below
// are accepted as well.
func isBareKeyChar(r rune, tomlNext bool) bool {
	switch {
	case 'A' <= r && r <= 'Z',
		'a' <= r && r <= 'z',
		'0' <= r && r <= '9',
		r == '_', r == '-':
		return true
	case !tomlNext:
		return false
	}

	// Extended set, only reachable with tomlNext enabled.
	switch {
	case r == 0xb2, r == 0xb3, r == 0xb9,
		0xbc <= r && r <= 0xbe,
		0xc0 <= r && r <= 0xd6,
		0xd8 <= r && r <= 0xf6,
		0xf8 <= r && r <= 0x037d,
		0x037f <= r && r <= 0x1fff,
		0x200c <= r && r <= 0x200d,
		0x203f <= r && r <= 0x2040,
		0x2070 <= r && r <= 0x218f,
		0x2460 <= r && r <= 0x24ff,
		0x2c00 <= r && r <= 0x2fef,
		0x3001 <= r && r <= 0xd7ff,
		0xf900 <= r && r <= 0xfdcf,
		0xfdf0 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0xeffff:
		return true
	}
	return false
}
1284

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.