podman

Форк
0
1239 строк · 26.3 Кб
1
package unstable
2

3
import (
4
	"bytes"
5
	"fmt"
6
	"unicode"
7

8
	"github.com/pelletier/go-toml/v2/internal/characters"
9
	"github.com/pelletier/go-toml/v2/internal/danger"
10
)
11

12
// ParserError describes an error relative to the content of the document.
//
// It cannot outlive the instance of Parser it refers to, and may cause panics
// if the parser is reset.
type ParserError struct {
	// Highlight is the span of input the error points at. It is expected to
	// be a subslice of the parser input.
	Highlight []byte
	// Message is the text returned by Error.
	Message string
	// Key is optional.
	Key []string
}

// Error is the implementation of the error interface. It returns Message
// unchanged.
func (e *ParserError) Error() string {
	return e.Message
}
26

27
// NewParserError is a convenience function to create a ParserError
28
//
29
// Warning: Highlight needs to be a subslice of Parser.data, so only slices
30
// returned by Parser.Raw are valid candidates.
31
func NewParserError(highlight []byte, format string, args ...interface{}) error {
32
	return &ParserError{
33
		Highlight: highlight,
34
		Message:   fmt.Errorf(format, args...).Error(),
35
	}
36
}
37

38
// Parser scans over a TOML-encoded document and generates an iterative AST.
39
//
40
// To prime the Parser, first reset it with the contents of a TOML document.
41
// Then, process all top-level expressions sequentially. See Example.
42
//
43
// Don't forget to check Error() after you're done parsing.
44
//
45
// Each top-level expression needs to be fully processed before calling
46
// NextExpression() again. Otherwise, calls to various Node methods may panic if
47
// the parser has moved on the next expression.
48
//
49
// For performance reasons, go-toml doesn't make a copy of the input bytes to
50
// the parser. Make sure to copy all the bytes you need to outlive the slice
51
// given to the parser.
52
type Parser struct {
53
	data    []byte
54
	builder builder
55
	ref     reference
56
	left    []byte
57
	err     error
58
	first   bool
59

60
	KeepComments bool
61
}
62

63
// Data returns the slice provided to the last call to Reset.
64
func (p *Parser) Data() []byte {
65
	return p.data
66
}
67

68
// Range returns a range description that corresponds to a given slice of the
69
// input. If the argument is not a subslice of the parser input, this function
70
// panics.
71
func (p *Parser) Range(b []byte) Range {
72
	return Range{
73
		Offset: uint32(danger.SubsliceOffset(p.data, b)),
74
		Length: uint32(len(b)),
75
	}
76
}
77

78
// Raw returns the slice corresponding to the bytes in the given range.
79
func (p *Parser) Raw(raw Range) []byte {
80
	return p.data[raw.Offset : raw.Offset+raw.Length]
81
}
82

83
// Reset brings the parser to its initial state for a given input. It wipes an
84
// reuses internal storage to reduce allocation.
85
func (p *Parser) Reset(b []byte) {
86
	p.builder.Reset()
87
	p.ref = invalidReference
88
	p.data = b
89
	p.left = b
90
	p.err = nil
91
	p.first = true
92
}
93

94
// NextExpression parses the next top-level expression. If an expression was
95
// successfully parsed, it returns true. If the parser is at the end of the
96
// document or an error occurred, it returns false.
97
//
98
// Retrieve the parsed expression with Expression().
99
func (p *Parser) NextExpression() bool {
100
	if len(p.left) == 0 || p.err != nil {
101
		return false
102
	}
103

104
	p.builder.Reset()
105
	p.ref = invalidReference
106

107
	for {
108
		if len(p.left) == 0 || p.err != nil {
109
			return false
110
		}
111

112
		if !p.first {
113
			p.left, p.err = p.parseNewline(p.left)
114
		}
115

116
		if len(p.left) == 0 || p.err != nil {
117
			return false
118
		}
119

120
		p.ref, p.left, p.err = p.parseExpression(p.left)
121

122
		if p.err != nil {
123
			return false
124
		}
125

126
		p.first = false
127

128
		if p.ref.Valid() {
129
			return true
130
		}
131
	}
132
}
133

134
// Expression returns a pointer to the node representing the last successfully
135
// parsed expression.
136
func (p *Parser) Expression() *Node {
137
	return p.builder.NodeAt(p.ref)
138
}
139

140
// Error returns any error that has occurred during parsing.
141
func (p *Parser) Error() error {
142
	return p.err
143
}
144

145
// Position describes a position in the input.
type Position struct {
	// Number of bytes from the beginning of the input.
	Offset int
	// Line number, starting at 1.
	Line int
	// Column number, starting at 1.
	Column int
}
154

155
// Shape describes the position of a range in the input.
156
type Shape struct {
157
	Start Position
158
	End   Position
159
}
160

161
func (p *Parser) position(b []byte) Position {
162
	offset := danger.SubsliceOffset(p.data, b)
163

164
	lead := p.data[:offset]
165

166
	return Position{
167
		Offset: offset,
168
		Line:   bytes.Count(lead, []byte{'\n'}) + 1,
169
		Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
170
	}
171
}
172

173
// Shape returns the shape of the given range in the input.  Will
174
// panic if the range is not a subslice of the input.
175
func (p *Parser) Shape(r Range) Shape {
176
	raw := p.Raw(r)
177
	return Shape{
178
		Start: p.position(raw),
179
		End:   p.position(raw[r.Length:]),
180
	}
181
}
182

183
// parseNewline consumes one newline at the start of b and returns the rest of
// the input. A '\n' is consumed directly; input starting with '\r' is handed
// to scanWindowsNewline. Any other first byte yields a ParserError.
//
// b must not be empty: its first byte is read without a length check.
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
	if b[0] == '\n' {
		return b[1:], nil
	}

	if b[0] == '\r' {
		_, rest, err := scanWindowsNewline(b)
		return rest, err
	}

	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
}
195

196
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
197
	ref := invalidReference
198
	data, rest, err := scanComment(b)
199
	if p.KeepComments && err == nil {
200
		ref = p.builder.Push(Node{
201
			Kind: Comment,
202
			Raw:  p.Range(data),
203
			Data: data,
204
		})
205
	}
206
	return ref, rest, err
207
}
208

209
// parseExpression parses one top-level expression at the start of b and
// returns its reference along with the remaining input.
//
// The returned reference is invalidReference, with a nil error, when the line
// is empty, holds only whitespace, or holds only a comment that is not kept.
// A trailing comment that produced a node is chained to the expression node.
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
	// expression =  ws [ comment ]
	// expression =/ ws keyval ws [ comment ]
	// expression =/ ws table ws [ comment ]
	ref := invalidReference

	b = p.parseWhitespace(b)

	if len(b) == 0 {
		return ref, b, nil
	}

	if b[0] == '#' {
		return p.parseComment(b)
	}

	// Leave the newline in place for the caller to consume.
	if b[0] == '\n' || b[0] == '\r' {
		return ref, b, nil
	}

	var err error
	if b[0] == '[' {
		ref, b, err = p.parseTable(b)
	} else {
		ref, b, err = p.parseKeyval(b)
	}

	if err != nil {
		return ref, nil, err
	}

	b = p.parseWhitespace(b)

	if len(b) > 0 && b[0] == '#' {
		cref, rest, err := p.parseComment(b)
		if cref != invalidReference {
			p.builder.Chain(ref, cref)
		}
		return ref, rest, err
	}

	return ref, b, nil
}
253

254
// parseTable dispatches to parseArrayTable when b starts with "[[" and to
// parseStdTable otherwise. It only inspects b[1]; the first byte is assumed
// to have been checked by the caller.
func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
	// table = std-table / array-table
	if len(b) > 1 && b[1] == '[' {
		return p.parseArrayTable(b)
	}

	return p.parseStdTable(b)
}
262

263
func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
264
	// array-table = array-table-open key array-table-close
265
	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
266
	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
267
	ref := p.builder.Push(Node{
268
		Kind: ArrayTable,
269
	})
270

271
	b = b[2:]
272
	b = p.parseWhitespace(b)
273

274
	k, b, err := p.parseKey(b)
275
	if err != nil {
276
		return ref, nil, err
277
	}
278

279
	p.builder.AttachChild(ref, k)
280
	b = p.parseWhitespace(b)
281

282
	b, err = expect(']', b)
283
	if err != nil {
284
		return ref, nil, err
285
	}
286

287
	b, err = expect(']', b)
288

289
	return ref, b, err
290
}
291

292
func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
293
	// std-table = std-table-open key std-table-close
294
	// std-table-open  = %x5B ws     ; [ Left square bracket
295
	// std-table-close = ws %x5D     ; ] Right square bracket
296
	ref := p.builder.Push(Node{
297
		Kind: Table,
298
	})
299

300
	b = b[1:]
301
	b = p.parseWhitespace(b)
302

303
	key, b, err := p.parseKey(b)
304
	if err != nil {
305
		return ref, nil, err
306
	}
307

308
	p.builder.AttachChild(ref, key)
309

310
	b = p.parseWhitespace(b)
311

312
	b, err = expect(']', b)
313

314
	return ref, b, err
315
}
316

317
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
318
	// keyval = key keyval-sep val
319
	ref := p.builder.Push(Node{
320
		Kind: KeyValue,
321
	})
322

323
	key, b, err := p.parseKey(b)
324
	if err != nil {
325
		return invalidReference, nil, err
326
	}
327

328
	// keyval-sep = ws %x3D ws ; =
329

330
	b = p.parseWhitespace(b)
331

332
	if len(b) == 0 {
333
		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
334
	}
335

336
	b, err = expect('=', b)
337
	if err != nil {
338
		return invalidReference, nil, err
339
	}
340

341
	b = p.parseWhitespace(b)
342

343
	valRef, b, err := p.parseVal(b)
344
	if err != nil {
345
		return ref, b, err
346
	}
347

348
	p.builder.Chain(valRef, key)
349
	p.builder.AttachChild(ref, valRef)
350

351
	return ref, b, err
352
}
353

354
//nolint:cyclop,funlen
355
func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
356
	// val = string / boolean / array / inline-table / date-time / float / integer
357
	ref := invalidReference
358

359
	if len(b) == 0 {
360
		return ref, nil, NewParserError(b, "expected value, not eof")
361
	}
362

363
	var err error
364
	c := b[0]
365

366
	switch c {
367
	case '"':
368
		var raw []byte
369
		var v []byte
370
		if scanFollowsMultilineBasicStringDelimiter(b) {
371
			raw, v, b, err = p.parseMultilineBasicString(b)
372
		} else {
373
			raw, v, b, err = p.parseBasicString(b)
374
		}
375

376
		if err == nil {
377
			ref = p.builder.Push(Node{
378
				Kind: String,
379
				Raw:  p.Range(raw),
380
				Data: v,
381
			})
382
		}
383

384
		return ref, b, err
385
	case '\'':
386
		var raw []byte
387
		var v []byte
388
		if scanFollowsMultilineLiteralStringDelimiter(b) {
389
			raw, v, b, err = p.parseMultilineLiteralString(b)
390
		} else {
391
			raw, v, b, err = p.parseLiteralString(b)
392
		}
393

394
		if err == nil {
395
			ref = p.builder.Push(Node{
396
				Kind: String,
397
				Raw:  p.Range(raw),
398
				Data: v,
399
			})
400
		}
401

402
		return ref, b, err
403
	case 't':
404
		if !scanFollowsTrue(b) {
405
			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
406
		}
407

408
		ref = p.builder.Push(Node{
409
			Kind: Bool,
410
			Data: b[:4],
411
		})
412

413
		return ref, b[4:], nil
414
	case 'f':
415
		if !scanFollowsFalse(b) {
416
			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
417
		}
418

419
		ref = p.builder.Push(Node{
420
			Kind: Bool,
421
			Data: b[:5],
422
		})
423

424
		return ref, b[5:], nil
425
	case '[':
426
		return p.parseValArray(b)
427
	case '{':
428
		return p.parseInlineTable(b)
429
	default:
430
		return p.parseIntOrFloatOrDateTime(b)
431
	}
432
}
433

434
// atmost returns the first n bytes of b, or b itself when it holds n bytes or
// fewer. The result shares b's backing array.
func atmost(b []byte, n int) []byte {
	if n >= len(b) {
		return b
	}

	return b[:n]
}
441

442
// parseLiteralString scans a literal string at the start of b.
//
// It returns, in order: the raw token as produced by scanLiteralString, the
// token without its first and last byte, and the remaining input.
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	v, rest, err := scanLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	return v, v[1 : len(v)-1], rest, nil
}
450

451
func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
452
	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
453
	// inline-table-open  = %x7B ws     ; {
454
	// inline-table-close = ws %x7D     ; }
455
	// inline-table-sep   = ws %x2C ws  ; , Comma
456
	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
457
	parent := p.builder.Push(Node{
458
		Kind: InlineTable,
459
		Raw:  p.Range(b[:1]),
460
	})
461

462
	first := true
463

464
	var child reference
465

466
	b = b[1:]
467

468
	var err error
469

470
	for len(b) > 0 {
471
		previousB := b
472
		b = p.parseWhitespace(b)
473

474
		if len(b) == 0 {
475
			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
476
		}
477

478
		if b[0] == '}' {
479
			break
480
		}
481

482
		if !first {
483
			b, err = expect(',', b)
484
			if err != nil {
485
				return parent, nil, err
486
			}
487
			b = p.parseWhitespace(b)
488
		}
489

490
		var kv reference
491

492
		kv, b, err = p.parseKeyval(b)
493
		if err != nil {
494
			return parent, nil, err
495
		}
496

497
		if first {
498
			p.builder.AttachChild(parent, kv)
499
		} else {
500
			p.builder.Chain(child, kv)
501
		}
502
		child = kv
503

504
		first = false
505
	}
506

507
	rest, err := expect('}', b)
508

509
	return parent, rest, err
510
}
511

512
//nolint:funlen,cyclop
513
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
514
	// array = array-open [ array-values ] ws-comment-newline array-close
515
	// array-open =  %x5B ; [
516
	// array-close = %x5D ; ]
517
	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
518
	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
519
	// array-sep = %x2C  ; , Comma
520
	// ws-comment-newline = *( wschar / [ comment ] newline )
521
	arrayStart := b
522
	b = b[1:]
523

524
	parent := p.builder.Push(Node{
525
		Kind: Array,
526
	})
527

528
	// First indicates whether the parser is looking for the first element
529
	// (non-comment) of the array.
530
	first := true
531

532
	lastChild := invalidReference
533

534
	addChild := func(valueRef reference) {
535
		if lastChild == invalidReference {
536
			p.builder.AttachChild(parent, valueRef)
537
		} else {
538
			p.builder.Chain(lastChild, valueRef)
539
		}
540
		lastChild = valueRef
541
	}
542

543
	var err error
544
	for len(b) > 0 {
545
		cref := invalidReference
546
		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
547
		if err != nil {
548
			return parent, nil, err
549
		}
550

551
		if cref != invalidReference {
552
			addChild(cref)
553
		}
554

555
		if len(b) == 0 {
556
			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
557
		}
558

559
		if b[0] == ']' {
560
			break
561
		}
562

563
		if b[0] == ',' {
564
			if first {
565
				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
566
			}
567
			b = b[1:]
568

569
			cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
570
			if err != nil {
571
				return parent, nil, err
572
			}
573
			if cref != invalidReference {
574
				addChild(cref)
575
			}
576
		} else if !first {
577
			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
578
		}
579

580
		// TOML allows trailing commas in arrays.
581
		if len(b) > 0 && b[0] == ']' {
582
			break
583
		}
584

585
		var valueRef reference
586
		valueRef, b, err = p.parseVal(b)
587
		if err != nil {
588
			return parent, nil, err
589
		}
590

591
		addChild(valueRef)
592

593
		cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
594
		if err != nil {
595
			return parent, nil, err
596
		}
597
		if cref != invalidReference {
598
			addChild(cref)
599
		}
600

601
		first = false
602
	}
603

604
	rest, err := expect(']', b)
605

606
	return parent, rest, err
607
}
608

609
// parseOptionalWhitespaceCommentNewline consumes any run of whitespace,
// comments and newlines at the start of b and returns the remaining input.
//
// The returned reference is the first comment node produced, or
// invalidReference when there was none (parseComment only produces nodes when
// KeepComments is set). The second comment node is attached as a child of the
// first, and later ones are chained after it.
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
	rootCommentRef := invalidReference
	latestCommentRef := invalidReference

	addComment := func(ref reference) {
		if rootCommentRef == invalidReference {
			rootCommentRef = ref
		} else if latestCommentRef == invalidReference {
			p.builder.AttachChild(rootCommentRef, ref)
			latestCommentRef = ref
		} else {
			p.builder.Chain(latestCommentRef, ref)
			latestCommentRef = ref
		}
	}

	for len(b) > 0 {
		var err error
		b = p.parseWhitespace(b)

		if len(b) > 0 && b[0] == '#' {
			var ref reference
			ref, b, err = p.parseComment(b)
			if err != nil {
				return invalidReference, nil, err
			}
			if ref != invalidReference {
				addComment(ref)
			}
		}

		if len(b) == 0 {
			break
		}

		// Anything other than a newline ends the run.
		if b[0] == '\n' || b[0] == '\r' {
			b, err = p.parseNewline(b)
			if err != nil {
				return invalidReference, nil, err
			}
		} else {
			break
		}
	}

	return rootCommentRef, b, nil
}
656

657
// parseMultilineLiteralString scans a multi-line literal string at the start
// of b.
//
// It returns, in order: the raw token as produced by
// scanMultilineLiteralString, the content between the 3-byte delimiters with
// a newline directly after the opening delimiter dropped, and the remaining
// input.
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
	token, rest, err := scanMultilineLiteralString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	return token, token[i : len(token)-3], rest, nil
}
674

675
//nolint:funlen,gocognit,cyclop
676
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
677
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
678
	// ml-basic-string-delim
679
	// ml-basic-string-delim = 3quotation-mark
680
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
681
	//
682
	// mlb-content = mlb-char / newline / mlb-escaped-nl
683
	// mlb-char = mlb-unescaped / escaped
684
	// mlb-quotes = 1*2quotation-mark
685
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
686
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
687
	token, escaped, rest, err := scanMultilineBasicString(b)
688
	if err != nil {
689
		return nil, nil, nil, err
690
	}
691

692
	i := 3
693

694
	// skip the immediate new line
695
	if token[i] == '\n' {
696
		i++
697
	} else if token[i] == '\r' && token[i+1] == '\n' {
698
		i += 2
699
	}
700

701
	// fast path
702
	startIdx := i
703
	endIdx := len(token) - len(`"""`)
704

705
	if !escaped {
706
		str := token[startIdx:endIdx]
707
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
708
		if verr.Zero() {
709
			return token, str, rest, nil
710
		}
711
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
712
	}
713

714
	var builder bytes.Buffer
715

716
	// The scanner ensures that the token starts and ends with quotes and that
717
	// escapes are balanced.
718
	for i < len(token)-3 {
719
		c := token[i]
720

721
		//nolint:nestif
722
		if c == '\\' {
723
			// When the last non-whitespace character on a line is an unescaped \,
724
			// it will be trimmed along with all whitespace (including newlines) up
725
			// to the next non-whitespace character or closing delimiter.
726

727
			isLastNonWhitespaceOnLine := false
728
			j := 1
729
		findEOLLoop:
730
			for ; j < len(token)-3-i; j++ {
731
				switch token[i+j] {
732
				case ' ', '\t':
733
					continue
734
				case '\r':
735
					if token[i+j+1] == '\n' {
736
						continue
737
					}
738
				case '\n':
739
					isLastNonWhitespaceOnLine = true
740
				}
741
				break findEOLLoop
742
			}
743
			if isLastNonWhitespaceOnLine {
744
				i += j
745
				for ; i < len(token)-3; i++ {
746
					c := token[i]
747
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
748
						i--
749
						break
750
					}
751
				}
752
				i++
753
				continue
754
			}
755

756
			// handle escaping
757
			i++
758
			c = token[i]
759

760
			switch c {
761
			case '"', '\\':
762
				builder.WriteByte(c)
763
			case 'b':
764
				builder.WriteByte('\b')
765
			case 'f':
766
				builder.WriteByte('\f')
767
			case 'n':
768
				builder.WriteByte('\n')
769
			case 'r':
770
				builder.WriteByte('\r')
771
			case 't':
772
				builder.WriteByte('\t')
773
			case 'e':
774
				builder.WriteByte(0x1B)
775
			case 'u':
776
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
777
				if err != nil {
778
					return nil, nil, nil, err
779
				}
780
				builder.WriteRune(x)
781
				i += 4
782
			case 'U':
783
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
784
				if err != nil {
785
					return nil, nil, nil, err
786
				}
787

788
				builder.WriteRune(x)
789
				i += 8
790
			default:
791
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
792
			}
793
			i++
794
		} else {
795
			size := characters.Utf8ValidNext(token[i:])
796
			if size == 0 {
797
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
798
			}
799
			builder.Write(token[i : i+size])
800
			i += size
801
		}
802
	}
803

804
	return token, builder.Bytes(), rest, nil
805
}
806

807
func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
808
	// key = simple-key / dotted-key
809
	// simple-key = quoted-key / unquoted-key
810
	//
811
	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
812
	// quoted-key = basic-string / literal-string
813
	// dotted-key = simple-key 1*( dot-sep simple-key )
814
	//
815
	// dot-sep   = ws %x2E ws  ; . Period
816
	raw, key, b, err := p.parseSimpleKey(b)
817
	if err != nil {
818
		return invalidReference, nil, err
819
	}
820

821
	ref := p.builder.Push(Node{
822
		Kind: Key,
823
		Raw:  p.Range(raw),
824
		Data: key,
825
	})
826

827
	for {
828
		b = p.parseWhitespace(b)
829
		if len(b) > 0 && b[0] == '.' {
830
			b = p.parseWhitespace(b[1:])
831

832
			raw, key, b, err = p.parseSimpleKey(b)
833
			if err != nil {
834
				return ref, nil, err
835
			}
836

837
			p.builder.PushAndChain(Node{
838
				Kind: Key,
839
				Raw:  p.Range(raw),
840
				Data: key,
841
			})
842
		} else {
843
			break
844
		}
845
	}
846

847
	return ref, b, nil
848
}
849

850
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
851
	if len(b) == 0 {
852
		return nil, nil, nil, NewParserError(b, "expected key but found none")
853
	}
854

855
	// simple-key = quoted-key / unquoted-key
856
	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
857
	// quoted-key = basic-string / literal-string
858
	switch {
859
	case b[0] == '\'':
860
		return p.parseLiteralString(b)
861
	case b[0] == '"':
862
		return p.parseBasicString(b)
863
	case isUnquotedKeyChar(b[0]):
864
		key, rest = scanUnquotedKey(b)
865
		return key, key, rest, nil
866
	default:
867
		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
868
	}
869
}
870

871
//nolint:funlen,cyclop
872
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
873
	// basic-string = quotation-mark *basic-char quotation-mark
874
	// quotation-mark = %x22            ; "
875
	// basic-char = basic-unescaped / escaped
876
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
877
	// escaped = escape escape-seq-char
878
	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
879
	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
880
	// escape-seq-char =/ %x62         ; b    backspace       U+0008
881
	// escape-seq-char =/ %x66         ; f    form feed       U+000C
882
	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
883
	// escape-seq-char =/ %x72         ; r    carriage return U+000D
884
	// escape-seq-char =/ %x74         ; t    tab             U+0009
885
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
886
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
887
	token, escaped, rest, err := scanBasicString(b)
888
	if err != nil {
889
		return nil, nil, nil, err
890
	}
891

892
	startIdx := len(`"`)
893
	endIdx := len(token) - len(`"`)
894

895
	// Fast path. If there is no escape sequence, the string should just be
896
	// an UTF-8 encoded string, which is the same as Go. In that case,
897
	// validate the string and return a direct reference to the buffer.
898
	if !escaped {
899
		str := token[startIdx:endIdx]
900
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
901
		if verr.Zero() {
902
			return token, str, rest, nil
903
		}
904
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
905
	}
906

907
	i := startIdx
908

909
	var builder bytes.Buffer
910

911
	// The scanner ensures that the token starts and ends with quotes and that
912
	// escapes are balanced.
913
	for i < len(token)-1 {
914
		c := token[i]
915
		if c == '\\' {
916
			i++
917
			c = token[i]
918

919
			switch c {
920
			case '"', '\\':
921
				builder.WriteByte(c)
922
			case 'b':
923
				builder.WriteByte('\b')
924
			case 'f':
925
				builder.WriteByte('\f')
926
			case 'n':
927
				builder.WriteByte('\n')
928
			case 'r':
929
				builder.WriteByte('\r')
930
			case 't':
931
				builder.WriteByte('\t')
932
			case 'e':
933
				builder.WriteByte(0x1B)
934
			case 'u':
935
				x, err := hexToRune(token[i+1:len(token)-1], 4)
936
				if err != nil {
937
					return nil, nil, nil, err
938
				}
939

940
				builder.WriteRune(x)
941
				i += 4
942
			case 'U':
943
				x, err := hexToRune(token[i+1:len(token)-1], 8)
944
				if err != nil {
945
					return nil, nil, nil, err
946
				}
947

948
				builder.WriteRune(x)
949
				i += 8
950
			default:
951
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
952
			}
953
			i++
954
		} else {
955
			size := characters.Utf8ValidNext(token[i:])
956
			if size == 0 {
957
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
958
			}
959
			builder.Write(token[i : i+size])
960
			i += size
961
		}
962
	}
963

964
	return token, builder.Bytes(), rest, nil
965
}
966

967
func hexToRune(b []byte, length int) (rune, error) {
968
	if len(b) < length {
969
		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
970
	}
971
	b = b[:length]
972

973
	var r uint32
974
	for i, c := range b {
975
		d := uint32(0)
976
		switch {
977
		case '0' <= c && c <= '9':
978
			d = uint32(c - '0')
979
		case 'a' <= c && c <= 'f':
980
			d = uint32(c - 'a' + 10)
981
		case 'A' <= c && c <= 'F':
982
			d = uint32(c - 'A' + 10)
983
		default:
984
			return -1, NewParserError(b[i:i+1], "non-hex character")
985
		}
986
		r = r*16 + d
987
	}
988

989
	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
990
		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
991
	}
992

993
	return rune(r), nil
994
}
995

996
// parseWhitespace skips the whitespace found by scanWhitespace at the start
// of b and returns what follows it.
func (p *Parser) parseWhitespace(b []byte) []byte {
	// ws = *wschar
	// wschar =  %x20  ; Space
	// wschar =/ %x09  ; Horizontal tab
	_, rest := scanWhitespace(b)

	return rest
}
1004

1005
//nolint:cyclop
1006
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
1007
	switch b[0] {
1008
	case 'i':
1009
		if !scanFollowsInf(b) {
1010
			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
1011
		}
1012

1013
		return p.builder.Push(Node{
1014
			Kind: Float,
1015
			Data: b[:3],
1016
		}), b[3:], nil
1017
	case 'n':
1018
		if !scanFollowsNan(b) {
1019
			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
1020
		}
1021

1022
		return p.builder.Push(Node{
1023
			Kind: Float,
1024
			Data: b[:3],
1025
		}), b[3:], nil
1026
	case '+', '-':
1027
		return p.scanIntOrFloat(b)
1028
	}
1029

1030
	if len(b) < 3 {
1031
		return p.scanIntOrFloat(b)
1032
	}
1033

1034
	s := 5
1035
	if len(b) < s {
1036
		s = len(b)
1037
	}
1038

1039
	for idx, c := range b[:s] {
1040
		if isDigit(c) {
1041
			continue
1042
		}
1043

1044
		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
1045
			return p.scanDateTime(b)
1046
		}
1047

1048
		break
1049
	}
1050

1051
	return p.scanIntOrFloat(b)
1052
}
1053

1054
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
1055
	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
1056
	// followed by a digit.
1057
	hasDate := false
1058
	hasTime := false
1059
	hasTz := false
1060
	seenSpace := false
1061

1062
	i := 0
1063
byteLoop:
1064
	for ; i < len(b); i++ {
1065
		c := b[i]
1066

1067
		switch {
1068
		case isDigit(c):
1069
		case c == '-':
1070
			hasDate = true
1071
			const minOffsetOfTz = 8
1072
			if i >= minOffsetOfTz {
1073
				hasTz = true
1074
			}
1075
		case c == 'T' || c == 't' || c == ':' || c == '.':
1076
			hasTime = true
1077
		case c == '+' || c == '-' || c == 'Z' || c == 'z':
1078
			hasTz = true
1079
		case c == ' ':
1080
			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
1081
				i += 2
1082
				// Avoid reaching past the end of the document in case the time
1083
				// is malformed. See TestIssue585.
1084
				if i >= len(b) {
1085
					i--
1086
				}
1087
				seenSpace = true
1088
				hasTime = true
1089
			} else {
1090
				break byteLoop
1091
			}
1092
		default:
1093
			break byteLoop
1094
		}
1095
	}
1096

1097
	var kind Kind
1098

1099
	if hasTime {
1100
		if hasDate {
1101
			if hasTz {
1102
				kind = DateTime
1103
			} else {
1104
				kind = LocalDateTime
1105
			}
1106
		} else {
1107
			kind = LocalTime
1108
		}
1109
	} else {
1110
		kind = LocalDate
1111
	}
1112

1113
	return p.builder.Push(Node{
1114
		Kind: kind,
1115
		Data: b[:i],
1116
	}), b[i:], nil
1117
}
1118

1119
//nolint:funlen,gocognit,cyclop
1120
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
1121
	i := 0
1122

1123
	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
1124
		var isValidRune validRuneFn
1125

1126
		switch b[1] {
1127
		case 'x':
1128
			isValidRune = isValidHexRune
1129
		case 'o':
1130
			isValidRune = isValidOctalRune
1131
		case 'b':
1132
			isValidRune = isValidBinaryRune
1133
		default:
1134
			i++
1135
		}
1136

1137
		if isValidRune != nil {
1138
			i += 2
1139
			for ; i < len(b); i++ {
1140
				if !isValidRune(b[i]) {
1141
					break
1142
				}
1143
			}
1144
		}
1145

1146
		return p.builder.Push(Node{
1147
			Kind: Integer,
1148
			Data: b[:i],
1149
		}), b[i:], nil
1150
	}
1151

1152
	isFloat := false
1153

1154
	for ; i < len(b); i++ {
1155
		c := b[i]
1156

1157
		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
1158
			continue
1159
		}
1160

1161
		if c == '.' || c == 'e' || c == 'E' {
1162
			isFloat = true
1163

1164
			continue
1165
		}
1166

1167
		if c == 'i' {
1168
			if scanFollowsInf(b[i:]) {
1169
				return p.builder.Push(Node{
1170
					Kind: Float,
1171
					Data: b[:i+3],
1172
				}), b[i+3:], nil
1173
			}
1174

1175
			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
1176
		}
1177

1178
		if c == 'n' {
1179
			if scanFollowsNan(b[i:]) {
1180
				return p.builder.Push(Node{
1181
					Kind: Float,
1182
					Data: b[:i+3],
1183
				}), b[i+3:], nil
1184
			}
1185

1186
			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
1187
		}
1188

1189
		break
1190
	}
1191

1192
	if i == 0 {
1193
		return invalidReference, b, NewParserError(b, "incomplete number")
1194
	}
1195

1196
	kind := Integer
1197

1198
	if isFloat {
1199
		kind = Float
1200
	}
1201

1202
	return p.builder.Push(Node{
1203
		Kind: kind,
1204
		Data: b[:i],
1205
	}), b[i:], nil
1206
}
1207

1208
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return r >= '0' && r <= '9'
}
1211

1212
// validRuneFn is a predicate over a single byte. scanIntOrFloat uses it to
// pick which bytes may follow a base prefix.
type validRuneFn func(r byte) bool
1213

1214
// isValidHexRune reports whether r is an ASCII hexadecimal digit (either
// case) or an underscore.
func isValidHexRune(r byte) bool {
	return r >= 'a' && r <= 'f' ||
		r >= 'A' && r <= 'F' ||
		r >= '0' && r <= '9' ||
		r == '_'
}
1220

1221
// isValidOctalRune reports whether r is an ASCII octal digit or an
// underscore.
func isValidOctalRune(r byte) bool {
	return r >= '0' && r <= '7' || r == '_'
}
1224

1225
// isValidBinaryRune reports whether r is '0', '1' or an underscore.
func isValidBinaryRune(r byte) bool {
	return r == '0' || r == '1' || r == '_'
}
1228

1229
func expect(x byte, b []byte) ([]byte, error) {
1230
	if len(b) == 0 {
1231
		return nil, NewParserError(b, "expected character %c but the document ended here", x)
1232
	}
1233

1234
	if b[0] != x {
1235
		return nil, NewParserError(b[0:1], "expected character %c", x)
1236
	}
1237

1238
	return b[1:], nil
1239
}
1240

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.