podman
215 строк · 4.7 Кб
1package jsoniter
2
3import (
4"fmt"
5"unicode/utf16"
6)
7
8// ReadString read string from iterator
9func (iter *Iterator) ReadString() (ret string) {
10c := iter.nextToken()
11if c == '"' {
12for i := iter.head; i < iter.tail; i++ {
13c := iter.buf[i]
14if c == '"' {
15ret = string(iter.buf[iter.head:i])
16iter.head = i + 1
17return ret
18} else if c == '\\' {
19break
20} else if c < ' ' {
21iter.ReportError("ReadString",
22fmt.Sprintf(`invalid control character found: %d`, c))
23return
24}
25}
26return iter.readStringSlowPath()
27} else if c == 'n' {
28iter.skipThreeBytes('u', 'l', 'l')
29return ""
30}
31iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
32return
33}
34
35func (iter *Iterator) readStringSlowPath() (ret string) {
36var str []byte
37var c byte
38for iter.Error == nil {
39c = iter.readByte()
40if c == '"' {
41return string(str)
42}
43if c == '\\' {
44c = iter.readByte()
45str = iter.readEscapedChar(c, str)
46} else {
47str = append(str, c)
48}
49}
50iter.ReportError("readStringSlowPath", "unexpected end of input")
51return
52}
53
54func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
55switch c {
56case 'u':
57r := iter.readU4()
58if utf16.IsSurrogate(r) {
59c = iter.readByte()
60if iter.Error != nil {
61return nil
62}
63if c != '\\' {
64iter.unreadByte()
65str = appendRune(str, r)
66return str
67}
68c = iter.readByte()
69if iter.Error != nil {
70return nil
71}
72if c != 'u' {
73str = appendRune(str, r)
74return iter.readEscapedChar(c, str)
75}
76r2 := iter.readU4()
77if iter.Error != nil {
78return nil
79}
80combined := utf16.DecodeRune(r, r2)
81if combined == '\uFFFD' {
82str = appendRune(str, r)
83str = appendRune(str, r2)
84} else {
85str = appendRune(str, combined)
86}
87} else {
88str = appendRune(str, r)
89}
90case '"':
91str = append(str, '"')
92case '\\':
93str = append(str, '\\')
94case '/':
95str = append(str, '/')
96case 'b':
97str = append(str, '\b')
98case 'f':
99str = append(str, '\f')
100case 'n':
101str = append(str, '\n')
102case 'r':
103str = append(str, '\r')
104case 't':
105str = append(str, '\t')
106default:
107iter.ReportError("readEscapedChar",
108`invalid escape char after \`)
109return nil
110}
111return str
112}
113
114// ReadStringAsSlice read string from iterator without copying into string form.
115// The []byte can not be kept, as it will change after next iterator call.
116func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
117c := iter.nextToken()
118if c == '"' {
119for i := iter.head; i < iter.tail; i++ {
120// require ascii string and no escape
121// for: field name, base64, number
122if iter.buf[i] == '"' {
123// fast path: reuse the underlying buffer
124ret = iter.buf[iter.head:i]
125iter.head = i + 1
126return ret
127}
128}
129readLen := iter.tail - iter.head
130copied := make([]byte, readLen, readLen*2)
131copy(copied, iter.buf[iter.head:iter.tail])
132iter.head = iter.tail
133for iter.Error == nil {
134c := iter.readByte()
135if c == '"' {
136return copied
137}
138copied = append(copied, c)
139}
140return copied
141}
142iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
143return
144}
145
146func (iter *Iterator) readU4() (ret rune) {
147for i := 0; i < 4; i++ {
148c := iter.readByte()
149if iter.Error != nil {
150return
151}
152if c >= '0' && c <= '9' {
153ret = ret*16 + rune(c-'0')
154} else if c >= 'a' && c <= 'f' {
155ret = ret*16 + rune(c-'a'+10)
156} else if c >= 'A' && c <= 'F' {
157ret = ret*16 + rune(c-'A'+10)
158} else {
159iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
160return
161}
162}
163return ret
164}
165
// Fixed high-bit patterns of UTF-8 bytes. appendRune uses tx for continuation
// bytes and t2, t3, t4 for the lead byte of 2-, 3- and 4-byte sequences; t1
// and t5 are not referenced in the visible code.
const (
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000
)

// Payload bit masks. appendRune uses maskx to keep the low six bits placed in
// a continuation byte; mask2, mask3 and mask4 are not referenced in the
// visible code.
const (
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111
)

// Largest code points that appendRune encodes in 1, 2 and 3 bytes.
const (
	rune1Max = 0x7F   // 1<<7 - 1
	rune2Max = 0x7FF  // 1<<11 - 1
	rune3Max = 0xFFFF // 1<<16 - 1
)

// Inclusive bounds of the range appendRune treats as surrogates.
const (
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF
)

const (
	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
	runeError = '\uFFFD'     // The "error" rune, i.e. the Unicode replacement character.
)
189
190func appendRune(p []byte, r rune) []byte {
191// Negative values are erroneous. Making it unsigned addresses the problem.
192switch i := uint32(r); {
193case i <= rune1Max:
194p = append(p, byte(r))
195return p
196case i <= rune2Max:
197p = append(p, t2|byte(r>>6))
198p = append(p, tx|byte(r)&maskx)
199return p
200case i > maxRune, surrogateMin <= i && i <= surrogateMax:
201r = runeError
202fallthrough
203case i <= rune3Max:
204p = append(p, t3|byte(r>>12))
205p = append(p, tx|byte(r>>6)&maskx)
206p = append(p, tx|byte(r)&maskx)
207return p
208default:
209p = append(p, t4|byte(r>>18))
210p = append(p, tx|byte(r>>12)&maskx)
211p = append(p, tx|byte(r>>6)&maskx)
212p = append(p, tx|byte(r)&maskx)
213return p
214}
215}
216