okhttp
267 строк · 6.8 Кб
1/*
2* Copyright (C) 2023 Square, Inc.
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*/
16package okhttp3.internal.idn
17
18import java.io.IOException
19import okio.Buffer
20import okio.BufferedSink
21import okio.BufferedSource
22import okio.ByteString
23import okio.ByteString.Companion.encodeUtf8
24import okio.Options
25
/**
 * A decoded [mapping table] that can perform the [mapping step] of IDNA processing.
 *
 * Readability is favored over efficiency here: lookups are a plain binary search over the list of
 * [mappings].
 *
 * Deviation characters are preserved, which is non-transitional processing.
 *
 * STD3 rules are configured as `UseSTD3ASCIIRules=false`. That is the permissive setting, and it
 * permits the `_` character.
 *
 * [mapping table]: https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
 * [mapping step]: https://www.unicode.org/reports/tr46/#ProcessingStepMap
 */
class SimpleIdnaMappingTable internal constructor(
  internal val mappings: List<Mapping>,
) {
  /**
   * Applies the mapping for [codePoint] and writes the outcome to [sink].
   *
   * Ignored code points write nothing, mapped code points write their replacement bytes, and all
   * other code points (including disallowed ones) are written unchanged.
   *
   * @return true if [codePoint] was applied successfully, false if it was disallowed.
   * @throws IllegalArgumentException if no entry of [mappings] covers [codePoint].
   */
  fun map(
    codePoint: Int,
    sink: BufferedSink,
  ): Boolean {
    // Locate the entry whose inclusive source range contains the code point.
    val position =
      mappings.binarySearch { candidate ->
        if (candidate.sourceCodePoint1 < codePoint) {
          -1
        } else if (candidate.sourceCodePoint0 > codePoint) {
          1
        } else {
          0
        }
      }

    // Code points must be in 0..0x10ffff.
    require(position in mappings.indices) { "unexpected code point: $codePoint" }

    val match = mappings[position]
    return when (match.type) {
      TYPE_IGNORED -> true

      TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        sink.write(match.mappedTo)
        true
      }

      TYPE_DEVIATION, TYPE_DISALLOWED_STD3_VALID, TYPE_VALID -> {
        sink.writeUtf8CodePoint(codePoint)
        true
      }

      TYPE_DISALLOWED -> {
        // The code point is still emitted; only the return value reports the failure.
        sink.writeUtf8CodePoint(codePoint)
        false
      }

      // Any other type value writes nothing and is reported as success.
      else -> true
    }
  }
}
83
// Positions of the entries in [optionsDelimiter]. The results of `select(optionsDelimiter)` are
// matched against these values, so they must stay in sync with the order of that list.
// [DELIMITER_DOT] is also the position of the only entry in [optionsDot].
private const val DELIMITER_DOT = 0
private const val DELIMITER_SPACE = 1
private const val DELIMITER_SEMICOLON = 2
private const val DELIMITER_HASH = 3
private const val DELIMITER_NEWLINE = 4

/** The delimiters recognized while reading the table, in `DELIMITER_*` order. */
private val optionsDelimiter =
  Options.of(
    ".".encodeUtf8(), // DELIMITER_DOT
    " ".encodeUtf8(), // DELIMITER_SPACE
    ";".encodeUtf8(), // DELIMITER_SEMICOLON
    "#".encodeUtf8(), // DELIMITER_HASH
    "\n".encodeUtf8(), // DELIMITER_NEWLINE
  )

/** Matches only a dot; used to detect the `..` of a code point range. */
private val optionsDot =
  Options.of(
    ".".encodeUtf8(), // DELIMITER_DOT
  )
109
// Mapping types. Each value is the position of the matching keyword in [optionsType], so the two
// declarations must stay in the same order.
internal const val TYPE_DEVIATION = 0
internal const val TYPE_DISALLOWED = 1
internal const val TYPE_DISALLOWED_STD3_MAPPED = 2
internal const val TYPE_DISALLOWED_STD3_VALID = 3
internal const val TYPE_IGNORED = 4
internal const val TYPE_MAPPED = 5
internal const val TYPE_VALID = 6

/**
 * The mapping type keywords, in `TYPE_*` order. Each keyword includes its trailing space.
 */
private val optionsType =
  Options.of(
    "deviation ".encodeUtf8(), // TYPE_DEVIATION
    "disallowed ".encodeUtf8(), // TYPE_DISALLOWED
    "disallowed_STD3_mapped ".encodeUtf8(), // TYPE_DISALLOWED_STD3_MAPPED
    "disallowed_STD3_valid ".encodeUtf8(), // TYPE_DISALLOWED_STD3_VALID
    "ignored ".encodeUtf8(), // TYPE_IGNORED
    "mapped ".encodeUtf8(), // TYPE_MAPPED
    "valid ".encodeUtf8(), // TYPE_VALID
  )
135
/**
 * Consumes consecutive `' '` bytes from the front of this source. Stops at the first byte that is
 * not a space, or when the source is exhausted.
 */
private fun BufferedSource.skipWhitespace() {
  val space = ' '.code.toByte()
  // exhausted() is checked first, so a byte is buffered before buffer[0] is read.
  while (!exhausted() && buffer[0] == space) {
    skip(1L)
  }
}
142
/**
 * Skips up to and including the next `'\n'`. If `indexOf` reports no newline (-1), everything
 * currently buffered is skipped instead, with the intent of exhausting this source.
 */
private fun BufferedSource.skipRestOfLine() {
  val newlineIndex = indexOf('\n'.code.toByte())
  if (newlineIndex == -1L) {
    skip(buffer.size) // Exhaust this source.
  } else {
    skip(newlineIndex + 1)
  }
}
149
/**
 * Reads lines from `IdnaMappingTable.txt`.
 *
 * Comment lines are either blank or start with a `#` character. Lines may also end with a comment.
 * All comments are ignored.
 *
 * Regular lines contain fields separated by semicolons.
 *
 * The first element on each line is a single hex code point (like 0041) or a hex code point range
 * (like 0030..0039).
 *
 * The second element on each line is a mapping type, like `valid` or `mapped`.
 *
 * For lines that contain a mapping target, the next thing is a sequence of hex code points (like
 * 0031 2044 0034).
 *
 * All other data is ignored.
 *
 * @return a table holding one [Mapping] per regular line, in the order the lines were read.
 * @throws IOException if a line is malformed: an unexpected delimiter, a missing `;` or `..`, an
 *     unknown mapping type, a code point outside of `0..0x10ffff`, or a range whose end precedes
 *     its start.
 */
fun BufferedSource.readPlainTextIdnaMappingTable(): SimpleIdnaMappingTable {
  // Scratch buffer for the current line's mapping target. It is drained by readByteString() when
  // each Mapping is created, so it is empty again at the start of the next line.
  val mappedTo = Buffer()
  val result = mutableListOf<Mapping>()

  while (!exhausted()) {
    // Skip comment and empty lines.
    when (select(optionsDelimiter)) {
      DELIMITER_HASH -> {
        skipRestOfLine()
        continue
      }

      DELIMITER_NEWLINE -> {
        continue
      }

      DELIMITER_DOT, DELIMITER_SPACE, DELIMITER_SEMICOLON -> {
        throw IOException("unexpected delimiter")
      }
    }

    // "002F" or "0000..002C"
    val sourceCodePoint0 = readHexCodePoint()
    val sourceCodePoint1 =
      when (select(optionsDot)) {
        DELIMITER_DOT -> {
          // select() consumed the first dot of ".."; the second must follow immediately.
          if (readByte() != '.'.code.toByte()) throw IOException("expected '..'")
          readHexCodePoint()
        }

        else -> sourceCodePoint0
      }

    // An inverted range could never match a lookup and would corrupt the table's ordering.
    if (sourceCodePoint1 < sourceCodePoint0) throw IOException("unexpected range")

    skipWhitespace()
    if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

    // "valid" or "mapped"
    skipWhitespace()
    val type = select(optionsType)

    when (type) {
      TYPE_DEVIATION, TYPE_MAPPED, TYPE_DISALLOWED_STD3_MAPPED -> {
        skipWhitespace()
        if (readByte() != ';'.code.toByte()) throw IOException("expected ';'")

        // Like "0061" or "0031 2044 0034".
        while (true) {
          skipWhitespace()

          when (select(optionsDelimiter)) {
            // The trailing comment ends the mapping target, which may be empty.
            DELIMITER_HASH -> {
              break
            }

            DELIMITER_DOT, DELIMITER_SEMICOLON, DELIMITER_NEWLINE -> {
              throw IOException("unexpected delimiter")
            }
          }

          mappedTo.writeUtf8CodePoint(readHexCodePoint())
        }
      }

      TYPE_DISALLOWED, TYPE_DISALLOWED_STD3_VALID, TYPE_IGNORED, TYPE_VALID -> Unit

      else -> throw IOException("unexpected type")
    }

    skipRestOfLine()

    result +=
      Mapping(
        sourceCodePoint0,
        sourceCodePoint1,
        type,
        mappedTo.readByteString(),
      )
  }

  return SimpleIdnaMappingTable(result)
}

/**
 * Reads a hexadecimal number and returns it as a code point.
 *
 * The value is range-checked before narrowing to [Int] so that an oversized field is rejected
 * rather than silently truncated into a different (possibly negative) code point.
 *
 * @throws IOException if the value is outside of `0..0x10ffff`.
 */
private fun BufferedSource.readHexCodePoint(): Int {
  val value = readHexadecimalUnsignedLong()
  if (value !in 0L..0x10ffffL) throw IOException("unexpected code point")
  return value.toInt()
}
249
/**
 * One entry of the mapping table.
 *
 * @property sourceCodePoint0 first code point covered by this entry (inclusive).
 * @property sourceCodePoint1 last code point covered by this entry (inclusive).
 * @property type one of the `TYPE_*` constants.
 * @property mappedTo UTF-8 encoded replacement for the covered code points; empty when the source
 *     line provided no mapping target.
 */
internal data class Mapping(
  val sourceCodePoint0: Int,
  val sourceCodePoint1: Int,
  val type: Int,
  val mappedTo: ByteString,
) {
  /** [sourceCodePoint0] with its low 7 bits cleared (mask `0x1fff80`). */
  val section: Int get() = sourceCodePoint0 and 0x1fff80

  /** The low 7 bits of [sourceCodePoint0]. */
  val rangeStart: Int get() = sourceCodePoint0 and 0x7f

  /** True if this entry covers exactly one code point. */
  val hasSingleSourceCodePoint: Boolean get() = sourceCodePoint1 == sourceCodePoint0

  /** True if the first and last code points fall in different sections. */
  val spansSections: Boolean get() = section != (sourceCodePoint1 and 0x1fff80)
}
268