idna

Форк
0
/
test_idna_uts46.py 
220 строк · 8.1 Кб
1
"""Tests for TR46 code."""
2

3
import os.path
4
import re
5
import unittest
6

7
import idna
8

9
# Matches a literal backslash-u escape (a backslash, "u", then exactly four
# hex digits) as it appears in the text of the test table; group 1 captures
# the hex digits.
_RE_UNICODE = re.compile(r"\\u([0-9a-fA-F]{4})")
# Matches a high surrogate (U+D800..U+DBFF) immediately followed by a low
# surrogate (U+DC00..U+DFFF), i.e. a pair that encodes one astral code point.
_RE_SURROGATE = re.compile("[\ud800-\udbff][\udc00-\udfff]")
11
# Source strings (first column of IdnaTestV2.txt) whose test lines are skipped.
_SKIP_TESTS = [
    # These are strings that are illegal in IDNA 2008. Older versions of the UTS-46 test suite
    # had these denoted with the 'NV8' marker but this has been removed, so we need to manually
    # review exceptions and add them here to skip them as test vectors if they are invalid.
    "\U000102f7\u3002\u200d",
    "\U0001d7f5\u9681\u2bee\uff0e\u180d\u200c",
    "9\u9681\u2bee.\u180d\u200c",
    "\u00df\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16",
    "ss\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16",
    "\u00df\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16",
    "ss\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16",
    "\U00010a57\u200d\u3002\u2d09\u2d15",
    "\U00010a57\u200d\uff61\u2d09\u2d15",
    "\U0001d7cf\U0001da19\u2e16.\u200d",
    "1\U0001da19\u2e16.\u200d",
    "\U0001d7e04\U000e01d7\U0001d23b\uff0e\u200d\U000102f5\u26e7\u200d",
    "84\U000e01d7\U0001d23b.\u200d\U000102f5\u26e7\u200d",
    "\u00a1",
    "xn--7a",
    "\u19da",
    "xn--pkf",
    "\u2615",
    "xn--53h",
    "\U0001e937.\U00010b90\U0001e881\U00010e60\u0624",
    "\U0001e937.\U00010b90\U0001e881\U00010e60\u0648\u0654",
    "\U0001e915.\U00010b90\U0001e881\U00010e60\u0648\u0654",
    "\U0001e915.\U00010b90\U0001e881\U00010e60\u0624",
    "xn--ve6h.xn--jgb1694kz0b2176a",
    "\u00df\u3002\U000102f3\u2d0c\u0fb8",
    "ss\u3002\U000102f3\u2d0c\u0fb8",
    "ss.xn--lgd921mvv0m",
    "ss.\U000102f3\u2d0c\u0fb8",
    "xn--zca.xn--lgd921mvv0m",
    "\u00df.\U000102f3\u2d0c\u0fb8",
    "\u00df\uff61\U000102f3\u2d0c\u0fb8",
    "ss\uff61\U000102f3\u2d0c\u0fb8",
    "\u16ad\uff61\U0001d320\u00df\U00016af1",
    "\u16ad\u3002\U0001d320\u00df\U00016af1",
    "\u16ad\u3002\U0001d320SS\U00016af1",
    "\u16ad\u3002\U0001d320ss\U00016af1",
    "\u16ad\u3002\U0001d320Ss\U00016af1",
    "xn--hwe.xn--ss-ci1ub261a",
    "\u16ad.\U0001d320ss\U00016af1",
    "\u16ad.\U0001d320SS\U00016af1",
    "\u16ad.\U0001d320Ss\U00016af1",
    "xn--hwe.xn--zca4946pblnc",
    "\u16ad.\U0001d320\u00df\U00016af1",
    "\u16ad\uff61\U0001d320SS\U00016af1",
    "\u16ad\uff61\U0001d320ss\U00016af1",
    "\u16ad\uff61\U0001d320Ss\U00016af1",
    "\u2d1a\U000102f8\U000e0104\u30025\ud7f6\u103a",
    "xn--ilj2659d.xn--5-dug9054m",
    "\u2d1a\U000102f8.5\ud7f6\u103a",
    "\u2d1a\U000102f8\U000e0104\u3002\U0001d7dd\ud7f6\u103a",
    "xn--9-mfs8024b.",
    "9\u9681\u2bee.",
    "xn--ss-4epx629f.xn--ifh802b6a",
    "ss\uaaf6\u18a5.\u22b6\u2d21\u2d16",
    "xn--pt9c.xn--0kjya",
    "\U00010a57.\u2d09\u2d15",
    "\ua5f7\U00011180.\u075d\U00010a52",
    "xn--ju8a625r.xn--hpb0073k",
    "\u03c2.\u0641\u0645\u064a\U0001f79b1.",
    "\u03a3.\u0641\u0645\u064a\U0001f79b1.",
    "\u03c3.\u0641\u0645\u064a\U0001f79b1.",
    "xn--4xa.xn--1-gocmu97674d.",
    "xn--3xa.xn--1-gocmu97674d.",
    "xn--1-5bt6845n.",
    "1\U0001da19\u2e16.",
    "xn--84-s850a.xn--59h6326e",
    "84\U0001d23b.\U000102f5\u26e7",
    "xn--r97c.",
    "\U000102f7.",
    # These appear to be errors in the test vectors. All relate to incorrectly applying
    # bidi rules across label boundaries. Appears independently confirmed
    # at http://www.alvestrand.no/pipermail/idna-update/2017-January/007946.html
    "0\u00e0.\u05d0",
    "0a\u0300.\u05d0",
    "0A\u0300.\u05d0",
    "0\u00c0.\u05d0",
    "xn--0-sfa.xn--4db",
    "\u00e0\u02c7.\u05d0",
    "a\u0300\u02c7.\u05d0",
    "A\u0300\u02c7.\u05d0",
    "\u00c0\u02c7.\u05d0",
    "xn--0ca88g.xn--4db",
    "0A.\u05d0",
    "0a.\u05d0",
    "0a.xn--4db",
    "c.xn--0-eha.xn--4db",
    "c.0\u00fc.\u05d0",
    "c.0u\u0308.\u05d0",
    "C.0U\u0308.\u05d0",
    "C.0\u00dc.\u05d0",
    "C.0\u00fc.\u05d0",
    "C.0\u0075\u0308.\u05d0",
    "\u06b6\u06df\u3002\u2087\ua806",
    "\u06b6\u06df\u30027\ua806",
    "xn--pkb6f.xn--7-x93e",
    "\u06b6\u06df.7\ua806",
    "1.\uac7e6.\U00010c41\u06d0",
    "1.\u1100\u1165\u11b56.\U00010c41\u06d0",
    "1.xn--6-945e.xn--glb1794k",
]
115

116

117
def unicode_fixup(string):
    """Replace backslash-u-XXXX with appropriate unicode characters.

    Works in two passes: first every literal ``\\uXXXX`` escape matched by
    ``_RE_UNICODE`` is replaced by the character with that code point, then
    every high/low surrogate pair matched by ``_RE_SURROGATE`` (which the
    first pass may have produced) is merged into the single code point the
    pair encodes.

    :param string: text possibly containing ``\\uXXXX`` escapes.
    :return: the text with escapes and surrogate pairs resolved.
    """

    def _join_surrogates(match):
        # The pattern matches exactly two characters: high then low surrogate.
        high, low = match.group(0)
        # Standard UTF-16 surrogate-pair decoding.
        return chr((ord(high) - 0xD800) * 0x400 + ord(low) - 0xDC00 + 0x10000)

    unescaped = _RE_UNICODE.sub(lambda match: chr(int(match.group(1), 16)), string)
    return _RE_SURROGATE.sub(_join_surrogates, unescaped)
123

124

125
def parse_idna_test_table(inputstream):
    """Parse IdnaTestV2.txt and yield one ``(lineno, fields)`` tuple per data line.

    This is a generator. Each item of ``inputstream`` is decoded as UTF-8,
    stripped of surrounding whitespace and truncated at the first ``#``.
    Lines that are empty after that are skipped.

    :param inputstream: iterable of ``bytes`` lines (e.g. a file opened in
        binary mode).
    :return: yields ``(lineno, fields)`` where ``lineno`` is the 1-based
        position of the line in ``inputstream`` and ``fields`` is a tuple of
        the ``;``-separated columns, each stripped of whitespace.
    """
    for lineno, raw_line in enumerate(inputstream, start=1):
        # Everything from the first "#" onwards is treated as a comment.
        line = raw_line.decode("utf-8").strip().partition("#")[0]
        if not line:
            continue
        yield lineno, tuple(field.strip() for field in line.split(";"))
134

135

136
class TestIdnaTest(unittest.TestCase):
    """Run one of the IdnaTestV2.txt test lines."""

    def __init__(self, lineno=None, fields=None):
        """Store the table line number and its parsed columns.

        :param lineno: line number of the test line in IdnaTestV2.txt.
        :param fields: tuple of the seven columns of that line, or ``None``.
        """
        super().__init__()
        self.lineno = lineno
        self.fields = fields

    def id(self):
        """Return the base test id with the table line number appended."""
        return "{}.{}".format(super().id(), self.lineno)

    def shortDescription(self):
        """Return a one-line description naming the table line and its columns."""
        if not self.fields:
            return ""
        return "IdnaTestV2.txt line {}: {}".format(self.lineno, "; ".join(self.fields))

    def _check(self, label, source, convert, expected, expected_status):
        """Run a single conversion and compare it with the table's expectation.

        :param label: name of the conversion used in failure messages,
            e.g. ``"decode()"``.
        :param source: the source string, used only in failure messages.
        :param convert: zero-argument callable performing the conversion.
        :param expected: output expected when the conversion succeeds.
        :param expected_status: status column from the table; ``"[]"`` means
            the conversion must succeed, anything else means it must fail.
        :raises unittest.SkipTest: when the conversion error message starts
            with ``"Unknown"``.
        """
        # Only the conversion itself sits in the ``try`` so that assertion
        # failures below are never confused with conversion errors.
        try:
            output = convert()
        except (idna.IDNAError, UnicodeError, ValueError) as exc:
            if str(exc).startswith("Unknown"):
                raise unittest.SkipTest(
                    "Test requires support for a newer version of Unicode than this Python supports"
                )
            if expected_status == "[]":
                # The table says this conversion must succeed: propagate.
                raise
            # An error was required and one was raised.
            return
        if expected_status != "[]":
            self.fail("{} did not emit required error {} for {}".format(label, expected_status, repr(source)))
        self.assertEqual(output, expected, "unexpected {} output".format(label))

    def runTest(self):
        """Check decode, encode and transitional encode for this table line.

        Does nothing when no fields were supplied or when the source string is
        listed in ``_SKIP_TESTS``. Empty columns inherit their value from the
        preceding column as implemented below (e.g. an empty ``to_unicode``
        means "same as source", an empty status means "same as the previous
        status", defaulting to ``"[]"``).
        """
        if not self.fields:
            return
        (
            source,
            to_unicode,
            to_unicode_status,
            to_ascii,
            to_ascii_status,
            to_ascii_t,
            to_ascii_t_status,
        ) = self.fields
        if source in _SKIP_TESTS:
            return

        # Fill in blank columns from their predecessors.
        to_unicode = to_unicode or source
        to_unicode_status = to_unicode_status or "[]"
        to_ascii = to_ascii or to_unicode
        to_ascii_status = to_ascii_status or to_unicode_status
        to_ascii_t = to_ascii_t or to_ascii
        to_ascii_t_status = to_ascii_t_status or to_ascii_status

        self._check(
            "decode()",
            source,
            lambda: idna.decode(source, uts46=True, strict=True),
            to_unicode,
            to_unicode_status,
        )
        self._check(
            "encode()",
            source,
            lambda: idna.encode(source, uts46=True, strict=True).decode("ascii"),
            to_ascii,
            to_ascii_status,
        )
        self._check(
            "encode(transitional=True)",
            source,
            lambda: idna.encode(source, uts46=True, strict=True, transitional=True).decode("ascii"),
            to_ascii_t,
            to_ascii_t_status,
        )
213

214

215
def load_tests(loader, tests, pattern):
    """Create a suite of all the individual tests.

    Implements the ``unittest`` ``load_tests`` protocol: reads
    ``IdnaTestV2.txt`` from the directory containing this module and adds one
    ``TestIdnaTest`` per parsed table line to a fresh suite.

    :param loader: unused; part of the ``load_tests`` protocol signature.
    :param tests: unused; part of the ``load_tests`` protocol signature.
    :param pattern: unused; part of the ``load_tests`` protocol signature.
    :return: a ``unittest.TestSuite`` with one test per table line.
    """
    table_path = os.path.join(os.path.dirname(__file__), "IdnaTestV2.txt")
    suite = unittest.TestSuite()
    # Binary mode: parse_idna_test_table decodes each line itself.
    with open(table_path, "rb") as tests_file:
        suite.addTests(TestIdnaTest(lineno, fields) for lineno, fields in parse_idna_test_table(tests_file))
    return suite
221

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.