1
"""Tests for TR46 code."""
9
_RE_UNICODE = re.compile("\\\\u([0-9a-fA-F]{4})")
10
_RE_SURROGATE = re.compile("[\ud800-\udbff][\udc00-\udfff]")
15
"\U000102f7\u3002\u200d",
16
"\U0001d7f5\u9681\u2bee\uff0e\u180d\u200c",
17
"9\u9681\u2bee.\u180d\u200c",
18
"\u00df\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16",
19
"ss\u200c\uaaf6\u18a5.\u22b6\u2d21\u2d16",
20
"\u00df\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16",
21
"ss\u200c\uaaf6\u18a5\uff0e\u22b6\u2d21\u2d16",
22
"\U00010a57\u200d\u3002\u2d09\u2d15",
23
"\U00010a57\u200d\uff61\u2d09\u2d15",
24
"\U0001d7cf\U0001da19\u2e16.\u200d",
25
"1\U0001da19\u2e16.\u200d",
26
"\U0001d7e04\U000e01d7\U0001d23b\uff0e\u200d\U000102f5\u26e7\u200d",
27
"84\U000e01d7\U0001d23b.\u200d\U000102f5\u26e7\u200d",
34
"\U0001e937.\U00010b90\U0001e881\U00010e60\u0624",
35
"\U0001e937.\U00010b90\U0001e881\U00010e60\u0648\u0654",
36
"\U0001e915.\U00010b90\U0001e881\U00010e60\u0648\u0654",
37
"\U0001e915.\U00010b90\U0001e881\U00010e60\u0624",
38
"xn--ve6h.xn--jgb1694kz0b2176a",
39
"\u00df\u3002\U000102f3\u2d0c\u0fb8",
40
"ss\u3002\U000102f3\u2d0c\u0fb8",
42
"ss.\U000102f3\u2d0c\u0fb8",
43
"xn--zca.xn--lgd921mvv0m",
44
"\u00df.\U000102f3\u2d0c\u0fb8",
45
"\u00df\uff61\U000102f3\u2d0c\u0fb8",
46
"ss\uff61\U000102f3\u2d0c\u0fb8",
47
"\u16ad\uff61\U0001d320\u00df\U00016af1",
48
"\u16ad\u3002\U0001d320\u00df\U00016af1",
49
"\u16ad\u3002\U0001d320SS\U00016af1",
50
"\u16ad\u3002\U0001d320ss\U00016af1",
51
"\u16ad\u3002\U0001d320Ss\U00016af1",
52
"xn--hwe.xn--ss-ci1ub261a",
53
"\u16ad.\U0001d320ss\U00016af1",
54
"\u16ad.\U0001d320SS\U00016af1",
55
"\u16ad.\U0001d320Ss\U00016af1",
56
"xn--hwe.xn--zca4946pblnc",
57
"\u16ad.\U0001d320\u00df\U00016af1",
58
"\u16ad\uff61\U0001d320SS\U00016af1",
59
"\u16ad\uff61\U0001d320ss\U00016af1",
60
"\u16ad\uff61\U0001d320Ss\U00016af1",
61
"\u2d1a\U000102f8\U000e0104\u30025\ud7f6\u103a",
62
"xn--ilj2659d.xn--5-dug9054m",
63
"\u2d1a\U000102f8.5\ud7f6\u103a",
64
"\u2d1a\U000102f8\U000e0104\u3002\U0001d7dd\ud7f6\u103a",
67
"xn--ss-4epx629f.xn--ifh802b6a",
68
"ss\uaaf6\u18a5.\u22b6\u2d21\u2d16",
70
"\U00010a57.\u2d09\u2d15",
71
"\ua5f7\U00011180.\u075d\U00010a52",
72
"xn--ju8a625r.xn--hpb0073k",
73
"\u03c2.\u0641\u0645\u064a\U0001f79b1.",
74
"\u03a3.\u0641\u0645\u064a\U0001f79b1.",
75
"\u03c3.\u0641\u0645\u064a\U0001f79b1.",
76
"xn--4xa.xn--1-gocmu97674d.",
77
"xn--3xa.xn--1-gocmu97674d.",
80
"xn--84-s850a.xn--59h6326e",
81
"84\U0001d23b.\U000102f5\u26e7",
92
"\u00e0\u02c7.\u05d0",
93
"a\u0300\u02c7.\u05d0",
94
"A\u0300\u02c7.\u05d0",
95
"\u00c0\u02c7.\u05d0",
100
"c.xn--0-eha.xn--4db",
106
"C.0\u0075\u0308.\u05d0",
107
"\u06b6\u06df\u3002\u2087\ua806",
108
"\u06b6\u06df\u30027\ua806",
109
"xn--pkb6f.xn--7-x93e",
110
"\u06b6\u06df.7\ua806",
111
"1.\uac7e6.\U00010c41\u06d0",
112
"1.\u1100\u1165\u11b56.\U00010c41\u06d0",
113
"1.xn--6-945e.xn--glb1794k",
117
def unicode_fixup(string):
    """Expand backslash-u-XXXX escapes in *string* into real characters.

    The expansion runs in two passes: every escape matched by
    ``_RE_UNICODE`` becomes the character with that hexadecimal code, and
    then each high/low surrogate pair matched by ``_RE_SURROGATE`` is merged
    into the single character above U+FFFF that the pair encodes.
    """

    def _from_escape(match):
        # Group 1 holds the four hexadecimal digits of the escape.
        return chr(int(match.group(1), 16))

    def _join_pair(match):
        # The match is exactly two characters: high then low surrogate.
        high, low = match.group(0)
        return chr((ord(high) - 0xD800) * 0x400 + ord(low) - 0xDC00 + 0x10000)

    expanded = _RE_UNICODE.sub(_from_escape, string)
    return _RE_SURROGATE.sub(_join_pair, expanded)
125
def parse_idna_test_table(inputstream):
    """Yield the data records of an IdnaTestV2.txt-style byte stream.

    Args:
        inputstream: Iterable of ``bytes`` lines (for example a file opened
            in binary mode). Each line is decoded as UTF-8.

    Yields:
        ``(lineno, fields)`` tuples, where ``lineno`` is the 1-based line
        number within the stream and ``fields`` is a tuple of the
        semicolon-separated columns with surrounding whitespace removed.
        Blank lines and lines that hold only a ``#`` comment are skipped.

    Raises:
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    for lineno, raw_line in enumerate(inputstream, start=1):
        # Everything from the first "#" onwards is a comment.
        content = raw_line.decode("utf-8").partition("#")[0].strip()
        if not content:
            # Nothing is left once the comment is removed: not a record.
            continue
        yield lineno, tuple(field.strip() for field in content.split(";"))
136
class TestIdnaTest(unittest.TestCase):
137
"""Run one of the IdnaTestV2.txt test lines."""
139
# Takes a line number and that line's fields; both default to None.
def __init__(self, lineno=None, fields=None):
145
# Test id: the inherited id() with ".<lineno>" appended.
return "{}.{}".format(super().id(), self.lineno)
147
def shortDescription(self):
150
# Description: the line number, then the fields joined with "; ".
return "IdnaTestV2.txt line {}: {}".format(self.lineno, "; ".join(self.fields))
164
# Inputs listed in _SKIP_TESTS are special-cased here
# (presumably skipped - confirm against the branch body).
if source in _SKIP_TESTS:
168
# An empty to_unicode_status is normalised to "[]"; the checks further down
# treat "[]" as "no error required".
if not to_unicode_status:
169
to_unicode_status = "[]"
171
# The expected encode() output falls back to the expected decode() output
# (presumably only when its own field is empty - confirm).
to_ascii = to_unicode
172
# An empty to_ascii_status inherits to_unicode_status.
if not to_ascii_status:
173
to_ascii_status = to_unicode_status
175
# The transitional expectation falls back to the non-transitional one
# (presumably only when its own field is empty - confirm).
to_ascii_t = to_ascii
176
# An empty to_ascii_t_status inherits to_ascii_status.
if not to_ascii_t_status:
177
to_ascii_t_status = to_ascii_status
180
# Check 1: decode the source with uts46=True and strict=True.
output = idna.decode(source, uts46=True, strict=True)
181
# decode() returned normally; a status other than "[]" means an error was required.
if to_unicode_status != "[]":
182
# NOTE(review): this message formats to_unicode (the expected output), while the
# two encode() checks format their *_status variable; to_unicode_status looks
# like the intended argument - confirm.
self.fail("decode() did not emit required error {} for {}".format(to_unicode, repr(source)))
183
self.assertEqual(output, to_unicode, "unexpected decode() output")
184
except (idna.IDNAError, UnicodeError, ValueError) as exc:
185
# An exception whose message starts with "Unknown" is converted into a skip.
if str(exc).startswith("Unknown"):
186
raise unittest.SkipTest("Test requires support for a newer" " version of Unicode than this Python supports")
187
# Tests whether the exception was unexpected ("[]" means no error required).
if to_unicode_status == "[]":
191
# Check 2: encode the source with uts46=True and strict=True, then compare the
# result as ASCII text.
output = idna.encode(source, uts46=True, strict=True).decode("ascii")
192
# encode() returned normally; a status other than "[]" means an error was required.
if to_ascii_status != "[]":
193
self.fail("encode() did not emit required error {} for {}".format(to_ascii_status, repr(source)))
194
self.assertEqual(output, to_ascii, "unexpected encode() output")
195
except (idna.IDNAError, UnicodeError, ValueError) as exc:
196
# An exception whose message starts with "Unknown" is converted into a skip.
if str(exc).startswith("Unknown"):
197
raise unittest.SkipTest("Test requires support for a newer" " version of Unicode than this Python supports")
198
# Tests whether the exception was unexpected ("[]" means no error required).
if to_ascii_status == "[]":
202
# Check 3: same as check 2 but with transitional=True.
output = idna.encode(source, uts46=True, strict=True, transitional=True).decode("ascii")
203
# encode() returned normally; a status other than "[]" means an error was required.
if to_ascii_t_status != "[]":
205
"encode(transitional=True) did not emit required error {} for {}".format(to_ascii_t_status, repr(source))
207
self.assertEqual(output, to_ascii_t, "unexpected encode() output")
208
except (idna.IDNAError, UnicodeError, ValueError) as exc:
209
# An exception whose message starts with "Unknown" is converted into a skip.
if str(exc).startswith("Unknown"):
210
raise unittest.SkipTest("Test requires support for a newer" " version of Unicode than this Python supports")
211
# Tests whether the exception was unexpected ("[]" means no error required).
if to_ascii_t_status == "[]":
215
def load_tests(loader, tests, pattern):
216
"""Create a suite of all the individual tests."""
217
suite = unittest.TestSuite()
218
with open(os.path.join(os.path.dirname(__file__), "IdnaTestV2.txt"), "rb") as tests_file:
219
suite.addTests(TestIdnaTest(lineno, fields) for lineno, fields in parse_idna_test_table(tests_file))