slovnet
1
2import pytest
3
4from slovnet.shape import (
5X, x, xx, XX, Xx, Xx_Xx, OTHER,
6RU, EN, NUM, PUNCT,
7
8word_shape,
9format_shape as s
10)
11from slovnet.token import tokenize
12
13
# Parametrized cases for test_shape. Every entry is a two-item list:
# the raw text to tokenize, followed by the shapes expected for its
# tokens, in order.
TESTS = [
    # A single uppercase Cyrillic letter.
    ['В', [s(RU, X)]],
    # Uppercase Cyrillic letters with a hyphenated digit: one shape expected.
    ['ИЛ-2', [s(RU, XX)]],
    # Digits, a lowercase Cyrillic letter and a period: three shapes expected.
    ['105г.', [NUM, s(RU, x), s(PUNCT, '.')]],
    # Two capitalized Latin parts joined by a hyphen.
    ['Pal-Yz', [s(EN, Xx_Xx)]],
    # The hyphenated word with irregular casing is expected to be OTHER.
    ['и Я-ДаА', [s(RU, x), s(RU, OTHER)]],
    # Mixed Cyrillic/Latin text ending in a punctuation character.
    ['Прибыл на I@', [s(RU, Xx), s(RU, xx), s(EN, X), s(PUNCT, '@')]],
    # A run of two distinct hyphen characters is expected as PUNCT/OTHER.
    ['и -‐', [s(RU, x), s(PUNCT, OTHER)]],
]
44
45
@pytest.mark.parametrize('test', TESTS)
def test_shape(test):
    """Tokenize the case text and compare per-token shapes with the expectation.

    ``test`` is one entry of ``TESTS``: the input text followed by the list
    of shapes expected for its tokens.
    """
    text, etalon = test

    # Collect the shape of every token's text, preserving token order.
    shapes = []
    for token in tokenize(text):
        shapes.append(word_shape(token.text))

    assert shapes == etalon
52