navec
/
tests.py
74 строки · 1.5 Кб
1
2from tempfile import NamedTemporaryFile3
4import numpy as np5import pytest6
7from navec import Navec8from navec.meta import Meta9from navec.vocab import Vocab10from navec.pq import PQ11
12
@pytest.fixture
def emb():
    """Build a tiny three-word Navec embedding shared by the tests below.

    The product-quantized table is laid out so that the words are meant
    to decode to these 6-d vectors (two chunks of three values each):

        a: 1 0 0 | 1 0 0
        b: 0 1 1 | 0 0 0
        c: 0 0 0 | 0 1 0
    """
    meta = Meta(id='test_1B_3k_6d_2q')

    # One row per vector, one centroid index per chunk (vectors x qdim).
    centroid_ids = np.array([
        [0, 1],
        [1, 0],
        [2, 2],
    ]).astype(np.uint8)
    # Centroid table (qdim x centroids x chunk).
    centroid_table = np.array([
        [[1, 0, 0], [0, 1, 1], [0, 0, 0]],
        [[0, 0, 0], [1, 0, 0], [0, 1, 0]],
    ]).astype(np.float32)
    pq = PQ(
        vectors=3,
        dim=6,
        qdim=2,
        centroids=3,
        indexes=centroid_ids,
        codes=centroid_table,
    )

    vocab = Vocab(words=['a', 'b', 'c'], counts=[1, 2, 3])
    return Navec(meta, vocab, pq)
42
def test_dump_load(emb):
    """Dump the embedding to a temporary file and check it loads back intact.

    Besides verifying that ``dump`` and ``load`` do not raise, the loaded
    embedding must return the same vectors as the original for every word
    of the fixture vocabulary, and still report unknown words as ``None``.
    """
    # NOTE(review): the file is reopened by name while it is still open;
    # the tempfile documentation says this works on Unix but not on Windows.
    with NamedTemporaryFile() as file:
        path = file.name
        emb.dump(path)
        loaded = Navec.load(path)
        # Compare while the backing file still exists, in case the loaded
        # object reads from it lazily.
        for word in ['a', 'b', 'c']:
            assert np.array_equal(loaded.get(word), emb.get(word))
        assert loaded.get('d') is None
49
def test_get(emb):
    """A known word yields its expected vector; an unknown word yields None."""
    vector = emb.get('a')
    expected = np.array([1., 0., 0., 1., 0., 0.])
    assert np.array_equal(vector, expected)

    # 'd' is not among the fixture's words.
    missing = emb.get('d')
    assert missing is None
57
def test_sim(emb):
    """The similarity reported for 'a' and 'b' must be exactly zero."""
    score = emb.sim('a', 'b')
    assert score == 0.
61
def test_gensim(emb):
    """The gensim view lists 'b' then 'c', both with score 0, for 'a'."""
    # ``as_gensim`` is read as an attribute here, not called.
    gensim_model = emb.as_gensim
    neighbours = gensim_model.most_similar('a')
    expected = [
        ('b', 0.),
        ('c', 0.),
    ]
    assert neighbours == expected
69
def test_top(emb):
    """Sampling the embedding down to ``vocab.top(2)`` keeps it consistent."""
    kept_words = emb.vocab.top(2)
    subset = emb.sampled(kept_words)

    # Exactly two index rows remain after sampling.
    assert len(subset.pq.indexes) == 2
    # Similarity between 'b' and 'c' matches the full embedding.
    assert subset.sim('b', 'c') == emb.sim('b', 'c')
    # 'a' is absent from the sampled vocabulary.
    assert subset.vocab.get('a') is None