19
from annoy import AnnoyIndex
22
# Round-trip check for a "hamming" AnnoyIndex holding two random 0/1 vectors:
# the vectors read back with get_item_vector must equal the originals, the
# distance from an item to itself must be ~0, and the distance between the two
# items must equal dot(u - v, u - v) in both argument orders.
# NOTE(review): the bare integers between statements look like line numbers
# left behind by an extraction tool, and indentation has been lost. `f` is
# never assigned and no add_item call for items 0/1 is visible (numbering
# jumps 22->24 and 26->29) -- restore those lines from the original file.
def test_basic_conversion():
24
i = AnnoyIndex(f, "hamming")
25
# binomial(1, 0.5, f) draws f independent 0/1 entries.
u = numpy.random.binomial(1, 0.5, f)
26
v = numpy.random.binomial(1, 0.5, f)
29
u2 = i.get_item_vector(0)
30
v2 = i.get_item_vector(1)
31
# A zero squared difference means the stored vector matches what was drawn.
assert numpy.dot(u - u2, u - u2) == pytest.approx(0.0)
32
assert numpy.dot(v - v2, v - v2) == pytest.approx(0.0)
33
assert i.get_distance(0, 0) == pytest.approx(0.0)
34
assert i.get_distance(1, 1) == pytest.approx(0.0)
35
# For 0/1 vectors dot(u - v, u - v) is the number of positions that differ.
assert i.get_distance(0, 1) == pytest.approx(numpy.dot(u - v, u - v))
36
assert i.get_distance(1, 0) == pytest.approx(numpy.dot(u - v, u - v))
41
# Nearest-neighbour checks on a fresh "hamming" index with two random 0/1
# vectors: asking for up to 99 neighbours of item 0 (resp. item 1) must return
# both ids with the queried item first, and with include_distances=True the
# first distance must be ~0 and the second must equal dot(u - v, u - v).
# NOTE(review): no `def` header, `f` assignment, add_item call or build call
# is visible for this fragment (numbering jumps 36->41, 43->47 and 49->51);
# presumably this is a separate test function -- confirm against the
# original file before relying on it.
i = AnnoyIndex(f, "hamming")
42
u = numpy.random.binomial(1, 0.5, f)
43
v = numpy.random.binomial(1, 0.5, f)
47
assert i.get_nns_by_item(0, 99) == [0, 1]
48
assert i.get_nns_by_item(1, 99) == [1, 0]
49
# `rs` (the returned ids) is not checked in the visible lines.
rs, ds = i.get_nns_by_item(0, 99, include_distances=True)
51
assert ds[0] == pytest.approx(0)
52
assert ds[1] == pytest.approx(numpy.dot(u - v, u - v))
57
# Builds index `i`, then creates a second "hamming" index `j` of the same
# dimension and queries `j` for the neighbours of item 0: the first distance
# must be ~0 and the second must equal dot(u - v, u - v).
# NOTE(review): presumably a save/load round trip from `i` into `j`, but no
# `def` header, `f` assignment, add_item, build, save or load call is visible
# (numbering jumps 52->57, 59->64, 64->66 and 66->68) -- confirm against the
# original file.
i = AnnoyIndex(f, "hamming")
58
u = numpy.random.binomial(1, 0.5, f)
59
v = numpy.random.binomial(1, 0.5, f)
64
j = AnnoyIndex(f, "hamming")
66
# `rs` (the returned ids) is not checked in the visible lines.
rs, ds = j.get_nns_by_item(0, 99, include_distances=True)
68
assert ds[0] == pytest.approx(0)
69
assert ds[1] == pytest.approx(numpy.dot(u - v, u - v))
72
# Adds 100000 random 0/1 vectors of length `f` to a "hamming" index, queries
# it by vector (10000 neighbours of the all-zero vector, then single-neighbour
# queries with search_k=1000 for random vectors), and requires the mean of
# the collected `dists` to be at most 0.42.
# NOTE(review): indentation has been lost and several statements are missing
# from this excerpt: `f` and `dists` are never assigned, no build call is
# visible, nothing is asserted on the first query, and the multi-line
# get_nns_by_vector call has no visible closing parenthesis (numbering jumps
# 72->74, 76->79, 79->85 and 86->89). Restore them from the original file.
def test_many_vectors():
74
i = AnnoyIndex(f, "hamming")
75
for x in range(100000):
76
i.add_item(x, numpy.random.binomial(1, 0.5, f))
79
# Query with the all-zero vector, asking for 10000 neighbours and distances.
rs, ds = i.get_nns_by_vector([0] * f, 10000, include_distances=True)
85
rs, ds = i.get_nns_by_vector(
86
numpy.random.binomial(1, 0.5, f), 1, search_k=1000, include_distances=True
89
avg_dist = 1.0 * sum(dists) / len(dists)
90
assert avg_dist <= 0.42
94
# Regression-style check on twelve fixed bit strings: each string is turned
# into a list of ints (one per character), the vectors are enumerated into a
# "hamming" index, and the 5 nearest neighbours of item 0 are requested with
# distances; the first four distances must be exactly [0, 1, 1, 22].
# NOTE(review): indentation has been lost and statements are missing from
# this excerpt: the `bitstrings = [` opener and closing bracket, the `f`
# assignment, the loop body, and whatever populates the second `idx`
# (presumably a build/save/load sequence) are not visible (numbering jumps
# 94->97, 108->110, 110->113, 114->119, 119->121 and 121->123). Restore them
# from the original file.
def test_zero_vectors():
97
"0000000000011000001110000011111000101110111110000100000100000000",
98
"0000000000011000001110000011111000101110111110000100000100000001",
99
"0000000000011000001110000011111000101110111110000100000100000010",
100
"0010010100011001001000010001100101011110000000110000011110001100",
101
"1001011010000110100101101001111010001110100001101000111000001110",
102
"0111100101111001011110010010001100010111000111100001101100011111",
103
"0011000010011101000011010010111000101110100101111000011101001011",
104
"0011000010011100000011010010111000101110100101111000011101001011",
105
"1001100000111010001010000010110000111100100101001001010000000111",
106
"0000000000111101010100010001000101101001000000011000001101000000",
107
"1000101001010001011100010111001100110011001100110011001111001100",
108
"1110011001001111100110010001100100001011000011010010111100100111",
110
# One int (0 or 1) per character of each bit string.
vectors = [[int(bit) for bit in bitstring] for bitstring in bitstrings]
113
idx = AnnoyIndex(f, "hamming")
114
for i, v in enumerate(vectors):
119
# `idx` is rebound to a brand-new index before the query below.
idx = AnnoyIndex(f, "hamming")
121
js, ds = idx.get_nns_by_item(0, 5, include_distances=True)
123
# The second and third bit strings each differ from the first in one bit.
assert ds[:4] == [0, 1, 1, 22]