1
// Copyright (c) 2013 Spotify AB
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4
// use this file except in compliance with the License. You may obtain a copy of
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12
// License for the specific language governing permissions and limitations under
16
#include "kissrandom.h"
18
#include "structmember.h"
20
#if defined(_MSC_VER) && _MSC_VER == 1500
21
typedef signed __int32 int32_t;
27
#if defined(ANNOYLIB_USE_AVX512)
28
#define AVX_INFO "Using 512-bit AVX instructions"
29
#elif defined(ANNOYLIB_USE_AVX128)
30
#define AVX_INFO "Using 128-bit AVX instructions"
32
#define AVX_INFO "Not using AVX instructions"
36
#define COMPILER_INFO "Compiled using MSC"
37
#elif defined(__GNUC__)
38
#define COMPILER_INFO "Compiled on GCC"
40
#define COMPILER_INFO "Compiled on unknown platform"
43
#define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".")
45
#if PY_MAJOR_VERSION >= 3
50
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
54
#define PyInt_FromLong PyLong_FromLong
59
#ifdef ANNOYLIB_MULTITHREADED_BUILD
60
typedef AnnoyIndexMultiThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
62
typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy;
65
template class Annoy::AnnoyIndexInterface<int32_t, float>;
67
class HammingWrapper : public AnnoyIndexInterface<int32_t, float> {
68
// Wrapper class for Hamming distance, using composition.
69
// This translates binary (float) vectors into packed uint64_t vectors.
70
// This is questionable from a performance point of view. Should reconsider this solution.
72
int32_t _f_external, _f_internal;
73
AnnoyIndex<int32_t, uint64_t, Hamming, Kiss64Random, AnnoyIndexThreadedBuildPolicy> _index;
74
void _pack(const float* src, uint64_t* dst) const {
75
for (int32_t i = 0; i < _f_internal; i++) {
77
for (int32_t j = 0; j < 64 && i*64+j < _f_external; j++) {
78
dst[i] |= (uint64_t)(src[i * 64 + j] > 0.5) << j;
82
void _unpack(const uint64_t* src, float* dst) const {
83
for (int32_t i = 0; i < _f_external; i++) {
84
dst[i] = (src[i / 64] >> (i % 64)) & 1;
88
HammingWrapper(int f) : _f_external(f), _f_internal((f + 63) / 64), _index((f + 63) / 64) {};
89
bool add_item(int32_t item, const float* w, char**error) {
90
vector<uint64_t> w_internal(_f_internal, 0);
91
_pack(w, &w_internal[0]);
92
return _index.add_item(item, &w_internal[0], error);
94
bool build(int q, int n_threads, char** error) { return _index.build(q, n_threads, error); };
95
bool unbuild(char** error) { return _index.unbuild(error); };
96
bool save(const char* filename, bool prefault, char** error) { return _index.save(filename, prefault, error); };
97
void unload() { _index.unload(); };
98
bool load(const char* filename, bool prefault, char** error) { return _index.load(filename, prefault, error); };
99
float get_distance(int32_t i, int32_t j) const { return _index.get_distance(i, j); };
100
void get_nns_by_item(int32_t item, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
102
vector<uint64_t> distances_internal;
103
_index.get_nns_by_item(item, n, search_k, result, &distances_internal);
104
distances->insert(distances->begin(), distances_internal.begin(), distances_internal.end());
106
_index.get_nns_by_item(item, n, search_k, result, NULL);
109
void get_nns_by_vector(const float* w, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
110
vector<uint64_t> w_internal(_f_internal, 0);
111
_pack(w, &w_internal[0]);
113
vector<uint64_t> distances_internal;
114
_index.get_nns_by_vector(&w_internal[0], n, search_k, result, &distances_internal);
115
distances->insert(distances->begin(), distances_internal.begin(), distances_internal.end());
117
_index.get_nns_by_vector(&w_internal[0], n, search_k, result, NULL);
120
int32_t get_n_items() const { return _index.get_n_items(); };
121
int32_t get_n_trees() const { return _index.get_n_trees(); };
122
void verbose(bool v) { _index.verbose(v); };
123
void get_item(int32_t item, float* v) const {
124
vector<uint64_t> v_internal(_f_internal, 0);
125
_index.get_item(item, &v_internal[0]);
126
_unpack(&v_internal[0], v);
128
void set_seed(uint64_t q) { _index.set_seed(q); };
129
bool on_disk_build(const char* filename, char** error) { return _index.on_disk_build(filename, error); };
132
// annoy python object
136
AnnoyIndexInterface<int32_t, float>* ptr;
141
py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
142
py_annoy *self = (py_annoy *)type->tp_alloc(type, 0);
146
const char *metric = NULL;
148
static char const * kwlist[] = {"f", "metric", NULL};
149
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &self->f, &metric))
152
// This keeps coming up, see #368 etc
153
PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be removed "
154
"in future version of Annoy. Please pass metric='angular' explicitly.", 1);
155
self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random, AnnoyIndexThreadedBuildPolicy>(self->f);
156
} else if (!strcmp(metric, "angular")) {
157
self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random, AnnoyIndexThreadedBuildPolicy>(self->f);
158
} else if (!strcmp(metric, "euclidean")) {
159
self->ptr = new AnnoyIndex<int32_t, float, Euclidean, Kiss64Random, AnnoyIndexThreadedBuildPolicy>(self->f);
160
} else if (!strcmp(metric, "manhattan")) {
161
self->ptr = new AnnoyIndex<int32_t, float, Manhattan, Kiss64Random, AnnoyIndexThreadedBuildPolicy>(self->f);
162
} else if (!strcmp(metric, "hamming")) {
163
self->ptr = new HammingWrapper(self->f);
164
} else if (!strcmp(metric, "dot")) {
165
self->ptr = new AnnoyIndex<int32_t, float, DotProduct, Kiss64Random, AnnoyIndexThreadedBuildPolicy>(self->f);
167
PyErr_SetString(PyExc_ValueError, "No such metric");
171
return (PyObject *)self;
176
py_an_init(py_annoy *self, PyObject *args, PyObject *kwargs) {
177
// Seems to be needed for Python 3
178
const char *metric = NULL;
180
static char const * kwlist[] = {"f", "metric", NULL};
181
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &f, &metric))
188
py_an_dealloc(py_annoy* self) {
190
Py_TYPE(self)->tp_free((PyObject*)self);
194
static PyMemberDef py_annoy_members[] = {
195
{(char*)"f", T_INT, offsetof(py_annoy, f), 0,
197
{NULL} /* Sentinel */
202
py_an_load(py_annoy *self, PyObject *args, PyObject *kwargs) {
203
char *filename, *error;
204
bool prefault = false;
207
static char const * kwlist[] = {"fn", "prefault", NULL};
208
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
211
if (!self->ptr->load(filename, prefault, &error)) {
212
PyErr_SetString(PyExc_IOError, error);
221
py_an_save(py_annoy *self, PyObject *args, PyObject *kwargs) {
222
char *filename, *error;
223
bool prefault = false;
226
static char const * kwlist[] = {"fn", "prefault", NULL};
227
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
230
if (!self->ptr->save(filename, prefault, &error)) {
231
PyErr_SetString(PyExc_IOError, error);
240
get_nns_to_python(const vector<int32_t>& result, const vector<float>& distances, int include_distances) {
245
if ((l = PyList_New(result.size())) == NULL) {
248
for (size_t i = 0; i < result.size(); i++) {
249
PyObject* res = PyInt_FromLong(result[i]);
253
PyList_SetItem(l, i, res);
255
if (!include_distances)
258
if ((d = PyList_New(distances.size())) == NULL) {
262
for (size_t i = 0; i < distances.size(); i++) {
263
PyObject* dist = PyFloat_FromDouble(distances[i]);
267
PyList_SetItem(d, i, dist);
270
if ((t = PyTuple_Pack(2, l, d)) == NULL) {
286
bool check_constraints(py_annoy *self, int32_t item, bool building) {
288
PyErr_SetString(PyExc_IndexError, "Item index can not be negative");
290
} else if (!building && item >= self->ptr->get_n_items()) {
291
PyErr_SetString(PyExc_IndexError, "Item index larger than the largest item index");
299
py_an_get_nns_by_item(py_annoy *self, PyObject *args, PyObject *kwargs) {
300
int32_t item, n, search_k=-1, include_distances=0;
304
static char const * kwlist[] = {"i", "n", "search_k", "include_distances", NULL};
305
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ii|ii", (char**)kwlist, &item, &n, &search_k, &include_distances))
308
if (!check_constraints(self, item, false)) {
312
vector<int32_t> result;
313
vector<float> distances;
315
Py_BEGIN_ALLOW_THREADS;
316
self->ptr->get_nns_by_item(item, n, search_k, &result, include_distances ? &distances : NULL);
317
Py_END_ALLOW_THREADS;
319
return get_nns_to_python(result, distances, include_distances);
324
convert_list_to_vector(PyObject* v, int f, vector<float>* w) {
325
Py_ssize_t length = PyObject_Size(v);
330
PyErr_Format(PyExc_IndexError, "Vector has wrong length (expected %d, got %ld)", f, length);
334
for (int z = 0; z < f; z++) {
335
PyObject *key = PyInt_FromLong(z);
339
PyObject *pf = PyObject_GetItem(v, key);
344
double value = PyFloat_AsDouble(pf);
346
if (value == -1.0 && PyErr_Occurred()) {
355
py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) {
357
int32_t n, search_k=-1, include_distances=0;
361
static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", NULL};
362
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii", (char**)kwlist, &v, &n, &search_k, &include_distances))
365
vector<float> w(self->f);
366
if (!convert_list_to_vector(v, self->f, &w)) {
370
vector<int32_t> result;
371
vector<float> distances;
373
Py_BEGIN_ALLOW_THREADS;
374
self->ptr->get_nns_by_vector(&w[0], n, search_k, &result, include_distances ? &distances : NULL);
375
Py_END_ALLOW_THREADS;
377
return get_nns_to_python(result, distances, include_distances);
382
py_an_get_item_vector(py_annoy *self, PyObject *args) {
386
if (!PyArg_ParseTuple(args, "i", &item))
389
if (!check_constraints(self, item, false)) {
393
vector<float> v(self->f);
394
self->ptr->get_item(item, &v[0]);
395
PyObject* l = PyList_New(self->f);
399
for (int z = 0; z < self->f; z++) {
400
PyObject* dist = PyFloat_FromDouble(v[z]);
404
PyList_SetItem(l, z, dist);
416
py_an_add_item(py_annoy *self, PyObject *args, PyObject* kwargs) {
421
static char const * kwlist[] = {"i", "vector", NULL};
422
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "iO", (char**)kwlist, &item, &v))
425
if (!check_constraints(self, item, true)) {
429
vector<float> w(self->f);
430
if (!convert_list_to_vector(v, self->f, &w)) {
434
if (!self->ptr->add_item(item, &w[0], &error)) {
435
PyErr_SetString(PyExc_Exception, error);
444
py_an_on_disk_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
445
char *filename, *error;
448
static char const * kwlist[] = {"fn", NULL};
449
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", (char**)kwlist, &filename))
452
if (!self->ptr->on_disk_build(filename, &error)) {
453
PyErr_SetString(PyExc_IOError, error);
461
py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
466
static char const * kwlist[] = {"n_trees", "n_jobs", NULL};
467
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", (char**)kwlist, &q, &n_jobs))
472
Py_BEGIN_ALLOW_THREADS;
473
res = self->ptr->build(q, n_jobs, &error);
474
Py_END_ALLOW_THREADS;
476
PyErr_SetString(PyExc_Exception, error);
486
py_an_unbuild(py_annoy *self) {
491
if (!self->ptr->unbuild(&error)) {
492
PyErr_SetString(PyExc_Exception, error);
502
py_an_unload(py_annoy *self) {
513
py_an_get_distance(py_annoy *self, PyObject *args) {
517
if (!PyArg_ParseTuple(args, "ii", &i, &j))
520
if (!check_constraints(self, i, false) || !check_constraints(self, j, false)) {
524
double d = self->ptr->get_distance(i,j);
525
return PyFloat_FromDouble(d);
530
py_an_get_n_items(py_annoy *self) {
534
int32_t n = self->ptr->get_n_items();
535
return PyInt_FromLong(n);
539
py_an_get_n_trees(py_annoy *self) {
543
int32_t n = self->ptr->get_n_trees();
544
return PyInt_FromLong(n);
548
py_an_verbose(py_annoy *self, PyObject *args) {
552
if (!PyArg_ParseTuple(args, "i", &verbose))
555
self->ptr->verbose((bool)verbose);
562
py_an_set_seed(py_annoy *self, PyObject *args) {
566
if (!PyArg_ParseTuple(args, "i", &q))
569
self->ptr->set_seed(q);
575
static PyMethodDef AnnoyMethods[] = {
576
{"load", (PyCFunction)py_an_load, METH_VARARGS | METH_KEYWORDS, "Loads (mmaps) an index from disk."},
577
{"save", (PyCFunction)py_an_save, METH_VARARGS | METH_KEYWORDS, "Saves the index to disk."},
578
{"get_nns_by_item",(PyCFunction)py_an_get_nns_by_item, METH_VARARGS | METH_KEYWORDS, "Returns the `n` closest items to item `i`.\n\n:param search_k: the query will inspect up to `search_k` nodes.\n`search_k` gives you a run-time tradeoff between better accuracy and speed.\n`search_k` defaults to `n_trees * n` if not provided.\n\n:param include_distances: If `True`, this function will return a\n2 element tuple of lists. The first list contains the `n` closest items.\nThe second list contains the corresponding distances."},
579
{"get_nns_by_vector",(PyCFunction)py_an_get_nns_by_vector, METH_VARARGS | METH_KEYWORDS, "Returns the `n` closest items to vector `vector`.\n\n:param search_k: the query will inspect up to `search_k` nodes.\n`search_k` gives you a run-time tradeoff between better accuracy and speed.\n`search_k` defaults to `n_trees * n` if not provided.\n\n:param include_distances: If `True`, this function will return a\n2 element tuple of lists. The first list contains the `n` closest items.\nThe second list contains the corresponding distances."},
580
{"get_item_vector",(PyCFunction)py_an_get_item_vector, METH_VARARGS, "Returns the vector for item `i` that was previously added."},
581
{"add_item",(PyCFunction)py_an_add_item, METH_VARARGS | METH_KEYWORDS, "Adds item `i` (any nonnegative integer) with vector `v`.\n\nNote that it will allocate memory for `max(i)+1` items."},
582
{"on_disk_build",(PyCFunction)py_an_on_disk_build, METH_VARARGS | METH_KEYWORDS, "Build will be performed with storage on disk instead of RAM."},
583
{"build",(PyCFunction)py_an_build, METH_VARARGS | METH_KEYWORDS, "Builds a forest of `n_trees` trees.\n\nMore trees give higher precision when querying. After calling `build`,\nno more items can be added. `n_jobs` specifies the number of threads used to build the trees. `n_jobs=-1` uses all available CPU cores."},
584
{"unbuild",(PyCFunction)py_an_unbuild, METH_NOARGS, "Unbuilds the tree in order to allows adding new items.\n\nbuild() has to be called again afterwards in order to\nrun queries."},
585
{"unload",(PyCFunction)py_an_unload, METH_NOARGS, "Unloads an index from disk."},
586
{"get_distance",(PyCFunction)py_an_get_distance, METH_VARARGS, "Returns the distance between items `i` and `j`."},
587
{"get_n_items",(PyCFunction)py_an_get_n_items, METH_NOARGS, "Returns the number of items in the index."},
588
{"get_n_trees",(PyCFunction)py_an_get_n_trees, METH_NOARGS, "Returns the number of trees in the index."},
589
{"verbose",(PyCFunction)py_an_verbose, METH_VARARGS, ""},
590
{"set_seed",(PyCFunction)py_an_set_seed, METH_VARARGS, "Sets the seed of Annoy's random number generator."},
591
{NULL, NULL, 0, NULL} /* Sentinel */
595
static PyTypeObject PyAnnoyType = {
596
PyVarObject_HEAD_INIT(NULL, 0)
597
"annoy.Annoy", /*tp_name*/
598
sizeof(py_annoy), /*tp_basicsize*/
600
(destructor)py_an_dealloc, /*tp_dealloc*/
607
0, /*tp_as_sequence*/
615
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
616
ANNOY_DOC, /* tp_doc */
619
0, /* tp_richcompare */
620
0, /* tp_weaklistoffset */
623
AnnoyMethods, /* tp_methods */
624
py_annoy_members, /* tp_members */
628
0, /* tp_descr_get */
629
0, /* tp_descr_set */
630
0, /* tp_dictoffset */
631
(initproc)py_an_init, /* tp_init */
633
py_an_new, /* tp_new */
636
static PyMethodDef module_methods[] = {
637
{NULL} /* Sentinel */
640
#if PY_MAJOR_VERSION >= 3
641
static struct PyModuleDef moduledef = {
642
PyModuleDef_HEAD_INIT,
643
"annoylib", /* m_name */
644
ANNOY_DOC, /* m_doc */
646
module_methods, /* m_methods */
648
NULL, /* m_traverse */
654
PyObject *create_module(void) {
657
if (PyType_Ready(&PyAnnoyType) < 0)
660
#if PY_MAJOR_VERSION >= 3
661
m = PyModule_Create(&moduledef);
663
m = Py_InitModule("annoylib", module_methods);
669
Py_INCREF(&PyAnnoyType);
670
PyModule_AddObject(m, "Annoy", (PyObject *)&PyAnnoyType);
674
#if PY_MAJOR_VERSION >= 3
675
PyMODINIT_FUNC PyInit_annoylib(void) {
676
return create_module(); // it should return moudule object in py3
679
PyMODINIT_FUNC initannoylib(void) {
685
// vim: tabstop=2 shiftwidth=2