3
from pathlib import Path
5
from haystack.lazy_imports import LazyImport
7
from fastrag.utils import init_cls, init_haystack_cls, load_yaml
9
logger = logging.getLogger(__name__)

# Optional dependency guard: the Qdrant import runs inside haystack's
# LazyImport context manager, which carries an install hint as its message.
# QdrantDocumentStore is not referenced by name in the visible part of this
# script.
# NOTE(review): presumably the import is what lets init_haystack_cls resolve
# the store class named in the YAML config - confirm before removing it.
with LazyImport("Please install Qdrant client by: 'pip install qdrant-haystack'") as qdrant_import:
    from qdrant_haystack import QdrantDocumentStore
14
def main(argv=None) -> None:
    """Index a dataset into a Qdrant-backed document store and embed it.

    Three YAML files configure the run. Each one is loaded with ``load_yaml``
    and its ``type`` entry is popped and used as the class to instantiate;
    the remaining entries are passed along as that class's parameters:

    * ``--store``: document store, built with ``init_haystack_cls``.
    * ``--data``: dataset loader, built with ``init_cls``.
    * ``--embedder``: embedder, built with ``init_haystack_cls`` after the
      store has been injected under the ``document_store`` key.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read them from ``sys.argv``.

    Raises:
        SystemExit: Raised by argparse on ``--help`` or invalid arguments.
        KeyError: If a config has no ``type`` entry, or the embedder config
            has no ``batch_size`` entry.
    """
    parser = argparse.ArgumentParser("Create an index using Qdrant as a backend")
    parser.add_argument("--store", type=Path, required=True)
    parser.add_argument("--data", type=Path, required=True)
    parser.add_argument("--embedder", type=Path, required=True)
    parser.add_argument("--batch_size", type=int, required=False)
    args = parser.parse_args(argv)

    # Surface the install hint now if the guarded qdrant_haystack import
    # failed. Done after argument parsing so --help and usage errors do not
    # depend on the optional package being installed.
    qdrant_import.check()

    store_params = load_yaml(args.store)
    data_params = load_yaml(args.data)
    emb_params = load_yaml(args.embedder)

    store_cls = store_params.pop("type")
    store = init_haystack_cls(store_cls, store_params)
    logger.info("Loaded store backend")

    data_cls = data_params.pop("type")
    data = init_cls(data_cls, data_params)
    logger.info("Done loading dataset")

    logger.info("Indexing documents")
    # `docs` was never assigned before being written to the store, which
    # raised NameError.
    # NOTE(review): assumes the configured loader exposes process() returning
    # the documents to index, as fastRAG's data loaders do - confirm against
    # the loader class named in the data config.
    docs = data.process()
    # --batch_size only controls document writes; unset (or 0) falls back to 100.
    store.write_documents(docs, batch_size=args.batch_size or 100)

    logger.info("Loading Embedder")
    emb_cls = emb_params.pop("type")
    emb_params["document_store"] = store
    emb = init_haystack_cls(emb_cls, emb_params)

    logger.info("Encoding vectors")
    # The embedding batch size comes from the embedder YAML, not from
    # --batch_size; a config without "batch_size" raises KeyError here.
    store.update_embeddings(emb, batch_size=emb_params["batch_size"])


if __name__ == "__main__":
    main()