# llm-applications
# 34 lines · 1.2 KB
1import os2
3from langchain.embeddings import OpenAIEmbeddings4from langchain.embeddings.huggingface import HuggingFaceEmbeddings5
6
def get_embedding_model(embedding_model_name, model_kwargs=None, encode_kwargs=None):
    """Build the embedding client that matches ``embedding_model_name``.

    Args:
        embedding_model_name: Model identifier. The exact name
            ``"text-embedding-ada-002"`` selects ``OpenAIEmbeddings``; any
            other value is passed to ``HuggingFaceEmbeddings`` as
            ``model_name``.
        model_kwargs: Forwarded to ``HuggingFaceEmbeddings`` as
            ``model_kwargs``. Not read on the OpenAI path. ``None`` is
            replaced by an empty dict.
        encode_kwargs: Forwarded to ``HuggingFaceEmbeddings`` as
            ``encode_kwargs``. Not read on the OpenAI path. ``None`` is
            replaced by an empty dict.

    Returns:
        The constructed ``OpenAIEmbeddings`` or ``HuggingFaceEmbeddings``
        instance.

    Raises:
        KeyError: On the OpenAI path, when ``OPENAI_API_BASE`` or
            ``OPENAI_API_KEY`` is missing from the environment.
    """
    if embedding_model_name == "text-embedding-ada-002":
        # Endpoint and credentials come from the environment only; a missing
        # variable surfaces as KeyError rather than a silent default.
        return OpenAIEmbeddings(
            model=embedding_model_name,
            openai_api_base=os.environ["OPENAI_API_BASE"],
            openai_api_key=os.environ["OPENAI_API_KEY"],
        )

    # Every other model name is handled by the Hugging Face wrapper.
    return HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs={} if model_kwargs is None else model_kwargs,
        encode_kwargs={} if encode_kwargs is None else encode_kwargs,
    )
22
class EmbedChunks:
    """Callable that adds embeddings to a batch of text chunks.

    The embedding model is created once in ``__init__`` and reused for every
    batch passed to ``__call__``.
    """

    def __init__(self, model_name, device="cuda", batch_size=100):
        """Create the embedding model used for all subsequent batches.

        Args:
            model_name: Passed to ``get_embedding_model`` as
                ``embedding_model_name``.
            device: Value placed under the ``"device"`` key of both the
                model and the encode keyword arguments. Defaults to
                ``"cuda"``, the value that used to be hard-coded.
            batch_size: Value placed under the ``"batch_size"`` key of the
                encode keyword arguments. Defaults to ``100``, the value
                that used to be hard-coded.
        """
        # Embedding model
        self.embedding_model = get_embedding_model(
            embedding_model_name=model_name,
            model_kwargs={"device": device},
            encode_kwargs={"device": device, "batch_size": batch_size},
        )

    def __call__(self, batch):
        """Embed ``batch["text"]`` and return it alongside the inputs.

        Args:
            batch: Mapping that provides ``"text"`` and ``"source"`` entries.

        Returns:
            A dict with the original ``"text"`` and ``"source"`` values and
            an ``"embeddings"`` entry holding the result of
            ``embed_documents`` on the text.
        """
        embeddings = self.embedding_model.embed_documents(batch["text"])
        return {"text": batch["text"], "source": batch["source"], "embeddings": embeddings}