llama-index
96 lines · 3.5 KB
1"""Embedding utils for LlamaIndex."""
2
3import os
4from typing import TYPE_CHECKING, List, Optional, Union
5
6if TYPE_CHECKING:
7from llama_index.legacy.bridge.langchain import Embeddings as LCEmbeddings
8from llama_index.legacy.embeddings.base import BaseEmbedding
9from llama_index.legacy.embeddings.clip import ClipEmbedding
10from llama_index.legacy.embeddings.huggingface import HuggingFaceEmbedding
11from llama_index.legacy.embeddings.huggingface_utils import (
12INSTRUCTOR_MODELS,
13)
14from llama_index.legacy.embeddings.instructor import InstructorEmbedding
15from llama_index.legacy.embeddings.langchain import LangchainEmbedding
16from llama_index.legacy.embeddings.openai import OpenAIEmbedding
17from llama_index.legacy.llms.openai_utils import validate_openai_api_key
18from llama_index.legacy.token_counter.mock_embed_model import MockEmbedding
19from llama_index.legacy.utils import get_cache_dir
20
# Anything resolve_embed_model() accepts: a llama-index embedding, a LangChain
# Embeddings object (bridged lazily, hence the string annotation), or a string
# spec such as "default", "clip", "local", or "local:<model_name>".
EmbedType = Union[BaseEmbedding, "LCEmbeddings", str]
22
23
def save_embedding(embedding: List[float], file_path: str) -> None:
    """Persist a single embedding vector to *file_path* as comma-separated floats."""
    serialized = ",".join(str(value) for value in embedding)
    with open(file_path, "w") as out_file:
        out_file.write(serialized)
28
29
def load_embedding(file_path: str) -> List[float]:
    """Load embedding from file. Will only return first embedding in file.

    Args:
        file_path: Path to a file written by ``save_embedding`` (one
            comma-separated embedding per line).

    Returns:
        The embedding parsed from the first line of the file.

    Raises:
        ValueError: If the file is empty (original code raised an opaque
            ``UnboundLocalError`` in this case).
    """
    with open(file_path) as f:
        for line in f:
            # Only the first line is parsed; any further lines are ignored.
            return [float(x) for x in line.strip().split(",")]
    raise ValueError(f"No embedding found in file: {file_path}")
37
38
def resolve_embed_model(embed_model: Optional[EmbedType] = None) -> BaseEmbedding:
    """Resolve an ``EmbedType`` spec into a concrete ``BaseEmbedding``.

    Accepted values:
        * ``"default"`` -> ``OpenAIEmbedding`` (API key validated eagerly).
        * ``"clip"`` -> ``ClipEmbedding`` for image embeddings.
        * ``"local"`` / ``"local:<model_name>"`` -> an Instructor or
          HuggingFace embedding cached under the local cache dir.
        * a LangChain ``Embeddings`` object -> wrapped in ``LangchainEmbedding``.
        * ``None`` -> ``MockEmbedding`` (embeddings explicitly disabled).
        * any ``BaseEmbedding`` instance -> returned unchanged.

    Raises:
        ValueError: If the OpenAI embedding model cannot be loaded, or a
            string spec does not start with ``"local"``.
    """
    # LangChain is an optional dependency: import lazily so resolution still
    # works (minus LangChain support) when it is not installed.
    try:
        from llama_index.legacy.bridge.langchain import Embeddings as LCEmbeddings
    except ImportError:
        LCEmbeddings = None  # type: ignore

    if embed_model == "default":
        try:
            embed_model = OpenAIEmbedding()
            # Fail fast with a helpful message now rather than on first query.
            validate_openai_api_key(embed_model.api_key)
        except ValueError as e:
            raise ValueError(
                "\n******\n"
                "Could not load OpenAI embedding model. "
                "If you intended to use OpenAI, please check your OPENAI_API_KEY.\n"
                "Original error:\n"
                f"{e!s}"
                "\nConsider using embed_model='local'.\n"
                "Visit our documentation for more embedding options: "
                "https://docs.llamaindex.ai/en/stable/module_guides/models/"
                "embeddings.html#modules"
                "\n******"
            )

    # for image embeddings
    if embed_model == "clip":
        embed_model = ClipEmbedding()

    if isinstance(embed_model, str):
        # String specs look like "local" or "local:<model_name>"; split on the
        # first ":" only so model names containing ":" survive intact.
        splits = embed_model.split(":", 1)
        is_local = splits[0]
        model_name = splits[1] if len(splits) > 1 else None
        if is_local != "local":
            raise ValueError(
                "embed_model must start with str 'local' or of type BaseEmbedding"
            )

        # Models are downloaded/loaded via a shared on-disk cache.
        cache_folder = os.path.join(get_cache_dir(), "models")
        os.makedirs(cache_folder, exist_ok=True)

        # Instructor models require their dedicated wrapper class.
        # NOTE(review): model_name=None presumably falls back to each wrapper's
        # default model — confirm against the embedding class defaults.
        if model_name in INSTRUCTOR_MODELS:
            embed_model = InstructorEmbedding(
                model_name=model_name, cache_folder=cache_folder
            )
        else:
            embed_model = HuggingFaceEmbedding(
                model_name=model_name, cache_folder=cache_folder
            )

    # Wrap raw LangChain embeddings so they satisfy the BaseEmbedding interface.
    if LCEmbeddings is not None and isinstance(embed_model, LCEmbeddings):
        embed_model = LangchainEmbedding(embed_model)

    # None means the caller explicitly disabled embeddings.
    if embed_model is None:
        print("Embeddings have been explicitly disabled. Using MockEmbedding.")
        embed_model = MockEmbedding(embed_dim=1)

    return embed_model
97