llama-index

import argparse
from typing import Any, Optional

from llama_index.legacy.command_line.rag import RagCLI, default_ragcli_persist_dir
from llama_index.legacy.embeddings import OpenAIEmbedding
from llama_index.legacy.ingestion import IngestionCache, IngestionPipeline
from llama_index.legacy.llama_dataset.download import (
    LLAMA_DATASETS_LFS_URL,
    LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL,
    download_llama_dataset,
)
from llama_index.legacy.llama_pack.download import LLAMA_HUB_URL, download_llama_pack
from llama_index.legacy.storage.docstore import SimpleDocumentStore
from llama_index.legacy.text_splitter import SentenceSplitter
from llama_index.legacy.vector_stores import ChromaVectorStore


def handle_download_llama_pack(
    llama_pack_class: Optional[str] = None,
    download_dir: Optional[str] = None,
    llama_hub_url: str = LLAMA_HUB_URL,
    **kwargs: Any,
) -> None:
    assert llama_pack_class is not None
    assert download_dir is not None

    download_llama_pack(
        llama_pack_class=llama_pack_class,
        download_dir=download_dir,
        llama_hub_url=llama_hub_url,
    )
    print(f"Successfully downloaded {llama_pack_class} to {download_dir}")


def handle_download_llama_dataset(
    llama_dataset_class: Optional[str] = None,
    download_dir: Optional[str] = None,
    llama_hub_url: str = LLAMA_HUB_URL,
    llama_datasets_lfs_url: str = LLAMA_DATASETS_LFS_URL,
    llama_datasets_source_files_tree_url: str = LLAMA_DATASETS_SOURCE_FILES_GITHUB_TREE_URL,
    **kwargs: Any,
) -> None:
    assert llama_dataset_class is not None
    assert download_dir is not None

    download_llama_dataset(
        llama_dataset_class=llama_dataset_class,
        download_dir=download_dir,
        llama_hub_url=llama_hub_url,
        llama_datasets_lfs_url=llama_datasets_lfs_url,
        llama_datasets_source_files_tree_url=llama_datasets_source_files_tree_url,
        show_progress=True,
        load_documents=False,
    )

    print(f"Successfully downloaded {llama_dataset_class} to {download_dir}")


def default_rag_cli() -> RagCLI:
    """Build the default RagCLI: a Chroma-backed ingestion pipeline persisted locally."""
    import chromadb

    # Persist the vector store, docstore, and ingestion cache under the default
    # RAG CLI directory.
    persist_dir = default_ragcli_persist_dir()
    chroma_client = chromadb.PersistentClient(path=persist_dir)
    chroma_collection = chroma_client.create_collection("default", get_or_create=True)
    vector_store = ChromaVectorStore(
        chroma_collection=chroma_collection, persist_dir=persist_dir
    )
    docstore = SimpleDocumentStore()

    ingestion_pipeline = IngestionPipeline(
        transformations=[SentenceSplitter(), OpenAIEmbedding()],
        vector_store=vector_store,
        docstore=docstore,
        cache=IngestionCache(),
    )
    # Restore any previously persisted pipeline state; a missing directory just
    # means this is a cold start.
    try:
        ingestion_pipeline.load(persist_dir=persist_dir)
    except FileNotFoundError:
        pass

    return RagCLI(
        ingestion_pipeline=ingestion_pipeline,
        verbose=False,
        persist_dir=persist_dir,
    )
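# Sketch of a customized variant (assumption: only the transformations differ from the
# default above; the chunk_size value and variable reuse are illustrative):
# custom_pipeline = IngestionPipeline(
#     transformations=[SentenceSplitter(chunk_size=256), OpenAIEmbedding()],
#     vector_store=vector_store,
#     docstore=SimpleDocumentStore(),
#     cache=IngestionCache(),
# )
# custom_rag_cli = RagCLI(ingestion_pipeline=custom_pipeline, persist_dir=persist_dir)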


def main() -> None:
    parser = argparse.ArgumentParser(description="LlamaIndex CLI tool.")

    # Subparsers for the main commands
    subparsers = parser.add_subparsers(title="commands", dest="command", required=True)

    # llama rag command
    llamarag_parser = subparsers.add_parser(
        "rag", help="Ask a question to a document / a directory of documents."
    )
    RagCLI.add_parser_args(llamarag_parser, default_rag_cli)

    # download llamapacks command
    llamapack_parser = subparsers.add_parser(
        "download-llamapack", help="Download a llama-pack"
    )
    llamapack_parser.add_argument(
        "llama_pack_class",
        type=str,
        help=(
            "The name of the llama-pack class you want to download, "
            "such as `GmailOpenAIAgentPack`."
        ),
    )
    llamapack_parser.add_argument(
        "-d",
        "--download-dir",
        type=str,
        default="./llama_packs",
        help="Custom dirpath to download the pack into.",
    )
    llamapack_parser.add_argument(
        "--llama-hub-url",
        type=str,
        default=LLAMA_HUB_URL,
        help="URL to llama hub.",
    )
    llamapack_parser.set_defaults(
        func=lambda args: handle_download_llama_pack(**vars(args))
    )

    # download llamadatasets command
    llamadataset_parser = subparsers.add_parser(
        "download-llamadataset", help="Download a llama-dataset"
    )
    llamadataset_parser.add_argument(
        "llama_dataset_class",
        type=str,
        help=(
            "The name of the llama-dataset class you want to download, "
            "such as `PaulGrahamEssayDataset`."
        ),
    )
    llamadataset_parser.add_argument(
        "-d",
        "--download-dir",
        type=str,
        default="./llama_datasets",
        help="Custom dirpath to download the dataset into.",
    )
    llamadataset_parser.add_argument(
        "--llama-hub-url",
        type=str,
        default=LLAMA_HUB_URL,
        help="URL to llama hub.",
    )
    llamadataset_parser.add_argument(
        "--llama-datasets-lfs-url",
        type=str,
        default=LLAMA_DATASETS_LFS_URL,
        help="URL to llama datasets.",
    )
    llamadataset_parser.set_defaults(
        func=lambda args: handle_download_llama_dataset(**vars(args))
    )

    # Parse the command-line arguments
    args = parser.parse_args()

    # Call the appropriate function based on the command
    args.func(args)


if __name__ == "__main__":
    main()
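# Usage sketch (assumes this module is exposed as the `llamaindex-cli` console script;
# the entry-point name is not defined in this file):
#
#   llamaindex-cli download-llamapack GmailOpenAIAgentPack -d ./llama_packs
#   llamaindex-cli download-llamadataset PaulGrahamEssayDataset -d ./llama_datasets
#   llamaindex-cli rag ...  # flags for this subcommand are added by RagCLI.add_parser_args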
