llama-index
274 lines · 9.3 KB
1"""Download."""
2
3import json
4import logging
5import os
6import subprocess
7import sys
8from enum import Enum
9from importlib import util
10from pathlib import Path
11from typing import Any, Dict, List, Optional, Union
12
13import pkg_resources
14import requests
15from pkg_resources import DistributionNotFound
16
17from llama_index.legacy.download.utils import (
18get_exports,
19get_file_content,
20initialize_directory,
21rewrite_exports,
22)
23
# Base URL for raw file content in the run-llama/llama-hub GitHub repo.
# (Plain string: the original used an f-string with no placeholders.)
LLAMA_HUB_CONTENTS_URL = "https://raw.githubusercontent.com/run-llama/llama-hub/main"
LLAMA_HUB_PATH = "/llama_hub"
LLAMA_HUB_URL = LLAMA_HUB_CONTENTS_URL + LLAMA_HUB_PATH

# Filesystem paths are accepted as plain strings or pathlib.Path objects.
PATH_TYPE = Union[str, Path]

logger = logging.getLogger(__name__)

# Endpoint used for best-effort download-count analytics (see track_download).
LLAMAHUB_ANALYTICS_PROXY_SERVER = "https://llamahub.ai/api/analytics/downloads"
32
33
class MODULE_TYPE(str, Enum):
    """Kinds of modules downloadable from LlamaHub.

    Inherits from ``str`` so members compare equal to their plain string
    values (e.g. ``MODULE_TYPE.LOADER == "loader"``).
    """

    LOADER = "loader"
    TOOL = "tool"
    LLAMAPACK = "llamapack"
    DATASETS = "datasets"
39
40
def get_module_info(
    local_dir_path: Union[str, Path],
    remote_dir_path: Union[str, Path],
    module_class: str,
    refresh_cache: bool = False,
    library_path: str = "library.json",
    disable_library_cache: bool = False,
) -> Dict:
    """Resolve a module class name to its module id and extra files.

    Looks the class up in a locally cached ``library.json`` first, and falls
    back to fetching the library file from the remote repo on a cache miss.

    Args:
        local_dir_path: Local directory holding the cached library file.
        remote_dir_path: Base URL of the remote repo to fetch from.
        module_class: Class name to look up, e.g. ``SimpleWebPageReader``.
        refresh_cache: If True, skip the local cache and fetch remotely.
        library_path: File name of the library file.
        disable_library_cache: If True, do not write the fetched library
            back to the local cache.

    Returns:
        Dict with ``module_id`` (e.g. ``web/simple_web``) and ``extra_files``
        (e.g. ``["utils.py"]``).

    Raises:
        ValueError: If the class name is not present in the library.
    """
    if isinstance(local_dir_path, str):
        local_dir_path = Path(local_dir_path)

    local_library_path = f"{local_dir_path}/{library_path}"
    module_id = None  # e.g. `web/simple_web`
    extra_files = []  # e.g. `web/simple_web/utils.py`

    # Check the local cache first (unless the caller forces a refresh).
    if not refresh_cache and os.path.exists(local_library_path):
        with open(local_library_path) as f:
            library = json.load(f)
        if module_class in library:
            module_id = library[module_class]["id"]
            extra_files = library[module_class].get("extra_files", [])

    # Fetch an up-to-date library from the remote repo on a cache miss.
    if module_id is None:
        library_raw_content, _ = get_file_content(
            str(remote_dir_path), f"/{library_path}"
        )
        library = json.loads(library_raw_content)
        if module_class not in library:
            raise ValueError("Loader class name not found in library")

        module_id = library[module_class]["id"]
        extra_files = library[module_class].get("extra_files", [])

        # Persist the freshly fetched library so later calls can skip the
        # network round-trip. exist_ok avoids the check-then-create race.
        if not disable_library_cache:
            local_library_dir = os.path.dirname(local_library_path)
            os.makedirs(local_library_dir, exist_ok=True)
            with open(local_library_path, "w") as f:
                f.write(library_raw_content)

    # Defensive: should be unreachable (the remote branch raises on a miss).
    if module_id is None:
        raise ValueError("Loader class name not found in library")

    return {
        "module_id": module_id,
        "extra_files": extra_files,
    }
94
95
def download_module_and_reqs(
    local_dir_path: Union[str, Path],
    remote_dir_path: Union[str, Path],
    module_id: str,
    extra_files: List[str],
    refresh_cache: bool = False,
    use_gpt_index_import: bool = False,
    base_file_name: str = "base.py",
    override_path: bool = False,
) -> None:
    """Download a module's files and install its requirements.

    Fetches ``base_file_name`` and any ``extra_files`` from the remote repo
    into a local module directory, then pip-installs ``requirements.txt``
    dependencies that are not already satisfied.

    Args:
        local_dir_path: Parent directory for downloaded modules.
        remote_dir_path: Base URL of the remote repo.
        module_id: Module id, e.g. ``web/simple_web``.
        extra_files: Additional file names to download alongside the base file.
        refresh_cache: If True, re-download even if the module dir exists.
        use_gpt_index_import: If True, rewrite import statements in the base
            file (currently a no-op — see NOTE below).
        base_file_name: Entry-point file name to fetch.
        override_path: If True, download directly into ``local_dir_path``
            instead of a per-module subdirectory.
    """
    if isinstance(local_dir_path, str):
        local_dir_path = Path(local_dir_path)

    if override_path:
        module_path = str(local_dir_path)
    else:
        module_path = f"{local_dir_path}/{module_id}"

    if refresh_cache or not os.path.exists(module_path):
        os.makedirs(module_path, exist_ok=True)

        basepy_raw_content, _ = get_file_content(
            str(remote_dir_path), f"/{module_id}/{base_file_name}"
        )
        if use_gpt_index_import:
            # NOTE(review): both replacements map a string onto itself and so
            # do nothing — presumably residue of the gpt_index -> llama_index
            # migration codemod. Preserved as-is; confirm intended mapping.
            basepy_raw_content = basepy_raw_content.replace(
                "import llama_index.legacy", "import llama_index.legacy"
            )
            basepy_raw_content = basepy_raw_content.replace(
                "from llama_index.legacy", "from llama_index.legacy"
            )

        with open(f"{module_path}/{base_file_name}", "w") as f:
            f.write(basepy_raw_content)

    # Get content of extra files if there are any
    # and write them under the loader directory
    for extra_file in extra_files:
        extra_file_raw_content, _ = get_file_content(
            str(remote_dir_path), f"/{module_id}/{extra_file}"
        )
        # If the extra file is an __init__.py file, we need to
        # add the exports to the __init__.py file in the modules directory
        if extra_file == "__init__.py":
            loader_exports = get_exports(extra_file_raw_content)
            existing_exports = []
            init_file_path = local_dir_path / "__init__.py"
            # if the __init__.py file does not exist, create it ("a+");
            # otherwise open for read/write ("r+")
            mode = "a+" if not os.path.exists(init_file_path) else "r+"
            with open(init_file_path, mode) as f:
                f.write(f"from .{module_id} import {', '.join(loader_exports)}")
                existing_exports = get_exports(f.read())
            rewrite_exports(existing_exports + loader_exports, str(local_dir_path))

        with open(f"{module_path}/{extra_file}", "w") as f:
            f.write(extra_file_raw_content)

    # Fetch requirements.txt from the remote repo if not present locally.
    requirements_path = f"{local_dir_path}/requirements.txt"

    if not os.path.exists(requirements_path):
        # NOTE: need to check the status code
        response_txt, status_code = get_file_content(
            str(remote_dir_path), f"/{module_id}/requirements.txt"
        )
        if status_code == 200:
            with open(requirements_path, "w") as f:
                f.write(response_txt)

    # Install dependencies if there are any and not already installed
    if os.path.exists(requirements_path):
        try:
            # Use a context manager so the file handle is closed
            # deterministically (the original leaked the open handle).
            with Path(requirements_path).open() as f:
                requirements = list(pkg_resources.parse_requirements(f))
            pkg_resources.require([str(r) for r in requirements])
        except DistributionNotFound:
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", "-r", requirements_path]
            )
177
178
def download_llama_module(
    module_class: str,
    llama_hub_url: str = LLAMA_HUB_URL,
    refresh_cache: bool = False,
    custom_dir: Optional[str] = None,
    custom_path: Optional[str] = None,
    library_path: str = "library.json",
    base_file_name: str = "base.py",
    use_gpt_index_import: bool = False,
    disable_library_cache: bool = False,
    override_path: bool = False,
    skip_load: bool = False,
) -> Any:
    """Download a module from LlamaHub.

    Can be a loader, tool, pack, or more.

    Args:
        module_class: The name of the llama module class you want to download,
            such as `GmailOpenAIAgentPack`.
        llama_hub_url: Base URL of the LlamaHub contents repo.
        refresh_cache: If true, the local cache will be skipped and the
            loader will be fetched directly from the remote repo.
        custom_dir: Custom dir name to download loader into (under parent folder).
        custom_path: Custom dirpath to download loader into.
        library_path: File name of the library file.
        base_file_name: Name of the module's entry-point file.
        use_gpt_index_import: Temporary migration workaround; passed through
            to the downloader to rewrite import statements in the loader
            files (NOTE: currently has no effect on the downloaded content).
        disable_library_cache: If true, do not cache the library file locally.
        override_path: If true, download files directly into the target
            directory rather than a per-module subdirectory.
        skip_load: If true, download only and return None without importing.

    Returns:
        A Loader, a Pack, an Agent class, or None when `skip_load` is true.

    Raises:
        ValueError: If the module's base file cannot be located for import.
    """
    # create directory / get path
    dirpath = initialize_directory(custom_path=custom_path, custom_dir=custom_dir)

    # fetch info from library.json file
    module_info = get_module_info(
        local_dir_path=dirpath,
        remote_dir_path=llama_hub_url,
        module_class=module_class,
        refresh_cache=refresh_cache,
        library_path=library_path,
        disable_library_cache=disable_library_cache,
    )
    module_id = module_info["module_id"]
    extra_files = module_info["extra_files"]

    # download the module, install requirements
    download_module_and_reqs(
        local_dir_path=dirpath,
        remote_dir_path=llama_hub_url,
        module_id=module_id,
        extra_files=extra_files,
        refresh_cache=refresh_cache,
        use_gpt_index_import=use_gpt_index_import,
        base_file_name=base_file_name,
        override_path=override_path,
    )
    if skip_load:
        return None

    # Load the module into memory: locate the base file (the two branches
    # previously duplicated the spec construction and error handling).
    if override_path:
        path = f"{dirpath}/{base_file_name}"
    else:
        path = f"{dirpath}/{module_id}/{base_file_name}"
    spec = util.spec_from_file_location("custom_module", location=path)
    if spec is None:
        raise ValueError(f"Could not find file: {path}.")

    module = util.module_from_spec(spec)
    spec.loader.exec_module(module)  # type: ignore

    return getattr(module, module_class)
259
260
def track_download(module_class: str, module_type: str) -> None:
    """Report a download event to the LlamaHub analytics proxy (best-effort).

    Failures are logged and swallowed so analytics never break a download.

    Args:
        module_class: Name of the llama module being downloaded,
            e.g. `GmailOpenAIAgentPack`.
        module_type: One of "loader", "tool", "llamapack", or "datasets".
    """
    payload = {"type": module_type, "plugin": module_class}
    try:
        requests.post(LLAMAHUB_ANALYTICS_PROXY_SERVER, json=payload)
    except Exception as e:
        logger.info(f"Error tracking downloads for {module_class} : {e}")
275