llama-index

Форк
0
387 строк · 11.5 Кб
1
"""
2
Github API client for the LlamaIndex library.
3

4
This module contains the Github API client for the LlamaIndex library.
5
It is used by the Github readers to retrieve the data from Github.
6
"""
7

8
import os
9
from dataclasses import dataclass
10
from typing import Any, Dict, List, Optional
11

12
from dataclasses_json import DataClassJsonMixin
13

14

15
@dataclass
class GitTreeResponseModel(DataClassJsonMixin):
    """
    Dataclass for the response from the Github API's getTree endpoint.

    Attributes:
        - sha (str): SHA1 checksum ID of the tree.
        - url (str): URL for the tree.
        - tree (List[GitTreeObject]): List of objects in the tree.
        - truncated (bool): Whether the tree is truncated.

    Examples:
        >>> tree = await client.get_tree("owner", "repo", "tree_sha")
        >>> tree.sha
    """

    @dataclass
    class GitTreeObject(DataClassJsonMixin):
        """
        Dataclass for a single entry of the tree.

        Attributes:
            - path (str): Path to the object.
            - mode (str): Mode of the object.
            - type (str): Type of the object (e.g. "blob").
            - sha (str): SHA1 checksum ID of the object.
            - url (str): URL for the object.
            - size (Optional[int]): Size of the object (only for blobs);
                defaults to None when absent.
        """

        path: str
        mode: str
        type: str
        sha: str
        url: str
        size: Optional[int] = None

    sha: str
    url: str
    # GitTreeObject must be declared above this field: the annotation is
    # evaluated while the class body executes.
    tree: List[GitTreeObject]
    truncated: bool
56

57

58
@dataclass
class GitBlobResponseModel(DataClassJsonMixin):
    """
    Typed view of the JSON document returned by the Github getBlob endpoint.

    Attributes:
        - content (str): Blob content, as delivered by the API
            (see `encoding` for how it is encoded).
        - encoding (str): Name of the encoding applied to `content`.
        - url (str): API URL of the blob.
        - sha (str): SHA1 checksum ID of the blob.
        - size (int): Size of the blob.
        - node_id (str): Node ID of the blob.
    """

    content: str
    encoding: str
    url: str
    sha: str
    size: int
    node_id: str
78

79

80
@dataclass
class GitCommitResponseModel(DataClassJsonMixin):
    """
    Dataclass for the response from the Github API's getCommit endpoint.

    Only the part of the response needed to reach the tree SHA is modelled.

    Attributes:
        - commit (Commit): Commit object of the response; the tree SHA is
            available as `commit.tree.sha`.
    """

    @dataclass
    class Commit(DataClassJsonMixin):
        """
        Dataclass for the commit object in the response. (response.commit).

        Attributes:
            - tree (Tree): Tree object referenced by the commit.
        """

        @dataclass
        class Tree(DataClassJsonMixin):
            """
            Dataclass for the tree object in the commit. (response.commit.tree).

            Attributes:
                - sha (str): SHA of the tree object.
            """

            sha: str

        tree: Tree

    commit: Commit
107

108

109
@dataclass
class GitBranchResponseModel(DataClassJsonMixin):
    """
    Dataclass for the response from the Github API's getBranch endpoint.

    Only the part of the response needed to reach the tree SHA is modelled.

    Attributes:
        - commit (Commit): Commit object for the branch; the tree SHA is
            available as `commit.commit.tree.sha`.
    """

    @dataclass
    class Commit(DataClassJsonMixin):
        """
        Dataclass for the commit object in the branch. (branch.commit).

        Attributes:
            - commit (Commit): Nested commit object holding the tree.
        """

        @dataclass
        class Commit(DataClassJsonMixin):
            """
            Dataclass for the commit object in the commit. (branch.commit.commit).

            Attributes:
                - tree (Tree): Tree object referenced by the commit.
            """

            @dataclass
            class Tree(DataClassJsonMixin):
                """
                Dataclass for the tree object in the commit.

                Usage: branch.commit.commit.tree.sha

                Attributes:
                    - sha (str): SHA of the tree object.
                """

                sha: str

            tree: Tree

        commit: Commit

    commit: Commit
141

142

143
class GithubClient:
    """
    An asynchronous client for interacting with the Github API.

    This client is used for making API requests to Github.
    It provides methods for accessing the Github API endpoints.
    The client requires a Github token for authentication,
    which can be passed as an argument or set as an environment variable.
    If no Github token is provided, the client will raise a ValueError.

    All request methods are coroutines and must be awaited.

    Examples:
        >>> client = GithubClient("my_github_token")
        >>> branch_info = await client.get_branch("owner", "repo", "branch")
    """

    DEFAULT_BASE_URL = "https://api.github.com"
    DEFAULT_API_VERSION = "2022-11-28"

    def __init__(
        self,
        github_token: Optional[str] = None,
        base_url: str = DEFAULT_BASE_URL,
        api_version: str = DEFAULT_API_VERSION,
        verbose: bool = False,
    ) -> None:
        """
        Initialize the GithubClient.

        Args:
            - github_token (str): Github token for authentication.
                If not provided, the client will try to get it from
                the GITHUB_TOKEN environment variable.
            - base_url (str): Base URL for the Github API
                (defaults to "https://api.github.com").
            - api_version (str): Github API version (defaults to "2022-11-28").
            - verbose (bool): Stored on the instance as `_verbose`
                (defaults to False). No method of this class reads it.

        Raises:
            ValueError: If no Github token is provided.
        """
        if github_token is None:
            github_token = os.getenv("GITHUB_TOKEN")
            if github_token is None:
                raise ValueError(
                    "Please provide a Github token. "
                    "You can do so by passing it as an argument to the GithubReader, "
                    "or by setting the GITHUB_TOKEN environment variable."
                )

        self._base_url = base_url
        self._api_version = api_version
        self._verbose = verbose

        # Endpoint name -> URL path template; placeholders are filled from the
        # keyword arguments given to `request`.
        self._endpoints = {
            "getTree": "/repos/{owner}/{repo}/git/trees/{tree_sha}",
            "getBranch": "/repos/{owner}/{repo}/branches/{branch}",
            "getBlob": "/repos/{owner}/{repo}/git/blobs/{file_sha}",
            "getCommit": "/repos/{owner}/{repo}/commits/{commit_sha}",
        }

        # Headers sent with every request; per-call headers are merged on top.
        self._headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {github_token}",
            "X-GitHub-Api-Version": f"{self._api_version}",
        }

    def get_all_endpoints(self) -> Dict[str, str]:
        """Get all available endpoints as a copy of the name -> path mapping."""
        return dict(self._endpoints)

    async def request(
        self,
        endpoint: str,
        method: str,
        headers: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Make an API request to the Github API.

        This method is used for making API requests to the Github API.
        It is used internally by the other methods in the client.

        Args:
            - `endpoint (str)`: Name of the endpoint to make the request to
                (a key of `get_all_endpoints()`).
            - `method (str)`: HTTP method to use for the request.
            - `headers (dict)`: Extra HTTP headers to include in the request.
                They override the client's default headers on key collision.
                Defaults to no extra headers.
            - `**kwargs`: Keyword arguments used to fill the placeholders of
                the endpoint URL template.

        Returns:
            - `response (httpx.Response)`: Response from the API request.

        Raises:
            - ImportError: If the `httpx` library is not installed.
            - KeyError: If `endpoint` is unknown or a URL placeholder is
                missing from `**kwargs`.
            - httpx.HTTPError: If the API request fails or the response has
                an error status code.

        Examples:
            >>> response = await client.request("getTree", "GET",
                                owner="owner", repo="repo",
                                tree_sha="tree_sha")
        """
        try:
            import httpx
        except ImportError as err:
            raise ImportError(
                "Please install httpx to use the GithubRepositoryReader. "
                "You can do so by running `pip install httpx`."
            ) from err

        # Resolve the URL first so that a bad endpoint name or a missing
        # placeholder fails before any HTTP client is created.
        url_path = self._endpoints[endpoint].format(**kwargs)

        # `headers` defaults to None rather than a shared mutable dict.
        merged_headers = {**self._headers, **(headers or {})}

        async with httpx.AsyncClient(
            headers=merged_headers, base_url=self._base_url
        ) as http_client:
            try:
                response = await http_client.request(method, url=url_path)
                response.raise_for_status()
            except httpx.HTTPError as excp:
                # Report the failing URL, then let the caller handle the error.
                print(f"HTTP Exception for {excp.request.url} - {excp}")
                raise
            return response

    async def get_branch(
        self, owner: str, repo: str, branch: str
    ) -> GitBranchResponseModel:
        """
        Get information about a branch. (Github API endpoint: getBranch).

        Args:
            - `owner (str)`: Owner of the repository.
            - `repo (str)`: Name of the repository.
            - `branch (str)`: Name of the branch.

        Returns:
            - `branch_info (GitBranchResponseModel)`: Information about the branch.

        Examples:
            >>> branch_info = await client.get_branch("owner", "repo", "branch")
        """
        response = await self.request(
            "getBranch", "GET", owner=owner, repo=repo, branch=branch
        )
        return GitBranchResponseModel.from_json(response.text)

    async def get_tree(
        self, owner: str, repo: str, tree_sha: str
    ) -> GitTreeResponseModel:
        """
        Get information about a tree. (Github API endpoint: getTree).

        Args:
            - `owner (str)`: Owner of the repository.
            - `repo (str)`: Name of the repository.
            - `tree_sha (str)`: SHA of the tree.

        Returns:
            - `tree_info (GitTreeResponseModel)`: Information about the tree.

        Examples:
            >>> tree_info = await client.get_tree("owner", "repo", "tree_sha")
        """
        response = await self.request(
            "getTree", "GET", owner=owner, repo=repo, tree_sha=tree_sha
        )
        return GitTreeResponseModel.from_json(response.text)

    async def get_blob(
        self, owner: str, repo: str, file_sha: str
    ) -> GitBlobResponseModel:
        """
        Get information about a blob. (Github API endpoint: getBlob).

        Args:
            - `owner (str)`: Owner of the repository.
            - `repo (str)`: Name of the repository.
            - `file_sha (str)`: SHA of the file.

        Returns:
            - `blob_info (GitBlobResponseModel)`: Information about the blob.

        Examples:
            >>> blob_info = await client.get_blob("owner", "repo", "file_sha")
        """
        response = await self.request(
            "getBlob", "GET", owner=owner, repo=repo, file_sha=file_sha
        )
        return GitBlobResponseModel.from_json(response.text)

    async def get_commit(
        self, owner: str, repo: str, commit_sha: str
    ) -> GitCommitResponseModel:
        """
        Get information about a commit. (Github API endpoint: getCommit).

        Args:
            - `owner (str)`: Owner of the repository.
            - `repo (str)`: Name of the repository.
            - `commit_sha (str)`: SHA of the commit.

        Returns:
            - `commit_info (GitCommitResponseModel)`: Information about the commit.

        Examples:
            >>> commit_info = await client.get_commit("owner", "repo", "commit_sha")
        """
        response = await self.request(
            "getCommit", "GET", owner=owner, repo=repo, commit_sha=commit_sha
        )
        return GitCommitResponseModel.from_json(response.text)
366

367

368
if __name__ == "__main__":
    import asyncio

    async def main() -> None:
        """Manual smoke test: list every blob of a tree and print its content."""
        # The token is taken from the GITHUB_TOKEN environment variable.
        github = GithubClient()
        tree_info = await github.get_tree(
            owner="ahmetkca", repo="CommitAI", tree_sha="with-body"
        )

        # Only blob entries are fetched; other entry types are skipped.
        blob_entries = (entry for entry in tree_info.tree if entry.type == "blob")
        for entry in blob_entries:
            print(entry.path)
            print(entry.sha)
            blob_info = await github.get_blob(
                owner="ahmetkca", repo="CommitAI", file_sha=entry.sha
            )
            print(blob_info.content)

    asyncio.run(main())
388

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.