transformers / setup.py
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py

To create the package for pypi.

1. Create the release branch named: v<RELEASE>-release, for example v4.19-release. For a patch release checkout the
   current release branch.

   If releasing on a special branch, copy the updated README.md on the main branch for the commit you will make
   for the post-release and run `make fix-copies` on the main branch as well.

2. Run `make pre-release` (or `make pre-patch` for a patch release) and commit these changes with the message:
   "Release: <VERSION>" and push.

3. Go back to the main branch and run `make post-release` then `make fix-copies`. Commit these changes with the
   message "v<NEXT_VERSION>.dev.0" and push to main.

# If you were just cutting the branch in preparation for a release, you can stop here for now.

4. Wait for the tests on the release branch to complete and be green (otherwise revert and fix bugs).

5. On the release branch, add a tag in git to mark the release: "git tag v<VERSION> -m 'Adds tag v<VERSION> for pypi' "
   Push the tag to git: git push --tags origin v<RELEASE>-release

6. Build both the sources and the wheel. Do not change anything in setup.py between
   creating the wheel and the source distribution (obviously).

   Run `make build-release`. This will build the release and do some sanity checks for you. If this ends with an error
   message, you need to fix things before going further.

   You should now have a /dist directory with both .whl and .tar.gz source versions.

7. Check that everything looks correct by uploading the package to the pypi test server:

   twine upload dist/* -r testpypi
   (pypi suggests using twine, as other methods upload files in plaintext.)
   You may have to specify the repository url; if so, use the following command:
   twine upload dist/* -r testpypi --repository-url=https://test.pypi.org/legacy/

   Check that you can install it in a virtualenv by running:
   pip install -i https://testpypi.python.org/pypi transformers

   Check you can run the following commands:
   python -c "from transformers import pipeline; classifier = pipeline('text-classification'); print(classifier('What a nice release'))"
   python -c "from transformers import *"
   python utils/check_build.py --check_lib

   If making a patch release, double-check the bug you are patching is indeed resolved.

8. Upload the final version to actual pypi:
   twine upload dist/* -r pypi

9. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
"""

import os
import re
import shutil
from pathlib import Path

from setuptools import Command, find_packages, setup


# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
    print(
        (
            "Warning: {} exists.\n\n"
            "If you recently updated transformers to 3.0 or later, this is expected,\n"
            "but it may prevent transformers from installing in editable mode.\n\n"
            "This directory is automatically generated by Python's packaging tools.\n"
            "I will remove it now.\n\n"
            "See https://github.com/pypa/pip/issues/5466 for details.\n"
        ).format(stale_egg_info)
    )
    shutil.rmtree(stale_egg_info)


# IMPORTANT:
# 1. all dependencies should be listed here with their version requirements if any
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
_deps = [
    "Pillow>=10.0.1,<=15.0",
    "accelerate>=0.21.0",
    "av==9.2.0",  # Latest version of PyAV (10.0.0) has issues with audio stream.
    "beautifulsoup4",
    "codecarbon==1.2.0",
    "cookiecutter==1.7.3",
    "dataclasses",
    "datasets!=2.5.0",
    "decord==0.6.0",
    "deepspeed>=0.9.3",
    "diffusers",
    "dill<0.3.5",
    "evaluate>=0.2.0",
    "faiss-cpu",
    "fastapi",
    "filelock",
    "flax>=0.4.1,<=0.7.0",
    "fsspec<2023.10.0",
    "ftfy",
    "fugashi>=1.0",
    "GitPython<3.1.19",
    "hf-doc-builder>=0.3.0",
    "huggingface-hub>=0.19.3,<1.0",
    "importlib_metadata",
    "ipadic>=1.0.0,<2.0",
    "isort>=5.5.4",
    "jax>=0.4.1,<=0.4.13",
    "jaxlib>=0.4.1,<=0.4.13",
    "jieba",
    "kenlm",
    # Keras pin - this is to make sure Keras 3 doesn't destroy us. Remove or change when we have proper support.
    "keras<2.16",
    "keras-nlp>=0.3.1",
    "librosa",
    "nltk",
    "natten>=0.14.6,<0.15.0",
    "numpy>=1.17",
    "onnxconverter-common",
    "onnxruntime-tools>=1.4.2",
    "onnxruntime>=1.4.0",
    "opencv-python",
    "optuna",
    "optax>=0.0.8,<=0.1.4",
    "packaging>=20.0",
    "parameterized",
    "phonemizer",
    "protobuf",
    "psutil",
    "pyyaml>=5.1",
    "pydantic",
    "pytest>=7.2.0,<8.0.0",
    "pytest-timeout",
    "pytest-xdist",
    "python>=3.8.0",
    "ray[tune]>=2.7.0",
    "regex!=2019.12.17",
    "requests",
    "rhoknp>=1.1.0,<1.3.1",
    "rjieba",
    "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
    "ruff==0.1.5",
    "sacrebleu>=1.4.12,<2.0.0",
    "sacremoses",
    "safetensors>=0.4.1",
    "sagemaker>=2.31.0",
    "scikit-learn",
    "sentencepiece>=0.1.91,!=0.1.92",
    "sigopt",
    "starlette",
    "sudachipy>=0.6.6",
    "sudachidict_core>=20220729",
    "tensorboard",
    # TensorFlow pin. When changing this value, update examples/tensorflow/_tests_requirements.txt accordingly
    "tensorflow-cpu>=2.6,<2.16",
    "tensorflow>=2.6,<2.16",
    "tensorflow-text<2.16",
    "tf2onnx",
    "timeout-decorator",
    "timm",
    "tokenizers>=0.14,<0.19",
    "torch",
    "torchaudio",
    "torchvision",
    "pyctcdecode>=0.4.0",
    "tqdm>=4.27",
    "unidic>=1.0.2",
    "unidic_lite>=1.0.7",
    "urllib3<2.0.0",
    "uvicorn",
]


# this is a lookup table with items like:
#
# tokenizers: "tokenizers==0.9.4"
# packaging: "packaging"
#
# some of the values are versioned whereas others aren't.
deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}
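# For example, "tokenizers>=0.14,<0.19" above yields deps["tokenizers"] == "tokenizers>=0.14,<0.19",
# while an unpinned entry such as "filelock" maps to itself: deps["filelock"] == "filelock".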

# since we save this data in src/transformers/dependency_versions_table.py it can be easily accessed from
# anywhere. If you need to quickly access the data from this table in a shell, you can do so easily with:
#
# python -c 'import sys; from transformers.dependency_versions_table import deps; \
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets
#
# Just pass the desired package names to that script as it's shown with 2 packages above.
#
# If transformers is not yet installed and the work is done from the cloned repo, remember to add `PYTHONPATH=src` to the script above.
#
# You can then feed this for example to `pip`:
#
# pip install -U $(python -c 'import sys; from transformers.dependency_versions_table import deps; \
# print(" ".join([deps[x] for x in sys.argv[1:]]))' tokenizers datasets)
#


def deps_list(*pkgs):
    return [deps[pkg] for pkg in pkgs]
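# e.g. deps_list("torch", "accelerate") returns ["torch", "accelerate>=0.21.0"] given the pins above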


class DepsTableUpdateCommand(Command):
    """
    A custom distutils command that updates the dependency table.
    usage: python setup.py deps_table_update
    """

    description = "build runtime dependency table"
    user_options = [
        # format: (long option, short option, description).
        ("dep-table-update", None, "updates src/transformers/dependency_versions_table.py"),
    ]

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        entries = "\n".join([f'    "{k}": "{v}",' for k, v in deps.items()])
        content = [
            "# THIS FILE HAS BEEN AUTOGENERATED. To update:",
            "# 1. modify the `_deps` dict in setup.py",
            "# 2. run `make deps_table_update`",
            "deps = {",
            entries,
            "}",
            "",
        ]
        target = "src/transformers/dependency_versions_table.py"
        print(f"updating {target}")
        with open(target, "w", encoding="utf-8", newline="\n") as f:
            f.write("\n".join(content))


extras = {}
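# Each key added to `extras` below becomes an installable extra, e.g. `pip install "transformers[torch]"`
# pulls in everything listed under extras["torch"].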

extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic", "sudachipy", "sudachidict_core", "rhoknp")
extras["sklearn"] = deps_list("scikit-learn")

extras["tf"] = deps_list("tensorflow", "onnxconverter-common", "tf2onnx", "tensorflow-text", "keras-nlp")
extras["tf-cpu"] = deps_list("tensorflow-cpu", "onnxconverter-common", "tf2onnx", "tensorflow-text", "keras-nlp")

extras["torch"] = deps_list("torch", "accelerate")
extras["accelerate"] = deps_list("accelerate")

if os.name == "nt":  # windows
    extras["retrieval"] = deps_list("datasets")  # faiss is not supported on windows
    extras["flax"] = []  # jax is not supported on windows
else:
    extras["retrieval"] = deps_list("faiss-cpu", "datasets")
    extras["flax"] = deps_list("jax", "jaxlib", "flax", "optax")

extras["tokenizers"] = deps_list("tokenizers")
extras["ftfy"] = deps_list("ftfy")
extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
extras["onnx"] = deps_list("onnxconverter-common", "tf2onnx") + extras["onnxruntime"]
extras["modelcreation"] = deps_list("cookiecutter")

extras["sagemaker"] = deps_list("sagemaker")
extras["deepspeed"] = deps_list("deepspeed") + extras["accelerate"]
extras["optuna"] = deps_list("optuna")
extras["ray"] = deps_list("ray[tune]")
extras["sigopt"] = deps_list("sigopt")

extras["integrations"] = extras["optuna"] + extras["ray"] + extras["sigopt"]

extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
extras["audio"] = deps_list("librosa", "pyctcdecode", "phonemizer", "kenlm")
# `pip install ".[speech]"` is deprecated and `pip install ".[torch-speech]"` should be used instead
extras["speech"] = deps_list("torchaudio") + extras["audio"]
extras["torch-speech"] = deps_list("torchaudio") + extras["audio"]
extras["tf-speech"] = extras["audio"]
extras["flax-speech"] = extras["audio"]
extras["vision"] = deps_list("Pillow")
extras["timm"] = deps_list("timm")
extras["torch-vision"] = deps_list("torchvision") + extras["vision"]
extras["natten"] = deps_list("natten")
extras["codecarbon"] = deps_list("codecarbon")
extras["video"] = deps_list("decord", "av")

extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
extras["testing"] = (
    deps_list(
        "pytest",
        "pytest-xdist",
        "timeout-decorator",
        "parameterized",
        "psutil",
        "datasets",
        "dill",
        "evaluate",
        "pytest-timeout",
        "ruff",
        "sacrebleu",
        "rouge-score",
        "nltk",
        "GitPython",
        "hf-doc-builder",
        "protobuf",  # Can be removed once we can unpin protobuf
        "sacremoses",
        "rjieba",
        "beautifulsoup4",
        "tensorboard",
        "pydantic",
    )
    + extras["retrieval"]
    + extras["modelcreation"]
)

extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"]

extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "hf-doc-builder", "urllib3")

extras["all"] = (
    extras["tf"]
    + extras["torch"]
    + extras["flax"]
    + extras["sentencepiece"]
    + extras["tokenizers"]
    + extras["torch-speech"]
    + extras["vision"]
    + extras["integrations"]
    + extras["timm"]
    + extras["torch-vision"]
    + extras["codecarbon"]
    + extras["accelerate"]
    + extras["video"]
)

# Might need to add doc-builder and some specific deps in the future
extras["docs_specific"] = ["hf-doc-builder"]

# "docs" needs "all" to resolve all the references
extras["docs"] = extras["all"] + extras["docs_specific"]

extras["dev-torch"] = (
    extras["testing"]
    + extras["torch"]
    + extras["sentencepiece"]
    + extras["tokenizers"]
    + extras["torch-speech"]
    + extras["vision"]
    + extras["integrations"]
    + extras["timm"]
    + extras["torch-vision"]
    + extras["codecarbon"]
    + extras["quality"]
    + extras["ja"]
    + extras["docs_specific"]
    + extras["sklearn"]
    + extras["modelcreation"]
    + extras["onnxruntime"]
)
extras["dev-tensorflow"] = (
    extras["testing"]
    + extras["tf"]
    + extras["sentencepiece"]
    + extras["tokenizers"]
    + extras["vision"]
    + extras["quality"]
    + extras["docs_specific"]
    + extras["sklearn"]
    + extras["modelcreation"]
    + extras["onnx"]
    + extras["tf-speech"]
)
extras["dev"] = (
    extras["all"]
    + extras["testing"]
    + extras["quality"]
    + extras["ja"]
    + extras["docs_specific"]
    + extras["sklearn"]
    + extras["modelcreation"]
)

extras["torchhub"] = deps_list(
    "filelock",
    "huggingface-hub",
    "importlib_metadata",
    "numpy",
    "packaging",
    "protobuf",
    "regex",
    "requests",
    "sentencepiece",
    "torch",
    "tokenizers",
    "tqdm",
)

extras["agents"] = deps_list(
    "diffusers", "accelerate", "datasets", "torch", "sentencepiece", "opencv-python", "Pillow"
)

# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
install_requires = [
    deps["filelock"],  # filesystem locks, e.g., to prevent parallel downloads
    deps["huggingface-hub"],
    deps["numpy"],
    deps["packaging"],  # utilities from PyPA to e.g., compare versions
    deps["pyyaml"],  # used for the model cards metadata
    deps["regex"],  # for OpenAI GPT
    deps["requests"],  # for downloading models over HTTPS
    deps["tokenizers"],
    deps["safetensors"],
    deps["tqdm"],  # progress bars in model download and training scripts
]

setup(
    name="transformers",
    version="4.39.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
    author_email="transformers@huggingface.co",
    description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    keywords="NLP vision speech deep learning transformer pytorch tensorflow jax BERT GPT-2 Wav2Vec2 ViT",
    license="Apache 2.0 License",
    url="https://github.com/huggingface/transformers",
    package_dir={"": "src"},
    packages=find_packages("src"),
    include_package_data=True,
    package_data={"": ["**/*.cu", "**/*.cpp", "**/*.cuh", "**/*.h", "**/*.pyx"]},
    zip_safe=False,
    extras_require=extras,
    entry_points={"console_scripts": ["transformers-cli=transformers.commands.transformers_cli:main"]},
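    # the console script declared above lets users run e.g. `transformers-cli env` after installation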
    python_requires=">=3.8.0",
    install_requires=list(install_requires),
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    cmdclass={"deps_table_update": DepsTableUpdateCommand},
)
