4
from contextlib import contextmanager
5
from typing import Optional
9
from huggingface_hub.hf_api import HfApi, RepositoryNotFoundError
12
# Credentials of the dummy account used for requests to the CI Hub.
CI_HUB_USER = "__DUMMY_TRANSFORMERS_USER__"
CI_HUB_USER_FULL_NAME = "Dummy User"
CI_HUB_USER_TOKEN = "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"

# Base URL of the CI Hub, and the URL templates built on top of it.
# The ``{...}`` placeholders are left unformatted here; they are filled in by
# whichever code consumes the template.
CI_HUB_ENDPOINT = "https://hub-ci.huggingface.co"
CI_HUB_DATASETS_URL = CI_HUB_ENDPOINT + "/datasets/{repo_id}/resolve/{revision}/{path}"
CI_HFH_HUGGINGFACE_CO_URL_TEMPLATE = CI_HUB_ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
22
def ci_hfh_hf_hub_url(monkeypatch):
24
"huggingface_hub.file_download.HUGGINGFACE_CO_URL_TEMPLATE", CI_HFH_HUGGINGFACE_CO_URL_TEMPLATE
29
def ci_hub_config(monkeypatch):
    """Redirect the ``datasets`` Hub configuration to the CI Hub.

    Overrides ``datasets.config.HF_ENDPOINT`` and
    ``datasets.config.HUB_DATASETS_URL`` with the CI values through
    ``monkeypatch``.

    Args:
        monkeypatch: object exposing ``setattr(target, value)`` with a dotted
            string target (presumably pytest's ``monkeypatch`` fixture; the
            decorator for this function is not visible here).
    """
    monkeypatch.setattr("datasets.config.HF_ENDPOINT", CI_HUB_ENDPOINT)
    monkeypatch.setattr("datasets.config.HUB_DATASETS_URL", CI_HUB_DATASETS_URL)
35
def set_ci_hub_access_token(ci_hub_config):
36
old_environ = dict(os.environ)
37
os.environ["HF_TOKEN"] = CI_HUB_USER_TOKEN
40
os.environ.update(old_environ)
43
@pytest.fixture(scope="session")
45
return HfApi(endpoint=CI_HUB_ENDPOINT)
48
@pytest.fixture(scope="session")
50
yield CI_HUB_USER_TOKEN
54
def cleanup_repo(hf_api):
55
def _cleanup_repo(repo_id):
56
hf_api.delete_repo(repo_id, token=CI_HUB_USER_TOKEN, repo_type="dataset")
62
def temporary_repo(cleanup_repo):
64
def _temporary_repo(repo_id: Optional[str] = None):
65
repo_id = repo_id or f"{CI_HUB_USER}/test-dataset-{uuid.uuid4().hex[:6]}-{int(time.time() * 10e3)}"
71
except RepositoryNotFoundError:
74
return _temporary_repo
77
@pytest.fixture(scope="session")
78
def hf_private_dataset_repo_txt_data_(hf_api: HfApi, hf_token, text_file):
79
repo_name = f"repo_txt_data-{int(time.time() * 10e6)}"
80
repo_id = f"{CI_HUB_USER}/{repo_name}"
81
hf_api.create_repo(repo_id, token=hf_token, repo_type="dataset", private=True)
84
path_or_fileobj=str(text_file),
85
path_in_repo="data/text_data.txt",
91
hf_api.delete_repo(repo_id, token=hf_token, repo_type="dataset")
92
except (requests.exceptions.HTTPError, ValueError):
97
def hf_private_dataset_repo_txt_data(hf_private_dataset_repo_txt_data_, ci_hub_config, ci_hfh_hf_hub_url):
    """Hand back the value of ``hf_private_dataset_repo_txt_data_`` unchanged.

    Args:
        hf_private_dataset_repo_txt_data_: value to pass through.
        ci_hub_config: not used in the body; presumably requested only so the
            CI Hub configuration is in place first (confirm against the
            decorator, which is not visible here).
        ci_hfh_hf_hub_url: not used in the body; same presumption as above.

    Returns:
        The ``hf_private_dataset_repo_txt_data_`` argument, as received.
    """
    return hf_private_dataset_repo_txt_data_
101
@pytest.fixture(scope="session")
102
def hf_private_dataset_repo_zipped_txt_data_(hf_api: HfApi, hf_token, zip_csv_with_dir_path):
103
repo_name = f"repo_zipped_txt_data-{int(time.time() * 10e6)}"
104
repo_id = f"{CI_HUB_USER}/{repo_name}"
105
hf_api.create_repo(repo_id, token=hf_token, repo_type="dataset", private=True)
108
path_or_fileobj=str(zip_csv_with_dir_path),
109
path_in_repo="data.zip",
115
hf_api.delete_repo(repo_id, token=hf_token, repo_type="dataset")
116
except (requests.exceptions.HTTPError, ValueError):
121
def hf_private_dataset_repo_zipped_txt_data(
    hf_private_dataset_repo_zipped_txt_data_, ci_hub_config, ci_hfh_hf_hub_url
):
    """Hand back the value of ``hf_private_dataset_repo_zipped_txt_data_`` unchanged.

    Args:
        hf_private_dataset_repo_zipped_txt_data_: value to pass through.
        ci_hub_config: not used in the body; presumably requested only so the
            CI Hub configuration is in place first (confirm against the
            decorator, which is not visible here).
        ci_hfh_hf_hub_url: not used in the body; same presumption as above.

    Returns:
        The ``hf_private_dataset_repo_zipped_txt_data_`` argument, as received.
    """
    return hf_private_dataset_repo_zipped_txt_data_
127
@pytest.fixture(scope="session")
128
def hf_private_dataset_repo_zipped_img_data_(hf_api: HfApi, hf_token, zip_image_path):
129
repo_name = f"repo_zipped_img_data-{int(time.time() * 10e6)}"
130
repo_id = f"{CI_HUB_USER}/{repo_name}"
131
hf_api.create_repo(repo_id, token=hf_token, repo_type="dataset", private=True)
134
path_or_fileobj=str(zip_image_path),
135
path_in_repo="data.zip",
141
hf_api.delete_repo(repo_id, token=hf_token, repo_type="dataset")
142
except (requests.exceptions.HTTPError, ValueError):
147
def hf_private_dataset_repo_zipped_img_data(
    hf_private_dataset_repo_zipped_img_data_, ci_hub_config, ci_hfh_hf_hub_url
):
    """Hand back the value of ``hf_private_dataset_repo_zipped_img_data_`` unchanged.

    Args:
        hf_private_dataset_repo_zipped_img_data_: value to pass through.
        ci_hub_config: not used in the body; presumably requested only so the
            CI Hub configuration is in place first (confirm against the
            decorator, which is not visible here).
        ci_hfh_hf_hub_url: not used in the body; same presumption as above.

    Returns:
        The ``hf_private_dataset_repo_zipped_img_data_`` argument, as received.
    """
    return hf_private_dataset_repo_zipped_img_data_