import hashlib
import json
import os
import shutil
import urllib3
import xml.dom.minidom

from dataclasses import dataclass

from provider.base import repo_interface
import provider.utility as utility
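
# The RepoEntity definition falls in an elided stretch of this file. A minimal
# reconstruction, consistent with the attribute accesses below (.http_code,
# .content) and the three-argument constructor calls; the middle field name
# "hexdigest" is an assumption:
@dataclass
class RepoEntity:
    http_code: int
    hexdigest: str
    content: bytes
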

class Checksum:
    def __name_map(self, name):
        # Body elided in this excerpt; assumed to translate repomd checksum
        # type names into hashlib algorithm names ("sha" is yum's historical
        # alias for SHA-1).
        return {"sha": "sha1"}.get(name, name)

    def __init__(self, name, hexdigest):
        self.name = self.__name_map(name)
        self.hexdigest = hexdigest

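# For example, Checksum("sha", "0beec7b5...") yields name == "sha1", so
# hashlib.new(checksum.name) in __download below always receives a name that
# hashlib recognizes (assuming the mapping sketched above).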

class repo(repo_interface):
    def __init__(self, url, dir, opts):
        super().__init__("yum", url, dir, opts)

        # Files present in the repository before this run, keyed by path
        # relative to the repository root.
        self.__tree_before = dict()
        # Files downloaded into tmp that still have to be moved into place.
        self.__rename_list = list()
        # Paths that have been validated or fetched during this run.
        self.__processed_set = set()
        # Paths whose downloaded content did not match the published checksum.
        self.__checksum_mismatch = list()

        # The elided portion of __init__ is assumed to initialize the HTTP
        # pool and the temporary download directory used below.
        self.__init_http()
        tmp = self.tmpdir()

        # Index every file already on disk so it can be revalidated instead
        # of unconditionally re-downloaded.
        if os.path.isdir(dir):
            for entry in utility.scantree(dir):
                if entry.is_dir(follow_symlinks=False):
                    continue
                # Skip leftovers inside the temporary download directory.
                if os.path.commonprefix([entry.path, tmp]) == tmp:
                    continue
                relpath = os.path.relpath(entry.path, dir)
                self.__tree_before[relpath] = entry
        elif os.path.exists(dir):
            raise NotADirectoryError(f"Repository path '{dir}' is not a directory.")

    def __mirror(self, write):
        self.__process_extra_files_json()
        self.__process_repomd()

        # Replay the downloads in reverse so repomd.xml is swapped in only
        # after every file it references is already in place.
        for path in reversed(self.__rename_list):
            self.log(0, "Update {}".format(path))
            srcpath = os.path.join(self.tmpdir(), path)
            dstpath = os.path.join(self.dir(), path)
            os.makedirs(os.path.dirname(dstpath), exist_ok=True)
            os.replace(srcpath, dstpath)

        # Whatever was on disk before the run but never revalidated is
        # extraneous now.
        for path in self.__tree_before:
            if path not in self.__processed_set:
                if self.delete_extraneous():
                    self.log(0, "Remove {}".format(path))
                    dstpath = os.path.join(self.dir(), path)
                    os.remove(dstpath)  # the removal itself is elided; assumed
                else:
                    self.log(0, "Need remove {}".format(path))

        shutil.rmtree(self.tmpdir())

        utility.delete_empty_folders(self.dir())

    def __process_repodata_primary(self, content):
        dom = xml.dom.minidom.parseString(content)

        packages = dom.getElementsByTagName("package")

        for package in packages:
            package_type = package.getAttribute("type")
            # Assumed from the elided lines: skip anything but regular rpms.
            if package_type != "rpm":
                continue

            checksum_node = package.getElementsByTagName("checksum")[0]
            location_node = package.getElementsByTagName("location")[0]

            path = location_node.getAttribute("href")
            checksum = Checksum(checksum_node.getAttribute("type"), checksum_node.firstChild.nodeValue)

            self.__download(path, checksum = checksum)
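
    # For reference, each <package> element in primary.xml looks roughly like
    # this (simplified; the digest is a placeholder):
    #
    #   <package type="rpm">
    #     <checksum type="sha256" pkgid="YES">3e25...</checksum>
    #     <location href="Packages/f/foo-1.0-1.el8.x86_64.rpm"/>
    #   </package>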

    def __process_repomd(self):
        # The detached GPG signature and signing key are mirrored alongside
        # the metadata index itself.
        self.__download('repodata/repomd.xml.asc')
        self.__download('repodata/repomd.xml.key')

        repomd = self.__download(
            'repodata/repomd.xml',
            read = True  # remaining arguments elided; the body is parsed below
        )

        dom = xml.dom.minidom.parseString(repomd.content)

        datas = dom.getElementsByTagName("data")
        for data in datas:
            data_type = data.getAttribute("type")
            checksum_node = data.getElementsByTagName("checksum")[0]
            location_node = data.getElementsByTagName("location")[0]

            path = location_node.getAttribute("href")
            checksum = Checksum(checksum_node.getAttribute("type"), checksum_node.firstChild.nodeValue)

            if data_type == 'primary':
                # Assumed arguments for the elided call: primary must be read
                # into memory so the package list can be parsed.
                primary = self.__download(path, read = True, checksum = checksum)
                self.__process_repodata_primary(utility.unarchiving(path, primary.content))
            else:
                self.__download(path, checksum = checksum)
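
    # repomd.xml's <data> entries have the same shape, e.g. (digest elided):
    #
    #   <data type="primary">
    #     <checksum type="sha256">9f2c...</checksum>
    #     <location href="repodata/9f2c...-primary.xml.gz"/>
    #   </data>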

    def __process_extra_files_json(self):
        extra_files = self.__download(
            'extra_files.json',  # assumed path; the argument list is elided
            read = True
        )
        # extra_files.json is optional, so anything but 200 is not an error.
        if extra_files.http_code != 200:
            return

        j = json.loads(extra_files.content)
        for entity in j.get('data', list()):
            path = entity['file']
            checksums = entity.get('checksums', dict())
            checksum = None
            for name in checksums:
                # Take the first checksum type hashlib can actually compute.
                if name in hashlib.algorithms_available:
                    checksum = Checksum(name, checksums[name])
                    break
            # Assumed from the elided lines: fetch the listed file.
            self.__download(path, checksum = checksum)

    def __init_http(self):
        # Present a dnf-like User-Agent to the mirrors.
        default_headers = {
            "User-Agent": "libdnf (AlmaLinux 8.8; generic; Linux.x86_64)"
        }
        self.__http = urllib3.PoolManager(
            headers = default_headers
        )
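
    # PoolManager keeps connections alive across the many requests a mirror
    # run makes; __download below passes preload_content=False so responses
    # can be streamed instead of buffered whole.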

    def __download(self, path, read: bool = False, checksum: Checksum = None) -> RepoEntity:
        url = utility.url_concat(self.url(), path)
        srcpath = os.path.join(self.dir(), path)
        dstpath = os.path.join(self.tmpdir(), path)

        self.log(0, "Processing {}".format(url))

        meta = self.__tree_before.get(path)
        if checksum is not None:
            # If the repository already holds this file, revalidate it by
            # checksum; on a match there is nothing to fetch or rename.
            if meta is not None:  # guard assumed from an elided line
                info = utility.read_or_checksum(srcpath, read, checksum.name)
                if info[0] == checksum.hexdigest:
                    self.__processed_set.add(path)
                    return RepoEntity(200, info[0], info[1])

            # A file left in tmp by an interrupted run can be reused, but it
            # still has to be renamed into place.
            if os.path.isfile(dstpath):
                info = utility.read_or_checksum(dstpath, read, checksum.name)
                if info[0] == checksum.hexdigest:
                    self.__processed_set.add(path)
                    self.__rename_list.append(path)
                    return RepoEntity(200, info[0], info[1])

        os.makedirs(os.path.dirname(dstpath), exist_ok=True)

        # Ask the server to skip the transfer when the on-disk copy is fresh.
        headers = dict()
        if meta is not None:
            stat = meta.stat(follow_symlinks=False)
            headers["If-Modified-Since"] = utility.time_to_if_modified_since(stat.st_mtime)

        response = self.__http.request("GET", url, headers = headers, preload_content = False)

        http_code = response.status
        hexdigest = None
        content = None

        if http_code == 200:
            if checksum is not None:
                file_hash = hashlib.new(checksum.name)

            with open(dstpath, 'wb') as fd:
                # The read/stream split below is assumed from the elided lines.
                if read:
                    # Callers that parse the payload get it in memory.
                    content = response.data
                    if checksum is not None:
                        file_hash.update(content)
                    fd.write(content)
                else:
                    # Everything else is streamed to disk in 10 MiB chunks.
                    for chunk in response.stream(10485760):
                        if checksum is not None:
                            file_hash.update(chunk)
                        fd.write(chunk)

            # Preserve the server's mtime so the next run can revalidate the
            # file with If-Modified-Since.
            if 'Last-Modified' in response.headers:
                mtime = utility.if_modified_since_to_time(response.headers['Last-Modified'])
                os.utime(dstpath, (mtime, mtime))

            if checksum is not None:
                hexdigest = file_hash.hexdigest()

                if hexdigest != checksum.hexdigest:
                    self.log(0, "Checksum mismatch for {}; Expected: {}; Actual: {}; Digest: {}".format(url, checksum.hexdigest, hexdigest, checksum.name))
                    self.__checksum_mismatch.append(path)

            self.__processed_set.add(path)
            self.__rename_list.append(path)
        elif http_code == 304:
            # Not modified: reuse the repository copy (assignments assumed
            # from the elided lines).
            info = utility.read_or_checksum(srcpath, read, checksum.name if checksum is not None else None)
            hexdigest = info[0]
            content = info[1]

            if checksum is not None:
                if hexdigest != checksum.hexdigest:
                    self.log(0, "Checksum mismatch for {}; Expected: {}; Actual: {}; Digest: {}".format(url, checksum.hexdigest, hexdigest, checksum.name))
                    self.__checksum_mismatch.append(path)

            self.__processed_set.add(path)

        response.release_conn()

        return RepoEntity(http_code, hexdigest, content)
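
# A minimal usage sketch (assumed driver; the repo_interface contract, the
# opts object, and the public entry point live in provider.base, which is not
# part of this excerpt):
#
#   mirror = repo("https://repo.almalinux.org/almalinux/8/BaseOS/x86_64/os/",
#                 "/srv/mirror/almalinux/8/BaseOS/x86_64/os",
#                 opts)
#   mirror.mirror()   # assumed public wrapper that ends up in __mirror()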