1 |
Add a --content-db option which is required for the content-hash |
2 |
layout because its file listings return content digests instead of |
3 |
distfile names. |
4 |
|
5 |
The content db serves to translate content digests to distfile |
6 |
names, and distfile names to content digests. All keys have a |
7 |
prefix separated by a colon. For digest keys, the prefix is the |
8 |
hash algorithm name. For filename keys, the prefix is "filename". |
9 |
|
10 |
The value associated with a digest key is a plain filename. The |
11 |
value associated with a filename key is a set of content revisions. |
12 |
Each content revision is expressed as a dictionary of digests which |
13 |
is suitable for construction of a DistfileName instance. |
14 |
|
15 |
Bug: https://bugs.gentoo.org/756778 |
16 |
Signed-off-by: Zac Medico <zmedico@g.o> |
17 |
--- |
18 |
[PATCH v2] Split out ContentDB class and associate distfile key |
19 |
with a set of content revisions, where each content revision is |
20 |
expressed as a dictionary of digests. |
21 |
|
22 |
lib/portage/_emirrordist/Config.py | 8 +- |
23 |
lib/portage/_emirrordist/ContentDB.py | 158 +++++++++++++++++++ |
24 |
lib/portage/_emirrordist/DeletionIterator.py | 25 ++- |
25 |
lib/portage/_emirrordist/DeletionTask.py | 8 + |
26 |
lib/portage/_emirrordist/FetchTask.py | 5 +- |
27 |
lib/portage/_emirrordist/main.py | 15 +- |
28 |
lib/portage/tests/ebuild/test_fetch.py | 14 ++ |
29 |
man/emirrordist.1 | 6 +- |
30 |
8 files changed, 232 insertions(+), 7 deletions(-) |
31 |
create mode 100644 lib/portage/_emirrordist/ContentDB.py |
32 |
|
33 |
diff --git a/lib/portage/_emirrordist/Config.py b/lib/portage/_emirrordist/Config.py |
34 |
index 4bee4f45e..cfe944040 100644 |
35 |
--- a/lib/portage/_emirrordist/Config.py |
36 |
+++ b/lib/portage/_emirrordist/Config.py |
37 |
@@ -1,4 +1,4 @@ |
38 |
-# Copyright 2013-2020 Gentoo Authors |
39 |
+# Copyright 2013-2021 Gentoo Authors |
40 |
# Distributed under the terms of the GNU General Public License v2 |
41 |
|
42 |
import copy |
43 |
@@ -10,6 +10,7 @@ import time |
44 |
from portage import os |
45 |
from portage.package.ebuild.fetch import MirrorLayoutConfig |
46 |
from portage.util import grabdict, grablines |
47 |
+from .ContentDB import ContentDB |
48 |
|
49 |
class Config: |
50 |
def __init__(self, options, portdb, event_loop): |
51 |
@@ -65,6 +66,11 @@ class Config: |
52 |
self.distfiles_db = self._open_shelve( |
53 |
options.distfiles_db, 'distfiles') |
54 |
|
55 |
+ self.content_db = None |
56 |
+ if options.content_db is not None: |
57 |
+ self.content_db = ContentDB(self._open_shelve( |
58 |
+ options.content_db, 'content')) |
59 |
+ |
60 |
self.deletion_db = None |
61 |
if options.deletion_db is not None: |
62 |
self.deletion_db = self._open_shelve( |
63 |
diff --git a/lib/portage/_emirrordist/ContentDB.py b/lib/portage/_emirrordist/ContentDB.py |
64 |
new file mode 100644 |
65 |
index 000000000..60e6ef39d |
66 |
--- /dev/null |
67 |
+++ b/lib/portage/_emirrordist/ContentDB.py |
68 |
@@ -0,0 +1,158 @@ |
69 |
+# Copyright 2021 Gentoo Authors |
70 |
+# Distributed under the terms of the GNU General Public License v2 |
71 |
+ |
72 |
+import logging |
73 |
+import operator |
74 |
+import shelve |
75 |
+import typing |
76 |
+ |
77 |
+from portage.package.ebuild.fetch import DistfileName |
78 |
+ |
79 |
+ |
80 |
+class ContentDB: |
81 |
+ """ |
82 |
+ The content db serves to translate content digests to distfiles |
83 |
+ names, and distfiles names to content digests. All keys have a |
84 |
+ prefix separated by a colon. For digest keys, the prefix is the |
85 |
+ hash algorithm name. For filename keys, the prefix is "filename". |
86 |
+ |
87 |
+ The value associated with a digest key is a plain filename. The |
88 |
+ value associated with a distfile key is a set of content revisions. |
89 |
+ Each content revision is expressed as a dictionary of digests which |
90 |
+ is suitable for construction of a DistfileName instance. |
91 |
+ """ |
92 |
+ |
93 |
+ def __init__(self, shelve_instance: shelve.Shelf): |
94 |
+ self._shelve = shelve_instance |
95 |
+ |
96 |
+ def add(self, filename: DistfileName): |
97 |
+ """ |
98 |
+ Add file name and digests. |
99 |
+ |
100 |
+ @param filename: file name with digests attribute |
101 |
+ """ |
102 |
+ distfile_str = str(filename) |
103 |
+ distfile_key = "filename:{}".format(distfile_str) |
104 |
+ for k, v in filename.digests.items(): |
105 |
+ if k != "size": |
106 |
+ self._shelve["{}:{}".format(k, v).lower()] = distfile_str |
107 |
+ try: |
108 |
+ content_revisions = self._shelve[distfile_key] |
109 |
+ except KeyError: |
110 |
+ content_revisions = set() |
111 |
+ |
112 |
+ revision_key = tuple( |
113 |
+ sorted( |
114 |
+ ( |
115 |
+ (algo.lower(), filename.digests[algo].lower()) |
116 |
+ for algo in filename.digests |
117 |
+ if algo != "size" |
118 |
+ ), |
119 |
+ key=operator.itemgetter(0), |
120 |
+ ) |
121 |
+ ) |
122 |
+ content_revisions.add(revision_key) |
123 |
+ self._shelve[distfile_key] = content_revisions |
124 |
+ |
125 |
+ def remove(self, filename: DistfileName): |
126 |
+ """ |
127 |
+ Remove a file name from the database. |
128 |
+ |
129 |
+ @param filename: file name with digests attribute |
130 |
+ """ |
131 |
+ distfile_key = "filename:{}".format(filename) |
132 |
+ try: |
133 |
+ content_revisions = self._shelve[distfile_key] |
134 |
+ except KeyError: |
135 |
+ pass |
136 |
+ else: |
137 |
+ for revision_key in content_revisions: |
138 |
+ for k, v in revision_key: |
139 |
+ try: |
140 |
+ del self._shelve["{}:{}".format(k, v)] |
141 |
+ except KeyError: |
142 |
+ pass |
143 |
+ |
144 |
+ logging.debug(("drop '%s' from content db") % filename) |
145 |
+ try: |
146 |
+ del self._shelve[distfile_key] |
147 |
+ except KeyError: |
148 |
+ pass |
149 |
+ |
150 |
+ def get_filenames_translate( |
151 |
+ self, filename: typing.Union[str, DistfileName] |
152 |
+ ) -> typing.Generator[DistfileName, None, None]: |
153 |
+ """ |
154 |
+ Translate distfiles content digests to distfile names. |
155 |
+ If filename is already a distfile name, then it will pass |
156 |
+ through unchanged. |
157 |
+ |
158 |
+ @param filename: A filename listed by layout get_filenames |
159 |
+ @return: The distfile name, translated from the corresponding |
160 |
+ content digest when necessary |
161 |
+ """ |
162 |
+ if not isinstance(filename, DistfileName): |
163 |
+ filename = DistfileName(filename) |
164 |
+ if self._shelve is None: |
165 |
+ yield filename |
166 |
+ return |
167 |
+ |
168 |
+ # Match content digests with zero or more content revisions. |
169 |
+ matched_revisions = {} |
170 |
+ |
171 |
+ for k, v in filename.digests.items(): |
172 |
+ digest_item = (k.lower(), v.lower()) |
173 |
+ digest_key = "{}:{}".format(*digest_item) |
174 |
+ try: |
175 |
+ distfile_str = self._shelve[digest_key] |
176 |
+ except KeyError: |
177 |
+ continue |
178 |
+ |
179 |
+ matched_revisions.setdefault(distfile_str, set()) |
180 |
+ try: |
181 |
+ content_revisions = self._shelve["filename:{}".format(distfile_str)] |
182 |
+ except KeyError: |
183 |
+ pass |
184 |
+ else: |
185 |
+ for revision_key in content_revisions: |
186 |
+ if ( |
187 |
+ digest_item in revision_key |
188 |
+ and revision_key not in matched_revisions.get(distfile_str, ()) |
189 |
+ ): |
190 |
+ matched_revisions[distfile_str].add(revision_key) |
191 |
+ yield DistfileName(distfile_str, digests=dict(revision_key)) |
192 |
+ |
193 |
+ if not any(matched_revisions.values()): |
194 |
+ # Since filename matched zero content revisions, allow |
195 |
+ # it to pass through unchanged (on the path toward deletion). |
196 |
+ yield filename |
197 |
+ |
198 |
+ def __len__(self): |
199 |
+ return len(self._shelve) |
200 |
+ |
201 |
+ def __contains__(self, k): |
202 |
+ return k in self._shelve |
203 |
+ |
204 |
+ def __iter__(self): |
205 |
+ return self._shelve.__iter__() |
206 |
+ |
207 |
+ def items(self): |
208 |
+ return self._shelve.iteritems() |
209 |
+ |
210 |
+ def __setitem__(self, k, v): |
211 |
+ self._shelve[k] = v |
212 |
+ |
213 |
+ def __getitem__(self, k): |
214 |
+ return self._shelve[k] |
215 |
+ |
216 |
+ def __delitem__(self, k): |
217 |
+ del self._shelve[k] |
218 |
+ |
219 |
+ def get(self, k, *args): |
220 |
+ return self._shelve.get(k, *args) |
221 |
+ |
222 |
+ def close(self): |
223 |
+ self._shelve.close() |
224 |
+ |
225 |
+ def clear(self): |
226 |
+ self._shelve.clear() |
227 |
diff --git a/lib/portage/_emirrordist/DeletionIterator.py b/lib/portage/_emirrordist/DeletionIterator.py |
228 |
index 08985ed6c..ab4309f9a 100644 |
229 |
--- a/lib/portage/_emirrordist/DeletionIterator.py |
230 |
+++ b/lib/portage/_emirrordist/DeletionIterator.py |
231 |
@@ -1,10 +1,12 @@ |
232 |
-# Copyright 2013-2019 Gentoo Authors |
233 |
+# Copyright 2013-2021 Gentoo Authors |
234 |
# Distributed under the terms of the GNU General Public License v2 |
235 |
|
236 |
+import itertools |
237 |
import logging |
238 |
import stat |
239 |
|
240 |
from portage import os |
241 |
+from portage.package.ebuild.fetch import DistfileName |
242 |
from .DeletionTask import DeletionTask |
243 |
|
244 |
class DeletionIterator: |
245 |
@@ -21,8 +23,25 @@ class DeletionIterator: |
246 |
deletion_delay = self._config.options.deletion_delay |
247 |
start_time = self._config.start_time |
248 |
distfiles_set = set() |
249 |
- for layout in self._config.layouts: |
250 |
- distfiles_set.update(layout.get_filenames(distdir)) |
251 |
+ distfiles_set.update( |
252 |
+ ( |
253 |
+ filename |
254 |
+ if isinstance(filename, DistfileName) |
255 |
+ else DistfileName(filename) |
256 |
+ for filename in itertools.chain.from_iterable( |
257 |
+ layout.get_filenames(distdir) for layout in self._config.layouts |
258 |
+ ) |
259 |
+ ) |
260 |
+ if self._config.content_db is None |
261 |
+ else itertools.chain.from_iterable( |
262 |
+ ( |
263 |
+ self._config.content_db.get_filenames_translate(filename) |
264 |
+ for filename in itertools.chain.from_iterable( |
265 |
+ layout.get_filenames(distdir) for layout in self._config.layouts |
266 |
+ ) |
267 |
+ ) |
268 |
+ ) |
269 |
+ ) |
270 |
for filename in distfiles_set: |
271 |
# require at least one successful stat() |
272 |
exceptions = [] |
273 |
diff --git a/lib/portage/_emirrordist/DeletionTask.py b/lib/portage/_emirrordist/DeletionTask.py |
274 |
index 5eb01d840..73493c5a1 100644 |
275 |
--- a/lib/portage/_emirrordist/DeletionTask.py |
276 |
+++ b/lib/portage/_emirrordist/DeletionTask.py |
277 |
@@ -5,6 +5,7 @@ import errno |
278 |
import logging |
279 |
|
280 |
from portage import os |
281 |
+from portage.package.ebuild.fetch import ContentHashLayout |
282 |
from portage.util._async.FileCopier import FileCopier |
283 |
from _emerge.CompositeTask import CompositeTask |
284 |
|
285 |
@@ -99,6 +100,10 @@ class DeletionTask(CompositeTask): |
286 |
def _delete_links(self): |
287 |
success = True |
288 |
for layout in self.config.layouts: |
289 |
+ if isinstance(layout, ContentHashLayout) and not self.distfile.digests: |
290 |
+ logging.debug(("_delete_links: '%s' has " |
291 |
+ "no digests") % self.distfile) |
292 |
+ continue |
293 |
distfile_path = os.path.join( |
294 |
self.config.options.distfiles, |
295 |
layout.get_path(self.distfile)) |
296 |
@@ -134,6 +139,9 @@ class DeletionTask(CompositeTask): |
297 |
logging.debug(("drop '%s' from " |
298 |
"distfiles db") % self.distfile) |
299 |
|
300 |
+ if self.config.content_db is not None: |
301 |
+ self.config.content_db.remove(self.distfile) |
302 |
+ |
303 |
if self.config.deletion_db is not None: |
304 |
try: |
305 |
del self.config.deletion_db[self.distfile] |
306 |
diff --git a/lib/portage/_emirrordist/FetchTask.py b/lib/portage/_emirrordist/FetchTask.py |
307 |
index 997762082..5a48f91cd 100644 |
308 |
--- a/lib/portage/_emirrordist/FetchTask.py |
309 |
+++ b/lib/portage/_emirrordist/FetchTask.py |
310 |
@@ -1,4 +1,4 @@ |
311 |
-# Copyright 2013-2020 Gentoo Authors |
312 |
+# Copyright 2013-2021 Gentoo Authors |
313 |
# Distributed under the terms of the GNU General Public License v2 |
314 |
|
315 |
import collections |
316 |
@@ -47,6 +47,9 @@ class FetchTask(CompositeTask): |
317 |
# Convert _pkg_str to str in order to prevent pickle problems. |
318 |
self.config.distfiles_db[self.distfile] = str(self.cpv) |
319 |
|
320 |
+ if self.config.content_db is not None: |
321 |
+ self.config.content_db.add(self.distfile) |
322 |
+ |
323 |
if not self._have_needed_digests(): |
324 |
msg = "incomplete digests: %s" % " ".join(self.digests) |
325 |
self.scheduler.output(msg, background=self.background, |
326 |
diff --git a/lib/portage/_emirrordist/main.py b/lib/portage/_emirrordist/main.py |
327 |
index 8d00a05f5..2200ec715 100644 |
328 |
--- a/lib/portage/_emirrordist/main.py |
329 |
+++ b/lib/portage/_emirrordist/main.py |
330 |
@@ -1,4 +1,4 @@ |
331 |
-# Copyright 2013-2020 Gentoo Authors |
332 |
+# Copyright 2013-2021 Gentoo Authors |
333 |
# Distributed under the terms of the GNU General Public License v2 |
334 |
|
335 |
import argparse |
336 |
@@ -7,6 +7,7 @@ import sys |
337 |
|
338 |
import portage |
339 |
from portage import os |
340 |
+from portage.package.ebuild.fetch import ContentHashLayout |
341 |
from portage.util import normalize_path, _recursive_file_list |
342 |
from portage.util._async.run_main_scheduler import run_main_scheduler |
343 |
from portage.util._async.SchedulerInterface import SchedulerInterface |
344 |
@@ -151,6 +152,12 @@ common_options = ( |
345 |
"distfile belongs to", |
346 |
"metavar" : "FILE" |
347 |
}, |
348 |
+ { |
349 |
+ "longopt" : "--content-db", |
350 |
+ "help" : "database file used to map content digests to" |
351 |
+ "distfiles names (required for content-hash layout)", |
352 |
+ "metavar" : "FILE" |
353 |
+ }, |
354 |
{ |
355 |
"longopt" : "--recycle-dir", |
356 |
"help" : "directory for extended retention of files that " |
357 |
@@ -441,6 +448,12 @@ def emirrordist_main(args): |
358 |
if not options.mirror: |
359 |
parser.error('No action specified') |
360 |
|
361 |
+ if options.delete and config.content_db is None: |
362 |
+ for layout in config.layouts: |
363 |
+ if isinstance(layout, ContentHashLayout): |
364 |
+ parser.error("content-hash layout requires " |
365 |
+ "--content-db to be specified") |
366 |
+ |
367 |
returncode = os.EX_OK |
368 |
|
369 |
if options.mirror: |
370 |
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py |
371 |
index d50a4cbfc..881288cdc 100644 |
372 |
--- a/lib/portage/tests/ebuild/test_fetch.py |
373 |
+++ b/lib/portage/tests/ebuild/test_fetch.py |
374 |
@@ -172,6 +172,16 @@ class EbuildFetchTestCase(TestCase): |
375 |
with open(os.path.join(settings['DISTDIR'], 'layout.conf'), 'wt') as f: |
376 |
f.write(layout_data) |
377 |
|
378 |
+ if any(isinstance(layout, ContentHashLayout) for layout in layouts): |
379 |
+ content_db = os.path.join(playground.eprefix, 'var/db/emirrordist/content.db') |
380 |
+ os.makedirs(os.path.dirname(content_db), exist_ok=True) |
381 |
+ try: |
382 |
+ os.unlink(content_db) |
383 |
+ except OSError: |
384 |
+ pass |
385 |
+ else: |
386 |
+ content_db = None |
387 |
+ |
388 |
# Demonstrate that fetch preserves a stale file in DISTDIR when no digests are given. |
389 |
foo_uri = {'foo': ('{scheme}://{host}:{port}/distfiles/foo'.format(scheme=scheme, host=host, port=server.server_port),)} |
390 |
foo_path = os.path.join(settings['DISTDIR'], 'foo') |
391 |
@@ -233,9 +243,13 @@ class EbuildFetchTestCase(TestCase): |
392 |
os.path.join(self.bindir, 'emirrordist'), |
393 |
'--distfiles', settings['DISTDIR'], |
394 |
'--config-root', settings['EPREFIX'], |
395 |
+ '--delete', |
396 |
'--repositories-configuration', settings.repositories.config_string(), |
397 |
'--repo', 'test_repo', '--mirror') |
398 |
|
399 |
+ if content_db is not None: |
400 |
+ emirrordist_cmd = emirrordist_cmd + ('--content-db', content_db,) |
401 |
+ |
402 |
env = settings.environ() |
403 |
env['PYTHONPATH'] = ':'.join( |
404 |
filter(None, [PORTAGE_PYM_PATH] + os.environ.get('PYTHONPATH', '').split(':'))) |
405 |
diff --git a/man/emirrordist.1 b/man/emirrordist.1 |
406 |
index 45108ef8c..7ad10dfd0 100644 |
407 |
--- a/man/emirrordist.1 |
408 |
+++ b/man/emirrordist.1 |
409 |
@@ -1,4 +1,4 @@ |
410 |
-.TH "EMIRRORDIST" "1" "Dec 2015" "Portage VERSION" "Portage" |
411 |
+.TH "EMIRRORDIST" "1" "Feb 2021" "Portage VERSION" "Portage" |
412 |
.SH "NAME" |
413 |
emirrordist \- a fetch tool for mirroring of package distfiles |
414 |
.SH SYNOPSIS |
415 |
@@ -66,6 +66,10 @@ reporting purposes. Opened in append mode. |
416 |
Log file for scheduled deletions, with tab\-delimited output, for |
417 |
reporting purposes. Overwritten with each run. |
418 |
.TP |
419 |
+\fB\-\-content\-db\fR=\fIFILE\fR |
420 |
+Database file used to pair content digests with distfile names |
421 |
+(required for content\-hash layout). |
422 |
+.TP |
423 |
\fB\-\-delete\fR |
424 |
Enable deletion of unused distfiles. |
425 |
.TP |
426 |
-- |
427 |
2.26.2 |