1 |
Add a --content-db option which is required for the content-hash |
2 |
layout because its file listings return content digests instead of |
3 |
distfile names. |
4 |
|
5 |
The content db includes a reverse mapping, for use during garbage |
6 |
collection. All keys have a prefix separated by a colon. For digest |
7 |
keys, the prefix is the hash algorithm name. For filename keys, |
8 |
the prefix is "filename". The values for digest keys are plain |
9 |
filenames, and the values for filename keys are dictionaries |
10 |
of digests suitable for construction of DistfileName instances. |
11 |
|
12 |
Bug: https://bugs.gentoo.org/756778 |
13 |
Signed-off-by: Zac Medico <zmedico@g.o> |
14 |
--- |
15 |
lib/portage/_emirrordist/Config.py | 7 +++- |
16 |
lib/portage/_emirrordist/DeletionIterator.py | 38 ++++++++++++++++++-- |
17 |
lib/portage/_emirrordist/DeletionTask.py | 26 ++++++++++++++ |
18 |
lib/portage/_emirrordist/FetchTask.py | 16 ++++++++- |
19 |
lib/portage/_emirrordist/main.py | 15 +++++++- |
20 |
lib/portage/tests/ebuild/test_fetch.py | 14 ++++++++ |
21 |
man/emirrordist.1 | 6 +++- |
22 |
7 files changed, 116 insertions(+), 6 deletions(-) |
23 |
|
24 |
diff --git a/lib/portage/_emirrordist/Config.py b/lib/portage/_emirrordist/Config.py |
25 |
index 4bee4f45e..53f6582fe 100644 |
26 |
--- a/lib/portage/_emirrordist/Config.py |
27 |
+++ b/lib/portage/_emirrordist/Config.py |
28 |
@@ -1,4 +1,4 @@ |
29 |
-# Copyright 2013-2020 Gentoo Authors |
30 |
+# Copyright 2013-2021 Gentoo Authors |
31 |
# Distributed under the terms of the GNU General Public License v2 |
32 |
|
33 |
import copy |
34 |
@@ -65,6 +65,11 @@ class Config: |
35 |
self.distfiles_db = self._open_shelve( |
36 |
options.distfiles_db, 'distfiles') |
37 |
|
38 |
+ self.content_db = None |
39 |
+ if options.content_db is not None: |
40 |
+ self.content_db = self._open_shelve( |
41 |
+ options.content_db, 'content') |
42 |
+ |
43 |
self.deletion_db = None |
44 |
if options.deletion_db is not None: |
45 |
self.deletion_db = self._open_shelve( |
46 |
diff --git a/lib/portage/_emirrordist/DeletionIterator.py b/lib/portage/_emirrordist/DeletionIterator.py |
47 |
index 08985ed6c..24fb096bf 100644 |
48 |
--- a/lib/portage/_emirrordist/DeletionIterator.py |
49 |
+++ b/lib/portage/_emirrordist/DeletionIterator.py |
50 |
@@ -1,10 +1,12 @@ |
51 |
-# Copyright 2013-2019 Gentoo Authors |
52 |
+# Copyright 2013-2021 Gentoo Authors |
53 |
# Distributed under the terms of the GNU General Public License v2 |
54 |
|
55 |
import logging |
56 |
import stat |
57 |
+import typing |
58 |
|
59 |
from portage import os |
60 |
+from portage.package.ebuild.fetch import DistfileName |
61 |
from .DeletionTask import DeletionTask |
62 |
|
63 |
class DeletionIterator: |
64 |
@@ -12,6 +14,37 @@ class DeletionIterator: |
65 |
def __init__(self, config): |
66 |
self._config = config |
67 |
|
68 |
+ def _map_filename(self, filename: typing.Union[str, DistfileName]) -> typing.Union[str, DistfileName]: |
69 |
+ """ |
70 |
+ Map a filename listed by the layout get_filenames method, |
71 |
+ translating it from a content digest to a distfile name. |
72 |
+ If filename is already a distfile name, then it will pass |
73 |
+ through unchanged. |
74 |
+ |
75 |
+ @param filename: A filename listed by layout get_filenames |
76 |
+ @return: The distfile name, mapped from the corresponding |
77 |
+ content digest when necessary |
78 |
+ """ |
79 |
+ if not isinstance(filename, DistfileName): |
80 |
+ if self._config.content_db is not None: |
81 |
+ distfile_key = "filename:{}".format(filename) |
82 |
+ try: |
83 |
+ digests = self._config.content_db[distfile_key] |
84 |
+ except KeyError: |
85 |
+ pass |
86 |
+ else: |
87 |
+ return DistfileName(filename, digests=digests) |
88 |
+ return DistfileName(filename) |
89 |
+ if filename.digests and self._config.content_db is not None: |
90 |
+ for k, v in filename.digests.items(): |
91 |
+ digest_key = "{}:{}".format(k, v).lower() |
92 |
+ try: |
93 |
+ distfile_str = self._config.content_db[digest_key] |
94 |
+ except KeyError: |
95 |
+ continue |
96 |
+ return DistfileName(distfile_str, digests={k:v}) |
97 |
+ return filename |
98 |
+ |
99 |
def __iter__(self): |
100 |
distdir = self._config.options.distfiles |
101 |
file_owners = self._config.file_owners |
102 |
@@ -22,7 +55,8 @@ class DeletionIterator: |
103 |
start_time = self._config.start_time |
104 |
distfiles_set = set() |
105 |
for layout in self._config.layouts: |
106 |
- distfiles_set.update(layout.get_filenames(distdir)) |
107 |
+ distfiles_set.update(self._map_filename(filename) |
108 |
+ for filename in layout.get_filenames(distdir)) |
109 |
for filename in distfiles_set: |
110 |
# require at least one successful stat() |
111 |
exceptions = [] |
112 |
diff --git a/lib/portage/_emirrordist/DeletionTask.py b/lib/portage/_emirrordist/DeletionTask.py |
113 |
index 5eb01d840..96c52fa93 100644 |
114 |
--- a/lib/portage/_emirrordist/DeletionTask.py |
115 |
+++ b/lib/portage/_emirrordist/DeletionTask.py |
116 |
@@ -5,6 +5,7 @@ import errno |
117 |
import logging |
118 |
|
119 |
from portage import os |
120 |
+from portage.package.ebuild.fetch import ContentHashLayout |
121 |
from portage.util._async.FileCopier import FileCopier |
122 |
from _emerge.CompositeTask import CompositeTask |
123 |
|
124 |
@@ -99,6 +100,10 @@ class DeletionTask(CompositeTask): |
125 |
def _delete_links(self): |
126 |
success = True |
127 |
for layout in self.config.layouts: |
128 |
+ if isinstance(layout, ContentHashLayout) and not self.distfile.digests: |
129 |
+ logging.debug(("_delete_links: '%s' has " |
130 |
+ "no digests") % self.distfile) |
131 |
+ continue |
132 |
distfile_path = os.path.join( |
133 |
self.config.options.distfiles, |
134 |
layout.get_path(self.distfile)) |
135 |
@@ -134,6 +139,27 @@ class DeletionTask(CompositeTask): |
136 |
logging.debug(("drop '%s' from " |
137 |
"distfiles db") % self.distfile) |
138 |
|
139 |
+ if self.config.content_db is not None: |
140 |
+ distfile_key = "filename:{}".format(self.distfile) |
141 |
+ try: |
142 |
+ digests = self.config.content_db[distfile_key] |
143 |
+ except KeyError: |
144 |
+ pass |
145 |
+ else: |
146 |
+ for k, v in digests.items(): |
147 |
+ digest_key = "{}:{}".format(k, v).lower() |
148 |
+ try: |
149 |
+ del self.config.content_db[digest_key] |
150 |
+ except KeyError: |
151 |
+ pass |
152 |
+ |
153 |
+ logging.debug(("drop '%s' from " |
154 |
+ "content db") % self.distfile) |
155 |
+ try: |
156 |
+ del self.config.content_db[distfile_key] |
157 |
+ except KeyError: |
158 |
+ pass |
159 |
+ |
160 |
if self.config.deletion_db is not None: |
161 |
try: |
162 |
del self.config.deletion_db[self.distfile] |
163 |
diff --git a/lib/portage/_emirrordist/FetchTask.py b/lib/portage/_emirrordist/FetchTask.py |
164 |
index 997762082..5a39cdb1a 100644 |
165 |
--- a/lib/portage/_emirrordist/FetchTask.py |
166 |
+++ b/lib/portage/_emirrordist/FetchTask.py |
167 |
@@ -1,4 +1,4 @@ |
168 |
-# Copyright 2013-2020 Gentoo Authors |
169 |
+# Copyright 2013-2021 Gentoo Authors |
170 |
# Distributed under the terms of the GNU General Public License v2 |
171 |
|
172 |
import collections |
173 |
@@ -47,6 +47,20 @@ class FetchTask(CompositeTask): |
174 |
# Convert _pkg_str to str in order to prevent pickle problems. |
175 |
self.config.distfiles_db[self.distfile] = str(self.cpv) |
176 |
|
177 |
+ if self.config.content_db is not None: |
178 |
+ # The content db includes a reverse mapping, for use during garbage |
179 |
+ # collection. All keys have a prefix separated by a colon. For digest |
180 |
+ # keys, the prefix is the hash algorithm name. For filename keys, |
181 |
+ # the prefix is "filename". The values for digest keys are plain |
182 |
+ # filenames, and the values for filename keys are dictionaries |
183 |
+ # of digests suitable for construction of DistfileName instances. |
184 |
+ distfile_str = str(self.distfile) |
185 |
+ distfile_key = 'filename:{}'.format(distfile_str) |
186 |
+ for k, v in self.distfile.digests.items(): |
187 |
+ digest_key = '{}:{}'.format(k, v).lower() |
188 |
+ self.config.content_db[digest_key] = distfile_str |
189 |
+ self.config.content_db.setdefault(distfile_key, {})[k] = v |
190 |
+ |
191 |
if not self._have_needed_digests(): |
192 |
msg = "incomplete digests: %s" % " ".join(self.digests) |
193 |
self.scheduler.output(msg, background=self.background, |
194 |
diff --git a/lib/portage/_emirrordist/main.py b/lib/portage/_emirrordist/main.py |
195 |
index 8d00a05f5..2200ec715 100644 |
196 |
--- a/lib/portage/_emirrordist/main.py |
197 |
+++ b/lib/portage/_emirrordist/main.py |
198 |
@@ -1,4 +1,4 @@ |
199 |
-# Copyright 2013-2020 Gentoo Authors |
200 |
+# Copyright 2013-2021 Gentoo Authors |
201 |
# Distributed under the terms of the GNU General Public License v2 |
202 |
|
203 |
import argparse |
204 |
@@ -7,6 +7,7 @@ import sys |
205 |
|
206 |
import portage |
207 |
from portage import os |
208 |
+from portage.package.ebuild.fetch import ContentHashLayout |
209 |
from portage.util import normalize_path, _recursive_file_list |
210 |
from portage.util._async.run_main_scheduler import run_main_scheduler |
211 |
from portage.util._async.SchedulerInterface import SchedulerInterface |
212 |
@@ -151,6 +152,12 @@ common_options = ( |
213 |
"distfile belongs to", |
214 |
"metavar" : "FILE" |
215 |
}, |
216 |
+ { |
217 |
+ "longopt" : "--content-db", |
218 |
+ "help" : "database file used to map content digests to " |
219 |
+ "distfiles names (required for content-hash layout)", |
220 |
+ "metavar" : "FILE" |
221 |
+ }, |
222 |
{ |
223 |
"longopt" : "--recycle-dir", |
224 |
"help" : "directory for extended retention of files that " |
225 |
@@ -441,6 +448,12 @@ def emirrordist_main(args): |
226 |
if not options.mirror: |
227 |
parser.error('No action specified') |
228 |
|
229 |
+ if options.delete and config.content_db is None: |
230 |
+ for layout in config.layouts: |
231 |
+ if isinstance(layout, ContentHashLayout): |
232 |
+ parser.error("content-hash layout requires " |
233 |
+ "--content-db to be specified") |
234 |
+ |
235 |
returncode = os.EX_OK |
236 |
|
237 |
if options.mirror: |
238 |
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py |
239 |
index d50a4cbfc..881288cdc 100644 |
240 |
--- a/lib/portage/tests/ebuild/test_fetch.py |
241 |
+++ b/lib/portage/tests/ebuild/test_fetch.py |
242 |
@@ -172,6 +172,16 @@ class EbuildFetchTestCase(TestCase): |
243 |
with open(os.path.join(settings['DISTDIR'], 'layout.conf'), 'wt') as f: |
244 |
f.write(layout_data) |
245 |
|
246 |
+ if any(isinstance(layout, ContentHashLayout) for layout in layouts): |
247 |
+ content_db = os.path.join(playground.eprefix, 'var/db/emirrordist/content.db') |
248 |
+ os.makedirs(os.path.dirname(content_db), exist_ok=True) |
249 |
+ try: |
250 |
+ os.unlink(content_db) |
251 |
+ except OSError: |
252 |
+ pass |
253 |
+ else: |
254 |
+ content_db = None |
255 |
+ |
256 |
# Demonstrate that fetch preserves a stale file in DISTDIR when no digests are given. |
257 |
foo_uri = {'foo': ('{scheme}://{host}:{port}/distfiles/foo'.format(scheme=scheme, host=host, port=server.server_port),)} |
258 |
foo_path = os.path.join(settings['DISTDIR'], 'foo') |
259 |
@@ -233,9 +243,13 @@ class EbuildFetchTestCase(TestCase): |
260 |
os.path.join(self.bindir, 'emirrordist'), |
261 |
'--distfiles', settings['DISTDIR'], |
262 |
'--config-root', settings['EPREFIX'], |
263 |
+ '--delete', |
264 |
'--repositories-configuration', settings.repositories.config_string(), |
265 |
'--repo', 'test_repo', '--mirror') |
266 |
|
267 |
+ if content_db is not None: |
268 |
+ emirrordist_cmd = emirrordist_cmd + ('--content-db', content_db,) |
269 |
+ |
270 |
env = settings.environ() |
271 |
env['PYTHONPATH'] = ':'.join( |
272 |
filter(None, [PORTAGE_PYM_PATH] + os.environ.get('PYTHONPATH', '').split(':'))) |
273 |
diff --git a/man/emirrordist.1 b/man/emirrordist.1 |
274 |
index 45108ef8c..7ad10dfd0 100644 |
275 |
--- a/man/emirrordist.1 |
276 |
+++ b/man/emirrordist.1 |
277 |
@@ -1,4 +1,4 @@ |
278 |
-.TH "EMIRRORDIST" "1" "Dec 2015" "Portage VERSION" "Portage" |
279 |
+.TH "EMIRRORDIST" "1" "Feb 2021" "Portage VERSION" "Portage" |
280 |
.SH "NAME" |
281 |
emirrordist \- a fetch tool for mirroring of package distfiles |
282 |
.SH SYNOPSIS |
283 |
@@ -66,6 +66,10 @@ reporting purposes. Opened in append mode. |
284 |
Log file for scheduled deletions, with tab\-delimited output, for |
285 |
reporting purposes. Overwritten with each run. |
286 |
.TP |
287 |
+\fB\-\-content\-db\fR=\fIFILE\fR |
288 |
+Database file used to pair content digests with distfiles names |
289 |
+(required for content\-hash layout). |
290 |
+.TP |
291 |
\fB\-\-delete\fR |
292 |
Enable deletion of unused distfiles. |
293 |
.TP |
294 |
-- |
295 |
2.26.2 |