Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] emirrordist: add --content-db option required for content-hash layout (bug 756778)
Date: Thu, 25 Feb 2021 01:26:26
Message-Id: 20210225012610.814758-1-zmedico@gentoo.org
1 Add a --content-db option which is required for the content-hash
2 layout because its file listings return content digests instead of
3 distfile names.
4
5 The content db includes a reverse mapping, for use during garbage
6 collection. All keys have a prefix separated by a colon. For digest
7 keys, the prefix is the hash algorithm name. For filename keys,
8 the prefix is "filename". The values for digest keys are plain
9 filenames, and the values for filename keys are dictionaries
10 of digests suitable for construction of DistfileName instances.
11
12 Bug: https://bugs.gentoo.org/756778
13 Signed-off-by: Zac Medico <zmedico@g.o>
14 ---
15 lib/portage/_emirrordist/Config.py | 7 +++-
16 lib/portage/_emirrordist/DeletionIterator.py | 38 ++++++++++++++++++--
17 lib/portage/_emirrordist/DeletionTask.py | 26 ++++++++++++++
18 lib/portage/_emirrordist/FetchTask.py | 16 ++++++++-
19 lib/portage/_emirrordist/main.py | 15 +++++++-
20 lib/portage/tests/ebuild/test_fetch.py | 14 ++++++++
21 man/emirrordist.1 | 6 +++-
22 7 files changed, 116 insertions(+), 6 deletions(-)
23
24 diff --git a/lib/portage/_emirrordist/Config.py b/lib/portage/_emirrordist/Config.py
25 index 4bee4f45e..53f6582fe 100644
26 --- a/lib/portage/_emirrordist/Config.py
27 +++ b/lib/portage/_emirrordist/Config.py
28 @@ -1,4 +1,4 @@
29 -# Copyright 2013-2020 Gentoo Authors
30 +# Copyright 2013-2021 Gentoo Authors
31 # Distributed under the terms of the GNU General Public License v2
32
33 import copy
34 @@ -65,6 +65,11 @@ class Config:
35 self.distfiles_db = self._open_shelve(
36 options.distfiles_db, 'distfiles')
37
38 + self.content_db = None
39 + if options.content_db is not None:
40 + self.content_db = self._open_shelve(
41 + options.content_db, 'content')
42 +
43 self.deletion_db = None
44 if options.deletion_db is not None:
45 self.deletion_db = self._open_shelve(
46 diff --git a/lib/portage/_emirrordist/DeletionIterator.py b/lib/portage/_emirrordist/DeletionIterator.py
47 index 08985ed6c..24fb096bf 100644
48 --- a/lib/portage/_emirrordist/DeletionIterator.py
49 +++ b/lib/portage/_emirrordist/DeletionIterator.py
50 @@ -1,10 +1,12 @@
51 -# Copyright 2013-2019 Gentoo Authors
52 +# Copyright 2013-2021 Gentoo Authors
53 # Distributed under the terms of the GNU General Public License v2
54
55 import logging
56 import stat
57 +import typing
58
59 from portage import os
60 +from portage.package.ebuild.fetch import DistfileName
61 from .DeletionTask import DeletionTask
62
63 class DeletionIterator:
64 @@ -12,6 +14,37 @@ class DeletionIterator:
65 def __init__(self, config):
66 self._config = config
67
68 + def _map_filename(self, filename: typing.Union[str, DistfileName]) -> typing.Union[str, DistfileName]:
69 + """
70 + Map a filename listed by the layout get_filenames method,
71 + translating it from a content digest to a distfile name.
72 + If filename is already a distfile name, then it will pass
73 + through unchanged.
74 +
75 + @param filename: A filename listed by layout get_filenames
76 + @return: The distfile name, mapped from the corresponding
77 + content digest when necessary
78 + """
79 + if not isinstance(filename, DistfileName):
80 + if self._config.content_db is not None:
81 + distfile_key = "filename:{}".format(filename)
82 + try:
83 + digests = self._config.content_db[distfile_key]
84 + except KeyError:
85 + pass
86 + else:
87 + return DistfileName(filename, digests=digests)
88 + return DistfileName(filename)
89 + if filename.digests and self._config.content_db is not None:
90 + for k, v in filename.digests.items():
91 + digest_key = "{}:{}".format(k, v).lower()
92 + try:
93 + distfile_str = self._config.content_db[digest_key]
94 + except KeyError:
95 + continue
96 + return DistfileName(distfile_str, digests={k:v})
97 + return filename
98 +
99 def __iter__(self):
100 distdir = self._config.options.distfiles
101 file_owners = self._config.file_owners
102 @@ -22,7 +55,8 @@ class DeletionIterator:
103 start_time = self._config.start_time
104 distfiles_set = set()
105 for layout in self._config.layouts:
106 - distfiles_set.update(layout.get_filenames(distdir))
107 + distfiles_set.update(self._map_filename(filename)
108 + for filename in layout.get_filenames(distdir))
109 for filename in distfiles_set:
110 # require at least one successful stat()
111 exceptions = []
112 diff --git a/lib/portage/_emirrordist/DeletionTask.py b/lib/portage/_emirrordist/DeletionTask.py
113 index 5eb01d840..96c52fa93 100644
114 --- a/lib/portage/_emirrordist/DeletionTask.py
115 +++ b/lib/portage/_emirrordist/DeletionTask.py
116 @@ -5,6 +5,7 @@ import errno
117 import logging
118
119 from portage import os
120 +from portage.package.ebuild.fetch import ContentHashLayout
121 from portage.util._async.FileCopier import FileCopier
122 from _emerge.CompositeTask import CompositeTask
123
124 @@ -99,6 +100,10 @@ class DeletionTask(CompositeTask):
125 def _delete_links(self):
126 success = True
127 for layout in self.config.layouts:
128 + if isinstance(layout, ContentHashLayout) and not self.distfile.digests:
129 + logging.debug(("_delete_links: '%s' has "
130 + "no digests") % self.distfile)
131 + continue
132 distfile_path = os.path.join(
133 self.config.options.distfiles,
134 layout.get_path(self.distfile))
135 @@ -134,6 +139,27 @@ class DeletionTask(CompositeTask):
136 logging.debug(("drop '%s' from "
137 "distfiles db") % self.distfile)
138
139 + if self.config.content_db is not None:
140 + distfile_key = "filename:{}".format(self.distfile)
141 + try:
142 + digests = self.config.content_db[distfile_key]
143 + except KeyError:
144 + pass
145 + else:
146 + for k, v in digests.items():
147 + digest_key = "{}:{}".format(k, v)
148 + try:
149 + del self.config.content_db[digest_key]
150 + except KeyError:
151 + pass
152 +
153 + logging.debug(("drop '%s' from "
154 + "content db") % self.distfile)
155 + try:
156 + del self.config.content_db[distfile_key]
157 + except KeyError:
158 + pass
159 +
160 if self.config.deletion_db is not None:
161 try:
162 del self.config.deletion_db[self.distfile]
163 diff --git a/lib/portage/_emirrordist/FetchTask.py b/lib/portage/_emirrordist/FetchTask.py
164 index 997762082..5a39cdb1a 100644
165 --- a/lib/portage/_emirrordist/FetchTask.py
166 +++ b/lib/portage/_emirrordist/FetchTask.py
167 @@ -1,4 +1,4 @@
168 -# Copyright 2013-2020 Gentoo Authors
169 +# Copyright 2013-2021 Gentoo Authors
170 # Distributed under the terms of the GNU General Public License v2
171
172 import collections
173 @@ -47,6 +47,20 @@ class FetchTask(CompositeTask):
174 # Convert _pkg_str to str in order to prevent pickle problems.
175 self.config.distfiles_db[self.distfile] = str(self.cpv)
176
177 + if self.config.content_db is not None:
178 + # The content db includes a reverse mapping, for use during garbage
179 + # collection. All keys have a prefix separated by a colon. For digest
180 + # keys, the prefix is the hash algorithm name. For filename keys,
181 + # the prefix is "filename". The values for digest keys are plain
182 + # filenames, and the values for filename keys are dictionaries
183 + # of digests suitable for construction of DistfileName instances.
184 + distfile_str = str(self.distfile)
185 + distfile_key = 'filename:{}'.format(distfile_str)
186 + for k, v in self.distfile.digests.items():
187 + digest_key = '{}:{}'.format(k, v).lower()
188 + self.config.content_db[digest_key] = distfile_str
189 + self.config.content_db.setdefault(distfile_key, {})[k] = v
190 +
191 if not self._have_needed_digests():
192 msg = "incomplete digests: %s" % " ".join(self.digests)
193 self.scheduler.output(msg, background=self.background,
194 diff --git a/lib/portage/_emirrordist/main.py b/lib/portage/_emirrordist/main.py
195 index 8d00a05f5..2200ec715 100644
196 --- a/lib/portage/_emirrordist/main.py
197 +++ b/lib/portage/_emirrordist/main.py
198 @@ -1,4 +1,4 @@
199 -# Copyright 2013-2020 Gentoo Authors
200 +# Copyright 2013-2021 Gentoo Authors
201 # Distributed under the terms of the GNU General Public License v2
202
203 import argparse
204 @@ -7,6 +7,7 @@ import sys
205
206 import portage
207 from portage import os
208 +from portage.package.ebuild.fetch import ContentHashLayout
209 from portage.util import normalize_path, _recursive_file_list
210 from portage.util._async.run_main_scheduler import run_main_scheduler
211 from portage.util._async.SchedulerInterface import SchedulerInterface
212 @@ -151,6 +152,12 @@ common_options = (
213 "distfile belongs to",
214 "metavar" : "FILE"
215 },
216 + {
217 + "longopt" : "--content-db",
218 + "help" : "database file used to map content digests to"
219 + "distfiles names (required for content-hash layout)",
220 + "metavar" : "FILE"
221 + },
222 {
223 "longopt" : "--recycle-dir",
224 "help" : "directory for extended retention of files that "
225 @@ -441,6 +448,12 @@ def emirrordist_main(args):
226 if not options.mirror:
227 parser.error('No action specified')
228
229 + if options.delete and config.content_db is None:
230 + for layout in config.layouts:
231 + if isinstance(layout, ContentHashLayout):
232 + parser.error("content-hash layout requires "
233 + "--content-db to be specified")
234 +
235 returncode = os.EX_OK
236
237 if options.mirror:
238 diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
239 index d50a4cbfc..881288cdc 100644
240 --- a/lib/portage/tests/ebuild/test_fetch.py
241 +++ b/lib/portage/tests/ebuild/test_fetch.py
242 @@ -172,6 +172,16 @@ class EbuildFetchTestCase(TestCase):
243 with open(os.path.join(settings['DISTDIR'], 'layout.conf'), 'wt') as f:
244 f.write(layout_data)
245
246 + if any(isinstance(layout, ContentHashLayout) for layout in layouts):
247 + content_db = os.path.join(playground.eprefix, 'var/db/emirrordist/content.db')
248 + os.makedirs(os.path.dirname(content_db), exist_ok=True)
249 + try:
250 + os.unlink(content_db)
251 + except OSError:
252 + pass
253 + else:
254 + content_db = None
255 +
256 # Demonstrate that fetch preserves a stale file in DISTDIR when no digests are given.
257 foo_uri = {'foo': ('{scheme}://{host}:{port}/distfiles/foo'.format(scheme=scheme, host=host, port=server.server_port),)}
258 foo_path = os.path.join(settings['DISTDIR'], 'foo')
259 @@ -233,9 +243,13 @@ class EbuildFetchTestCase(TestCase):
260 os.path.join(self.bindir, 'emirrordist'),
261 '--distfiles', settings['DISTDIR'],
262 '--config-root', settings['EPREFIX'],
263 + '--delete',
264 '--repositories-configuration', settings.repositories.config_string(),
265 '--repo', 'test_repo', '--mirror')
266
267 + if content_db is not None:
268 + emirrordist_cmd = emirrordist_cmd + ('--content-db', content_db,)
269 +
270 env = settings.environ()
271 env['PYTHONPATH'] = ':'.join(
272 filter(None, [PORTAGE_PYM_PATH] + os.environ.get('PYTHONPATH', '').split(':')))
273 diff --git a/man/emirrordist.1 b/man/emirrordist.1
274 index 45108ef8c..7ad10dfd0 100644
275 --- a/man/emirrordist.1
276 +++ b/man/emirrordist.1
277 @@ -1,4 +1,4 @@
278 -.TH "EMIRRORDIST" "1" "Dec 2015" "Portage VERSION" "Portage"
279 +.TH "EMIRRORDIST" "1" "Feb 2021" "Portage VERSION" "Portage"
280 .SH "NAME"
281 emirrordist \- a fetch tool for mirroring of package distfiles
282 .SH SYNOPSIS
283 @@ -66,6 +66,10 @@ reporting purposes. Opened in append mode.
284 Log file for scheduled deletions, with tab\-delimited output, for
285 reporting purposes. Overwritten with each run.
286 .TP
287 +\fB\-\-content\-db\fR=\fIFILE\fR
288 +Database file used to pair content digests with distfile names
289 +(required for content\-hash layout).
290 +.TP
291 \fB\-\-delete\fR
292 Enable deletion of unused distfiles.
293 .TP
294 --
295 2.26.2

Replies