1 |
commit: 6a539b7c5163899db1d58cf152aeab1b2b4f9be4 |
2 |
Author: Michał Górny <mgorny <AT> gentoo <DOT> org> |
3 |
AuthorDate: Thu Oct 3 14:19:54 2019 +0000 |
4 |
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org> |
5 |
CommitDate: Fri Oct 4 21:25:00 2019 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=6a539b7c |
7 |
|
8 |
fetch: Support GLEP 75 mirror structure |
9 |
|
10 |
Add support for the subset of GLEP 75 needed by Gentoo Infra. This |
11 |
includes fetching and parsing layout.conf, and support for flat layout |
12 |
and filename-hash layout with cutoffs being multiples of 4. |
13 |
|
14 |
Bug: https://bugs.gentoo.org/646898 |
15 |
Closes: https://github.com/gentoo/portage/pull/462 |
16 |
Reviewed-by: Zac Medico <zmedico <AT> gentoo.org> |
17 |
Signed-off-by: Michał Górny <mgorny <AT> gentoo.org> |
18 |
|
19 |
lib/portage/package/ebuild/fetch.py | 160 ++++++++++++++++++++++++++++++++- |
20 |
lib/portage/tests/ebuild/test_fetch.py | 94 ++++++++++++++++++- |
21 |
2 files changed, 250 insertions(+), 4 deletions(-) |
22 |
|
23 |
diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py |
24 |
index 227bf45ae..4458796fc 100644 |
25 |
--- a/lib/portage/package/ebuild/fetch.py |
26 |
+++ b/lib/portage/package/ebuild/fetch.py |
27 |
@@ -6,13 +6,17 @@ from __future__ import print_function |
28 |
__all__ = ['fetch'] |
29 |
|
30 |
import errno |
31 |
+import functools |
32 |
import io |
33 |
+import itertools |
34 |
+import json |
35 |
import logging |
36 |
import random |
37 |
import re |
38 |
import stat |
39 |
import sys |
40 |
import tempfile |
41 |
+import time |
42 |
|
43 |
from collections import OrderedDict |
44 |
|
45 |
@@ -27,12 +31,17 @@ portage.proxy.lazyimport.lazyimport(globals(), |
46 |
'portage.package.ebuild.doebuild:doebuild_environment,' + \ |
47 |
'_doebuild_spawn', |
48 |
'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', |
49 |
+ 'portage.util:atomic_ofstream', |
50 |
+ 'portage.util.configparser:SafeConfigParser,read_configs,' + |
51 |
+ 'NoOptionError,ConfigParserError', |
52 |
+ 'portage.util._urlopen:urlopen', |
53 |
) |
54 |
|
55 |
from portage import os, selinux, shutil, _encodings, \ |
56 |
_movefile, _shell_quote, _unicode_encode |
57 |
from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all, |
58 |
- _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter) |
59 |
+ _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter, |
60 |
+ checksum_str) |
61 |
from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \ |
62 |
GLOBAL_CONFIG_PATH |
63 |
from portage.data import portage_gid, portage_uid, secpass, userpriv_groups |
64 |
@@ -253,6 +262,146 @@ _size_suffix_map = { |
65 |
'Y' : 80, |
66 |
} |
67 |
|
68 |
+ |
69 |
class FlatLayout(object):
    """
    Mirror layout that maps a distfile name to itself, i.e. without
    any subdirectory prefix.
    """

    @staticmethod
    def verify_args(args):
        """
        Check the tokens of a layout.conf entry for this layout.

        @param args: sequence of tokens making up the entry, including
            the layout name itself
        @return: True if the entry consists of exactly one token,
            False otherwise
        """
        token_count = len(args)
        return token_count == 1

    def get_path(self, filename):
        """
        Get the path of a distfile relative to the distfiles directory.

        @param filename: name of the distfile
        @return: the filename, unchanged
        """
        return filename
76 |
+ |
77 |
+ |
78 |
class FilenameHashLayout(object):
    """
    Mirror layout that stores each distfile below one or more
    directory levels named after leading hex digits of a hash of
    the filename.

    Each cutoff is a number of hash bits consumed by one directory
    level.  Since the hash is handled as a hex string (4 bits per
    character), only positive multiples of 4 are supported.
    """

    def __init__(self, algo, cutoffs):
        """
        @param algo: hash name, passed through to checksum_str()
        @param cutoffs: colon-separated bit counts, one per directory
            level (e.g. '8:16')
        @raise ValueError: if a cutoff is not an integer
        """
        self.algo = algo
        self.cutoffs = [int(x) for x in cutoffs.split(':')]

    @staticmethod
    def _is_valid_cutoff(bits):
        """
        Return True if the bit count maps to a whole, non-zero number
        of hex digits.
        """
        return bits > 0 and bits % 4 == 0

    def get_path(self, filename):
        """
        Get the path of a distfile relative to the distfiles directory.

        @param filename: name of the distfile
        @return: '<hash digits>/[<more hash digits>/...]<filename>'
        @raise ValueError: if any cutoff is not a positive multiple
            of 4
        """
        # Validate explicitly instead of using assert, which is
        # stripped when Python runs with -O and would let a bad
        # cutoff silently produce a wrong path.
        widths = []
        for bits in self.cutoffs:
            if not self._is_valid_cutoff(bits):
                raise ValueError(
                    'invalid cutoff %r: expected a positive multiple of 4'
                    % (bits,))
            widths.append(bits // 4)

        fnhash = checksum_str(filename.encode('utf8'), self.algo)

        # Consume the hash left to right, one directory level per cutoff.
        parts = []
        pos = 0
        for width in widths:
            parts.append(fnhash[pos:pos + width])
            pos += width
        parts.append(filename)
        return '/'.join(parts)

    @staticmethod
    def verify_args(args):
        """
        Check the tokens of a layout.conf entry for this layout.

        @param args: sequence of tokens making up the entry: layout
            name, hash name, cutoffs
        @return: True if there are exactly three tokens, every cutoff
            is a positive multiple of 4 and the hash name is one of
            get_valid_checksum_keys(); False otherwise
        """
        if len(args) != 3:
            return False
        # validate cutoffs (done before the hash lookup, since it is
        # a purely syntactic check)
        for c in args[2].split(':'):
            try:
                bits = int(c)
            except ValueError:
                return False
            if not FilenameHashLayout._is_valid_cutoff(bits):
                return False
        return args[1] in get_valid_checksum_keys()
111 |
+ |
112 |
+ |
113 |
class MirrorLayoutConfig(object):
    """
    Class to read layout.conf from a mirror.

    The parsed configuration is kept in self.structure as a tuple of
    entries in order of preference; each entry is a tuple of string
    tokens whose first element is the layout name.
    """

    def __init__(self):
        self.structure = ()

    def read_from_file(self, f):
        """
        Parse layout.conf and replace the current structure.

        Options '0', '1', '2', ... of the [structure] section are read
        in order until the first missing one.  Parser errors other
        than a missing option propagate to the caller, in which case
        the current structure is left untouched.

        @param f: file-like object to read the configuration from
        """
        cp = SafeConfigParser()
        read_configs(cp, [f])
        vals = []
        for i in itertools.count():
            try:
                vals.append(tuple(cp.get('structure', '%d' % i).split()))
            except NoOptionError:
                break
        self.structure = tuple(vals)

    def serialize(self):
        """
        @return: the structure as a tuple of tuples of strings
        """
        return self.structure

    def deserialize(self, data):
        """
        Restore the structure from data produced by serialize().

        The data is normalized back to a tuple of tuples, since a
        round trip through JSON turns tuples into lists.  None (no
        data) results in an empty structure.

        @param data: iterable of token sequences, or None
        """
        self.structure = tuple(tuple(entry) for entry in data or ())

    @staticmethod
    def validate_structure(val):
        """
        Check whether a single structure entry is supported.

        @param val: sequence of tokens, layout name first
        @return: True if the layout is known and its arguments are
            accepted by the layout class, False otherwise
        """
        # An option with an empty value (e.g. '0=') yields no tokens.
        if not val:
            return False
        if val[0] == 'flat':
            return FlatLayout.verify_args(val)
        if val[0] == 'filename-hash':
            return FilenameHashLayout.verify_args(val)
        return False

    def get_best_supported_layout(self):
        """
        Instantiate the first entry of the structure that is supported.

        @return: layout instance for the first valid entry, or
            a FlatLayout if there is none
        """
        for val in self.structure:
            if not self.validate_structure(val):
                continue
            if val[0] == 'flat':
                return FlatLayout(*val[1:])
            if val[0] == 'filename-hash':
                return FilenameHashLayout(*val[1:])
        # fallback
        return FlatLayout()
156 |
+ |
157 |
+ |
158 |
def get_mirror_url(mirror_url, filename, cache_path=None):
    """
    Get correct fetch URL for a given file, accounting for mirror
    layout configuration.

    The mirror's layout.conf is fetched at most once a day per mirror;
    in between, the parsed layout is taken from the JSON cache file
    (if a cache path is given).  A mirror whose layout.conf cannot be
    fetched or parsed is treated as having no structure entries.

    @param mirror_url: Base URL to the mirror (without '/distfiles')
    @param filename: Filename to fetch
    @param cache_path: Path for mirror metadata cache
    @return: Full URL to fetch
    """

    mirror_conf = MirrorLayoutConfig()

    cache = {}
    if cache_path is not None:
        try:
            with open(cache_path, 'r') as f:
                cache = json.load(f)
        except (IOError, ValueError):
            pass
        # Valid JSON of the wrong shape is as useless as no cache.
        if not isinstance(cache, dict):
            cache = {}

    now = time.time()
    try:
        ts, data = cache.get(mirror_url, (0, None))
        # refresh at least daily; a timestamp from the future (clock
        # change) is considered stale rather than valid forever
        fresh = now - 86400 <= ts <= now
    except (TypeError, ValueError):
        # malformed cache entry
        fresh = False

    if fresh:
        mirror_conf.deserialize(data)
    else:
        try:
            f = urlopen(mirror_url + '/distfiles/layout.conf')
            try:
                data = io.StringIO(f.read().decode('utf8'))
            finally:
                f.close()

            try:
                mirror_conf.read_from_file(data)
            except ConfigParserError:
                pass
        except (IOError, UnicodeDecodeError):
            # unreachable mirror, missing layout.conf, or a response
            # that is not UTF-8 text
            pass

        cache[mirror_url] = (now, mirror_conf.serialize())
        if cache_path is not None:
            # The cache is an optimization only: failing to update it
            # (e.g. read-only location) must not prevent fetching.
            # NOTE(review): assumes atomic_ofstream reports such
            # failures as IOError/OSError -- confirm.
            try:
                f = atomic_ofstream(cache_path, 'w')
                try:
                    json.dump(cache, f)
                except BaseException:
                    # discard the partially written temporary file
                    f.abort()
                    raise
                else:
                    f.close()
            except (IOError, OSError):
                pass

    return (mirror_url + "/distfiles/" +
        mirror_conf.get_best_supported_layout().get_path(filename))
206 |
+ |
207 |
+ |
208 |
def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
209 |
locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None, |
210 |
allow_missing_digests=True): |
211 |
@@ -434,8 +583,11 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
212 |
for myfile, myuri in file_uri_tuples: |
213 |
if myfile not in filedict: |
214 |
filedict[myfile]=[] |
215 |
- for y in range(0,len(locations)): |
216 |
- filedict[myfile].append(locations[y]+"/distfiles/"+myfile) |
217 |
+ mirror_cache = os.path.join(mysettings["DISTDIR"], |
218 |
+ ".mirror-cache.json") |
219 |
+ for l in locations: |
220 |
+ filedict[myfile].append(functools.partial( |
221 |
+ get_mirror_url, l, myfile, mirror_cache)) |
222 |
if myuri is None: |
223 |
continue |
224 |
if myuri[:9]=="mirror://": |
225 |
@@ -895,6 +1047,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
226 |
tried_locations = set() |
227 |
while uri_list: |
228 |
loc = uri_list.pop() |
229 |
+ if isinstance(loc, functools.partial): |
230 |
+ loc = loc() |
231 |
# Eliminate duplicates here in case we've switched to |
232 |
# "primaryuri" mode on the fly due to a checksum failure. |
233 |
if loc in tried_locations: |
234 |
|
235 |
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py |
236 |
index 83321fed7..f2254c468 100644 |
237 |
--- a/lib/portage/tests/ebuild/test_fetch.py |
238 |
+++ b/lib/portage/tests/ebuild/test_fetch.py |
239 |
@@ -4,6 +4,7 @@ |
240 |
from __future__ import unicode_literals |
241 |
|
242 |
import functools |
243 |
+import io |
244 |
import tempfile |
245 |
|
246 |
import portage |
247 |
@@ -11,12 +12,14 @@ from portage import shutil, os |
248 |
from portage.tests import TestCase |
249 |
from portage.tests.resolver.ResolverPlayground import ResolverPlayground |
250 |
from portage.tests.util.test_socks5 import AsyncHTTPServer |
251 |
+from portage.util.configparser import ConfigParserError |
252 |
from portage.util.futures.executor.fork import ForkExecutor |
253 |
from portage.util._async.SchedulerInterface import SchedulerInterface |
254 |
from portage.util._eventloop.global_event_loop import global_event_loop |
255 |
from portage.package.ebuild.config import config |
256 |
from portage.package.ebuild.digestgen import digestgen |
257 |
-from portage.package.ebuild.fetch import _download_suffix |
258 |
+from portage.package.ebuild.fetch import (_download_suffix, FlatLayout, |
259 |
+ FilenameHashLayout, MirrorLayoutConfig) |
260 |
from _emerge.EbuildFetcher import EbuildFetcher |
261 |
from _emerge.Package import Package |
262 |
|
263 |
@@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase): |
264 |
finally: |
265 |
shutil.rmtree(ro_distdir) |
266 |
playground.cleanup() |
267 |
+ |
268 |
def test_flat_layout(self):
    """
    FlatLayout accepts only an entry consisting of the bare layout
    name and maps a filename to itself.
    """
    verify = FlatLayout.verify_args
    self.assertTrue(verify(('flat',)))
    self.assertFalse(verify(('flat', 'extraneous-arg')))

    distfile = 'foo-1.tar.gz'
    self.assertEqual(FlatLayout().get_path(distfile), 'foo-1.tar.gz')
272 |
+ |
273 |
def test_filename_hash_layout(self):
    """
    Check argument validation and path generation of
    FilenameHashLayout.
    """
    # (entry tokens, whether verify_args() should accept them)
    verify_cases = (
        (('filename-hash',), False),
        (('filename-hash', 'SHA1', '8'), True),
        (('filename-hash', 'INVALID-HASH', '8'), False),
        (('filename-hash', 'SHA1', '4:8:12'), True),
        (('filename-hash', 'SHA1', '3'), False),
        (('filename-hash', 'SHA1', 'junk'), False),
        (('filename-hash', 'SHA1', '4:8:junk'), False),
    )
    for tokens, accepted in verify_cases:
        check = self.assertTrue if accepted else self.assertFalse
        check(FilenameHashLayout.verify_args(tokens))

    # (cutoffs, expected relative path)
    path_cases = (
        ('4', '1/foo-1.tar.gz'),
        ('8', '19/foo-1.tar.gz'),
        ('8:16', '19/c3b6/foo-1.tar.gz'),
        ('8:16:24', '19/c3b6/37a94b/foo-1.tar.gz'),
    )
    for cutoffs, expected in path_cases:
        layout = FilenameHashLayout('SHA1', cutoffs)
        self.assertEqual(layout.get_path('foo-1.tar.gz'), expected)
290 |
+ |
291 |
def test_mirror_layout_config(self):
    """
    Check parsing, layout selection, fallback past unsupported
    entries, (de)serialization and error reporting of
    MirrorLayoutConfig.
    """
    distfile = 'foo-1.tar.gz'
    flat_entry = ('flat',)
    sha1_8_16_entry = ('filename-hash', 'SHA1', '8:16')

    mlc = MirrorLayoutConfig()
    best = mlc.get_best_supported_layout

    # a fresh object has no entries and falls back to flat layout
    self.assertEqual(mlc.serialize(), ())
    self.assertIsInstance(best(), FlatLayout)

    # single flat entry
    conf = (
        '\n'
        '[structure]\n'
        '0=flat\n'
    )
    mlc.read_from_file(io.StringIO(conf))
    self.assertEqual(mlc.serialize(), (flat_entry,))
    self.assertIsInstance(best(), FlatLayout)
    self.assertEqual(best().get_path(distfile), 'foo-1.tar.gz')

    # the first supported entry wins
    conf = (
        '\n'
        '[structure]\n'
        '0=filename-hash SHA1 8:16\n'
        '1=flat\n'
    )
    mlc.read_from_file(io.StringIO(conf))
    self.assertEqual(mlc.serialize(), (sha1_8_16_entry, flat_entry))
    self.assertIsInstance(best(), FilenameHashLayout)
    self.assertEqual(best().get_path(distfile), '19/c3b6/foo-1.tar.gz')
    serialized = mlc.serialize()

    # test fallback
    conf = (
        '\n'
        '[structure]\n'
        '0=filename-hash INVALID-HASH 8:16\n'
        '1=filename-hash SHA1 32\n'
        '2=flat\n'
    )
    mlc.read_from_file(io.StringIO(conf))
    expected_structure = (
        ('filename-hash', 'INVALID-HASH', '8:16'),
        ('filename-hash', 'SHA1', '32'),
        flat_entry,
    )
    self.assertEqual(mlc.serialize(), expected_structure)
    self.assertIsInstance(best(), FilenameHashLayout)
    self.assertEqual(best().get_path(distfile), '19c3b637/foo-1.tar.gz')

    # test deserialization
    mlc.deserialize(serialized)
    self.assertEqual(mlc.serialize(), (sha1_8_16_entry, flat_entry))
    self.assertIsInstance(best(), FilenameHashLayout)
    self.assertEqual(best().get_path(distfile), '19/c3b6/foo-1.tar.gz')

    # test erroneous input
    conf = (
        '\n'
        '[#(*DA*&*F\n'
        '[structure]\n'
        '0=filename-hash SHA1 32\n'
    )
    broken = io.StringIO(conf)
    self.assertRaises(ConfigParserError, mlc.read_from_file, broken)