Gentoo Archives: gentoo-portage-dev

From: "Michał Górny" <mgorny@g.o>
To: gentoo-portage-dev@l.g.o
Cc: "Michał Górny" <mgorny@g.o>
Subject: [gentoo-portage-dev] [PATCH v3] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 09:19:26
Message-Id: 20191004091858.44679-1-mgorny@gentoo.org
1 Add support for the subset of GLEP 75 needed by Gentoo Infra. This
2 includes fetching and parsing layout.conf, and support for flat layout
3 and filename-hash layout with cutoffs being multiples of 4.
4
5 Bug: https://bugs.gentoo.org/646898
6 Signed-off-by: Michał Górny <mgorny@g.o>
7 ---
8 lib/portage/package/ebuild/fetch.py | 158 ++++++++++++++++++++++++-
9 lib/portage/tests/ebuild/test_fetch.py | 94 ++++++++++++++-
10 2 files changed, 247 insertions(+), 5 deletions(-)
11
12 Changes in v3:
13 - mirrors are evaluated lazily (i.e. Portage doesn't fetch layouts
14 for all mirrors prematurely),
15 - garbage layout.conf is handled gracefully,
16 - cache updates are done atomically,
17 - layout.conf argument verification has been moved to individual classes,
18 - a few unit and integration tests have been added.
19
20 diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
21 index 227bf45ae..fa250535f 100644
22 --- a/lib/portage/package/ebuild/fetch.py
23 +++ b/lib/portage/package/ebuild/fetch.py
24 @@ -6,13 +6,17 @@ from __future__ import print_function
25 __all__ = ['fetch']
26
27 import errno
28 +import functools
29 import io
30 +import itertools
31 +import json
32 import logging
33 import random
34 import re
35 import stat
36 import sys
37 import tempfile
38 +import time
39
40 from collections import OrderedDict
41
42 @@ -27,14 +31,19 @@ portage.proxy.lazyimport.lazyimport(globals(),
43 'portage.package.ebuild.doebuild:doebuild_environment,' + \
44 '_doebuild_spawn',
45 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
46 + 'portage.util:atomic_ofstream',
47 + 'portage.util.configparser:SafeConfigParser,read_configs,' +
48 + 'NoOptionError,ConfigParserError',
49 + 'portage.util._urlopen:urlopen',
50 )
51
52 from portage import os, selinux, shutil, _encodings, \
53 _movefile, _shell_quote, _unicode_encode
54 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
55 - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
56 + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
57 + checksum_str)
58 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
59 - GLOBAL_CONFIG_PATH
60 + GLOBAL_CONFIG_PATH, CACHE_PATH
61 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
62 from portage.exception import FileNotFound, OperationNotPermitted, \
63 PortageException, TryAgain
64 @@ -253,6 +262,144 @@ _size_suffix_map = {
65 'Y' : 80,
66 }
67
68 +
69 +class FlatLayout(object):
70 + def get_path(self, filename):
71 + return filename
72 +
73 + @staticmethod
74 + def verify_args(args):
75 + return len(args) == 1
76 +
77 +
78 +class FilenameHashLayout(object):
79 + def __init__(self, algo, cutoffs):
80 + self.algo = algo
81 + self.cutoffs = [int(x) for x in cutoffs.split(':')]
82 +
83 + def get_path(self, filename):
84 + fnhash = checksum_str(filename.encode('utf8'), self.algo)
85 + ret = ''
86 + for c in self.cutoffs:
87 + assert c % 4 == 0
88 + c = c // 4
89 + ret += fnhash[:c] + '/'
90 + fnhash = fnhash[c:]
91 + return ret + filename
92 +
93 + @staticmethod
94 + def verify_args(args):
95 + if len(args) != 3:
96 + return False
97 + if args[1] not in get_valid_checksum_keys():
98 + return False
99 + # validate cutoffs
100 + for c in args[2].split(':'):
101 + try:
102 + c = int(c)
103 + except ValueError:
104 + break
105 + else:
106 + if c % 4 != 0:
107 + break
108 + else:
109 + return True
110 + return False
111 +
112 +
113 +class MirrorLayoutConfig(object):
114 + """
115 + Class to read layout.conf from a mirror.
116 + """
117 +
118 + def __init__(self):
119 + self.structure = ()
120 +
121 + def read_from_file(self, f):
122 + cp = SafeConfigParser()
123 + read_configs(cp, [f])
124 + vals = []
125 + for i in itertools.count():
126 + try:
127 + vals.append(tuple(cp.get('structure', '%d' % i).split()))
128 + except NoOptionError:
129 + break
130 + self.structure = tuple(vals)
131 +
132 + def serialize(self):
133 + return self.structure
134 +
135 + def deserialize(self, data):
136 + self.structure = data
137 +
138 + @staticmethod
139 + def validate_structure(val):
140 + if val[0] == 'flat':
141 + return FlatLayout.verify_args(val)
142 + if val[0] == 'filename-hash':
143 + return FilenameHashLayout.verify_args(val)
144 + return False
145 +
146 + def get_best_supported_layout(self):
147 + for val in self.structure:
148 + if self.validate_structure(val):
149 + if val[0] == 'flat':
150 + return FlatLayout(*val[1:])
151 + elif val[0] == 'filename-hash':
152 + return FilenameHashLayout(*val[1:])
153 + else:
154 + # fallback
155 + return FlatLayout()
156 +
157 +
158 +def get_mirror_url(mirror_url, filename, eroot):
159 + """
160 + Get correct fetch URL for a given file, accounting for mirror
161 + layout configuration.
162 +
163 + @param mirror_url: Base URL to the mirror (without '/distfiles')
164 + @param filename: Filename to fetch
165 + @param eroot: EROOT to use for the cache file
166 + @return: Full URL to fetch
167 + """
168 +
169 + mirror_conf = MirrorLayoutConfig()
170 +
171 + cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
172 + try:
173 + with open(cache_file, 'r') as f:
174 + cache = json.load(f)
175 + except (IOError, ValueError):
176 + cache = {}
177 +
178 + ts, data = cache.get(mirror_url, (0, None))
179 + # refresh at least daily
180 + if ts >= time.time() - 86400:
181 + mirror_conf.deserialize(data)
182 + else:
183 + try:
184 + f = urlopen(mirror_url + '/distfiles/layout.conf')
185 + try:
186 + data = io.StringIO(f.read().decode('utf8'))
187 + finally:
188 + f.close()
189 +
190 + try:
191 + mirror_conf.read_from_file(data)
192 + except ConfigParserError:
193 + pass
194 + except IOError:
195 + pass
196 +
197 + cache[mirror_url] = (time.time(), mirror_conf.serialize())
198 + f = atomic_ofstream(cache_file, 'w')
199 + json.dump(cache, f)
200 + f.close()
201 +
202 + return (mirror_url + "/distfiles/" +
203 + mirror_conf.get_best_supported_layout().get_path(filename))
204 +
205 +
206 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
207 locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
208 allow_missing_digests=True):
209 @@ -434,8 +581,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
210 for myfile, myuri in file_uri_tuples:
211 if myfile not in filedict:
212 filedict[myfile]=[]
213 - for y in range(0,len(locations)):
214 - filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
215 + for l in locations:
216 + filedict[myfile].append(functools.partial(
217 + get_mirror_url, l, myfile, mysettings["EROOT"]))
218 if myuri is None:
219 continue
220 if myuri[:9]=="mirror://":
221 @@ -895,6 +1043,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
222 tried_locations = set()
223 while uri_list:
224 loc = uri_list.pop()
225 + if isinstance(loc, functools.partial):
226 + loc = loc()
227 # Eliminate duplicates here in case we've switched to
228 # "primaryuri" mode on the fly due to a checksum failure.
229 if loc in tried_locations:
230 diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
231 index 83321fed7..f2254c468 100644
232 --- a/lib/portage/tests/ebuild/test_fetch.py
233 +++ b/lib/portage/tests/ebuild/test_fetch.py
234 @@ -4,6 +4,7 @@
235 from __future__ import unicode_literals
236
237 import functools
238 +import io
239 import tempfile
240
241 import portage
242 @@ -11,12 +12,14 @@ from portage import shutil, os
243 from portage.tests import TestCase
244 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
245 from portage.tests.util.test_socks5 import AsyncHTTPServer
246 +from portage.util.configparser import ConfigParserError
247 from portage.util.futures.executor.fork import ForkExecutor
248 from portage.util._async.SchedulerInterface import SchedulerInterface
249 from portage.util._eventloop.global_event_loop import global_event_loop
250 from portage.package.ebuild.config import config
251 from portage.package.ebuild.digestgen import digestgen
252 -from portage.package.ebuild.fetch import _download_suffix
253 +from portage.package.ebuild.fetch import (_download_suffix, FlatLayout,
254 + FilenameHashLayout, MirrorLayoutConfig)
255 from _emerge.EbuildFetcher import EbuildFetcher
256 from _emerge.Package import Package
257
258 @@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase):
259 finally:
260 shutil.rmtree(ro_distdir)
261 playground.cleanup()
262 +
263 + def test_flat_layout(self):
264 + self.assertTrue(FlatLayout.verify_args(('flat',)))
265 + self.assertFalse(FlatLayout.verify_args(('flat', 'extraneous-arg')))
266 + self.assertEqual(FlatLayout().get_path('foo-1.tar.gz'), 'foo-1.tar.gz')
267 +
268 + def test_filename_hash_layout(self):
269 + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash',)))
270 + self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '8')))
271 + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'INVALID-HASH', '8')))
272 + self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:12')))
273 + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '3')))
274 + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', 'junk')))
275 + self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:junk')))
276 +
277 + self.assertEqual(FilenameHashLayout('SHA1', '4').get_path('foo-1.tar.gz'),
278 + '1/foo-1.tar.gz')
279 + self.assertEqual(FilenameHashLayout('SHA1', '8').get_path('foo-1.tar.gz'),
280 + '19/foo-1.tar.gz')
281 + self.assertEqual(FilenameHashLayout('SHA1', '8:16').get_path('foo-1.tar.gz'),
282 + '19/c3b6/foo-1.tar.gz')
283 + self.assertEqual(FilenameHashLayout('SHA1', '8:16:24').get_path('foo-1.tar.gz'),
284 + '19/c3b6/37a94b/foo-1.tar.gz')
285 +
286 + def test_mirror_layout_config(self):
287 + mlc = MirrorLayoutConfig()
288 + self.assertEqual(mlc.serialize(), ())
289 + self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
290 +
291 + conf = '''
292 +[structure]
293 +0=flat
294 +'''
295 + mlc.read_from_file(io.StringIO(conf))
296 + self.assertEqual(mlc.serialize(), (('flat',),))
297 + self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
298 + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
299 + 'foo-1.tar.gz')
300 +
301 + conf = '''
302 +[structure]
303 +0=filename-hash SHA1 8:16
304 +1=flat
305 +'''
306 + mlc.read_from_file(io.StringIO(conf))
307 + self.assertEqual(mlc.serialize(), (
308 + ('filename-hash', 'SHA1', '8:16'),
309 + ('flat',)
310 + ))
311 + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
312 + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
313 + '19/c3b6/foo-1.tar.gz')
314 + serialized = mlc.serialize()
315 +
316 + # test fallback
317 + conf = '''
318 +[structure]
319 +0=filename-hash INVALID-HASH 8:16
320 +1=filename-hash SHA1 32
321 +2=flat
322 +'''
323 + mlc.read_from_file(io.StringIO(conf))
324 + self.assertEqual(mlc.serialize(), (
325 + ('filename-hash', 'INVALID-HASH', '8:16'),
326 + ('filename-hash', 'SHA1', '32'),
327 + ('flat',)
328 + ))
329 + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
330 + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
331 + '19c3b637/foo-1.tar.gz')
332 +
333 + # test deserialization
334 + mlc.deserialize(serialized)
335 + self.assertEqual(mlc.serialize(), (
336 + ('filename-hash', 'SHA1', '8:16'),
337 + ('flat',)
338 + ))
339 + self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
340 + self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
341 + '19/c3b6/foo-1.tar.gz')
342 +
343 + # test erroneous input
344 + conf = '''
345 +[#(*DA*&*F
346 +[structure]
347 +0=filename-hash SHA1 32
348 +'''
349 + self.assertRaises(ConfigParserError, mlc.read_from_file,
350 + io.StringIO(conf))
351 --
352 2.23.0