Gentoo Archives: gentoo-portage-dev

From: "W. Trevor King" <wking@×××××××.us>
To: gentoo-portage-dev@l.g.o
Cc: Rafael Goncalves Martins <rafaelmartins@g.o>, "W. Trevor King" <wking@×××××××.us>
Subject: [gentoo-portage-dev] [PATCH v3 3/4] pym/portage/package/ebuild/fetch.py: Factor out _get_uris
Date: Mon, 20 Jan 2014 03:28:49
Message-Id: 2b4c89a8be60c042d9afd2fb4151b9736afe36ac.1390187967.git.wking@tremily.us
In Reply to: [gentoo-portage-dev] [PATCH v3 0/4] Initial fetch() refactoring by "W. Trevor King"
1 The current fetch() function is quite long, which makes it hard to
2 know what I can change without adverse side effects. By pulling this
3 logic out of the main function, we get clearer logic in fetch() and
4 more explicit input for the config extraction.
5
6 This block was especially complicated, so I also created the helper
7 functions _get_file_uri_tuples and _expand_mirror. I'd prefer if
8 _expand_mirror iterated through URIs instead of (group, URI) tuples,
9 but we need a distinct marker for third-party URIs to build
10 third_party_mirror_uris, which is used to build primaryuri_dict, which
11 is used way down in fetch():
12
13 if checksum_failure_count == \
14 checksum_failure_primaryuri:
15 # Switch to "primaryuri" mode in order
16 # to increase the probablility of
17 # of success.
18 primaryuris = \
19 primaryuri_dict.get(myfile)
20 if primaryuris:
21 uri_list.extend(
22 reversed(primaryuris))
23
24 I don't know if this is useful enough to motivate the uglier
25 _expand_mirror return values, but I'll kick that can down the road for
26 now.
27
28 There was some discussion on the list [1] about the defaults in:
29
30 def _get_uris(uris, settings, custom_mirrors=(), locations=()):
31
32 but I prefer this to Alec's floated:
33
34 def _get_uris(uris, settings, custom_mirrors=None, locations=None):
35 if not custom_mirrors:
36 custom_mirrors = ()
37 if not locations:
38 locations = ()
39
40 Which accomplishes the same thing with less clarity ;). My default
41 values are tuples (and not mutable lists) to make it extra-obvious
42 that we're not relying on anything crazy like mutating our default values
43 ;).
44
45 There was also discussion about whether the ugly settings object
46 should be passed to _get_uris, or whether the appropriate settings
47 should be extracted first and then passed through [2]. I've passed
48 settings through, because I'd prefer to have the ugliness handled in
49 helper functions. The fetch() function is what most folks will care
50 about, so we want to keep that as clean as possible. If the helper
51 functions have to suffer a bit for a cleaner fetch(), so be it.
52
53 [1]: http://thread.gmane.org/gmane.linux.gentoo.portage.devel/4002/focus=4041
54 [2]: http://thread.gmane.org/gmane.linux.gentoo.portage.devel/4002/focus=4042
55 ---
56 pym/portage/package/ebuild/fetch.py | 208 ++++++++++++++++++++-------------
57 pym/portage/tests/ebuild/test_fetch.py | 121 +++++++++++++++++++
58 2 files changed, 248 insertions(+), 81 deletions(-)
59
60 diff --git a/pym/portage/package/ebuild/fetch.py b/pym/portage/package/ebuild/fetch.py
61 index 4ecefc9..0093a6e 100644
62 --- a/pym/portage/package/ebuild/fetch.py
63 +++ b/pym/portage/package/ebuild/fetch.py
64 @@ -15,9 +15,9 @@ import sys
65 import tempfile
66
67 try:
68 - from urllib.parse import urlparse
69 + from urllib.parse import urlparse, urlunparse
70 except ImportError:
71 - from urlparse import urlparse
72 + from urlparse import urlparse, urlunparse
73
74 import portage
75 portage.proxy.lazyimport.lazyimport(globals(),
76 @@ -301,6 +301,128 @@ def _get_fetch_resume_size(settings, default='350K'):
77 return v
78
79
80 +def _get_file_uri_tuples(uris):
81 + """Return a list of (filename, URI) tuples."""
82 + file_uri_tuples = []
83 + # Check for 'items' attribute since OrderedDict is not a dict.
84 + if hasattr(uris, 'items'):
85 + for filename, uri_set in uris.items():
86 + for uri in uri_set:
87 + file_uri_tuples.append((filename, uri))
88 + if not uri_set:
89 + file_uri_tuples.append((filename, None))
90 + else:
91 + for uri in uris:
92 + if urlparse(uri).scheme:
93 + file_uri_tuples.append(
94 + (os.path.basename(uri), uri))
95 + else:
96 + file_uri_tuples.append(
97 + (os.path.basename(uri), None))
98 + return file_uri_tuples
99 +
100 +
101 +def _expand_mirror(uri, custom_mirrors=(), third_party_mirrors=()):
102 + """
103 + Replace the 'mirror://' scheme and netloc in the URI.
104 +
105 + Returns an iterable listing expanded (group, URI) tuples,
106 + where the group is either 'custom' or 'third-party'.
107 + """
108 + parsed = urlparse(uri)
109 + mirror = parsed.netloc
110 + path = parsed.path
111 + if path:
112 + # Try user-defined mirrors first
113 + if mirror in custom_mirrors:
114 + for cmirr in custom_mirrors[mirror]:
115 + m_uri = urlparse(cmirr)
116 + yield ('custom', urlunparse((
117 + m_uri.scheme, m_uri.netloc, path) +
118 + parsed[3:]))
119 +
120 + # now try the official mirrors
121 + if mirror in third_party_mirrors:
122 + uris = []
123 + for locmirr in third_party_mirrors[mirror]:
124 + m_uri = urlparse(locmirr)
125 + uris.append(urlunparse((
126 + m_uri.scheme, m_uri.netloc, path) +
127 + parsed[3:]))
128 + random.shuffle(uris)
129 + for uri in uris:
130 + yield ('third-party', uri)
131 +
132 + if (not custom_mirrors.get(mirror, []) and
133 + not third_party_mirrors.get(mirror, [])):
134 + writemsg(
135 + _("No known mirror by the name: %s\n")
136 + % mirror)
137 + else:
138 + writemsg(_("Invalid mirror definition in SRC_URI:\n"),
139 + noiselevel=-1)
140 + writemsg(" %s\n" % uri, noiselevel=-1)
141 +
142 +
143 +def _get_uris(uris, settings, custom_mirrors=(), locations=()):
144 + restrict = settings.get("PORTAGE_RESTRICT", "").split()
145 + restrict_fetch = "fetch" in restrict
146 + restrict_mirror = "mirror" in restrict or "nomirror" in restrict
147 + force_mirror = (
148 + "force-mirror" in settings.features and
149 + not restrict_mirror)
150 +
151 + third_party_mirrors = settings.thirdpartymirrors()
152 + third_party_mirror_uris = {}
153 + filedict = OrderedDict()
154 + primaryuri_dict = {}
155 + for filename, uri in _get_file_uri_tuples(uris=uris):
156 + if filename not in filedict:
157 + filedict[filename] = [
158 + os.path.join(location, 'distfiles', filename)
159 + for location in locations]
160 + if uri is None:
161 + continue
162 + if uri.startswith('mirror://'):
163 + expanded_uris = _expand_mirror(
164 + uri=uri, custom_mirrors=custom_mirrors,
165 + third_party_mirrors=third_party_mirrors)
166 + filedict[filename].extend(
167 + uri for _, uri in expanded_uris)
168 + third_party_mirror_uris.setdefault(filename, []).extend(
169 + uri for group, uri in expanded_uris
170 + if group == 'third-party')
171 + else:
172 + if restrict_fetch or force_mirror:
173 + # Only fetch from specific mirrors is allowed.
174 + continue
175 + primaryuris = primaryuri_dict.get(filename)
176 + if primaryuris is None:
177 + primaryuris = []
178 + primaryuri_dict[filename] = primaryuris
179 + primaryuris.append(uri)
180 +
181 + # Order primaryuri_dict values to match that in SRC_URI.
182 + for uris in primaryuri_dict.values():
183 + uris.reverse()
184 +
185 + # Prefer third_party_mirrors over normal mirrors in cases when
186 + # the file does not yet exist on the normal mirrors.
187 + for filename, uris in third_party_mirror_uris.items():
188 + primaryuri_dict.setdefault(filename, []).extend(uris)
189 +
190 + # Now merge primaryuri values into filedict (includes mirrors
191 + # explicitly referenced in SRC_URI).
192 + if "primaryuri" in restrict:
193 + for filename, uris in filedict.items():
194 + filedict[filename] = primaryuri_dict.get(filename, []) + uris
195 + else:
196 + for filename in filedict:
197 + filedict[filename] += primaryuri_dict.get(filename, [])
198 +
199 + return filedict, primaryuri_dict
200 +
201 +
202 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
203 locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
204 allow_missing_digests=True):
205 @@ -332,7 +454,6 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
206 # couple of checksum failures, to increase the probablility
207 # of success before checksum_failure_max_tries is reached.
208 checksum_failure_primaryuri = 2
209 - thirdpartymirrors = mysettings.thirdpartymirrors()
210
211 # In the background parallel-fetch process, it's safe to skip checksum
212 # verification of pre-existing files in $DISTDIR that have the correct
213 @@ -405,7 +526,6 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
214 del mymirrors[x]
215
216 restrict_fetch = "fetch" in restrict
217 - force_mirror = "force-mirror" in features and not restrict_mirror
218 custom_local_mirrors = custommirrors.get("local", [])
219 if restrict_fetch:
220 # With fetch restriction, a normal uri may only be fetched from
221 @@ -416,83 +536,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
222 else:
223 locations = mymirrors
224
225 - file_uri_tuples = []
226 - # Check for 'items' attribute since OrderedDict is not a dict.
227 - if hasattr(myuris, 'items'):
228 - for myfile, uri_set in myuris.items():
229 - for myuri in uri_set:
230 - file_uri_tuples.append((myfile, myuri))
231 - if not uri_set:
232 - file_uri_tuples.append((myfile, None))
233 - else:
234 - for myuri in myuris:
235 - if urlparse(myuri).scheme:
236 - file_uri_tuples.append((os.path.basename(myuri), myuri))
237 - else:
238 - file_uri_tuples.append((os.path.basename(myuri), None))
239 -
240 - filedict = OrderedDict()
241 - primaryuri_dict = {}
242 - thirdpartymirror_uris = {}
243 - for myfile, myuri in file_uri_tuples:
244 - if myfile not in filedict:
245 - filedict[myfile]=[]
246 - for y in range(0,len(locations)):
247 - filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
248 - if myuri is None:
249 - continue
250 - if myuri[:9]=="mirror://":
251 - eidx = myuri.find("/", 9)
252 - if eidx != -1:
253 - mirrorname = myuri[9:eidx]
254 - path = myuri[eidx+1:]
255 -
256 - # Try user-defined mirrors first
257 - if mirrorname in custommirrors:
258 - for cmirr in custommirrors[mirrorname]:
259 - filedict[myfile].append(
260 - cmirr.rstrip("/") + "/" + path)
261 -
262 - # now try the official mirrors
263 - if mirrorname in thirdpartymirrors:
264 - uris = [locmirr.rstrip("/") + "/" + path \
265 - for locmirr in thirdpartymirrors[mirrorname]]
266 - random.shuffle(uris)
267 - filedict[myfile].extend(uris)
268 - thirdpartymirror_uris.setdefault(myfile, []).extend(uris)
269 -
270 - if not filedict[myfile]:
271 - writemsg(_("No known mirror by the name: %s\n") % (mirrorname))
272 - else:
273 - writemsg(_("Invalid mirror definition in SRC_URI:\n"), noiselevel=-1)
274 - writemsg(" %s\n" % (myuri), noiselevel=-1)
275 - else:
276 - if restrict_fetch or force_mirror:
277 - # Only fetch from specific mirrors is allowed.
278 - continue
279 - primaryuris = primaryuri_dict.get(myfile)
280 - if primaryuris is None:
281 - primaryuris = []
282 - primaryuri_dict[myfile] = primaryuris
283 - primaryuris.append(myuri)
284 -
285 - # Order primaryuri_dict values to match that in SRC_URI.
286 - for uris in primaryuri_dict.values():
287 - uris.reverse()
288 -
289 - # Prefer thirdpartymirrors over normal mirrors in cases when
290 - # the file does not yet exist on the normal mirrors.
291 - for myfile, uris in thirdpartymirror_uris.items():
292 - primaryuri_dict.setdefault(myfile, []).extend(uris)
293 -
294 - # Now merge primaryuri values into filedict (includes mirrors
295 - # explicitly referenced in SRC_URI).
296 - if "primaryuri" in restrict:
297 - for myfile, uris in filedict.items():
298 - filedict[myfile] = primaryuri_dict.get(myfile, []) + uris
299 - else:
300 - for myfile in filedict:
301 - filedict[myfile] += primaryuri_dict.get(myfile, [])
302 + filedict, primaryuri_dict = _get_uris(
303 + uris=myuris, settings=mysettings,
304 + custom_mirrors=custommirrors, locations=locations)
305
306 can_fetch=True
307
308 diff --git a/pym/portage/tests/ebuild/test_fetch.py b/pym/portage/tests/ebuild/test_fetch.py
309 index 2b06190..9e1635a 100644
310 --- a/pym/portage/tests/ebuild/test_fetch.py
311 +++ b/pym/portage/tests/ebuild/test_fetch.py
312 @@ -1,13 +1,27 @@
313 # Copyright 1998-2013 Gentoo Foundation
314 # Distributed under the terms of the GNU General Public License v2
315
316 +from portage import OrderedDict
317 from portage.package.ebuild.fetch import (
318 _get_checksum_failure_max_tries,
319 _get_fetch_resume_size,
320 + _get_file_uri_tuples,
321 + _expand_mirror,
322 + _get_uris,
323 )
324 from portage.tests import TestCase
325
326
327 +class _Settings(dict):
328 + """Mock settings instance for testing."""
329 + @property
330 + def features(self):
331 + return self['features']
332 +
333 + def thirdpartymirrors(self):
334 + return self['third-party mirrors']
335 +
336 +
337 class FetchTestCase(TestCase):
338 """
339 Test fetch and it's helper functions.
340 @@ -20,6 +34,26 @@ class FetchTestCase(TestCase):
341 them directly (outside of these tests).
342 """
343
344 + _third_party_mirrors = {
345 + 'gentoo': [
346 + 'http://distfiles.gentoo.org/',
347 + 'ftp://ftp.gentoo.org/',
348 + ],
349 + }
350 +
351 + _custom_mirrors = {
352 + 'gentoo': [
353 + 'http://192.168.0.1/',
354 + 'ftp://192.168.0.2/',
355 + ],
356 + }
357 +
358 + _settings = _Settings({
359 + 'PORTAGE_RESTRICT': '',
360 + 'features': [],
361 + 'third-party mirrors': _third_party_mirrors,
362 + })
363 +
364 def test_get_checksum_failure_max_tries(self):
365 self.assertEqual(
366 _get_checksum_failure_max_tries(settings={}),
367 @@ -80,3 +114,90 @@ class FetchTestCase(TestCase):
368 self.assertEqual(
369 _get_fetch_resume_size(settings={}, default='3K'),
370 3072)
371 +
372 + def test_get_file_uri_tuples(self):
373 + self.assertEqual(
374 + _get_file_uri_tuples(uris=[
375 + 'http://host/path',
376 + '/filesystem/other-path',
377 + ]),
378 + [
379 + ('path', 'http://host/path'),
380 + ('other-path', None),
381 + ])
382 + uris = OrderedDict()
383 + uris['path'] = {'http://host/path', 'http://other-host/path'}
384 + uris['other-path'] = {}
385 + self.assertEqual(
386 + _get_file_uri_tuples(uris=uris),
387 + [
388 + ('path', 'http://host/path'),
389 + ('path', 'http://other-host/path'),
390 + ('other-path', None),
391 + ])
392 +
393 + def test_expand_mirror(self):
394 + uris = list(_expand_mirror(
395 + uri='mirror://gentoo/some/file',
396 + custom_mirrors=self._custom_mirrors,
397 + third_party_mirrors=self._third_party_mirrors,
398 + ))
399 + self.assertEqual(
400 + uris[:2],
401 + [
402 + ('custom', 'http://192.168.0.1/some/file'),
403 + ('custom', 'ftp://192.168.0.2/some/file'),
404 + ])
405 + self.assertEqual(
406 + sorted(uris[2:]), # de-randomize
407 + [
408 + ('third-party',
409 + 'ftp://ftp.gentoo.org/some/file'),
410 + ('third-party',
411 + 'http://distfiles.gentoo.org/some/file'),
412 + ])
413 +
414 +
415 + def test_get_uris(self):
416 + files, primary_uris = _get_uris(
417 + uris=[
418 + 'mirror://gentoo/some/file',
419 + 'http://example.net/my/other-file',
420 + ],
421 + settings=self._settings,
422 + custom_mirrors=self._custom_mirrors,
423 + locations=[
424 + 'http://location.net/',
425 + 'ftp://location.com/path',
426 + ],
427 + )
428 + self.assertEqual(list(files.keys()), ['file', 'other-file'])
429 + self.assertEqual(
430 + files['file'][:4],
431 + [
432 + 'http://location.net/distfiles/file',
433 + 'ftp://location.com/path/distfiles/file',
434 + 'http://192.168.0.1/some/file',
435 + 'ftp://192.168.0.2/some/file',
436 + ])
437 + self.assertEqual(
438 + sorted(files['file'][4:]), # de-randomize
439 + [
440 + 'ftp://ftp.gentoo.org/some/file',
441 + 'http://distfiles.gentoo.org/some/file',
442 + ])
443 + self.assertEqual(
444 + files['other-file'],
445 + [
446 + 'http://location.net/distfiles/other-file',
447 + 'ftp://location.com/path/distfiles/other-file',
448 + 'http://example.net/my/other-file',
449 + ])
450 + self.assertEqual(
451 + primary_uris,
452 + {
453 + 'file': [],
454 + 'other-file': [
455 + 'http://example.net/my/other-file',
456 + ],
457 + })
458 --
459 1.8.5.2.8.g0f6c0d1

Replies