1 |
On Thu, Oct 3, 2019 at 7:52 AM Michał Górny <mgorny@g.o> wrote: |
2 |
|
3 |
> Add a support for the subset of GLEP 75 needed by Gentoo Infra. This |
4 |
> includes fetching and parsing layout.conf, and support for flat layout |
5 |
> and filename-hash layout with cutoffs being multiplies of 4. |
6 |
> |
7 |
> Bug: https://bugs.gentoo.org/646898 |
8 |
> Signed-off-by: Michał Górny <mgorny@g.o> |
9 |
> --- |
10 |
> lib/portage/package/ebuild/fetch.py | 113 +++++++++++++++++++++++++++- |
11 |
> 1 file changed, 109 insertions(+), 4 deletions(-) |
12 |
> |
13 |
> diff --git a/lib/portage/package/ebuild/fetch.py |
14 |
> b/lib/portage/package/ebuild/fetch.py |
15 |
> index 227bf45ae..692efcc01 100644 |
16 |
> --- a/lib/portage/package/ebuild/fetch.py |
17 |
> +++ b/lib/portage/package/ebuild/fetch.py |
18 |
> @@ -7,12 +7,15 @@ __all__ = ['fetch'] |
19 |
> |
20 |
> import errno |
21 |
> import io |
22 |
> +import itertools |
23 |
> +import json |
24 |
> import logging |
25 |
> import random |
26 |
> import re |
27 |
> import stat |
28 |
> import sys |
29 |
> import tempfile |
30 |
> +import time |
31 |
> |
32 |
> from collections import OrderedDict |
33 |
> |
34 |
> @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(), |
35 |
> 'portage.package.ebuild.doebuild:doebuild_environment,' + \ |
36 |
> '_doebuild_spawn', |
37 |
> 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs', |
38 |
> + |
39 |
> 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError', |
40 |
> + 'portage.util._urlopen:urlopen', |
41 |
> ) |
42 |
> |
43 |
> from portage import os, selinux, shutil, _encodings, \ |
44 |
> _movefile, _shell_quote, _unicode_encode |
45 |
> from portage.checksum import (get_valid_checksum_keys, perform_md5, |
46 |
> verify_all, |
47 |
> - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter) |
48 |
> + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter, |
49 |
> + checksum_str) |
50 |
> from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \ |
51 |
> - GLOBAL_CONFIG_PATH |
52 |
> + GLOBAL_CONFIG_PATH, CACHE_PATH |
53 |
> from portage.data import portage_gid, portage_uid, secpass, |
54 |
> userpriv_groups |
55 |
> from portage.exception import FileNotFound, OperationNotPermitted, \ |
56 |
> PortageException, TryAgain |
57 |
> @@ -253,6 +259,104 @@ _size_suffix_map = { |
58 |
> 'Y' : 80, |
59 |
> } |
60 |
> |
61 |
> + |
62 |
> +def filename_hash_path(filename, algo, cutoffs): |
63 |
> + """ |
64 |
> + Get directory path for filename in filename-hash mirror structure. |
65 |
> + |
66 |
> + @param filename: Filename to fetch |
67 |
> + @param algo: Hash algorithm |
68 |
> + @param cutoffs: Cutoff values (n:n...) |
69 |
> + @return: Directory path |
70 |
> + """ |
71 |
> + |
72 |
> + fnhash = checksum_str(filename.encode('utf8'), algo) |
73 |
> + ret = '' |
74 |
> + for c in cutoffs.split(':'): |
75 |
> + c = int(c) // 4 |
76 |
> + ret += fnhash[:c] + '/' |
77 |
> |
78 |
|
79 |
When making a path, please use os.path.join() |
80 |
|
81 |
|
82 |
> + fnhash = fnhash[c:] |
83 |
> + return ret |
84 |
> + |
85 |
> + |
86 |
> +def get_mirror_url(mirror_url, filename, eroot): |
87 |
> + """ |
88 |
> + Get correct fetch URL for a given file, accounting for mirror |
89 |
> + layout configuration. |
90 |
> + |
91 |
> + @param mirror_url: Base URL to the mirror (without '/distfiles') |
92 |
> + @param filename: Filename to fetch |
93 |
> + @param eroot: EROOT to use for the cache file |
94 |
> + @return: Full URL to fetch |
95 |
> + """ |
96 |
> |
97 |
+ |
98 |
> + cache_file = os.path.join(eroot, CACHE_PATH, |
99 |
> 'mirror-metadata.json') |
100 |
> + try: |
101 |
> + with open(cache_file, 'r') as f: |
102 |
> + cache = json.load(f) |
103 |
> + except (IOError, ValueError): |
104 |
> + cache = {} |
105 |
> |
106 |
|
107 |
I'm a bit worried that we are opening this cache file off of disk every |
108 |
time we call get_mirror_url(). Can we just cache the contents in memory |
109 |
between calls; or even better, pass the cache in as an argument rather than it |
110 |
being contained in get_mirror_url()? |
111 |
|
112 |
|
113 |
> + |
114 |
> + ts, layout = cache.get(mirror_url, (0, None)) |
115 |
> + # refresh at least daily |
116 |
> + if ts < time.time() - 86400: |
117 |
> + # the default |
118 |
> + layout = ('flat',) |
119 |
> + |
120 |
> + try: |
121 |
> + f = urlopen(mirror_url + '/distfiles/layout.conf') |
122 |
> + try: |
123 |
> + data = io.StringIO(f.read().decode('utf8')) |
124 |
> + finally: |
125 |
> + f.close() |
126 |
> + cp = SafeConfigParser() |
127 |
> + read_configs(cp, [data]) |
128 |
> + |
129 |
> + for i in itertools.count(): |
130 |
> + try: |
131 |
> + val = tuple(cp.get('structure', |
132 |
> '%d' % i).split()) |
133 |
> + if val == ('flat',): |
134 |
> + pass |
135 |
> + elif val[0] == 'filename-hash' and |
136 |
> len(val) == 3: |
137 |
> + if val[1] not in |
138 |
> get_valid_checksum_keys(): |
139 |
> + continue |
140 |
> + # validate cutoffs |
141 |
> + cutoffs_good = False |
142 |
> + for c in val[2].split(':'): |
143 |
> + try: |
144 |
> + c = int(c) |
145 |
> + except ValueError: |
146 |
> + break |
147 |
> + else: |
148 |
> + if c % 4 |
149 |
> != 0: |
150 |
> + |
151 |
> break |
152 |
> + else: |
153 |
> + cutoffs_good = True |
154 |
> + if not cutoffs_good: |
155 |
> + continue |
156 |
> + else: |
157 |
> + # (skip unsupported |
158 |
> variant) |
159 |
> + continue |
160 |
> + layout = val |
161 |
> + break |
162 |
> + except NoOptionError: |
163 |
> + break |
164 |
> + except IOError: |
165 |
> + pass |
166 |
> + |
167 |
> + cache[mirror_url] = (time.time(), layout) |
168 |
> + with open(cache_file, 'w') as f: |
169 |
> + json.dump(cache, f) |
170 |
> + |
171 |
> + if layout[0] == 'flat': |
172 |
> + return mirror_url + "/distfiles/" + filename |
173 |
> + elif layout[0] == 'filename-hash': |
174 |
> + return (mirror_url + "/distfiles/" + |
175 |
> + filename_hash_path(filename, *layout[1:]) |
176 |
> + filename) |
177 |
> + else: |
178 |
> + raise AssertionError("get_mirror_url() got unknown layout |
179 |
> type") |
180 |
> |
181 |
|
182 |
raise AssertionError("get_mirror_url() got unknown layout type %s wanted |
183 |
one of %s" % (layout[0], ('flat', 'filename-hash'))) |
184 |
|
185 |
E.g. if you got an unknown thing, it's nice to print what you got and what |
186 |
you wanted so callers can fix it. |
187 |
|
188 |
|
189 |
> + |
190 |
> + |
191 |
> def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
192 |
> locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None, |
193 |
> allow_missing_digests=True): |
194 |
> @@ -434,8 +538,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
195 |
> for myfile, myuri in file_uri_tuples: |
196 |
> if myfile not in filedict: |
197 |
> filedict[myfile]=[] |
198 |
> - for y in range(0,len(locations)): |
199 |
> - |
200 |
> filedict[myfile].append(locations[y]+"/distfiles/"+myfile) |
201 |
> + for l in locations: |
202 |
> + filedict[myfile].append(get_mirror_url(l, |
203 |
> myfile, |
204 |
> + mysettings["EROOT"])) |
205 |
> if myuri is None: |
206 |
> continue |
207 |
> if myuri[:9]=="mirror://": |
208 |
> -- |
209 |
> 2.23.0 |
210 |
> |
211 |
> |
212 |
> |