Gentoo Archives: gentoo-portage-dev

From: "Michał Górny" <mgorny@g.o>
To: gentoo-portage-dev@l.g.o
Subject: Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 05:53:40
Message-Id: 7aad9f446dcfb3da376d57f65a4b2b6c9a5171aa.camel@gentoo.org
In Reply to: Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure by Alec Warner
1 On Thu, 2019-10-03 at 22:01 -0700, Alec Warner wrote:
2 > On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@g.o> wrote:
3 >
4 > > Add support for the subset of GLEP 75 needed by Gentoo Infra. This
5 > > includes fetching and parsing layout.conf, and support for flat layout
6 > > and filename-hash layout with cutoffs being multiples of 4.
7 > >
8 > > Bug: https://bugs.gentoo.org/646898
9 > > Signed-off-by: Michał Górny <mgorny@g.o>
10 > > ---
11 > > lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
12 > > 1 file changed, 135 insertions(+), 4 deletions(-)
13 > >
14 > > Changes in v2: switched to a more class-based layout to make the code
15 > > reusable in emirrordist.
16 > >
17 > > diff --git a/lib/portage/package/ebuild/fetch.py
18 > > b/lib/portage/package/ebuild/fetch.py
19 > > index 227bf45ae..18e3d390a 100644
20 > > --- a/lib/portage/package/ebuild/fetch.py
21 > > +++ b/lib/portage/package/ebuild/fetch.py
22 > > @@ -7,12 +7,15 @@ __all__ = ['fetch']
23 > >
24 > > import errno
25 > > import io
26 > > +import itertools
27 > > +import json
28 > > import logging
29 > > import random
30 > > import re
31 > > import stat
32 > > import sys
33 > > import tempfile
34 > > +import time
35 > >
36 > > from collections import OrderedDict
37 > >
38 > > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
39 > > 'portage.package.ebuild.doebuild:doebuild_environment,' + \
40 > > '_doebuild_spawn',
41 > > 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
42 > > +
43 > > 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
44 > > + 'portage.util._urlopen:urlopen',
45 > > )
46 > >
47 > > from portage import os, selinux, shutil, _encodings, \
48 > > _movefile, _shell_quote, _unicode_encode
49 > > from portage.checksum import (get_valid_checksum_keys, perform_md5,
50 > > verify_all,
51 > > - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
52 > > + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
53 > > + checksum_str)
54 > > from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
55 > > - GLOBAL_CONFIG_PATH
56 > > + GLOBAL_CONFIG_PATH, CACHE_PATH
57 > > from portage.data import portage_gid, portage_uid, secpass,
58 > > userpriv_groups
59 > > from portage.exception import FileNotFound, OperationNotPermitted, \
60 > > PortageException, TryAgain
61 > > @@ -253,6 +259,130 @@ _size_suffix_map = {
62 > > 'Y' : 80,
63 > > }
64 > >
65 > > +
66 > > +class FlatLayout(object):
67 > > + def get_path(self, filename):
68 > > + return filename
69 > > +
70 > > +
71 > > +class FilenameHashLayout(object):
72 > > + def __init__(self, algo, cutoffs):
73 > > + self.algo = algo
74 > > + self.cutoffs = [int(x) for x in cutoffs.split(':')]
75 > > +
76 > > + def get_path(self, filename):
77 > > + fnhash = checksum_str(filename.encode('utf8'), self.algo)
78 > > + ret = ''
79 > > + for c in self.cutoffs:
80 > > + assert c % 4 == 0
81 > >
82 >
83 > I'm not quite sure what this assert is doing. I'm not super in favor of
84 > asserts (I'd rather see an exception like raise FooError("...")), but if you
85 > are going to use it, please use something like:
86 >
87 > assert c %4 == 0, "Some description of why we put this assert here so if it
88 > fires we can do something useful."
89
90 It's already checked in validate_structure(). Maybe I could delegate that
91 check to this class to make things clearer.
92
93 >
94 > + c = c // 4
95 > > + ret += fnhash[:c] + '/'
96 > > + fnhash = fnhash[c:]
97 > > + return ret + filename
98 > > +
99 > > +
100 > > +class MirrorLayoutConfig(object):
101 > > + """
102 > > + Class to read layout.conf from a mirror.
103 > > + """
104 > > +
105 > > + def __init__(self):
106 > > + self.structure = ()
107 > > +
108 > > + def read_from_file(self, f):
109 > > + cp = SafeConfigParser()
110 > > + read_configs(cp, [f])
111 > > + vals = []
112 > > + for i in itertools.count():
113 > > + try:
114 > > + vals.append(tuple(cp.get('structure', '%d'
115 > > % i).split()))
116 > > + except NoOptionError:
117 > > + break
118 > > + self.structure = tuple(vals)
119 > > +
120 > > + def serialize(self):
121 > > + return self.structure
122 > > +
123 > > + def deserialize(self, data):
124 > > + self.structure = data
125 > > +
126 > > + @staticmethod
127 > > + def validate_structure(val):
128 > > + if val == ('flat',):
129 > > + return True
130 > > + if val[0] == 'filename-hash' and len(val) == 3:
131 > > + if val[1] not in get_valid_checksum_keys():
132 > > + return False
133 > > + # validate cutoffs
134 > > + for c in val[2].split(':'):
135 > > + try:
136 > > + c = int(c)
137 > > + except ValueError:
138 > > + break
139 > > + else:
140 > > + if c % 4 != 0:
141 > > + break
142 > > + else:
143 > > + return True
144 > > + return False
145 > > + return False
146 > > +
147 > > + def get_best_supported_layout(self):
148 > > + for val in self.structure:
149 > > + if self.validate_structure(val):
150 > > + if val[0] == 'flat':
151 > > + return FlatLayout()
152 > > + elif val[0] == 'filename-hash':
153 > > + return FilenameHashLayout(val[1],
154 > > val[2])
155 > > + else:
156 > > + # fallback
157 > > + return FlatLayout()
158 > > +
159 > > +
160 > > +def get_mirror_url(mirror_url, filename, eroot):
161 > > + """
162 > > + Get correct fetch URL for a given file, accounting for mirror
163 > > + layout configuration.
164 > > +
165 > > + @param mirror_url: Base URL to the mirror (without '/distfiles')
166 > > + @param filename: Filename to fetch
167 > > + @param eroot: EROOT to use for the cache file
168 > > + @return: Full URL to fetch
169 > > + """
170 > > +
171 > > + mirror_conf = MirrorLayoutConfig()
172 > > +
173 > > + cache_file = os.path.join(eroot, CACHE_PATH,
174 > > 'mirror-metadata.json')
175 > > + try:
176 > > + with open(cache_file, 'r') as f:
177 > > + cache = json.load(f)
178 > > + except (IOError, ValueError):
179 > > + cache = {}
180 > > +
181 > > + ts, data = cache.get(mirror_url, (0, None))
182 > > + # refresh at least daily
183 > > + if ts >= time.time() - 86400:
184 > > + mirror_conf.deserialize(data)
185 > > + else:
186 > > + try:
187 > > + f = urlopen(mirror_url + '/distfiles/layout.conf')
188 > > + try:
189 > > + data = io.StringIO(f.read().decode('utf8'))
190 > > + finally:
191 > > + f.close()
192 > > +
193 > > + mirror_conf.read_from_file(data)
194 > > + except IOError:
195 > > + pass
196 > > +
197 > > + cache[mirror_url] = (time.time(), mirror_conf.serialize())
198 > > + with open(cache_file, 'w') as f:
199 > > + json.dump(cache, f)
200 > > +
201 > > + return (mirror_url + "/distfiles/" +
202 > > +
203 > > mirror_conf.get_best_supported_layout().get_path(filename))
204 > > +
205 > > +
206 > > def fetch(myuris, mysettings, listonly=0, fetchonly=0,
207 > > locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
208 > > allow_missing_digests=True):
209 > > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
210 > > for myfile, myuri in file_uri_tuples:
211 > > if myfile not in filedict:
212 > > filedict[myfile]=[]
213 > > - for y in range(0,len(locations)):
214 > > -
215 > > filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
216 > > + for l in locations:
217 > > + filedict[myfile].append(get_mirror_url(l,
218 > > myfile,
219 > > + mysettings["EROOT"]))
220 > > if myuri is None:
221 > > continue
222 > > if myuri[:9]=="mirror://":
223 > > --
224 > > 2.23.0
225 > >
226 > >
227 > >
228
229 --
230 Best regards,
231 Michał Górny

Attachments

File name MIME type
signature.asc application/pgp-signature