Gentoo Archives: gentoo-portage-dev

From: Alec Warner <antarus@g.o>
To: gentoo-portage-dev@l.g.o
Cc: "Michał Górny" <mgorny@g.o>
Subject: Re: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 05:01:45
Message-Id: CAAr7Pr-V6CNvDiwL7PziQx5CLDNO110WFva+pzN2TUHfTOgaeQ@mail.gmail.com
In Reply to: [gentoo-portage-dev] [PATCH v2] fetch: Support GLEP 75 mirror structure by "Michał Górny"
1 On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <mgorny@g.o> wrote:
2
3 > Add support for the subset of GLEP 75 needed by Gentoo Infra. This
4 > includes fetching and parsing layout.conf, and support for flat layout
5 > and filename-hash layout with cutoffs being multiples of 4.
6 >
7 > Bug: https://bugs.gentoo.org/646898
8 > Signed-off-by: Michał Górny <mgorny@g.o>
9 > ---
10 > lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
11 > 1 file changed, 135 insertions(+), 4 deletions(-)
12 >
13 > Changes in v2: switched to a more classy layout to make the code
14 > reusable in emirrordist.
15 >
16 > diff --git a/lib/portage/package/ebuild/fetch.py
17 > b/lib/portage/package/ebuild/fetch.py
18 > index 227bf45ae..18e3d390a 100644
19 > --- a/lib/portage/package/ebuild/fetch.py
20 > +++ b/lib/portage/package/ebuild/fetch.py
21 > @@ -7,12 +7,15 @@ __all__ = ['fetch']
22 >
23 > import errno
24 > import io
25 > +import itertools
26 > +import json
27 > import logging
28 > import random
29 > import re
30 > import stat
31 > import sys
32 > import tempfile
33 > +import time
34 >
35 > from collections import OrderedDict
36 >
37 > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
38 > 'portage.package.ebuild.doebuild:doebuild_environment,' + \
39 > '_doebuild_spawn',
40 > 'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
41 > +
42 > 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
43 > + 'portage.util._urlopen:urlopen',
44 > )
45 >
46 > from portage import os, selinux, shutil, _encodings, \
47 > _movefile, _shell_quote, _unicode_encode
48 > from portage.checksum import (get_valid_checksum_keys, perform_md5,
49 > verify_all,
50 > - _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
51 > + _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
52 > + checksum_str)
53 > from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
54 > - GLOBAL_CONFIG_PATH
55 > + GLOBAL_CONFIG_PATH, CACHE_PATH
56 > from portage.data import portage_gid, portage_uid, secpass,
57 > userpriv_groups
58 > from portage.exception import FileNotFound, OperationNotPermitted, \
59 > PortageException, TryAgain
60 > @@ -253,6 +259,130 @@ _size_suffix_map = {
61 > 'Y' : 80,
62 > }
63 >
64 > +
65 > +class FlatLayout(object):
66 > + def get_path(self, filename):
67 > + return filename
68 > +
69 > +
70 > +class FilenameHashLayout(object):
71 > + def __init__(self, algo, cutoffs):
72 > + self.algo = algo
73 > + self.cutoffs = [int(x) for x in cutoffs.split(':')]
74 > +
75 > + def get_path(self, filename):
76 > + fnhash = checksum_str(filename.encode('utf8'), self.algo)
77 > + ret = ''
78 > + for c in self.cutoffs:
79 > + assert c % 4 == 0
80 >
81
82 I'm not quite sure what this assert is doing. I'm not super in favor of
83 asserts (I'd rather see an exception like raise FooError("...")), but if you
84 are going to use it please use something like:
85
86 assert c % 4 == 0, "Some description of why we put this assert here so if it
87 fires we can do something useful."
88
89 > + c = c // 4
90 > + ret += fnhash[:c] + '/'
91 > + fnhash = fnhash[c:]
92 > + return ret + filename
93 > +
94 > +
95 > +class MirrorLayoutConfig(object):
96 > + """
97 > + Class to read layout.conf from a mirror.
98 > + """
99 > +
100 > + def __init__(self):
101 > + self.structure = ()
102 > +
103 > + def read_from_file(self, f):
104 > + cp = SafeConfigParser()
105 > + read_configs(cp, [f])
106 > + vals = []
107 > + for i in itertools.count():
108 > + try:
109 > + vals.append(tuple(cp.get('structure', '%d'
110 > % i).split()))
111 > + except NoOptionError:
112 > + break
113 > + self.structure = tuple(vals)
114 > +
115 > + def serialize(self):
116 > + return self.structure
117 > +
118 > + def deserialize(self, data):
119 > + self.structure = data
120 > +
121 > + @staticmethod
122 > + def validate_structure(val):
123 > + if val == ('flat',):
124 > + return True
125 > + if val[0] == 'filename-hash' and len(val) == 3:
126 > + if val[1] not in get_valid_checksum_keys():
127 > + return False
128 > + # validate cutoffs
129 > + for c in val[2].split(':'):
130 > + try:
131 > + c = int(c)
132 > + except ValueError:
133 > + break
134 > + else:
135 > + if c % 4 != 0:
136 > + break
137 > + else:
138 > + return True
139 > + return False
140 > + return False
141 > +
142 > + def get_best_supported_layout(self):
143 > + for val in self.structure:
144 > + if self.validate_structure(val):
145 > + if val[0] == 'flat':
146 > + return FlatLayout()
147 > + elif val[0] == 'filename-hash':
148 > + return FilenameHashLayout(val[1],
149 > val[2])
150 > + else:
151 > + # fallback
152 > + return FlatLayout()
153 > +
154 > +
155 > +def get_mirror_url(mirror_url, filename, eroot):
156 > + """
157 > + Get correct fetch URL for a given file, accounting for mirror
158 > + layout configuration.
159 > +
160 > + @param mirror_url: Base URL to the mirror (without '/distfiles')
161 > + @param filename: Filename to fetch
162 > + @param eroot: EROOT to use for the cache file
163 > + @return: Full URL to fetch
164 > + """
165 > +
166 > + mirror_conf = MirrorLayoutConfig()
167 > +
168 > + cache_file = os.path.join(eroot, CACHE_PATH,
169 > 'mirror-metadata.json')
170 > + try:
171 > + with open(cache_file, 'r') as f:
172 > + cache = json.load(f)
173 > + except (IOError, ValueError):
174 > + cache = {}
175 > +
176 > + ts, data = cache.get(mirror_url, (0, None))
177 > + # refresh at least daily
178 > + if ts >= time.time() - 86400:
179 > + mirror_conf.deserialize(data)
180 > + else:
181 > + try:
182 > + f = urlopen(mirror_url + '/distfiles/layout.conf')
183 > + try:
184 > + data = io.StringIO(f.read().decode('utf8'))
185 > + finally:
186 > + f.close()
187 > +
188 > + mirror_conf.read_from_file(data)
189 > + except IOError:
190 > + pass
191 > +
192 > + cache[mirror_url] = (time.time(), mirror_conf.serialize())
193 > + with open(cache_file, 'w') as f:
194 > + json.dump(cache, f)
195 > +
196 > + return (mirror_url + "/distfiles/" +
197 > +
198 > mirror_conf.get_best_supported_layout().get_path(filename))
199 > +
200 > +
201 > def fetch(myuris, mysettings, listonly=0, fetchonly=0,
202 > locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
203 > allow_missing_digests=True):
204 > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
205 > for myfile, myuri in file_uri_tuples:
206 > if myfile not in filedict:
207 > filedict[myfile]=[]
208 > - for y in range(0,len(locations)):
209 > -
210 > filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
211 > + for l in locations:
212 > + filedict[myfile].append(get_mirror_url(l,
213 > myfile,
214 > + mysettings["EROOT"]))
215 > if myuri is None:
216 > continue
217 > if myuri[:9]=="mirror://":
218 > --
219 > 2.23.0
220 >
221 >
222 >

Replies