Gentoo Archives: gentoo-portage-dev

From: "Michał Górny" <mgorny@g.o>
To: gentoo-portage-dev@l.g.o
Subject: Re: [gentoo-portage-dev] [PATCH] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 05:51:26
Message-Id: 28cb5656b0755a1fe07df1eadd5e3683607064e2.camel@gentoo.org
In Reply to: Re: [gentoo-portage-dev] [PATCH] fetch: Support GLEP 75 mirror structure by Alec Warner
On Thu, 2019-10-03 at 21:58 -0700, Alec Warner wrote:
> On Thu, Oct 3, 2019 at 7:52 AM Michał Górny <mgorny@g.o> wrote:
>
> > Add support for the subset of GLEP 75 needed by Gentoo Infra. This
> > includes fetching and parsing layout.conf, and support for flat layout
> > and filename-hash layout with cutoffs being multiples of 4.
> >
> > Bug: https://bugs.gentoo.org/646898
> > Signed-off-by: Michał Górny <mgorny@g.o>
> > ---
> >  lib/portage/package/ebuild/fetch.py | 113 +++++++++++++++++++++++++++-
> >  1 file changed, 109 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index 227bf45ae..692efcc01 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -7,12 +7,15 @@ __all__ = ['fetch']
> >
> >  import errno
> >  import io
> > +import itertools
> > +import json
> >  import logging
> >  import random
> >  import re
> >  import stat
> >  import sys
> >  import tempfile
> > +import time
> >
> >  from collections import OrderedDict
> >
> > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
> >  	'portage.package.ebuild.doebuild:doebuild_environment,' + \
> >  		'_doebuild_spawn',
> >  	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> > +	'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> > +	'portage.util._urlopen:urlopen',
> >  )
> >
> >  from portage import os, selinux, shutil, _encodings, \
> >  	_movefile, _shell_quote, _unicode_encode
> >  from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
> > -	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> > +	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> > +	checksum_str)
> >  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> > -	GLOBAL_CONFIG_PATH
> > +	GLOBAL_CONFIG_PATH, CACHE_PATH
> >  from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
> >  from portage.exception import FileNotFound, OperationNotPermitted, \
> >  	PortageException, TryAgain
> > @@ -253,6 +259,104 @@ _size_suffix_map = {
> >  	'Y' : 80,
> >  }
> >
> > +
> > +def filename_hash_path(filename, algo, cutoffs):
> > +	"""
> > +	Get directory path for filename in filename-hash mirror structure.
> > +
> > +	@param filename: Filename to fetch
> > +	@param algo: Hash algorithm
> > +	@param cutoffs: Cutoff values (n:n...)
> > +	@return: Directory path
> > +	"""
> > +
> > +	fnhash = checksum_str(filename.encode('utf8'), algo)
> > +	ret = ''
> > +	for c in cutoffs.split(':'):
> > +		c = int(c) // 4
> > +		ret += fnhash[:c] + '/'
> >
>
> When making a path, please use os.path.join()

This is a URL, not a path.
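
To illustrate the distinction: the return value here is a URL path
fragment that gets concatenated with plain '/' separators, not an
on-disk path. A rough standalone sketch of the same computation (using
hashlib.blake2b directly instead of portage.checksum, with a made-up
mirror and filename):

	import hashlib

	def hash_fragment(filename, cutoffs='8:8'):
		# mimics filename_hash_path(): hash the *filename*, then peel
		# off cutoff/4 hex digits of the digest per cutoff value
		fnhash = hashlib.blake2b(filename.encode('utf8')).hexdigest()
		ret = ''
		for c in cutoffs.split(':'):
			c = int(c) // 4
			ret += fnhash[:c] + '/'
			fnhash = fnhash[c:]
		return ret

	# yields e.g. 'ab/cd/' (digits depend on the digest), giving
	# https://mirror.example.org/distfiles/ab/cd/foo-1.0.tar.gz
	print('https://mirror.example.org/distfiles/'
		+ hash_fragment('foo-1.0.tar.gz') + 'foo-1.0.tar.gz')
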
>
>
> > +		fnhash = fnhash[c:]
> > +	return ret
> > +
> > +
> > +def get_mirror_url(mirror_url, filename, eroot):
> > +	"""
> > +	Get correct fetch URL for a given file, accounting for mirror
> > +	layout configuration.
> > +
> > +	@param mirror_url: Base URL to the mirror (without '/distfiles')
> > +	@param filename: Filename to fetch
> > +	@param eroot: EROOT to use for the cache file
> > +	@return: Full URL to fetch
> > +	"""
> > +
> > +	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
> > +	try:
> > +		with open(cache_file, 'r') as f:
> > +			cache = json.load(f)
> > +	except (IOError, ValueError):
> > +		cache = {}
> >
>
> I'm a bit worried that we are opening this cache file off of disk every
> time we call get_mirror_url(). Can we just cache the contents in memory
> between calls; or, even better, pass the cache in as an argument rather
> than having it contained in get_mirror_url?

We could, but this is not a bottleneck. That's premature optimization,
the way I see it.
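
For reference, a minimal sketch of the in-memory variant being
suggested (the helper name and call shape below are made up, not part
of this patch): fetch() would read the JSON metadata once and pass the
resulting dict into get_mirror_url().

	import json

	def load_mirror_metadata(cache_file):
		# read the mirror metadata cache once per fetch() run; a missing
		# or unparseable file simply means an empty cache
		try:
			with open(cache_file, 'r') as f:
				return json.load(f)
		except (IOError, ValueError):
			return {}

	# fetch() would then do something along the lines of:
	#	cache = load_mirror_metadata(cache_file)
	#	for l in locations:
	#		filedict[myfile].append(get_mirror_url(l, myfile, cache))
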
>
>
> > +
> > +	ts, layout = cache.get(mirror_url, (0, None))
> > +	# refresh at least daily
> > +	if ts < time.time() - 86400:
> > +		# the default
> > +		layout = ('flat',)
> > +
> > +		try:
> > +			f = urlopen(mirror_url + '/distfiles/layout.conf')
> > +			try:
> > +				data = io.StringIO(f.read().decode('utf8'))
> > +			finally:
> > +				f.close()
> > +			cp = SafeConfigParser()
> > +			read_configs(cp, [data])
> > +
> > +			for i in itertools.count():
> > +				try:
> > +					val = tuple(cp.get('structure', '%d' % i).split())
> > +					if val == ('flat',):
> > +						pass
> > +					elif val[0] == 'filename-hash' and len(val) == 3:
> > +						if val[1] not in get_valid_checksum_keys():
> > +							continue
> > +						# validate cutoffs
> > +						cutoffs_good = False
> > +						for c in val[2].split(':'):
> > +							try:
> > +								c = int(c)
> > +							except ValueError:
> > +								break
> > +							else:
> > +								if c % 4 != 0:
> > +									break
> > +						else:
> > +							cutoffs_good = True
> > +						if not cutoffs_good:
> > +							continue
> > +					else:
> > +						# (skip unsupported variant)
> > +						continue
> > +					layout = val
> > +					break
> > +				except NoOptionError:
> > +					break
> > +		except IOError:
> > +			pass
> > +
> > +		cache[mirror_url] = (time.time(), layout)
> > +		with open(cache_file, 'w') as f:
> > +			json.dump(cache, f)
> > +
> > +	if layout[0] == 'flat':
> > +		return mirror_url + "/distfiles/" + filename
> > +	elif layout[0] == 'filename-hash':
> > +		return (mirror_url + "/distfiles/" +
> > +				filename_hash_path(filename, *layout[1:]) + filename)
> > +	else:
> > +		raise AssertionError("get_mirror_url() got unknown layout type")
> >
>
> raise AssertionError("get_mirror_url() got unknown layout type %s wanted
> one of %s" % (layout[0], ('flat', 'filename-hash')))
>
> E.g. if you got an unknown thing, it's nice to print what you got and what
> you wanted so callers can fix it.

Assertions are not for callers. They merely guard against the case where
one half of the function was updated and the other half was left behind.
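
For context, the 'structure' section parsed above is the one defined by
GLEP 75. A distfiles/layout.conf roughly like this (values illustrative):

	[structure]
	0=filename-hash BLAKE2B 8
	1=flat

would make the loop pick ('filename-hash', 'BLAKE2B', '8'), i.e. one
directory level consisting of the first two hex digits (8 bits) of the
BLAKE2B digest of the filename. Entries it does not recognize (unknown
hash, cutoffs that are not multiples of 4, unsupported layout names)
are skipped in favour of later entries, and ('flat',) remains the
default when nothing matches.
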
>
>
> > +
> > +
> >  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >  	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
> >  	allow_missing_digests=True):
> > @@ -434,8 +538,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >  	for myfile, myuri in file_uri_tuples:
> >  		if myfile not in filedict:
> >  			filedict[myfile]=[]
> > -			for y in range(0,len(locations)):
> > -				filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> > +			for l in locations:
> > +				filedict[myfile].append(get_mirror_url(l, myfile,
> > +					mysettings["EROOT"]))
> >  		if myuri is None:
> >  			continue
> >  		if myuri[:9]=="mirror://":
> > --
> > 2.23.0
> >
> >
> >
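
For completeness, the observable effect of the last hunk (mirror and
file names invented):

	location = 'https://mirror.example.org'
	myfile = 'foo-1.0.tar.gz'

	old_url = location + '/distfiles/' + myfile
	# -> https://mirror.example.org/distfiles/foo-1.0.tar.gz
	#
	# new_url = get_mirror_url(location, myfile, mysettings['EROOT'])
	# -> identical to old_url on a flat mirror; on a filename-hash
	#    mirror it gains the hashed subdirectories, e.g.
	#    https://mirror.example.org/distfiles/ab/foo-1.0.tar.gz
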
--
Best regards,
Michał Górny

Attachments

File name MIME type
signature.asc application/pgp-signature