Gentoo Archives: gentoo-portage-dev

From: Alec Warner <antarus@g.o>
To: gentoo-portage-dev@l.g.o
Cc: "Michał Górny" <mgorny@g.o>
Subject: Re: [gentoo-portage-dev] [PATCH] fetch: Support GLEP 75 mirror structure
Date: Fri, 04 Oct 2019 04:58:37
Message-Id: CAAr7Pr-9ZvfLA=kFR7Q29PtY5bXXpYjY2=msJ81Lsdo9_wC5mA@mail.gmail.com
In Reply to: [gentoo-portage-dev] [PATCH] fetch: Support GLEP 75 mirror structure by "Michał Górny"
On Thu, Oct 3, 2019 at 7:52 AM Michał Górny <mgorny@g.o> wrote:

> Add support for the subset of GLEP 75 needed by Gentoo Infra. This
> includes fetching and parsing layout.conf, and support for flat layout
> and filename-hash layout with cutoffs being multiples of 4.
>
> Bug: https://bugs.gentoo.org/646898
> Signed-off-by: Michał Górny <mgorny@g.o>
> ---
>  lib/portage/package/ebuild/fetch.py | 113 +++++++++++++++++++++++++++-
>  1 file changed, 109 insertions(+), 4 deletions(-)
>
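
For anyone not familiar with GLEP 75: in the filename-hash layout a
distfile is no longer served straight out of /distfiles/, but out of a
subdirectory derived from a hash of its filename. Here is a rough
standalone sketch of the idea, using hashlib directly rather than
portage.checksum and assuming a hypothetical 'filename-hash BLAKE2B 8'
layout (glep75_subdir is just an illustrative name, not something in the
patch):

import hashlib

def glep75_subdir(filename, cutoffs=(8,)):
	# Take successive bit-groups (each a multiple of 4) from the hex digest
	# of the filename's hash and turn them into directory components.
	fnhash = hashlib.blake2b(filename.encode('utf8')).hexdigest()
	parts = []
	for bits in cutoffs:
		nibbles = bits // 4
		parts.append(fnhash[:nibbles])
		fnhash = fnhash[nibbles:]
	return '/'.join(parts)

# With 'filename-hash BLAKE2B 8', foo-1.0.tar.gz would be fetched from
# <mirror>/distfiles/<glep75_subdir('foo-1.0.tar.gz')>/foo-1.0.tar.gz,
# i.e. from a two-hex-digit subdirectory.
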
> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index 227bf45ae..692efcc01 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -7,12 +7,15 @@ __all__ = ['fetch']
>
>  import errno
>  import io
> +import itertools
> +import json
>  import logging
>  import random
>  import re
>  import stat
>  import sys
>  import tempfile
> +import time
>
>  from collections import OrderedDict
>
> @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
>  	'portage.package.ebuild.doebuild:doebuild_environment,' + \
>  		'_doebuild_spawn',
>  	'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> +	'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> +	'portage.util._urlopen:urlopen',
>  )
>
>  from portage import os, selinux, shutil, _encodings, \
>  	_movefile, _shell_quote, _unicode_encode
>  from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
> -	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> +	_filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> +	checksum_str)
>  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> -	GLOBAL_CONFIG_PATH
> +	GLOBAL_CONFIG_PATH, CACHE_PATH
>  from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
>  from portage.exception import FileNotFound, OperationNotPermitted, \
>  	PortageException, TryAgain
> @@ -253,6 +259,104 @@ _size_suffix_map = {
>  	'Y' : 80,
>  }
>
> +
> +def filename_hash_path(filename, algo, cutoffs):
> +	"""
> +	Get directory path for filename in filename-hash mirror structure.
> +
> +	@param filename: Filename to fetch
> +	@param algo: Hash algorithm
> +	@param cutoffs: Cutoff values (n:n...)
> +	@return: Directory path
> +	"""
> +
> +	fnhash = checksum_str(filename.encode('utf8'), algo)
> +	ret = ''
> +	for c in cutoffs.split(':'):
> +		c = int(c) // 4
> +		ret += fnhash[:c] + '/'
>

When making a path, please use os.path.join()

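Something along these lines, just as a sketch of the same function (it
reuses checksum_str and the portage 'os' module that fetch.py already
imports; the trailing '/' is kept because get_mirror_url() concatenates
the filename directly onto the result):

def filename_hash_path(filename, algo, cutoffs):
	fnhash = checksum_str(filename.encode('utf8'), algo)
	dirs = []
	for c in cutoffs.split(':'):
		c = int(c) // 4
		dirs.append(fnhash[:c])
		fnhash = fnhash[c:]
	# os.path.join() takes care of the separators between components; the
	# final '/' is appended explicitly for the caller.
	return os.path.join(*dirs) + '/'
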
> +		fnhash = fnhash[c:]
> +	return ret
> +
> +
> +def get_mirror_url(mirror_url, filename, eroot):
> +	"""
> +	Get correct fetch URL for a given file, accounting for mirror
> +	layout configuration.
> +
> +	@param mirror_url: Base URL to the mirror (without '/distfiles')
> +	@param filename: Filename to fetch
> +	@param eroot: EROOT to use for the cache file
> +	@return: Full URL to fetch
> +	"""
> +
> +	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
> +	try:
> +		with open(cache_file, 'r') as f:
> +			cache = json.load(f)
> +	except (IOError, ValueError):
> +		cache = {}
>

I'm a bit worried that we are opening this cache file off of disk every
time we call get_mirror_url(). Can we just cache the contents in memory
between calls, or even better, pass the cache in as an argument rather
than having it live inside get_mirror_url()?

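Rough sketch of what I mean; the helper names are illustrative, not from
the patch, and it reuses the json/os/CACHE_PATH imports the patch already
adds. fetch() would read the JSON once, hand the dict to get_mirror_url()
for every mirror, and write it back once afterwards:

def load_mirror_metadata(eroot):
	# Read the on-disk cache once per fetch() invocation.
	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
	try:
		with open(cache_file, 'r') as f:
			return json.load(f)
	except (IOError, ValueError):
		return {}

def save_mirror_metadata(eroot, cache):
	# Write the (possibly updated) cache back once, after the URLs are built.
	cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
	with open(cache_file, 'w') as f:
		json.dump(cache, f)

# In fetch(), something like:
#	cache = load_mirror_metadata(mysettings["EROOT"])
#	for l in locations:
#		filedict[myfile].append(get_mirror_url(l, myfile, cache))
#	save_mirror_metadata(mysettings["EROOT"], cache)
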
> +
> +	ts, layout = cache.get(mirror_url, (0, None))
> +	# refresh at least daily
> +	if ts < time.time() - 86400:
> +		# the default
> +		layout = ('flat',)
> +
> +		try:
> +			f = urlopen(mirror_url + '/distfiles/layout.conf')
> +			try:
> +				data = io.StringIO(f.read().decode('utf8'))
> +			finally:
> +				f.close()
> +			cp = SafeConfigParser()
> +			read_configs(cp, [data])
> +
> +			for i in itertools.count():
> +				try:
> +					val = tuple(cp.get('structure', '%d' % i).split())
> +					if val == ('flat',):
> +						pass
> +					elif val[0] == 'filename-hash' and len(val) == 3:
> +						if val[1] not in get_valid_checksum_keys():
> +							continue
> +						# validate cutoffs
> +						cutoffs_good = False
> +						for c in val[2].split(':'):
> +							try:
> +								c = int(c)
> +							except ValueError:
> +								break
> +							else:
> +								if c % 4 != 0:
> +									break
> +						else:
> +							cutoffs_good = True
> +						if not cutoffs_good:
> +							continue
> +					else:
> +						# (skip unsupported variant)
> +						continue
> +					layout = val
> +					break
> +				except NoOptionError:
> +					break
> +		except IOError:
> +			pass
> +
> +		cache[mirror_url] = (time.time(), layout)
> +		with open(cache_file, 'w') as f:
> +			json.dump(cache, f)
> +
> +	if layout[0] == 'flat':
> +		return mirror_url + "/distfiles/" + filename
> +	elif layout[0] == 'filename-hash':
> +		return (mirror_url + "/distfiles/" +
> +			filename_hash_path(filename, *layout[1:]) + filename)
> +	else:
> +		raise AssertionError("get_mirror_url() got unknown layout type")
>

raise AssertionError("get_mirror_url() got unknown layout type %s, "
	"wanted one of %s" % (layout[0], ('flat', 'filename-hash')))

E.g. if you got an unknown thing, it's nice to print what you got and what
you wanted so callers can fix it.

> +
> +
>  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>  	locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
>  	allow_missing_digests=True):
> @@ -434,8 +538,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>  		for myfile, myuri in file_uri_tuples:
>  			if myfile not in filedict:
>  				filedict[myfile]=[]
> -				for y in range(0,len(locations)):
> -					filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> +				for l in locations:
> +					filedict[myfile].append(get_mirror_url(l, myfile,
> +						mysettings["EROOT"]))
>  			if myuri is None:
>  				continue
>  			if myuri[:9]=="mirror://":
> --
> 2.23.0
>
