Gentoo Archives: gentoo-portage-dev

From: Chun-Yu Shei <cshei@××××××.com>
To: gentoo-portage-dev@l.g.o
Cc: Chun-Yu Shei <cshei@××××××.com>
Subject: [gentoo-portage-dev] [PATCH 3/3] Add partial caching to match_from_list
Date: Sat, 27 Jun 2020 06:34:37
Message-Id: 20200627063415.936177-4-cshei@google.com
In Reply to: [gentoo-portage-dev] Add caching to a few commonly used functions by Chun-Yu Shei
1 This function is called frequently with similar arguments, so cache as
2 much of the partial results as possible. It seems that "match_from_list"
3 must return a list containing actual entries from the "candidate_list" argument,
4 so we store frozensets in "_match_from_list_cache" and test items from
5 "candidate_list" for membership in these sets. The filtering performed
6 by the mydep.unevaluated_atom.use and mydep.repo checks towards the end
7 of the function is also not cached, since this causes some test failures.
8
9 This results in a reduction of "emerge -uDvpU --with-bdeps=y @world"
10 runtime from 43.53 -> 40.15 sec -- an 8.4% speedup.
11 ---
12 lib/portage/dep/__init__.py | 359 +++++++++++++++++++-----------------
13 1 file changed, 189 insertions(+), 170 deletions(-)
14
15 diff --git a/lib/portage/dep/__init__.py b/lib/portage/dep/__init__.py
16 index df296dd81..dbd23bb23 100644
17 --- a/lib/portage/dep/__init__.py
18 +++ b/lib/portage/dep/__init__.py
19 @@ -2174,6 +2174,8 @@ def best_match_to_list(mypkg, mylist):
20
21 return bestm
22
23 +_match_from_list_cache = {}
24 +
25 def match_from_list(mydep, candidate_list):
26 """
27 Searches list for entries that matches the package.
28 @@ -2197,209 +2199,226 @@ def match_from_list(mydep, candidate_list):
29 if not isinstance(mydep, Atom):
30 mydep = Atom(mydep, allow_wildcard=True, allow_repo=True)
31
32 - mycpv = mydep.cpv
33 - mycpv_cps = catpkgsplit(mycpv) # Can be None if not specific
34 - build_id = mydep.build_id
35 + cache_key = (mydep, tuple(candidate_list))
36 + key_has_hash = True
37 + cache_entry = None
38 + if mydep.build_id is None and key_has_hash:
39 + try:
40 + cache_entry = _match_from_list_cache.get(cache_key)
41 + except TypeError:
42 + key_has_hash = False
43
44 - if not mycpv_cps:
45 - ver = None
46 - rev = None
47 - else:
48 - cat, pkg, ver, rev = mycpv_cps
49 - if mydep == mycpv:
50 - raise KeyError(_("Specific key requires an operator"
51 - " (%s) (try adding an '=')") % (mydep))
52 -
53 - if ver and rev:
54 - operator = mydep.operator
55 - if not operator:
56 - writemsg(_("!!! Invalid atom: %s\n") % mydep, noiselevel=-1)
57 - return []
58 + if cache_entry is not None:
59 + # Note: the list returned by this function must contain actual entries
60 + # from "candidate_list", so store frozensets in "_match_from_list_cache"
61 + # and test items from "candidate_list" for membership in these sets.
62 + mylist = [x for x in candidate_list if x in cache_entry]
63 else:
64 - operator = None
65 -
66 - mylist = []
67 + mycpv = mydep.cpv
68 + mycpv_cps = catpkgsplit(mycpv) # Can be None if not specific
69 + build_id = mydep.build_id
70
71 - if mydep.extended_syntax:
72 + if not mycpv_cps:
73 + ver = None
74 + rev = None
75 + else:
76 + cat, pkg, ver, rev = mycpv_cps
77 + if mydep == mycpv:
78 + raise KeyError(_("Specific key requires an operator"
79 + " (%s) (try adding an '=')") % (mydep))
80
81 - for x in candidate_list:
82 - cp = getattr(x, "cp", None)
83 - if cp is None:
84 - mysplit = catpkgsplit(remove_slot(x))
85 - if mysplit is not None:
86 - cp = mysplit[0] + '/' + mysplit[1]
87 + if ver and rev:
88 + operator = mydep.operator
89 + if not operator:
90 + writemsg(_("!!! Invalid atom: %s\n") % mydep, noiselevel=-1)
91 + return []
92 + else:
93 + operator = None
94
95 - if cp is None:
96 - continue
97 + mylist = []
98
99 - if cp == mycpv or extended_cp_match(mydep.cp, cp):
100 - mylist.append(x)
101 + if mydep.extended_syntax:
102
103 - if mylist and mydep.operator == "=*":
104 + for x in candidate_list:
105 + cp = getattr(x, "cp", None)
106 + if cp is None:
107 + mysplit = catpkgsplit(remove_slot(x))
108 + if mysplit is not None:
109 + cp = mysplit[0] + '/' + mysplit[1]
110
111 - candidate_list = mylist
112 - mylist = []
113 - # Currently, only \*\w+\* is supported.
114 - ver = mydep.version[1:-1]
115 + if cp is None:
116 + continue
117
118 - for x in candidate_list:
119 - x_ver = getattr(x, "version", None)
120 - if x_ver is None:
121 - xs = catpkgsplit(remove_slot(x))
122 - if xs is None:
123 - continue
124 - x_ver = "-".join(xs[-2:])
125 - if ver in x_ver:
126 + if cp == mycpv or extended_cp_match(mydep.cp, cp):
127 mylist.append(x)
128
129 - elif operator is None:
130 - for x in candidate_list:
131 - cp = getattr(x, "cp", None)
132 - if cp is None:
133 - mysplit = catpkgsplit(remove_slot(x))
134 - if mysplit is not None:
135 - cp = mysplit[0] + '/' + mysplit[1]
136 + if mylist and mydep.operator == "=*":
137
138 - if cp is None:
139 - continue
140 + candidate_list = mylist
141 + mylist = []
142 + # Currently, only \*\w+\* is supported.
143 + ver = mydep.version[1:-1]
144
145 - if cp == mydep.cp:
146 - mylist.append(x)
147 + for x in candidate_list:
148 + x_ver = getattr(x, "version", None)
149 + if x_ver is None:
150 + xs = catpkgsplit(remove_slot(x))
151 + if xs is None:
152 + continue
153 + x_ver = "-".join(xs[-2:])
154 + if ver in x_ver:
155 + mylist.append(x)
156
157 - elif operator == "=": # Exact match
158 - for x in candidate_list:
159 - xcpv = getattr(x, "cpv", None)
160 - if xcpv is None:
161 - xcpv = remove_slot(x)
162 - if not cpvequal(xcpv, mycpv):
163 - continue
164 - if (build_id is not None and
165 - getattr(xcpv, "build_id", None) != build_id):
166 - continue
167 - mylist.append(x)
168 + elif operator is None:
169 + for x in candidate_list:
170 + cp = getattr(x, "cp", None)
171 + if cp is None:
172 + mysplit = catpkgsplit(remove_slot(x))
173 + if mysplit is not None:
174 + cp = mysplit[0] + '/' + mysplit[1]
175
176 - elif operator == "=*": # glob match
177 - # XXX: Nasty special casing for leading zeros
178 - # Required as =* is a literal prefix match, so can't
179 - # use vercmp
180 - myver = mycpv_cps[2].lstrip("0")
181 - if not myver or not myver[0].isdigit():
182 - myver = "0"+myver
183 - if myver == mycpv_cps[2]:
184 - mycpv_cmp = mycpv
185 - else:
186 - # Use replace to preserve the revision part if it exists
187 - # (mycpv_cps[3] can't be trusted because in contains r0
188 - # even when the input has no revision part).
189 - mycpv_cmp = mycpv.replace(
190 - mydep.cp + "-" + mycpv_cps[2],
191 - mydep.cp + "-" + myver, 1)
192 - for x in candidate_list:
193 - try:
194 - x.cp
195 - except AttributeError:
196 - try:
197 - pkg = _pkg_str(remove_slot(x))
198 - except InvalidData:
199 + if cp is None:
200 continue
201 - else:
202 - pkg = x
203
204 - xs = pkg.cpv_split
205 - myver = xs[2].lstrip("0")
206 - if not myver or not myver[0].isdigit():
207 - myver = "0"+myver
208 - if myver == xs[2]:
209 - xcpv = pkg.cpv
210 - else:
211 - # Use replace to preserve the revision part if it exists.
212 - xcpv = pkg.cpv.replace(
213 - pkg.cp + "-" + xs[2],
214 - pkg.cp + "-" + myver, 1)
215 - if xcpv.startswith(mycpv_cmp):
216 - # =* glob matches only on boundaries between version parts,
217 - # so 1* does not match 10 (bug 560466).
218 - next_char = xcpv[len(mycpv_cmp):len(mycpv_cmp)+1]
219 - if (not next_char or next_char in '._-' or
220 - mycpv_cmp[-1].isdigit() != next_char.isdigit()):
221 + if cp == mydep.cp:
222 mylist.append(x)
223
224 - elif operator == "~": # version, any revision, match
225 - for x in candidate_list:
226 - xs = getattr(x, "cpv_split", None)
227 - if xs is None:
228 - xs = catpkgsplit(remove_slot(x))
229 - if xs is None:
230 - raise InvalidData(x)
231 - if not cpvequal(xs[0]+"/"+xs[1]+"-"+xs[2], mycpv_cps[0]+"/"+mycpv_cps[1]+"-"+mycpv_cps[2]):
232 - continue
233 - if xs[2] != ver:
234 - continue
235 - mylist.append(x)
236 + elif operator == "=": # Exact match
237 + for x in candidate_list:
238 + xcpv = getattr(x, "cpv", None)
239 + if xcpv is None:
240 + xcpv = remove_slot(x)
241 + if not cpvequal(xcpv, mycpv):
242 + continue
243 + if (build_id is not None and
244 + getattr(xcpv, "build_id", None) != build_id):
245 + continue
246 + mylist.append(x)
247
248 - elif operator in (">", ">=", "<", "<="):
249 - for x in candidate_list:
250 - if hasattr(x, 'cp'):
251 - pkg = x
252 + elif operator == "=*": # glob match
253 + # XXX: Nasty special casing for leading zeros
254 + # Required as =* is a literal prefix match, so can't
255 + # use vercmp
256 + myver = mycpv_cps[2].lstrip("0")
257 + if not myver or not myver[0].isdigit():
258 + myver = "0"+myver
259 + if myver == mycpv_cps[2]:
260 + mycpv_cmp = mycpv
261 else:
262 + # Use replace to preserve the revision part if it exists
263 + # (mycpv_cps[3] can't be trusted because in contains r0
264 + # even when the input has no revision part).
265 + mycpv_cmp = mycpv.replace(
266 + mydep.cp + "-" + mycpv_cps[2],
267 + mydep.cp + "-" + myver, 1)
268 + for x in candidate_list:
269 try:
270 - pkg = _pkg_str(remove_slot(x))
271 - except InvalidData:
272 - continue
273 + x.cp
274 + except AttributeError:
275 + try:
276 + pkg = _pkg_str(remove_slot(x))
277 + except InvalidData:
278 + continue
279 + else:
280 + pkg = x
281 +
282 + xs = pkg.cpv_split
283 + myver = xs[2].lstrip("0")
284 + if not myver or not myver[0].isdigit():
285 + myver = "0"+myver
286 + if myver == xs[2]:
287 + xcpv = pkg.cpv
288 + else:
289 + # Use replace to preserve the revision part if it exists.
290 + xcpv = pkg.cpv.replace(
291 + pkg.cp + "-" + xs[2],
292 + pkg.cp + "-" + myver, 1)
293 + if xcpv.startswith(mycpv_cmp):
294 + # =* glob matches only on boundaries between version parts,
295 + # so 1* does not match 10 (bug 560466).
296 + next_char = xcpv[len(mycpv_cmp):len(mycpv_cmp)+1]
297 + if (not next_char or next_char in '._-' or
298 + mycpv_cmp[-1].isdigit() != next_char.isdigit()):
299 + mylist.append(x)
300
301 - if pkg.cp != mydep.cp:
302 - continue
303 - try:
304 - result = vercmp(pkg.version, mydep.version)
305 - except ValueError: # pkgcmp may return ValueError during int() conversion
306 - writemsg(_("\nInvalid package name: %s\n") % x, noiselevel=-1)
307 - raise
308 - if result is None:
309 - continue
310 - elif operator == ">":
311 - if result > 0:
312 - mylist.append(x)
313 - elif operator == ">=":
314 - if result >= 0:
315 - mylist.append(x)
316 - elif operator == "<":
317 - if result < 0:
318 - mylist.append(x)
319 - elif operator == "<=":
320 - if result <= 0:
321 - mylist.append(x)
322 - else:
323 - raise KeyError(_("Unknown operator: %s") % mydep)
324 - else:
325 - raise KeyError(_("Unknown operator: %s") % mydep)
326 + elif operator == "~": # version, any revision, match
327 + for x in candidate_list:
328 + xs = getattr(x, "cpv_split", None)
329 + if xs is None:
330 + xs = catpkgsplit(remove_slot(x))
331 + if xs is None:
332 + raise InvalidData(x)
333 + if not cpvequal(xs[0]+"/"+xs[1]+"-"+xs[2], mycpv_cps[0]+"/"+mycpv_cps[1]+"-"+mycpv_cps[2]):
334 + continue
335 + if xs[2] != ver:
336 + continue
337 + mylist.append(x)
338
339 - if mydep.slot is not None:
340 - candidate_list = mylist
341 - mylist = []
342 - for x in candidate_list:
343 - x_pkg = None
344 - try:
345 - x.cpv
346 - except AttributeError:
347 - xslot = dep_getslot(x)
348 - if xslot is not None:
349 + elif operator in (">", ">=", "<", "<="):
350 + for x in candidate_list:
351 + if hasattr(x, 'cp'):
352 + pkg = x
353 + else:
354 try:
355 - x_pkg = _pkg_str(remove_slot(x), slot=xslot)
356 + pkg = _pkg_str(remove_slot(x))
357 except InvalidData:
358 continue
359 - else:
360 - x_pkg = x
361
362 - if x_pkg is None:
363 - mylist.append(x)
364 - else:
365 + if pkg.cp != mydep.cp:
366 + continue
367 try:
368 - x_pkg.slot
369 + result = vercmp(pkg.version, mydep.version)
370 + except ValueError: # pkgcmp may return ValueError during int() conversion
371 + writemsg(_("\nInvalid package name: %s\n") % x, noiselevel=-1)
372 + raise
373 + if result is None:
374 + continue
375 + elif operator == ">":
376 + if result > 0:
377 + mylist.append(x)
378 + elif operator == ">=":
379 + if result >= 0:
380 + mylist.append(x)
381 + elif operator == "<":
382 + if result < 0:
383 + mylist.append(x)
384 + elif operator == "<=":
385 + if result <= 0:
386 + mylist.append(x)
387 + else:
388 + raise KeyError(_("Unknown operator: %s") % mydep)
389 + else:
390 + raise KeyError(_("Unknown operator: %s") % mydep)
391 +
392 + if mydep.slot is not None:
393 + candidate_list = mylist
394 + mylist = []
395 + for x in candidate_list:
396 + x_pkg = None
397 + try:
398 + x.cpv
399 except AttributeError:
400 + xslot = dep_getslot(x)
401 + if xslot is not None:
402 + try:
403 + x_pkg = _pkg_str(remove_slot(x), slot=xslot)
404 + except InvalidData:
405 + continue
406 + else:
407 + x_pkg = x
408 +
409 + if x_pkg is None:
410 mylist.append(x)
411 else:
412 - if _match_slot(mydep, x_pkg):
413 + try:
414 + x_pkg.slot
415 + except AttributeError:
416 mylist.append(x)
417 + else:
418 + if _match_slot(mydep, x_pkg):
419 + mylist.append(x)
420 + if key_has_hash:
421 + _match_from_list_cache[cache_key] = frozenset(mylist)
422
423 if mydep.unevaluated_atom.use:
424 candidate_list = mylist
425 --
426 2.27.0.212.ge8ba1cc988-goog