Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)
Date: Sun, 12 Nov 2017 11:50:56
Message-Id: 20171112114839.50497-1-zmedico@gentoo.org
1 Deps like these:
2
3 || ( foo bar ) || ( bar baz )
4
5 Translate to disjunctive normal form (DNF):
6
7 || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) )
8
9 Using DNF, if none of the packages are currently installed,
10 then the ( bar bar ) choice will be automatically preferred
11 since it is satisfied by the fewest number of packages.
12 If the ( foo baz ) choice is already satisfied, then that
13 choice will be preferred instead.
14
15 Since DNF results in exponential explosion of the formula,
16 only use DNF for the parts of the dependencies that have
17 overlapping atoms.
18
19 In order to simplify the implementation of the dnf_convert
20 function, this patch also fixes _expand_new_virtuals to
21 normalize results in the same way as use_reduce (with no
22 redundant nested lists).
23
24 Bug: https://bugs.gentoo.org/632026
25 ---
26 pym/portage/dep/_dnf.py | 97 ++++++++++++++++
27 pym/portage/dep/dep_check.py | 127 +++++++++++++++++++--
28 pym/portage/tests/dep/test_dnf_convert.py | 48 ++++++++
29 pym/portage/tests/dep/test_overlap_dnf.py | 28 +++++
30 .../resolver/test_virtual_minimize_children.py | 92 +++++++++++++++
31 5 files changed, 385 insertions(+), 7 deletions(-)
32 create mode 100644 pym/portage/dep/_dnf.py
33 create mode 100644 pym/portage/tests/dep/test_dnf_convert.py
34 create mode 100644 pym/portage/tests/dep/test_overlap_dnf.py
35 create mode 100644 pym/portage/tests/resolver/test_virtual_minimize_children.py
36
37 diff --git a/pym/portage/dep/_dnf.py b/pym/portage/dep/_dnf.py
38 new file mode 100644
39 index 000000000..503b209c2
40 --- /dev/null
41 +++ b/pym/portage/dep/_dnf.py
42 @@ -0,0 +1,97 @@
43 +# Copyright 2017 Gentoo Foundation
44 +# Distributed under the terms of the GNU General Public License v2
45 +
46 +from __future__ import unicode_literals
47 +
48 +import itertools
49 +
50 +
51 +def dnf_convert(dep_struct):
52 + """
53 + Convert dep_struct to disjunctive normal form (DNF), where dep_struct
54 + is either a conjunction or disjunction of the form produced by
55 + use_reduce(opconvert=True).
56 + """
57 + # Normalize input to have a top-level conjunction.
58 + if isinstance(dep_struct, list):
59 + if dep_struct and dep_struct[0] == '||':
60 + dep_struct = [dep_struct]
61 + else:
62 + dep_struct = [dep_struct]
63 +
64 + conjunction = []
65 + disjunctions = []
66 +
67 + for x in dep_struct:
68 + if isinstance (x, list):
69 + assert x
70 + if x[0] == '||':
71 + if any(isinstance(element, list) for element in x):
72 + x_dnf = ['||']
73 + for element in x[1:]:
74 + if isinstance(element, list):
75 + # Due to normalization, a disjunction must not be
76 + # nested directly in another disjunction, so this
77 + # must be a conjunction.
78 + assert element and element[0] != '||'
79 + element = dnf_convert(element)
80 + if contains_disjunction(element):
81 + assert (len(element) == 1 and
82 + element[0] and element[0][0] == '||')
83 + x_dnf.extend(element[0][1:])
84 + else:
85 + x_dnf.append(element)
86 + else:
87 + x_dnf.append(element)
88 + x = x_dnf
89 + disjunctions.append(x)
90 + else:
91 + for x in dnf_convert(x):
92 + if isinstance (x, list):
93 + # Due to normalization, a conjunction must not be
94 + # nested directly in another conjunction, so this
95 + # must be a disjunction.
96 + assert x and x[0] == '||'
97 + disjunctions.append(x)
98 + else:
99 + conjunction.append(x)
100 + else:
101 + conjunction.append(x)
102 +
103 + if disjunctions and (conjunction or len(disjunctions) > 1):
104 + dnf_form = ['||']
105 + for x in itertools.product(*[x[1:] for x in disjunctions]):
106 + normalized = conjunction[:]
107 + for element in x:
108 + if isinstance(element, list):
109 + normalized.extend(element)
110 + else:
111 + normalized.append(element)
112 + dnf_form.append(normalized)
113 + result = [dnf_form]
114 + else:
115 + result = conjunction + disjunctions
116 +
117 + return result
118 +
119 +
120 +def contains_disjunction(dep_struct):
121 + """
122 + Search for a disjunction contained in dep_struct, where dep_struct
123 + is either a conjunction or disjunction of the form produced by
124 + use_reduce(opconvert=True). If dep_struct is a disjunction, then
125 + this only returns True if there is a nested disjunction. Due to
126 + normalization, recursion is only needed when dep_struct is a
127 + disjunction containing a conjunction. If dep_struct is a conjunction,
128 + then it is assumed that normalization has elevated any nested
129 + disjunctions to the top-level.
130 + """
131 + is_disjunction = dep_struct and dep_struct[0] == '||'
132 + for x in dep_struct:
133 + if isinstance(x, list):
134 + assert x
135 + if x[0] == '||':
136 + return True
137 + elif is_disjunction and contains_disjunction(x):
138 + return True
139 + return False
140 diff --git a/pym/portage/dep/dep_check.py b/pym/portage/dep/dep_check.py
141 index b33f7e5db..6db4cc42e 100644
142 --- a/pym/portage/dep/dep_check.py
143 +++ b/pym/portage/dep/dep_check.py
144 @@ -6,14 +6,20 @@ from __future__ import unicode_literals
145 __all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']
146
147 import collections
148 +import itertools
149 import logging
150 import operator
151
152 import portage
153 from portage.dep import Atom, match_from_list, use_reduce
154 +from portage.dep._dnf import (
155 + dnf_convert as _dnf_convert,
156 + contains_disjunction as _contains_disjunction,
157 +)
158 from portage.exception import InvalidDependString, ParseError
159 from portage.localization import _
160 from portage.util import writemsg, writemsg_level
161 +from portage.util.digraph import digraph
162 from portage.util.SlotObject import SlotObject
163 from portage.versions import vercmp, _pkg_str
164
165 @@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
166 atom because it wouldn't necessarily make sense to block all the components
167 of a compound virtual. When more than one new-style virtual is matched,
168 the matches are sorted from highest to lowest versions and the atom is
169 - expanded to || ( highest match ... lowest match )."""
170 + expanded to || ( highest match ... lowest match ).
171 +
172 + The result is normalized in the same way as use_reduce, having a top-level
173 + conjuction, and no redundant nested lists.
174 + """
175 newsplit = []
176 mytrees = trees[myroot]
177 portdb = mytrees["porttree"].dbapi
178 @@ -54,14 +64,30 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
179 portdb = trees[myroot]["bintree"].dbapi
180 pprovideddict = mysettings.pprovideddict
181 myuse = kwargs["myuse"]
182 + is_disjunction = mysplit and mysplit[0] == '||'
183 for x in mysplit:
184 if x == "||":
185 newsplit.append(x)
186 continue
187 elif isinstance(x, list):
188 - newsplit.append(_expand_new_virtuals(x, edebug, mydbapi,
189 + x = _expand_new_virtuals(x, edebug, mydbapi,
190 mysettings, myroot=myroot, trees=trees, use_mask=use_mask,
191 - use_force=use_force, **kwargs))
192 + use_force=use_force, **kwargs)
193 + if is_disjunction:
194 + if len(x) == 1:
195 + x = x[0]
196 + if isinstance(x, list):
197 + # Due to normalization, a conjunction must not be
198 + # nested directly in another conjunction, so this
199 + # must be a disjunction.
200 + assert x and x[0] == '||'
201 + newsplit.extend(x[1:])
202 + else:
203 + newsplit.append(x)
204 + else:
205 + newsplit.append(x)
206 + else:
207 + newsplit.extend(x)
208 continue
209
210 if not isinstance(x, Atom):
211 @@ -101,6 +127,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
212 a.append(Atom(x.replace(x.cp, y.cp, 1)))
213 if not a:
214 newsplit.append(x)
215 + elif is_disjunction:
216 + newsplit.extend(a)
217 elif len(a) == 1:
218 newsplit.append(a[0])
219 else:
220 @@ -218,11 +246,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
221 newsplit.append(x)
222 if atom_graph is not None:
223 atom_graph.add((x, id(x)), graph_parent)
224 + elif is_disjunction:
225 + newsplit.extend(a)
226 elif len(a) == 1:
227 - newsplit.append(a[0])
228 + newsplit.extend(a[0])
229 else:
230 newsplit.append(['||'] + a)
231
232 + # For consistency with related functions like use_reduce, always
233 + # normalize the result to have a top-level conjunction.
234 + if is_disjunction:
235 + newsplit = [newsplit]
236 +
237 return newsplit
238
239 def dep_eval(deplist):
240 @@ -612,9 +647,9 @@ def dep_zapdeps(unreduced, reduced, myroot, use_binaries=0, trees=None):
241 for choices in choice_bins:
242 if len(choices) < 2:
243 continue
244 - # Prefer choices with all_installed_slots for bug #480736.
245 - choices.sort(key=operator.attrgetter('all_installed_slots'),
246 - reverse=True)
247 + # Prefer choices with all_installed_slots for bug #480736, and
248 + # choices with a smaller number of packages for bug #632026.
249 + choices.sort(key=lambda x: (not x.all_installed_slots, len(x.slot_map)))
250 for choice_1 in choices[1:]:
251 cps = set(choice_1.cp_map)
252 for choice_2 in choices:
253 @@ -741,6 +776,9 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
254 except ParseError as e:
255 return [0, "%s" % (e,)]
256
257 + if mysettings.local_config: # if not repoman
258 + mysplit = _overlap_dnf(mysplit)
259 +
260 mysplit2 = dep_wordreduce(mysplit,
261 mysettings, mydbapi, mode, use_cache=use_cache)
262 if mysplit2 is None:
263 @@ -755,6 +793,81 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
264
265 return [1, selected_atoms]
266
267 +
268 +def _overlap_dnf(dep_struct):
269 + """
270 + Combine overlapping || groups using disjunctive normal form (DNF), in
271 + order to minimize the number of packages chosen to satisfy cases like
272 + "|| ( foo bar ) || ( bar baz )" as in bug #632026. Non-overlapping
273 + groups are excluded from the conversion, since DNF leads to exponential
274 + explosion of the formula.
275 + """
276 + if not _contains_disjunction(dep_struct):
277 + return dep_struct
278 +
279 + # map atom.cp to disjunctions
280 + cp_map = collections.defaultdict(list)
281 + # graph atom.cp, with edges connecting atoms in the same disjunction
282 + overlap_graph = digraph()
283 + # map id(disjunction) to index in dep_struct, for deterministic output
284 + order_map = {}
285 + order_key = lambda x: order_map[id(x)]
286 + result = []
287 + for i, x in enumerate(dep_struct):
288 + if isinstance(x, list):
289 + assert x and x[0] == '||'
290 + order_map[id(x)] = i
291 + prev_cp = None
292 + for atom in _iter_flatten(x):
293 + if isinstance(atom, Atom) and not atom.blocker:
294 + cp_map[atom.cp].append(x)
295 + overlap_graph.add(atom.cp, parent=prev_cp)
296 + prev_cp = atom.cp
297 + if prev_cp is None: # only contains blockers
298 + result.append(x)
299 + else:
300 + result.append(x)
301 +
302 + # group together disjunctions having atom.cp overlap
303 + traversed = set()
304 + for cp in overlap_graph:
305 + if cp in traversed:
306 + continue
307 + disjunctions = {}
308 + stack = [cp]
309 + while stack:
310 + cp = stack.pop()
311 + traversed.add(cp)
312 + for x in cp_map[cp]:
313 + disjunctions[id(x)] = x
314 + for other_cp in itertools.chain(overlap_graph.child_nodes(cp),
315 + overlap_graph.parent_nodes(cp)):
316 + if other_cp not in traversed:
317 + stack.append(other_cp)
318 +
319 + if len(disjunctions) > 1:
320 + # convert overlapping disjunctions to DNF
321 + result.extend(_dnf_convert(
322 + sorted(disjunctions.values(), key=order_key)))
323 + else:
324 + # pass through non-overlapping disjunctions
325 + result.append(disjunctions.popitem()[1])
326 +
327 + return result
328 +
329 +
330 +def _iter_flatten(dep_struct):
331 + """
332 + Yield nested elements of dep_struct.
333 + """
334 + for x in dep_struct:
335 + if isinstance(x, list):
336 + for x in _iter_flatten(x):
337 + yield x
338 + else:
339 + yield x
340 +
341 +
342 def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
343 "Reduces the deplist to ones and zeros"
344 deplist=mydeplist[:]
345 diff --git a/pym/portage/tests/dep/test_dnf_convert.py b/pym/portage/tests/dep/test_dnf_convert.py
346 new file mode 100644
347 index 000000000..b92778d4a
348 --- /dev/null
349 +++ b/pym/portage/tests/dep/test_dnf_convert.py
350 @@ -0,0 +1,48 @@
351 +# Copyright 2017 Gentoo Foundation
352 +# Distributed under the terms of the GNU General Public License v2
353 +
354 +from portage.tests import TestCase
355 +from portage.dep import use_reduce
356 +from portage.dep._dnf import dnf_convert
357 +
358 +class DNFConvertTestCase(TestCase):
359 +
360 + def testDNFConvert(self):
361 +
362 + test_cases = (
363 + (
364 + '|| ( A B ) || ( C D )',
365 + [['||', ['A', 'C'], ['A', 'D'], ['B', 'C'], ['B', 'D']]],
366 + ),
367 + (
368 + '|| ( A B ) || ( B C )',
369 + [['||', ['A', 'B'], ['A', 'C'], ['B', 'B'], ['B', 'C']]],
370 + ),
371 + (
372 + '|| ( A ( B C D ) )',
373 + [['||', 'A', ['B', 'C', 'D']]],
374 + ),
375 + (
376 + '|| ( A ( B C D ) ) E',
377 + [['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
378 + ),
379 + (
380 + '|| ( A ( B C ) ) || ( D E ) F',
381 + [['||', ['F', 'A', 'D'], ['F', 'A', 'E'], ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
382 + ),
383 + (
384 + '|| ( A ( B C || ( D E ) ) ( F G ) H )',
385 + [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], ['F', 'G'], 'H']],
386 + ),
387 + (
388 + '|| ( A ( B C || ( D E ) ) F )',
389 + [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], 'F']],
390 + ),
391 + (
392 + '|| ( A ( C || ( D E ) || ( F G ) ) H )',
393 + [['||', 'A', ['C', 'D', 'F'], ['C', 'D', 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
394 + ),
395 + )
396 +
397 + for dep_str, result in test_cases:
398 + self.assertEqual(dnf_convert(use_reduce(dep_str, opconvert=True)), result)
399 diff --git a/pym/portage/tests/dep/test_overlap_dnf.py b/pym/portage/tests/dep/test_overlap_dnf.py
400 new file mode 100644
401 index 000000000..79b3e8e46
402 --- /dev/null
403 +++ b/pym/portage/tests/dep/test_overlap_dnf.py
404 @@ -0,0 +1,28 @@
405 +# Copyright 2017 Gentoo Foundation
406 +# Distributed under the terms of the GNU General Public License v2
407 +
408 +from portage.tests import TestCase
409 +from portage.dep import Atom, use_reduce
410 +from portage.dep.dep_check import _overlap_dnf
411 +
412 +class OverlapDNFTestCase(TestCase):
413 +
414 + def testOverlapDNF(self):
415 +
416 + test_cases = (
417 + (
418 + '|| ( cat/A cat/B ) cat/E || ( cat/C cat/D )',
419 + ['cat/E', ['||', 'cat/A', 'cat/B'], ['||', 'cat/C', 'cat/D']],
420 + ),
421 + (
422 + '|| ( cat/A cat/B ) cat/D || ( cat/B cat/C )',
423 + ['cat/D', ['||', ['cat/A', 'cat/B'], ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
424 + ),
425 + (
426 + '|| ( cat/A cat/B ) || ( cat/C cat/D ) || ( ( cat/B cat/E ) cat/F )',
427 + [['||', ['cat/A', 'cat/B', 'cat/E'], ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']], ['||', 'cat/C', 'cat/D']],
428 + ),
429 + )
430 +
431 + for dep_str, result in test_cases:
432 + self.assertEqual(_overlap_dnf(use_reduce(dep_str, token_class=Atom, opconvert=True)), result)
433 diff --git a/pym/portage/tests/resolver/test_virtual_minimize_children.py b/pym/portage/tests/resolver/test_virtual_minimize_children.py
434 new file mode 100644
435 index 000000000..35b881bfb
436 --- /dev/null
437 +++ b/pym/portage/tests/resolver/test_virtual_minimize_children.py
438 @@ -0,0 +1,92 @@
439 +# Copyright 2017 Gentoo Foundation
440 +# Distributed under the terms of the GNU General Public License v2
441 +
442 +from portage.tests import TestCase
443 +from portage.tests.resolver.ResolverPlayground import (
444 + ResolverPlayground,
445 + ResolverPlaygroundTestCase,
446 +)
447 +
448 +class VirtualMinimizeChildrenTestCase(TestCase):
449 +
450 + def testVirtualMinimizeChildren(self):
451 + ebuilds = {
452 + 'app-misc/bar-1': {
453 + 'EAPI': '6',
454 + 'RDEPEND': 'virtual/foo'
455 + },
456 + 'virtual/foo-1': {
457 + 'EAPI': '6',
458 + 'RDEPEND': '|| ( app-misc/A app-misc/B ) || ( app-misc/B app-misc/C )'
459 + },
460 + 'app-misc/A-1': {
461 + 'EAPI': '6',
462 + },
463 + 'app-misc/B-1': {
464 + 'EAPI': '6',
465 + },
466 + 'app-misc/C-1': {
467 + 'EAPI': '6',
468 + },
469 + }
470 +
471 + test_cases = (
472 + # Test bug 632026, where we want to minimize the number of
473 + # packages chosen to satisfy overlapping || deps like
474 + # "|| ( foo bar ) || ( bar baz )".
475 + ResolverPlaygroundTestCase(
476 + ['app-misc/bar'],
477 + success=True,
478 + mergelist=[
479 + 'app-misc/B-1',
480 + 'virtual/foo-1',
481 + 'app-misc/bar-1',
482 + ],
483 + ),
484 + )
485 +
486 + playground = ResolverPlayground(debug=False,
487 + ebuilds=ebuilds)
488 +
489 + try:
490 + for test_case in test_cases:
491 + playground.run_TestCase(test_case)
492 + self.assertEqual(test_case.test_success, True,
493 + test_case.fail_msg)
494 + finally:
495 + playground.debug = False
496 + playground.cleanup()
497 +
498 + # If app-misc/A and app-misc/C are installed then
499 + # that choice should be preferred over app-misc/B.
500 + installed = {
501 + 'app-misc/A-1': {
502 + 'EAPI': '6',
503 + },
504 + 'app-misc/C-1': {
505 + 'EAPI': '6',
506 + },
507 + }
508 +
509 + test_cases = (
510 + ResolverPlaygroundTestCase(
511 + ['app-misc/bar'],
512 + success=True,
513 + mergelist=[
514 + 'virtual/foo-1',
515 + 'app-misc/bar-1',
516 + ],
517 + ),
518 + )
519 +
520 + playground = ResolverPlayground(debug=False,
521 + ebuilds=ebuilds, installed=installed)
522 +
523 + try:
524 + for test_case in test_cases:
525 + playground.run_TestCase(test_case)
526 + self.assertEqual(test_case.test_success, True,
527 + test_case.fail_msg)
528 + finally:
529 + playground.debug = False
530 + playground.cleanup()
531 --
532 2.13.5

Replies