Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH v2] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)
Date: Mon, 13 Nov 2017 22:39:56
Message-Id: 20171113223916.63378-1-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026) by Zac Medico
1 Deps like these:
2
3 || ( foo bar ) || ( bar baz )
4
5 Translate to disjunctive normal form (DNF):
6
7 || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) )
8
9 Using DNF, if none of the packages are currently installed,
10 then the ( bar bar ) choice will be automatically preferred
11 since it is satisfied by the fewest number of packages.
12 If the ( foo baz ) choice is already satisfied, then that
13 choice will be preferred instead.
14
15 Since DNF results in exponential explosion of the formula,
16 only use DNF for the parts of the dependencies that have
17 overlapping atoms.
18
19 In order to simplify the implementation of the dnf_convert
20 function, this patch also fixes _expand_new_virtuals to
21 normalize results in the same way as use_reduce (with no
22 redundant nested lists).
23
24 Bug: https://bugs.gentoo.org/632026
25 ---
26 [PATCH v2]
27 * add VirtualMinimizeChildrenTestCase.testOverlapSlotConflict
28 * add message for each assert
29 * fix dnf_convert assert to reject a directly nested conjunction,
30 and eliminate associated conjunction handling code
31
32 pym/portage/dep/_dnf.py | 91 +++++++++++++
33 pym/portage/dep/dep_check.py | 129 +++++++++++++++++-
34 pym/portage/tests/dep/test_dnf_convert.py | 48 +++++++
35 pym/portage/tests/dep/test_overlap_dnf.py | 28 ++++
36 .../resolver/test_virtual_minimize_children.py | 145 +++++++++++++++++++++
37 5 files changed, 434 insertions(+), 7 deletions(-)
38 create mode 100644 pym/portage/dep/_dnf.py
39 create mode 100644 pym/portage/tests/dep/test_dnf_convert.py
40 create mode 100644 pym/portage/tests/dep/test_overlap_dnf.py
41 create mode 100644 pym/portage/tests/resolver/test_virtual_minimize_children.py
42
43 diff --git a/pym/portage/dep/_dnf.py b/pym/portage/dep/_dnf.py
44 new file mode 100644
45 index 000000000..baf229a18
46 --- /dev/null
47 +++ b/pym/portage/dep/_dnf.py
48 @@ -0,0 +1,91 @@
49 +# Copyright 2017 Gentoo Foundation
50 +# Distributed under the terms of the GNU General Public License v2
51 +
52 +from __future__ import unicode_literals
53 +
54 +import itertools
55 +
56 +
57 +def dnf_convert(dep_struct):
58 + """
59 + Convert dep_struct to disjunctive normal form (DNF), where dep_struct
60 + is either a conjunction or disjunction of the form produced by
61 + use_reduce(opconvert=True).
62 + """
63 + # Normalize input to have a top-level conjunction.
64 + if isinstance(dep_struct, list):
65 + if dep_struct and dep_struct[0] == '||':
66 + dep_struct = [dep_struct]
67 + else:
68 + dep_struct = [dep_struct]
69 +
70 + conjunction = []
71 + disjunctions = []
72 +
73 + for x in dep_struct:
74 + if isinstance (x, list):
75 + assert x, 'Normalization error, empty conjunction found in %s' % (dep_struct,)
76 + assert x[0] == '||', \
77 + 'Normalization error, nested conjunction found in %s' % (dep_struct,)
78 + if any(isinstance(element, list) for element in x):
79 + x_dnf = ['||']
80 + for element in x[1:]:
81 + if isinstance(element, list):
82 + # Due to normalization, a disjunction must not be
83 + # nested directly in another disjunction, so this
84 + # must be a conjunction.
85 + assert element, 'Normalization error, empty conjunction found in %s' % (x,)
86 + assert element[0] != '||', \
87 + 'Normalization error, nested disjunction found in %s' % (x,)
88 + element = dnf_convert(element)
89 + if contains_disjunction(element):
90 + assert (len(element) == 1 and
91 + element[0] and element[0][0] == '||'), \
92 + 'Normalization error, expected single disjunction in %s' % (element,)
93 + x_dnf.extend(element[0][1:])
94 + else:
95 + x_dnf.append(element)
96 + else:
97 + x_dnf.append(element)
98 + x = x_dnf
99 + disjunctions.append(x)
100 + else:
101 + conjunction.append(x)
102 +
103 + if disjunctions and (conjunction or len(disjunctions) > 1):
104 + dnf_form = ['||']
105 + for x in itertools.product(*[x[1:] for x in disjunctions]):
106 + normalized = conjunction[:]
107 + for element in x:
108 + if isinstance(element, list):
109 + normalized.extend(element)
110 + else:
111 + normalized.append(element)
112 + dnf_form.append(normalized)
113 + result = [dnf_form]
114 + else:
115 + result = conjunction + disjunctions
116 +
117 + return result
118 +
119 +
120 +def contains_disjunction(dep_struct):
121 + """
122 + Search for a disjunction contained in dep_struct, where dep_struct
123 + is either a conjunction or disjunction of the form produced by
124 + use_reduce(opconvert=True). If dep_struct is a disjunction, then
125 + this only returns True if there is a nested disjunction. Due to
126 + normalization, recursion is only needed when dep_struct is a
127 + disjunction containing a conjunction. If dep_struct is a conjunction,
128 + then it is assumed that normalization has elevated any nested
129 + disjunctions to the top-level.
130 + """
131 + is_disjunction = dep_struct and dep_struct[0] == '||'
132 + for x in dep_struct:
133 + if isinstance(x, list):
134 + assert x, 'Normalization error, empty conjunction found in %s' % (dep_struct,)
135 + if x[0] == '||':
136 + return True
137 + elif is_disjunction and contains_disjunction(x):
138 + return True
139 + return False
140 diff --git a/pym/portage/dep/dep_check.py b/pym/portage/dep/dep_check.py
141 index b33f7e5db..bd04c0391 100644
142 --- a/pym/portage/dep/dep_check.py
143 +++ b/pym/portage/dep/dep_check.py
144 @@ -6,14 +6,20 @@ from __future__ import unicode_literals
145 __all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']
146
147 import collections
148 +import itertools
149 import logging
150 import operator
151
152 import portage
153 from portage.dep import Atom, match_from_list, use_reduce
154 +from portage.dep._dnf import (
155 + dnf_convert as _dnf_convert,
156 + contains_disjunction as _contains_disjunction,
157 +)
158 from portage.exception import InvalidDependString, ParseError
159 from portage.localization import _
160 from portage.util import writemsg, writemsg_level
161 +from portage.util.digraph import digraph
162 from portage.util.SlotObject import SlotObject
163 from portage.versions import vercmp, _pkg_str
164
165 @@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
166 atom because it wouldn't necessarily make sense to block all the components
167 of a compound virtual. When more than one new-style virtual is matched,
168 the matches are sorted from highest to lowest versions and the atom is
169 - expanded to || ( highest match ... lowest match )."""
170 + expanded to || ( highest match ... lowest match ).
171 +
172 + The result is normalized in the same way as use_reduce, having a top-level
173 + conjuction, and no redundant nested lists.
174 + """
175 newsplit = []
176 mytrees = trees[myroot]
177 portdb = mytrees["porttree"].dbapi
178 @@ -54,14 +64,31 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
179 portdb = trees[myroot]["bintree"].dbapi
180 pprovideddict = mysettings.pprovideddict
181 myuse = kwargs["myuse"]
182 + is_disjunction = mysplit and mysplit[0] == '||'
183 for x in mysplit:
184 if x == "||":
185 newsplit.append(x)
186 continue
187 elif isinstance(x, list):
188 - newsplit.append(_expand_new_virtuals(x, edebug, mydbapi,
189 + x_exp = _expand_new_virtuals(x, edebug, mydbapi,
190 mysettings, myroot=myroot, trees=trees, use_mask=use_mask,
191 - use_force=use_force, **kwargs))
192 + use_force=use_force, **kwargs)
193 + if is_disjunction:
194 + if len(x_exp) == 1:
195 + x = x_exp[0]
196 + if isinstance(x, list):
197 + # Due to normalization, a conjunction must not be
198 + # nested directly in another conjunction, so this
199 + # must be a disjunction.
200 + assert x and x[0] == '||', \
201 + 'Normalization error, nested conjunction found in %s' % (x_exp,)
202 + newsplit.extend(x[1:])
203 + else:
204 + newsplit.append(x)
205 + else:
206 + newsplit.append(x_exp)
207 + else:
208 + newsplit.extend(x_exp)
209 continue
210
211 if not isinstance(x, Atom):
212 @@ -101,6 +128,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
213 a.append(Atom(x.replace(x.cp, y.cp, 1)))
214 if not a:
215 newsplit.append(x)
216 + elif is_disjunction:
217 + newsplit.extend(a)
218 elif len(a) == 1:
219 newsplit.append(a[0])
220 else:
221 @@ -218,11 +247,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi, mysettings, myroot="/",
222 newsplit.append(x)
223 if atom_graph is not None:
224 atom_graph.add((x, id(x)), graph_parent)
225 + elif is_disjunction:
226 + newsplit.extend(a)
227 elif len(a) == 1:
228 - newsplit.append(a[0])
229 + newsplit.extend(a[0])
230 else:
231 newsplit.append(['||'] + a)
232
233 + # For consistency with related functions like use_reduce, always
234 + # normalize the result to have a top-level conjunction.
235 + if is_disjunction:
236 + newsplit = [newsplit]
237 +
238 return newsplit
239
240 def dep_eval(deplist):
241 @@ -612,9 +648,9 @@ def dep_zapdeps(unreduced, reduced, myroot, use_binaries=0, trees=None):
242 for choices in choice_bins:
243 if len(choices) < 2:
244 continue
245 - # Prefer choices with all_installed_slots for bug #480736.
246 - choices.sort(key=operator.attrgetter('all_installed_slots'),
247 - reverse=True)
248 + # Prefer choices with all_installed_slots for bug #480736, and
249 + # choices with a smaller number of packages for bug #632026.
250 + choices.sort(key=lambda x: (not x.all_installed_slots, len(x.slot_map)))
251 for choice_1 in choices[1:]:
252 cps = set(choice_1.cp_map)
253 for choice_2 in choices:
254 @@ -741,6 +777,9 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
255 except ParseError as e:
256 return [0, "%s" % (e,)]
257
258 + if mysettings.local_config: # if not repoman
259 + mysplit = _overlap_dnf(mysplit)
260 +
261 mysplit2 = dep_wordreduce(mysplit,
262 mysettings, mydbapi, mode, use_cache=use_cache)
263 if mysplit2 is None:
264 @@ -755,6 +794,82 @@ def dep_check(depstring, mydbapi, mysettings, use="yes", mode=None, myuse=None,
265
266 return [1, selected_atoms]
267
268 +
269 +def _overlap_dnf(dep_struct):
270 + """
271 + Combine overlapping || groups using disjunctive normal form (DNF), in
272 + order to minimize the number of packages chosen to satisfy cases like
273 + "|| ( foo bar ) || ( bar baz )" as in bug #632026. Non-overlapping
274 + groups are excluded from the conversion, since DNF leads to exponential
275 + explosion of the formula.
276 + """
277 + if not _contains_disjunction(dep_struct):
278 + return dep_struct
279 +
280 + # map atom.cp to disjunctions
281 + cp_map = collections.defaultdict(list)
282 + # graph atom.cp, with edges connecting atoms in the same disjunction
283 + overlap_graph = digraph()
284 + # map id(disjunction) to index in dep_struct, for deterministic output
285 + order_map = {}
286 + order_key = lambda x: order_map[id(x)]
287 + result = []
288 + for i, x in enumerate(dep_struct):
289 + if isinstance(x, list):
290 + assert x and x[0] == '||', \
291 + 'Normalization error, nested conjunction found in %s' % (dep_struct,)
292 + order_map[id(x)] = i
293 + prev_cp = None
294 + for atom in _iter_flatten(x):
295 + if isinstance(atom, Atom) and not atom.blocker:
296 + cp_map[atom.cp].append(x)
297 + overlap_graph.add(atom.cp, parent=prev_cp)
298 + prev_cp = atom.cp
299 + if prev_cp is None: # only contains blockers
300 + result.append(x)
301 + else:
302 + result.append(x)
303 +
304 + # group together disjunctions having atom.cp overlap
305 + traversed = set()
306 + for cp in overlap_graph:
307 + if cp in traversed:
308 + continue
309 + disjunctions = {}
310 + stack = [cp]
311 + while stack:
312 + cp = stack.pop()
313 + traversed.add(cp)
314 + for x in cp_map[cp]:
315 + disjunctions[id(x)] = x
316 + for other_cp in itertools.chain(overlap_graph.child_nodes(cp),
317 + overlap_graph.parent_nodes(cp)):
318 + if other_cp not in traversed:
319 + stack.append(other_cp)
320 +
321 + if len(disjunctions) > 1:
322 + # convert overlapping disjunctions to DNF
323 + result.extend(_dnf_convert(
324 + sorted(disjunctions.values(), key=order_key)))
325 + else:
326 + # pass through non-overlapping disjunctions
327 + result.append(disjunctions.popitem()[1])
328 +
329 + return result
330 +
331 +
332 +def _iter_flatten(dep_struct):
333 + """
334 + Yield nested elements of dep_struct.
335 + """
336 + for x in dep_struct:
337 + if isinstance(x, list):
338 + for x in _iter_flatten(x):
339 + yield x
340 + else:
341 + yield x
342 +
343 +
344 def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
345 "Reduces the deplist to ones and zeros"
346 deplist=mydeplist[:]
347 diff --git a/pym/portage/tests/dep/test_dnf_convert.py b/pym/portage/tests/dep/test_dnf_convert.py
348 new file mode 100644
349 index 000000000..b92778d4a
350 --- /dev/null
351 +++ b/pym/portage/tests/dep/test_dnf_convert.py
352 @@ -0,0 +1,48 @@
353 +# Copyright 2017 Gentoo Foundation
354 +# Distributed under the terms of the GNU General Public License v2
355 +
356 +from portage.tests import TestCase
357 +from portage.dep import use_reduce
358 +from portage.dep._dnf import dnf_convert
359 +
360 +class DNFConvertTestCase(TestCase):
361 +
362 + def testDNFConvert(self):
363 +
364 + test_cases = (
365 + (
366 + '|| ( A B ) || ( C D )',
367 + [['||', ['A', 'C'], ['A', 'D'], ['B', 'C'], ['B', 'D']]],
368 + ),
369 + (
370 + '|| ( A B ) || ( B C )',
371 + [['||', ['A', 'B'], ['A', 'C'], ['B', 'B'], ['B', 'C']]],
372 + ),
373 + (
374 + '|| ( A ( B C D ) )',
375 + [['||', 'A', ['B', 'C', 'D']]],
376 + ),
377 + (
378 + '|| ( A ( B C D ) ) E',
379 + [['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
380 + ),
381 + (
382 + '|| ( A ( B C ) ) || ( D E ) F',
383 + [['||', ['F', 'A', 'D'], ['F', 'A', 'E'], ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
384 + ),
385 + (
386 + '|| ( A ( B C || ( D E ) ) ( F G ) H )',
387 + [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], ['F', 'G'], 'H']],
388 + ),
389 + (
390 + '|| ( A ( B C || ( D E ) ) F )',
391 + [['||', 'A', ['B', 'C', 'D'], ['B', 'C', 'E'], 'F']],
392 + ),
393 + (
394 + '|| ( A ( C || ( D E ) || ( F G ) ) H )',
395 + [['||', 'A', ['C', 'D', 'F'], ['C', 'D', 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
396 + ),
397 + )
398 +
399 + for dep_str, result in test_cases:
400 + self.assertEqual(dnf_convert(use_reduce(dep_str, opconvert=True)), result)
401 diff --git a/pym/portage/tests/dep/test_overlap_dnf.py b/pym/portage/tests/dep/test_overlap_dnf.py
402 new file mode 100644
403 index 000000000..79b3e8e46
404 --- /dev/null
405 +++ b/pym/portage/tests/dep/test_overlap_dnf.py
406 @@ -0,0 +1,28 @@
407 +# Copyright 2017 Gentoo Foundation
408 +# Distributed under the terms of the GNU General Public License v2
409 +
410 +from portage.tests import TestCase
411 +from portage.dep import Atom, use_reduce
412 +from portage.dep.dep_check import _overlap_dnf
413 +
414 +class OverlapDNFTestCase(TestCase):
415 +
416 + def testOverlapDNF(self):
417 +
418 + test_cases = (
419 + (
420 + '|| ( cat/A cat/B ) cat/E || ( cat/C cat/D )',
421 + ['cat/E', ['||', 'cat/A', 'cat/B'], ['||', 'cat/C', 'cat/D']],
422 + ),
423 + (
424 + '|| ( cat/A cat/B ) cat/D || ( cat/B cat/C )',
425 + ['cat/D', ['||', ['cat/A', 'cat/B'], ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
426 + ),
427 + (
428 + '|| ( cat/A cat/B ) || ( cat/C cat/D ) || ( ( cat/B cat/E ) cat/F )',
429 + [['||', ['cat/A', 'cat/B', 'cat/E'], ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']], ['||', 'cat/C', 'cat/D']],
430 + ),
431 + )
432 +
433 + for dep_str, result in test_cases:
434 + self.assertEqual(_overlap_dnf(use_reduce(dep_str, token_class=Atom, opconvert=True)), result)
435 diff --git a/pym/portage/tests/resolver/test_virtual_minimize_children.py b/pym/portage/tests/resolver/test_virtual_minimize_children.py
436 new file mode 100644
437 index 000000000..83ae34e77
438 --- /dev/null
439 +++ b/pym/portage/tests/resolver/test_virtual_minimize_children.py
440 @@ -0,0 +1,145 @@
441 +# Copyright 2017 Gentoo Foundation
442 +# Distributed under the terms of the GNU General Public License v2
443 +
444 +from portage.tests import TestCase
445 +from portage.tests.resolver.ResolverPlayground import (
446 + ResolverPlayground,
447 + ResolverPlaygroundTestCase,
448 +)
449 +
450 +class VirtualMinimizeChildrenTestCase(TestCase):
451 +
452 + def testVirtualMinimizeChildren(self):
453 + ebuilds = {
454 + 'app-misc/bar-1': {
455 + 'EAPI': '6',
456 + 'RDEPEND': 'virtual/foo'
457 + },
458 + 'virtual/foo-1': {
459 + 'EAPI': '6',
460 + 'RDEPEND': '|| ( app-misc/A app-misc/B ) || ( app-misc/B app-misc/C )'
461 + },
462 + 'app-misc/A-1': {
463 + 'EAPI': '6',
464 + },
465 + 'app-misc/B-1': {
466 + 'EAPI': '6',
467 + },
468 + 'app-misc/C-1': {
469 + 'EAPI': '6',
470 + },
471 + }
472 +
473 + test_cases = (
474 + # Test bug 632026, where we want to minimize the number of
475 + # packages chosen to satisfy overlapping || deps like
476 + # "|| ( foo bar ) || ( bar baz )".
477 + ResolverPlaygroundTestCase(
478 + ['app-misc/bar'],
479 + success=True,
480 + mergelist=[
481 + 'app-misc/B-1',
482 + 'virtual/foo-1',
483 + 'app-misc/bar-1',
484 + ],
485 + ),
486 + )
487 +
488 + playground = ResolverPlayground(debug=False,
489 + ebuilds=ebuilds)
490 +
491 + try:
492 + for test_case in test_cases:
493 + playground.run_TestCase(test_case)
494 + self.assertEqual(test_case.test_success, True,
495 + test_case.fail_msg)
496 + finally:
497 + playground.debug = False
498 + playground.cleanup()
499 +
500 + # If app-misc/A and app-misc/C are installed then
501 + # that choice should be preferred over app-misc/B.
502 + installed = {
503 + 'app-misc/A-1': {
504 + 'EAPI': '6',
505 + },
506 + 'app-misc/C-1': {
507 + 'EAPI': '6',
508 + },
509 + }
510 +
511 + test_cases = (
512 + ResolverPlaygroundTestCase(
513 + ['app-misc/bar'],
514 + success=True,
515 + mergelist=[
516 + 'virtual/foo-1',
517 + 'app-misc/bar-1',
518 + ],
519 + ),
520 + )
521 +
522 + playground = ResolverPlayground(debug=False,
523 + ebuilds=ebuilds, installed=installed)
524 +
525 + try:
526 + for test_case in test_cases:
527 + playground.run_TestCase(test_case)
528 + self.assertEqual(test_case.test_success, True,
529 + test_case.fail_msg)
530 + finally:
531 + playground.debug = False
532 + playground.cleanup()
533 +
534 + def testOverlapSlotConflict(self):
535 + ebuilds = {
536 + 'app-misc/bar-1': {
537 + 'EAPI': '6',
538 + 'RDEPEND': 'virtual/foo'
539 + },
540 + 'virtual/foo-1': {
541 + 'EAPI': '6',
542 + 'RDEPEND': '|| ( app-misc/A >=app-misc/B-2 ) || ( <app-misc/B-2 app-misc/C )'
543 + },
544 + 'app-misc/A-1': {
545 + 'EAPI': '6',
546 + },
547 + 'app-misc/B-2': {
548 + 'EAPI': '6',
549 + },
550 + 'app-misc/B-1': {
551 + 'EAPI': '6',
552 + },
553 + 'app-misc/C-1': {
554 + 'EAPI': '6',
555 + },
556 + }
557 +
558 + test_cases = (
559 + # Here the ( >=app-misc/B-2 <app-misc/B-2 ) choice is not satisfiable.
560 + ResolverPlaygroundTestCase(
561 + ['app-misc/bar'],
562 + success=True,
563 + ambiguous_merge_order=True,
564 + mergelist=[
565 + (
566 + 'app-misc/C-1',
567 + 'app-misc/A-1',
568 + ),
569 + 'virtual/foo-1',
570 + 'app-misc/bar-1',
571 + ]
572 + ),
573 + )
574 +
575 + playground = ResolverPlayground(debug=False,
576 + ebuilds=ebuilds)
577 +
578 + try:
579 + for test_case in test_cases:
580 + playground.run_TestCase(test_case)
581 + self.assertEqual(test_case.test_success, True,
582 + test_case.fail_msg)
583 + finally:
584 + playground.debug = False
585 + playground.cleanup()
586 --
587 2.13.5