Gentoo Archives: gentoo-portage-dev

From: Alec Warner <antarus@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: Re: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026)
Date: Mon, 13 Nov 2017 21:11:49
Message-Id: CAAr7Pr9T-4frqE3wK7534hes+aU6z4inCsk44ngn20iX75588w@mail.gmail.com
In Reply to: [gentoo-portage-dev] [PATCH] dep_check: use DNF to optimize overlapping virtual || deps (bug 632026) by Zac Medico
1 On Sun, Nov 12, 2017 at 6:48 AM, Zac Medico <zmedico@g.o> wrote:
2
3 > Deps like these:
4 >
5 > || ( foo bar ) || ( bar baz )
6 >
7 > Translate to disjunctive normal form (DNF):
8 >
9 > || ( ( foo bar ) ( foo baz ) ( bar bar ) ( bar baz ) )
10 >
11 > Using DNF, if none of the packages are currently installed,
12 > then the ( bar bar ) choice will be automatically preferred
13 > since it is satisfied by the fewest number of packages.
14 > If the ( foo baz ) choice is already satisfied, then that
15 > choice will be preferred instead.
16 >
17 > Since DNF results in exponential explosion of the formula,
18 > only use DNF for the parts of the dependencies that have
19 > overlapping atoms.
20 >
21 > In order to simplify the implementation of the dnf_convert
22 > function, this patch also fixes _expand_new_virtuals to
23 > normalize results in the same way as use_reduce (with no
24 > redundant nested lists).
25 >
26 > Bug: https://bugs.gentoo.org/632026
27 > ---
28 > pym/portage/dep/_dnf.py | 97 ++++++++++++++++
29 > pym/portage/dep/dep_check.py | 127
30 > +++++++++++++++++++--
31 > pym/portage/tests/dep/test_dnf_convert.py | 48 ++++++++
32 > pym/portage/tests/dep/test_overlap_dnf.py | 28 +++++
33 > .../resolver/test_virtual_minimize_children.py | 92 +++++++++++++++
34 > 5 files changed, 385 insertions(+), 7 deletions(-)
35 > create mode 100644 pym/portage/dep/_dnf.py
36 > create mode 100644 pym/portage/tests/dep/test_dnf_convert.py
37 > create mode 100644 pym/portage/tests/dep/test_overlap_dnf.py
38 > create mode 100644 pym/portage/tests/resolver/test_virtual_minimize_
39 > children.py
40 >
41 > diff --git a/pym/portage/dep/_dnf.py b/pym/portage/dep/_dnf.py
42 > new file mode 100644
43 > index 000000000..503b209c2
44 > --- /dev/null
45 > +++ b/pym/portage/dep/_dnf.py
46 > @@ -0,0 +1,97 @@
47 > +# Copyright 2017 Gentoo Foundation
48 > +# Distributed under the terms of the GNU General Public License v2
49 > +
50 > +from __future__ import unicode_literals
51 > +
52 > +import itertools
53 > +
54 > +
55 > +def dnf_convert(dep_struct):
56 > + """
57 > + Convert dep_struct to disjunctive normal form (DNF), where
58 > dep_struct
59 > + is either a conjunction or disjunction of the form produced by
60 > + use_reduce(opconvert=True).
61 > + """
62 > + # Normalize input to have a top-level conjunction.
63 > + if isinstance(dep_struct, list):
64 > + if dep_struct and dep_struct[0] == '||':
65 > + dep_struct = [dep_struct]
66 > + else:
67 > + dep_struct = [dep_struct]
68 > +
69 > + conjunction = []
70 > + disjunctions = []
71 > +
72 > + for x in dep_struct:
73 > + if isinstance (x, list):
74 > + assert x
75 >
76
77
78 I'm not a huge fan of asserts, but if we use them can we use them in the
79 expr, message form?
80
81 assert x, "Assertion failed, wanted x in list form in dep: %s" % dep_struct
82
83 or whatever.
84
85
86 > + if x[0] == '||':
87 > + if any(isinstance(element, list) for
88 > element in x):
89 > + x_dnf = ['||']
90 > + for element in x[1:]:
91 > + if isinstance(element,
92 > list):
93 > + # Due to
94 > normalization, a disjunction must not be
95 > + # nested directly
96 > in another disjunction, so this
97 > + # must be a
98 > conjunction.
99 > + assert element and
100 > element[0] != '||'
101 > + element =
102 > dnf_convert(element)
103 > + if
104 > contains_disjunction(element):
105 > + assert
106 > (len(element) == 1 and
107 > +
108 > element[0] and element[0][0] == '||')
109 > +
110 > x_dnf.extend(element[0][1:])
111 > + else:
112 > +
113 > x_dnf.append(element)
114 > + else:
115 > +
116 > x_dnf.append(element)
117 > + x = x_dnf
118 > + disjunctions.append(x)
119 > + else:
120 > + for x in dnf_convert(x):
121 > + if isinstance (x, list):
122 > + # Due to normalization, a
123 > conjunction must not be
124 > + # nested directly in
125 > another conjunction, so this
126 > + # must be a disjunction.
127 > + assert x and x[0] == '||'
128 > + disjunctions.append(x)
129 > + else:
130 > + conjunction.append(x)
131 > + else:
132 > + conjunction.append(x)
133 > +
134 > + if disjunctions and (conjunction or len(disjunctions) > 1):
135 > + dnf_form = ['||']
136 > + for x in itertools.product(*[x[1:] for x in disjunctions]):
137 > + normalized = conjunction[:]
138 > + for element in x:
139 > + if isinstance(element, list):
140 > + normalized.extend(element)
141 > + else:
142 > + normalized.append(element)
143 > + dnf_form.append(normalized)
144 > + result = [dnf_form]
145 > + else:
146 > + result = conjunction + disjunctions
147 > +
148 > + return result
149 > +
150 > +
151 > +def contains_disjunction(dep_struct):
152 > + """
153 > + Search for a disjunction contained in dep_struct, where dep_struct
154 > + is either a conjunction or disjunction of the form produced by
155 > + use_reduce(opconvert=True). If dep_struct is a disjunction, then
156 > + this only returns True if there is a nested disjunction. Due to
157 > + normalization, recursion is only needed when dep_struct is a
158 > + disjunction containing a conjunction. If dep_struct is a
159 > conjunction,
160 > + then it is assumed that normalization has elevated any nested
161 > + disjunctions to the top-level.
162 > + """
163 > + is_disjunction = dep_struct and dep_struct[0] == '||'
164 > + for x in dep_struct:
165 > + if isinstance(x, list):
166 > + assert x
167 > + if x[0] == '||':
168 > + return True
169 > + elif is_disjunction and contains_disjunction(x):
170 > + return True
171 > + return False
172 > diff --git a/pym/portage/dep/dep_check.py b/pym/portage/dep/dep_check.py
173 > index b33f7e5db..6db4cc42e 100644
174 > --- a/pym/portage/dep/dep_check.py
175 > +++ b/pym/portage/dep/dep_check.py
176 > @@ -6,14 +6,20 @@ from __future__ import unicode_literals
177 > __all__ = ['dep_check', 'dep_eval', 'dep_wordreduce', 'dep_zapdeps']
178 >
179 > import collections
180 > +import itertools
181 > import logging
182 > import operator
183 >
184 > import portage
185 > from portage.dep import Atom, match_from_list, use_reduce
186 > +from portage.dep._dnf import (
187 > + dnf_convert as _dnf_convert,
188 > + contains_disjunction as _contains_disjunction,
189 > +)
190 > from portage.exception import InvalidDependString, ParseError
191 > from portage.localization import _
192 > from portage.util import writemsg, writemsg_level
193 > +from portage.util.digraph import digraph
194 > from portage.util.SlotObject import SlotObject
195 > from portage.versions import vercmp, _pkg_str
196 >
197 > @@ -28,7 +34,11 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
198 > mysettings, myroot="/",
199 > atom because it wouldn't necessarily make sense to block all the
200 > components
201 > of a compound virtual. When more than one new-style virtual is
202 > matched,
203 > the matches are sorted from highest to lowest versions and the
204 > atom is
205 > - expanded to || ( highest match ... lowest match )."""
206 > + expanded to || ( highest match ... lowest match ).
207 > +
208 > + The result is normalized in the same way as use_reduce, having a
209 > top-level
210 > + conjuction, and no redundant nested lists.
211 > + """
212 > newsplit = []
213 > mytrees = trees[myroot]
214 > portdb = mytrees["porttree"].dbapi
215 > @@ -54,14 +64,30 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
216 > mysettings, myroot="/",
217 > portdb = trees[myroot]["bintree"].dbapi
218 > pprovideddict = mysettings.pprovideddict
219 > myuse = kwargs["myuse"]
220 > + is_disjunction = mysplit and mysplit[0] == '||'
221 > for x in mysplit:
222 > if x == "||":
223 > newsplit.append(x)
224 > continue
225 > elif isinstance(x, list):
226 > - newsplit.append(_expand_new_virtuals(x, edebug,
227 > mydbapi,
228 > + x = _expand_new_virtuals(x, edebug, mydbapi,
229 > mysettings, myroot=myroot, trees=trees,
230 > use_mask=use_mask,
231 > - use_force=use_force, **kwargs))
232 > + use_force=use_force, **kwargs)
233 > + if is_disjunction:
234 > + if len(x) == 1:
235 > + x = x[0]
236 > + if isinstance(x, list):
237 > + # Due to normalization, a
238 > conjunction must not be
239 > + # nested directly in
240 > another conjunction, so this
241 > + # must be a disjunction.
242 > + assert x and x[0] == '||'
243 > + newsplit.extend(x[1:])
244 > + else:
245 > + newsplit.append(x)
246 > + else:
247 > + newsplit.append(x)
248 > + else:
249 > + newsplit.extend(x)
250 > continue
251 >
252 > if not isinstance(x, Atom):
253 > @@ -101,6 +127,8 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
254 > mysettings, myroot="/",
255 > a.append(Atom(x.replace(x.cp,
256 > y.cp, 1)))
257 > if not a:
258 > newsplit.append(x)
259 > + elif is_disjunction:
260 > + newsplit.extend(a)
261 > elif len(a) == 1:
262 > newsplit.append(a[0])
263 > else:
264 > @@ -218,11 +246,18 @@ def _expand_new_virtuals(mysplit, edebug, mydbapi,
265 > mysettings, myroot="/",
266 > newsplit.append(x)
267 > if atom_graph is not None:
268 > atom_graph.add((x, id(x)), graph_parent)
269 > + elif is_disjunction:
270 > + newsplit.extend(a)
271 > elif len(a) == 1:
272 > - newsplit.append(a[0])
273 > + newsplit.extend(a[0])
274 > else:
275 > newsplit.append(['||'] + a)
276 >
277 > + # For consistency with related functions like use_reduce, always
278 > + # normalize the result to have a top-level conjunction.
279 > + if is_disjunction:
280 > + newsplit = [newsplit]
281 > +
282 > return newsplit
283 >
284 > def dep_eval(deplist):
285 > @@ -612,9 +647,9 @@ def dep_zapdeps(unreduced, reduced, myroot,
286 > use_binaries=0, trees=None):
287 > for choices in choice_bins:
288 > if len(choices) < 2:
289 > continue
290 > - # Prefer choices with all_installed_slots for bug #480736.
291 > - choices.sort(key=operator.attrgetter('all_installed_
292 > slots'),
293 > - reverse=True)
294 > + # Prefer choices with all_installed_slots for bug #480736,
295 > and
296 > + # choices with a smaller number of packages for bug
297 > #632026.
298 > + choices.sort(key=lambda x: (not x.all_installed_slots,
299 > len(x.slot_map)))
300 > for choice_1 in choices[1:]:
301 > cps = set(choice_1.cp_map)
302 > for choice_2 in choices:
303 > @@ -741,6 +776,9 @@ def dep_check(depstring, mydbapi, mysettings,
304 > use="yes", mode=None, myuse=None,
305 > except ParseError as e:
306 > return [0, "%s" % (e,)]
307 >
308 > + if mysettings.local_config: # if not repoman
309 > + mysplit = _overlap_dnf(mysplit)
310 > +
311 > mysplit2 = dep_wordreduce(mysplit,
312 > mysettings, mydbapi, mode, use_cache=use_cache)
313 > if mysplit2 is None:
314 > @@ -755,6 +793,81 @@ def dep_check(depstring, mydbapi, mysettings,
315 > use="yes", mode=None, myuse=None,
316 >
317 > return [1, selected_atoms]
318 >
319 > +
320 > +def _overlap_dnf(dep_struct):
321 > + """
322 > + Combine overlapping || groups using disjunctive normal form (DNF),
323 > in
324 > + order to minimize the number of packages chosen to satisfy cases
325 > like
326 > + "|| ( foo bar ) || ( bar baz )" as in bug #632026. Non-overlapping
327 > + groups are excluded from the conversion, since DNF leads to
328 > exponential
329 > + explosion of the formula.
330 > + """
331 > + if not _contains_disjunction(dep_struct):
332 > + return dep_struct
333 > +
334 > + # map atom.cp to disjunctions
335 > + cp_map = collections.defaultdict(list)
336 > + # graph atom.cp, with edges connecting atoms in the same
337 > disjunction
338 > + overlap_graph = digraph()
339 > + # map id(disjunction) to index in dep_struct, for deterministic
340 > output
341 > + order_map = {}
342 > + order_key = lambda x: order_map[id(x)]
343 > + result = []
344 > + for i, x in enumerate(dep_struct):
345 > + if isinstance(x, list):
346 > + assert x and x[0] == '||'
347 > + order_map[id(x)] = i
348 > + prev_cp = None
349 > + for atom in _iter_flatten(x):
350 > + if isinstance(atom, Atom) and not
351 > atom.blocker:
352 > + cp_map[atom.cp].append(x)
353 > + overlap_graph.add(atom.cp,
354 > parent=prev_cp)
355 > + prev_cp = atom.cp
356 > + if prev_cp is None: # only contains blockers
357 > + result.append(x)
358 > + else:
359 > + result.append(x)
360 > +
361 > + # group together disjunctions having atom.cp overlap
362 > + traversed = set()
363 > + for cp in overlap_graph:
364 > + if cp in traversed:
365 > + continue
366 > + disjunctions = {}
367 > + stack = [cp]
368 > + while stack:
369 > + cp = stack.pop()
370 > + traversed.add(cp)
371 > + for x in cp_map[cp]:
372 > + disjunctions[id(x)] = x
373 > + for other_cp in itertools.chain(overlap_graph.
374 > child_nodes(cp),
375 > + overlap_graph.parent_nodes(cp)):
376 > + if other_cp not in traversed:
377 > + stack.append(other_cp)
378 > +
379 > + if len(disjunctions) > 1:
380 > + # convert overlapping disjunctions to DNF
381 > + result.extend(_dnf_convert(
382 > + sorted(disjunctions.values(),
383 > key=order_key)))
384 > + else:
385 > + # pass through non-overlapping disjunctions
386 > + result.append(disjunctions.popitem()[1])
387 > +
388 > + return result
389 > +
390 > +
391 > +def _iter_flatten(dep_struct):
392 > + """
393 > + Yield nested elements of dep_struct.
394 > + """
395 > + for x in dep_struct:
396 > + if isinstance(x, list):
397 > + for x in _iter_flatten(x):
398 > + yield x
399 > + else:
400 > + yield x
401 > +
402 > +
403 > def dep_wordreduce(mydeplist,mysettings,mydbapi,mode,use_cache=1):
404 > "Reduces the deplist to ones and zeros"
405 > deplist=mydeplist[:]
406 > diff --git a/pym/portage/tests/dep/test_dnf_convert.py
407 > b/pym/portage/tests/dep/test_dnf_convert.py
408 > new file mode 100644
409 > index 000000000..b92778d4a
410 > --- /dev/null
411 > +++ b/pym/portage/tests/dep/test_dnf_convert.py
412 > @@ -0,0 +1,48 @@
413 > +# Copyright 2017 Gentoo Foundation
414 > +# Distributed under the terms of the GNU General Public License v2
415 > +
416 > +from portage.tests import TestCase
417 > +from portage.dep import use_reduce
418 > +from portage.dep._dnf import dnf_convert
419 > +
420 > +class DNFConvertTestCase(TestCase):
421 > +
422 > + def testDNFConvert(self):
423 > +
424 > + test_cases = (
425 > + (
426 > + '|| ( A B ) || ( C D )',
427 > + [['||', ['A', 'C'], ['A', 'D'], ['B',
428 > 'C'], ['B', 'D']]],
429 > + ),
430 > + (
431 > + '|| ( A B ) || ( B C )',
432 > + [['||', ['A', 'B'], ['A', 'C'], ['B',
433 > 'B'], ['B', 'C']]],
434 > + ),
435 > + (
436 > + '|| ( A ( B C D ) )',
437 > + [['||', 'A', ['B', 'C', 'D']]],
438 > + ),
439 > + (
440 > + '|| ( A ( B C D ) ) E',
441 > + [['||', ['E', 'A'], ['E', 'B', 'C', 'D']]],
442 > + ),
443 > + (
444 > + '|| ( A ( B C ) ) || ( D E ) F',
445 > + [['||', ['F', 'A', 'D'], ['F', 'A', 'E'],
446 > ['F', 'B', 'C', 'D'], ['F', 'B', 'C', 'E']]],
447 > + ),
448 > + (
449 > + '|| ( A ( B C || ( D E ) ) ( F G ) H )',
450 > + [['||', 'A', ['B', 'C', 'D'], ['B', 'C',
451 > 'E'], ['F', 'G'], 'H']],
452 > + ),
453 > + (
454 > + '|| ( A ( B C || ( D E ) ) F )',
455 > + [['||', 'A', ['B', 'C', 'D'], ['B', 'C',
456 > 'E'], 'F']],
457 > + ),
458 > + (
459 > + '|| ( A ( C || ( D E ) || ( F G ) ) H )',
460 > + [['||', 'A', ['C', 'D', 'F'], ['C', 'D',
461 > 'G'], ['C', 'E', 'F'], ['C', 'E', 'G'], 'H']],
462 > + ),
463 > + )
464 > +
465 > + for dep_str, result in test_cases:
466 > + self.assertEqual(dnf_convert(use_reduce(dep_str,
467 > opconvert=True)), result)
468 > diff --git a/pym/portage/tests/dep/test_overlap_dnf.py
469 > b/pym/portage/tests/dep/test_overlap_dnf.py
470 > new file mode 100644
471 > index 000000000..79b3e8e46
472 > --- /dev/null
473 > +++ b/pym/portage/tests/dep/test_overlap_dnf.py
474 > @@ -0,0 +1,28 @@
475 > +# Copyright 2017 Gentoo Foundation
476 > +# Distributed under the terms of the GNU General Public License v2
477 > +
478 > +from portage.tests import TestCase
479 > +from portage.dep import Atom, use_reduce
480 > +from portage.dep.dep_check import _overlap_dnf
481 > +
482 > +class OverlapDNFTestCase(TestCase):
483 > +
484 > + def testOverlapDNF(self):
485 > +
486 > + test_cases = (
487 > + (
488 > + '|| ( cat/A cat/B ) cat/E || ( cat/C cat/D
489 > )',
490 > + ['cat/E', ['||', 'cat/A', 'cat/B'], ['||',
491 > 'cat/C', 'cat/D']],
492 > + ),
493 > + (
494 > + '|| ( cat/A cat/B ) cat/D || ( cat/B cat/C
495 > )',
496 > + ['cat/D', ['||', ['cat/A', 'cat/B'],
497 > ['cat/A', 'cat/C'], ['cat/B', 'cat/B'], ['cat/B', 'cat/C']]],
498 > + ),
499 > + (
500 > + '|| ( cat/A cat/B ) || ( cat/C cat/D ) ||
501 > ( ( cat/B cat/E ) cat/F )',
502 > + [['||', ['cat/A', 'cat/B', 'cat/E'],
503 > ['cat/A', 'cat/F'], ['cat/B', 'cat/B', 'cat/E'], ['cat/B', 'cat/F']],
504 > ['||', 'cat/C', 'cat/D']],
505 > + ),
506 > + )
507 > +
508 > + for dep_str, result in test_cases:
509 > + self.assertEqual(_overlap_dnf(use_reduce(dep_str,
510 > token_class=Atom, opconvert=True)), result)
511 > diff --git a/pym/portage/tests/resolver/test_virtual_minimize_children.py
512 > b/pym/portage/tests/resolver/test_virtual_minimize_children.py
513 > new file mode 100644
514 > index 000000000..35b881bfb
515 > --- /dev/null
516 > +++ b/pym/portage/tests/resolver/test_virtual_minimize_children.py
517 > @@ -0,0 +1,92 @@
518 > +# Copyright 2017 Gentoo Foundation
519 > +# Distributed under the terms of the GNU General Public License v2
520 > +
521 > +from portage.tests import TestCase
522 > +from portage.tests.resolver.ResolverPlayground import (
523 > + ResolverPlayground,
524 > + ResolverPlaygroundTestCase,
525 > +)
526 > +
527 > +class VirtualMinimizeChildrenTestCase(TestCase):
528 > +
529 > + def testVirtualMinimizeChildren(self):
530 > + ebuilds = {
531 > + 'app-misc/bar-1': {
532 > + 'EAPI': '6',
533 > + 'RDEPEND': 'virtual/foo'
534 > + },
535 > + 'virtual/foo-1': {
536 > + 'EAPI': '6',
537 > + 'RDEPEND': '|| ( app-misc/A app-misc/B )
538 > || ( app-misc/B app-misc/C )'
539 > + },
540 > + 'app-misc/A-1': {
541 > + 'EAPI': '6',
542 > + },
543 > + 'app-misc/B-1': {
544 > + 'EAPI': '6',
545 > + },
546 > + 'app-misc/C-1': {
547 > + 'EAPI': '6',
548 > + },
549 > + }
550 > +
551 > + test_cases = (
552 > + # Test bug 632026, where we want to minimize the
553 > number of
554 > + # packages chosen to satisfy overlapping || deps
555 > like
556 > + # "|| ( foo bar ) || ( bar baz )".
557 > + ResolverPlaygroundTestCase(
558 > + ['app-misc/bar'],
559 > + success=True,
560 > + mergelist=[
561 > + 'app-misc/B-1',
562 > + 'virtual/foo-1',
563 > + 'app-misc/bar-1',
564 > + ],
565 > + ),
566 > + )
567 > +
568 > + playground = ResolverPlayground(debug=False,
569 > + ebuilds=ebuilds)
570 > +
571 > + try:
572 > + for test_case in test_cases:
573 > + playground.run_TestCase(test_case)
574 > + self.assertEqual(test_case.test_success,
575 > True,
576 > + test_case.fail_msg)
577 > + finally:
578 > + playground.debug = False
579 > + playground.cleanup()
580 > +
581 > + # If app-misc/A and app-misc/C are installed then
582 > + # that choice should be preferred over app-misc/B.
583 > + installed = {
584 > + 'app-misc/A-1': {
585 > + 'EAPI': '6',
586 > + },
587 > + 'app-misc/C-1': {
588 > + 'EAPI': '6',
589 > + },
590 > + }
591 > +
592 > + test_cases = (
593 > + ResolverPlaygroundTestCase(
594 > + ['app-misc/bar'],
595 > + success=True,
596 > + mergelist=[
597 > + 'virtual/foo-1',
598 > + 'app-misc/bar-1',
599 > + ],
600 > + ),
601 > + )
602 > +
603 > + playground = ResolverPlayground(debug=False,
604 > + ebuilds=ebuilds, installed=installed)
605 > +
606 > + try:
607 > + for test_case in test_cases:
608 > + playground.run_TestCase(test_case)
609 > + self.assertEqual(test_case.test_success,
610 > True,
611 > + test_case.fail_msg)
612 > + finally:
613 > + playground.debug = False
614 > + playground.cleanup()
615 > --
616 > 2.13.5
617 >
618 >
619 >

Replies