Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462)
Date: Mon, 17 Aug 2020 03:30:53
Message-Id: 20200817032656.287222-1-zmedico@gentoo.org
1 Add --jobs and --load-average options which allow dependency checks
2 for multiple profiles to run in parallel. The increase in performance
3 is non-linear for the number of jobs, but it can be worthwhile
4 (I measured a 35% decrease in time when running 'repoman -j8 full'
5 on sys-apps/portage). For the -j1 case (default), all dependency
6 checks run in the main process as usual, so there is no significant
7 performance penalty for the default case.
8
9 Bug: https://bugs.gentoo.org/448462
10 Signed-off-by: Zac Medico <zmedico@g.o>
11 ---
12 repoman/lib/repoman/argparser.py | 9 ++
13 .../repoman/modules/scan/depend/profile.py | 117 +++++++++++++++---
14 repoman/man/repoman.1 | 9 +-
15 3 files changed, 116 insertions(+), 19 deletions(-)
16
17 diff --git a/repoman/lib/repoman/argparser.py b/repoman/lib/repoman/argparser.py
18 index 670a0e91d..6d545ccca 100644
19 --- a/repoman/lib/repoman/argparser.py
20 +++ b/repoman/lib/repoman/argparser.py
21 @@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
22 '--output-style', dest='output_style', choices=output_keys,
23 help='select output type', default='default')
24
25 + parser.add_argument(
26 + '-j', '--jobs', dest='jobs', action='store', type=int, default=1,
27 + help='Specifies the number of jobs (processes) to run simultaneously.')
28 +
29 + parser.add_argument(
30 + '-l', '--load-average', dest='load_average', action='store', type=float, default=None,
31 + help='Specifies that no new jobs (processes) should be started if there are other '
32 + 'jobs running and the load average is at least load (a floating-point number).')
33 +
34 parser.add_argument(
35 '--mode', dest='mode', choices=mode_keys,
36 help='specify which mode repoman will run in (default=full)')
37 diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py b/repoman/lib/repoman/modules/scan/depend/profile.py
38 index 39d8b550c..1eb69422a 100644
39 --- a/repoman/lib/repoman/modules/scan/depend/profile.py
40 +++ b/repoman/lib/repoman/modules/scan/depend/profile.py
41 @@ -2,7 +2,9 @@
42
43
44 import copy
45 +import functools
46 import os
47 +import types
48 from pprint import pformat
49
50 from _emerge.Package import Package
51 @@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches import _gen_arches
52 from portage.dep import Atom
53 from portage.package.ebuild.profile_iuse import iter_iuse_vars
54 from portage.util import getconfig
55 +from portage.util.futures import asyncio
56 +from portage.util.futures.compat_coroutine import coroutine, coroutine_return
57 +from portage.util.futures.executor.fork import ForkExecutor
58 +from portage.util.futures.iter_completed import async_iter_completed
59
60
61 def sort_key(item):
62 @@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
63 def check(self, **kwargs):
64 '''Perform profile dependant dependency checks
65
66 - @param arches:
67 @param pkg: Package in which we check (object).
68 @param ebuild: Ebuild which we check (object).
69 - @param baddepsyntax: boolean
70 - @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
71 @returns: dictionary
72 '''
73 ebuild = kwargs.get('ebuild').get()
74 pkg = kwargs.get('pkg').get()
75 - unknown_pkgs, baddepsyntax = _depend_checks(
76 +
77 + ebuild.unknown_pkgs, ebuild.baddepsyntax = _depend_checks(
78 ebuild, pkg, self.portdb, self.qatracker, self.repo_metadata,
79 self.repo_settings.qadata)
80
81 @@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
82 relevant_profiles.append((keyword, groups, prof))
83
84 relevant_profiles.sort(key=sort_key)
85 + ebuild.relevant_profiles = relevant_profiles
86 +
87 + if self.options.jobs <= 1:
88 + for task in self._iter_tasks(None, None, ebuild, pkg):
89 + task, results = task
90 + for result in results:
91 + self._check_result(task, result)
92 +
93 + loop = asyncio._wrap_loop()
94 + loop.run_until_complete(self._async_check(loop=loop, **kwargs))
95 +
96 + return False
97 +
98 + @coroutine
99 + def _async_check(self, loop=None, **kwargs):
100 + '''Perform async profile dependent dependency checks
101 +
102 + @param arches:
103 + @param pkg: Package in which we check (object).
104 + @param ebuild: Ebuild which we check (object).
105 + @param baddepsyntax: boolean
106 + @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
107 + @returns: dictionary
108 + '''
109 + loop = asyncio._wrap_loop(loop)
110 + ebuild = kwargs.get('ebuild').get()
111 + pkg = kwargs.get('pkg').get()
112 + unknown_pkgs = ebuild.unknown_pkgs
113 + baddepsyntax = ebuild.baddepsyntax
114 +
115 + # max_workers is set to self.options.jobs, presumably so that each task forks
116 + # immediately; _iter_tasks needs the fork to create a snapshot of current state.
117 + executor = ForkExecutor(max_workers=self.options.jobs)
118 +
119 + if self.options.jobs > 1:
120 + for future_done_set in async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
121 + max_jobs=self.options.jobs, max_load=self.options.load_average, loop=loop):
122 + for task in (yield future_done_set):
123 + task, results = task.result()
124 + for result in results:
125 + self._check_result(task, result)
126 +
127 + if not baddepsyntax and unknown_pkgs:
128 + type_map = {}
129 + for mytype, atom in unknown_pkgs:
130 + type_map.setdefault(mytype, set()).add(atom)
131 + for mytype, atoms in type_map.items():
132 + self.qatracker.add_error(
133 + "dependency.unknown", "%s: %s: %s"
134 + % (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
135
136 - for keyword, groups, prof in relevant_profiles:
137 + @coroutine
138 + def _task(self, task):
139 + yield task.future
140 + coroutine_return((task, task.future.result()))
141 +
142 + def _iter_tasks(self, loop, executor, ebuild, pkg):
143 + for keyword, groups, prof in ebuild.relevant_profiles:
144
145 is_stable_profile = prof.status == "stable"
146 is_dev_profile = prof.status == "dev" and \
147 @@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
148 dep_settings.usemask = dep_settings._use_manager.getUseMask(
149 pkg, stable=dep_settings._parent_stable)
150
151 + task = types.SimpleNamespace(ebuild=ebuild, prof=prof, keyword=keyword)
152 +
153 + target = functools.partial(self._task_subprocess, task, pkg, dep_settings)
154 +
155 + if self.options.jobs <= 1:
156 + yield (task, target())
157 + else:
158 + task.future = asyncio.ensure_future(loop.run_in_executor(executor, target), loop=loop)
159 + yield self._task(task)
160 +
161 +
162 + def _task_subprocess(self, task, pkg, dep_settings):
163 + ebuild = task.ebuild
164 + baddepsyntax = ebuild.baddepsyntax
165 + results = []
166 + prof = task.prof
167 if not baddepsyntax:
168 ismasked = not ebuild.archs or \
169 pkg.cpv not in self.portdb.xmatch("match-visible",
170 @@ -163,7 +239,7 @@ class ProfileDependsChecks(ScanBase):
171 self.have['pmasked'] = bool(dep_settings._getMaskAtom(
172 pkg.cpv, ebuild.metadata))
173 if self.options.ignore_masked:
174 - continue
175 + return results
176 # we are testing deps for a masked package; give it some lee-way
177 suffix = "masked"
178 matchmode = "minimum-all-ignore-profile"
179 @@ -191,6 +267,22 @@ class ProfileDependsChecks(ScanBase):
180 myvalue, self.portdb, dep_settings,
181 use="all", mode=matchmode, trees=self.repo_settings.trees)
182
183 + results.append(types.SimpleNamespace(atoms=atoms, success=success, mykey=mykey, mytype=mytype))
184 +
185 + return results
186 +
187 +
188 + def _check_result(self, task, result):
189 + prof = task.prof
190 + keyword = task.keyword
191 + ebuild = task.ebuild
192 + unknown_pkgs = ebuild.unknown_pkgs
193 +
194 + success = result.success
195 + atoms = result.atoms
196 + mykey = result.mykey
197 + mytype = result.mytype
198 +
199 if success:
200 if atoms:
201
202 @@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
203
204 # if we emptied out our list, continue:
205 if not all_atoms:
206 - continue
207 + return
208
209 # Filter out duplicates. We do this by hand (rather
210 # than use a set) so the order is stable and better
211 @@ -255,17 +347,6 @@ class ProfileDependsChecks(ScanBase):
212 % (ebuild.relative_path, mytype, keyword,
213 prof, pformat(atoms, indent=6)))
214
215 - if not baddepsyntax and unknown_pkgs:
216 - type_map = {}
217 - for mytype, atom in unknown_pkgs:
218 - type_map.setdefault(mytype, set()).add(atom)
219 - for mytype, atoms in type_map.items():
220 - self.qatracker.add_error(
221 - "dependency.unknown", "%s: %s: %s"
222 - % (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
223 -
224 - return False
225 -
226 @property
227 def runInEbuilds(self):
228 '''Ebuild level scans'''
229 diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
230 index a6a9937e5..6f9a24544 100644
231 --- a/repoman/man/repoman.1
232 +++ b/repoman/man/repoman.1
233 @@ -1,4 +1,4 @@
234 -.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
235 +.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
236 .SH NAME
237 repoman \- Gentoo's program to enforce a minimal level of quality assurance in
238 packages added to the ebuild repository
239 @@ -83,6 +83,13 @@ Be less verbose about extraneous info
240 \fB-p\fR, \fB--pretend\fR
241 Don't commit or fix anything; just show what would be done
242 .TP
243 +\fB\-j\fR, \fB\-\-jobs\fR
244 +Specifies the number of jobs (processes) to run simultaneously.
245 +.TP
246 +\fB\-l\fR, \fB\-\-load-average\fR
247 +Specifies that no new jobs (processes) should be started if there are other
248 +jobs running and the load average is at least load (a floating\-point number).
249 +.TP
250 \fB-x\fR, \fB--xmlparse\fR
251 Forces the metadata.xml parse check to be carried out
252 .TP
253 --
254 2.25.3

Replies