1 |
On Sun, 16 Aug 2020 20:26:56 -0700 |
2 |
Zac Medico <zmedico@g.o> wrote: |
3 |
|
4 |
> Add --jobs and --load-average options which allow dependency checks |
5 |
> for multiple profiles to run in parallel. The increase in performance |
6 |
> is non-linear for the number of jobs, but it can be worthwhile |
7 |
> (I measured a 35% decrease in time when running 'repoman -j8 full' |
8 |
> on sys-apps/portage). For the -j1 case (default), all dependency |
9 |
> checks run in the main process as usual, so there is no significant |
10 |
> performance penalty for the default case. |
11 |
> |
12 |
> Bug: https://bugs.gentoo.org/448462 |
13 |
> Signed-off-by: Zac Medico <zmedico@g.o> |
14 |
> --- |
15 |
> repoman/lib/repoman/argparser.py | 9 ++ |
16 |
> .../repoman/modules/scan/depend/profile.py | 117 |
17 |
> +++++++++++++++--- repoman/man/repoman.1 | |
18 |
> 9 +- 3 files changed, 116 insertions(+), 19 deletions(-) |
19 |
> |
20 |
> diff --git a/repoman/lib/repoman/argparser.py |
21 |
> b/repoman/lib/repoman/argparser.py index 670a0e91d..6d545ccca 100644 |
22 |
> --- a/repoman/lib/repoman/argparser.py |
23 |
> +++ b/repoman/lib/repoman/argparser.py |
24 |
> @@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts): |
25 |
> '--output-style', dest='output_style', |
26 |
> choices=output_keys, help='select output type', default='default') |
27 |
> |
28 |
> + parser.add_argument( |
29 |
> + '-j', '--jobs', dest='jobs', action='store', |
30 |
> type=int, default=1, |
31 |
> + help='Specifies the number of jobs (processes) to |
32 |
> run simultaneously.') + |
33 |
> + parser.add_argument( |
34 |
> + '-l', '--load-average', dest='load_average', |
35 |
> action='store', type=float, default=None, |
36 |
> + help='Specifies that no new jobs (processes) should |
37 |
> be started if there are others ' |
38 |
> + 'jobs running and the load average is at |
39 |
> least load (a floating-point number).') + |
40 |
> parser.add_argument( |
41 |
> '--mode', dest='mode', choices=mode_keys, |
42 |
> help='specify which mode repoman will run in |
43 |
> (default=full)') diff --git |
44 |
> a/repoman/lib/repoman/modules/scan/depend/profile.py |
45 |
> b/repoman/lib/repoman/modules/scan/depend/profile.py index |
46 |
> 39d8b550c..1eb69422a 100644 --- |
47 |
> a/repoman/lib/repoman/modules/scan/depend/profile.py +++ |
48 |
> b/repoman/lib/repoman/modules/scan/depend/profile.py @@ -2,7 +2,9 @@ |
49 |
> |
50 |
> import copy |
51 |
> +import functools |
52 |
> import os |
53 |
> +import types |
54 |
> from pprint import pformat |
55 |
> |
56 |
> from _emerge.Package import Package |
57 |
> @@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches |
58 |
> import _gen_arches from portage.dep import Atom |
59 |
> from portage.package.ebuild.profile_iuse import iter_iuse_vars |
60 |
> from portage.util import getconfig |
61 |
> +from portage.util.futures import asyncio |
62 |
> +from portage.util.futures.compat_coroutine import coroutine, |
63 |
> coroutine_return +from portage.util.futures.executor.fork import |
64 |
> ForkExecutor +from portage.util.futures.iter_completed import |
65 |
> async_iter_completed |
66 |
> |
67 |
> def sort_key(item): |
68 |
> @@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase): |
69 |
> def check(self, **kwargs): |
70 |
> '''Perform profile dependant dependency checks |
71 |
> |
72 |
> - @param arches: |
73 |
> @param pkg: Package in which we check (object). |
74 |
> @param ebuild: Ebuild which we check (object). |
75 |
> - @param baddepsyntax: boolean |
76 |
> - @param unknown_pkgs: set of tuples (type, |
77 |
> atom.unevaluated_atom) @returns: dictionary |
78 |
> ''' |
79 |
> ebuild = kwargs.get('ebuild').get() |
80 |
> pkg = kwargs.get('pkg').get() |
81 |
> - unknown_pkgs, baddepsyntax = _depend_checks( |
82 |
> + |
83 |
> + ebuild.unknown_pkgs, ebuild.baddepsyntax = |
84 |
> _depend_checks( ebuild, pkg, self.portdb, self.qatracker, |
85 |
> self.repo_metadata, self.repo_settings.qadata) |
86 |
> |
87 |
> @@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase): |
88 |
> relevant_profiles.append((keyword, |
89 |
> groups, prof)) |
90 |
> relevant_profiles.sort(key=sort_key) |
91 |
> + ebuild.relevant_profiles = relevant_profiles |
92 |
> + |
93 |
> + if self.options.jobs <= 1: |
94 |
> + for task in self._iter_tasks(None, None, |
95 |
> ebuild, pkg): |
96 |
> + task, results = task |
97 |
> + for result in results: |
98 |
> + self._check_result(task, |
99 |
> result) + |
100 |
> + loop = asyncio._wrap_loop() |
101 |
> + loop.run_until_complete(self._async_check(loop=loop, |
102 |
> **kwargs)) + |
103 |
> + return False |
104 |
> + |
105 |
> + @coroutine |
106 |
> + def _async_check(self, loop=None, **kwargs): |
107 |
> + '''Perform async profile dependant dependency checks |
108 |
> + |
109 |
> + @param arches: |
110 |
> + @param pkg: Package in which we check (object). |
111 |
> + @param ebuild: Ebuild which we check (object). |
112 |
> + @param baddepsyntax: boolean |
113 |
> + @param unknown_pkgs: set of tuples (type, |
114 |
> atom.unevaluated_atom) |
115 |
> + @returns: dictionary |
116 |
> + ''' |
117 |
> + loop = asyncio._wrap_loop(loop) |
118 |
> + ebuild = kwargs.get('ebuild').get() |
119 |
> + pkg = kwargs.get('pkg').get() |
120 |
> + unknown_pkgs = ebuild.unknown_pkgs |
121 |
> + baddepsyntax = ebuild.baddepsyntax |
122 |
> + |
123 |
> + # Use max_workers=True to ensure immediate fork, |
124 |
> since _iter_tasks |
125 |
> + # needs the fork to create a snapshot of current |
126 |
> state. |
127 |
> + executor = |
128 |
> ForkExecutor(max_workers=self.options.jobs) + |
129 |
> + if self.options.jobs > 1: |
130 |
> + for future_done_set in |
131 |
> async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg), |
132 |
> + max_jobs=self.options.jobs, |
133 |
> max_load=self.options.load_average, loop=loop): |
134 |
> + for task in (yield future_done_set): |
135 |
> + task, results = task.result() |
136 |
> + for result in results: |
137 |
> + |
138 |
> self._check_result(task, result) + |
139 |
> + if not baddepsyntax and unknown_pkgs: |
140 |
> + type_map = {} |
141 |
> + for mytype, atom in unknown_pkgs: |
142 |
> + type_map.setdefault(mytype, |
143 |
> set()).add(atom) |
144 |
> + for mytype, atoms in type_map.items(): |
145 |
> + self.qatracker.add_error( |
146 |
> + "dependency.unknown", "%s: |
147 |
> %s: %s" |
148 |
> + % (ebuild.relative_path, |
149 |
> mytype, ", ".join(sorted(atoms)))) |
150 |
> - for keyword, groups, prof in relevant_profiles: |
151 |
> + @coroutine |
152 |
> + def _task(self, task): |
153 |
> + yield task.future |
154 |
> + coroutine_return((task, task.future.result())) |
155 |
> + |
156 |
> + def _iter_tasks(self, loop, executor, ebuild, pkg): |
157 |
> + for keyword, groups, prof in |
158 |
> ebuild.relevant_profiles: |
159 |
> is_stable_profile = prof.status == "stable" |
160 |
> is_dev_profile = prof.status == "dev" and \ |
161 |
> @@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase): |
162 |
> dep_settings.usemask = |
163 |
> dep_settings._use_manager.getUseMask( pkg, |
164 |
> stable=dep_settings._parent_stable) |
165 |
> + task = types.SimpleNamespace(ebuild=ebuild, |
166 |
> prof=prof, keyword=keyword) + |
167 |
> + target = |
168 |
> functools.partial(self._task_subprocess, task, pkg, dep_settings) + |
169 |
> + if self.options.jobs <= 1: |
170 |
> + yield (task, target()) |
171 |
> + else: |
172 |
> + task.future = |
173 |
> asyncio.ensure_future(loop.run_in_executor(executor, target), |
174 |
> loop=loop) |
175 |
> + yield self._task(task) |
176 |
> + |
177 |
> + |
178 |
> + def _task_subprocess(self, task, pkg, dep_settings): |
179 |
> + ebuild = task.ebuild |
180 |
> + baddepsyntax = ebuild.baddepsyntax |
181 |
> + results = [] |
182 |
> + prof = task.prof |
183 |
> if not baddepsyntax: |
184 |
> ismasked = not ebuild.archs or \ |
185 |
> pkg.cpv not in |
186 |
> self.portdb.xmatch("match-visible", @@ -163,7 +239,7 @@ class |
187 |
> ProfileDependsChecks(ScanBase): self.have['pmasked'] = |
188 |
> bool(dep_settings._getMaskAtom( pkg.cpv, ebuild.metadata)) |
189 |
> if |
190 |
> self.options.ignore_masked: |
191 |
> - continue |
192 |
> + return results |
193 |
> # we are testing deps for a |
194 |
> masked package; give it some lee-way suffix = "masked" |
195 |
> matchmode = |
196 |
> "minimum-all-ignore-profile" @@ -191,6 +267,22 @@ class |
197 |
> ProfileDependsChecks(ScanBase): myvalue, self.portdb, dep_settings, |
198 |
> use="all", |
199 |
> mode=matchmode, trees=self.repo_settings.trees) |
200 |
> + |
201 |
> results.append(types.SimpleNamespace(atoms=atoms, success=success, |
202 |
> mykey=mykey, mytype=mytype)) + |
203 |
> + return results |
204 |
> + |
205 |
> + |
206 |
> + def _check_result(self, task, result): |
207 |
> + prof = task.prof |
208 |
> + keyword = task.keyword |
209 |
> + ebuild = task.ebuild |
210 |
> + unknown_pkgs = |
211 |
> ebuild.unknown_pkgs + |
212 |
> + success = result.success |
213 |
> + atoms = result.atoms |
214 |
> + mykey = result.mykey |
215 |
> + mytype = result.mytype |
216 |
> + |
217 |
> if success: |
218 |
> if atoms: |
219 |
> |
220 |
> @@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase): |
221 |
> |
222 |
> # if we |
223 |
> emptied out our list, continue: if not all_atoms: |
224 |
> - |
225 |
> continue |
226 |
> + |
227 |
> return |
228 |
> # Filter out |
229 |
> duplicates. We do this by hand (rather # than use a set) so the |
230 |
> order is stable and better @@ -255,17 +347,6 @@ class |
231 |
> ProfileDependsChecks(ScanBase): % (ebuild.relative_path, mytype, |
232 |
> keyword, prof, pformat(atoms, indent=6))) |
233 |
> |
234 |
> - if not baddepsyntax and unknown_pkgs: |
235 |
> - type_map = {} |
236 |
> - for mytype, atom in unknown_pkgs: |
237 |
> - type_map.setdefault(mytype, |
238 |
> set()).add(atom) |
239 |
> - for mytype, atoms in type_map.items(): |
240 |
> - self.qatracker.add_error( |
241 |
> - "dependency.unknown", "%s: |
242 |
> %s: %s" |
243 |
> - % (ebuild.relative_path, |
244 |
> mytype, ", ".join(sorted(atoms)))) - |
245 |
> - return False |
246 |
> - |
247 |
> @property |
248 |
> def runInEbuilds(self): |
249 |
> '''Ebuild level scans''' |
250 |
> diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1 |
251 |
> index a6a9937e5..6f9a24544 100644 |
252 |
> --- a/repoman/man/repoman.1 |
253 |
> +++ b/repoman/man/repoman.1 |
254 |
> @@ -1,4 +1,4 @@ |
255 |
> -.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman" |
256 |
> +.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman" |
257 |
> .SH NAME |
258 |
> repoman \- Gentoo's program to enforce a minimal level of quality |
259 |
> assurance in packages added to the ebuild repository |
260 |
> @@ -83,6 +83,13 @@ Be less verbose about extraneous info |
261 |
> \fB-p\fR, \fB--pretend\fR |
262 |
> Don't commit or fix anything; just show what would be done |
263 |
> .TP |
264 |
> +\fB\-j\fR, \fB\-\-jobs\fR |
265 |
> +Specifies the number of jobs (processes) to run simultaneously. |
266 |
> +.TP |
267 |
> +\fB\-l\fR, \fB\-\-load-average\fR |
268 |
> +Specifies that no new jobs (processes) should be started if there |
269 |
> are others +jobs running and the load average is at least load (a |
270 |
> floating\-point number). +.TP |
271 |
> \fB-x\fR, \fB--xmlparse\fR |
272 |
> Forces the metadata.xml parse check to be carried out |
273 |
> .TP |
274 |
|
275 |
|
276 |
code looks good for me |