Gentoo Archives: gentoo-portage-dev

From: Brian Dolbec <dolsen@g.o>
To: gentoo-portage-dev@l.g.o
Subject: Re: [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462)
Date: Mon, 17 Aug 2020 15:24:14
Message-Id: 20200817082410.60abbba1@storm
In Reply to: [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462) by Zac Medico
1 On Sun, 16 Aug 2020 20:26:56 -0700
2 Zac Medico <zmedico@g.o> wrote:
3
4 > Add --jobs and --load-average options which allow dependency checks
5 > for multiple profiles to run in parallel. The increase in performance
6 > is non-linear for the number of jobs, but it can be worthwhile
7 > (I measured a 35% decrease in time when running 'repoman -j8 full'
8 > on sys-apps/portage). For the -j1 case (default), all dependency
9 > checks run in the main process as usual, so there is no significant
10 > performance penalty for the default case.
11 >
12 > Bug: https://bugs.gentoo.org/448462
13 > Signed-off-by: Zac Medico <zmedico@g.o>
14 > ---
15 > repoman/lib/repoman/argparser.py | 9 ++
16 > .../repoman/modules/scan/depend/profile.py | 117
17 > +++++++++++++++--- repoman/man/repoman.1 |
18 > 9 +- 3 files changed, 116 insertions(+), 19 deletions(-)
19 >
20 > diff --git a/repoman/lib/repoman/argparser.py
21 > b/repoman/lib/repoman/argparser.py index 670a0e91d..6d545ccca 100644
22 > --- a/repoman/lib/repoman/argparser.py
23 > +++ b/repoman/lib/repoman/argparser.py
24 > @@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
25 > '--output-style', dest='output_style',
26 > choices=output_keys, help='select output type', default='default')
27 >
28 > + parser.add_argument(
29 > + '-j', '--jobs', dest='jobs', action='store',
30 > type=int, default=1,
31 > + help='Specifies the number of jobs (processes) to
32 > run simultaneously.') +
33 > + parser.add_argument(
34 > + '-l', '--load-average', dest='load_average',
35 > action='store', type=float, default=None,
36 > + help='Specifies that no new jobs (processes) should
37 > be started if there are others '
38 > + 'jobs running and the load average is at
39 > least load (a floating-point number).') +
40 > parser.add_argument(
41 > '--mode', dest='mode', choices=mode_keys,
42 > help='specify which mode repoman will run in
43 > (default=full)') diff --git
44 > a/repoman/lib/repoman/modules/scan/depend/profile.py
45 > b/repoman/lib/repoman/modules/scan/depend/profile.py index
46 > 39d8b550c..1eb69422a 100644 ---
47 > a/repoman/lib/repoman/modules/scan/depend/profile.py +++
48 > b/repoman/lib/repoman/modules/scan/depend/profile.py @@ -2,7 +2,9 @@
49 >
50 > import copy
51 > +import functools
52 > import os
53 > +import types
54 > from pprint import pformat
55 >
56 > from _emerge.Package import Package
57 > @@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches
58 > import _gen_arches from portage.dep import Atom
59 > from portage.package.ebuild.profile_iuse import iter_iuse_vars
60 > from portage.util import getconfig
61 > +from portage.util.futures import asyncio
62 > +from portage.util.futures.compat_coroutine import coroutine,
63 > coroutine_return +from portage.util.futures.executor.fork import
64 > ForkExecutor +from portage.util.futures.iter_completed import
65 > async_iter_completed
66 >
67 > def sort_key(item):
68 > @@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
69 > def check(self, **kwargs):
70 > '''Perform profile dependant dependency checks
71 >
72 > - @param arches:
73 > @param pkg: Package in which we check (object).
74 > @param ebuild: Ebuild which we check (object).
75 > - @param baddepsyntax: boolean
76 > - @param unknown_pkgs: set of tuples (type,
77 > atom.unevaluated_atom) @returns: dictionary
78 > '''
79 > ebuild = kwargs.get('ebuild').get()
80 > pkg = kwargs.get('pkg').get()
81 > - unknown_pkgs, baddepsyntax = _depend_checks(
82 > +
83 > + ebuild.unknown_pkgs, ebuild.baddepsyntax =
84 > _depend_checks( ebuild, pkg, self.portdb, self.qatracker,
85 > self.repo_metadata, self.repo_settings.qadata)
86 >
87 > @@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
88 > relevant_profiles.append((keyword,
89 > groups, prof))
90 > relevant_profiles.sort(key=sort_key)
91 > + ebuild.relevant_profiles = relevant_profiles
92 > +
93 > + if self.options.jobs <= 1:
94 > + for task in self._iter_tasks(None, None,
95 > ebuild, pkg):
96 > + task, results = task
97 > + for result in results:
98 > + self._check_result(task,
99 > result) +
100 > + loop = asyncio._wrap_loop()
101 > + loop.run_until_complete(self._async_check(loop=loop,
102 > **kwargs)) +
103 > + return False
104 > +
105 > + @coroutine
106 > + def _async_check(self, loop=None, **kwargs):
107 > + '''Perform async profile dependant dependency checks
108 > +
109 > + @param arches:
110 > + @param pkg: Package in which we check (object).
111 > + @param ebuild: Ebuild which we check (object).
112 > + @param baddepsyntax: boolean
113 > + @param unknown_pkgs: set of tuples (type,
114 > atom.unevaluated_atom)
115 > + @returns: dictionary
116 > + '''
117 > + loop = asyncio._wrap_loop(loop)
118 > + ebuild = kwargs.get('ebuild').get()
119 > + pkg = kwargs.get('pkg').get()
120 > + unknown_pkgs = ebuild.unknown_pkgs
121 > + baddepsyntax = ebuild.baddepsyntax
122 > +
123 > + # Use max_workers=True to ensure immediate fork,
124 > since _iter_tasks
125 > + # needs the fork to create a snapshot of current
126 > state.
127 > + executor =
128 > ForkExecutor(max_workers=self.options.jobs) +
129 > + if self.options.jobs > 1:
130 > + for future_done_set in
131 > async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
132 > + max_jobs=self.options.jobs,
133 > max_load=self.options.load_average, loop=loop):
134 > + for task in (yield future_done_set):
135 > + task, results = task.result()
136 > + for result in results:
137 > +
138 > self._check_result(task, result) +
139 > + if not baddepsyntax and unknown_pkgs:
140 > + type_map = {}
141 > + for mytype, atom in unknown_pkgs:
142 > + type_map.setdefault(mytype,
143 > set()).add(atom)
144 > + for mytype, atoms in type_map.items():
145 > + self.qatracker.add_error(
146 > + "dependency.unknown", "%s:
147 > %s: %s"
148 > + % (ebuild.relative_path,
149 > mytype, ", ".join(sorted(atoms))))
150 > - for keyword, groups, prof in relevant_profiles:
151 > + @coroutine
152 > + def _task(self, task):
153 > + yield task.future
154 > + coroutine_return((task, task.future.result()))
155 > +
156 > + def _iter_tasks(self, loop, executor, ebuild, pkg):
157 > + for keyword, groups, prof in
158 > ebuild.relevant_profiles:
159 > is_stable_profile = prof.status == "stable"
160 > is_dev_profile = prof.status == "dev" and \
161 > @@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
162 > dep_settings.usemask =
163 > dep_settings._use_manager.getUseMask( pkg,
164 > stable=dep_settings._parent_stable)
165 > + task = types.SimpleNamespace(ebuild=ebuild,
166 > prof=prof, keyword=keyword) +
167 > + target =
168 > functools.partial(self._task_subprocess, task, pkg, dep_settings) +
169 > + if self.options.jobs <= 1:
170 > + yield (task, target())
171 > + else:
172 > + task.future =
173 > asyncio.ensure_future(loop.run_in_executor(executor, target),
174 > loop=loop)
175 > + yield self._task(task)
176 > +
177 > +
178 > + def _task_subprocess(self, task, pkg, dep_settings):
179 > + ebuild = task.ebuild
180 > + baddepsyntax = ebuild.baddepsyntax
181 > + results = []
182 > + prof = task.prof
183 > if not baddepsyntax:
184 > ismasked = not ebuild.archs or \
185 > pkg.cpv not in
186 > self.portdb.xmatch("match-visible", @@ -163,7 +239,7 @@ class
187 > ProfileDependsChecks(ScanBase): self.have['pmasked'] =
188 > bool(dep_settings._getMaskAtom( pkg.cpv, ebuild.metadata))
189 > if
190 > self.options.ignore_masked:
191 > - continue
192 > + return results
193 > # we are testing deps for a
194 > masked package; give it some lee-way suffix = "masked"
195 > matchmode =
196 > "minimum-all-ignore-profile" @@ -191,6 +267,22 @@ class
197 > ProfileDependsChecks(ScanBase): myvalue, self.portdb, dep_settings,
198 > use="all",
199 > mode=matchmode, trees=self.repo_settings.trees)
200 > +
201 > results.append(types.SimpleNamespace(atoms=atoms, success=success,
202 > mykey=mykey, mytype=mytype)) +
203 > + return results
204 > +
205 > +
206 > + def _check_result(self, task, result):
207 > + prof = task.prof
208 > + keyword = task.keyword
209 > + ebuild = task.ebuild
210 > + unknown_pkgs =
211 > ebuild.unknown_pkgs +
212 > + success = result.success
213 > + atoms = result.atoms
214 > + mykey = result.mykey
215 > + mytype = result.mytype
216 > +
217 > if success:
218 > if atoms:
219 >
220 > @@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
221 >
222 > # if we
223 > emptied out our list, continue: if not all_atoms:
224 > -
225 > continue
226 > +
227 > return
228 > # Filter out
229 > duplicates. We do this by hand (rather # than use a set) so the
230 > order is stable and better @@ -255,17 +347,6 @@ class
231 > ProfileDependsChecks(ScanBase): % (ebuild.relative_path, mytype,
232 > keyword, prof, pformat(atoms, indent=6)))
233 >
234 > - if not baddepsyntax and unknown_pkgs:
235 > - type_map = {}
236 > - for mytype, atom in unknown_pkgs:
237 > - type_map.setdefault(mytype,
238 > set()).add(atom)
239 > - for mytype, atoms in type_map.items():
240 > - self.qatracker.add_error(
241 > - "dependency.unknown", "%s:
242 > %s: %s"
243 > - % (ebuild.relative_path,
244 > mytype, ", ".join(sorted(atoms)))) -
245 > - return False
246 > -
247 > @property
248 > def runInEbuilds(self):
249 > '''Ebuild level scans'''
250 > diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
251 > index a6a9937e5..6f9a24544 100644
252 > --- a/repoman/man/repoman.1
253 > +++ b/repoman/man/repoman.1
254 > @@ -1,4 +1,4 @@
255 > -.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
256 > +.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
257 > .SH NAME
258 > repoman \- Gentoo's program to enforce a minimal level of quality
259 > assurance in packages added to the ebuild repository
260 > @@ -83,6 +83,13 @@ Be less verbose about extraneous info
261 > \fB-p\fR, \fB--pretend\fR
262 > Don't commit or fix anything; just show what would be done
263 > .TP
264 > +\fB\-j\fR, \fB\-\-jobs\fR
265 > +Specifies the number of jobs (processes) to run simultaneously.
266 > +.TP
267 > +\fB\-l\fR, \fB\-\-load-average\fR
268 > +Specifies that no new jobs (processes) should be started if there
269 > are others +jobs running and the load average is at least load (a
270 > floating\-point number). +.TP
271 > \fB-x\fR, \fB--xmlparse\fR
272 > Forces the metadata.xml parse check to be carried out
273 > .TP
274
275
276 code looks good for me