Gentoo Archives: gentoo-commits

From: Fabian Groffen <grobian@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage-utils:master commit in: /
Date: Sat, 15 May 2021 12:19:48
Message-Id: 1621081134.13402fbd8c51f7feedcc85f2f0815768ec45ee7a.grobian@gentoo
1 commit: 13402fbd8c51f7feedcc85f2f0815768ec45ee7a
2 Author: Fabian Groffen <grobian <AT> gentoo <DOT> org>
3 AuthorDate: Sat May 15 12:18:54 2021 +0000
4 Commit: Fabian Groffen <grobian <AT> gentoo <DOT> org>
5 CommitDate: Sat May 15 12:18:54 2021 +0000
6 URL: https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=13402fbd
7
8 qlop: add predict (-p) mode
9
10 Predict should be a better estimator than average, and become the new
11 source for estimating remaining times (with -r).
12
13 Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>
14
15 qlop.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
16 1 file changed, 210 insertions(+), 17 deletions(-)
17
18 diff --git a/qlop.c b/qlop.c
19 index 2d01689..4783528 100644
20 --- a/qlop.c
21 +++ b/qlop.c
22 @@ -27,11 +27,12 @@
23
24 #define QLOP_DEFAULT_LOGFILE "emerge.log"
25
26 -#define QLOP_FLAGS "ctaHMmuUsElerd:f:w:F:" COMMON_FLAGS
27 +#define QLOP_FLAGS "ctapHMmuUsElerd:f:w:F:" COMMON_FLAGS
28 static struct option const qlop_long_opts[] = {
29 {"summary", no_argument, NULL, 'c'},
30 {"time", no_argument, NULL, 't'},
31 {"average", no_argument, NULL, 'a'},
32 + {"predict", no_argument, NULL, 'p'},
33 {"human", no_argument, NULL, 'H'},
34 {"machine", no_argument, NULL, 'M'},
35 {"merge", no_argument, NULL, 'm'},
36 @@ -52,6 +53,7 @@ static const char * const qlop_opts_help[] = {
37 "Print summary of average merges (implies -a)",
38 "Print time taken to complete action",
39 "Print average time taken to complete action",
40 + "Print prediction of time it takes to complete action",
41 "Print elapsed time in human readable format (use with -t or -a)",
42 "Print start/elapsed time as seconds with no formatting",
43 "Show merge history",
44 @@ -85,6 +87,7 @@ struct qlop_mode {
45 char do_sync:1;
46 char do_running:1;
47 char do_average:1;
48 + char do_predict:1;
49 char do_summary:1;
50 char do_human:1;
51 char do_machine:1;
52 @@ -361,6 +364,47 @@ portage never got interrupted in a way where it could not write its
53 interruption to the log. Unfortunately these scenarios happen a lot.
54 As such, we can try to remedy this somewhat by using a rule of thumb
55 that currently merging packages need to be withinin the last 10 days.
56 +
57 +Averages
58 +Compile time of packages typically changes over time (assuming the
59 +machine stays the same). New major releases introduce extra code, or
60 +improve stuff considerably. New compilers might take more or less time
61 +compiling the code. The environment (like the toolchain) we cannot take
62 +into account, packages themselves, we could do something with.
63 +Portage unfortunately does not record the SLOT of the package it
64 +emerges, so we cannot use SLOT.
65 +- Gentoo revisions (-rX) typically do not change the code a lot
66 +- minor and below revisions usually are part of the same code branch
67 +- major revisions typically indicate a next generation of the code
68 +So when there is a running emerge:
69 +1. we value most an identical version, minus Gentoo revisions
70 +2. we look for merges with the same major version
71 +3. when there's still not enough (or anything) to balance an average
72 + off, we look at merges for the same package (older major versions)
73 +Think of this value system as something like the weight of 1 is 5x, 2 is
74 +3x and 3 is just 1x.
75 +While this might sound really cool, it often results in the same average
76 +as a normal average, and so it is useless. In particular: the best
77 +prediction is the most recent matching version. If there is no exact
78 +match (ignoring Gentoo revisions), a projection of runtimes for previous
79 +merges is the next best thing, ignoring major or minor revisions, e.g.
80 +for GCC major versions would hold very well, but for binutils, the
81 +minors differ a lot (for it's on major version 2 for ages).
82 +
83 +So it seems what it boils down to, is that prediction of the simplest
84 +kind is probably the best here. What we do, is we order the merge
85 +history of the package, and try to extrapolate the growth between the
86 +two last merges, e.g. 10 and 12 becomes 12 + (12 - 10) = 14.
87 +Now if there is only one point of history here, there's nothing to
88 +predict, and we best just take the value and add a 10% fuzz.
89 +There's two cases in this setup:
90 +1. predict for a version that we already have merge history for,
91 + ignoring revisions, or
92 +2. predict for a version that's different from what we've seen before
93 +In case of 1. we only look at the same versions, and apply extrapolation
94 +based on the revisions, e.g. -r2 -> -r4. In case of 2. we only look at
95 +versions that are older than the version we're predicting for. This to
96 +deal with e.g. merging python-3.6 with 3.9 installed as well.
97 */
98 static int do_emerge_log(
99 const char *log,
100 @@ -653,7 +697,8 @@ static int do_emerge_log(
101 if ((q = strchr(p, '\n')) != NULL)
102 *q = '\0';
103
104 - if (flags->do_average || flags->do_running)
105 + if (flags->do_predict || flags->do_average ||
106 + flags->do_running)
107 {
108 sync_cnt++;
109 sync_time += elapsed;
110 @@ -708,11 +753,15 @@ static int do_emerge_log(
111 /* see if we need this atom */
112 atomw = NULL;
113 if (atomset == NULL) {
114 + int orev = atom->PR_int;
115 + if (flags->do_predict)
116 + atom->PR_int = 0; /* allow matching a revision */
117 array_for_each(atoms, i, atomw) {
118 if (atom_compare(atom, atomw) == EQUAL)
119 break;
120 atomw = NULL;
121 }
122 + atom->PR_int = orev;
123 } else {
124 snprintf(afmt, sizeof(afmt), "%s/%s",
125 atom->CATEGORY, atom->PN);
126 @@ -738,15 +787,16 @@ static int do_emerge_log(
127 /* found, do report */
128 elapsed = tstart - pkgw->tbegin;
129
130 - if (flags->do_average || flags->do_running)
131 + if (flags->do_average || flags->do_predict
132 + || flags->do_running)
133 {
134 /* find in list of averages */
135 - if (!verbose || flags->do_running) {
136 + if (flags->do_predict || verbose) {
137 snprintf(afmt, sizeof(afmt), "%s/%s",
138 - pkgw->atom->CATEGORY, pkgw->atom->PN);
139 + pkgw->atom->CATEGORY, pkgw->atom->PF);
140 } else {
141 snprintf(afmt, sizeof(afmt), "%s/%s",
142 - pkgw->atom->CATEGORY, pkgw->atom->PF);
143 + pkgw->atom->CATEGORY, pkgw->atom->PN);
144 }
145
146 pkg = add_set_value(afmt, pkgw, merge_averages);
147 @@ -880,15 +930,16 @@ static int do_emerge_log(
148 /* found, do report */
149 elapsed = tstart - pkgw->tbegin;
150
151 - if (flags->do_average || flags->do_running)
152 + if (flags->do_average || flags->do_predict
153 + || flags->do_running)
154 {
155 /* find in list of averages */
156 - if (!verbose || flags->do_running) {
157 + if (flags->do_predict || verbose) {
158 snprintf(afmt, sizeof(afmt), "%s/%s",
159 - pkgw->atom->CATEGORY, pkgw->atom->PN);
160 + pkgw->atom->CATEGORY, pkgw->atom->PF);
161 } else {
162 snprintf(afmt, sizeof(afmt), "%s/%s",
163 - pkgw->atom->CATEGORY, pkgw->atom->PF);
164 + pkgw->atom->CATEGORY, pkgw->atom->PN);
165 }
166
167 pkg = add_set_value(afmt, pkgw, unmerge_averages);
168 @@ -1149,6 +1200,119 @@ static int do_emerge_log(
169 GREEN, sync_cnt, NORM, sync_cnt == 1 ? "" : "s");
170 printf("\n");
171 }
172 + } else if (flags->do_predict) {
173 + DECLARE_ARRAY(avgs);
174 + enum { P_INIT = 0, P_SREV, P_SRCH } pkgstate;
175 + size_t j;
176 + time_t ptime;
177 + char found;
178 +
179 + values_set(merge_averages, avgs);
180 + xarraysort(avgs, pkg_sort_cb);
181 + xarraysort(atoms, atom_compar_cb);
182 +
183 + /* each atom has its own matches in here, but since it's all
184 + * sorted, we basically can go around here jumping from CAT-PN
185 + * to CAT-PN (yes this means python-3.6 python-3.9 will result
186 + * in a single record) */
187 + array_for_each(atoms, i, atom) {
188 + pkgstate = P_INIT;
189 + pkgw = NULL;
190 + found = 0;
191 +
192 + /* we added "<=" to the atoms in main() to make sure we
193 + * would collect version ranges, but here we want to know if
194 + * the atom is equal or newer, older, etc. so strip it off
195 + * again */
196 + if (atom->pfx_op == ATOM_OP_OLDER_EQUAL)
197 + atom->pfx_op = ATOM_OP_NONE;
198 +
199 + array_for_each(avgs, j, pkg) {
200 + if (pkgstate == P_INIT) {
201 + int orev = pkg->atom->PR_int;
202 + atom_equality eq;
203 + pkg->atom->PR_int = 0;
204 + eq = atom_compare(pkg->atom, atom);
205 + pkg->atom->PR_int = orev;
206 + switch (eq) {
207 + case EQUAL:
208 + /* version-less atoms equal any versioned
209 + * atom, so check if atom (user input) was
210 + * versioned */
211 + if (atom->PV != NULL)
212 + {
213 + /* 100% match, there's no better
214 + * prediction */
215 + found = 1;
216 + ptime = pkg->time / pkg->cnt;
217 + break;
218 + } else {
219 + pkgstate = P_SRCH;
220 + pkgw = pkg;
221 + continue;
222 + }
223 + case NEWER:
224 + if (atom->pfx_op != ATOM_OP_NEWER ||
225 + atom->pfx_op != ATOM_OP_NEWER_EQUAL)
226 + continue;
227 + /* fall through */
228 + case OLDER:
229 + if (atom->PV != NULL && pkg->atom->PV != NULL &&
230 + strcmp(atom->PV, pkg->atom->PV) == 0)
231 + pkgstate = P_SREV;
232 + else
233 + pkgstate = P_SRCH;
234 + pkgw = pkg;
235 + continue;
236 + default:
237 + continue;
238 + }
239 + } else { /* P_SREV, P_SRCH */
240 + if (atom_compare(pkg->atom, pkgw->atom) == OLDER) {
241 + if (pkgstate == P_SREV) {
242 + /* in here, we only compute a diff if we
243 + * have another (previous) revision,
244 + * else we stick to the version we have,
245 + * because we're compiling a new
246 + * revision, so likely to be the same */
247 + if (pkg->atom->PV != NULL &&
248 + pkgw->atom->PV != NULL &&
249 + strcmp(pkg->atom->PV,
250 + pkgw->atom->PV) == 0)
251 + {
252 + time_t wtime;
253 + /* take diff with previous revision */
254 + wtime = pkgw->time / pkgw->cnt;
255 + ptime = pkg->time / pkg->cnt;
256 + ptime = wtime + (wtime - ptime);
257 + } else {
258 + ptime = pkgw->time / pkgw->cnt;
259 + }
260 + } else {
261 + time_t wtime;
262 + /* we got another version, just compute
263 + * the diff, and return it */
264 + wtime = pkgw->time / pkgw->cnt;
265 + ptime = pkg->time / pkg->cnt;
266 + ptime = wtime + (wtime - ptime);
267 + }
268 + found = 1;
269 + } else {
270 + /* it cannot be newer (because we
271 + * applied sort) and it cannot be equal
272 + * (because it comes from a set) */
273 + break;
274 + }
275 + }
276 +
277 + if (found) {
278 + printf("%s: prediction %s\n",
279 + atom_format(flags->fmt, atom),
280 + fmt_elapsedtime(flags, ptime));
281 + break;
282 + }
283 + }
284 + }
285 }
286
287 {
288 @@ -1406,6 +1570,7 @@ int qlop_main(int argc, char **argv)
289 m.do_sync = 0;
290 m.do_running = 0;
291 m.do_average = 0;
292 + m.do_predict = 0;
293 m.do_summary = 0;
294 m.do_human = 0;
295 m.do_machine = 0;
296 @@ -1431,6 +1596,7 @@ int qlop_main(int argc, char **argv)
297 case 'r': m.do_running = 1;
298 runningmode++; break;
299 case 'a': m.do_average = 1; break;
300 + case 'p': m.do_predict = 1; break;
301 case 'c': m.do_summary = 1; break;
302 case 'H': m.do_human = 1; break;
303 case 'M': m.do_machine = 1; break;
304 @@ -1499,6 +1665,7 @@ int qlop_main(int argc, char **argv)
305 m.do_sync == 0 &&
306 m.do_running == 0 &&
307 m.do_average == 0 &&
308 + m.do_predict == 0 &&
309 m.do_summary == 0
310 )
311 {
312 @@ -1516,16 +1683,23 @@ int qlop_main(int argc, char **argv)
313 if (m.do_summary)
314 m.do_average = 1;
315
316 - /* handle -a / -t conflict */
317 - if (m.do_average && m.do_time) {
318 - warn("-a (or -c) and -t cannot be used together, dropping -t");
319 + /* handle -a / -p conflict */
320 + if (m.do_average && m.do_predict) {
321 + warn("-a and -p cannot be used together, dropping -a");
322 + m.do_average = 0;
323 + }
324 +
325 + /* handle -a / -p / -t conflict */
326 + if ((m.do_average || m.do_predict) && m.do_time) {
327 + warn("-a/-p (or -c) and -t cannot be used together, dropping -t");
328 m.do_time = 0;
329 }
330
331 /* handle -a / -r conflict */
332 - if (m.do_average && m.do_running) {
333 - warn("-a (or -c) and -r cannot be used together, dropping -a");
334 + if ((m.do_average || m.do_predict) && m.do_running) {
335 + warn("-a/-p (or -c) and -r cannot be used together, dropping -a/-p");
336 m.do_average = 0;
337 + m.do_predict = 0;
338 }
339
340 /* handle -H / -M conflict */
341 @@ -1541,6 +1715,12 @@ int qlop_main(int argc, char **argv)
342 m.do_sync = 0;
343 }
344
345 + /* handle -p without atoms */
346 + if (m.do_predict && array_cnt(atoms) == 0) {
347 + err("-p requires at least one atom");
348 + return EXIT_FAILURE;
349 + }
350 +
351 /* handle -l / -d conflict */
352 if (start_time != -1 && m.show_lastmerge) {
353 if (!m.show_emerge)
354 @@ -1548,8 +1728,8 @@ int qlop_main(int argc, char **argv)
355 m.show_lastmerge = 0;
356 }
357
358 - /* set default for -t, -a or -r */
359 - if ((m.do_average || m.do_time || m.do_running) &&
360 + /* set default for -t, -a, -p or -r */
361 + if ((m.do_average || m.do_predict || m.do_time || m.do_running) &&
362 !(m.do_merge || m.do_unmerge || m.do_autoclean || m.do_sync))
363 {
364 m.do_merge = 1;
365 @@ -1588,6 +1768,19 @@ int qlop_main(int argc, char **argv)
366 /* NOTE: new_atoms == atoms when new_atoms != NULL */
367 }
368
369 + if (m.do_predict) {
370 + /* turn non-ranged atoms into a <= match such that the
371 + * prediction magic has something to compare against when a
372 + * version is listed */
373 + array_for_each(atoms, i, atom) {
374 + if (atom->pfx_op == ATOM_OP_NONE ||
375 + atom->pfx_op == ATOM_OP_EQUAL ||
376 + atom->pfx_op == ATOM_OP_PV_EQUAL ||
377 + atom->pfx_op == ATOM_OP_STAR)
378 + atom->pfx_op = ATOM_OP_OLDER_EQUAL;
379 + }
380 + }
381 +
382 if (start_time < LONG_MAX)
383 do_emerge_log(logfile, &m, atoms, start_time, end_time);