1 |
commit: 13402fbd8c51f7feedcc85f2f0815768ec45ee7a |
2 |
Author: Fabian Groffen <grobian <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat May 15 12:18:54 2021 +0000 |
4 |
Commit: Fabian Groffen <grobian <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat May 15 12:18:54 2021 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=13402fbd |
7 |
|
8 |
qlop: add predict (-p) mode |
9 |
|
10 |
Predict should be a better estimator than average, and become the new |
11 |
source for estimating remaining times (with -r). |
12 |
|
13 |
Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org> |
14 |
|
15 |
qlop.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
16 |
1 file changed, 210 insertions(+), 17 deletions(-) |
17 |
|
18 |
diff --git a/qlop.c b/qlop.c |
19 |
index 2d01689..4783528 100644 |
20 |
--- a/qlop.c |
21 |
+++ b/qlop.c |
22 |
@@ -27,11 +27,12 @@ |
23 |
|
24 |
#define QLOP_DEFAULT_LOGFILE "emerge.log" |
25 |
|
26 |
-#define QLOP_FLAGS "ctaHMmuUsElerd:f:w:F:" COMMON_FLAGS |
27 |
+#define QLOP_FLAGS "ctapHMmuUsElerd:f:w:F:" COMMON_FLAGS |
28 |
static struct option const qlop_long_opts[] = { |
29 |
{"summary", no_argument, NULL, 'c'}, |
30 |
{"time", no_argument, NULL, 't'}, |
31 |
{"average", no_argument, NULL, 'a'}, |
32 |
+ {"predict", no_argument, NULL, 'p'}, |
33 |
{"human", no_argument, NULL, 'H'}, |
34 |
{"machine", no_argument, NULL, 'M'}, |
35 |
{"merge", no_argument, NULL, 'm'}, |
36 |
@@ -52,6 +53,7 @@ static const char * const qlop_opts_help[] = { |
37 |
"Print summary of average merges (implies -a)", |
38 |
"Print time taken to complete action", |
39 |
"Print average time taken to complete action", |
40 |
+ "Print prediction of time it takes to complete action", |
41 |
"Print elapsed time in human readable format (use with -t or -a)", |
42 |
"Print start/elapsed time as seconds with no formatting", |
43 |
"Show merge history", |
44 |
@@ -85,6 +87,7 @@ struct qlop_mode { |
45 |
char do_sync:1; |
46 |
char do_running:1; |
47 |
char do_average:1; |
48 |
+ char do_predict:1; |
49 |
char do_summary:1; |
50 |
char do_human:1; |
51 |
char do_machine:1; |
52 |
@@ -361,6 +364,47 @@ portage never got interrupted in a way where it could not write its |
53 |
interruption to the log. Unfortunately these scenarios happen a lot. |
54 |
As such, we can try to remedy this somewhat by using a rule of thumb |
55 |
that currently merging packages need to be withinin the last 10 days. |
56 |
+ |
57 |
+Averages |
58 |
+Compile time of packages typically changes over time (assuming the |
59 |
+machine stays the same). New major releases introduce extra code, or |
60 |
+improve stuff considerably. New compilers might take more or less time |
61 |
+compiling the code. The environment (like the toolchain) we cannot take |
62 |
+into account, packages themselves, we could do something with. |
63 |
+Portage unfortunately does not record the SLOT of the package it |
64 |
+emerges, so we cannot use SLOT. |
65 |
+- Gentoo revisions (-rX) typically do not change the code a lot |
66 |
+- minor and below revisions usually are part of the same code branch |
67 |
+- major revisions typically indicate a next generation of the code |
68 |
+So when there is a running emerge: |
69 |
+1. we value most an identical version, minus Gentoo revisions |
70 |
+2. we look for merges with the same major version |
71 |
+3. when there's still not enough (or anything) to balance an average |
72 |
+ off, we look at merges for the same package (older major versions) |
73 |
+Think of this value system as something like the weight of 1 is 5x, 2 is |
74 |
+3x and 3 is just 1x. |
75 |
+While this might sound really cool, it often results in the same average |
76 |
+as a normal average, and so it is useless. In particular: the best |
77 |
+prediction is the most recent matching version. If there is no exact |
78 |
+match (ignoring Gentoo revisions), a projection of runtimes for previous |
79 |
+merges is the next best thing, ignoring major or minor revisions, e.g. |
80 |
+for GCC major versions would hold very well, but for binutils, the |
81 |
+minors differ a lot (for it's on major version 2 for ages). |
82 |
+ |
83 |
+So it seems what it boils down to, is that prediction of the simplest |
84 |
+kind is probably the best here. What we do, is we order the merge |
85 |
+history of the package, and try to extrapolate the growth between the |
86 |
+two last merges, e.g. 10 and 12 becomes 12 + (12 - 10) = 14. |
87 |
+Now if there is only one point of history here, there's nothing to |
88 |
+predict, and we best just take the value and add a 10% fuzz. |
89 |
+There's two cases in this setup: |
90 |
+1. predict for a version that we already have merge history for, |
91 |
+ ignoring revisions, or |
92 |
+2. predict for a version that's different from what we've seen before |
93 |
+In case of 1. we only look at the same versions, and apply extrapolation |
94 |
+based on the revisions, e.g. -r2 -> -r4. In case of 2. we only look at |
95 |
+versions that are older than the version we're predicting for. This to |
96 |
+deal with e.g. merging python-3.6 with 3.9 installed as well. |
97 |
*/ |
98 |
static int do_emerge_log( |
99 |
const char *log, |
100 |
@@ -653,7 +697,8 @@ static int do_emerge_log( |
101 |
if ((q = strchr(p, '\n')) != NULL) |
102 |
*q = '\0'; |
103 |
|
104 |
- if (flags->do_average || flags->do_running) |
105 |
+ if (flags->do_predict || flags->do_average || |
106 |
+ flags->do_running) |
107 |
{ |
108 |
sync_cnt++; |
109 |
sync_time += elapsed; |
110 |
@@ -708,11 +753,15 @@ static int do_emerge_log( |
111 |
/* see if we need this atom */ |
112 |
atomw = NULL; |
113 |
if (atomset == NULL) { |
114 |
+ int orev = atom->PR_int; |
115 |
+ if (flags->do_predict) |
116 |
+ atom->PR_int = 0; /* allow matching a revision */ |
117 |
array_for_each(atoms, i, atomw) { |
118 |
if (atom_compare(atom, atomw) == EQUAL) |
119 |
break; |
120 |
atomw = NULL; |
121 |
} |
122 |
+ atom->PR_int = orev; |
123 |
} else { |
124 |
snprintf(afmt, sizeof(afmt), "%s/%s", |
125 |
atom->CATEGORY, atom->PN); |
126 |
@@ -738,15 +787,16 @@ static int do_emerge_log( |
127 |
/* found, do report */ |
128 |
elapsed = tstart - pkgw->tbegin; |
129 |
|
130 |
- if (flags->do_average || flags->do_running) |
131 |
+ if (flags->do_average || flags->do_predict |
132 |
+ || flags->do_running) |
133 |
{ |
134 |
/* find in list of averages */ |
135 |
- if (!verbose || flags->do_running) { |
136 |
+ if (flags->do_predict || verbose) { |
137 |
snprintf(afmt, sizeof(afmt), "%s/%s", |
138 |
- pkgw->atom->CATEGORY, pkgw->atom->PN); |
139 |
+ pkgw->atom->CATEGORY, pkgw->atom->PF); |
140 |
} else { |
141 |
snprintf(afmt, sizeof(afmt), "%s/%s", |
142 |
- pkgw->atom->CATEGORY, pkgw->atom->PF); |
143 |
+ pkgw->atom->CATEGORY, pkgw->atom->PN); |
144 |
} |
145 |
|
146 |
pkg = add_set_value(afmt, pkgw, merge_averages); |
147 |
@@ -880,15 +930,16 @@ static int do_emerge_log( |
148 |
/* found, do report */ |
149 |
elapsed = tstart - pkgw->tbegin; |
150 |
|
151 |
- if (flags->do_average || flags->do_running) |
152 |
+ if (flags->do_average || flags->do_predict |
153 |
+ || flags->do_running) |
154 |
{ |
155 |
/* find in list of averages */ |
156 |
- if (!verbose || flags->do_running) { |
157 |
+ if (flags->do_predict || verbose) { |
158 |
snprintf(afmt, sizeof(afmt), "%s/%s", |
159 |
- pkgw->atom->CATEGORY, pkgw->atom->PN); |
160 |
+ pkgw->atom->CATEGORY, pkgw->atom->PF); |
161 |
} else { |
162 |
snprintf(afmt, sizeof(afmt), "%s/%s", |
163 |
- pkgw->atom->CATEGORY, pkgw->atom->PF); |
164 |
+ pkgw->atom->CATEGORY, pkgw->atom->PN); |
165 |
} |
166 |
|
167 |
pkg = add_set_value(afmt, pkgw, unmerge_averages); |
168 |
@@ -1149,6 +1200,119 @@ static int do_emerge_log( |
169 |
GREEN, sync_cnt, NORM, sync_cnt == 1 ? "" : "s"); |
170 |
printf("\n"); |
171 |
} |
172 |
+ } else if (flags->do_predict) { |
173 |
+ DECLARE_ARRAY(avgs); |
174 |
+ enum { P_INIT = 0, P_SREV, P_SRCH } pkgstate; |
175 |
+ size_t j; |
176 |
+ time_t ptime; |
177 |
+ char found; |
178 |
+ |
179 |
+ values_set(merge_averages, avgs); |
180 |
+ xarraysort(avgs, pkg_sort_cb); |
181 |
+ xarraysort(atoms, atom_compar_cb); |
182 |
+ |
183 |
+ /* each atom has its own matches in here, but since it's all |
184 |
+ * sorted, we basically can go around here jumping from CAT-PN |
185 |
+ * to CAT-PN (yes this means python-3.6 python-3.9 will result |
186 |
+ * in a single record) */ |
187 |
+ array_for_each(atoms, i, atom) { |
188 |
+ pkgstate = P_INIT; |
189 |
+ pkgw = NULL; |
190 |
+ found = 0; |
191 |
+ |
192 |
+ /* we added "<=" to the atoms in main() to make sure we |
193 |
+ * would collect version ranges, but here we want to know if |
194 |
+ * the atom is equal or newer, older, etc. so strip it off |
195 |
+ * again */ |
196 |
+ if (atom->pfx_op == ATOM_OP_OLDER_EQUAL) |
197 |
+ atom->pfx_op = ATOM_OP_NONE; |
198 |
+ |
199 |
+ array_for_each(avgs, j, pkg) { |
200 |
+ if (pkgstate == P_INIT) { |
201 |
+ int orev = pkg->atom->PR_int; |
202 |
+ atom_equality eq; |
203 |
+ pkg->atom->PR_int = 0; |
204 |
+ eq = atom_compare(pkg->atom, atom); |
205 |
+ pkg->atom->PR_int = orev; |
206 |
+ switch (eq) { |
207 |
+ case EQUAL: |
208 |
+ /* version-less atoms equal any versioned |
209 |
+ * atom, so check if atom (user input) was |
210 |
+ * versioned */ |
211 |
+ if (atom->PV != NULL) |
212 |
+ { |
213 |
+ /* 100% match, there's no better |
214 |
+ * prediction */ |
215 |
+ found = 1; |
216 |
+ ptime = pkg->time / pkg->cnt; |
217 |
+ break; |
218 |
+ } else { |
219 |
+ pkgstate = P_SRCH; |
220 |
+ pkgw = pkg; |
221 |
+ continue; |
222 |
+ } |
223 |
+ case NEWER: |
224 |
+ if (atom->pfx_op != ATOM_OP_NEWER || |
225 |
+ atom->pfx_op != ATOM_OP_NEWER_EQUAL) |
226 |
+ continue; |
227 |
+ /* fall through */ |
228 |
+ case OLDER: |
229 |
+ if (atom->PV != NULL && pkg->atom->PV != NULL && |
230 |
+ strcmp(atom->PV, pkg->atom->PV) == 0) |
231 |
+ pkgstate = P_SREV; |
232 |
+ else |
233 |
+ pkgstate = P_SRCH; |
234 |
+ pkgw = pkg; |
235 |
+ continue; |
236 |
+ default: |
237 |
+ continue; |
238 |
+ } |
239 |
+ } else { /* P_SREV, P_SRCH */ |
240 |
+ if (atom_compare(pkg->atom, pkgw->atom) == OLDER) { |
241 |
+ if (pkgstate == P_SREV) { |
242 |
+ /* in here, we only compute a diff if we |
243 |
+ * have another (previous) revision, |
244 |
+ * else we stick to the version we have, |
245 |
+ * because we're compiling a new |
246 |
+ * revision, so likely to be the same */ |
247 |
+ if (pkg->atom->PV != NULL && |
248 |
+ pkgw->atom->PV != NULL && |
249 |
+ strcmp(pkg->atom->PV, |
250 |
+ pkgw->atom->PV) == 0) |
251 |
+ { |
252 |
+ time_t wtime; |
253 |
+ /* take diff with previous revision */ |
254 |
+ wtime = pkgw->time / pkgw->cnt; |
255 |
+ ptime = pkg->time / pkg->cnt; |
256 |
+ ptime = wtime + (wtime - ptime); |
257 |
+ } else { |
258 |
+ ptime = pkgw->time / pkgw->cnt; |
259 |
+ } |
260 |
+ } else { |
261 |
+ time_t wtime; |
262 |
+ /* we got another version, just compute |
263 |
+ * the diff, and return it */ |
264 |
+ wtime = pkgw->time / pkgw->cnt; |
265 |
+ ptime = pkg->time / pkg->cnt; |
266 |
+ ptime = wtime + (wtime - ptime); |
267 |
+ } |
268 |
+ found = 1; |
269 |
+ } else { |
270 |
+ /* it cannot be newer (because we |
271 |
+ * applied sort) and it cannot be equal |
272 |
+ * (because it comes from a set) */ |
273 |
+ break; |
274 |
+ } |
275 |
+ } |
276 |
+ |
277 |
+ if (found) { |
278 |
+ printf("%s: prediction %s\n", |
279 |
+ atom_format(flags->fmt, atom), |
280 |
+ fmt_elapsedtime(flags, ptime)); |
281 |
+ break; |
282 |
+ } |
283 |
+ } |
284 |
+ } |
285 |
} |
286 |
|
287 |
{ |
288 |
@@ -1406,6 +1570,7 @@ int qlop_main(int argc, char **argv) |
289 |
m.do_sync = 0; |
290 |
m.do_running = 0; |
291 |
m.do_average = 0; |
292 |
+ m.do_predict = 0; |
293 |
m.do_summary = 0; |
294 |
m.do_human = 0; |
295 |
m.do_machine = 0; |
296 |
@@ -1431,6 +1596,7 @@ int qlop_main(int argc, char **argv) |
297 |
case 'r': m.do_running = 1; |
298 |
runningmode++; break; |
299 |
case 'a': m.do_average = 1; break; |
300 |
+ case 'p': m.do_predict = 1; break; |
301 |
case 'c': m.do_summary = 1; break; |
302 |
case 'H': m.do_human = 1; break; |
303 |
case 'M': m.do_machine = 1; break; |
304 |
@@ -1499,6 +1665,7 @@ int qlop_main(int argc, char **argv) |
305 |
m.do_sync == 0 && |
306 |
m.do_running == 0 && |
307 |
m.do_average == 0 && |
308 |
+ m.do_predict == 0 && |
309 |
m.do_summary == 0 |
310 |
) |
311 |
{ |
312 |
@@ -1516,16 +1683,23 @@ int qlop_main(int argc, char **argv) |
313 |
if (m.do_summary) |
314 |
m.do_average = 1; |
315 |
|
316 |
- /* handle -a / -t conflict */ |
317 |
- if (m.do_average && m.do_time) { |
318 |
- warn("-a (or -c) and -t cannot be used together, dropping -t"); |
319 |
+ /* handle -a / -p conflict */ |
320 |
+ if (m.do_average && m.do_predict) { |
321 |
+ warn("-a and -p cannot be used together, dropping -a"); |
322 |
+ m.do_average = 0; |
323 |
+ } |
324 |
+ |
325 |
+ /* handle -a / -p / -t conflict */ |
326 |
+ if ((m.do_average || m.do_predict) && m.do_time) { |
327 |
+ warn("-a/-p (or -c) and -t cannot be used together, dropping -t"); |
328 |
m.do_time = 0; |
329 |
} |
330 |
|
331 |
/* handle -a / -r conflict */ |
332 |
- if (m.do_average && m.do_running) { |
333 |
- warn("-a (or -c) and -r cannot be used together, dropping -a"); |
334 |
+ if ((m.do_average || m.do_predict) && m.do_running) { |
335 |
+ warn("-a/-p (or -c) and -r cannot be used together, dropping -a/-p"); |
336 |
m.do_average = 0; |
337 |
+ m.do_predict = 0; |
338 |
} |
339 |
|
340 |
/* handle -H / -M conflict */ |
341 |
@@ -1541,6 +1715,12 @@ int qlop_main(int argc, char **argv) |
342 |
m.do_sync = 0; |
343 |
} |
344 |
|
345 |
+ /* handle -p without atoms */ |
346 |
+ if (m.do_predict && array_cnt(atoms) == 0) { |
347 |
+ err("-p requires at least one atom"); |
348 |
+ return EXIT_FAILURE; |
349 |
+ } |
350 |
+ |
351 |
/* handle -l / -d conflict */ |
352 |
if (start_time != -1 && m.show_lastmerge) { |
353 |
if (!m.show_emerge) |
354 |
@@ -1548,8 +1728,8 @@ int qlop_main(int argc, char **argv) |
355 |
m.show_lastmerge = 0; |
356 |
} |
357 |
|
358 |
- /* set default for -t, -a or -r */ |
359 |
- if ((m.do_average || m.do_time || m.do_running) && |
360 |
+ /* set default for -t, -a, -p or -r */ |
361 |
+ if ((m.do_average || m.do_predict || m.do_time || m.do_running) && |
362 |
!(m.do_merge || m.do_unmerge || m.do_autoclean || m.do_sync)) |
363 |
{ |
364 |
m.do_merge = 1; |
365 |
@@ -1588,6 +1768,19 @@ int qlop_main(int argc, char **argv) |
366 |
/* NOTE: new_atoms == atoms when new_atoms != NULL */ |
367 |
} |
368 |
|
369 |
+ if (m.do_predict) { |
370 |
+ /* turn non-ranged atoms into a <= match such that the |
371 |
+ * prediction magic has something to compare against when a |
372 |
+ * version is listed */ |
373 |
+ array_for_each(atoms, i, atom) { |
374 |
+ if (atom->pfx_op == ATOM_OP_NONE || |
375 |
+ atom->pfx_op == ATOM_OP_EQUAL || |
376 |
+ atom->pfx_op == ATOM_OP_PV_EQUAL || |
377 |
+ atom->pfx_op == ATOM_OP_STAR) |
378 |
+ atom->pfx_op = ATOM_OP_OLDER_EQUAL; |
379 |
+ } |
380 |
+ } |
381 |
+ |
382 |
if (start_time < LONG_MAX) |
383 |
do_emerge_log(logfile, &m, atoms, start_time, end_time); |