1 |
commit: 86d046b36e0a8a882a4f5d01a8e1eee82bc335b5 |
2 |
Author: André Erdmann <dywi <AT> mailerd <DOT> de> |
3 |
AuthorDate: Wed Jul 31 21:07:36 2013 +0000 |
4 |
Commit: André Erdmann <dywi <AT> mailerd <DOT> de> |
5 |
CommitDate: Wed Jul 31 21:07:36 2013 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=86d046b3 |
7 |
|
8 |
incremental ov-creation: postpone revbump check |
9 |
|
10 |
The revbump check needs hashes for comparison. Postponing the hash calculation |
11 |
and executing it in a thread pool may have speed advantages (needs some |
12 |
testing). Additionally, it allows revbump checks to be explicitly disabled when |
13 |
doing incremental overlay creation. |
14 |
|
15 |
--- |
16 |
roverlay/main.py | 1 + |
17 |
roverlay/overlay/creator.py | 113 +++++++++++++++++++++- |
18 |
roverlay/overlay/pkgdir/manifest/file.py | 3 + |
19 |
roverlay/overlay/pkgdir/packagedir_base.py | 23 ++++- |
20 |
roverlay/overlay/pkgdir/packagedir_newmanifest.py | 2 + |
21 |
roverlay/overlay/root.py | 34 ++++--- |
22 |
roverlay/stats/base.py | 16 ++- |
23 |
roverlay/util/hashpool.py | 66 +++++++++++++ |
24 |
8 files changed, 232 insertions(+), 26 deletions(-) |
25 |
|
26 |
diff --git a/roverlay/main.py b/roverlay/main.py |
27 |
index 9cca0a8..7830d23 100644 |
28 |
--- a/roverlay/main.py |
29 |
+++ b/roverlay/main.py |
30 |
@@ -292,6 +292,7 @@ def main ( |
31 |
) |
32 |
|
33 |
repo_list.add_packages ( overlay_creator.add_package ) |
34 |
+ overlay_creator.enqueue_postponed() |
35 |
|
36 |
overlay_creator.release_package_rules() |
37 |
|
38 |
|
39 |
diff --git a/roverlay/overlay/creator.py b/roverlay/overlay/creator.py |
40 |
index c21617e..32dacbc 100644 |
41 |
--- a/roverlay/overlay/creator.py |
42 |
+++ b/roverlay/overlay/creator.py |
43 |
@@ -38,7 +38,7 @@ import roverlay.overlay.pkgdir.distroot.static |
44 |
import roverlay.recipe.distmap |
45 |
import roverlay.recipe.easyresolver |
46 |
import roverlay.stats.collector |
47 |
- |
48 |
+import roverlay.util.hashpool |
49 |
|
50 |
class OverlayCreator ( object ): |
51 |
"""This is a 'R packages -> Overlay' interface.""" |
52 |
@@ -46,6 +46,8 @@ class OverlayCreator ( object ): |
53 |
LOGGER = logging.getLogger ( 'OverlayCreator' ) |
54 |
STATS = roverlay.stats.collector.static.overlay_creation |
55 |
|
56 |
+ HASHPOOL_WORKER_COUNT = 0 |
57 |
+ |
58 |
def __init__ ( self, |
59 |
skip_manifest, incremental, immediate_ebuild_writes, |
60 |
logger=None, allow_write=True |
61 |
@@ -82,7 +84,8 @@ class OverlayCreator ( object ): |
62 |
|
63 |
self.NUMTHREADS = config.get ( 'EBUILD.jobcount', 0 ) |
64 |
|
65 |
- self._pkg_queue = queue.Queue() |
66 |
+ self._pkg_queue = queue.Queue() |
67 |
+ self._pkg_queue_postponed = list() |
68 |
self._err_queue.attach_queue ( self._pkg_queue, None ) |
69 |
|
70 |
self._workers = None |
71 |
@@ -119,14 +122,18 @@ class OverlayCreator ( object ): |
72 |
) |
73 |
# --- end of _get_resolver_channel (...) --- |
74 |
|
75 |
- def add_package ( self, package_info ): |
76 |
+ def add_package ( self, package_info, allow_postpone=True ): |
77 |
"""Adds a PackageInfo to the package queue. |
78 |
|
79 |
arguments: |
80 |
* package_info -- |
81 |
""" |
82 |
if self.package_rules.apply_actions ( package_info ): |
83 |
- if self.overlay.add ( package_info ): |
84 |
+ add_result = self.overlay.add ( |
85 |
+ package_info, allow_postpone=allow_postpone |
86 |
+ ) |
87 |
+ |
88 |
+ if add_result is True: |
89 |
ejob = roverlay.ebuild.creation.EbuildCreation ( |
90 |
package_info, |
91 |
depres_channel_spawner = self._get_resolver_channel, |
92 |
@@ -134,13 +141,109 @@ class OverlayCreator ( object ): |
93 |
) |
94 |
self._pkg_queue.put ( ejob ) |
95 |
self.stats.pkg_queued.inc() |
96 |
- else: |
97 |
+ |
98 |
+ elif add_result is False: |
99 |
self.stats.pkg_dropped.inc() |
100 |
+ |
101 |
+ elif allow_postpone: |
102 |
+ self.stats.pkg_queue_postponed.inc() |
103 |
+ self._pkg_queue_postponed.append ( ( package_info, add_result ) ) |
104 |
+ |
105 |
+ else: |
106 |
+ raise Exception ( "bad return from overlay.add()" ) |
107 |
+ |
108 |
else: |
109 |
# else filtered out |
110 |
self.stats.pkg_filtered.inc() |
111 |
# --- end of add_package (...) --- |
112 |
|
113 |
+ def discard_postponed ( self ): |
114 |
+ self._pkg_queue_postponed [:] = [] |
115 |
+ # --- end of discard_postponed (...) --- |
116 |
+ |
117 |
+ def enqueue_postponed ( self, prehash_manifest=False ): |
118 |
+ # !!! prehash_manifest results in threaded calculation taking |
119 |
+ # _more_ time than single-threaded. Postponed packages usually |
120 |
+ # don't need a revbump, so the time penalty here likely does |
121 |
+ # not outweigh the benefits (i.e. no need to recalculate on revbump) |
122 |
+ # |
123 |
+ |
124 |
+ if self._pkg_queue_postponed: |
125 |
+ qtime = self.stats.queue_postponed_time |
126 |
+ |
127 |
+ self.logger.info ( "Checking for packages that need a revbump" ) |
128 |
+ |
129 |
+ if self.HASHPOOL_WORKER_COUNT < 1: |
130 |
+ pass |
131 |
+ |
132 |
+ elif roverlay.util.hashpool.HAVE_CONCURRENT_FUTURES: |
133 |
+ qtime.begin ( "setup_hashpool" ) |
134 |
+ |
135 |
+ # determine hashes that should be calculated here |
136 |
+ if prehash_manifest: |
137 |
+ # * calculate all hashes, not just the distmap one |
138 |
+ # * assuming that all package dirs are of the same class/type |
139 |
+ # |
140 |
+ distmap_hash = PackageInfo.DISTMAP_DIGEST_TYPE |
141 |
+ extra_hashes = self._pkg_queue_postponed[0][1]().HASH_TYPES |
142 |
+ |
143 |
+ if not extra_hashes: |
144 |
+ hashes = { distmap_hash, } |
145 |
+ elif distmap_hash in extra_hashes: |
146 |
+ hashes = extra_hashes |
147 |
+ else: |
148 |
+ hashes = extra_hashes | { distmap_hash, } |
149 |
+ else: |
150 |
+ hashes = { PackageInfo.DISTMAP_DIGEST_TYPE, } |
151 |
+ |
152 |
+ my_hashpool = roverlay.util.hashpool.HashPool ( |
153 |
+ hashes, self.HASHPOOL_WORKER_COUNT |
154 |
+ ) |
155 |
+ |
156 |
+ for p_info, pkgdir_ref in self._pkg_queue_postponed: |
157 |
+ my_hashpool.add ( |
158 |
+ id ( p_info ), |
159 |
+ p_info.get ( "package_file" ), p_info.hashdict |
160 |
+ ) |
161 |
+ |
162 |
+ qtime.end ( "setup_hashpool" ) |
163 |
+ |
164 |
+ qtime.begin ( "make_hashes" ) |
165 |
+ my_hashpool.run() |
166 |
+ qtime.end ( "make_hashes" ) |
167 |
+ else: |
168 |
+ self.logger.warning ( |
169 |
+ "enqueue_postponed(): falling back to single-threaded variant." |
170 |
+ ) |
171 |
+ # -- end if HAVE_CONCURRENT_FUTURES |
172 |
+ |
173 |
+ qtime.begin ( "queue_packages" ) |
174 |
+ for p_info, pkgdir_ref in self._pkg_queue_postponed: |
175 |
+ add_result = pkgdir_ref().add ( p_info, allow_postpone=False ) |
176 |
+ |
177 |
+ if add_result is True: |
178 |
+ ejob = roverlay.ebuild.creation.EbuildCreation ( |
179 |
+ p_info, |
180 |
+ depres_channel_spawner = self._get_resolver_channel, |
181 |
+ err_queue = self._err_queue |
182 |
+ ) |
183 |
+ self._pkg_queue.put ( ejob ) |
184 |
+ self.stats.pkg_queued.inc() |
185 |
+ |
186 |
+ elif add_result is False: |
187 |
+ self.stats.pkg_dropped.inc() |
188 |
+ |
189 |
+ else: |
190 |
+ raise Exception ( |
191 |
+ "enqueue_postponed() should not postpone packages further." |
192 |
+ ) |
193 |
+ # -- end for |
194 |
+ qtime.end ( "queue_packages" ) |
195 |
+ |
196 |
+ # clear list |
197 |
+ self._pkg_queue_postponed[:] = [] |
198 |
+ # --- end of enqueue_postponed (...) --- |
199 |
+ |
200 |
def write_overlay ( self ): |
201 |
"""Writes the overlay.""" |
202 |
if self.overlay.writeable(): |
203 |
|
204 |
diff --git a/roverlay/overlay/pkgdir/manifest/file.py b/roverlay/overlay/pkgdir/manifest/file.py |
205 |
index 9cf8425..f9b6b8d 100644 |
206 |
--- a/roverlay/overlay/pkgdir/manifest/file.py |
207 |
+++ b/roverlay/overlay/pkgdir/manifest/file.py |
208 |
@@ -26,6 +26,9 @@ class ManifestFile ( object ): |
209 |
provided by hashlib (via roverlay.digest). |
210 |
""" |
211 |
|
212 |
+ # ref |
213 |
+ HASH_TYPES = ManifestEntry.HASHTYPES |
214 |
+ |
215 |
def __init__ ( self, root ): |
216 |
self.root = root |
217 |
self.filepath = root + os.path.sep + 'Manifest' |
218 |
|
219 |
diff --git a/roverlay/overlay/pkgdir/packagedir_base.py b/roverlay/overlay/pkgdir/packagedir_base.py |
220 |
index 5f54279..7d5f906 100644 |
221 |
--- a/roverlay/overlay/pkgdir/packagedir_base.py |
222 |
+++ b/roverlay/overlay/pkgdir/packagedir_base.py |
223 |
@@ -57,6 +57,11 @@ class PackageDirBase ( object ): |
224 |
# |
225 |
MANIFEST_THREADSAFE = None |
226 |
|
227 |
+ # a set(!) of hash types which are used by the package dir |
228 |
+ # implementation (if any, else None) |
229 |
+ # other subsystems might calculate them in advance if advertised here |
230 |
+ HASH_TYPES = None |
231 |
+ |
232 |
# DOEBUILD_FETCH |
233 |
# doebuild function that fetches $SRC_URI |
234 |
# can be overridden by subclasses if e.g. on-the-fly manifest creation |
235 |
@@ -171,7 +176,9 @@ class PackageDirBase ( object ): |
236 |
return p |
237 |
# --- end of _scan_add_package (...) --- |
238 |
|
239 |
- def add ( self, package_info, add_if_physical=False, _lock=True ): |
240 |
+ def add ( self, |
241 |
+ package_info, add_if_physical=False, allow_postpone=False, _lock=True |
242 |
+ ): |
243 |
"""Adds a package to this PackageDir. |
244 |
|
245 |
arguments: |
246 |
@@ -179,7 +186,7 @@ class PackageDirBase ( object ): |
247 |
* add_if_physical -- add package even if it exists as ebuild file |
248 |
(-> overwrite old ebuilds) |
249 |
|
250 |
- returns: success as bool |
251 |
+ returns: success as bool // weakref |
252 |
|
253 |
raises: Exception when ebuild already exists. |
254 |
""" |
255 |
@@ -194,11 +201,15 @@ class PackageDirBase ( object ): |
256 |
if shortver in self._packages: |
257 |
# package exists, check if it existed before script invocation |
258 |
if self._packages [shortver] ['physical_only']: |
259 |
+ |
260 |
if add_if_physical: |
261 |
# else ignore ebuilds that exist as file |
262 |
self._packages [shortver] = package_info |
263 |
added = True |
264 |
|
265 |
+ elif allow_postpone: |
266 |
+ added = None |
267 |
+ |
268 |
elif self.DISTMAP.check_revbump_necessary ( package_info ): |
269 |
# resolve by recursion |
270 |
added = self.add ( |
271 |
@@ -213,8 +224,8 @@ class PackageDirBase ( object ): |
272 |
) |
273 |
else: |
274 |
# package has been added to this packagedir before, |
275 |
- # this most likely happens if it is available via several |
276 |
- # remotes |
277 |
+ # this most likely happens if it is available from |
278 |
+ # more than one repo |
279 |
self.logger.debug ( |
280 |
"'{PN}-{PVR}.ebuild' already exists, cannot add it!".format ( |
281 |
PN=self.name, PVR=shortver |
282 |
@@ -232,8 +243,10 @@ class PackageDirBase ( object ): |
283 |
# !! package_info <-> self (double-linked) |
284 |
package_info.overlay_package_ref = weakref.ref ( self ) |
285 |
return True |
286 |
+ elif added is None: |
287 |
+ return weakref.ref ( self ) |
288 |
else: |
289 |
- return False |
290 |
+ return added |
291 |
# --- end of add (...) --- |
292 |
|
293 |
def check_empty ( self ): |
294 |
|
295 |
diff --git a/roverlay/overlay/pkgdir/packagedir_newmanifest.py b/roverlay/overlay/pkgdir/packagedir_newmanifest.py |
296 |
index ae6c059..f7c258c 100644 |
297 |
--- a/roverlay/overlay/pkgdir/packagedir_newmanifest.py |
298 |
+++ b/roverlay/overlay/pkgdir/packagedir_newmanifest.py |
299 |
@@ -28,6 +28,8 @@ class PackageDir ( roverlay.overlay.pkgdir.packagedir_base.PackageDirBase ): |
300 |
|
301 |
MANIFEST_THREADSAFE = True |
302 |
|
303 |
+ HASH_TYPES = frozenset ( ManifestFile.HASH_TYPES ) |
304 |
+ |
305 |
# Manifest entries for imported ebuilds have to be created during import |
306 |
DOEBUILD_FETCH = roverlay.tools.ebuild.doebuild_fetch_and_manifest |
307 |
|
308 |
|
309 |
diff --git a/roverlay/overlay/root.py b/roverlay/overlay/root.py |
310 |
index 1e423b0..1ecf4ac 100644 |
311 |
--- a/roverlay/overlay/root.py |
312 |
+++ b/roverlay/overlay/root.py |
313 |
@@ -519,32 +519,44 @@ class Overlay ( object ): |
314 |
) |
315 |
# --- end of _write_rsuggests_use_desc (...) --- |
316 |
|
317 |
- def add ( self, package_info ): |
318 |
+ def add ( self, package_info, allow_postpone=False ): |
319 |
"""Adds a package to this overlay (into its default category). |
320 |
|
321 |
arguments: |
322 |
- * package_info -- PackageInfo of the package to add |
323 |
- |
324 |
- returns: True if successfully added else False |
325 |
+ * package_info -- PackageInfo of the package to add |
326 |
+ * allow_postpone -- do not add the package if it already exists and |
327 |
+ return None |
328 |
+ |
329 |
+ returns: |
330 |
+ * True if successfully added |
331 |
+ * a weak reference to the package dir object if postponed |
332 |
+ * else False |
333 |
""" |
334 |
# NOTE: |
335 |
# * "category" keyword arg has been removed, use add_to(^2) instead |
336 |
# * self.default_category must not be None (else KeyError is raised) |
337 |
return self._get_category ( |
338 |
package_info.get ( "category", self.default_category ) |
339 |
- ).add ( package_info ) |
340 |
+ ).add ( package_info, allow_postpone=allow_postpone ) |
341 |
# --- end of add (...) --- |
342 |
|
343 |
- def add_to ( self, package_info, category ): |
344 |
+ def add_to ( self, package_info, category, allow_postpone=False ): |
345 |
"""Adds a package to this overlay. |
346 |
|
347 |
arguments: |
348 |
- * package_info -- PackageInfo of the package to add |
349 |
- * category -- category where the pkg should be put in |
350 |
- |
351 |
- returns: True if successfully added else False |
352 |
+ * package_info -- PackageInfo of the package to add |
353 |
+ * category -- category where the pkg should be put in |
354 |
+ * allow_postpone -- do not add the package if it already exists and |
355 |
+ return None |
356 |
+ |
357 |
+ returns: |
358 |
+ * True if successfully added |
359 |
+ * a weak reference to the package dir object if postponed |
360 |
+ * else False |
361 |
""" |
362 |
- return self._get_category ( category ).add ( package_info ) |
363 |
+ return self._get_category ( category ).add ( |
364 |
+ package_info, allow_postpone=allow_postpone |
365 |
+ ) |
366 |
# --- end of add_to (...) --- |
367 |
|
368 |
def has_dir ( self, _dir ): |
369 |
|
370 |
diff --git a/roverlay/stats/base.py b/roverlay/stats/base.py |
371 |
index b51226e..28531c2 100644 |
372 |
--- a/roverlay/stats/base.py |
373 |
+++ b/roverlay/stats/base.py |
374 |
@@ -66,16 +66,22 @@ class OverlayCreationStats ( OverlayCreationWorkerStats ): |
375 |
DESCRIPTION = "overlay creation" |
376 |
|
377 |
_MEMBERS = ( |
378 |
- ( 'creation_time', 'pkg_queued', 'pkg_filtered', 'pkg_dropped', ) |
379 |
+ ( |
380 |
+ 'creation_time', 'queue_postponed_time', |
381 |
+ 'pkg_queued', 'pkg_queue_postponed', |
382 |
+ 'pkg_filtered', 'pkg_dropped', |
383 |
+ ) |
384 |
+ OverlayCreationWorkerStats._MEMBERS |
385 |
) |
386 |
|
387 |
def __init__ ( self ): |
388 |
super ( OverlayCreationStats, self ).__init__() |
389 |
- self.pkg_queued = abstract.Counter ( "queued" ) |
390 |
- self.pkg_dropped = abstract.Counter ( "dropped" ) |
391 |
- self.pkg_filtered = abstract.Counter ( "filtered" ) |
392 |
- self.creation_time = abstract.TimeStats ( "ebuild creation" ) |
393 |
+ self.pkg_queued = abstract.Counter ( "queued" ) |
394 |
+ self.pkg_queue_postponed = abstract.Counter ( "queue_postponed" ) |
395 |
+ self.pkg_dropped = abstract.Counter ( "dropped" ) |
396 |
+ self.pkg_filtered = abstract.Counter ( "filtered" ) |
397 |
+ self.creation_time = abstract.TimeStats ( "ebuild creation" ) |
398 |
+ self.queue_postponed_time = abstract.TimeStats ( "queue_postponed" ) |
399 |
# --- end of __init__ (...) --- |
400 |
|
401 |
def get_relevant_package_count ( self ): |
402 |
|
403 |
diff --git a/roverlay/util/hashpool.py b/roverlay/util/hashpool.py |
404 |
new file mode 100644 |
405 |
index 0000000..921a968 |
406 |
--- /dev/null |
407 |
+++ b/roverlay/util/hashpool.py |
408 |
@@ -0,0 +1,66 @@ |
409 |
+# R overlay -- |
410 |
+# -*- coding: utf-8 -*- |
411 |
+# Copyright (C) 2013 André Erdmann <dywi@×××××××.de> |
412 |
+# Distributed under the terms of the GNU General Public License; |
413 |
+# either version 2 of the License, or (at your option) any later version. |
414 |
+ |
415 |
+try: |
416 |
+ import concurrent.futures |
417 |
+except ImportError: |
418 |
+ sys.stderr.write ( |
419 |
+ '!!! concurrent.futures is not available.\n' |
420 |
+ ' Falling back to single-threaded variants.\n\n' |
421 |
+ ) |
422 |
+ HAVE_CONCURRENT_FUTURES = False |
423 |
+else: |
424 |
+ HAVE_CONCURRENT_FUTURES = True |
425 |
+ |
426 |
+ |
427 |
+import roverlay.digest |
428 |
+ |
429 |
+def _calculate_hashes ( hash_job, hashes ): |
430 |
+ hash_job.hashdict.update ( |
431 |
+ roverlay.digest.multihash_file ( hash_job.filepath, hashes ) |
432 |
+ ) |
433 |
+# --- end of _calculate_hashes (...) --- |
434 |
+ |
435 |
+class Hashjob ( object ): |
436 |
+ def __init__ ( self, filepath, hashdict=None ): |
437 |
+ self.filepath = filepath |
438 |
+ self.hashdict = dict() if hashdict is None else hashdict |
439 |
+ # --- end of __init__ (...) --- |
440 |
+ |
441 |
+ |
442 |
+class HashPool ( object ): |
443 |
+ def __init__ ( self, hashes, max_workers ): |
444 |
+ super ( HashPool, self ).__init__() |
445 |
+ self.hashes = frozenset ( hashes ) |
446 |
+ self._jobs = dict() |
447 |
+ self.max_workers = int ( max_workers ) |
448 |
+ # --- end of __init__ (...) --- |
449 |
+ |
450 |
+ def add ( self, backref, filepath, hashdict=None ): |
451 |
+ self._jobs [backref] = Hashjob ( filepath, hashdict ) |
452 |
+ # --- end of add (...) --- |
453 |
+ |
454 |
+ def run ( self ): |
455 |
+ #with concurrent.futures.ProcessPoolExecutor ( self.max_workers ) as exe: |
456 |
+ with concurrent.futures.ThreadPoolExecutor ( self.max_workers ) as exe: |
457 |
+ running_jobs = frozenset ( |
458 |
+ exe.submit ( _calculate_hashes, job, self.hashes ) |
459 |
+ for job in self._jobs.values() |
460 |
+ ) |
461 |
+ |
462 |
+ # wait |
463 |
+ for finished_job in concurrent.futures.as_completed ( running_jobs ): |
464 |
+ if finished_job.exception() is not None: |
465 |
+ raise finished_job.exception() |
466 |
+ # --- end of run (...) --- |
467 |
+ |
468 |
+ def reset ( self ): |
469 |
+ self._jobs.clear() |
470 |
+ # --- end of reset (...) --- |
471 |
+ |
472 |
+ def get ( self, backref ): |
473 |
+ return self._jobs [backref].hashdict |
474 |
+ # --- end of get (...) --- |