Gentoo Archives: gentoo-commits

From: "André Erdmann" <dywi@×××××××.de>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/stats/, roverlay/overlay/pkgdir/manifest/, roverlay/, roverlay/util/, ...
Date: Wed, 31 Jul 2013 21:10:36
Message-Id: 1375304856.86d046b36e0a8a882a4f5d01a8e1eee82bc335b5.dywi@gentoo
1 commit: 86d046b36e0a8a882a4f5d01a8e1eee82bc335b5
2 Author: André Erdmann <dywi <AT> mailerd <DOT> de>
3 AuthorDate: Wed Jul 31 21:07:36 2013 +0000
4 Commit: André Erdmann <dywi <AT> mailerd <DOT> de>
5 CommitDate: Wed Jul 31 21:07:36 2013 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=86d046b3
7
8 incremental ov-creation: postpone revbump check
9
10 The revbump check needs hashes for comparing. Postponing the hash calculation
11 and executing it in a thread pool may have speed advantages (needs some
12 testing). Additionally, it allows revbump checks to be explicitly disabled
13 when doing incremental overlay creation.
14
15 ---
16 roverlay/main.py | 1 +
17 roverlay/overlay/creator.py | 113 +++++++++++++++++++++-
18 roverlay/overlay/pkgdir/manifest/file.py | 3 +
19 roverlay/overlay/pkgdir/packagedir_base.py | 23 ++++-
20 roverlay/overlay/pkgdir/packagedir_newmanifest.py | 2 +
21 roverlay/overlay/root.py | 34 ++++---
22 roverlay/stats/base.py | 16 ++-
23 roverlay/util/hashpool.py | 66 +++++++++++++
24 8 files changed, 232 insertions(+), 26 deletions(-)
25
26 diff --git a/roverlay/main.py b/roverlay/main.py
27 index 9cca0a8..7830d23 100644
28 --- a/roverlay/main.py
29 +++ b/roverlay/main.py
30 @@ -292,6 +292,7 @@ def main (
31 )
32
33 repo_list.add_packages ( overlay_creator.add_package )
34 + overlay_creator.enqueue_postponed()
35
36 overlay_creator.release_package_rules()
37
38
39 diff --git a/roverlay/overlay/creator.py b/roverlay/overlay/creator.py
40 index c21617e..32dacbc 100644
41 --- a/roverlay/overlay/creator.py
42 +++ b/roverlay/overlay/creator.py
43 @@ -38,7 +38,7 @@ import roverlay.overlay.pkgdir.distroot.static
44 import roverlay.recipe.distmap
45 import roverlay.recipe.easyresolver
46 import roverlay.stats.collector
47 -
48 +import roverlay.util.hashpool
49
50 class OverlayCreator ( object ):
51 """This is a 'R packages -> Overlay' interface."""
52 @@ -46,6 +46,8 @@ class OverlayCreator ( object ):
53 LOGGER = logging.getLogger ( 'OverlayCreator' )
54 STATS = roverlay.stats.collector.static.overlay_creation
55
56 + HASHPOOL_WORKER_COUNT = 0
57 +
58 def __init__ ( self,
59 skip_manifest, incremental, immediate_ebuild_writes,
60 logger=None, allow_write=True
61 @@ -82,7 +84,8 @@ class OverlayCreator ( object ):
62
63 self.NUMTHREADS = config.get ( 'EBUILD.jobcount', 0 )
64
65 - self._pkg_queue = queue.Queue()
66 + self._pkg_queue = queue.Queue()
67 + self._pkg_queue_postponed = list()
68 self._err_queue.attach_queue ( self._pkg_queue, None )
69
70 self._workers = None
71 @@ -119,14 +122,18 @@ class OverlayCreator ( object ):
72 )
73 # --- end of _get_resolver_channel (...) ---
74
75 - def add_package ( self, package_info ):
76 + def add_package ( self, package_info, allow_postpone=True ):
77 """Adds a PackageInfo to the package queue.
78
79 arguments:
80 * package_info --
81 """
82 if self.package_rules.apply_actions ( package_info ):
83 - if self.overlay.add ( package_info ):
84 + add_result = self.overlay.add (
85 + package_info, allow_postpone=allow_postpone
86 + )
87 +
88 + if add_result is True:
89 ejob = roverlay.ebuild.creation.EbuildCreation (
90 package_info,
91 depres_channel_spawner = self._get_resolver_channel,
92 @@ -134,13 +141,109 @@ class OverlayCreator ( object ):
93 )
94 self._pkg_queue.put ( ejob )
95 self.stats.pkg_queued.inc()
96 - else:
97 +
98 + elif add_result is False:
99 self.stats.pkg_dropped.inc()
100 +
101 + elif allow_postpone:
102 + self.stats.pkg_queue_postponed.inc()
103 + self._pkg_queue_postponed.append ( ( package_info, add_result ) )
104 +
105 + else:
106 + raise Exception ( "bad return from overlay.add()" )
107 +
108 else:
109 # else filtered out
110 self.stats.pkg_filtered.inc()
111 # --- end of add_package (...) ---
112
113 + def discard_postponed ( self ):
114 + self._pkg_queue_postponed [:] = []
115 + # --- end of discard_postponed (...) ---
116 +
117 + def enqueue_postponed ( self, prehash_manifest=False ):
118 + # !!! prehash_manifest results in threaded calculation taking
119 + # _more_ time than single-threaded. Postponed packages usually
120 + # don't need a revbump, so the benefits (i.e. no need to recalculate
121 + # on revbump) likely do not outweigh the time penalty here
122 + #
123 +
124 + if self._pkg_queue_postponed:
125 + qtime = self.stats.queue_postponed_time
126 +
127 + self.logger.info ( "Checking for packages that need a revbump" )
128 +
129 + if self.HASHPOOL_WORKER_COUNT < 1:
130 + pass
131 +
132 + elif roverlay.util.hashpool.HAVE_CONCURRENT_FUTURES:
133 + qtime.begin ( "setup_hashpool" )
134 +
135 + # determine hashes that should be calculated here
136 + if prehash_manifest:
137 + # * calculate all hashes, not just the distmap one
138 + # * assuming that all package dirs are of the same class/type
139 + #
140 + distmap_hash = PackageInfo.DISTMAP_DIGEST_TYPE
141 + extra_hashes = self._pkg_queue_postponed[0][1]().HASH_TYPES
142 +
143 + if not extra_hashes:
144 + hashes = { distmap_hash, }
145 + elif distmap_hash in extra_hashes:
146 + hashes = extra_hashes
147 + else:
148 + hashes = extra_hashes | { distmap_hash, }
149 + else:
150 + hashes = { PackageInfo.DISTMAP_DIGEST_TYPE, }
151 +
152 + my_hashpool = roverlay.util.hashpool.HashPool (
153 + hashes, self.HASHPOOL_WORKER_COUNT
154 + )
155 +
156 + for p_info, pkgdir_ref in self._pkg_queue_postponed:
157 + my_hashpool.add (
158 + id ( p_info ),
159 + p_info.get ( "package_file" ), p_info.hashdict
160 + )
161 +
162 + qtime.end ( "setup_hashpool" )
163 +
164 + qtime.begin ( "make_hashes" )
165 + my_hashpool.run()
166 + qtime.end ( "make_hashes" )
167 + else:
168 + self.logger.warning (
169 + "enqueue_postponed(): falling back to single-threaded variant."
170 + )
171 + # -- end if HAVE_CONCURRENT_FUTURES
172 +
173 + qtime.begin ( "queue_packages" )
174 + for p_info, pkgdir_ref in self._pkg_queue_postponed:
175 + add_result = pkgdir_ref().add ( p_info, allow_postpone=False )
176 +
177 + if add_result is True:
178 + ejob = roverlay.ebuild.creation.EbuildCreation (
179 + p_info,
180 + depres_channel_spawner = self._get_resolver_channel,
181 + err_queue = self._err_queue
182 + )
183 + self._pkg_queue.put ( ejob )
184 + self.stats.pkg_queued.inc()
185 +
186 + elif add_result is False:
187 + self.stats.pkg_dropped.inc()
188 +
189 + else:
190 + raise Exception (
191 + "enqueue_postponed() should not postpone packages further."
192 + )
193 + # -- end for
194 + qtime.end ( "queue_packages" )
195 +
196 + # clear list
197 + self._pkg_queue_postponed[:] = []
198 + # --- end of enqueue_postponed (...) ---
199 +
200 def write_overlay ( self ):
201 """Writes the overlay."""
202 if self.overlay.writeable():
203
204 diff --git a/roverlay/overlay/pkgdir/manifest/file.py b/roverlay/overlay/pkgdir/manifest/file.py
205 index 9cf8425..f9b6b8d 100644
206 --- a/roverlay/overlay/pkgdir/manifest/file.py
207 +++ b/roverlay/overlay/pkgdir/manifest/file.py
208 @@ -26,6 +26,9 @@ class ManifestFile ( object ):
209 provided by hashlib (via roverlay.digest).
210 """
211
212 + # ref
213 + HASH_TYPES = ManifestEntry.HASHTYPES
214 +
215 def __init__ ( self, root ):
216 self.root = root
217 self.filepath = root + os.path.sep + 'Manifest'
218
219 diff --git a/roverlay/overlay/pkgdir/packagedir_base.py b/roverlay/overlay/pkgdir/packagedir_base.py
220 index 5f54279..7d5f906 100644
221 --- a/roverlay/overlay/pkgdir/packagedir_base.py
222 +++ b/roverlay/overlay/pkgdir/packagedir_base.py
223 @@ -57,6 +57,11 @@ class PackageDirBase ( object ):
224 #
225 MANIFEST_THREADSAFE = None
226
227 + # a set(!) of hash types which are used by the package dir
228 + # implementation (if any, else None)
229 + # other subsystems might calculate them in advance if advertised here
230 + HASH_TYPES = None
231 +
232 # DOEBUILD_FETCH
233 # doebuild function that fetches $SRC_URI
234 # can be overridden by subclasses if e.g. on-the-fly manifest creation
235 @@ -171,7 +176,9 @@ class PackageDirBase ( object ):
236 return p
237 # --- end of _scan_add_package (...) ---
238
239 - def add ( self, package_info, add_if_physical=False, _lock=True ):
240 + def add ( self,
241 + package_info, add_if_physical=False, allow_postpone=False, _lock=True
242 + ):
243 """Adds a package to this PackageDir.
244
245 arguments:
246 @@ -179,7 +186,7 @@ class PackageDirBase ( object ):
247 * add_if_physical -- add package even if it exists as ebuild file
248 (-> overwrite old ebuilds)
249
250 - returns: success as bool
251 + returns: success as bool // weakref
252
253 raises: Exception when ebuild already exists.
254 """
255 @@ -194,11 +201,15 @@ class PackageDirBase ( object ):
256 if shortver in self._packages:
257 # package exists, check if it existed before script invocation
258 if self._packages [shortver] ['physical_only']:
259 +
260 if add_if_physical:
261 # else ignore ebuilds that exist as file
262 self._packages [shortver] = package_info
263 added = True
264
265 + elif allow_postpone:
266 + added = None
267 +
268 elif self.DISTMAP.check_revbump_necessary ( package_info ):
269 # resolve by recursion
270 added = self.add (
271 @@ -213,8 +224,8 @@ class PackageDirBase ( object ):
272 )
273 else:
274 # package has been added to this packagedir before,
275 - # this most likely happens if it is available via several
276 - # remotes
277 + # this most likely happens if it is available from
278 + # more than one repo
279 self.logger.debug (
280 "'{PN}-{PVR}.ebuild' already exists, cannot add it!".format (
281 PN=self.name, PVR=shortver
282 @@ -232,8 +243,10 @@ class PackageDirBase ( object ):
283 # !! package_info <-> self (double-linked)
284 package_info.overlay_package_ref = weakref.ref ( self )
285 return True
286 + elif added is None:
287 + return weakref.ref ( self )
288 else:
289 - return False
290 + return added
291 # --- end of add (...) ---
292
293 def check_empty ( self ):
294
295 diff --git a/roverlay/overlay/pkgdir/packagedir_newmanifest.py b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
296 index ae6c059..f7c258c 100644
297 --- a/roverlay/overlay/pkgdir/packagedir_newmanifest.py
298 +++ b/roverlay/overlay/pkgdir/packagedir_newmanifest.py
299 @@ -28,6 +28,8 @@ class PackageDir ( roverlay.overlay.pkgdir.packagedir_base.PackageDirBase ):
300
301 MANIFEST_THREADSAFE = True
302
303 + HASH_TYPES = frozenset ( ManifestFile.HASH_TYPES )
304 +
305 # Manifest entries for imported ebuilds have to be created during import
306 DOEBUILD_FETCH = roverlay.tools.ebuild.doebuild_fetch_and_manifest
307
308
309 diff --git a/roverlay/overlay/root.py b/roverlay/overlay/root.py
310 index 1e423b0..1ecf4ac 100644
311 --- a/roverlay/overlay/root.py
312 +++ b/roverlay/overlay/root.py
313 @@ -519,32 +519,44 @@ class Overlay ( object ):
314 )
315 # --- end of _write_rsuggests_use_desc (...) ---
316
317 - def add ( self, package_info ):
318 + def add ( self, package_info, allow_postpone=False ):
319 """Adds a package to this overlay (into its default category).
320
321 arguments:
322 - * package_info -- PackageInfo of the package to add
323 -
324 - returns: True if successfully added else False
325 + * package_info -- PackageInfo of the package to add
326 + * allow_postpone -- do not add the package if it already exists and
327 + return a weak reference to its package dir instead
328 +
329 + returns:
330 + * True if successfully added
331 + * a weak reference to the package dir object if postponed
332 + * else False
333 """
334 # NOTE:
335 # * "category" keyword arg has been removed, use add_to(^2) instead
336 # * self.default_category must not be None (else KeyError is raised)
337 return self._get_category (
338 package_info.get ( "category", self.default_category )
339 - ).add ( package_info )
340 + ).add ( package_info, allow_postpone=allow_postpone )
341 # --- end of add (...) ---
342
343 - def add_to ( self, package_info, category ):
344 + def add_to ( self, package_info, category, allow_postpone=False ):
345 """Adds a package to this overlay.
346
347 arguments:
348 - * package_info -- PackageInfo of the package to add
349 - * category -- category where the pkg should be put in
350 -
351 - returns: True if successfully added else False
352 + * package_info -- PackageInfo of the package to add
353 + * category -- category where the pkg should be put in
354 + * allow_postpone -- do not add the package if it already exists and
355 + return a weak reference to its package dir instead
356 +
357 + returns:
358 + * True if successfully added
359 + * a weak reference to the package dir object if postponed
360 + * else False
361 """
362 - return self._get_category ( category ).add ( package_info )
363 + return self._get_category ( category ).add (
364 + package_info, allow_postpone=allow_postpone
365 + )
366 # --- end of add_to (...) ---
367
368 def has_dir ( self, _dir ):
369
370 diff --git a/roverlay/stats/base.py b/roverlay/stats/base.py
371 index b51226e..28531c2 100644
372 --- a/roverlay/stats/base.py
373 +++ b/roverlay/stats/base.py
374 @@ -66,16 +66,22 @@ class OverlayCreationStats ( OverlayCreationWorkerStats ):
375 DESCRIPTION = "overlay creation"
376
377 _MEMBERS = (
378 - ( 'creation_time', 'pkg_queued', 'pkg_filtered', 'pkg_dropped', )
379 + (
380 + 'creation_time', 'queue_postponed_time',
381 + 'pkg_queued', 'pkg_queue_postponed',
382 + 'pkg_filtered', 'pkg_dropped',
383 + )
384 + OverlayCreationWorkerStats._MEMBERS
385 )
386
387 def __init__ ( self ):
388 super ( OverlayCreationStats, self ).__init__()
389 - self.pkg_queued = abstract.Counter ( "queued" )
390 - self.pkg_dropped = abstract.Counter ( "dropped" )
391 - self.pkg_filtered = abstract.Counter ( "filtered" )
392 - self.creation_time = abstract.TimeStats ( "ebuild creation" )
393 + self.pkg_queued = abstract.Counter ( "queued" )
394 + self.pkg_queue_postponed = abstract.Counter ( "queue_postponed" )
395 + self.pkg_dropped = abstract.Counter ( "dropped" )
396 + self.pkg_filtered = abstract.Counter ( "filtered" )
397 + self.creation_time = abstract.TimeStats ( "ebuild creation" )
398 + self.queue_postponed_time = abstract.TimeStats ( "queue_postponed" )
399 # --- end of __init__ (...) ---
400
401 def get_relevant_package_count ( self ):
402
403 diff --git a/roverlay/util/hashpool.py b/roverlay/util/hashpool.py
404 new file mode 100644
405 index 0000000..921a968
406 --- /dev/null
407 +++ b/roverlay/util/hashpool.py
408 @@ -0,0 +1,66 @@
409 +# R overlay --
410 +# -*- coding: utf-8 -*-
411 +# Copyright (C) 2013 André Erdmann <dywi@×××××××.de>
412 +# Distributed under the terms of the GNU General Public License;
413 +# either version 2 of the License, or (at your option) any later version.
414 +
415 +try:
416 + import concurrent.futures
417 +except ImportError:
418 + sys.stderr.write (
419 + '!!! concurrent.futures is not available.\n'
420 + ' Falling back to single-threaded variants.\n\n'
421 + )
422 + HAVE_CONCURRENT_FUTURES = False
423 +else:
424 + HAVE_CONCURRENT_FUTURES = True
425 +
426 +
427 +import roverlay.digest
428 +
429 +def _calculate_hashes ( hash_job, hashes ):
430 + hash_job.hashdict.update (
431 + roverlay.digest.multihash_file ( hash_job.filepath, hashes )
432 + )
433 +# --- end of _calculate_hashes (...) ---
434 +
435 +class Hashjob ( object ):
436 + def __init__ ( self, filepath, hashdict=None ):
437 + self.filepath = filepath
438 + self.hashdict = dict() if hashdict is None else hashdict
439 + # --- end of __init__ (...) ---
440 +
441 +
442 +class HashPool ( object ):
443 + def __init__ ( self, hashes, max_workers ):
444 + super ( HashPool, self ).__init__()
445 + self.hashes = frozenset ( hashes )
446 + self._jobs = dict()
447 + self.max_workers = int ( max_workers )
448 + # --- end of __init__ (...) ---
449 +
450 + def add ( self, backref, filepath, hashdict=None ):
451 + self._jobs [backref] = Hashjob ( filepath, hashdict )
452 + # --- end of add (...) ---
453 +
454 + def run ( self ):
455 + #with concurrent.futures.ProcessPoolExecutor ( self.max_workers ) as exe:
456 + with concurrent.futures.ThreadPoolExecutor ( self.max_workers ) as exe:
457 + running_jobs = frozenset (
458 + exe.submit ( _calculate_hashes, job, self.hashes )
459 + for job in self._jobs.values()
460 + )
461 +
462 + # wait
463 + for finished_job in concurrent.futures.as_completed ( running_jobs ):
464 + if finished_job.exception() is not None:
465 + raise finished_job.exception()
466 + # --- end of run (...) ---
467 +
468 + def reset ( self ):
469 + self._jobs.clear()
470 + # --- end of reset (...) ---
471 +
472 + def get ( self, backref ):
473 + return self._jobs [backref].hashdict
474 + # --- end of get (...) ---