Gentoo Archives: gentoo-commits

From: "André Erdmann" <dywi@×××××××.de>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/remote/
Date: Tue, 31 Jul 2012 17:51:59
Message-Id: 1343757062.2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9.dywi@gentoo
1 commit: 2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9
2 Author: André Erdmann <dywi <AT> mailerd <DOT> de>
3 AuthorDate: Tue Jul 31 17:51:02 2012 +0000
4 Commit: André Erdmann <dywi <AT> mailerd <DOT> de>
5 CommitDate: Tue Jul 31 17:51:02 2012 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=2b49ac8b
7
8 remote: get packages via http
9
10 ---
11 roverlay/remote/basicrepo.py | 241 ++++++++----------------
12 roverlay/remote/repolist.py | 10 +-
13 roverlay/remote/repoloader.py | 49 ++++--
14 roverlay/remote/rsync.py | 23 ++-
15 roverlay/remote/websync.py | 410 +++++++++++++++++++++++++++++++++++++++++
16 5 files changed, 536 insertions(+), 197 deletions(-)
17
18 diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py
19 index 3dd09de..65b07eb 100644
20 --- a/roverlay/remote/basicrepo.py
21 +++ b/roverlay/remote/basicrepo.py
22 @@ -35,14 +35,17 @@ def normalize_uri ( uri, protocol, force_protocol=False ):
23 return uri
24 # --- end of normalize_uri (...) ---
25
26 -class LocalRepo ( object ):
27 +class BasicRepo ( object ):
28 """
29 This class represents a local repository - all packages are assumed
30 to exist in its distfiles dir and no remote syncing will occur.
31 It's the base class for remote repos.
32 """
33
34 - def __init__ ( self, name, distroot, directory=None, src_uri=None ):
35 + def __init__ ( self,
36 + name, distroot,
37 + directory=None, src_uri=None, is_remote=False, remote_uri=None
38 + ):
39 """Initializes a LocalRepo.
40
41 arguments:
42 @@ -50,29 +53,34 @@ class LocalRepo ( object ):
43 * directory -- distfiles dir, defaults to <DISTFILES root>/<name>
44 * src_uri -- SRC_URI, defaults to http://localhost/R-Packages/<name>
45 """
46 - self.name = name
47 -
48 + self.name = name
49 self.logger = logging.getLogger (
50 self.__class__.__name__ + ':' + self.name
51 )
52
53 if directory is None:
54 - self.distdir = os.path.join (
55 - distroot,
56 - # subdir repo names like CRAN/contrib are ok,
57 - # but make sure to use the correct path separator
58 - self.name.replace ( '/', os.path.sep ),
59 - )
60 + # subdir repo names like CRAN/contrib are ok,
61 + # but make sure to use the correct path separator
62 + self.distdir = \
63 + distroot + os.path.sep + self.name.replace ( '/', os.path.sep )
64 +
65 else:
66 self.distdir = directory
67
68 if src_uri is None:
69 - self.src_uri = '/'.join ( ( LOCALREPO_SRC_URI, self.name ) )
70 + self.src_uri = LOCALREPO_SRC_URI + '/' + self.name
71 + elif len ( src_uri ) > 0 and src_uri [-1] == '/':
72 + self.src_uri = src_uri [:-1]
73 else:
74 self.src_uri = src_uri
75
76 self.sync_status = 0
77
78 + if remote_uri is not None:
79 + self.is_remote = True
80 + self.remote_uri = remote_uri
81 + else:
82 + self.is_remote = is_remote
83 # --- end of __init__ (...) ---
84
85 def ready ( self ):
86 @@ -110,9 +118,26 @@ class LocalRepo ( object ):
87 # --- end of _set_fail (...) ---
88
89 def __str__ ( self ):
90 - return "repo '%s': DISTDIR '%s', SRC_URI '%s'" % (
91 - self.name, self.distdir, self.src_uri
92 - )
93 + if hasattr ( self, 'remote_uri' ):
94 + return \
95 + '{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}, '\
96 + 'REMOTE_URI {remote_uri!r}.'.format (
97 + cls = self.__class__.__name__,
98 + name = self.name,
99 + distdir = self.distdir,
100 + src_uri = self.src_uri \
101 + if hasattr ( self, 'src_uri' ) else '[none]',
102 + remote_uri = self.remote_uri
103 + )
104 + else:
105 + return '{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}.'.\
106 + format (
107 + cls = self.__class__.__name__,
108 + name = self.name,
109 + distdir = self.distdir,
110 + src_uri = self.src_uri \
111 + if hasattr ( self, 'src_uri' ) else '[none]'
112 + )
113 # --- end of __str__ (...) ---
114
115 def get_name ( self ):
116 @@ -125,16 +150,24 @@ class LocalRepo ( object ):
117 return self.distdir
118 # --- end of get_distdir (...) ---
119
120 + def get_remote_uri ( self ):
121 + """Returns the remote uri of this RemoteRepo which is used for syncing."""
122 + return self.remote_uri if hasattr ( self, 'remote_uri' ) else None
123 + # --- end of get_remote_uri (...) ---
124 +
125 + # get_remote(...) -> get_remote_uri(...)
126 + get_remote = get_remote_uri
127 +
128 def get_src_uri ( self, package_file=None ):
129 """Returns the SRC_URI of this repository.
130
131 arguments:
132 * package_file -- if set and not None: returns a SRC_URI for this pkg
133 """
134 - if package_file is None:
135 - return self.src_uri
136 + if package_file is not None:
137 + return self.src_uri + '/' + package_file
138 else:
139 - return '/'.join ( ( self.src_uri, package_file ) )
140 + return self.src_uri
141 # --- end of get_src_uri (...) ---
142
143 # get_src(...) -> get_src_uri(...)
144 @@ -166,6 +199,28 @@ class LocalRepo ( object ):
145 return status
146 # --- end of sync (...) ---
147
148 + def _package_nofail ( self, log_bad, **data ):
149 + """Tries to create a PackageInfo.
150 + Logs failure if log_bad is True.
151 +
152 + arguments:
153 + * log_bad --
154 + * data -- PackageInfo data
155 +
156 + returns: PackageInfo on success, else None.
157 + """
158 + try:
159 + return PackageInfo ( **data )
160 + except ValueError as expected:
161 + if log_bad:
162 + #self.logger.exception ( expected )
163 + self.logger.info (
164 + "filtered {f!r}: bad package".format ( f=filename )
165 + )
166 + return None
167 +
168 + # --- end of _package_nofail (...) ---
169 +
170 def scan_distdir ( self,
171 is_package=None, log_filtered=False, log_bad=True
172 ):
173 @@ -183,30 +238,9 @@ class LocalRepo ( object ):
174
175 raises: AssertionError if is_package is neither None nor a callable.
176 """
177 -
178 - def package_nofail ( filename, distdir ):
179 - """Tries to create a PackageInfo.
180 - Logs failure if log_bad is True.
181 -
182 - arguments:
183 - * filename -- name of the package file (including .tar* suffix)
184 - * distdir -- filename's directory
185 -
186 - returns: PackageInfo on success, else None.
187 - """
188 - try:
189 - return PackageInfo (
190 - filename=filename, origin=self, distdir=distdir
191 - )
192 - except ( ValueError, ) as expected:
193 - if log_bad:
194 - #self.logger.exception ( expected )
195 - self.logger.info (
196 - "filtered %r: bad package" % filename
197 - )
198 - return None
199 -
200 - # --- end of package_nofail (...) ---
201 + package_nofail = lambda filename, distdir : self._package_nofail (
202 + log_bad=log_bad, filename=filename, distdir=distdir, origin=self
203 + )
204
205 if is_package is None:
206 # unfiltered variant
207 @@ -219,7 +253,7 @@ class LocalRepo ( object ):
208 if pkg is not None:
209 yield pkg
210
211 - elif hasattr ( is_package, '__call__' ):
212 + else:
213 # filtered variant (adds an if is_package... before yield)
214 for dirpath, dirnames, filenames in os.walk ( self.distdir ):
215 distdir = dirpath if dirpath != self.distdir else None
216 @@ -233,127 +267,6 @@ class LocalRepo ( object ):
217 self.logger.debug (
218 "filtered %r: not a package" % filename
219 )
220 -
221 -
222 - else:
223 - # faulty variant, raises Exception
224 - raise AssertionError (
225 - "is_package should either be None or a function."
226 - )
227 - #yield None
228 -
229 # --- end of scan_distdir (...) ---
230
231 -# --- end of LocalRepo ---
232 -
233 -
234 -class RemoteRepo ( LocalRepo ):
235 - """A template for remote repositories."""
236 -
237 - def __init__ (
238 - self, name, distroot, sync_proto,
239 - directory=None,
240 - src_uri=None, remote_uri=None, base_uri=None
241 - ):
242 - """Initializes a RemoteRepo.
243 - Mainly consists of URI calculation that derived classes may find useful.
244 -
245 - arguments:
246 - * name --
247 - * sync_proto -- protocol used for syncing (e.g. 'rsync')
248 - * directory --
249 - * src_uri -- src uri, if set, else calculated using base/remote uri,
250 - the leading <proto>:// can be left out in which case
251 - http is assumed
252 - * remote_uri -- uri used for syncing, if set, else calculated using
253 - base/src uri, the leading <proto>:// can be left out
254 - * base_uri -- used to calculate remote/src uri,
255 - example: localhost/R-packages/something
256 -
257 - keyword condition:
258 - * | { x : x in union(src,remote,base) and x not None } | >= 1
259 - ^= at least one out of src/remote/base uri is not None
260 - """
261 - super ( RemoteRepo, self ) . __init__ (
262 - name, distroot, directory, src_uri=''
263 - )
264 -
265 - self.sync_proto = sync_proto
266 -
267 - # detemerine uris
268 - if src_uri is None and remote_uri is None:
269 - if base_uri is None:
270 - # keyword condition not met
271 - raise Exception ( "Bad initialization of RemoteRepo!" )
272 -
273 - else:
274 - # using base_uri for src,remote
275 - self.src_uri = URI_SEPARATOR.join (
276 - ( DEFAULT_PROTOCOL, base_uri )
277 - )
278 -
279 - self.remote_uri = URI_SEPARATOR.join (
280 - ( sync_proto, base_uri )
281 - )
282 -
283 - elif src_uri is None:
284 - # remote_uri is not None
285 - self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
286 -
287 - if base_uri is not None:
288 - # using base_uri for src_uri
289 - self.src_uri = URI_SEPARATOR.join (
290 - ( DEFAULT_PROTOCOL, base_uri )
291 - )
292 - else:
293 - # using remote_uri for src_uri
294 - self.src_uri = normalize_uri (
295 - self.remote_uri, DEFAULT_PROTOCOL, force_protocol=True
296 - )
297 -
298 - elif remote_uri is None:
299 - # src_uri is not None
300 - self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
301 -
302 - if base_uri is not None:
303 - # using base_uri for remote_uri
304 - self.remote_uri = URI_SEPARATOR.join (
305 - ( self.sync_proto, base_uri )
306 - )
307 - else:
308 - # using src_uri for remote_uri
309 - self.remote_uri = normalize_uri (
310 - self.src_uri, self.sync_proto, force_protocol=True
311 - )
312 - else:
313 - # remote and src not None
314 - self.remote_uri = normalize_uri ( remote_uri, self.sync_proto )
315 - self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL )
316 -
317 - # --- end of __init__ (...) ---
318 -
319 - def get_remote_uri ( self ):
320 - """Returns the remote uri of this RemoteRepo which used for syncing."""
321 - return self.remote_uri
322 - # --- end of get_remote_uri (...) ---
323 -
324 - # get_remote(...) -> get_remote_uri(...)
325 - get_remote = get_remote_uri
326 -
327 - def _dosync ( self ):
328 - """Gets packages from remote(s) and returns True if the repo is ready
329 - for overlay creation, else False.
330 -
331 - Derived classes have to implement this method.
332 - """
333 - raise Exception ( "RemoteRepo does not implement sync()." )
334 - # --- end of _dosync (...) ---
335 -
336 - def __str__ ( self ):
337 - return "repo '%s': DISTDIR '%s', SRC_URI '%s', REMOTE_URI '%s'" % (
338 - self.name, self.distdir, self.src_uri, self.remote_uri
339 - )
340 - # --- end of __str__ (...) ---
341 -
342 -# --- end of RemoteRepo ---
343 -
344 +# --- end of BasicRepo ---
345
346 diff --git a/roverlay/remote/repolist.py b/roverlay/remote/repolist.py
347 index cc673e6..a623db0 100644
348 --- a/roverlay/remote/repolist.py
349 +++ b/roverlay/remote/repolist.py
350 @@ -4,7 +4,7 @@ import os.path
351
352 from roverlay import config
353 from roverlay.remote.repoloader import read_repofile
354 -from roverlay.remote.basicrepo import LocalRepo
355 +from roverlay.remote.basicrepo import BasicRepo
356
357 class RepoList ( object ):
358 """Controls several Repo objects."""
359 @@ -55,7 +55,7 @@ class RepoList ( object ):
360 # --- end of _pkg_filter (...) ---
361
362 def add_distdir ( self, distdir, src_uri=None, name=None ):
363 - """Adds a local package directory as LocalRepo.
364 + """Adds a local package directory as BasicRepo.
365
366 arguments:
367 * distdir --
368 @@ -64,7 +64,7 @@ class RepoList ( object ):
369 (FIXME: could add RESTRICT="fetch" to those ebuilds)
370 * name -- name of the repo, defaults to os.path.basename (distdir)
371 """
372 - self.repos.append ( LocalRepo (
373 + self.repos.append ( BasicRepo (
374 name=os.path.basename ( distdir ) if name is None else name,
375 directory=distdir,
376 src_uri=src_uri
377 @@ -72,7 +72,7 @@ class RepoList ( object ):
378 # --- end of add_distdir (...) ---
379
380 def add_distdirs ( self, distdirs ):
381 - """Adds several distdirs as LocalRepos.
382 + """Adds several distdirs as BasicRepos.
383 All distdirs will have an invalid SRC_URI and a default name,
384 use add_distdir() if you want usable ebuilds.
385
386 @@ -81,7 +81,7 @@ class RepoList ( object ):
387 """
388 def gen_repos():
389 for d in distdirs:
390 - repo = LocalRepo (
391 + repo = BasicRepo (
392 name=os.path.basename ( d ),
393 # FIXME: --force_distroot should block --distdir
394 directory=d,
395
396 diff --git a/roverlay/remote/repoloader.py b/roverlay/remote/repoloader.py
397 index 2a92526..ba49d0a 100644
398 --- a/roverlay/remote/repoloader.py
399 +++ b/roverlay/remote/repoloader.py
400 @@ -11,8 +11,9 @@ except ImportError:
401
402 from roverlay import config
403
404 -from roverlay.remote.basicrepo import LocalRepo
405 -from roverlay.remote.rsync import RsyncRepo
406 +from roverlay.remote import basicrepo
407 +from roverlay.remote import rsync
408 +from roverlay.remote import websync
409
410 LOGGER = logging.getLogger ( 'repoloader' )
411
412 @@ -51,35 +52,49 @@ def read_repofile ( repo_file, distroot, lenient=False, force_distroot=False ):
413
414 repo_type = get ( 'type', 'rsync' ).lower()
415
416 - repo_name = get ( 'name', name )
417 + common_kwargs = dict (
418 + name = get ( 'name', name ),
419 + directory = None if force_distroot else get ( 'directory' ),
420 + distroot = distroot,
421 + src_uri = get ( 'src_uri' )
422 + )
423
424 - repo_distdir = None if force_distroot else get ( 'directory' )
425
426
427 if repo_type == 'local':
428 - repo = LocalRepo (
429 - name = repo_name,
430 - distroot = distroot,
431 - directory = repo_distdir,
432 - src_uri = get ( 'src_uri' )
433 - )
434 + repo = basicrepo.BasicRepo ( **common_kwargs )
435 +
436 elif repo_type == 'rsync':
437 extra_opts = get ( 'extra_rsync_opts' )
438 if extra_opts:
439 extra_opts = extra_opts.split ( ' ' )
440
441 - repo = RsyncRepo (
442 - name = repo_name,
443 - distroot = distroot,
444 - directory = repo_distdir,
445 - src_uri = get ( 'src_uri' ),
446 + repo = rsync.RsyncRepo (
447 rsync_uri = get ( 'rsync_uri' ),
448 - base_uri = get ( 'base_uri' ),
449 extra_opts = extra_opts,
450 recursive = get ( 'recursive', False ) == 'yes',
451 + **common_kwargs
452 + )
453 +
454 + elif repo_type == 'websync_repo':
455 + repo = websync.WebsyncRepo (
456 + pkglist_file = get ( 'pkglist_file', 'PACKAGES' ),
457 + pkglist_uri = get ( 'pkglist_uri' ),
458 + digest_type = get ( 'digest_type' ) or get ( 'digest' ),
459 + **common_kwargs
460 )
461 +
462 + elif repo_type in ( 'websync_pkglist', 'websync_package_list' ):
463 + repo = websync.WebsyncPackageList (
464 + pkglist_file = get ( 'pkglist_file' ) or get ( 'pkglist' ),
465 + #digest_type = get ( 'digest_type' ) or get ( 'digest' ),
466 + **common_kwargs
467 + )
468 +
469 else:
470 - LOGGER.error ( "Unknown repo type %s for %s" % ( repo_type, name ) )
471 + LOGGER.error ( "Unknown repo type {} for {}!".format (
472 + repo_type, name
473 + ) )
474
475
476 if repo is not None:
477
478 diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py
479 index 11efd27..90f21f5 100644
480 --- a/roverlay/remote/rsync.py
481 +++ b/roverlay/remote/rsync.py
482 @@ -4,8 +4,7 @@ import subprocess
483
484 from roverlay import config, util
485
486 -#from roverlay.remote.basicrepo import LocalRepo, RemoteRepo
487 -from roverlay.remote.basicrepo import RemoteRepo
488 +from roverlay.remote.basicrepo import BasicRepo
489
490 RSYNC_ENV = util.keepenv (
491 'PATH',
492 @@ -46,12 +45,16 @@ DEFAULT_RSYNC_OPTS = (
493 '--chmod=ugo=r,u+w,Dugo+x', # 0755 for transferred dirs, 0644 for files
494 )
495
496 -class RsyncRepo ( RemoteRepo ):
497 +class RsyncRepo ( BasicRepo ):
498
499 - def __init__ (
500 - self, name, distroot,
501 - directory=None, src_uri=None, rsync_uri=None, base_uri=None,
502 - recursive=False, extra_opts=None
503 + def __init__ ( self,
504 + name,
505 + distroot,
506 + src_uri,
507 + rsync_uri,
508 + directory=None,
509 + recursive=False,
510 + extra_opts=None
511 ):
512 """Initializes an RsyncRepo.
513
514 @@ -68,8 +71,8 @@ class RsyncRepo ( RemoteRepo ):
515 # using '' as remote protocol which leaves uris unchanged when
516 # normalizing them for rsync usage
517 super ( RsyncRepo, self ) . __init__ (
518 - name, distroot=distroot, sync_proto='', directory=directory,
519 - src_uri=src_uri, remote_uri=rsync_uri, base_uri=base_uri
520 + name=name, distroot=distroot, directory=directory,
521 + src_uri=src_uri, remote_uri=rsync_uri
522 )
523
524 # syncing directories, not files - always appending a slash at the end
525 @@ -83,8 +86,6 @@ class RsyncRepo ( RemoteRepo ):
526 self.extra_opts.extend ( extra_opts )
527 else:
528 self.extra_opts = extra_opts
529 -
530 - self.sync_protocol = 'rsync'
531 # --- end of __init__ (...) ---
532
533 def _rsync_argv ( self ):
534
535 diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py
536 new file mode 100644
537 index 0000000..a0ded6c
538 --- /dev/null
539 +++ b/roverlay/remote/websync.py
540 @@ -0,0 +1,410 @@
541 +
542 +import re
543 +import os
544 +import urllib2
545 +
546 +from roverlay import digest, util
547 +from roverlay.packageinfo import PackageInfo
548 +from roverlay.remote.basicrepo import BasicRepo
549 +
550 +class WebsyncBase ( BasicRepo ):
551 + """Provides functionality for retrieving R packages via http.
552 + Not meant for direct usage."""
553 +
554 + def __init__ ( self,
555 + name,
556 + distroot,
557 + src_uri,
558 + directory=None,
559 + digest_type=None
560 + ):
561 + """Initializes a WebsyncBase instance.
562 +
563 + arguments:
564 + * name -- see BasicRepo
565 + * distroot -- ^
566 + * src_uri -- ^
567 + * directory -- ^
568 + * digest_type -- if set and not None/"None":
569 + verify packages using the given digest type
570 + Supported digest types: 'md5'.
571 + """
572 + super ( WebsyncBase, self ) . __init__ (
573 + name=name,
574 + distroot=distroot,
575 + src_uri=src_uri,
576 + remote_uri=src_uri,
577 + directory=directory
578 + )
579 +
580 + if digest_type is None:
581 + self._digest_type = None
582 +
583 + elif str ( digest_type ).lower() in ( 'none', 'disabled', 'off' ):
584 + self._digest_type = None
585 +
586 + elif digest.digest_supported ( digest_type ):
587 + # setting a digest_type (other than None) expects package_list
588 + # to be a 2-tuple <package_file, digest sum> list,
589 + # else a list of package_files is expected.
590 + self._digest_type = digest_type
591 +
592 + else:
593 + raise Exception (
594 + "Unknown/unsupported digest type {}!".format ( digest_type )
595 + )
596 +
597 + # download 8KiB per block
598 + self.transfer_blocksize = 8192
599 + # --- end of __init__ (...) ---
600 +
601 + def _fetch_package_list ( self ):
602 + """This function returns a list of packages to download."""
603 + raise Exception ( "method stub" )
604 + # --- end of _fetch_package_list (...) ---
605 +
606 + def _get_package ( self, package_file, src_uri, expected_digest ):
607 + """Gets a package, i.e. downloads it if it doesn't exist locally
608 + or fails verification (size, digest).
609 +
610 + arguments:
611 + * package_file -- package file name
612 + * src_uri -- uri for package_file
613 + * expected_digest -- expected digest for package_file or None (^=disable)
614 + """
615 + distfile = self.distdir + os.sep + package_file
616 + webh = urllib2.urlopen ( src_uri )
617 + #web_info = webh.info()
618 +
619 + expected_filesize = int ( webh.info().get ( 'content-length', -1 ) )
620 +
621 + if os.access ( distfile, os.F_OK ):
622 + # package exists locally, verify it (size, digest)
623 + fetch_required = False
624 + localsize = os.path.getsize ( distfile )
625 +
626 + if localsize != expected_filesize:
627 + # size mismatch
628 + self.logger.info (
629 + 'size mismatch for {f!r}: expected {websize} bytes '
630 + 'but got {localsize}!'.format (
631 + f = package_file,
632 + websize = expected_filesize,
633 + localsize = localsize
634 + )
635 + )
636 + fetch_required = True
637 +
638 + elif expected_digest is not None:
639 + our_digest = digest.dodigest_file ( distfile, self._digest_type )
640 +
641 + if our_digest != expected_digest:
642 + # digest mismatch
643 + self.logger.warning (
644 + '{dtype} mismatch for {f!r}: '
645 + 'expected {theirs} but got {ours} - refetching.'.format (
646 + dtype = self._digest_type,
647 + f = distfile,
648 + theirs = expected_digest,
649 + ours = our_digest
650 + )
651 + )
652 + fetch_required = True
653 +
654 + else:
655 + fetch_required = True
656 +
657 + if fetch_required:
658 + bytes_fetched = 0
659 +
660 + # FIXME: debug print (?)
661 + print (
662 + "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri )
663 + )
664 +
665 + with open ( distfile, mode='wb' ) as fh:
666 + block = webh.read ( self.transfer_blocksize )
667 + while block:
668 + # write block to file
669 + fh.write ( block )
670 + # ? bytelen
671 + bytes_fetched += len ( block )
672 +
673 + # get the next block
674 + block = webh.read ( self.transfer_blocksize )
675 + # -- with
676 +
677 + if bytes_fetched == expected_filesize:
678 + if expected_digest is not None:
679 + our_digest = digest.dodigest_file ( distfile, self._digest_type )
680 +
681 + if our_digest != expected_digest:
682 + # fetched package's digest does not match the expected one,
683 + # refuse to use it
684 + self.logger.warning (
685 + 'bad {dtype} digest for {f!r}, expected {theirs} but '
686 + 'got {ours} - removing this package.'.format (
687 + dtype = self._digest_type,
688 + f = distfile,
689 + theirs = expected_digest,
690 + ours = our_digest
691 + )
692 + )
693 + os.remove ( distfile )
694 +
695 + # package removed -> return success
696 + return True
697 + # -- if
698 + # -- if
699 +
700 + else:
701 + return False
702 + else:
703 + # FIXME: debug print
704 + print ( "Skipping fetch for {f!r}".format ( f=distfile ) )
705 +
706 + return self._package_synced ( package_file, distfile, src_uri )
707 + # --- end of _get_package (...) ---
708 +
709 + def _package_synced ( self, package_filename, distfile, src_uri ):
710 + """Called when a package has been synced (=exists locally when
711 + _get_package() is done).
712 +
713 + arguments:
714 + * package_filename --
715 + * distfile --
716 + * src_uri --
717 + """
718 + return True
719 + # --- end of _package_synced (...) ---
720 +
721 + def _dosync ( self ):
722 + """Syncs this repo."""
723 + package_list = self._fetch_package_list()
724 +
725 + # empty/unset package list
726 + if not package_list: return True
727 +
728 + util.dodir ( self.distdir )
729 +
730 + success = True
731 +
732 + if self._digest_type is not None:
733 + for package_file, expected_digest in package_list:
734 + src_uri = self.get_src_uri ( package_file )
735 +
736 + if not self._get_package (
737 + package_file, src_uri, expected_digest
738 + ):
739 + success = False
740 + break
741 + else:
742 + for package_file in package_list:
743 + src_uri = self.get_src_uri ( package_file )
744 +
745 + if not self._get_package (
746 + package_file, src_uri, expected_digest=None
747 + ):
748 + success = False
749 + break
750 +
751 + return success
752 + # --- end of _dosync (...) ---
753 +
754 +
755 +class WebsyncRepo ( WebsyncBase ):
756 + """Sync a http repo using its PACKAGES file."""
757 + # FIXME: hardcoded for md5
758 +
759 + def __init__ ( self,
760 + pkglist_uri=None,
761 + pkglist_file=None,
762 + *args,
763 + **kwargs
764 + ):
765 + """Initializes a WebsyncRepo instance.
766 +
767 + arguments:
768 + * pkglist_uri -- if set and not None: uri of the package list file
769 + * pkglist_file -- if set and not None: name of the package list file,
770 + this is used to calculate the pkglist_uri
771 + pkglist_uri = <src_uri>/<pkglist_file>
772 + * *args / **kwargs -- see WebsyncBase / BasicRepo
773 +
774 + pkglist file: this is a file with debian control file-like syntax
775 + listing all packages.
776 + Example: http://www.omegahat.org/R/src/contrib/PACKAGES (2012-07-31)
777 + """
778 + super ( WebsyncRepo, self ) . __init__ ( *args, **kwargs )
779 +
780 + if self._digest_type is None:
781 + self.FIELDREGEX = re.compile (
782 + '^\s*(?P<name>(package|version))[:]\s*(?P<value>.+)',
783 + re.IGNORECASE
784 + )
785 + else:
786 + # used to filter field names (package,version,md5sum)
787 + self.FIELDREGEX = re.compile (
788 + '^\s*(?P<name>(package|version|md5sum))[:]\s*(?P<value>.+)',
789 + re.IGNORECASE
790 + )
791 +
792 + self.pkglist_uri = pkglist_uri or self.get_src_uri ( pkglist_file )
793 + if not self.pkglist_uri:
794 + raise Exception ( "pkglist_uri is unset!" )
795 + # --- end of __init__ (...) ---
796 +
797 + def _fetch_package_list ( self ):
798 + """Returns the list of packages to be downloaded.
799 + List format:
800 + * if digest verification is enabled:
801 + List ::= [ ( package_file, digest ), ... ]
802 + * else
803 + List ::= [ package_file, ... ]
804 + """
805 +
806 + def generate_pkglist ( fh ):
807 + """Generates the package list using the given file handle.
808 +
809 + arguments:
810 + * fh -- file handle to read from
811 + """
812 + info = dict()
813 +
814 + max_info_len = 3 if self._digest_type is not None else 2
815 +
816 + for match in (
817 + filter ( None, (
818 + self.FIELDREGEX.match ( l ) for l in fh.readlines()
819 + ) )
820 + ):
821 + name, value = match.group ( 'name', 'value' )
822 + info [name.lower()] = value
823 +
824 + if len ( info.keys() ) == max_info_len:
825 +
826 + pkgfile = '{name}_{version}.tar.gz'.format (
827 + name=info ['package'], version=info ['version']
828 + )
829 +
830 + if self._digest_type is not None:
831 + yield ( pkgfile, info ['md5sum'] )
832 + #yield ( pkgfile, ( 'md5', info ['md5sum'] ) )
833 + else:
834 + yield pkgfile
835 +
836 + info.clear()
837 + # --- end of generate_pkglist (...) ---
838 +
839 + package_list = ()
840 + try:
841 + webh = urllib2.urlopen ( self.pkglist_uri )
842 +
843 + content_type = webh.info().get ( 'content-type', None )
844 +
845 + if content_type != 'text/plain':
846 + print (
847 + "content type {!r} is not supported!".format ( content_type )
848 + )
849 + else:
850 + package_list = tuple ( generate_pkglist ( webh ) )
851 +
852 + webh.close()
853 +
854 + finally:
855 + if 'webh' in locals() and webh: webh.close()
856 +
857 + return package_list
858 + # --- end of _fetch_package_list (...) ---
859 +
860 +class WebsyncPackageList ( WebsyncBase ):
861 + """Sync packages from multiple remotes via http. Package uris are read
862 + from a file."""
863 +
864 + # FIXME: does not support --nosync
865 +
866 + def __init__ ( self, pkglist_file, *args, **kwargs ):
867 + """Initializes a WebsyncPackageList instance.
868 +
869 + arguments:
870 + * pkglist_file -- path to the package list file that lists
871 + one package http uri per line
872 + * *args / **kwargs -- see WebsyncBase, BasicRepo
873 + """
874 + super ( WebsyncPackageList, self ) . __init__ ( *args, **kwargs )
875 +
876 + self._pkglist_file = os.path.abspath ( pkglist_file )
877 +
878 + del self.src_uri
879 +
880 + self._synced_packages = list()
881 +
882 + # --- end of __init__ (...) ---
883 +
884 + def _fetch_package_list ( self ):
885 + """Returns the package list.
886 + Format:
887 + pkglist ::= [ ( package_file, src_uri ), ... ]
888 + """
889 + pkglist = list()
890 + with open ( self._pkglist_file, mode='r' ) as fh:
891 + for line in fh.readlines():
892 + src_uri = line.strip()
893 + if src_uri:
894 + pkglist.append ( (
895 + src_uri.rpartition ( '/' ) [-1],
896 + src_uri
897 + ) )
898 +
899 + return pkglist
900 + # --- end of _fetch_package_list (...) ---
901 +
902 + def _package_synced ( self, package_filename, distfile, src_uri ):
903 + self._synced_packages.append (
904 + ( package_filename, src_uri )
905 + )
906 + return True
907 + # --- end of _package_synced (...) ---
908 +
909 + def scan_distdir ( self, log_bad=True, **kwargs_ignored ):
910 + for package_filename, src_uri in self._synced_packages:
911 + pkg = self._package_nofail (
912 + log_bad,
913 + filename = package_filename,
914 + origin = self,
915 + src_uri = src_uri
916 + )
917 + if pkg is not None:
918 + yield pkg
919 + # --- end of scan_distdir (...) ---
920 +
921 + def _nosync ( self ):
922 + """nosync - report existing packages"""
923 + for package_file, src_uri in self._fetch_package_list():
924 + distfile = self.distdir + os.sep + package_file
925 + if os.access ( distfile, os.F_OK ):
926 + self._package_synced ( package_file, distfile, src_uri )
927 +
928 + return True
929 + # --- end of _nosync (...) ---
930 +
931 + def _dosync ( self ):
932 + """Sync packages."""
933 + package_list = self._fetch_package_list()
934 +
935 + # empty/unset package list
936 + if not package_list: return True
937 +
938 + util.dodir ( self.distdir )
939 +
940 + success = True
941 +
942 + for package_file, src_uri in package_list:
943 + if not self._get_package (
944 + package_file, src_uri, expected_digest=None
945 + ):
946 + success = False
947 + break
948 +
949 + return success
950 + # --- end of _dosync (...) ---