1 |
commit: 2b49ac8b4752fa1e5efd3f51f15720e7d70f12a9 |
2 |
Author: André Erdmann <dywi <AT> mailerd <DOT> de> |
3 |
AuthorDate: Tue Jul 31 17:51:02 2012 +0000 |
4 |
Commit: André Erdmann <dywi <AT> mailerd <DOT> de> |
5 |
CommitDate: Tue Jul 31 17:51:02 2012 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=2b49ac8b |
7 |
|
8 |
remote: get packages via http |
9 |
|
10 |
--- |
11 |
roverlay/remote/basicrepo.py | 241 ++++++++---------------- |
12 |
roverlay/remote/repolist.py | 10 +- |
13 |
roverlay/remote/repoloader.py | 49 ++++-- |
14 |
roverlay/remote/rsync.py | 23 ++- |
15 |
roverlay/remote/websync.py | 410 +++++++++++++++++++++++++++++++++++++++++ |
16 |
5 files changed, 536 insertions(+), 197 deletions(-) |
17 |
|
18 |
diff --git a/roverlay/remote/basicrepo.py b/roverlay/remote/basicrepo.py |
19 |
index 3dd09de..65b07eb 100644 |
20 |
--- a/roverlay/remote/basicrepo.py |
21 |
+++ b/roverlay/remote/basicrepo.py |
22 |
@@ -35,14 +35,17 @@ def normalize_uri ( uri, protocol, force_protocol=False ): |
23 |
return uri |
24 |
# --- end of normalize_uri (...) --- |
25 |
|
26 |
-class LocalRepo ( object ): |
27 |
+class BasicRepo ( object ): |
28 |
""" |
29 |
This class represents a local repository - all packages are assumed |
30 |
to exist in its distfiles dir and no remote syncing will occur. |
31 |
It's the base class for remote repos. |
32 |
""" |
33 |
|
34 |
- def __init__ ( self, name, distroot, directory=None, src_uri=None ): |
35 |
+ def __init__ ( self, |
36 |
+ name, distroot, |
37 |
+ directory=None, src_uri=None, is_remote=False, remote_uri=None |
38 |
+ ): |
39 |
"""Initializes a LocalRepo. |
40 |
|
41 |
arguments: |
42 |
@@ -50,29 +53,34 @@ class LocalRepo ( object ): |
43 |
* directory -- distfiles dir, defaults to <DISTFILES root>/<name> |
44 |
* src_uri -- SRC_URI, defaults to http://localhost/R-Packages/<name> |
45 |
""" |
46 |
- self.name = name |
47 |
- |
48 |
+ self.name = name |
49 |
self.logger = logging.getLogger ( |
50 |
self.__class__.__name__ + ':' + self.name |
51 |
) |
52 |
|
53 |
if directory is None: |
54 |
- self.distdir = os.path.join ( |
55 |
- distroot, |
56 |
- # subdir repo names like CRAN/contrib are ok, |
57 |
- # but make sure to use the correct path separator |
58 |
- self.name.replace ( '/', os.path.sep ), |
59 |
- ) |
60 |
+ # subdir repo names like CRAN/contrib are ok, |
61 |
+ # but make sure to use the correct path separator |
62 |
+ self.distdir = \ |
63 |
+ distroot + os.path.sep + self.name.replace ( '/', os.path.sep ) |
64 |
+ |
65 |
else: |
66 |
self.distdir = directory |
67 |
|
68 |
if src_uri is None: |
69 |
- self.src_uri = '/'.join ( ( LOCALREPO_SRC_URI, self.name ) ) |
70 |
+ self.src_uri = LOCALREPO_SRC_URI + '/' + self.name |
71 |
+ elif len ( src_uri ) > 0 and src_uri [-1] == '/': |
72 |
+ self.src_uri = src_uri [:-1] |
73 |
else: |
74 |
self.src_uri = src_uri |
75 |
|
76 |
self.sync_status = 0 |
77 |
|
78 |
+ if remote_uri is not None: |
79 |
+ self.is_remote = True |
80 |
+ self.remote_uri = remote_uri |
81 |
+ else: |
82 |
+ self.is_remote = is_remote |
83 |
# --- end of __init__ (...) --- |
84 |
|
85 |
def ready ( self ): |
86 |
@@ -110,9 +118,26 @@ class LocalRepo ( object ): |
87 |
# --- end of _set_fail (...) --- |
88 |
|
89 |
def __str__ ( self ): |
90 |
- return "repo '%s': DISTDIR '%s', SRC_URI '%s'" % ( |
91 |
- self.name, self.distdir, self.src_uri |
92 |
- ) |
93 |
+ if hasattr ( self, 'remote_uri' ): |
94 |
+ return \ |
95 |
+ '{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}, '\ |
96 |
+ 'REMOTE_URI {remote_uri!r}.'.format ( |
97 |
+ cls = self.__class__.__name__, |
98 |
+ name = self.name, |
99 |
+ distdir = self.distdir, |
100 |
+ src_uri = self.src_uri \ |
101 |
+ if hasattr ( self, 'src_uri' ) else '[none]', |
102 |
+ remote_uri = self.remote_uri |
103 |
+ ) |
104 |
+ else: |
105 |
+ return '{cls} {name}: DISTDIR {distdir!r}, SRC_URI {src_uri!r}.'.\ |
106 |
+ format ( |
107 |
+ cls = self.__class__.__name__, |
108 |
+ name = self.name, |
109 |
+ distdir = self.distdir, |
110 |
+ src_uri = self.src_uri \ |
111 |
+ if hasattr ( self, 'src_uri' ) else '[none]' |
112 |
+ ) |
113 |
# --- end of __str__ (...) --- |
114 |
|
115 |
def get_name ( self ): |
116 |
@@ -125,16 +150,24 @@ class LocalRepo ( object ): |
117 |
return self.distdir |
118 |
# --- end of get_distdir (...) --- |
119 |
|
120 |
+ def get_remote_uri ( self ): |
121 |
+ """Returns the remote uri of this repo, which is used for syncing.""" |
122 |
+ return self.remote_uri if hasattr ( self, 'remote_uri' ) else None |
123 |
+ # --- end of get_remote_uri (...) --- |
124 |
+ |
125 |
+ # get_remote(...) -> get_remote_uri(...) |
126 |
+ get_remote = get_remote_uri |
127 |
+ |
128 |
def get_src_uri ( self, package_file=None ): |
129 |
"""Returns the SRC_URI of this repository. |
130 |
|
131 |
arguments: |
132 |
* package_file -- if set and not None: returns a SRC_URI for this pkg |
133 |
""" |
134 |
- if package_file is None: |
135 |
- return self.src_uri |
136 |
+ if package_file is not None: |
137 |
+ return self.src_uri + '/' + package_file |
138 |
else: |
139 |
- return '/'.join ( ( self.src_uri, package_file ) ) |
140 |
+ return self.src_uri |
141 |
# --- end of get_src_uri (...) --- |
142 |
|
143 |
# get_src(...) -> get_src_uri(...) |
144 |
@@ -166,6 +199,28 @@ class LocalRepo ( object ): |
145 |
return status |
146 |
# --- end of sync (...) --- |
147 |
|
148 |
+ def _package_nofail ( self, log_bad, **data ): |
149 |
+ """Tries to create a PackageInfo. |
150 |
+ Logs failure if log_bad is True. |
151 |
+ |
152 |
+ arguments: |
153 |
+ * log_bad -- |
154 |
+ * data -- PackageInfo data |
155 |
+ |
156 |
+ returns: PackageInfo on success, else None. |
157 |
+ """ |
158 |
+ try: |
159 |
+ return PackageInfo ( **data ) |
160 |
+ except ValueError as expected: |
161 |
+ if log_bad: |
162 |
+ #self.logger.exception ( expected ) |
163 |
+ self.logger.info ( |
164 |
+ "filtered {f!r}: bad package".format ( f=filename ) |
165 |
+ ) |
166 |
+ return None |
167 |
+ |
168 |
+ # --- end of _package_nofail (...) --- |
169 |
+ |
170 |
def scan_distdir ( self, |
171 |
is_package=None, log_filtered=False, log_bad=True |
172 |
): |
173 |
@@ -183,30 +238,9 @@ class LocalRepo ( object ): |
174 |
|
175 |
raises: AssertionError if is_package is neither None nor a callable. |
176 |
""" |
177 |
- |
178 |
- def package_nofail ( filename, distdir ): |
179 |
- """Tries to create a PackageInfo. |
180 |
- Logs failure if log_bad is True. |
181 |
- |
182 |
- arguments: |
183 |
- * filename -- name of the package file (including .tar* suffix) |
184 |
- * distdir -- filename's directory |
185 |
- |
186 |
- returns: PackageInfo on success, else None. |
187 |
- """ |
188 |
- try: |
189 |
- return PackageInfo ( |
190 |
- filename=filename, origin=self, distdir=distdir |
191 |
- ) |
192 |
- except ( ValueError, ) as expected: |
193 |
- if log_bad: |
194 |
- #self.logger.exception ( expected ) |
195 |
- self.logger.info ( |
196 |
- "filtered %r: bad package" % filename |
197 |
- ) |
198 |
- return None |
199 |
- |
200 |
- # --- end of package_nofail (...) --- |
201 |
+ package_nofail = lambda filename, distdir : self._package_nofail ( |
202 |
+ log_bad=log_bad, filename=filename, distdir=distdir, origin=self |
203 |
+ ) |
204 |
|
205 |
if is_package is None: |
206 |
# unfiltered variant |
207 |
@@ -219,7 +253,7 @@ class LocalRepo ( object ): |
208 |
if pkg is not None: |
209 |
yield pkg |
210 |
|
211 |
- elif hasattr ( is_package, '__call__' ): |
212 |
+ else: |
213 |
# filtered variant (adds an if is_package... before yield) |
214 |
for dirpath, dirnames, filenames in os.walk ( self.distdir ): |
215 |
distdir = dirpath if dirpath != self.distdir else None |
216 |
@@ -233,127 +267,6 @@ class LocalRepo ( object ): |
217 |
self.logger.debug ( |
218 |
"filtered %r: not a package" % filename |
219 |
) |
220 |
- |
221 |
- |
222 |
- else: |
223 |
- # faulty variant, raises Exception |
224 |
- raise AssertionError ( |
225 |
- "is_package should either be None or a function." |
226 |
- ) |
227 |
- #yield None |
228 |
- |
229 |
# --- end of scan_distdir (...) --- |
230 |
|
231 |
-# --- end of LocalRepo --- |
232 |
- |
233 |
- |
234 |
-class RemoteRepo ( LocalRepo ): |
235 |
- """A template for remote repositories.""" |
236 |
- |
237 |
- def __init__ ( |
238 |
- self, name, distroot, sync_proto, |
239 |
- directory=None, |
240 |
- src_uri=None, remote_uri=None, base_uri=None |
241 |
- ): |
242 |
- """Initializes a RemoteRepo. |
243 |
- Mainly consists of URI calculation that derived classes may find useful. |
244 |
- |
245 |
- arguments: |
246 |
- * name -- |
247 |
- * sync_proto -- protocol used for syncing (e.g. 'rsync') |
248 |
- * directory -- |
249 |
- * src_uri -- src uri, if set, else calculated using base/remote uri, |
250 |
- the leading <proto>:// can be left out in which case |
251 |
- http is assumed |
252 |
- * remote_uri -- uri used for syncing, if set, else calculated using |
253 |
- base/src uri, the leading <proto>:// can be left out |
254 |
- * base_uri -- used to calculate remote/src uri, |
255 |
- example: localhost/R-packages/something |
256 |
- |
257 |
- keyword condition: |
258 |
- * | { x : x in union(src,remote,base) and x not None } | >= 1 |
259 |
- ^= at least one out of src/remote/base uri is not None |
260 |
- """ |
261 |
- super ( RemoteRepo, self ) . __init__ ( |
262 |
- name, distroot, directory, src_uri='' |
263 |
- ) |
264 |
- |
265 |
- self.sync_proto = sync_proto |
266 |
- |
267 |
- # detemerine uris |
268 |
- if src_uri is None and remote_uri is None: |
269 |
- if base_uri is None: |
270 |
- # keyword condition not met |
271 |
- raise Exception ( "Bad initialization of RemoteRepo!" ) |
272 |
- |
273 |
- else: |
274 |
- # using base_uri for src,remote |
275 |
- self.src_uri = URI_SEPARATOR.join ( |
276 |
- ( DEFAULT_PROTOCOL, base_uri ) |
277 |
- ) |
278 |
- |
279 |
- self.remote_uri = URI_SEPARATOR.join ( |
280 |
- ( sync_proto, base_uri ) |
281 |
- ) |
282 |
- |
283 |
- elif src_uri is None: |
284 |
- # remote_uri is not None |
285 |
- self.remote_uri = normalize_uri ( remote_uri, self.sync_proto ) |
286 |
- |
287 |
- if base_uri is not None: |
288 |
- # using base_uri for src_uri |
289 |
- self.src_uri = URI_SEPARATOR.join ( |
290 |
- ( DEFAULT_PROTOCOL, base_uri ) |
291 |
- ) |
292 |
- else: |
293 |
- # using remote_uri for src_uri |
294 |
- self.src_uri = normalize_uri ( |
295 |
- self.remote_uri, DEFAULT_PROTOCOL, force_protocol=True |
296 |
- ) |
297 |
- |
298 |
- elif remote_uri is None: |
299 |
- # src_uri is not None |
300 |
- self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL ) |
301 |
- |
302 |
- if base_uri is not None: |
303 |
- # using base_uri for remote_uri |
304 |
- self.remote_uri = URI_SEPARATOR.join ( |
305 |
- ( self.sync_proto, base_uri ) |
306 |
- ) |
307 |
- else: |
308 |
- # using src_uri for remote_uri |
309 |
- self.remote_uri = normalize_uri ( |
310 |
- self.src_uri, self.sync_proto, force_protocol=True |
311 |
- ) |
312 |
- else: |
313 |
- # remote and src not None |
314 |
- self.remote_uri = normalize_uri ( remote_uri, self.sync_proto ) |
315 |
- self.src_uri = normalize_uri ( src_uri, DEFAULT_PROTOCOL ) |
316 |
- |
317 |
- # --- end of __init__ (...) --- |
318 |
- |
319 |
- def get_remote_uri ( self ): |
320 |
- """Returns the remote uri of this RemoteRepo which used for syncing.""" |
321 |
- return self.remote_uri |
322 |
- # --- end of get_remote_uri (...) --- |
323 |
- |
324 |
- # get_remote(...) -> get_remote_uri(...) |
325 |
- get_remote = get_remote_uri |
326 |
- |
327 |
- def _dosync ( self ): |
328 |
- """Gets packages from remote(s) and returns True if the repo is ready |
329 |
- for overlay creation, else False. |
330 |
- |
331 |
- Derived classes have to implement this method. |
332 |
- """ |
333 |
- raise Exception ( "RemoteRepo does not implement sync()." ) |
334 |
- # --- end of _dosync (...) --- |
335 |
- |
336 |
- def __str__ ( self ): |
337 |
- return "repo '%s': DISTDIR '%s', SRC_URI '%s', REMOTE_URI '%s'" % ( |
338 |
- self.name, self.distdir, self.src_uri, self.remote_uri |
339 |
- ) |
340 |
- # --- end of __str__ (...) --- |
341 |
- |
342 |
-# --- end of RemoteRepo --- |
343 |
- |
344 |
+# --- end of BasicRepo --- |
345 |
|
346 |
diff --git a/roverlay/remote/repolist.py b/roverlay/remote/repolist.py |
347 |
index cc673e6..a623db0 100644 |
348 |
--- a/roverlay/remote/repolist.py |
349 |
+++ b/roverlay/remote/repolist.py |
350 |
@@ -4,7 +4,7 @@ import os.path |
351 |
|
352 |
from roverlay import config |
353 |
from roverlay.remote.repoloader import read_repofile |
354 |
-from roverlay.remote.basicrepo import LocalRepo |
355 |
+from roverlay.remote.basicrepo import BasicRepo |
356 |
|
357 |
class RepoList ( object ): |
358 |
"""Controls several Repo objects.""" |
359 |
@@ -55,7 +55,7 @@ class RepoList ( object ): |
360 |
# --- end of _pkg_filter (...) --- |
361 |
|
362 |
def add_distdir ( self, distdir, src_uri=None, name=None ): |
363 |
- """Adds a local package directory as LocalRepo. |
364 |
+ """Adds a local package directory as BasicRepo. |
365 |
|
366 |
arguments: |
367 |
* distdir -- |
368 |
@@ -64,7 +64,7 @@ class RepoList ( object ): |
369 |
(FIXME: could add RESTRICT="fetch" to those ebuilds) |
370 |
* name -- name of the repo, defaults to os.path.basename (distdir) |
371 |
""" |
372 |
- self.repos.append ( LocalRepo ( |
373 |
+ self.repos.append ( BasicRepo ( |
374 |
name=os.path.basename ( distdir ) if name is None else name, |
375 |
directory=distdir, |
376 |
src_uri=src_uri |
377 |
@@ -72,7 +72,7 @@ class RepoList ( object ): |
378 |
# --- end of add_distdir (...) --- |
379 |
|
380 |
def add_distdirs ( self, distdirs ): |
381 |
- """Adds several distdirs as LocalRepos. |
382 |
+ """Adds several distdirs as BasicRepos. |
383 |
All distdirs will have an invalid SRC_URI and a default name, |
384 |
use add_distdir() if you want usable ebuilds. |
385 |
|
386 |
@@ -81,7 +81,7 @@ class RepoList ( object ): |
387 |
""" |
388 |
def gen_repos(): |
389 |
for d in distdirs: |
390 |
- repo = LocalRepo ( |
391 |
+ repo = BasicRepo ( |
392 |
name=os.path.basename ( d ), |
393 |
# FIXME: --force_distroot should block --distdir |
394 |
directory=d, |
395 |
|
396 |
diff --git a/roverlay/remote/repoloader.py b/roverlay/remote/repoloader.py |
397 |
index 2a92526..ba49d0a 100644 |
398 |
--- a/roverlay/remote/repoloader.py |
399 |
+++ b/roverlay/remote/repoloader.py |
400 |
@@ -11,8 +11,9 @@ except ImportError: |
401 |
|
402 |
from roverlay import config |
403 |
|
404 |
-from roverlay.remote.basicrepo import LocalRepo |
405 |
-from roverlay.remote.rsync import RsyncRepo |
406 |
+from roverlay.remote import basicrepo |
407 |
+from roverlay.remote import rsync |
408 |
+from roverlay.remote import websync |
409 |
|
410 |
LOGGER = logging.getLogger ( 'repoloader' ) |
411 |
|
412 |
@@ -51,35 +52,49 @@ def read_repofile ( repo_file, distroot, lenient=False, force_distroot=False ): |
413 |
|
414 |
repo_type = get ( 'type', 'rsync' ).lower() |
415 |
|
416 |
- repo_name = get ( 'name', name ) |
417 |
+ common_kwargs = dict ( |
418 |
+ name = get ( 'name', name ), |
419 |
+ directory = None if force_distroot else get ( 'directory' ), |
420 |
+ distroot = distroot, |
421 |
+ src_uri = get ( 'src_uri' ) |
422 |
+ ) |
423 |
|
424 |
- repo_distdir = None if force_distroot else get ( 'directory' ) |
425 |
|
426 |
|
427 |
if repo_type == 'local': |
428 |
- repo = LocalRepo ( |
429 |
- name = repo_name, |
430 |
- distroot = distroot, |
431 |
- directory = repo_distdir, |
432 |
- src_uri = get ( 'src_uri' ) |
433 |
- ) |
434 |
+ repo = basicrepo.BasicRepo ( **common_kwargs ) |
435 |
+ |
436 |
elif repo_type == 'rsync': |
437 |
extra_opts = get ( 'extra_rsync_opts' ) |
438 |
if extra_opts: |
439 |
extra_opts = extra_opts.split ( ' ' ) |
440 |
|
441 |
- repo = RsyncRepo ( |
442 |
- name = repo_name, |
443 |
- distroot = distroot, |
444 |
- directory = repo_distdir, |
445 |
- src_uri = get ( 'src_uri' ), |
446 |
+ repo = rsync.RsyncRepo ( |
447 |
rsync_uri = get ( 'rsync_uri' ), |
448 |
- base_uri = get ( 'base_uri' ), |
449 |
extra_opts = extra_opts, |
450 |
recursive = get ( 'recursive', False ) == 'yes', |
451 |
+ **common_kwargs |
452 |
+ ) |
453 |
+ |
454 |
+ elif repo_type == 'websync_repo': |
455 |
+ repo = websync.WebsyncRepo ( |
456 |
+ pkglist_file = get ( 'pkglist_file', 'PACKAGES' ), |
457 |
+ pkglist_uri = get ( 'pkglist_uri' ), |
458 |
+ digest_type = get ( 'digest_type' ) or get ( 'digest' ), |
459 |
+ **common_kwargs |
460 |
) |
461 |
+ |
462 |
+ elif repo_type in ( 'websync_pkglist', 'websync_package_list' ): |
463 |
+ repo = websync.WebsyncPackageList ( |
464 |
+ pkglist_file = get ( 'pkglist_file' ) or get ( 'pkglist' ), |
465 |
+ #digest_type = get ( 'digest_type' ) or get ( 'digest' ), |
466 |
+ **common_kwargs |
467 |
+ ) |
468 |
+ |
469 |
else: |
470 |
- LOGGER.error ( "Unknown repo type %s for %s" % ( repo_type, name ) ) |
471 |
+ LOGGER.error ( "Unknown repo type {} for {}!".format ( |
472 |
+ repo_type, name |
473 |
+ ) ) |
474 |
|
475 |
|
476 |
if repo is not None: |
477 |
|
478 |
diff --git a/roverlay/remote/rsync.py b/roverlay/remote/rsync.py |
479 |
index 11efd27..90f21f5 100644 |
480 |
--- a/roverlay/remote/rsync.py |
481 |
+++ b/roverlay/remote/rsync.py |
482 |
@@ -4,8 +4,7 @@ import subprocess |
483 |
|
484 |
from roverlay import config, util |
485 |
|
486 |
-#from roverlay.remote.basicrepo import LocalRepo, RemoteRepo |
487 |
-from roverlay.remote.basicrepo import RemoteRepo |
488 |
+from roverlay.remote.basicrepo import BasicRepo |
489 |
|
490 |
RSYNC_ENV = util.keepenv ( |
491 |
'PATH', |
492 |
@@ -46,12 +45,16 @@ DEFAULT_RSYNC_OPTS = ( |
493 |
'--chmod=ugo=r,u+w,Dugo+x', # 0755 for transferred dirs, 0644 for files |
494 |
) |
495 |
|
496 |
-class RsyncRepo ( RemoteRepo ): |
497 |
+class RsyncRepo ( BasicRepo ): |
498 |
|
499 |
- def __init__ ( |
500 |
- self, name, distroot, |
501 |
- directory=None, src_uri=None, rsync_uri=None, base_uri=None, |
502 |
- recursive=False, extra_opts=None |
503 |
+ def __init__ ( self, |
504 |
+ name, |
505 |
+ distroot, |
506 |
+ src_uri, |
507 |
+ rsync_uri, |
508 |
+ directory=None, |
509 |
+ recursive=False, |
510 |
+ extra_opts=None |
511 |
): |
512 |
"""Initializes an RsyncRepo. |
513 |
|
514 |
@@ -68,8 +71,8 @@ class RsyncRepo ( RemoteRepo ): |
515 |
# using '' as remote protocol which leaves uris unchanged when |
516 |
# normalizing them for rsync usage |
517 |
super ( RsyncRepo, self ) . __init__ ( |
518 |
- name, distroot=distroot, sync_proto='', directory=directory, |
519 |
- src_uri=src_uri, remote_uri=rsync_uri, base_uri=base_uri |
520 |
+ name=name, distroot=distroot, directory=directory, |
521 |
+ src_uri=src_uri, remote_uri=rsync_uri |
522 |
) |
523 |
|
524 |
# syncing directories, not files - always appending a slash at the end |
525 |
@@ -83,8 +86,6 @@ class RsyncRepo ( RemoteRepo ): |
526 |
self.extra_opts.extend ( extra_opts ) |
527 |
else: |
528 |
self.extra_opts = extra_opts |
529 |
- |
530 |
- self.sync_protocol = 'rsync' |
531 |
# --- end of __init__ (...) --- |
532 |
|
533 |
def _rsync_argv ( self ): |
534 |
|
535 |
diff --git a/roverlay/remote/websync.py b/roverlay/remote/websync.py |
536 |
new file mode 100644 |
537 |
index 0000000..a0ded6c |
538 |
--- /dev/null |
539 |
+++ b/roverlay/remote/websync.py |
540 |
@@ -0,0 +1,410 @@ |
541 |
+ |
542 |
+import re |
543 |
+import os |
544 |
+import urllib2 |
545 |
+ |
546 |
+from roverlay import digest, util |
547 |
+from roverlay.packageinfo import PackageInfo |
548 |
+from roverlay.remote.basicrepo import BasicRepo |
549 |
+ |
550 |
+class WebsyncBase ( BasicRepo ): |
551 |
+ """Provides functionality for retrieving R packages via http. |
552 |
+ Not meant for direct usage.""" |
553 |
+ |
554 |
+ def __init__ ( self, |
555 |
+ name, |
556 |
+ distroot, |
557 |
+ src_uri, |
558 |
+ directory=None, |
559 |
+ digest_type=None |
560 |
+ ): |
561 |
+ """Initializes a WebsyncBase instance. |
562 |
+ |
563 |
+ arguments: |
564 |
+ * name -- see BasicRepo |
565 |
+ * distroot -- ^ |
566 |
+ * src_uri -- ^ |
567 |
+ * directory -- ^ |
568 |
+ * digest_type -- if set and not None/"None": |
569 |
+ verify packages using the given digest type |
570 |
+ Supported digest types: 'md5'. |
571 |
+ """ |
572 |
+ super ( WebsyncBase, self ) . __init__ ( |
573 |
+ name=name, |
574 |
+ distroot=distroot, |
575 |
+ src_uri=src_uri, |
576 |
+ remote_uri=src_uri, |
577 |
+ directory=directory |
578 |
+ ) |
579 |
+ |
580 |
+ if digest_type is None: |
581 |
+ self._digest_type = None |
582 |
+ |
583 |
+ elif str ( digest_type ).lower() in ( 'none', 'disabled', 'off' ): |
584 |
+ self._digest_type = None |
585 |
+ |
586 |
+ elif digest.digest_supported ( digest_type ): |
587 |
+ # setting a digest_type (other than None) expects package_list |
588 |
+ # to be a 2-tuple <package_file, digest sum> list, |
589 |
+ # else a list of package_files is expected. |
590 |
+ self._digest_type = digest_type |
591 |
+ |
592 |
+ else: |
593 |
+ raise Exception ( |
594 |
+ "Unknown/unsupported digest type {}!".format ( digest_type ) |
595 |
+ ) |
596 |
+ |
597 |
+ # download 8KiB per block |
598 |
+ self.transfer_blocksize = 8192 |
599 |
+ # --- end of __init__ (...) --- |
600 |
+ |
601 |
+ def _fetch_package_list ( self ): |
602 |
+ """This function returns a list of packages to download.""" |
603 |
+ raise Exception ( "method stub" ) |
604 |
+ # --- end of _fetch_package_list (...) --- |
605 |
+ |
606 |
+ def _get_package ( self, package_file, src_uri, expected_digest ): |
607 |
+ """Gets a package, i.e. downloads it if it doesn't exist locally |
608 |
+ or fails verification (size, digest). |
609 |
+ |
610 |
+ arguments: |
611 |
+ * package_file -- package file name |
612 |
+ * src_uri -- uri for package_file |
613 |
+ * expected_digest -- expected digest for package_file or None (^=disable) |
614 |
+ """ |
615 |
+ distfile = self.distdir + os.sep + package_file |
616 |
+ webh = urllib2.urlopen ( src_uri ) |
617 |
+ #web_info = webh.info() |
618 |
+ |
619 |
+ expected_filesize = int ( webh.info().get ( 'content-length', -1 ) ) |
620 |
+ |
621 |
+ if os.access ( distfile, os.F_OK ): |
622 |
+ # package exists locally, verify it (size, digest) |
623 |
+ fetch_required = False |
624 |
+ localsize = os.path.getsize ( distfile ) |
625 |
+ |
626 |
+ if localsize != expected_filesize: |
627 |
+ # size mismatch |
628 |
+ self.logger.info ( |
629 |
+ 'size mismatch for {f!r}: expected {websize} bytes ' |
630 |
+ 'but got {localsize}!'.format ( |
631 |
+ f = package_file, |
632 |
+ websize = expected_filesize, |
633 |
+ localsize = localsize |
634 |
+ ) |
635 |
+ ) |
636 |
+ fetch_required = True |
637 |
+ |
638 |
+ elif expected_digest is not None: |
639 |
+ our_digest = digest.dodigest_file ( distfile, self._digest_type ) |
640 |
+ |
641 |
+ if our_digest != expected_digest: |
642 |
+ # digest mismatch |
643 |
+ self.logger.warning ( |
644 |
+ '{dtype} mismatch for {f!r}: ' |
645 |
+ 'expected {theirs} but got {ours} - refetching.'.format ( |
646 |
+ dtype = self._digest_type, |
647 |
+ f = distfile, |
648 |
+ theirs = expected_digest, |
649 |
+ ours = our_digest |
650 |
+ ) |
651 |
+ ) |
652 |
+ fetch_required = True |
653 |
+ |
654 |
+ else: |
655 |
+ fetch_required = True |
656 |
+ |
657 |
+ if fetch_required: |
658 |
+ bytes_fetched = 0 |
659 |
+ |
660 |
+ # FIXME: debug print (?) |
661 |
+ print ( |
662 |
+ "Fetching {f} from {u} ...".format ( f=package_file, u=src_uri ) |
663 |
+ ) |
664 |
+ |
665 |
+ with open ( distfile, mode='wb' ) as fh: |
666 |
+ block = webh.read ( self.transfer_blocksize ) |
667 |
+ while block: |
668 |
+ # write block to file |
669 |
+ fh.write ( block ) |
670 |
+ # ? bytelen |
671 |
+ bytes_fetched += len ( block ) |
672 |
+ |
673 |
+ # get the next block |
674 |
+ block = webh.read ( self.transfer_blocksize ) |
675 |
+ # -- with |
676 |
+ |
677 |
+ if bytes_fetched == expected_filesize: |
678 |
+ if expected_digest is not None: |
679 |
+ our_digest = digest.dodigest_file ( distfile, self._digest_type ) |
680 |
+ |
681 |
+ if our_digest != expected_digest: |
682 |
+ # fetched package's digest does not match the expected one, |
683 |
+ # refuse to use it |
684 |
+ self.logger.warning ( |
685 |
+ 'bad {dtype} digest for {f!r}, expected {theirs} but ' |
686 |
+ 'got {ours} - removing this package.'.format ( |
687 |
+ dtype = self._digest_type, |
688 |
+ f = distfile, |
689 |
+ theirs = expected_digest, |
690 |
+ ours = our_digest |
691 |
+ ) |
692 |
+ ) |
693 |
+ os.remove ( distfile ) |
694 |
+ |
695 |
+ # package removed -> return success |
696 |
+ return True |
697 |
+ # -- if |
698 |
+ # -- if |
699 |
+ |
700 |
+ else: |
701 |
+ return False |
702 |
+ else: |
703 |
+ # FIXME: debug print |
704 |
+ print ( "Skipping fetch for {f!r}".format ( f=distfile ) ) |
705 |
+ |
706 |
+ return self._package_synced ( package_file, distfile, src_uri ) |
707 |
+ # --- end of _get_package (...) --- |
708 |
+ |
709 |
+ def _package_synced ( self, package_filename, distfile, src_uri ): |
710 |
+ """Called when a package has been synced (=exists locally when |
711 |
+ _get_package() is done). |
712 |
+ |
713 |
+ arguments: |
714 |
+ * package_filename -- |
715 |
+ * distfile -- |
716 |
+ * src_uri -- |
717 |
+ """ |
718 |
+ return True |
719 |
+ # --- end of _package_synced (...) --- |
720 |
+ |
721 |
+ def _dosync ( self ): |
722 |
+ """Syncs this repo.""" |
723 |
+ package_list = self._fetch_package_list() |
724 |
+ |
725 |
+ # empty/unset package list |
726 |
+ if not package_list: return True |
727 |
+ |
728 |
+ util.dodir ( self.distdir ) |
729 |
+ |
730 |
+ success = True |
731 |
+ |
732 |
+ if self._digest_type is not None: |
733 |
+ for package_file, expected_digest in package_list: |
734 |
+ src_uri = self.get_src_uri ( package_file ) |
735 |
+ |
736 |
+ if not self._get_package ( |
737 |
+ package_file, src_uri, expected_digest |
738 |
+ ): |
739 |
+ success = False |
740 |
+ break |
741 |
+ else: |
742 |
+ for package_file in package_list: |
743 |
+ src_uri = self.get_src_uri ( package_file ) |
744 |
+ |
745 |
+ if not self._get_package ( |
746 |
+ package_file, src_uri, expected_digest=None |
747 |
+ ): |
748 |
+ success = False |
749 |
+ break |
750 |
+ |
751 |
+ return success |
752 |
+ # --- end of _dosync (...) --- |
753 |
+ |
754 |
+ |
755 |
+class WebsyncRepo ( WebsyncBase ): |
756 |
+ """Sync a http repo using its PACKAGES file.""" |
757 |
+ # FIXME: hardcoded for md5 |
758 |
+ |
759 |
+ def __init__ ( self, |
760 |
+ pkglist_uri=None, |
761 |
+ pkglist_file=None, |
762 |
+ *args, |
763 |
+ **kwargs |
764 |
+ ): |
765 |
+ """Initializes a WebsyncRepo instance. |
766 |
+ |
767 |
+ arguments: |
768 |
+ * pkglist_uri -- if set and not None: uri of the package list file |
769 |
+ * pkglist_file -- if set and not None: name of the package list file, |
770 |
+ this is used to calculate the pkglist_uri |
771 |
+ pkglist_uri = <src_uri>/<pkglist_file> |
772 |
+ * *args / **kwargs -- see WebsyncBase / BasicRepo |
773 |
+ |
774 |
+ pkglist file: this is a file with debian control file-like syntax |
775 |
+ listing all packages. |
776 |
+ Example: http://www.omegahat.org/R/src/contrib/PACKAGES (2012-07-31) |
777 |
+ """ |
778 |
+ super ( WebsyncRepo, self ) . __init__ ( *args, **kwargs ) |
779 |
+ |
780 |
+ if self._digest_type is None: |
781 |
+ self.FIELDREGEX = re.compile ( |
782 |
+ '^\s*(?P<name>(package|version))[:]\s*(?P<value>.+)', |
783 |
+ re.IGNORECASE |
784 |
+ ) |
785 |
+ else: |
786 |
+ # used to filter field names (package,version,md5sum) |
787 |
+ self.FIELDREGEX = re.compile ( |
788 |
+ '^\s*(?P<name>(package|version|md5sum))[:]\s*(?P<value>.+)', |
789 |
+ re.IGNORECASE |
790 |
+ ) |
791 |
+ |
792 |
+ self.pkglist_uri = pkglist_uri or self.get_src_uri ( pkglist_file ) |
793 |
+ if not self.pkglist_uri: |
794 |
+ raise Exception ( "pkglist_uri is unset!" ) |
795 |
+ # --- end of __init__ (...) --- |
796 |
+ |
797 |
+ def _fetch_package_list ( self ): |
798 |
+ """Returns the list of packages to be downloaded. |
799 |
+ List format: |
800 |
+ * if digest verification is enabled: |
801 |
+ List ::= [ ( package_file, digest ), ... ] |
802 |
+ * else |
803 |
+ List ::= [ package_file, ... ] |
804 |
+ """ |
805 |
+ |
806 |
+ def generate_pkglist ( fh ): |
807 |
+ """Generates the package list using the given file handle. |
808 |
+ |
809 |
+ arguments: |
810 |
+ * fh -- file handle to read from |
811 |
+ """ |
812 |
+ info = dict() |
813 |
+ |
814 |
+ max_info_len = 3 if self._digest_type is not None else 2 |
815 |
+ |
816 |
+ for match in ( |
817 |
+ filter ( None, ( |
818 |
+ self.FIELDREGEX.match ( l ) for l in fh.readlines() |
819 |
+ ) ) |
820 |
+ ): |
821 |
+ name, value = match.group ( 'name', 'value' ) |
822 |
+ info [name.lower()] = value |
823 |
+ |
824 |
+ if len ( info.keys() ) == max_info_len: |
825 |
+ |
826 |
+ pkgfile = '{name}_{version}.tar.gz'.format ( |
827 |
+ name=info ['package'], version=info ['version'] |
828 |
+ ) |
829 |
+ |
830 |
+ if self._digest_type is not None: |
831 |
+ yield ( pkgfile, info ['md5sum'] ) |
832 |
+ #yield ( pkgfile, ( 'md5', info ['md5sum'] ) ) |
833 |
+ else: |
834 |
+ yield pkgfile |
835 |
+ |
836 |
+ info.clear() |
837 |
+ # --- end of generate_pkglist (...) --- |
838 |
+ |
839 |
+ package_list = () |
840 |
+ try: |
841 |
+ webh = urllib2.urlopen ( self.pkglist_uri ) |
842 |
+ |
843 |
+ content_type = webh.info().get ( 'content-type', None ) |
844 |
+ |
845 |
+ if content_type != 'text/plain': |
846 |
+ print ( |
847 |
+ "content type {!r} is not supported!".format ( content_type ) |
848 |
+ ) |
849 |
+ else: |
850 |
+ package_list = tuple ( generate_pkglist ( webh ) ) |
851 |
+ |
852 |
+ webh.close() |
853 |
+ |
854 |
+ finally: |
855 |
+ if 'webh' in locals() and webh: webh.close() |
856 |
+ |
857 |
+ return package_list |
858 |
+ # --- end of _fetch_package_list (...) --- |
859 |
+ |
860 |
+class WebsyncPackageList ( WebsyncBase ): |
861 |
+ """Sync packages from multiple remotes via http. Package uris are read |
862 |
+ from a file.""" |
863 |
+ |
864 |
+ # FIXME: does not support --nosync |
865 |
+ |
866 |
+ def __init__ ( self, pkglist_file, *args, **kwargs ): |
867 |
+ """Initializes a WebsyncPackageList instance. |
868 |
+ |
869 |
+ arguments: |
870 |
+ * pkglist_file -- path to the package list file that lists |
871 |
+ one package http uri per line |
872 |
+ * *args / **kwargs -- see WebsyncBase, BasicRepo |
873 |
+ """ |
874 |
+ super ( WebsyncPackageList, self ) . __init__ ( *args, **kwargs ) |
875 |
+ |
876 |
+ self._pkglist_file = os.path.abspath ( pkglist_file ) |
877 |
+ |
878 |
+ del self.src_uri |
879 |
+ |
880 |
+ self._synced_packages = list() |
881 |
+ |
882 |
+ # --- end of __init__ (...) --- |
883 |
+ |
884 |
+ def _fetch_package_list ( self ): |
885 |
+ """Returns the package list. |
886 |
+ Format: |
887 |
+ pkglist ::= [ ( package_file, src_uri ), ... ] |
888 |
+ """ |
889 |
+ pkglist = list() |
890 |
+ with open ( self._pkglist_file, mode='r' ) as fh: |
891 |
+ for line in fh.readlines(): |
892 |
+ src_uri = line.strip() |
893 |
+ if src_uri: |
894 |
+ pkglist.append ( ( |
895 |
+ src_uri.rpartition ( '/' ) [-1], |
896 |
+ src_uri |
897 |
+ ) ) |
898 |
+ |
899 |
+ return pkglist |
900 |
+ # --- end of _fetch_package_list (...) --- |
901 |
+ |
902 |
+ def _package_synced ( self, package_filename, distfile, src_uri ): |
903 |
+ self._synced_packages.append ( |
904 |
+ ( package_filename, src_uri ) |
905 |
+ ) |
906 |
+ return True |
907 |
+ # --- end of _package_synced (...) --- |
908 |
+ |
909 |
+ def scan_distdir ( self, log_bad=True, **kwargs_ignored ): |
910 |
+ for package_filename, src_uri in self._synced_packages: |
911 |
+ pkg = self._package_nofail ( |
912 |
+ log_bad, |
913 |
+ filename = package_filename, |
914 |
+ origin = self, |
915 |
+ src_uri = src_uri |
916 |
+ ) |
917 |
+ if pkg is not None: |
918 |
+ yield pkg |
919 |
+ # --- end of scan_distdir (...) --- |
920 |
+ |
921 |
+ def _nosync ( self ): |
922 |
+ """nosync - report existing packages""" |
923 |
+ for package_file, src_uri in self._fetch_package_list(): |
924 |
+ distfile = self.distdir + os.sep + package_file |
925 |
+ if os.access ( distfile, os.F_OK ): |
926 |
+ self._package_synced ( package_file, distfile, src_uri ) |
927 |
+ |
928 |
+ return True |
929 |
+ # --- end of _nosync (...) --- |
930 |
+ |
931 |
+ def _dosync ( self ): |
932 |
+ """Sync packages.""" |
933 |
+ package_list = self._fetch_package_list() |
934 |
+ |
935 |
+ # empty/unset package list |
936 |
+ if not package_list: return True |
937 |
+ |
938 |
+ util.dodir ( self.distdir ) |
939 |
+ |
940 |
+ success = True |
941 |
+ |
942 |
+ for package_file, src_uri in package_list: |
943 |
+ if not self._get_package ( |
944 |
+ package_file, src_uri, expected_digest=None |
945 |
+ ): |
946 |
+ success = False |
947 |
+ break |
948 |
+ |
949 |
+ return success |
950 |
+ # --- end of _dosync (...) --- |