1 |
commit: 4d7bcbfae3c037d56ac315c393bd9b30cf57a37b |
2 |
Author: André Erdmann <dywi <AT> mailerd <DOT> de> |
3 |
AuthorDate: Thu Jun 28 15:51:53 2012 +0000 |
4 |
Commit: André Erdmann <dywi <AT> mailerd <DOT> de> |
5 |
CommitDate: Thu Jun 28 15:51:53 2012 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=4d7bcbfa |
7 |
|
8 |
fix for description reader |
9 |
|
10 |
* read all lines of a field as one string and parse that string after reading |
11 |
this fixes reading entries that continue across lines, like << |
12 |
Depends: a (>1.0) b |
13 |
(>0.9) c |
14 |
>> |
15 |
that should be read as {'Depends':('a (>1.0)','b (>0.9)','c')} |
16 |
and _not_ as {'Depends':('a (>1.0)','b','(>0.9)','c')} |
17 |
|
18 |
modified: roverlay/rpackage/descriptionreader.py |
19 |
|
20 |
--- |
21 |
roverlay/rpackage/descriptionreader.py | 368 +++++++++++++++++--------------- |
22 |
1 files changed, 197 insertions(+), 171 deletions(-) |
23 |
|
24 |
diff --git a/roverlay/rpackage/descriptionreader.py b/roverlay/rpackage/descriptionreader.py |
25 |
index 8aa35b3..1f6bd3e 100644 |
26 |
--- a/roverlay/rpackage/descriptionreader.py |
27 |
+++ b/roverlay/rpackage/descriptionreader.py |
28 |
@@ -10,12 +10,31 @@ import time |
29 |
from roverlay import config, util |
30 |
from roverlay.rpackage import descriptionfields |
31 |
|
32 |
+def make_desc_packageinfo ( filepath ): |
33 |
+ """Creates a minimal dict that can be used as package info in the |
34 |
+ DescriptionReader (for testing/debugging). |
35 |
+ |
36 |
+ arguments: |
37 |
+ * filepath -- |
38 |
+ """ |
39 |
+ name, sep, ver = filepath.partition ( '_' ) |
40 |
+ return dict ( |
41 |
+ package_file = filepath, |
42 |
+ package_name = name, |
43 |
+ ebuild_verstr = ver, |
44 |
+ name = name, |
45 |
+ ) |
46 |
+ |
47 |
+ |
48 |
class DescriptionReader ( object ): |
49 |
"""Description Reader""" |
50 |
|
51 |
WRITE_DESCFILES_DIR = config.get ( 'DESCRIPTION.descfiles_dir', None ) |
52 |
|
53 |
- def __init__ ( self, package_info, logger, read_now=False ): |
54 |
+ def __init__ ( self, |
55 |
+ package_info, logger, |
56 |
+ read_now=False, write_desc=True |
57 |
+ ): |
58 |
"""Initializes a DESCRIPTION file reader.""" |
59 |
|
60 |
if not config.access().get_field_definition(): |
61 |
@@ -26,9 +45,8 @@ class DescriptionReader ( object ): |
62 |
self.field_definition = config.access().get_field_definition() |
63 |
self.fileinfo = package_info |
64 |
self.logger = logger.getChild ( 'desc_reader' ) |
65 |
- self.desc_data = None |
66 |
|
67 |
- if DescriptionReader.WRITE_DESCFILES_DIR is not None: |
68 |
+ if write_desc and DescriptionReader.WRITE_DESCFILES_DIR is not None: |
69 |
self.write_desc_file = os.path.join ( |
70 |
DescriptionReader.WRITE_DESCFILES_DIR, |
71 |
'%s_%s.desc' % ( |
72 |
@@ -36,72 +54,114 @@ class DescriptionReader ( object ): |
73 |
) |
74 |
) |
75 |
|
76 |
- |
77 |
if read_now: |
78 |
self.run() |
79 |
|
80 |
# --- end of __init__ (...) --- |
81 |
|
82 |
def get_desc ( self, run_if_unset=True ): |
83 |
- if self.desc_data is None: |
84 |
- self.run () |
85 |
+ if not hasattr ( self, 'desc_data' ): |
86 |
+ if run_if_unset: |
87 |
+ self.run() |
88 |
+ else: |
89 |
+ raise Exception ( "no desc data" ) |
90 |
|
91 |
return self.desc_data |
92 |
# --- end of get_desc (...) --- |
93 |
|
94 |
- def _parse_read_data ( self, read_data ): |
95 |
- """Verifies and parses/fixes read data. |
96 |
+ def _make_read_data ( self, raw ): |
97 |
+ """Create read data (value or list of values per field) for the given |
98 |
+ raw data (list of text lines per field). |
99 |
|
100 |
arguments: |
101 |
- * read_data -- data from file, will be modified |
102 |
+ * raw -- |
103 |
+ |
104 |
+ returns: read data |
105 |
""" |
106 |
+ # catch None |
107 |
+ if raw is None: return None |
108 |
+ |
109 |
+ # this dict will be returned as result later |
110 |
+ read = dict() |
111 |
+ |
112 |
+ flags = self.field_definition.get_fields_with_flag |
113 |
|
114 |
# insert default values |
115 |
default_values = self.field_definition.get_fields_with_default_value() |
116 |
|
117 |
for field_name in default_values.keys(): |
118 |
- if not field_name in read_data: |
119 |
- read_data [field_name] = default_values [field_name] |
120 |
+ if not field_name in raw: |
121 |
+ read [field_name] = default_values [field_name] |
122 |
+ |
123 |
+ |
124 |
+ # transfer fields from raw as string or list |
125 |
+ fields_join = flags ( 'joinValues' ) |
126 |
+ fields_isList = flags ( 'isList' ) |
127 |
+ fields_wsList = flags ( 'isWhitespaceList' ) |
128 |
+ |
129 |
+ list_split = re.compile ( |
130 |
+ config.get_or_fail ( 'DESCRIPTION.list_split_regex' ) |
131 |
+ ).split |
132 |
+ slist_split = re.compile ( '\s+' ).split |
133 |
+ |
134 |
+ make_list = lambda l : tuple ( filter ( None, list_split ( l, 0 ) ) ) |
135 |
+ make_slist = lambda l : tuple ( filter ( None, slist_split ( l, 0 ) ) ) |
136 |
+ |
137 |
+ for field in raw.keys(): |
138 |
+ value_line = ' '.join ( filter ( None, raw [field] ) ) |
139 |
+ |
140 |
+ # join > isList > wsList [... >= join (implicit)] |
141 |
+ |
142 |
+ if field in fields_join: |
143 |
+ read [field] = value_line |
144 |
+ |
145 |
+ elif field in fields_isList: |
146 |
+ read [field] = make_list ( value_line ) |
147 |
+ |
148 |
+ elif field in fields_wsList: |
149 |
+ read [field] = make_slist ( value_line ) |
150 |
+ |
151 |
+ else: |
152 |
+ read [field] = value_line |
153 |
|
154 |
|
155 |
- # join values to a single string |
156 |
- for field_name in \ |
157 |
- self.field_definition.get_fields_with_flag ( 'joinValues' ) \ |
158 |
- : |
159 |
- if field_name in read_data: |
160 |
- read_data [field_name] = ' ' . join ( read_data [field_name] ) |
161 |
+ return read |
162 |
+ # --- end of _make_read_data (...) --- |
163 |
+ |
164 |
+ def _verify_read_data ( self, read ): |
165 |
+ """Verifies read data. |
166 |
+ Checks that all mandatory fields are set and all fields have suitable |
167 |
+ values. |
168 |
+ |
169 |
+ Returns True (^= valid data) or False (^= cannot use package) |
170 |
+ """ |
171 |
+ fref = self.field_definition |
172 |
|
173 |
# ensure that all mandatory fields are set |
174 |
missing_fields = set () |
175 |
|
176 |
- for field_name in \ |
177 |
- self.field_definition.get_fields_with_flag ( 'mandatory' ): |
178 |
+ for field in fref.get_fields_with_flag ( 'mandatory' ): |
179 |
|
180 |
- if field_name in read_data: |
181 |
- if read_data [field_name] is None or \ |
182 |
- len ( read_data [field_name] ) < 1 \ |
183 |
- : |
184 |
- missing_fields.add ( field_name ) |
185 |
+ if field in read: |
186 |
+ if read [field] is None or len ( read [field] ) < 1: |
187 |
+ missing_fields.add ( field ) |
188 |
#else: ok |
189 |
else: |
190 |
- missing_fields.add ( field_name ) |
191 |
+ missing_fields.add ( field ) |
192 |
|
193 |
|
194 |
# check for fields that allow only certain values |
195 |
unsuitable_fields = set() |
196 |
|
197 |
- restricted_fields = \ |
198 |
- self.field_definition.get_fields_with_allowed_values() |
199 |
+ restricted_fields = fref.get_fields_with_allowed_values() |
200 |
|
201 |
- for field_name in restricted_fields: |
202 |
- if field_name in read_data: |
203 |
- if not self.field_definition.get ( field_name ) . value_allowed ( |
204 |
- read_data [field_name] |
205 |
- ): |
206 |
- unsuitable_fields.add ( field_name ) |
207 |
+ for field in restricted_fields: |
208 |
+ if field in read: |
209 |
+ if not fref.get ( field ).value_allowed ( read [field] ): |
210 |
+ unsuitable_fields.add ( field ) |
211 |
|
212 |
# summarize results |
213 |
- valid = not bool ( len ( missing_fields ) or len ( unsuitable_fields ) ) |
214 |
+ valid = not len ( missing_fields ) and not len ( unsuitable_fields ) |
215 |
if not valid: |
216 |
self.logger.info ( "Cannot use R package" ) # name? |
217 |
if len ( missing_fields ): |
218 |
@@ -117,149 +177,87 @@ class DescriptionReader ( object ): |
219 |
|
220 |
return valid |
221 |
|
222 |
- # --- end of _parse_read_data (...) --- |
223 |
+ # --- end of _verify_read_data (...) --- |
224 |
|
225 |
- def run ( self ): |
226 |
- """Reads a DESCRIPTION file and returns the read data if successful, |
227 |
- else None. |
228 |
+ def _get_desc_from_file ( self, filepath, pkg_name='.' ): |
229 |
+ """Reads a file returns the description data. |
230 |
|
231 |
arguments: |
232 |
- * file -- path to the tarball file (containing the description file) |
233 |
- that should be read |
234 |
+ * filepath -- file to read (str; path to tarball or file) |
235 |
+ * pkg_name -- name of the package, in tarballs the description file |
236 |
+ is located in <pkg_name>/ and thus this argument |
237 |
+ is required. Defaults to '.', set to None to disable. |
238 |
+ |
239 |
+ All exceptions are passed to the caller (TarError, IOErr, <custom>). |
240 |
+ <filepath> can either be a tarball in which case the real DESCRIPTION |
241 |
+ file is read (<pkg_name>/DESCRIPTION) or a normal file. |
242 |
+ """ |
243 |
|
244 |
- It does some pre-parsing, inter alia |
245 |
- -> assigning field identifiers from the file to real field names |
246 |
- -> split field values |
247 |
- -> filter out unwanted/useless fields |
248 |
+ self.logger.debug ( "Starting to read file '%s' ...\n" % filepath ) |
249 |
|
250 |
- The return value is a description_data dict or None if the read data |
251 |
- are "useless" (not suited to create an ebuild for it, |
252 |
- e.g. if OS_TYPE is not unix). |
253 |
- """ |
254 |
+ if not ( isinstance ( filepath, str ) and filepath ): |
255 |
+ raise Exception ( "bad usage" ) |
256 |
|
257 |
- def make_values ( value_str, field_context=None ): |
258 |
- """Extracts relevant data from value_str and returns them as list. |
259 |
- |
260 |
- arguments: |
261 |
- * value_str -- string that represents the (just read) values |
262 |
- * field_context -- field name the value belongs to; |
263 |
- optional, defaults to None |
264 |
- |
265 |
- It's useful to set field_context 'cause several fields ('Depends') |
266 |
- have multiple values arranged in a list (dep0, dep1 [, depK]*). |
267 |
- """ |
268 |
- |
269 |
- svalue_str = value_str.strip() |
270 |
- |
271 |
- if not svalue_str: |
272 |
- # empty value(s) |
273 |
- return [] |
274 |
- |
275 |
- elif field_context is None: |
276 |
- # default return if no context given |
277 |
- return [ svalue_str ] |
278 |
- |
279 |
- elif field_context in \ |
280 |
- self.field_definition.get_fields_with_flag ( 'isList' ) \ |
281 |
- : |
282 |
- # split up this list (separated by commata and/or semicolons) |
283 |
- # *beware*/fixme: py3, filter returns filter object |
284 |
- return filter ( None, re.split ( |
285 |
- config.get ( 'DESCRIPTION.list_split_regex' ), |
286 |
- svalue_str, |
287 |
- 0 |
288 |
- ) ) |
289 |
- |
290 |
- elif field_context in \ |
291 |
- self.field_definition.get_fields_with_flag ( 'isWhitespaceList' ) \ |
292 |
- : |
293 |
- # split up this list (separated by whitespace) |
294 |
- return filter ( None, re.split ( '\s+', svalue_str, 0 ) ) |
295 |
- |
296 |
- # default return |
297 |
- return [ svalue_str ] |
298 |
- |
299 |
- # --- end of make_values (...) --- |
300 |
- |
301 |
- def get_desc_from_file ( filepath, pkg_name='.' ): |
302 |
- """Reads a file returns the description data. |
303 |
- |
304 |
- arguments: |
305 |
- * filepath -- file to read (str; path to tarball or file) |
306 |
- * pkg_name -- name of the package, in tarballs the description file |
307 |
- is located in <pkg_name>/ and thus this argument |
308 |
- is required. Defaults to '.', set to None to disable. |
309 |
- |
310 |
- All exceptions are passed to the caller (TarError, IOErr, <custom>). |
311 |
- <filepath> can either be a tarball in which case the real DESCRIPTION |
312 |
- file is read (<pkg_name>/DESCRIPTION) or a normal file. |
313 |
- """ |
314 |
- |
315 |
- self.logger.debug ( "Starting to read file '%s' ...\n" % filepath ) |
316 |
- |
317 |
- if not ( isinstance ( filepath, str ) and filepath ): |
318 |
- raise Exception ( "bad usage" ) |
319 |
- |
320 |
- # read describes how to import the lines from a file (e.g. rstrip()) |
321 |
- # fh, th are file/tar handles |
322 |
- read = th = fh = None |
323 |
- |
324 |
- if tarfile.is_tarfile ( filepath ): |
325 |
- # filepath is a tarball, open tar handle + file handle |
326 |
- th = tarfile.open ( filepath, 'r' ) |
327 |
- if pkg_name: |
328 |
- fh = th.extractfile ( os.path.join ( |
329 |
- pkg_name, |
330 |
- config.get ( 'DESCRIPTION.file_name' ) |
331 |
- ) ) |
332 |
- else: |
333 |
- fh = th.extractfile ( config.get ( 'DESCRIPTION.file_name' ) ) |
334 |
+ # read describes how to import the lines from a file (e.g. rstrip()) |
335 |
+ # fh, th are file/tar handles |
336 |
+ read = th = fh = None |
337 |
|
338 |
- # have to decode the lines |
339 |
- read = lambda lines : [ line.decode().rstrip() for line in lines ] |
340 |
+ if tarfile.is_tarfile ( filepath ): |
341 |
+ # filepath is a tarball, open tar handle + file handle |
342 |
+ th = tarfile.open ( filepath, 'r' ) |
343 |
+ if pkg_name: |
344 |
+ fh = th.extractfile ( os.path.join ( |
345 |
+ pkg_name, |
346 |
+ config.get ( 'DESCRIPTION.file_name' ) |
347 |
+ ) ) |
348 |
else: |
349 |
- # open file handle only |
350 |
- fh = open ( filepath, 'r' ) |
351 |
- read = lambda lines : [ line.rstrip() for line in lines ] |
352 |
- |
353 |
- x = None |
354 |
- read_lines = read ( fh.readlines() ) |
355 |
- del x, read |
356 |
- |
357 |
- fh.close() |
358 |
- if not th is None: th.close() |
359 |
- del fh, th |
360 |
- |
361 |
- if hasattr ( self, 'write_desc_file' ): |
362 |
- try: |
363 |
- util.dodir ( DescriptionReader.WRITE_DESCFILES_DIR ) |
364 |
- fh = open ( self.write_desc_file, 'w' ) |
365 |
- fh.write ( |
366 |
- '=== This is debug output (%s) ===\n' |
367 |
- % time.strftime ( '%F %H:%M:%S' ) |
368 |
- ) |
369 |
- fh.write ( '\n'.join ( read_lines ) ) |
370 |
- fh.write ( '\n' ) |
371 |
- finally: |
372 |
- if 'fh' in locals() and fh: fh.close() |
373 |
- |
374 |
+ fh = th.extractfile ( config.get ( 'DESCRIPTION.file_name' ) ) |
375 |
+ |
376 |
+ # have to decode the lines |
377 |
+ read = lambda lines : [ line.decode().rstrip() for line in lines ] |
378 |
+ else: |
379 |
+ # open file handle only |
380 |
+ fh = open ( filepath, 'r' ) |
381 |
+ read = lambda lines : [ line.rstrip() for line in lines ] |
382 |
+ |
383 |
+ x = None |
384 |
+ read_lines = read ( fh.readlines() ) |
385 |
+ del x, read |
386 |
+ |
387 |
+ fh.close() |
388 |
+ if not th is None: th.close() |
389 |
+ del fh, th |
390 |
+ |
391 |
+ if hasattr ( self, 'write_desc_file' ): |
392 |
+ try: |
393 |
+ util.dodir ( DescriptionReader.WRITE_DESCFILES_DIR ) |
394 |
+ fh = open ( self.write_desc_file, 'w' ) |
395 |
+ fh.write ( |
396 |
+ '=== This is debug output (%s) ===\n' |
397 |
+ % time.strftime ( '%F %H:%M:%S' ) |
398 |
+ ) |
399 |
+ fh.write ( '\n'.join ( read_lines ) ) |
400 |
+ fh.write ( '\n' ) |
401 |
+ finally: |
402 |
+ if 'fh' in locals() and fh: fh.close() |
403 |
|
404 |
- return read_lines |
405 |
|
406 |
- # --- end of get_desc_from_file (...) --- |
407 |
+ return read_lines |
408 |
|
409 |
- self.desc_data = None |
410 |
- read_data = dict () |
411 |
+ # --- end of _get_desc_from_file (...) --- |
412 |
|
413 |
+ def _get_raw_data ( self ): |
414 |
try: |
415 |
- desc_lines = get_desc_from_file ( |
416 |
+ desc_lines = self._get_desc_from_file ( |
417 |
self.fileinfo ['package_file'], |
418 |
self.fileinfo ['package_name'] |
419 |
) |
420 |
|
421 |
- except IOError as err: |
422 |
+ except Exception as err: |
423 |
self.logger.exception ( err ) |
424 |
- return self.desc_data |
425 |
+ return None |
426 |
+ |
427 |
+ raw = dict() |
428 |
|
429 |
field_context = None |
430 |
|
431 |
@@ -281,8 +279,8 @@ class DescriptionReader ( object ): |
432 |
if field_context: |
433 |
# context is set => append values |
434 |
|
435 |
- for val in make_values ( sline, field_context ): |
436 |
- read_data [field_context] . append ( val ) |
437 |
+ raw [field_context].append ( sline ) |
438 |
+ |
439 |
else: |
440 |
# no valid context => ignore line |
441 |
pass |
442 |
@@ -292,7 +290,7 @@ class DescriptionReader ( object ): |
443 |
field_context = None |
444 |
|
445 |
line_components = sline.partition ( |
446 |
- config.get ( 'DESCRIPTION.field_separator' ) |
447 |
+ config.get ( 'DESCRIPTION.field_separator', ':' ) |
448 |
) |
449 |
|
450 |
if line_components [1]: |
451 |
@@ -319,15 +317,13 @@ class DescriptionReader ( object ): |
452 |
'already been catched in DescriptionField...' |
453 |
) |
454 |
|
455 |
- # create a new empty list for this field_context |
456 |
- read_data [field_context] = [] |
457 |
+ if field_context in raw: |
458 |
+ raise Exception ( "field %s exists!" % field_context ) |
459 |
|
460 |
# add values to read_data, no need to check |
461 |
# line_components [2] 'cause [1] was a true str |
462 |
- for val in \ |
463 |
- make_values ( line_components [2], field_context ) \ |
464 |
- : |
465 |
- read_data [field_context] . append ( val ) |
466 |
+ # create a new empty list for this field_context |
467 |
+ raw[field_context] = [ line_components [2].lstrip() ] |
468 |
|
469 |
else: |
470 |
# reaching this branch means that |
471 |
@@ -341,14 +337,44 @@ class DescriptionReader ( object ): |
472 |
|
473 |
# -- end for -- |
474 |
|
475 |
- if self._parse_read_data ( read_data ): |
476 |
+ return raw |
477 |
+ # --- end of _get_raw_data (...) --- |
478 |
+ |
479 |
+ def run ( self ): |
480 |
+ """Reads a DESCRIPTION file and returns the read data if successful, |
481 |
+ else None. |
482 |
+ |
483 |
+ arguments: |
484 |
+ * file -- path to the tarball file (containing the description file) |
485 |
+ that should be read |
486 |
+ |
487 |
+ It does some pre-parsing, inter alia |
488 |
+ -> assigning field identifiers from the file to real field names |
489 |
+ -> split field values |
490 |
+ -> filter out unwanted/useless fields |
491 |
+ |
492 |
+ The return value is a description_data dict or None if the read data |
493 |
+ are "useless" (not suited to create an ebuild for it, |
494 |
+ e.g. if OS_TYPE is not unix). |
495 |
+ """ |
496 |
+ |
497 |
+ raw_data = self._get_raw_data() |
498 |
+ read_data = self._make_read_data ( raw_data ) |
499 |
+ |
500 |
+ self.desc_data = None |
501 |
+ |
502 |
+ if read_data is None: |
503 |
+ self.logger.warning ( |
504 |
+ "Failed to read file '%s'." % self.fileinfo ['package_file'] |
505 |
+ ) |
506 |
+ |
507 |
+ elif self._verify_read_data ( read_data ): |
508 |
self.logger.debug ( |
509 |
"Successfully read file '%s' with data = %s." |
510 |
% ( self.fileinfo ['package_file'], read_data ) |
511 |
) |
512 |
self.desc_data = read_data |
513 |
|
514 |
- # get_desc() is preferred, but this method returns the desc data, too |
515 |
- return self.desc_data |
516 |
+ # else have log entries from _verify() |
517 |
|
518 |
# --- end of run (...) --- |