Gentoo Archives: gentoo-commits

From: "André Erdmann" <dywi@×××××××.de>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/db/, /, roverlay/config/, roverlay/util/
Date: Sat, 22 Feb 2014 14:56:07
Message-Id: 1393079732.580f55bc730343381d0d6f596867a6601f821d28.dywi@gentoo
1 commit: 580f55bc730343381d0d6f596867a6601f821d28
2 Author: André Erdmann <dywi <AT> mailerd <DOT> de>
3 AuthorDate: Sat Feb 22 14:26:44 2014 +0000
4 Commit: André Erdmann <dywi <AT> mailerd <DOT> de>
5 CommitDate: Sat Feb 22 14:35:32 2014 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=580f55bc
7
8 roverlay/util/fileio: support xz compression
9
10 * roverlay/util/fileio:
11 ** read/write_text_file(): add support for xz
12 ** read_text_file(): when probing compression formats (try_harder),
13 catch compression-related errors while reading the first line only
14 ** read_text_file(): make sure to close the file(!)
15 ** TextFile: add get_default_compression()
16 * move SUPPORTED_COMPRESSION dict to roverlay/util/compression
17 * roverlay/config/entrymap: use roverlay/util/compression
18 * roverlay/db/distmap: add get_default_compression() (bzip2 if supported, else None)
19
20 ---
21 Makefile | 9 +--
22 roverlay/config/entrymap.py | 14 +++--
23 roverlay/db/distmap.py | 5 ++
24 roverlay/util/compression.py | 63 +++++++++++++++++++++
25 roverlay/util/fileio.py | 131 +++++++++++++++++++++++++------------------
26 5 files changed, 159 insertions(+), 63 deletions(-)
27
28 diff --git a/Makefile b/Makefile
29 index 4f2c60e..495f6c8 100644
30 --- a/Makefile
31 +++ b/Makefile
32 @@ -28,9 +28,10 @@ ROVERLAY_MAIN := ./roverlay.py
33
34 PYVER :=
35 PYTHON := python$(PYVER)
36 -PYDOC_SH = ./bin/build/do_pydoc.sh
37 +PYDOC_SH := ./bin/build/do_pydoc.sh
38 +X_COMPRESS := bzip2
39
40 -RST_HTML = ./bin/build/roverlay_rst2html.sh
41 +RST_HTML := ./bin/build/roverlay_rst2html.sh
42
43 SRC_DOCDIR := ./doc
44
45 @@ -130,8 +131,8 @@ dist: distclean release
46 compress-config: $(BUILDDIR)
47 @install -d $(BUILDDIR)/config
48 cp -vLr -p --no-preserve=ownership config/simple-deprules.d $(BUILDDIR)/config/
49 - find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose bzip2
50 - bzip2 -k -c config/license.map > $(BUILDDIR)/config/license.map
51 + find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose $(X_COMPRESS)
52 + $(X_COMPRESS) -c config/license.map > $(BUILDDIR)/config/license.map
53
54 install-roverlay: ./roverlay.py
55 install -T -D -- ./roverlay.py $(BINDIR)/roverlay
56
57 diff --git a/roverlay/config/entrymap.py b/roverlay/config/entrymap.py
58 index eb24b39..b53d5a6 100644
59 --- a/roverlay/config/entrymap.py
60 +++ b/roverlay/config/entrymap.py
61 @@ -47,6 +47,8 @@ known dict keys are 'path', 'description'/'desc' and 'value_type':
62
63 __all__ = [ 'CONFIG_ENTRY_MAP', 'prune_description', ]
64
65 +import roverlay.util.compression
66 +
67 fs_file = 'fs_file'
68 fs_abslist = 'list:fs_abs'
69 yesno = 'yesno'
70 @@ -60,6 +62,10 @@ is_yesno = { 'value_type' : 'yesno' }
71
72 CAPSLOCK = ( 'CAPSLOCK', )
73 LOG_LEVEL = ( "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "CRITICAL" )
74 +COMP_FORMATS = [ "default", "none" ] + sorted (
75 + roverlay.util.compression.get_all_compression_formats()
76 + ##roverlay.util.compression.get_supported_compression_formats()
77 +)
78
79 is_log_level = { 'choices' : LOG_LEVEL, 'flags' : CAPSLOCK }
80
81 @@ -324,10 +330,10 @@ CONFIG_ENTRY_MAP = dict (
82 ),
83
84 overlay_distmap_compression = dict (
85 - description = 'distmap compression format (none, bzip2 or gzip)',
86 - choices = frozenset ({
87 - 'none', 'default', 'bz2', 'bzip2', 'gz', 'gzip'
88 - }),
89 + description = 'distmap compression format ({})'.format (
90 + ', '.join ( COMP_FORMATS )
91 + ),
92 + choices = COMP_FORMATS,
93 ),
94
95 overlay_distmap_file = dict (
96
97 diff --git a/roverlay/db/distmap.py b/roverlay/db/distmap.py
98 index 8ed3a16..818d5d0 100644
99 --- a/roverlay/db/distmap.py
100 +++ b/roverlay/db/distmap.py
101 @@ -647,6 +647,11 @@ class FileDistMap ( roverlay.util.fileio.TextFile, _DistMapBase ):
102 # file format (reserved for future usage)
103 FILE_FORMAT = '0'
104
105 + @classmethod
106 + def get_default_compression ( cls ):
107 + return "bzip2" if cls.check_compression_supported ( "bzip2" ) else None
108 + # --- end of get_default_compression (...) ---
109 +
110 def __init__ (
111 self, distmap_file, distmap_compression=None, ignore_missing=False
112 ):
113
114 diff --git a/roverlay/util/compression.py b/roverlay/util/compression.py
115 new file mode 100644
116 index 0000000..172b916
117 --- /dev/null
118 +++ b/roverlay/util/compression.py
119 @@ -0,0 +1,63 @@
120 +# R overlay -- util, compression
121 +# -*- coding: utf-8 -*-
122 +# Copyright (C) 2012-2014 André Erdmann <dywi@×××××××.de>
123 +# Distributed under the terms of the GNU General Public License;
124 +# either version 2 of the License, or (at your option) any later version.
125 +
126 +__all__ = [
127 + 'COMP_GZIP', 'COMP_BZIP2', 'COMP_XZ',
128 + 'get_all_compression_formats', 'get_supported_compression_formats',
129 + 'check_compression_supported', 'get_compress_open',
130 +]
131 +
132 +import gzip
133 +import bz2
134 +
135 +try:
136 + import lzma
137 +except ImportError:
138 + # python < 3.3 without backported lzma
139 + _HAVE_LZMA_MODULE = False
140 + # COULDFIX: compat hack, always catch IOError before LZMAError!
141 + LZMAError = IOError
142 +else:
143 + _HAVE_LZMA_MODULE = True
144 + LZMAError = lzma.LZMAError
145 +
146 +
147 +COMP_GZIP = 1
148 +COMP_BZIP2 = 2
149 +COMP_XZ = 3
150 +
151 +SUPPORTED_COMPRESSION = {
152 + 'gzip' : gzip.GzipFile,
153 + 'gz' : gzip.GzipFile,
154 + COMP_GZIP : gzip.GzipFile,
155 + 'bzip2' : bz2.BZ2File,
156 + 'bz2' : bz2.BZ2File,
157 + COMP_BZIP2 : bz2.BZ2File,
158 +}
159 +
160 +if _HAVE_LZMA_MODULE:
161 + SUPPORTED_COMPRESSION ['xz'] = lzma.LZMAFile
162 + SUPPORTED_COMPRESSION [COMP_XZ] = lzma.LZMAFile
163 +# -- end if _HAVE_LZMA_MODULE
164 +
165 +def get_all_compression_formats():
166 + return [ 'gzip', 'gz', 'bzip2', 'bz2', 'xz' ]
167 +# --- end of get_all_compression_formats (...) ---
168 +
169 +def get_supported_compression_formats():
170 + return [ k for k in SUPPORTED_COMPRESSION if isinstance ( k, str ) ]
171 +# --- end of get_supported_compression_formats (...) ---
172 +
173 +def check_compression_supported ( compression ):
174 + return compression in SUPPORTED_COMPRESSION
175 +# --- end of check_compression_supported (...) ---
176 +
177 +def get_compress_open ( compression, *args ):
178 + if args:
179 + return SUPPORTED_COMPRESSION.get ( compression, *args )
180 + else:
181 + return SUPPORTED_COMPRESSION [compression]
182 +# --- end of get_compress_open (...) ---
183
184 diff --git a/roverlay/util/fileio.py b/roverlay/util/fileio.py
185 index 372cdd6..dc96119 100644
186 --- a/roverlay/util/fileio.py
187 +++ b/roverlay/util/fileio.py
188 @@ -1,11 +1,9 @@
189 # R overlay -- util, file read operations
190 # -*- coding: utf-8 -*-
191 -# Copyright (C) 2012 André Erdmann <dywi@×××××××.de>
192 +# Copyright (C) 2012-2014 André Erdmann <dywi@×××××××.de>
193 # Distributed under the terms of the GNU General Public License;
194 # either version 2 of the License, or (at your option) any later version.
195
196 -import gzip
197 -import bz2
198 import mimetypes
199 import sys
200 import os.path
201 @@ -14,25 +12,17 @@ import errno
202
203 import roverlay.util.common
204 import roverlay.util.objects
205 +import roverlay.util.compression
206 import roverlay.strutil
207 from roverlay.strutil import bytes_try_decode
208 +from roverlay.util.compression import \
209 + COMP_XZ, COMP_BZIP2, COMP_GZIP, LZMAError, \
210 + get_compress_open, check_compression_supported
211
212
213 _MIME = mimetypes.MimeTypes()
214 -
215 guess_filetype = _MIME.guess_type
216
217 -COMP_GZIP = 1
218 -COMP_BZIP2 = 2
219 -
220 -SUPPORTED_COMPRESSION = {
221 - 'gzip' : gzip.GzipFile,
222 - 'gz' : gzip.GzipFile,
223 - COMP_GZIP : gzip.GzipFile,
224 - 'bzip2' : bz2.BZ2File,
225 - 'bz2' : bz2.BZ2File,
226 - COMP_BZIP2 : bz2.BZ2File,
227 -}
228
229 def strip_newline ( s ):
230 return s.rstrip ( '\n' )
231 @@ -61,59 +51,75 @@ def read_text_file ( filepath, preparse=None, try_harder=True ):
232 be detected (defaults to True)
233 """
234
235 -
236 ftype = guess_filetype ( filepath )
237 - compress_open = SUPPORTED_COMPRESSION.get ( ftype[1], None )
238 + compress_open = get_compress_open ( ftype[1], None )
239
240 if compress_open is not None:
241 with compress_open ( filepath, mode='r' ) as CH:
242 for line in read_compressed_file_handle ( CH, preparse ):
243 yield line
244
245 + return
246 +
247 elif try_harder:
248 # guess_filetype detects file extensions only
249 #
250 # try known compression formats
251 #
252 - for comp in ( COMP_BZIP2, COMP_GZIP ):
253 + for comp in ( COMP_BZIP2, COMP_XZ, COMP_GZIP ):
254 CH = None
255 - try:
256 - CH = SUPPORTED_COMPRESSION [comp] ( filepath, mode='r' )
257 - for line in read_compressed_file_handle ( CH, preparse ):
258 - yield line
259 - CH.close()
260 - except IOError as ioerr:
261 - if CH:
262 + copen = get_compress_open ( comp, None )
263 + if copen is not None:
264 + try:
265 + CH = copen ( filepath, mode='r' )
266 + creader = read_compressed_file_handle ( CH, preparse )
267 + # safely read first line only
268 + line = next ( creader )
269 +
270 + except StopIteration:
271 + # empty file (?)
272 CH.close()
273 - if ioerr.errno is not None:
274 + return
275 +
276 + except IOError as ioerr:
277 + # failed to open (gzip, bzip2)
278 + if CH: CH.close()
279 + CH = None
280 + if ioerr.errno is not None:
281 + raise
282 +
283 + except LZMAError as err:
284 + # failed to open (xz)
285 + if CH: CH.close()
286 + CH = None
287 +
288 + except:
289 + if CH: CH.close()
290 raise
291 - else:
292 - break
293 - else:
294 - with open ( filepath, 'rt' ) as FH:
295 - if preparse is None:
296 - for line in FH.readlines():
297 - yield line
298 - elif preparse is True:
299 - for line in FH.readlines():
300 - yield strip_newline ( line )
301 +
302 else:
303 - for line in FH.readlines():
304 - yield preparse ( line )
305 + # read remaining lines
306 + for line in creader:
307 + yield line
308 + CH.close()
309 + return
310 + # -- end try
311 + # -- end if
312 # -- end for <comp>
313 - else:
314 - with open ( filepath, 'rt' ) as FH:
315 - if preparse is None:
316 - for line in FH.readlines():
317 - yield line
318 - elif preparse is True:
319 - for line in FH.readlines():
320 - yield strip_newline ( line )
321 - else:
322 - for line in FH.readlines():
323 - yield preparse ( line )
324 - # -- end if <compress_open?, try_harder?>
325
326 + # -- end if <try to read filepath as compressed file>
327 +
328 + # file doesn't seem to be compressed (or not supported)
329 + with open ( filepath, 'rt' ) as FH:
330 + if preparse is None:
331 + for line in FH.readlines():
332 + yield line
333 + elif preparse is True:
334 + for line in FH.readlines():
335 + yield strip_newline ( line )
336 + else:
337 + for line in FH.readlines():
338 + yield preparse ( line )
339 # --- end of read_text_file (...) ---
340
341 def write_text_file (
342 @@ -121,9 +127,7 @@ def write_text_file (
343 append_newlines=True, append_newline_eof=False, create_dir=True,
344 newline='\n'
345 ):
346 - compress_open = (
347 - SUPPORTED_COMPRESSION [compression] if compression else None
348 - )
349 + compress_open = get_compress_open ( compression ) if compression else None
350
351 if create_dir:
352 roverlay.util.common.dodir_for_file ( filepath )
353 @@ -156,6 +160,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ):
354 READ_PREPARSE = True
355 READ_TRY_HARDER = True
356
357 + @classmethod
358 + def get_default_compression ( cls ):
359 + return None
360 + # --- end of get_default_compression (...) ---
361 +
362 + @classmethod
363 + def check_compression_supported ( cls, compression ):
364 + return check_compression_supported ( compression )
365 + # --- end of check_compression_supported (...) ---
366 +
367 def __init__ ( self, filepath, compression=None ):
368 super ( TextFile, self ).__init__()
369
370 @@ -197,9 +211,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ):
371 # --- end of set_filepath (...) ---
372
373 def set_compression ( self, compression ):
374 - if not compression or compression in { 'default', 'none' }:
375 + if not compression or compression == 'none':
376 self._compression = None
377 - elif compression in SUPPORTED_COMPRESSION:
378 + elif compression == 'default':
379 + if __debug__:
380 + comp = self.get_default_compression()
381 + assert self.check_compression_supported ( comp )
382 + self._compression = comp
383 + else:
384 + self._compression = self.get_default_compression()
385 + elif self.check_compression_supported ( compression ):
386 self._compression = compression
387 else:
388 raise ValueError (