1 |
commit: 580f55bc730343381d0d6f596867a6601f821d28 |
2 |
Author: André Erdmann <dywi <AT> mailerd <DOT> de> |
3 |
AuthorDate: Sat Feb 22 14:26:44 2014 +0000 |
4 |
Commit: André Erdmann <dywi <AT> mailerd <DOT> de> |
5 |
CommitDate: Sat Feb 22 14:35:32 2014 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=580f55bc |
7 |
|
8 |
roverlay/util/fileio: support xz compression |
9 |
|
10 |
* roverlay/util/fileio: |
11 |
** read/write_text_file(): add support for xz |
12 |
** read_text_file(): catch compression-related errors |
13 |
for the first text line only |
14 |
** read_text_file(): make sure to close the file(!) |
15 |
** TextFile: add get_default_compression() |
16 |
* move SUPPORTED_COMPRESSION dict to roverlay/util/compression |
17 |
* roverlay/config/entrymap: use roverlay/util/compression |
18 |
* roverlay/db/distmap: add get_default_compression() |
19 |
|
20 |
--- |
21 |
Makefile | 9 +-- |
22 |
roverlay/config/entrymap.py | 14 +++-- |
23 |
roverlay/db/distmap.py | 5 ++ |
24 |
roverlay/util/compression.py | 63 +++++++++++++++++++++ |
25 |
roverlay/util/fileio.py | 131 +++++++++++++++++++++++++------------------ |
26 |
5 files changed, 159 insertions(+), 63 deletions(-) |
27 |
|
28 |
diff --git a/Makefile b/Makefile |
29 |
index 4f2c60e..495f6c8 100644 |
30 |
--- a/Makefile |
31 |
+++ b/Makefile |
32 |
@@ -28,9 +28,10 @@ ROVERLAY_MAIN := ./roverlay.py |
33 |
|
34 |
PYVER := |
35 |
PYTHON := python$(PYVER) |
36 |
-PYDOC_SH = ./bin/build/do_pydoc.sh |
37 |
+PYDOC_SH := ./bin/build/do_pydoc.sh |
38 |
+X_COMPRESS := bzip2 |
39 |
|
40 |
-RST_HTML = ./bin/build/roverlay_rst2html.sh |
41 |
+RST_HTML := ./bin/build/roverlay_rst2html.sh |
42 |
|
43 |
SRC_DOCDIR := ./doc |
44 |
|
45 |
@@ -130,8 +131,8 @@ dist: distclean release |
46 |
compress-config: $(BUILDDIR) |
47 |
@install -d $(BUILDDIR)/config |
48 |
cp -vLr -p --no-preserve=ownership config/simple-deprules.d $(BUILDDIR)/config/ |
49 |
- find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose bzip2 |
50 |
- bzip2 -k -c config/license.map > $(BUILDDIR)/config/license.map |
51 |
+ find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose $(X_COMPRESS) |
52 |
+ $(X_COMPRESS) -c config/license.map > $(BUILDDIR)/config/license.map |
53 |
|
54 |
install-roverlay: ./roverlay.py |
55 |
install -T -D -- ./roverlay.py $(BINDIR)/roverlay |
56 |
|
57 |
diff --git a/roverlay/config/entrymap.py b/roverlay/config/entrymap.py |
58 |
index eb24b39..b53d5a6 100644 |
59 |
--- a/roverlay/config/entrymap.py |
60 |
+++ b/roverlay/config/entrymap.py |
61 |
@@ -47,6 +47,8 @@ known dict keys are 'path', 'description'/'desc' and 'value_type': |
62 |
|
63 |
__all__ = [ 'CONFIG_ENTRY_MAP', 'prune_description', ] |
64 |
|
65 |
+import roverlay.util.compression |
66 |
+ |
67 |
fs_file = 'fs_file' |
68 |
fs_abslist = 'list:fs_abs' |
69 |
yesno = 'yesno' |
70 |
@@ -60,6 +62,10 @@ is_yesno = { 'value_type' : 'yesno' } |
71 |
|
72 |
CAPSLOCK = ( 'CAPSLOCK', ) |
73 |
LOG_LEVEL = ( "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "CRITICAL" ) |
74 |
+COMP_FORMATS = [ "default", "none" ] + sorted ( |
75 |
+ roverlay.util.compression.get_all_compression_formats() |
76 |
+ ##roverlay.util.compression.get_supported_compression_formats() |
77 |
+) |
78 |
|
79 |
is_log_level = { 'choices' : LOG_LEVEL, 'flags' : CAPSLOCK } |
80 |
|
81 |
@@ -324,10 +330,10 @@ CONFIG_ENTRY_MAP = dict ( |
82 |
), |
83 |
|
84 |
overlay_distmap_compression = dict ( |
85 |
- description = 'distmap compression format (none, bzip2 or gzip)', |
86 |
- choices = frozenset ({ |
87 |
- 'none', 'default', 'bz2', 'bzip2', 'gz', 'gzip' |
88 |
- }), |
89 |
+ description = 'distmap compression format ({})'.format ( |
90 |
+ ', '.join ( COMP_FORMATS ) |
91 |
+ ), |
92 |
+ choices = COMP_FORMATS, |
93 |
), |
94 |
|
95 |
overlay_distmap_file = dict ( |
96 |
|
97 |
diff --git a/roverlay/db/distmap.py b/roverlay/db/distmap.py |
98 |
index 8ed3a16..818d5d0 100644 |
99 |
--- a/roverlay/db/distmap.py |
100 |
+++ b/roverlay/db/distmap.py |
101 |
@@ -647,6 +647,11 @@ class FileDistMap ( roverlay.util.fileio.TextFile, _DistMapBase ): |
102 |
# file format (reserved for future usage) |
103 |
FILE_FORMAT = '0' |
104 |
|
105 |
+ @classmethod |
106 |
+ def get_default_compression ( cls ): |
107 |
+ return "bzip2" if cls.check_compression_supported ( "bzip2" ) else None |
108 |
+ # --- end of get_default_compression (...) --- |
109 |
+ |
110 |
def __init__ ( |
111 |
self, distmap_file, distmap_compression=None, ignore_missing=False |
112 |
): |
113 |
|
114 |
diff --git a/roverlay/util/compression.py b/roverlay/util/compression.py |
115 |
new file mode 100644 |
116 |
index 0000000..172b916 |
117 |
--- /dev/null |
118 |
+++ b/roverlay/util/compression.py |
119 |
@@ -0,0 +1,63 @@ |
120 |
+# R overlay -- util, compression |
121 |
+# -*- coding: utf-8 -*- |
122 |
+# Copyright (C) 2012-2014 André Erdmann <dywi@×××××××.de> |
123 |
+# Distributed under the terms of the GNU General Public License; |
124 |
+# either version 2 of the License, or (at your option) any later version. |
125 |
+ |
126 |
+__all__ = [ |
127 |
+ 'COMP_GZIP', 'COMP_BZIP2', 'COMP_XZ', |
128 |
+ 'get_all_compression_formats', 'get_supported_compression_formats', |
129 |
+ 'check_compression_supported', 'get_compress_open', |
130 |
+] |
131 |
+ |
132 |
+import gzip |
133 |
+import bz2 |
134 |
+ |
135 |
+try: |
136 |
+ import lzma |
137 |
+except ImportError: |
138 |
+ # python < 3.3 without backported lzma |
139 |
+ _HAVE_LZMA_MODULE = False |
140 |
+ # COULDFIX: compat hack, always catch IOError before LZMAError! |
141 |
+ LZMAError = IOError |
142 |
+else: |
143 |
+ _HAVE_LZMA_MODULE = True |
144 |
+ LZMAError = lzma.LZMAError |
145 |
+ |
146 |
+ |
147 |
+COMP_GZIP = 1 |
148 |
+COMP_BZIP2 = 2 |
149 |
+COMP_XZ = 3 |
150 |
+ |
151 |
+SUPPORTED_COMPRESSION = { |
152 |
+ 'gzip' : gzip.GzipFile, |
153 |
+ 'gz' : gzip.GzipFile, |
154 |
+ COMP_GZIP : gzip.GzipFile, |
155 |
+ 'bzip2' : bz2.BZ2File, |
156 |
+ 'bz2' : bz2.BZ2File, |
157 |
+ COMP_BZIP2 : bz2.BZ2File, |
158 |
+} |
159 |
+ |
160 |
+if _HAVE_LZMA_MODULE: |
161 |
+ SUPPORTED_COMPRESSION ['xz'] = lzma.LZMAFile |
162 |
+ SUPPORTED_COMPRESSION [COMP_XZ] = lzma.LZMAFile |
163 |
+# -- end if _HAVE_LZMA_MODULE |
164 |
+ |
165 |
+def get_all_compression_formats(): |
166 |
+ return [ 'gzip', 'gz', 'bzip2', 'bz2', 'xz' ] |
167 |
+# --- end of get_all_compression_formats (...) --- |
168 |
+ |
169 |
+def get_supported_compression_formats(): |
170 |
+ return [ k for k in SUPPORTED_COMPRESSION if isinstance ( k, str ) ] |
171 |
+# --- end of get_supported_compression_formats (...) --- |
172 |
+ |
173 |
+def check_compression_supported ( compression ): |
174 |
+ return compression in SUPPORTED_COMPRESSION |
175 |
+# --- end of check_compression_supported (...) --- |
176 |
+ |
177 |
+def get_compress_open ( compression, *args ): |
178 |
+ if args: |
179 |
+ return SUPPORTED_COMPRESSION.get ( compression, *args ) |
180 |
+ else: |
181 |
+ return SUPPORTED_COMPRESSION [compression] |
182 |
+# --- end of get_compress_open (...) --- |
183 |
|
184 |
diff --git a/roverlay/util/fileio.py b/roverlay/util/fileio.py |
185 |
index 372cdd6..dc96119 100644 |
186 |
--- a/roverlay/util/fileio.py |
187 |
+++ b/roverlay/util/fileio.py |
188 |
@@ -1,11 +1,9 @@ |
189 |
# R overlay -- util, file read operations |
190 |
# -*- coding: utf-8 -*- |
191 |
-# Copyright (C) 2012 André Erdmann <dywi@×××××××.de> |
192 |
+# Copyright (C) 2012-2014 André Erdmann <dywi@×××××××.de> |
193 |
# Distributed under the terms of the GNU General Public License; |
194 |
# either version 2 of the License, or (at your option) any later version. |
195 |
|
196 |
-import gzip |
197 |
-import bz2 |
198 |
import mimetypes |
199 |
import sys |
200 |
import os.path |
201 |
@@ -14,25 +12,17 @@ import errno |
202 |
|
203 |
import roverlay.util.common |
204 |
import roverlay.util.objects |
205 |
+import roverlay.util.compression |
206 |
import roverlay.strutil |
207 |
from roverlay.strutil import bytes_try_decode |
208 |
+from roverlay.util.compression import \ |
209 |
+ COMP_XZ, COMP_BZIP2, COMP_GZIP, LZMAError, \ |
210 |
+ get_compress_open, check_compression_supported |
211 |
|
212 |
|
213 |
_MIME = mimetypes.MimeTypes() |
214 |
- |
215 |
guess_filetype = _MIME.guess_type |
216 |
|
217 |
-COMP_GZIP = 1 |
218 |
-COMP_BZIP2 = 2 |
219 |
- |
220 |
-SUPPORTED_COMPRESSION = { |
221 |
- 'gzip' : gzip.GzipFile, |
222 |
- 'gz' : gzip.GzipFile, |
223 |
- COMP_GZIP : gzip.GzipFile, |
224 |
- 'bzip2' : bz2.BZ2File, |
225 |
- 'bz2' : bz2.BZ2File, |
226 |
- COMP_BZIP2 : bz2.BZ2File, |
227 |
-} |
228 |
|
229 |
def strip_newline ( s ): |
230 |
return s.rstrip ( '\n' ) |
231 |
@@ -61,59 +51,75 @@ def read_text_file ( filepath, preparse=None, try_harder=True ): |
232 |
be detected (defaults to True) |
233 |
""" |
234 |
|
235 |
- |
236 |
ftype = guess_filetype ( filepath ) |
237 |
- compress_open = SUPPORTED_COMPRESSION.get ( ftype[1], None ) |
238 |
+ compress_open = get_compress_open ( ftype[1], None ) |
239 |
|
240 |
if compress_open is not None: |
241 |
with compress_open ( filepath, mode='r' ) as CH: |
242 |
for line in read_compressed_file_handle ( CH, preparse ): |
243 |
yield line |
244 |
|
245 |
+ return |
246 |
+ |
247 |
elif try_harder: |
248 |
# guess_filetype detects file extensions only |
249 |
# |
250 |
# try known compression formats |
251 |
# |
252 |
- for comp in ( COMP_BZIP2, COMP_GZIP ): |
253 |
+ for comp in ( COMP_BZIP2, COMP_XZ, COMP_GZIP ): |
254 |
CH = None |
255 |
- try: |
256 |
- CH = SUPPORTED_COMPRESSION [comp] ( filepath, mode='r' ) |
257 |
- for line in read_compressed_file_handle ( CH, preparse ): |
258 |
- yield line |
259 |
- CH.close() |
260 |
- except IOError as ioerr: |
261 |
- if CH: |
262 |
+ copen = get_compress_open ( comp, None ) |
263 |
+ if copen is not None: |
264 |
+ try: |
265 |
+ CH = copen ( filepath, mode='r' ) |
266 |
+ creader = read_compressed_file_handle ( CH, preparse ) |
267 |
+ # safely read first line only |
268 |
+ line = next ( creader ) |
269 |
+ |
270 |
+ except StopIteration: |
271 |
+ # empty file (?) |
272 |
CH.close() |
273 |
- if ioerr.errno is not None: |
274 |
+ return |
275 |
+ |
276 |
+ except IOError as ioerr: |
277 |
+ # failed to open (gzip, bzip2) |
278 |
+ if CH: CH.close() |
279 |
+ CH = None |
280 |
+ if ioerr.errno is not None: |
281 |
+ raise |
282 |
+ |
283 |
+ except LZMAError as err: |
284 |
+ # failed to open (xz) |
285 |
+ if CH: CH.close() |
286 |
+ CH = None |
287 |
+ |
288 |
+ except: |
289 |
+ if CH: CH.close() |
290 |
raise |
291 |
- else: |
292 |
- break |
293 |
- else: |
294 |
- with open ( filepath, 'rt' ) as FH: |
295 |
- if preparse is None: |
296 |
- for line in FH.readlines(): |
297 |
- yield line |
298 |
- elif preparse is True: |
299 |
- for line in FH.readlines(): |
300 |
- yield strip_newline ( line ) |
301 |
+ |
302 |
else: |
303 |
- for line in FH.readlines(): |
304 |
- yield preparse ( line ) |
305 |
+ # read remaining lines |
306 |
+ for line in creader: |
307 |
+ yield line |
308 |
+ CH.close() |
309 |
+ return |
310 |
+ # -- end try |
311 |
+ # -- end if |
312 |
# -- end for <comp> |
313 |
- else: |
314 |
- with open ( filepath, 'rt' ) as FH: |
315 |
- if preparse is None: |
316 |
- for line in FH.readlines(): |
317 |
- yield line |
318 |
- elif preparse is True: |
319 |
- for line in FH.readlines(): |
320 |
- yield strip_newline ( line ) |
321 |
- else: |
322 |
- for line in FH.readlines(): |
323 |
- yield preparse ( line ) |
324 |
- # -- end if <compress_open?, try_harder?> |
325 |
|
326 |
+ # -- end if <try to read filepath as compressed file> |
327 |
+ |
328 |
+ # file doesn't seem to be compressed (or not supported) |
329 |
+ with open ( filepath, 'rt' ) as FH: |
330 |
+ if preparse is None: |
331 |
+ for line in FH.readlines(): |
332 |
+ yield line |
333 |
+ elif preparse is True: |
334 |
+ for line in FH.readlines(): |
335 |
+ yield strip_newline ( line ) |
336 |
+ else: |
337 |
+ for line in FH.readlines(): |
338 |
+ yield preparse ( line ) |
339 |
# --- end of read_text_file (...) --- |
340 |
|
341 |
def write_text_file ( |
342 |
@@ -121,9 +127,7 @@ def write_text_file ( |
343 |
append_newlines=True, append_newline_eof=False, create_dir=True, |
344 |
newline='\n' |
345 |
): |
346 |
- compress_open = ( |
347 |
- SUPPORTED_COMPRESSION [compression] if compression else None |
348 |
- ) |
349 |
+ compress_open = get_compress_open ( compression ) if compression else None |
350 |
|
351 |
if create_dir: |
352 |
roverlay.util.common.dodir_for_file ( filepath ) |
353 |
@@ -156,6 +160,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ): |
354 |
READ_PREPARSE = True |
355 |
READ_TRY_HARDER = True |
356 |
|
357 |
+ @classmethod |
358 |
+ def get_default_compression ( cls ): |
359 |
+ return None |
360 |
+ # --- end of get_default_compression (...) --- |
361 |
+ |
362 |
+ @classmethod |
363 |
+ def check_compression_supported ( cls, compression ): |
364 |
+ return check_compression_supported ( compression ) |
365 |
+ # --- end of check_compression_supported (...) --- |
366 |
+ |
367 |
def __init__ ( self, filepath, compression=None ): |
368 |
super ( TextFile, self ).__init__() |
369 |
|
370 |
@@ -197,9 +211,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ): |
371 |
# --- end of set_filepath (...) --- |
372 |
|
373 |
def set_compression ( self, compression ): |
374 |
- if not compression or compression in { 'default', 'none' }: |
375 |
+ if not compression or compression == 'none': |
376 |
self._compression = None |
377 |
- elif compression in SUPPORTED_COMPRESSION: |
378 |
+ elif compression == 'default': |
379 |
+ if __debug__: |
380 |
+ comp = self.get_default_compression() |
381 |
+ assert self.check_compression_supported ( comp ) |
382 |
+ self._compression = comp |
383 |
+ else: |
384 |
+ self._compression = self.get_default_compression() |
385 |
+ elif self.check_compression_supported ( compression ): |
386 |
self._compression = compression |
387 |
else: |
388 |
raise ValueError ( |