1 |
commit: aa77ac2daaac2b4666fdecf574db00f2d5e1bba2 |
2 |
Author: André Erdmann <dywi <AT> mailerd <DOT> de> |
3 |
AuthorDate: Tue Jul 30 16:03:37 2013 +0000 |
4 |
Commit: André Erdmann <dywi <AT> mailerd <DOT> de> |
5 |
CommitDate: Tue Jul 30 16:03:37 2013 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=aa77ac2d |
7 |
|
8 |
roverlay/util/fileio: detect filetype when reading |
9 |
|
10 |
--- |
11 |
roverlay/strutil.py | 2 +- |
12 |
roverlay/util/common.py | 2 +- |
13 |
roverlay/util/fileio.py | 75 ++++++++++++++++++++++++++++++++++++++++--------- |
14 |
3 files changed, 63 insertions(+), 16 deletions(-) |
15 |
|
16 |
diff --git a/roverlay/strutil.py b/roverlay/strutil.py |
17 |
index 755a191..a8e4361 100644 |
18 |
--- a/roverlay/strutil.py |
19 |
+++ b/roverlay/strutil.py |
20 |
@@ -145,7 +145,7 @@ def bytes_try_decode ( |
21 |
* charwise_only -- do charwise conversion only |
22 |
* force_decode -- decode byte_str even if it's already a str |
23 |
""" |
24 |
- if not isinstance ( byte_str, str ): |
25 |
+ if not isinstance ( byte_str, str ) or force_decode: |
26 |
if not charwise_only and encodings: |
27 |
ret = None |
28 |
if not isinstance ( encodings, str ): |
29 |
|
30 |
diff --git a/roverlay/util/common.py b/roverlay/util/common.py |
31 |
index 6053015..8449b2c 100644 |
32 |
--- a/roverlay/util/common.py |
33 |
+++ b/roverlay/util/common.py |
34 |
@@ -219,7 +219,7 @@ def dodir ( directory, mkdir_p=False, **makedirs_kw ): |
35 |
# --- end of dodir (...) --- |
36 |
|
37 |
def dodir_for_file ( filepath, mkdir_p=True, **kw ): |
38 |
- return dodir ( os.path.basename ( filepath ), mkdir_p=mkdir_p, **kw ) |
39 |
+ return dodir ( os.path.dirname ( filepath ), mkdir_p=mkdir_p, **kw ) |
40 |
# --- end of dodir_for_file (...) --- |
41 |
|
42 |
def getsize ( filepath ): |
43 |
|
44 |
diff --git a/roverlay/util/fileio.py b/roverlay/util/fileio.py |
45 |
index 5d996f8..c6e33e5 100644 |
46 |
--- a/roverlay/util/fileio.py |
47 |
+++ b/roverlay/util/fileio.py |
48 |
@@ -10,14 +10,8 @@ import mimetypes |
49 |
import sys |
50 |
|
51 |
import roverlay.util.common |
52 |
- |
53 |
-if sys.hexversion >= 0x3000000: |
54 |
- def iter_decode ( lv ): |
55 |
- for l in lv: |
56 |
- yield l.decode() |
57 |
-else: |
58 |
- def iter_decode ( lv ): |
59 |
- return lv |
60 |
+import roverlay.strutil |
61 |
+from roverlay.strutil import bytes_try_decode |
62 |
|
63 |
|
64 |
_MIME = mimetypes.MimeTypes() |
65 |
@@ -37,19 +31,72 @@ SUPPORTED_COMPRESSION = { |
66 |
} |
67 |
|
68 |
|
69 |
-def read_text_file ( filepath, preparse=None ): |
70 |
+def read_compressed_file_handle ( CH, preparse=None ): |
71 |
+ if preparse is None: |
72 |
+ for line in CH.readlines(): |
73 |
+ yield bytes_try_decode ( line ) |
74 |
+ else: |
75 |
+ yield preparse ( bytes_try_decode ( line ) ) |
76 |
+# --- end of read_compressed_file_handle (...) --- |
77 |
+ |
78 |
+def read_text_file ( filepath, preparse=None, try_harder=True ): |
79 |
+ """Generator that reads a compressed/uncompressed file and yields text |
80 |
+ lines. Optionally preparses the text lines. |
81 |
+ |
82 |
+ arguments: |
83 |
+ * filepath -- file to read |
84 |
+ * preparse -- function for (pre-)parsing lines |
85 |
+ * try_harder -- try known compression formats if file extension cannot |
86 |
+ be detected (defaults to True) |
87 |
+ """ |
88 |
+ |
89 |
ftype = guess_filetype ( filepath ) |
90 |
compress_open = SUPPORTED_COMPRESSION.get ( ftype[1], None ) |
91 |
|
92 |
if compress_open is not None: |
93 |
with compress_open ( filepath, mode='r' ) as CH: |
94 |
- for line in iter_decode ( CH.readlines() ): |
95 |
- yield line if preparse is None else preparse ( line ) |
96 |
+ for line in read_compressed_file_handle ( CH, preparse ): |
97 |
+ yield line |
98 |
+ |
99 |
+ elif try_harder: |
100 |
+ # guess_filetype detects file extensions only |
101 |
+ # |
102 |
+ # try known compression formats |
103 |
+ # |
104 |
+ for comp in ( COMP_BZIP2, COMP_GZIP ): |
105 |
+ CH = None |
106 |
+ try: |
107 |
+ CH = SUPPORTED_COMPRESSION [comp] ( filepath, mode='r' ) |
108 |
+ for line in read_compressed_file_handle ( CH, preparse ): |
109 |
+ yield line |
110 |
+ CH.close() |
111 |
+ except IOError as ioerr: |
112 |
+ if CH: |
113 |
+ CH.close() |
114 |
+ if ioerr.errno is not None: |
115 |
+ raise |
116 |
+ else: |
117 |
+ break |
118 |
+ else: |
119 |
+ with open ( filepath, 'rt' ) as FH: |
120 |
+ if preparse is None: |
121 |
+ for line in FH.readlines(): |
122 |
+ yield line |
123 |
+ else: |
124 |
+ for line in FH.readlines(): |
125 |
+ yield preparse ( line ) |
126 |
+ # -- end for <comp> |
127 |
else: |
128 |
with open ( filepath, 'rt' ) as FH: |
129 |
- for line in FH.readlines(): |
130 |
- yield line if preparse is None else preparse ( line ) |
131 |
+ if preparse is None: |
132 |
+ for line in FH.readlines(): |
133 |
+ yield line |
134 |
+ else: |
135 |
+ for line in FH.readlines(): |
136 |
+ yield preparse ( line ) |
137 |
+ # -- end if <compress_open?, try_harder?> |
138 |
|
139 |
+# --- end of read_text_file (...) --- |
140 |
|
141 |
def write_text_file ( |
142 |
filepath, lines, compression=None, mode='wt', |
143 |
@@ -64,7 +111,7 @@ def write_text_file ( |
144 |
roverlay.util.common.dodir_for_file ( filepath ) |
145 |
|
146 |
if compress_open: |
147 |
- NL = '\n'.encode() |
148 |
+ NL = newline.encode() |
149 |
with compress_open ( filepath, mode.rstrip ( 'tu' ) ) as CH: |
150 |
for line in lines: |
151 |
CH.write ( str ( line ).encode() ) |