1 |
Decode all arguments and listdir results as UTF-8, and exit |
2 |
with status 1 if anything fails to decode as UTF-8. Use portage's |
3 |
os and shutil wrappers to encode file names as UTF-8 regardless |
4 |
of locale. |
5 |
|
6 |
X-Gentoo-Bug: 561846 |
7 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=561846 |
8 |
--- |
9 |
bin/dohtml.py | 47 +++++++++++++++++++++++++++++++++++------------ |
10 |
1 file changed, 35 insertions(+), 12 deletions(-) |
11 |
|
12 |
diff --git a/bin/dohtml.py b/bin/dohtml.py |
13 |
index 5359f5e..dfcaa60 100755 |
14 |
--- a/bin/dohtml.py |
15 |
+++ b/bin/dohtml.py |
16 |
@@ -28,13 +28,13 @@ |
17 |
# - will do as 'dohtml -r', but ignore directories named CVS, SCCS, RCS |
18 |
# |
19 |
|
20 |
-from __future__ import print_function |
21 |
+from __future__ import print_function, unicode_literals |
22 |
|
23 |
-import os |
24 |
-import shutil |
25 |
+import os as _os |
26 |
import sys |
27 |
|
28 |
-from portage.util import normalize_path |
29 |
+from portage import _unicode_encode, _unicode_decode, os, shutil |
30 |
+from portage.util import normalize_path, writemsg |
31 |
|
32 |
# Change back to original cwd _after_ all imports (bug #469338). |
33 |
os.chdir(os.environ["__PORTAGE_HELPER_CWD"]) |
34 |
@@ -92,7 +92,13 @@ def install(basename, dirname, options, prefix=""): |
35 |
skipped_files.append(fullpath) |
36 |
elif options.recurse and os.path.isdir(fullpath) and \ |
37 |
basename not in options.disallowed_dirs: |
38 |
- for i in os.listdir(fullpath): |
39 |
+ for i in _os.listdir(_unicode_encode(fullpath)): |
40 |
+ try: |
41 |
+ i = _unicode_decode(i, errors='strict') |
42 |
+ except UnicodeDecodeError: |
43 |
+ writemsg('dohtml: argument is not encoded as UTF-8: %s\n' % |
44 |
+ _unicode_decode(i), noiselevel=-1) |
45 |
+ sys.exit(1) |
46 |
pfx = basename |
47 |
if prefix: |
48 |
pfx = os.path.join(prefix, pfx) |
49 |
@@ -155,12 +161,29 @@ def print_help(): |
50 |
print() |
51 |
|
52 |
def parse_args(): |
53 |
+ argv = sys.argv[:] |
54 |
+ |
55 |
+ if sys.hexversion >= 0x3000000: |
56 |
+ # We can't trust that the filesystem encoding (locale dependent) |
57 |
+ # correctly matches the arguments, so use surrogateescape to |
58 |
+ # pass through the original argv bytes for Python 3. |
59 |
+ fs_encoding = sys.getfilesystemencoding() |
60 |
+ argv = [x.encode(fs_encoding, 'surrogateescape') for x in argv] |
61 |
+ |
62 |
+ for x, arg in enumerate(argv): |
63 |
+ try: |
64 |
+ argv[x] = _unicode_decode(arg, errors='strict') |
65 |
+ except UnicodeDecodeError: |
66 |
+ writemsg('dohtml: argument is not encoded as UTF-8: %s\n' % |
67 |
+ _unicode_decode(arg), noiselevel=-1) |
68 |
+ sys.exit(1) |
69 |
+ |
70 |
options = OptionsClass() |
71 |
args = [] |
72 |
|
73 |
x = 1 |
74 |
- while x < len(sys.argv): |
75 |
- arg = sys.argv[x] |
76 |
+ while x < len(argv): |
77 |
+ arg = argv[x] |
78 |
if arg in ["-h","-r","-V"]: |
79 |
if arg == "-h": |
80 |
print_help() |
81 |
@@ -169,17 +192,17 @@ def parse_args(): |
82 |
options.recurse = True |
83 |
elif arg == "-V": |
84 |
options.verbose = True |
85 |
- elif sys.argv[x] in ["-A","-a","-f","-x","-p"]: |
86 |
+ elif argv[x] in ["-A","-a","-f","-x","-p"]: |
87 |
x += 1 |
88 |
- if x == len(sys.argv): |
89 |
+ if x == len(argv): |
90 |
print_help() |
91 |
sys.exit(0) |
92 |
elif arg == "-p": |
93 |
- options.doc_prefix = sys.argv[x] |
94 |
+ options.doc_prefix = argv[x] |
95 |
if options.doc_prefix: |
96 |
options.doc_prefix = normalize_path(options.doc_prefix) |
97 |
else: |
98 |
- values = sys.argv[x].split(",") |
99 |
+ values = argv[x].split(",") |
100 |
if arg == "-A": |
101 |
options.allowed_exts.extend(values) |
102 |
elif arg == "-a": |
103 |
@@ -189,7 +212,7 @@ def parse_args(): |
104 |
elif arg == "-x": |
105 |
options.disallowed_dirs = values |
106 |
else: |
107 |
- args.append(sys.argv[x]) |
108 |
+ args.append(argv[x]) |
109 |
x += 1 |
110 |
|
111 |
return (options, args) |
112 |
-- |
113 |
2.4.6 |