Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] dohtml: handle unicode (bug 561846)
Date: Sat, 03 Oct 2015 22:33:33
Message-Id: 1443911583-545-1-git-send-email-zmedico@gentoo.org
1 Decode all arguments and listdir results as UTF-8, and exit with a
2 non-zero status if anything fails to decode as UTF-8. Use portage's
3 os and shutil wrappers so that file names are encoded as UTF-8
4 regardless of locale.
5
6 X-Gentoo-Bug: 561846
7 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=561846
8 ---
9 bin/dohtml.py | 47 +++++++++++++++++++++++++++++++++++------------
10 1 file changed, 35 insertions(+), 12 deletions(-)
11
12 diff --git a/bin/dohtml.py b/bin/dohtml.py
13 index 5359f5e..dfcaa60 100755
14 --- a/bin/dohtml.py
15 +++ b/bin/dohtml.py
16 @@ -28,13 +28,13 @@
17 # - will do as 'dohtml -r', but ignore directories named CVS, SCCS, RCS
18 #
19
20 -from __future__ import print_function
21 +from __future__ import print_function, unicode_literals
22
23 -import os
24 -import shutil
25 +import os as _os
26 import sys
27
28 -from portage.util import normalize_path
29 +from portage import _unicode_encode, _unicode_decode, os, shutil
30 +from portage.util import normalize_path, writemsg
31
32 # Change back to original cwd _after_ all imports (bug #469338).
33 os.chdir(os.environ["__PORTAGE_HELPER_CWD"])
34 @@ -92,7 +92,13 @@ def install(basename, dirname, options, prefix=""):
35 skipped_files.append(fullpath)
36 elif options.recurse and os.path.isdir(fullpath) and \
37 basename not in options.disallowed_dirs:
38 - for i in os.listdir(fullpath):
39 + for i in _os.listdir(_unicode_encode(fullpath)):
40 + try:
41 + i = _unicode_decode(i, errors='strict')
42 + except UnicodeDecodeError:
43 + writemsg('dohtml: argument is not encoded as UTF-8: %s\n' %
44 + _unicode_decode(i), noiselevel=-1)
45 + sys.exit(1)
46 pfx = basename
47 if prefix:
48 pfx = os.path.join(prefix, pfx)
49 @@ -155,12 +161,29 @@ def print_help():
50 print()
51
52 def parse_args():
53 + argv = sys.argv[:]
54 +
55 + if sys.hexversion >= 0x3000000:
56 + # We can't trust that the filesystem encoding (locale dependent)
57 + # correctly matches the arguments, so use surrogateescape to
58 + # pass through the original argv bytes for Python 3.
59 + fs_encoding = sys.getfilesystemencoding()
60 + argv = [x.encode(fs_encoding, 'surrogateescape') for x in argv]
61 +
62 + for x, arg in enumerate(argv):
63 + try:
64 + argv[x] = _unicode_decode(arg, errors='strict')
65 + except UnicodeDecodeError:
66 + writemsg('dohtml: argument is not encoded as UTF-8: %s\n' %
67 + _unicode_decode(arg), noiselevel=-1)
68 + sys.exit(1)
69 +
70 options = OptionsClass()
71 args = []
72
73 x = 1
74 - while x < len(sys.argv):
75 - arg = sys.argv[x]
76 + while x < len(argv):
77 + arg = argv[x]
78 if arg in ["-h","-r","-V"]:
79 if arg == "-h":
80 print_help()
81 @@ -169,17 +192,17 @@ def parse_args():
82 options.recurse = True
83 elif arg == "-V":
84 options.verbose = True
85 - elif sys.argv[x] in ["-A","-a","-f","-x","-p"]:
86 + elif argv[x] in ["-A","-a","-f","-x","-p"]:
87 x += 1
88 - if x == len(sys.argv):
89 + if x == len(argv):
90 print_help()
91 sys.exit(0)
92 elif arg == "-p":
93 - options.doc_prefix = sys.argv[x]
94 + options.doc_prefix = argv[x]
95 if options.doc_prefix:
96 options.doc_prefix = normalize_path(options.doc_prefix)
97 else:
98 - values = sys.argv[x].split(",")
99 + values = argv[x].split(",")
100 if arg == "-A":
101 options.allowed_exts.extend(values)
102 elif arg == "-a":
103 @@ -189,7 +212,7 @@ def parse_args():
104 elif arg == "-x":
105 options.disallowed_dirs = values
106 else:
107 - args.append(sys.argv[x])
108 + args.append(argv[x])
109 x += 1
110
111 return (options, args)
112 --
113 2.4.6

Replies