Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] filter-bash-environment.py: use buffered input, raw bytes (bug 647654)
Date: Wed, 14 Feb 2018 20:38:54
Message-Id: 20180214203828.30111-1-zmedico@gentoo.org
1 Use sys.stdin.buffer instead of sys.stdin.buffer.raw, for buffered input.
2 Also use raw bytes instead of Unicode strings, in order to avoid making
3 assumptions about character encodings, and also to avoid overhead from
4 Unicode decoding/encoding.
5
6 Bug: https://bugs.gentoo.org/647654
7 ---
8 bin/filter-bash-environment.py | 45 ++++++++++++++++++++----------------------
9 1 file changed, 21 insertions(+), 24 deletions(-)
10
11 diff --git a/bin/filter-bash-environment.py b/bin/filter-bash-environment.py
12 index a4cdc5429..91c194b95 100755
13 --- a/bin/filter-bash-environment.py
14 +++ b/bin/filter-bash-environment.py
15 @@ -2,21 +2,19 @@
16 # Copyright 1999-2014 Gentoo Foundation
17 # Distributed under the terms of the GNU General Public License v2
18
19 -import codecs
20 -import io
21 import os
22 import re
23 import sys
24
25 -here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
26 -func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
27 -func_end_re = re.compile(r'^\}$')
28 +here_doc_re = re.compile(br'.*\s<<[-]?(\w+)$')
29 +func_start_re = re.compile(br'^[-\w]+\s*\(\)\s*$')
30 +func_end_re = re.compile(br'^\}$')
31
32 -var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
33 -close_quote_re = re.compile(r'(\\"|"|\')\s*$')
34 -readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+')
35 +var_assign_re = re.compile(br'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
36 +close_quote_re = re.compile(br'(\\"|"|\')\s*$')
37 +readonly_re = re.compile(br'^declare\s+-(\S*)r(\S*)\s+')
38 # declare without assignment
39 -var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
40 +var_declare_re = re.compile(br'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
41
42 def have_end_quote(quote, line):
43 """
44 @@ -32,16 +30,16 @@ def have_end_quote(quote, line):
45 def filter_declare_readonly_opt(line):
46 readonly_match = readonly_re.match(line)
47 if readonly_match is not None:
48 - declare_opts = ''
49 + declare_opts = b''
50 for i in (1, 2):
51 group = readonly_match.group(i)
52 if group is not None:
53 declare_opts += group
54 if declare_opts:
55 - line = 'declare -%s %s' % \
56 + line = b'declare -%s %s' % \
57 (declare_opts, line[readonly_match.end():])
58 else:
59 - line = 'declare ' + line[readonly_match.end():]
60 + line = b'declare ' + line[readonly_match.end():]
61 return line
62
63 def filter_bash_environment(pattern, file_in, file_out):
64 @@ -57,7 +55,7 @@ def filter_bash_environment(pattern, file_in, file_out):
65 for line in file_in:
66 if multi_line_quote is not None:
67 if not multi_line_quote_filter:
68 - file_out.write(line.replace("\1", ""))
69 + file_out.write(line.replace(b"\1", b""))
70 if have_end_quote(multi_line_quote, line):
71 multi_line_quote = None
72 multi_line_quote_filter = None
73 @@ -78,7 +76,7 @@ def filter_bash_environment(pattern, file_in, file_out):
74 multi_line_quote_filter = filter_this
75 if not filter_this:
76 line = filter_declare_readonly_opt(line)
77 - file_out.write(line.replace("\1", ""))
78 + file_out.write(line.replace(b"\1", b""))
79 continue
80 else:
81 declare_match = var_declare_re.match(line)
82 @@ -98,7 +96,7 @@ def filter_bash_environment(pattern, file_in, file_out):
83 continue
84 here_doc = here_doc_re.match(line)
85 if here_doc is not None:
86 - here_doc_delim = re.compile("^%s$" % here_doc.group(1))
87 + here_doc_delim = re.compile(b"^%s$" % here_doc.group(1))
88 file_out.write(line)
89 continue
90 # Note: here-documents are handled before functions since otherwise
91 @@ -141,18 +139,17 @@ if __name__ == "__main__":
92 file_in = sys.stdin
93 file_out = sys.stdout
94 if sys.hexversion >= 0x3000000:
95 - file_in = codecs.iterdecode(sys.stdin.buffer.raw,
96 - 'utf_8', errors='replace')
97 - file_out = io.TextIOWrapper(sys.stdout.buffer,
98 - 'utf_8', errors='backslashreplace')
99 -
100 - var_pattern = args[0].split()
101 + file_in = sys.stdin.buffer
102 + file_out = sys.stdout.buffer
103 + var_pattern = os.fsencode(args[0]).split()
104 + else:
105 + var_pattern = args[0].split()
106
107 # Filter invalid variable names that are not supported by bash.
108 - var_pattern.append(r'\d.*')
109 - var_pattern.append(r'.*\W.*')
110 + var_pattern.append(br'\d.*')
111 + var_pattern.append(br'.*\W.*')
112
113 - var_pattern = "^(%s)$" % "|".join(var_pattern)
114 + var_pattern = b"^(%s)$" % b"|".join(var_pattern)
115 filter_bash_environment(
116 re.compile(var_pattern), file_in, file_out)
117 file_out.flush()
118 --
119 2.13.6

Replies