Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations
Date: Wed, 22 Feb 2017 00:32:32
Message-Id: 20170222003156.25319-1-zmedico@gentoo.org
1 Use a regular expression to detect line continuations, instead
2 of the unicode_escape codec, since the unicode_escape codec is
3 not really intended to be used this way.
4
5 This fixes an issue with Python 3.6, where ebuilds containing
6 backslash escape sequences trigger a DeprecationWarning, such as
7 this warning triggered by a sed expression in the dev-db/sqlite
8 ebuilds:
9
10 DeprecationWarning: invalid escape sequence '\['
11 ---
12 repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 +++++++----------------
13 1 file changed, 8 insertions(+), 20 deletions(-)
14
15 diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py b/repoman/pym/repoman/modules/scan/ebuild/checks.py
16 index 15e2251..d21bf0c 100644
17 --- a/repoman/pym/repoman/modules/scan/ebuild/checks.py
18 +++ b/repoman/pym/repoman/modules/scan/ebuild/checks.py
19 @@ -8,8 +8,8 @@ and correctness of an ebuild."""
20
21 from __future__ import unicode_literals
22
23 -import codecs
24 from itertools import chain
25 +import operator
26 import re
27 import time
28
29 @@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False):
30
31 _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$')
32 _ignore_comment_re = re.compile(r'^\s*#')
33 +_continuation_re = re.compile(r'(\\)*$')
34
35
36 def run_checks(contents, pkg):
37 - unicode_escape_codec = codecs.lookup('unicode_escape')
38 - unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
39 if _constant_checks is None:
40 checks_init()
41 checks = _constant_checks
42 @@ -957,32 +956,21 @@ def run_checks(contents, pkg):
43 # cow
44 # This will merge these lines like so:
45 # inherit foo bar moo cow
46 - try:
47 - # A normal line will end in the two bytes: <\> <\n>. So decoding
48 - # that will result in python thinking the <\n> is being escaped
49 - # and eat the single <\> which makes it hard for us to detect.
50 - # Instead, strip the newline (which we know all lines have), and
51 - # append a <0>. Then when python escapes it, if the line ended
52 - # in a <\>, we'll end up with a <\0> marker to key off of. This
53 - # shouldn't be a problem with any valid ebuild ...
54 - line_escaped = unicode_escape(line.rstrip('\n') + '0')
55 - except SystemExit:
56 - raise
57 - except:
58 - # Who knows what kind of crazy crap an ebuild will have
59 - # in it -- don't allow it to kill us.
60 - line_escaped = line
61 + # A line ending with an even number of backslashes is not a
62 + # continuation, because the last backslash is itself escaped.
63 + # Therefore, look for an odd number of trailing backslashes.
64 + line_escaped = operator.sub(*_continuation_re.search(line).span()) % 2 == 1
65 if multiline:
66 # Chop off the \ and \n bytes from the previous line.
67 multiline = multiline[:-2] + line
68 - if not line_escaped.endswith('\0'):
69 + if not line_escaped:
70 line = multiline
71 num = multinum
72 multiline = None
73 else:
74 continue
75 else:
76 - if line_escaped.endswith('\0'):
77 + if line_escaped:
78 multinum = num
79 multiline = line
80 continue
81 --
82 2.10.2

Replies