Gentoo Archives: gentoo-portage-dev

From: Mike Frysinger <vapier@g.o>
To: gentoo-portage-dev@l.g.o
Subject: [gentoo-portage-dev] [PATCH v3] repoman: unroll escaped lines so we can check the entirety of it
Date: Fri, 25 May 2012 06:10:18
Message-Id: 1337923370-32699-1-git-send-email-vapier@gentoo.org
In Reply to: [gentoo-portage-dev] [RFC/PATCH] repoman: unroll escaped lines so we can check the entirety of it by Mike Frysinger
1 Sometimes people wrap long lines in their ebuilds to make them easier to
2 read, but this causes us issues when doing line-by-line checking. So
3 automatically unroll those lines before passing the full content down
4 to our checkers.
5
6 Signed-off-by: Mike Frysinger <vapier@g.o>
7 ---
8 v3
9 - use the codecs module (unicode_escape) for unescaping strings
10
11 pym/repoman/checks.py | 63 +++++++++++++++++++++++++++++++++++++++---------
12 1 files changed, 51 insertions(+), 12 deletions(-)
13
14 diff --git a/pym/repoman/checks.py b/pym/repoman/checks.py
15 index c17a0bd..402169e 100644
16 --- a/pym/repoman/checks.py
17 +++ b/pym/repoman/checks.py
18 @@ -5,6 +5,7 @@
19 """This module contains functions used in Repoman to ascertain the quality
20 and correctness of an ebuild."""
21
22 +import codecs
23 import re
24 import time
25 import repoman.errors as errors
26 @@ -757,8 +758,11 @@ _here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
27 _ignore_comment_re = re.compile(r'^\s*#')
28
29 def run_checks(contents, pkg):
30 + unicode_escape_codec = codecs.lookup('unicode_escape')
31 + unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
32 checks = _constant_checks
33 here_doc_delim = None
34 + multiline = None
35
36 for lc in checks:
37 lc.new(pkg)
38 @@ -772,19 +776,54 @@ def run_checks(contents, pkg):
39 here_doc = _here_doc_re.match(line)
40 if here_doc is not None:
41 here_doc_delim = re.compile(r'^\s*%s$' % here_doc.group(1))
42 + if here_doc_delim is not None:
43 + continue
44 +
45 + # Unroll multiline escaped strings so that we can check things:
46 + # inherit foo bar \
47 + # moo \
48 + # cow
49 + # This will merge these lines like so:
50 + # inherit foo bar moo cow
51 + try:
52 + # A normal line will end in the two bytes: <\> <\n>. So decoding
53 + # that will result in python thinking the <\n> is being escaped
54 + # and eat the single <\> which makes it hard for us to detect.
55 + # Instead, strip the newline (which we know all lines have), and
56 + # append a <0>. Then when python escapes it, if the line ended
57 + # in a <\>, we'll end up with a <\0> marker to key off of. This
58 + # shouldn't be a problem with any valid ebuild ...
59 + line_escaped = unicode_escape(line.rstrip('\n') + '0')
60 + except:
61 + # Who knows what kind of crazy crap an ebuild will have
62 + # in it -- don't allow it to kill us.
63 + line_escaped = line
64 + if multiline:
65 + # Chop off the \ and \n bytes from the previous line.
66 + multiline = multiline[:-2] + line
67 + if not line_escaped.endswith('\0'):
68 + line = multiline
69 + num = multinum
70 + multiline = None
71 + else:
72 + continue
73 + else:
74 + if line_escaped.endswith('\0'):
75 + multinum = num
76 + multiline = line
77 + continue
78
79 - if here_doc_delim is None:
80 - # We're not in a here-document.
81 - is_comment = _ignore_comment_re.match(line) is not None
82 - for lc in checks:
83 - if is_comment and lc.ignore_comment:
84 - continue
85 - if lc.check_eapi(pkg.metadata['EAPI']):
86 - ignore = lc.ignore_line
87 - if not ignore or not ignore.match(line):
88 - e = lc.check(num, line)
89 - if e:
90 - yield lc.repoman_check_name, e % (num + 1)
91 + # Finally we have a full line to parse.
92 + is_comment = _ignore_comment_re.match(line) is not None
93 + for lc in checks:
94 + if is_comment and lc.ignore_comment:
95 + continue
96 + if lc.check_eapi(pkg.metadata['EAPI']):
97 + ignore = lc.ignore_line
98 + if not ignore or not ignore.match(line):
99 + e = lc.check(num, line)
100 + if e:
101 + yield lc.repoman_check_name, e % (num + 1)
102
103 for lc in checks:
104 i = lc.end()
105 --
106 1.7.8.6

Replies