1 |
Sometimes people wrap long lines in their ebuilds to make them easier to |
2 |
read, but this causes us issues when doing line-by-line checking. So |
3 |
automatically unroll those lines before passing the full content down |
4 |
to our checkers. |
5 |
|
6 |
Signed-off-by: Mike Frysinger <vapier@g.o> |
7 |
--- |
8 |
v3 |
9 |
- use import codecs for escaping strings |
10 |
|
11 |
pym/repoman/checks.py | 63 +++++++++++++++++++++++++++++++++++++++--------- |
12 |
1 files changed, 51 insertions(+), 12 deletions(-) |
13 |
|
14 |
diff --git a/pym/repoman/checks.py b/pym/repoman/checks.py |
15 |
index c17a0bd..402169e 100644 |
16 |
--- a/pym/repoman/checks.py |
17 |
+++ b/pym/repoman/checks.py |
18 |
@@ -5,6 +5,7 @@ |
19 |
"""This module contains functions used in Repoman to ascertain the quality |
20 |
and correctness of an ebuild.""" |
21 |
|
22 |
+import codecs |
23 |
import re |
24 |
import time |
25 |
import repoman.errors as errors |
26 |
@@ -757,8 +758,11 @@ _here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$') |
27 |
_ignore_comment_re = re.compile(r'^\s*#') |
28 |
|
29 |
def run_checks(contents, pkg): |
30 |
+ unicode_escape_codec = codecs.lookup('unicode_escape') |
31 |
+ unicode_escape = lambda x: unicode_escape_codec.decode(x)[0] |
32 |
checks = _constant_checks |
33 |
here_doc_delim = None |
34 |
+ multiline = None |
35 |
|
36 |
for lc in checks: |
37 |
lc.new(pkg) |
38 |
@@ -772,19 +776,54 @@ def run_checks(contents, pkg): |
39 |
here_doc = _here_doc_re.match(line) |
40 |
if here_doc is not None: |
41 |
here_doc_delim = re.compile(r'^\s*%s$' % here_doc.group(1)) |
42 |
+ if here_doc_delim is not None: |
43 |
+ continue |
44 |
+ |
45 |
+ # Unroll multiline escaped strings so that we can check things: |
46 |
+ # inherit foo bar \ |
47 |
+ # moo \ |
48 |
+ # cow |
49 |
+ # This will merge these lines like so: |
50 |
+ # inherit foo bar moo cow |
51 |
+ try: |
52 |
+ # A normal line will end in the two bytes: <\> <\n>. So decoding |
53 |
+ # that will result in python thinking the <\n> is being escaped |
54 |
+ # and eat the single <\> which makes it hard for us to detect. |
55 |
+ # Instead, strip the newline (which we know all lines have), and |
56 |
+ # append a <0>. Then when python escapes it, if the line ended |
57 |
+ # in a <\>, we'll end up with a <\0> marker to key off of. This |
58 |
+ # shouldn't be a problem with any valid ebuild ... |
59 |
+ line_escaped = unicode_escape(line.rstrip('\n') + '0') |
60 |
+ except: |
61 |
+ # Who knows what kind of crazy crap an ebuild will have |
62 |
+ # in it -- don't allow it to kill us. |
63 |
+ line_escaped = line |
64 |
+ if multiline: |
65 |
+ # Chop off the \ and \n bytes from the previous line. |
66 |
+ multiline = multiline[:-2] + line |
67 |
+ if not line_escaped.endswith('\0'): |
68 |
+ line = multiline |
69 |
+ num = multinum |
70 |
+ multiline = None |
71 |
+ else: |
72 |
+ continue |
73 |
+ else: |
74 |
+ if line_escaped.endswith('\0'): |
75 |
+ multinum = num |
76 |
+ multiline = line |
77 |
+ continue |
78 |
|
79 |
- if here_doc_delim is None: |
80 |
- # We're not in a here-document. |
81 |
- is_comment = _ignore_comment_re.match(line) is not None |
82 |
- for lc in checks: |
83 |
- if is_comment and lc.ignore_comment: |
84 |
- continue |
85 |
- if lc.check_eapi(pkg.metadata['EAPI']): |
86 |
- ignore = lc.ignore_line |
87 |
- if not ignore or not ignore.match(line): |
88 |
- e = lc.check(num, line) |
89 |
- if e: |
90 |
- yield lc.repoman_check_name, e % (num + 1) |
91 |
+ # Finally we have a full line to parse. |
92 |
+ is_comment = _ignore_comment_re.match(line) is not None |
93 |
+ for lc in checks: |
94 |
+ if is_comment and lc.ignore_comment: |
95 |
+ continue |
96 |
+ if lc.check_eapi(pkg.metadata['EAPI']): |
97 |
+ ignore = lc.ignore_line |
98 |
+ if not ignore or not ignore.match(line): |
99 |
+ e = lc.check(num, line) |
100 |
+ if e: |
101 |
+ yield lc.repoman_check_name, e % (num + 1) |
102 |
|
103 |
for lc in checks: |
104 |
i = lc.end() |
105 |
-- |
106 |
1.7.8.6 |