1 |
commit: 8dfea24b40c34292f20ab60975d3585094b70cb0 |
2 |
Author: Antanas Uršulis <antanas.ursulis <AT> gmail <DOT> com> |
3 |
AuthorDate: Mon Jul 29 16:02:17 2013 +0000 |
4 |
Commit: Antanas Ursulis <antanas.ursulis <AT> gmail <DOT> com> |
5 |
CommitDate: Mon Jul 29 16:02:17 2013 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/log-analysis.git;a=commit;h=8dfea24b |
7 |
|
8 |
Introduce processors and PortageProcessor. |
9 |
|
10 |
A processor is initialised with a database and storage provider. It |
11 |
should implement the process(request, source) method, where request is a |
12 |
protobuf Submission message. process() should analyse the received |
13 |
files, perform any required transformations and should usually store the |
14 |
files and create appropriate database entries. |
15 |
|
16 |
Processors are multiplexed through the 'provider' variable in the |
17 |
protobuf Submission message. This allows processing/analysing various |
18 |
types of logs differently. |
19 |
|
20 |
--- |
21 |
flask_app.py | 8 +++--- |
22 |
portage_processor.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ |
23 |
simple_client.py | 1 + |
24 |
submission.proto | 3 ++- |
25 |
4 files changed, 82 insertions(+), 4 deletions(-) |
26 |
|
27 |
diff --git a/flask_app.py b/flask_app.py |
28 |
index ce55c38..832702c 100644 |
29 |
--- a/flask_app.py |
30 |
+++ b/flask_app.py |
31 |
@@ -7,8 +7,11 @@ import os, socket |
32 |
import submission_pb2, storage |
33 |
from flask import Flask, request |
34 |
|
35 |
+from portage_processor import PortageProcessor |
36 |
+ |
37 |
app = Flask(__name__) |
38 |
store = storage.FilesystemStorage('logs/') |
39 |
+processors = {'portage' : PortageProcessor(None, store)} # TODO: initialise from config file |
40 |
|
41 |
@app.route('/') |
42 |
def index(): |
43 |
@@ -19,9 +22,8 @@ def submit(): |
44 |
submission = submission_pb2.Submission() |
45 |
submission.ParseFromString(request.data) |
46 |
source = socket.getfqdn(request.remote_addr) # TODO: is this ok? |
47 |
- # TODO: pass through analyser |
48 |
- for f in submission.files: |
49 |
- store.save_file(source, f.filename, f.data) |
50 |
+ |
51 |
+ processors[submission.provider].process(submission, source) |
52 |
return '' |
53 |
|
54 |
if __name__ == '__main__': |
55 |
|
56 |
diff --git a/portage_processor.py b/portage_processor.py |
57 |
new file mode 100644 |
58 |
index 0000000..2403cdf |
59 |
--- /dev/null |
60 |
+++ b/portage_processor.py |
61 |
@@ -0,0 +1,74 @@ |
62 |
+import re, StringIO |
63 |
+ |
64 |
+class PortageProcessor: |
65 |
+ _r = { |
66 |
+ 'warnings' : re.compile(r"(Tinderbox QA Warning!|QA Notice: (Pre-stripped|file does not exist|command not found|USE flag|Files built without respecting|The following files)|linux_config_exists|will always overflow|called with bigger|maintainer mode detected|econf called in src_compile|udev rules should be installed)"), |
67 |
+ 'testfailed' : re.compile(r"^ \* ERROR: .* failed \(test phase\):"), |
68 |
+ 'failed' : re.compile(r"^ \* ERROR: .* failed"), |
69 |
+ 'collision' : re.compile(r"Detected file collision"), |
70 |
+ 'maintainer' : re.compile(r"^ \* Maintainer: ([a-zA-Z0-9.@_+-]+)(?: ([a-zA-Z0-9.@_+,-]+))?$"), |
71 |
+ 'escapes' : re.compile(r"\x1b\[[^\x40-\x7e]*[\x40-\x7e]") |
72 |
+ } |
73 |
+ |
74 |
+ def __init__(self, db, storage): |
75 |
+ self.db = db |
76 |
+ self.storage = storage |
77 |
+ |
78 |
+ def process(self, request, source): |
79 |
+ for f in request.files: |
80 |
+ matches = 0 |
81 |
+ pkg_failed = False |
82 |
+ test_failed = False |
83 |
+ collision = False |
84 |
+ bug_assignee = 'bug-wranglers@g.o' |
85 |
+ bug_cc = '' |
86 |
+ |
87 |
+ # TODO: look at proper HTML generation methods: |
88 |
+ # (*) either XHTML via xml.etree |
89 |
+ # (*) or Jinja2 (is it possible to parse and generate in one pass?) |
90 |
+ output = StringIO.StringIO() |
91 |
+ output.write('''\ |
92 |
+<!doctype html> |
93 |
+<html> |
94 |
+ <head> |
95 |
+ <link rel="stylesheet" type="text/css" href="htmlgrep.css"> |
96 |
+ </head> |
97 |
+ <body> |
98 |
+ <ol> |
99 |
+''') |
100 |
+ |
101 |
+ for line in f.data.split("\n"): |
102 |
+ match = False |
103 |
+ |
104 |
+ line = self._r['escapes'].sub('', line) |
105 |
+ |
106 |
+ if self._r['warnings'].search(line): |
107 |
+ match = True |
108 |
+ elif self._r['testfailed'].search(line): |
109 |
+ test_failed = True |
110 |
+ match = True |
111 |
+ elif self._r['failed'].search(line): |
112 |
+ pkg_failed = True |
113 |
+ match = True |
114 |
+ elif self._r['collision'].search(line): |
115 |
+ pkg_failed = True |
116 |
+ collision = True |
117 |
+ match = True |
118 |
+ else: |
119 |
+ m = self._r['maintainer'].search(line) |
120 |
+ if m: |
121 |
+ bug_assignee, bug_cc = m.group(1, 2) |
122 |
+ |
123 |
+ if match: |
124 |
+ matches += 1 |
125 |
+ output.write('\t'*3 + '<li class="match">' + line + '</li>\n') |
126 |
+ else: |
127 |
+ output.write('\t'*3 + '<li>' + line + '</li>\n') |
128 |
+ |
129 |
+ output.write('''\ |
130 |
+ </ol> |
131 |
+ </body> |
132 |
+</html> |
133 |
+''') |
134 |
+ |
135 |
+ self.storage.save_file(source, f.filename, output.getvalue()) |
136 |
|
137 |
diff --git a/simple_client.py b/simple_client.py |
138 |
index 99a4116..ab4bccf 100644 |
139 |
--- a/simple_client.py |
140 |
+++ b/simple_client.py |
141 |
@@ -6,6 +6,7 @@ import submission_pb2, sys, urllib2, os |
142 |
|
143 |
def send_submission(filenames): |
144 |
submission = submission_pb2.Submission() |
145 |
+ submission.provider = "portage" |
146 |
|
147 |
for f in filenames: |
148 |
new_file = submission.files.add() |
149 |
|
150 |
diff --git a/submission.proto b/submission.proto |
151 |
index b06310f..42cf97c 100644 |
152 |
--- a/submission.proto |
153 |
+++ b/submission.proto |
154 |
@@ -4,5 +4,6 @@ message Submission { |
155 |
required bytes data = 2; |
156 |
} |
157 |
|
158 |
- repeated File files = 1; |
159 |
+ required string provider = 1; |
160 |
+ repeated File files = 2; |
161 |
} |