Gentoo Archives: gentoo-commits

From: "Jesus Rivero (neurogeek)" <neurogeek@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in dev-python/reverend/files: reverend-0.3-email.patch
Date: Fri, 28 Nov 2008 01:39:23
Message-Id: E1L5sKG-00035f-Je@stork.gentoo.org
1 neurogeek 08/11/28 01:39:20
2
3 Added: reverend-0.3-email.patch
4 Log:
5 Initial commit. Thanks to David Guerizec for the ebuild.
6 (Portage version: 2.2_rc12/cvs/Linux 2.6.18-gentoo-r3 i686)
7
8 Revision Changes Path
9 1.1 dev-python/reverend/files/reverend-0.3-email.patch
10
11 file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/reverend/files/reverend-0.3-email.patch?rev=1.1&view=markup
12 plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/reverend/files/reverend-0.3-email.patch?rev=1.1&content-type=text/plain
13
14 Index: reverend-0.3-email.patch
15 ===================================================================
16 --- reverend/guessers/email.py 2006-04-25 00:15:27.000000000 +0200
17 +++ reverend/guessers/email.py 2006-04-25 01:12:16.000000000 +0200
18 @@ -9,7 +9,6 @@
19 import email
20
21 from reverend.thomas import Bayes
22 -from reverend.splitter import Splitter
23
24
25 class EmailClassifier(Bayes):
26 @@ -19,19 +18,22 @@
27 # This should return a list of strings
28 # which will be used as the key into
29 # the table of token counts
30 - tokens = self.getHeaderTokens(msg)
31 - tokens += self.getBodyTokens(msg)
32 -
33 + for tok in self.getHeaderTokens(msg):
34 + yield tok
35 +
36 + for tok in self.getBodyTokens(msg):
37 + yield tok
38 +
39 # Get some tokens that are generated from the
40 # header and the structure
41 - tokens += self.getMetaTokens(msg)
42 - return tokens
43 + for tok in self.getMetaTokens(msg):
44 + yield tok
45
46 def getBodyTokens(self, msg):
47 text = self.getTextPlain(msg)
48 if text is None:
49 text = ''
50 - tl = self.splitter.split(text)
51 + tl = self._tokenizer.tokenize(text)
52 return tl
53
54 def getHeaderTokens(self, msg):
55 @@ -40,12 +42,12 @@
56 text += msg.get('from','fromnoone') + ' '
57 text += msg.get('to','tonoone') + ' '
58 text += msg.get('cc','ccnoone') + ' '
59 - tl = self.splitter.split(text)
60 + tl = self._tokenizer.tokenize(text)
61 return tl
62
63 def getTextPlain(self, msg):
64 for part in msg.walk():
65 - typ = part.get_type()
66 + typ = part.get_content_type()
67 if typ and typ.lower() == "text/plain":
68 text = part.get_payload(decode=True)
69 return text
70 @@ -53,7 +55,7 @@
71
72 def getTextHtml(self, msg):
73 for part in msg.walk():
74 - typ = part.get_type()
75 + typ = part.get_content_type()
76 if typ and typ.lower() == "text/html":
77 text = part.get_payload(decode=False)
78 return text