1 |
commit: 66df1d045a64f8ad6453d9668cdb66980c128b69 |
2 |
Author: Zac Medico <zmedico <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Jul 8 19:44:40 2017 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Sun Jul 9 17:18:03 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=66df1d04 |
7 |
|
8 |
fuzzy search: weigh category similarity independently (bug 623648) |
9 |
|
10 |
Weigh the similarity of category and package names independently, |
11 |
in order to avoid matching lots of irrelevant packages in the same |
12 |
category when the package name is much shorter than the category |
13 |
name. |
14 |
|
15 |
X-Gentoo-bug: 623648 |
16 |
X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=623648 |
17 |
Acked-by: Brian Dolbec <dolsen <AT> gentoo.org> |
18 |
|
19 |
pym/_emerge/search.py | 24 +++++++++++++++++++++--- |
20 |
1 file changed, 21 insertions(+), 3 deletions(-) |
21 |
|
22 |
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py |
23 |
index 20a0c026e..dc91ad315 100644 |
24 |
--- a/pym/_emerge/search.py |
25 |
+++ b/pym/_emerge/search.py |
26 |
@@ -264,15 +264,33 @@ class search(object): |
27 |
if self.fuzzy: |
28 |
fuzzy = True |
29 |
cutoff = float(self.search_similarity) / 100 |
30 |
- seq_match = difflib.SequenceMatcher() |
31 |
- seq_match.set_seq2(self.searchkey.lower()) |
32 |
+ if match_category: |
33 |
+ # Weigh the similarity of category and package |
34 |
+ # names independently, in order to avoid matching |
35 |
+ # lots of irrelevant packages in the same category |
36 |
+ # when the package name is much shorter than the |
37 |
+ # category name. |
38 |
+ part_split = portage.catsplit |
39 |
+ else: |
40 |
+ part_split = lambda match_string: (match_string,) |
41 |
|
42 |
- def fuzzy_search(match_string): |
43 |
+ part_matchers = [] |
44 |
+ for part in part_split(self.searchkey): |
45 |
+ seq_match = difflib.SequenceMatcher() |
46 |
+ seq_match.set_seq2(part.lower()) |
47 |
+ part_matchers.append(seq_match) |
48 |
+ |
49 |
+ def fuzzy_search_part(seq_match, match_string): |
50 |
seq_match.set_seq1(match_string.lower()) |
51 |
return (seq_match.real_quick_ratio() >= cutoff and |
52 |
seq_match.quick_ratio() >= cutoff and |
53 |
seq_match.ratio() >= cutoff) |
54 |
|
55 |
+ def fuzzy_search(match_string): |
56 |
+ return all(fuzzy_search_part(seq_match, part) |
57 |
+ for seq_match, part in zip( |
58 |
+ part_matchers, part_split(match_string))) |
59 |
+ |
60 |
for package in self._cp_all(): |
61 |
self._spinner_update() |