1 |
On Sat, 8 Jul 2017 13:03:58 -0700 |
2 |
Zac Medico <zmedico@g.o> wrote: |
3 |
|
4 |
> Weigh the similarity of category and package names independently, |
5 |
> in order to avoid matching lots of irrelevant packages in the same |
6 |
> category when the package name is much shorter than the category |
7 |
> name. |
8 |
> |
9 |
> X-Gentoo-bug: 623648 |
10 |
> X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=623648 |
11 |
> --- |
12 |
> pym/_emerge/search.py | 24 +++++++++++++++++++++--- |
13 |
> 1 file changed, 21 insertions(+), 3 deletions(-) |
14 |
> |
15 |
> diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py |
16 |
> index 20a0c026e..dc91ad315 100644 |
17 |
> --- a/pym/_emerge/search.py |
18 |
> +++ b/pym/_emerge/search.py |
19 |
> @@ -264,15 +264,33 @@ class search(object): |
20 |
> if self.fuzzy: |
21 |
> fuzzy = True |
22 |
> cutoff = |
23 |
> float(self.search_similarity) / 100 |
24 |
> - seq_match = difflib.SequenceMatcher() |
25 |
> - |
26 |
> seq_match.set_seq2(self.searchkey.lower()) |
27 |
> + if match_category: |
28 |
> + # Weigh the similarity of |
29 |
> category and package |
30 |
> + # names independently, in |
31 |
> order to avoid matching |
32 |
> + # lots of irrelevant |
33 |
> packages in the same category |
34 |
> + # when the package name is |
35 |
> much shorter than the |
36 |
> + # category name. |
37 |
> + part_split = portage.catsplit |
38 |
> + else: |
39 |
> + part_split = lambda |
40 |
> match_string: (match_string,) |
41 |
> - def fuzzy_search(match_string): |
42 |
> + part_matchers = [] |
43 |
> + for part in |
44 |
> part_split(self.searchkey): |
45 |
> + seq_match = |
46 |
> difflib.SequenceMatcher() |
47 |
> + |
48 |
> seq_match.set_seq2(part.lower()) |
49 |
> + |
50 |
> part_matchers.append(seq_match) + |
51 |
> + def fuzzy_search_part(seq_match, |
52 |
> match_string): seq_match.set_seq1(match_string.lower()) |
53 |
> return |
54 |
> (seq_match.real_quick_ratio() >= cutoff and seq_match.quick_ratio() |
55 |
> >= cutoff and seq_match.ratio() >= cutoff) |
56 |
> |
57 |
> + def fuzzy_search(match_string): |
58 |
> + return |
59 |
> all(fuzzy_search_part(seq_match, part) |
60 |
> + for seq_match, part |
61 |
> in zip( |
62 |
> + part_matchers, |
63 |
> part_split(match_string))) + |
64 |
> for package in self._cp_all(): |
65 |
> self._spinner_update() |
66 |
> |
67 |
|
68 |
|
69 |
Looks fine. |
70 |
|
71 |
-- |
72 |
Brian Dolbec <dolsen> |