1 |
Add --fuzzy-search option, and --search-similarity option to adjust |
2 |
the minimum similarity for search results (defaults to 80%). |
3 |
|
4 |
X-Gentoo-bug: 65566 |
5 |
X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=65566 |
6 |
--- |
7 |
man/emerge.1 | 14 ++++++++++++++ |
8 |
pym/_emerge/actions.py | 7 +++++-- |
9 |
pym/_emerge/main.py | 32 +++++++++++++++++++++++++++++++- |
10 |
pym/_emerge/search.py | 26 ++++++++++++++++++++++++-- |
11 |
4 files changed, 74 insertions(+), 5 deletions(-) |
12 |
|
13 |
diff --git a/man/emerge.1 b/man/emerge.1 |
14 |
index da1d852..7442220 100644 |
15 |
--- a/man/emerge.1 |
16 |
+++ b/man/emerge.1 |
17 |
@@ -565,6 +565,14 @@ packages (fetch things from SRC_URI based upon USE setting). |
18 |
Instead of doing any package building, just perform fetches for all |
19 |
packages (fetch everything in SRC_URI regardless of USE setting). |
20 |
.TP |
21 |
+.BR "\-\-fuzzy\-search [ y | n ]" |
22 |
+Enable or disable fuzzy search for search actions. When fuzzy search |
23 |
+is enabled, a result is returned if it is sufficiently similar to the |
24 |
+search string, without requiring an exact match. This option is enabled |
25 |
+by default. Fuzzy search does not support regular expressions, therefore |
26 |
+it is automatically disabled for regular expression searches. Fuzzy |
27 |
+search is slightly slower than non\-fuzzy search. |
28 |
+.TP |
29 |
.BR "\-\-getbinpkg [ y | n ] (\-g short option)" |
30 |
Using the server and location defined in \fIPORTAGE_BINHOST\fR (see |
31 |
\fBmake.conf\fR(5)), portage will download the information from each binary |
32 |
@@ -874,6 +882,12 @@ enabled by default. The search index needs to be regenerated by |
33 |
to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later |
34 |
overridden via the command line. |
35 |
.TP |
36 |
+.BR "\-\-search\-similarity PERCENTAGE" |
37 |
+Set the minimum similarity percentage (a floating-point number between |
38 |
+0 and 100). Search results with similarity percentages lower than this |
39 |
+are discarded (default: \'80\'). This option has no effect unless the |
40 |
+\fB\-\-fuzzy\-search\fR option is enabled. |
41 |
+.TP |
42 |
.BR "\-\-select [ y | n ] (\-w short option)" |
43 |
Add specified packages to the world set (inverse of |
44 |
\fB\-\-oneshot\fR). This is useful if you want to |
45 |
diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py |
46 |
index 1dc2b0d..6704afc 100644 |
47 |
--- a/pym/_emerge/actions.py |
48 |
+++ b/pym/_emerge/actions.py |
49 |
@@ -1,4 +1,4 @@ |
50 |
-# Copyright 1999-2015 Gentoo Foundation |
51 |
+# Copyright 1999-2016 Gentoo Foundation |
52 |
# Distributed under the terms of the GNU General Public License v2 |
53 |
|
54 |
from __future__ import division, print_function, unicode_literals |
55 |
@@ -1974,7 +1974,10 @@ def action_search(root_config, myopts, myfiles, spinner): |
56 |
spinner, "--searchdesc" in myopts, |
57 |
"--quiet" not in myopts, "--usepkg" in myopts, |
58 |
"--usepkgonly" in myopts, |
59 |
- search_index = myopts.get("--search-index", "y") != "n") |
60 |
+ search_index=myopts.get("--search-index", "y") != "n", |
61 |
+ search_similarity=myopts.get("--search-similarity"), |
62 |
+ fuzzy=myopts.get("--fuzzy-search") != "n", |
63 |
+ ) |
64 |
for mysearch in myfiles: |
65 |
try: |
66 |
searchinstance.execute(mysearch) |
67 |
diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py |
68 |
index 0e672a2..eae1954 100644 |
69 |
--- a/pym/_emerge/main.py |
70 |
+++ b/pym/_emerge/main.py |
71 |
@@ -1,4 +1,4 @@ |
72 |
-# Copyright 1999-2015 Gentoo Foundation |
73 |
+# Copyright 1999-2016 Gentoo Foundation |
74 |
# Distributed under the terms of the GNU General Public License v2 |
75 |
|
76 |
from __future__ import print_function |
77 |
@@ -141,6 +141,7 @@ def insert_optional_args(args): |
78 |
'--deselect' : y_or_n, |
79 |
'--binpkg-respect-use' : y_or_n, |
80 |
'--fail-clean' : y_or_n, |
81 |
+ '--fuzzy-search' : y_or_n, |
82 |
'--getbinpkg' : y_or_n, |
83 |
'--getbinpkgonly' : y_or_n, |
84 |
'--jobs' : valid_integers, |
85 |
@@ -458,6 +459,11 @@ def parse_opts(tmpcmdline, silent=False): |
86 |
"choices" : true_y_or_n |
87 |
}, |
88 |
|
89 |
+ "--fuzzy-search": { |
90 |
+ "help": "Enable or disable fuzzy search", |
91 |
+ "choices": true_y_or_n |
92 |
+ }, |
93 |
+ |
94 |
"--ignore-built-slot-operator-deps": { |
95 |
"help": "Ignore the slot/sub-slot := operator parts of dependencies that have " |
96 |
"been recorded when packages where built. This option is intended " |
97 |
@@ -658,6 +664,12 @@ def parse_opts(tmpcmdline, silent=False): |
98 |
"choices": y_or_n |
99 |
}, |
100 |
|
101 |
+ "--search-similarity": { |
102 |
+ "help": ("Set minimum similarity percentage for fuzzy search " |
103 |
+ "(a floating-point number between 0 and 100)"), |
104 |
+ "action": "store" |
105 |
+ }, |
106 |
+ |
107 |
"--select": { |
108 |
"shortopt" : "-w", |
109 |
"help" : "add specified packages to the world set " + \ |
110 |
@@ -855,6 +867,9 @@ def parse_opts(tmpcmdline, silent=False): |
111 |
if myoptions.fail_clean in true_y: |
112 |
myoptions.fail_clean = True |
113 |
|
114 |
+ if myoptions.fuzzy_search in true_y: |
115 |
+ myoptions.fuzzy_search = True |
116 |
+ |
117 |
if myoptions.getbinpkg in true_y: |
118 |
myoptions.getbinpkg = True |
119 |
else: |
120 |
@@ -1009,6 +1024,21 @@ def parse_opts(tmpcmdline, silent=False): |
121 |
|
122 |
myoptions.rebuilt_binaries_timestamp = rebuilt_binaries_timestamp |
123 |
|
124 |
+ if myoptions.search_similarity: |
125 |
+ try: |
126 |
+ search_similarity = float(myoptions.search_similarity) |
127 |
+ except ValueError: |
128 |
+ parser.error("Invalid --search-similarity parameter " |
129 |
+ "(not a number): '{}'\n".format( |
130 |
+ myoptions.search_similarity)) |
131 |
+ |
132 |
+ if search_similarity < 0 or search_similarity > 100: |
133 |
+ parser.error("Invalid --search-similarity parameter " |
134 |
+ "(not between 0 and 100): '{}'\n".format( |
135 |
+ myoptions.search_similarity)) |
136 |
+ |
137 |
+ myoptions.search_similarity = search_similarity |
138 |
+ |
139 |
if myoptions.use_ebuild_visibility in true_y: |
140 |
myoptions.use_ebuild_visibility = True |
141 |
else: |
142 |
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py |
143 |
index 32d326e..20a0c02 100644 |
144 |
--- a/pym/_emerge/search.py |
145 |
+++ b/pym/_emerge/search.py |
146 |
@@ -1,8 +1,9 @@ |
147 |
-# Copyright 1999-2015 Gentoo Foundation |
148 |
+# Copyright 1999-2016 Gentoo Foundation |
149 |
# Distributed under the terms of the GNU General Public License v2 |
150 |
|
151 |
from __future__ import unicode_literals |
152 |
|
153 |
+import difflib |
154 |
import re |
155 |
import portage |
156 |
from portage import os |
157 |
@@ -28,7 +29,8 @@ class search(object): |
158 |
# public interface |
159 |
# |
160 |
def __init__(self, root_config, spinner, searchdesc, |
161 |
- verbose, usepkg, usepkgonly, search_index=True): |
162 |
+ verbose, usepkg, usepkgonly, search_index=True, |
163 |
+ search_similarity=None, fuzzy=True): |
164 |
"""Searches the available and installed packages for the supplied search key. |
165 |
The list of available and installed packages is created at object instantiation. |
166 |
This makes successive searches faster.""" |
167 |
@@ -42,6 +44,9 @@ class search(object): |
168 |
self.spinner = None |
169 |
self.root_config = root_config |
170 |
self.setconfig = root_config.setconfig |
171 |
+ self.fuzzy = fuzzy |
172 |
+ self.search_similarity = (80 if search_similarity is None |
173 |
+ else search_similarity) |
174 |
self.matches = {"pkg" : []} |
175 |
self.mlen = 0 |
176 |
|
177 |
@@ -248,11 +253,26 @@ class search(object): |
178 |
if self.searchkey.startswith('@'): |
179 |
match_category = 1 |
180 |
self.searchkey = self.searchkey[1:] |
181 |
+ fuzzy = False |
182 |
if regexsearch: |
183 |
self.searchre=re.compile(self.searchkey,re.I) |
184 |
else: |
185 |
self.searchre=re.compile(re.escape(self.searchkey), re.I) |
186 |
|
187 |
+ # Fuzzy search does not support regular expressions, therefore |
188 |
+ # it is disabled for regular expression searches. |
189 |
+ if self.fuzzy: |
190 |
+ fuzzy = True |
191 |
+ cutoff = float(self.search_similarity) / 100 |
192 |
+ seq_match = difflib.SequenceMatcher() |
193 |
+ seq_match.set_seq2(self.searchkey.lower()) |
194 |
+ |
195 |
+ def fuzzy_search(match_string): |
196 |
+ seq_match.set_seq1(match_string.lower()) |
197 |
+ return (seq_match.real_quick_ratio() >= cutoff and |
198 |
+ seq_match.quick_ratio() >= cutoff and |
199 |
+ seq_match.ratio() >= cutoff) |
200 |
+ |
201 |
for package in self._cp_all(): |
202 |
self._spinner_update() |
203 |
|
204 |
@@ -263,6 +283,8 @@ class search(object): |
205 |
|
206 |
if self.searchre.search(match_string): |
207 |
yield ("pkg", package) |
208 |
+ elif fuzzy and fuzzy_search(match_string): |
209 |
+ yield ("pkg", package) |
210 |
elif self.searchdesc: # DESCRIPTION searching |
211 |
# Use _first_cp to avoid an expensive visibility check, |
212 |
# since the visibility check can be avoided entirely |
213 |
-- |
214 |
2.7.4 |