Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] emerge: add --fuzzy-search and --search-similarity (bug 65566)
Date: Mon, 25 Jul 2016 02:55:05
Message-Id: 1469415280-4900-1-git-send-email-zmedico@gentoo.org
1 Add --fuzzy-search option, and --search-similarity option to adjust
2 the minimum similarity for search results (defaults to 80%).
3
4 X-Gentoo-bug: 65566
5 X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=65566
6 ---
7 man/emerge.1 | 14 ++++++++++++++
8 pym/_emerge/actions.py | 7 +++++--
9 pym/_emerge/main.py | 32 +++++++++++++++++++++++++++++++-
10 pym/_emerge/search.py | 26 ++++++++++++++++++++++++--
11 4 files changed, 74 insertions(+), 5 deletions(-)
12
13 diff --git a/man/emerge.1 b/man/emerge.1
14 index da1d852..7442220 100644
15 --- a/man/emerge.1
16 +++ b/man/emerge.1
17 @@ -565,6 +565,14 @@ packages (fetch things from SRC_URI based upon USE setting).
18 Instead of doing any package building, just perform fetches for all
19 packages (fetch everything in SRC_URI regardless of USE setting).
20 .TP
21 +.BR "\-\-fuzzy\-search [ y | n ]"
22 +Enable or disable fuzzy search for search actions. When fuzzy search
23 +is enabled, a result is returned if it is sufficiently similar to the
24 +search string, without requiring an exact match. This option is enabled
25 +by default. Fuzzy search does not support regular expressions, therefore
26 +it is automatically disabled for regular expression searches. Fuzzy
27 +search is slightly slower than non\-fuzzy search.
28 +.TP
29 .BR "\-\-getbinpkg [ y | n ] (\-g short option)"
30 Using the server and location defined in \fIPORTAGE_BINHOST\fR (see
31 \fBmake.conf\fR(5)), portage will download the information from each binary
32 @@ -874,6 +882,12 @@ enabled by default. The search index needs to be regenerated by
33 to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later
34 overridden via the command line.
35 .TP
36 +.BR "\-\-search\-similarity PERCENTAGE"
37 +Set the minimum similarity percentage (a floating-point number between
38 +0 and 100). Search results with similarity percentages lower than this
39 +are discarded (default: \'80\'). This option has no effect unless the
40 +\fB\-\-fuzzy\-search\fR option is enabled.
41 +.TP
42 .BR "\-\-select [ y | n ] (\-w short option)"
43 Add specified packages to the world set (inverse of
44 \fB\-\-oneshot\fR). This is useful if you want to
45 diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py
46 index 1dc2b0d..6704afc 100644
47 --- a/pym/_emerge/actions.py
48 +++ b/pym/_emerge/actions.py
49 @@ -1,4 +1,4 @@
50 -# Copyright 1999-2015 Gentoo Foundation
51 +# Copyright 1999-2016 Gentoo Foundation
52 # Distributed under the terms of the GNU General Public License v2
53
54 from __future__ import division, print_function, unicode_literals
55 @@ -1974,7 +1974,10 @@ def action_search(root_config, myopts, myfiles, spinner):
56 spinner, "--searchdesc" in myopts,
57 "--quiet" not in myopts, "--usepkg" in myopts,
58 "--usepkgonly" in myopts,
59 - search_index = myopts.get("--search-index", "y") != "n")
60 + search_index=myopts.get("--search-index", "y") != "n",
61 + search_similarity=myopts.get("--search-similarity"),
62 + fuzzy=myopts.get("--fuzzy-search") != "n",
63 + )
64 for mysearch in myfiles:
65 try:
66 searchinstance.execute(mysearch)
67 diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py
68 index 0e672a2..eae1954 100644
69 --- a/pym/_emerge/main.py
70 +++ b/pym/_emerge/main.py
71 @@ -1,4 +1,4 @@
72 -# Copyright 1999-2015 Gentoo Foundation
73 +# Copyright 1999-2016 Gentoo Foundation
74 # Distributed under the terms of the GNU General Public License v2
75
76 from __future__ import print_function
77 @@ -141,6 +141,7 @@ def insert_optional_args(args):
78 '--deselect' : y_or_n,
79 '--binpkg-respect-use' : y_or_n,
80 '--fail-clean' : y_or_n,
81 + '--fuzzy-search' : y_or_n,
82 '--getbinpkg' : y_or_n,
83 '--getbinpkgonly' : y_or_n,
84 '--jobs' : valid_integers,
85 @@ -458,6 +459,11 @@ def parse_opts(tmpcmdline, silent=False):
86 "choices" : true_y_or_n
87 },
88
89 + "--fuzzy-search": {
90 + "help": "Enable or disable fuzzy search",
91 + "choices": true_y_or_n
92 + },
93 +
94 "--ignore-built-slot-operator-deps": {
95 "help": "Ignore the slot/sub-slot := operator parts of dependencies that have "
96 "been recorded when packages where built. This option is intended "
97 @@ -658,6 +664,12 @@ def parse_opts(tmpcmdline, silent=False):
98 "choices": y_or_n
99 },
100
101 + "--search-similarity": {
102 + "help": ("Set minimum similarity percentage for fuzzy search "
103 + "(a floating-point number between 0 and 100)"),
104 + "action": "store"
105 + },
106 +
107 "--select": {
108 "shortopt" : "-w",
109 "help" : "add specified packages to the world set " + \
110 @@ -855,6 +867,9 @@ def parse_opts(tmpcmdline, silent=False):
111 if myoptions.fail_clean in true_y:
112 myoptions.fail_clean = True
113
114 + if myoptions.fuzzy_search in true_y:
115 + myoptions.fuzzy_search = True
116 +
117 if myoptions.getbinpkg in true_y:
118 myoptions.getbinpkg = True
119 else:
120 @@ -1009,6 +1024,21 @@ def parse_opts(tmpcmdline, silent=False):
121
122 myoptions.rebuilt_binaries_timestamp = rebuilt_binaries_timestamp
123
124 + if myoptions.search_similarity:
125 + try:
126 + search_similarity = float(myoptions.search_similarity)
127 + except ValueError:
128 + parser.error("Invalid --search-similarity parameter "
129 + "(not a number): '{}'\n".format(
130 + myoptions.search_similarity))
131 +
132 + if search_similarity < 0 or search_similarity > 100:
133 + parser.error("Invalid --search-similarity parameter "
134 + "(not between 0 and 100): '{}'\n".format(
135 + myoptions.search_similarity))
136 +
137 + myoptions.search_similarity = search_similarity
138 +
139 if myoptions.use_ebuild_visibility in true_y:
140 myoptions.use_ebuild_visibility = True
141 else:
142 diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
143 index 32d326e..20a0c02 100644
144 --- a/pym/_emerge/search.py
145 +++ b/pym/_emerge/search.py
146 @@ -1,8 +1,9 @@
147 -# Copyright 1999-2015 Gentoo Foundation
148 +# Copyright 1999-2016 Gentoo Foundation
149 # Distributed under the terms of the GNU General Public License v2
150
151 from __future__ import unicode_literals
152
153 +import difflib
154 import re
155 import portage
156 from portage import os
157 @@ -28,7 +29,8 @@ class search(object):
158 # public interface
159 #
160 def __init__(self, root_config, spinner, searchdesc,
161 - verbose, usepkg, usepkgonly, search_index=True):
162 + verbose, usepkg, usepkgonly, search_index=True,
163 + search_similarity=None, fuzzy=True):
164 """Searches the available and installed packages for the supplied search key.
165 The list of available and installed packages is created at object instantiation.
166 This makes successive searches faster."""
167 @@ -42,6 +44,9 @@ class search(object):
168 self.spinner = None
169 self.root_config = root_config
170 self.setconfig = root_config.setconfig
171 + self.fuzzy = fuzzy
172 + self.search_similarity = (80 if search_similarity is None
173 + else search_similarity)
174 self.matches = {"pkg" : []}
175 self.mlen = 0
176
177 @@ -248,11 +253,26 @@ class search(object):
178 if self.searchkey.startswith('@'):
179 match_category = 1
180 self.searchkey = self.searchkey[1:]
181 + fuzzy = False
182 if regexsearch:
183 self.searchre=re.compile(self.searchkey,re.I)
184 else:
185 self.searchre=re.compile(re.escape(self.searchkey), re.I)
186
187 + # Fuzzy search does not support regular expressions, therefore
188 + # it is disabled for regular expression searches.
189 + if self.fuzzy:
190 + fuzzy = True
191 + cutoff = float(self.search_similarity) / 100
192 + seq_match = difflib.SequenceMatcher()
193 + seq_match.set_seq2(self.searchkey.lower())
194 +
195 + def fuzzy_search(match_string):
196 + seq_match.set_seq1(match_string.lower())
197 + return (seq_match.real_quick_ratio() >= cutoff and
198 + seq_match.quick_ratio() >= cutoff and
199 + seq_match.ratio() >= cutoff)
200 +
201 for package in self._cp_all():
202 self._spinner_update()
203
204 @@ -263,6 +283,8 @@ class search(object):
205
206 if self.searchre.search(match_string):
207 yield ("pkg", package)
208 + elif fuzzy and fuzzy_search(match_string):
209 + yield ("pkg", package)
210 elif self.searchdesc: # DESCRIPTION searching
211 # Use _first_cp to avoid an expensive visibility check,
212 # since the visibility check can be avoided entirely
213 --
214 2.7.4

Replies