Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 5/5 v3] Add emerge --search-index option.
Date: Tue, 04 Nov 2014 05:42:20
Message-Id: 1415079734-14386-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option. by Zac Medico
1 The new emerge --search-index option, which is enabled by default,
2 causes pkg_desc_index to be used for search optimization. The search
3 index needs to be regenerated by egencache after changes are made to
4 a repository (see the --update-pkg-desc-index action).
5
6 For users that would like to modify ebuilds in a repository without
7 running egencache afterwards, emerge --search-index=n can be used to
8 get non-indexed search. Alternatively, the user could simply remove
9 the stale index file, in order to disable the search index for a
10 particular repository.
11
12 In order to conserve memory, indices are read as streams, and
13 MultiIterGroupBy is used to group results from IndexedPortdb and
14 IndexedVardb. Stream-oriented search also makes it possible to
15 display search results incrementally (fixing bug #412471).
16
17 X-Gentoo-Bug: 525718
18 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
19 ---
20 This updated patch causes indexed search to be enabled only for
21 searchdesc, since indexed variants can actually be slower when
22 only package names need to be searched.
23
24 man/emerge.1 | 8 ++++
25 pym/_emerge/actions.py | 3 +-
26 pym/_emerge/depgraph.py | 2 +-
27 pym/_emerge/main.py | 5 ++
28 pym/_emerge/search.py | 122 +++++++++++++++++++++++++++++++++++-------------
29 5 files changed, 105 insertions(+), 35 deletions(-)
30
31 diff --git a/man/emerge.1 b/man/emerge.1
32 index bbe71ac..7bcdd9a 100644
33 --- a/man/emerge.1
34 +++ b/man/emerge.1
35 @@ -796,6 +796,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in
36 the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support
37 \fBHDEPEND\fR, this option affects only the former.
38 .TP
39 +.BR "\-\-search\-index < y | n >"
40 +Enable or disable indexed search for search actions. This option is
41 +enabled by default. The search index needs to be regenerated by
42 +\fBegencache\fR(1) after changes are made to a repository (see the
43 +\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added
44 +to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later
45 +overridden via the command line.
46 +.TP
47 .BR "\-\-select [ y | n ] (\-w short option)"
48 Add specified packages to the world set (inverse of
49 \fB\-\-oneshot\fR). This is useful if you want to
50 diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py
51 index 48b0826..8a22ab5 100644
52 --- a/pym/_emerge/actions.py
53 +++ b/pym/_emerge/actions.py
54 @@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner):
55 searchinstance = search(root_config,
56 spinner, "--searchdesc" in myopts,
57 "--quiet" not in myopts, "--usepkg" in myopts,
58 - "--usepkgonly" in myopts)
59 + "--usepkgonly" in myopts,
60 + search_index = myopts.get("--search-index", "y") != "n")
61 for mysearch in myfiles:
62 try:
63 searchinstance.execute(mysearch)
64 diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py
65 index 94eaed8..da408ad 100644
66 --- a/pym/_emerge/depgraph.py
67 +++ b/pym/_emerge/depgraph.py
68 @@ -8656,7 +8656,7 @@ def ambiguous_package_name(arg, atoms, root_config, spinner, myopts):
69
70 s = search(root_config, spinner, "--searchdesc" in myopts,
71 "--quiet" not in myopts, "--usepkg" in myopts,
72 - "--usepkgonly" in myopts)
73 + "--usepkgonly" in myopts, search_index = False)
74 null_cp = portage.dep_getkey(insert_category_into_atom(
75 arg, "null"))
76 cat, atom_pn = portage.catsplit(null_cp)
77 diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py
78 index cf7966c..c08e12a 100644
79 --- a/pym/_emerge/main.py
80 +++ b/pym/_emerge/main.py
81 @@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False):
82 "choices" :("True", "rdeps")
83 },
84
85 + "--search-index": {
86 + "help": "Enable or disable indexed search (enabled by default)",
87 + "choices": y_or_n
88 + },
89 +
90 "--select": {
91 "shortopt" : "-w",
92 "help" : "add specified packages to the world set " + \
93 diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
94 index 4b0fd9f..5821c37 100644
95 --- a/pym/_emerge/search.py
96 +++ b/pym/_emerge/search.py
97 @@ -7,9 +7,12 @@ import re
98 import portage
99 from portage import os
100 from portage.dbapi.porttree import _parse_uri_map
101 +from portage.dbapi.IndexedPortdb import IndexedPortdb
102 +from portage.dbapi.IndexedVardb import IndexedVardb
103 from portage.localization import localized_size
104 from portage.output import bold, bold as white, darkgreen, green, red
105 from portage.util import writemsg_stdout
106 +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
107
108 from _emerge.Package import Package
109
110 @@ -25,15 +28,17 @@ class search(object):
111 # public interface
112 #
113 def __init__(self, root_config, spinner, searchdesc,
114 - verbose, usepkg, usepkgonly):
115 + verbose, usepkg, usepkgonly, search_index = True):
116 """Searches the available and installed packages for the supplied search key.
117 The list of available and installed packages is created at object instantiation.
118 This makes successive searches faster."""
119 self.settings = root_config.settings
120 - self.vartree = root_config.trees["vartree"]
121 - self.spinner = spinner
122 self.verbose = verbose
123 self.searchdesc = searchdesc
124 + self.searchkey = None
125 + # Disable the spinner since search results are displayed
126 + # incrementally.
127 + self.spinner = None
128 self.root_config = root_config
129 self.setconfig = root_config.setconfig
130 self.matches = {"pkg" : []}
131 @@ -45,6 +50,13 @@ class search(object):
132 bindb = root_config.trees["bintree"].dbapi
133 vardb = root_config.trees["vartree"].dbapi
134
135 + # The indexed variants can actually be slower when only
136 + # package names need to be searched, so only use indices
137 + # for searchdesc.
138 + if search_index and searchdesc:
139 + portdb = IndexedPortdb(portdb)
140 + vardb = IndexedVardb(vardb)
141 +
142 if not usepkgonly and portdb._have_root_eclass_dir:
143 self._dbs.append(portdb)
144
145 @@ -53,16 +65,23 @@ class search(object):
146
147 self._dbs.append(vardb)
148 self._portdb = portdb
149 + self._vardb = vardb
150
151 def _spinner_update(self):
152 if self.spinner:
153 self.spinner.update()
154
155 def _cp_all(self):
156 - cp_all = set()
157 + iterators = []
158 for db in self._dbs:
159 - cp_all.update(db.cp_all())
160 - return list(sorted(cp_all))
161 + i = db.cp_all()
162 + try:
163 + i = iter(i)
164 + except TypeError:
165 + pass
166 + iterators.append(i)
167 + for group in MultiIterGroupBy(iterators):
168 + yield group[0]
169
170 def _aux_get(self, *args, **kwargs):
171 for db in self._dbs:
172 @@ -97,7 +116,7 @@ class search(object):
173 return {}
174
175 def _visible(self, db, cpv, metadata):
176 - installed = db is self.vartree.dbapi
177 + installed = db is self._vardb
178 built = installed or db is not self._portdb
179 pkg_type = "ebuild"
180 if installed:
181 @@ -171,8 +190,11 @@ class search(object):
182
183 def execute(self,searchkey):
184 """Performs the search for the supplied search key"""
185 + self.searchkey = searchkey
186 +
187 + def _iter_search(self):
188 +
189 match_category = 0
190 - self.searchkey=searchkey
191 self.packagematches = []
192 if self.searchdesc:
193 self.searchdesc=1
194 @@ -181,6 +203,7 @@ class search(object):
195 self.searchdesc=0
196 self.matches = {"pkg":[], "set":[]}
197 print("Searching... ", end=' ')
198 + print()
199
200 regexsearch = False
201 if self.searchkey.startswith('%'):
202 @@ -206,8 +229,24 @@ class search(object):
203 if self.searchre.search(match_string):
204 if not self._xmatch("match-visible", package):
205 masked=1
206 - self.matches["pkg"].append([package,masked])
207 + yield ("pkg", package, masked)
208 elif self.searchdesc: # DESCRIPTION searching
209 + # Check for DESCRIPTION match first, so that we can skip
210 + # the expensive visibility check if it doesn't match.
211 + full_package = self._xmatch("match-all", package)
212 + if not full_package:
213 + continue
214 + full_package = full_package[-1]
215 + try:
216 + full_desc = self._aux_get(
217 + full_package, ["DESCRIPTION"])[0]
218 + except KeyError:
219 + portage.writemsg(
220 + "emerge: search: aux_get() failed, skipping\n",
221 + noiselevel=-1)
222 + continue
223 + if not self.searchre.search(full_desc):
224 + continue
225 full_package = self._xmatch("bestmatch-visible", package)
226 if not full_package:
227 #no match found; we don't want to query description
228 @@ -217,14 +256,8 @@ class search(object):
229 continue
230 else:
231 masked=1
232 - try:
233 - full_desc = self._aux_get(
234 - full_package, ["DESCRIPTION"])[0]
235 - except KeyError:
236 - print("emerge: search: aux_get() failed, skipping")
237 - continue
238 - if self.searchre.search(full_desc):
239 - self.matches["desc"].append([full_package,masked])
240 +
241 + yield ("desc", full_package, masked)
242
243 self.sdict = self.setconfig.getSets()
244 for setname in self.sdict:
245 @@ -235,16 +268,11 @@ class search(object):
246 match_string = setname.split("/")[-1]
247
248 if self.searchre.search(match_string):
249 - self.matches["set"].append([setname, False])
250 + yield ("set", setname, False)
251 elif self.searchdesc:
252 if self.searchre.search(
253 self.sdict[setname].getMetadata("DESCRIPTION")):
254 - self.matches["set"].append([setname, False])
255 -
256 - self.mlen=0
257 - for mtype in self.matches:
258 - self.matches[mtype].sort()
259 - self.mlen += len(self.matches[mtype])
260 + yield ("set", setname, False)
261
262 def addCP(self, cp):
263 if not self._xmatch("match-all", cp):
264 @@ -257,17 +285,32 @@ class search(object):
265
266 def output(self):
267 """Outputs the results of the search."""
268 - msg = []
269 +
270 + class msg(object):
271 + @staticmethod
272 + def append(msg):
273 + writemsg_stdout(msg, noiselevel=-1)
274 +
275 msg.append("\b\b \n[ Results for search key : " + \
276 bold(self.searchkey) + " ]\n")
277 - msg.append("[ Applications found : " + \
278 - bold(str(self.mlen)) + " ]\n\n")
279 - vardb = self.vartree.dbapi
280 + vardb = self._vardb
281 metadata_keys = set(Package.metadata_keys)
282 metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"])
283 metadata_keys = tuple(metadata_keys)
284 - for mtype in self.matches:
285 - for match,masked in self.matches[mtype]:
286 +
287 + if self.searchkey is None:
288 + # Handle results added via addCP
289 + addCP_matches = []
290 + for mytype, (match, masked) in self.matches.items():
291 + addCP_matches.append(mytype, match, masked)
292 + iterator = iter(addCP_matches)
293 +
294 + else:
295 + # Do a normal search
296 + iterator = self._iter_search()
297 +
298 + for mtype, match, masked in iterator:
299 + self.mlen += 1
300 full_package = None
301 if mtype == "pkg":
302 full_package = self._xmatch(
303 @@ -367,12 +410,26 @@ class search(object):
304 + " " + desc + "\n")
305 msg.append(" " + darkgreen("License:") + \
306 " " + license + "\n\n")
307 - writemsg_stdout(''.join(msg), noiselevel=-1)
308 +
309 + msg.append("[ Applications found : " + \
310 + bold(str(self.mlen)) + " ]\n\n")
311 +
312 + # This method can be called multiple times, so
313 + # reset the match count for the next call. Don't
314 + # reset it at the beginning of this method, since
315 + # that would lose modifications from the addCP
316 + # method.
317 + self.mlen = 0
318 +
319 #
320 # private interface
321 #
322 def getInstallationStatus(self,package):
323 - installed_package = self.vartree.dep_bestmatch(package)
324 + installed_package = self._vardb.match(package)
325 + if installed_package:
326 + installed_package = installed_package[-1]
327 + else:
328 + installed_package = ""
329 result = ""
330 version = self.getVersion(installed_package,search.VERSION_RELEASE)
331 if len(version) > 0:
332 @@ -391,4 +448,3 @@ class search(object):
333 else:
334 result = ""
335 return result
336 -
337 --
338 2.0.4

Replies