Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 5/5 v4] Add emerge --search-index option.
Date: Tue, 04 Nov 2014 22:09:36
Message-Id: 1415138963-26863-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option. by Zac Medico
1 The new emerge --search-index option, which is enabled by default,
2 causes pkg_desc_index to be used for search optimization. The search
3 index needs to be regenerated by egencache after changes are made to
4 a repository (see the --update-pkg-desc-index action).
5
6 For users that would like to modify ebuilds in a repository without
7 running egencache afterwards, emerge --search-index=n can be used to
8 get non-indexed search. Alternatively, the user could simply remove
9 the stale index file, in order to disable the search index for a
10 particular repository.
11
12 In order to conserve memory, indices are read as streams, and
13 MultiIterGroupBy is used to group results from IndexedPortdb and
14 IndexedVardb. Stream-oriented search also makes it possible to
15 display search results incrementally (fixing bug #412471).
16
17 X-Gentoo-Bug: 525718
18 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
19 ---
20 This updated patch optimizes search._iter_search and search.output to use
21 fewer search._xmatch calls. With this optimization, performance with
22 IndexedPortdb is nearly indistinguishable from regular portdbapi for the
23 case where no repositories are indexed.
24
25 man/emerge.1 | 8 +++
26 pym/_emerge/actions.py | 3 +-
27 pym/_emerge/depgraph.py | 2 +-
28 pym/_emerge/main.py | 5 ++
29 pym/_emerge/search.py | 137 ++++++++++++++++++++++++++++++------------------
30 5 files changed, 103 insertions(+), 52 deletions(-)
31
32 diff --git a/man/emerge.1 b/man/emerge.1
33 index bbe71ac..7bcdd9a 100644
34 --- a/man/emerge.1
35 +++ b/man/emerge.1
36 @@ -796,6 +796,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in
37 the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support
38 \fBHDEPEND\fR, this option affects only the former.
39 .TP
40 +.BR "\-\-search\-index < y | n >"
41 +Enable or disable indexed search for search actions. This option is
42 +enabled by default. The search index needs to be regenerated by
43 +\fBegencache\fR(1) after changes are made to a repository (see the
44 +\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added
45 +to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later
46 +overridden via the command line.
47 +.TP
48 .BR "\-\-select [ y | n ] (\-w short option)"
49 Add specified packages to the world set (inverse of
50 \fB\-\-oneshot\fR). This is useful if you want to
51 diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py
52 index 48b0826..8a22ab5 100644
53 --- a/pym/_emerge/actions.py
54 +++ b/pym/_emerge/actions.py
55 @@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner):
56 searchinstance = search(root_config,
57 spinner, "--searchdesc" in myopts,
58 "--quiet" not in myopts, "--usepkg" in myopts,
59 - "--usepkgonly" in myopts)
60 + "--usepkgonly" in myopts,
61 + search_index = myopts.get("--search-index", "y") != "n")
62 for mysearch in myfiles:
63 try:
64 searchinstance.execute(mysearch)
65 diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py
66 index 94eaed8..da408ad 100644
67 --- a/pym/_emerge/depgraph.py
68 +++ b/pym/_emerge/depgraph.py
69 @@ -8656,7 +8656,7 @@ def ambiguous_package_name(arg, atoms, root_config, spinner, myopts):
70
71 s = search(root_config, spinner, "--searchdesc" in myopts,
72 "--quiet" not in myopts, "--usepkg" in myopts,
73 - "--usepkgonly" in myopts)
74 + "--usepkgonly" in myopts, search_index = False)
75 null_cp = portage.dep_getkey(insert_category_into_atom(
76 arg, "null"))
77 cat, atom_pn = portage.catsplit(null_cp)
78 diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py
79 index cf7966c..c08e12a 100644
80 --- a/pym/_emerge/main.py
81 +++ b/pym/_emerge/main.py
82 @@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False):
83 "choices" :("True", "rdeps")
84 },
85
86 + "--search-index": {
87 + "help": "Enable or disable indexed search (enabled by default)",
88 + "choices": y_or_n
89 + },
90 +
91 "--select": {
92 "shortopt" : "-w",
93 "help" : "add specified packages to the world set " + \
94 diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
95 index 4b0fd9f..1916afe 100644
96 --- a/pym/_emerge/search.py
97 +++ b/pym/_emerge/search.py
98 @@ -7,9 +7,12 @@ import re
99 import portage
100 from portage import os
101 from portage.dbapi.porttree import _parse_uri_map
102 +from portage.dbapi.IndexedPortdb import IndexedPortdb
103 +from portage.dbapi.IndexedVardb import IndexedVardb
104 from portage.localization import localized_size
105 from portage.output import bold, bold as white, darkgreen, green, red
106 from portage.util import writemsg_stdout
107 +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
108
109 from _emerge.Package import Package
110
111 @@ -25,15 +28,17 @@ class search(object):
112 # public interface
113 #
114 def __init__(self, root_config, spinner, searchdesc,
115 - verbose, usepkg, usepkgonly):
116 + verbose, usepkg, usepkgonly, search_index = True):
117 """Searches the available and installed packages for the supplied search key.
118 The list of available and installed packages is created at object instantiation.
119 This makes successive searches faster."""
120 self.settings = root_config.settings
121 - self.vartree = root_config.trees["vartree"]
122 - self.spinner = spinner
123 self.verbose = verbose
124 self.searchdesc = searchdesc
125 + self.searchkey = None
126 + # Disable the spinner since search results are displayed
127 + # incrementally.
128 + self.spinner = None
129 self.root_config = root_config
130 self.setconfig = root_config.setconfig
131 self.matches = {"pkg" : []}
132 @@ -45,6 +50,10 @@ class search(object):
133 bindb = root_config.trees["bintree"].dbapi
134 vardb = root_config.trees["vartree"].dbapi
135
136 + if search_index:
137 + portdb = IndexedPortdb(portdb)
138 + vardb = IndexedVardb(vardb)
139 +
140 if not usepkgonly and portdb._have_root_eclass_dir:
141 self._dbs.append(portdb)
142
143 @@ -53,16 +62,23 @@ class search(object):
144
145 self._dbs.append(vardb)
146 self._portdb = portdb
147 + self._vardb = vardb
148
149 def _spinner_update(self):
150 if self.spinner:
151 self.spinner.update()
152
153 def _cp_all(self):
154 - cp_all = set()
155 + iterators = []
156 for db in self._dbs:
157 - cp_all.update(db.cp_all())
158 - return list(sorted(cp_all))
159 + i = db.cp_all()
160 + try:
161 + i = iter(i)
162 + except TypeError:
163 + pass
164 + iterators.append(i)
165 + for group in MultiIterGroupBy(iterators):
166 + yield group[0]
167
168 def _aux_get(self, *args, **kwargs):
169 for db in self._dbs:
170 @@ -97,7 +113,7 @@ class search(object):
171 return {}
172
173 def _visible(self, db, cpv, metadata):
174 - installed = db is self.vartree.dbapi
175 + installed = db is self._vardb
176 built = installed or db is not self._portdb
177 pkg_type = "ebuild"
178 if installed:
179 @@ -171,8 +187,11 @@ class search(object):
180
181 def execute(self,searchkey):
182 """Performs the search for the supplied search key"""
183 + self.searchkey = searchkey
184 +
185 + def _iter_search(self):
186 +
187 match_category = 0
188 - self.searchkey=searchkey
189 self.packagematches = []
190 if self.searchdesc:
191 self.searchdesc=1
192 @@ -181,6 +200,7 @@ class search(object):
193 self.searchdesc=0
194 self.matches = {"pkg":[], "set":[]}
195 print("Searching... ", end=' ')
196 + print()
197
198 regexsearch = False
199 if self.searchkey.startswith('%'):
200 @@ -204,27 +224,26 @@ class search(object):
201
202 masked=0
203 if self.searchre.search(match_string):
204 - if not self._xmatch("match-visible", package):
205 - masked=1
206 - self.matches["pkg"].append([package,masked])
207 + yield ("pkg", package)
208 elif self.searchdesc: # DESCRIPTION searching
209 - full_package = self._xmatch("bestmatch-visible", package)
210 + # Check for DESCRIPTION match first, so that we can skip
211 + # the expensive visibility check if it doesn't match.
212 + full_package = self._xmatch("match-all", package)
213 if not full_package:
214 - #no match found; we don't want to query description
215 - full_package = portage.best(
216 - self._xmatch("match-all", package))
217 - if not full_package:
218 - continue
219 - else:
220 - masked=1
221 + continue
222 + full_package = full_package[-1]
223 try:
224 full_desc = self._aux_get(
225 full_package, ["DESCRIPTION"])[0]
226 except KeyError:
227 - print("emerge: search: aux_get() failed, skipping")
228 + portage.writemsg(
229 + "emerge: search: aux_get() failed, skipping\n",
230 + noiselevel=-1)
231 continue
232 - if self.searchre.search(full_desc):
233 - self.matches["desc"].append([full_package,masked])
234 + if not self.searchre.search(full_desc):
235 + continue
236 +
237 + yield ("desc", package)
238
239 self.sdict = self.setconfig.getSets()
240 for setname in self.sdict:
241 @@ -235,51 +254,56 @@ class search(object):
242 match_string = setname.split("/")[-1]
243
244 if self.searchre.search(match_string):
245 - self.matches["set"].append([setname, False])
246 + yield ("set", setname, False)
247 elif self.searchdesc:
248 if self.searchre.search(
249 self.sdict[setname].getMetadata("DESCRIPTION")):
250 - self.matches["set"].append([setname, False])
251 -
252 - self.mlen=0
253 - for mtype in self.matches:
254 - self.matches[mtype].sort()
255 - self.mlen += len(self.matches[mtype])
256 + yield ("set", setname)
257
258 def addCP(self, cp):
259 if not self._xmatch("match-all", cp):
260 return
261 - masked = 0
262 - if not self._xmatch("bestmatch-visible", cp):
263 - masked = 1
264 - self.matches["pkg"].append([cp, masked])
265 + self.matches["pkg"].append(cp)
266 self.mlen += 1
267
268 def output(self):
269 """Outputs the results of the search."""
270 - msg = []
271 +
272 + class msg(object):
273 + @staticmethod
274 + def append(msg):
275 + writemsg_stdout(msg, noiselevel=-1)
276 +
277 msg.append("\b\b \n[ Results for search key : " + \
278 bold(self.searchkey) + " ]\n")
279 - msg.append("[ Applications found : " + \
280 - bold(str(self.mlen)) + " ]\n\n")
281 - vardb = self.vartree.dbapi
282 + vardb = self._vardb
283 metadata_keys = set(Package.metadata_keys)
284 metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"])
285 metadata_keys = tuple(metadata_keys)
286 - for mtype in self.matches:
287 - for match,masked in self.matches[mtype]:
288 +
289 + if self.searchkey is None:
290 + # Handle results added via addCP
291 + addCP_matches = []
292 + for mytype, match in self.matches.items():
293 + addCP_matches.append(mytype, match)
294 + iterator = iter(addCP_matches)
295 +
296 + else:
297 + # Do a normal search
298 + iterator = self._iter_search()
299 +
300 + for mtype, match in iterator:
301 + self.mlen += 1
302 + masked = False
303 full_package = None
304 - if mtype == "pkg":
305 + if mtype in ("pkg", "desc"):
306 full_package = self._xmatch(
307 "bestmatch-visible", match)
308 if not full_package:
309 - #no match found; we don't want to query description
310 - masked=1
311 - full_package = portage.best(
312 - self._xmatch("match-all",match))
313 - elif mtype == "desc":
314 - full_package = match
315 - match = portage.cpv_getkey(match)
316 + masked = True
317 + full_package = self._xmatch("match-all", match)
318 + if full_package:
319 + full_package = full_package[-1]
320 elif mtype == "set":
321 msg.append(green("*") + " " + bold(match) + "\n")
322 if self.verbose:
323 @@ -367,12 +391,26 @@ class search(object):
324 + " " + desc + "\n")
325 msg.append(" " + darkgreen("License:") + \
326 " " + license + "\n\n")
327 - writemsg_stdout(''.join(msg), noiselevel=-1)
328 +
329 + msg.append("[ Applications found : " + \
330 + bold(str(self.mlen)) + " ]\n\n")
331 +
332 + # This method can be called multiple times, so
333 + # reset the match count for the next call. Don't
334 + # reset it at the beginning of this method, since
335 + # that would lose modifications from the addCP
336 + # method.
337 + self.mlen = 0
338 +
339 #
340 # private interface
341 #
342 def getInstallationStatus(self,package):
343 - installed_package = self.vartree.dep_bestmatch(package)
344 + installed_package = self._vardb.match(package)
345 + if installed_package:
346 + installed_package = installed_package[-1]
347 + else:
348 + installed_package = ""
349 result = ""
350 version = self.getVersion(installed_package,search.VERSION_RELEASE)
351 if len(version) > 0:
352 @@ -391,4 +429,3 @@ class search(object):
353 else:
354 result = ""
355 return result
356 -
357 --
358 2.0.4