Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option.
Date: Sat, 01 Nov 2014 23:04:59
Message-Id: 1414883090-20554-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 5/5] Add emerge --search-index option. by Zac Medico
1 The new emerge --search-index option, which is enabled by default,
2 causes pkg_desc_index to be used for search optimization. The search
3 index needs to be regenerated by egencache after changes are made to
4 a repository (see the --update-pkg-desc-index action).
5
6 For users that would like to modify ebuilds in a repository without
7 running egencache afterwards, emerge --search-index=n can be used to
8 get non-indexed search. Alternatively, the user could simply remove
9 the stale index file, in order to disable the search index for a
10 particular repository.
11
12 In order to conserve memory, indices are read as streams, and
13 MultiIterGroupBy is used to group results from IndexedPortdb and
14 IndexedVardb. Stream-oriented search also makes it possible to
15 display search results incrementally (fixing bug #412471).
16
17 X-Gentoo-Bug: 525718
18 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
19 ---
20 This updated patch fixes the search.output method to reset the match
21 count after each search is performed, in case there are multiple
22 searches.
23
24 man/emerge.1 | 8 ++++
25 pym/_emerge/actions.py | 3 +-
26 pym/_emerge/depgraph.py | 2 +-
27 pym/_emerge/main.py | 5 ++
28 pym/_emerge/search.py | 119 ++++++++++++++++++++++++++++++++++--------------
29 5 files changed, 102 insertions(+), 35 deletions(-)
30
31 diff --git a/man/emerge.1 b/man/emerge.1
32 index bbe71ac..7bcdd9a 100644
33 --- a/man/emerge.1
34 +++ b/man/emerge.1
35 @@ -796,6 +796,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in
36 the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support
37 \fBHDEPEND\fR, this option affects only the former.
38 .TP
39 +.BR "\-\-search\-index < y | n >"
40 +Enable or disable indexed search for search actions. This option is
41 +enabled by default. The search index needs to be regenerated by
42 +\fBegencache\fR(1) after changes are made to a repository (see the
43 +\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added
44 +to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later
45 +overridden via the command line.
46 +.TP
47 .BR "\-\-select [ y | n ] (\-w short option)"
48 Add specified packages to the world set (inverse of
49 \fB\-\-oneshot\fR). This is useful if you want to
50 diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py
51 index 48b0826..8a22ab5 100644
52 --- a/pym/_emerge/actions.py
53 +++ b/pym/_emerge/actions.py
54 @@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner):
55 searchinstance = search(root_config,
56 spinner, "--searchdesc" in myopts,
57 "--quiet" not in myopts, "--usepkg" in myopts,
58 - "--usepkgonly" in myopts)
59 + "--usepkgonly" in myopts,
60 + search_index = myopts.get("--search-index", "y") != "n")
61 for mysearch in myfiles:
62 try:
63 searchinstance.execute(mysearch)
64 diff --git a/pym/_emerge/depgraph.py b/pym/_emerge/depgraph.py
65 index 78b9236..2fbb7ce 100644
66 --- a/pym/_emerge/depgraph.py
67 +++ b/pym/_emerge/depgraph.py
68 @@ -8596,7 +8596,7 @@ def ambiguous_package_name(arg, atoms, root_config, spinner, myopts):
69
70 s = search(root_config, spinner, "--searchdesc" in myopts,
71 "--quiet" not in myopts, "--usepkg" in myopts,
72 - "--usepkgonly" in myopts)
73 + "--usepkgonly" in myopts, search_index = False)
74 null_cp = portage.dep_getkey(insert_category_into_atom(
75 arg, "null"))
76 cat, atom_pn = portage.catsplit(null_cp)
77 diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py
78 index cf7966c..c08e12a 100644
79 --- a/pym/_emerge/main.py
80 +++ b/pym/_emerge/main.py
81 @@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False):
82 "choices" :("True", "rdeps")
83 },
84
85 + "--search-index": {
86 + "help": "Enable or disable indexed search (enabled by default)",
87 + "choices": y_or_n
88 + },
89 +
90 "--select": {
91 "shortopt" : "-w",
92 "help" : "add specified packages to the world set " + \
93 diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
94 index 4b0fd9f..1d710ee 100644
95 --- a/pym/_emerge/search.py
96 +++ b/pym/_emerge/search.py
97 @@ -7,9 +7,12 @@ import re
98 import portage
99 from portage import os
100 from portage.dbapi.porttree import _parse_uri_map
101 +from portage.dbapi.IndexedPortdb import IndexedPortdb
102 +from portage.dbapi.IndexedVardb import IndexedVardb
103 from portage.localization import localized_size
104 from portage.output import bold, bold as white, darkgreen, green, red
105 from portage.util import writemsg_stdout
106 +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
107
108 from _emerge.Package import Package
109
110 @@ -25,15 +28,17 @@ class search(object):
111 # public interface
112 #
113 def __init__(self, root_config, spinner, searchdesc,
114 - verbose, usepkg, usepkgonly):
115 + verbose, usepkg, usepkgonly, search_index = True):
116 """Searches the available and installed packages for the supplied search key.
117 The list of available and installed packages is created at object instantiation.
118 This makes successive searches faster."""
119 self.settings = root_config.settings
120 - self.vartree = root_config.trees["vartree"]
121 - self.spinner = spinner
122 self.verbose = verbose
123 self.searchdesc = searchdesc
124 + self.searchkey = None
125 + # Disable the spinner since search results are displayed
126 + # incrementally.
127 + self.spinner = None
128 self.root_config = root_config
129 self.setconfig = root_config.setconfig
130 self.matches = {"pkg" : []}
131 @@ -45,6 +50,10 @@ class search(object):
132 bindb = root_config.trees["bintree"].dbapi
133 vardb = root_config.trees["vartree"].dbapi
134
135 + if search_index:
136 + portdb = IndexedPortdb(portdb)
137 + vardb = IndexedVardb(vardb)
138 +
139 if not usepkgonly and portdb._have_root_eclass_dir:
140 self._dbs.append(portdb)
141
142 @@ -53,16 +62,23 @@ class search(object):
143
144 self._dbs.append(vardb)
145 self._portdb = portdb
146 + self._vardb = vardb
147
148 def _spinner_update(self):
149 if self.spinner:
150 self.spinner.update()
151
152 def _cp_all(self):
153 - cp_all = set()
154 + iterators = []
155 for db in self._dbs:
156 - cp_all.update(db.cp_all())
157 - return list(sorted(cp_all))
158 + i = db.cp_all()
159 + try:
160 + i = iter(i)
161 + except TypeError:
162 + pass
163 + iterators.append(i)
164 + for group in MultiIterGroupBy(iterators):
165 + yield group[0]
166
167 def _aux_get(self, *args, **kwargs):
168 for db in self._dbs:
169 @@ -97,7 +113,7 @@ class search(object):
170 return {}
171
172 def _visible(self, db, cpv, metadata):
173 - installed = db is self.vartree.dbapi
174 + installed = db is self._vardb
175 built = installed or db is not self._portdb
176 pkg_type = "ebuild"
177 if installed:
178 @@ -171,8 +187,11 @@ class search(object):
179
180 def execute(self,searchkey):
181 """Performs the search for the supplied search key"""
182 + self.searchkey = searchkey
183 +
184 + def _iter_search(self):
185 +
186 match_category = 0
187 - self.searchkey=searchkey
188 self.packagematches = []
189 if self.searchdesc:
190 self.searchdesc=1
191 @@ -181,6 +200,7 @@ class search(object):
192 self.searchdesc=0
193 self.matches = {"pkg":[], "set":[]}
194 print("Searching... ", end=' ')
195 + print()
196
197 regexsearch = False
198 if self.searchkey.startswith('%'):
199 @@ -206,8 +226,24 @@ class search(object):
200 if self.searchre.search(match_string):
201 if not self._xmatch("match-visible", package):
202 masked=1
203 - self.matches["pkg"].append([package,masked])
204 + yield ("pkg", package, masked)
205 elif self.searchdesc: # DESCRIPTION searching
206 + # Check for DESCRIPTION match first, so that we can skip
207 + # the expensive visibility check if it doesn't match.
208 + full_package = self._xmatch("match-all", package)
209 + if not full_package:
210 + continue
211 + full_package = full_package[-1]
212 + try:
213 + full_desc = self._aux_get(
214 + full_package, ["DESCRIPTION"])[0]
215 + except KeyError:
216 + portage.writemsg(
217 + "emerge: search: aux_get() failed, skipping\n",
218 + noiselevel=-1)
219 + continue
220 + if not self.searchre.search(full_desc):
221 + continue
222 full_package = self._xmatch("bestmatch-visible", package)
223 if not full_package:
224 #no match found; we don't want to query description
225 @@ -217,14 +253,8 @@ class search(object):
226 continue
227 else:
228 masked=1
229 - try:
230 - full_desc = self._aux_get(
231 - full_package, ["DESCRIPTION"])[0]
232 - except KeyError:
233 - print("emerge: search: aux_get() failed, skipping")
234 - continue
235 - if self.searchre.search(full_desc):
236 - self.matches["desc"].append([full_package,masked])
237 +
238 + yield ("desc", full_package, masked)
239
240 self.sdict = self.setconfig.getSets()
241 for setname in self.sdict:
242 @@ -235,16 +265,11 @@ class search(object):
243 match_string = setname.split("/")[-1]
244
245 if self.searchre.search(match_string):
246 - self.matches["set"].append([setname, False])
247 + yield ("set", setname, False)
248 elif self.searchdesc:
249 if self.searchre.search(
250 self.sdict[setname].getMetadata("DESCRIPTION")):
251 - self.matches["set"].append([setname, False])
252 -
253 - self.mlen=0
254 - for mtype in self.matches:
255 - self.matches[mtype].sort()
256 - self.mlen += len(self.matches[mtype])
257 + yield ("set", setname, False)
258
259 def addCP(self, cp):
260 if not self._xmatch("match-all", cp):
261 @@ -257,17 +282,32 @@ class search(object):
262
263 def output(self):
264 """Outputs the results of the search."""
265 - msg = []
266 +
267 + class msg(object):
268 + @staticmethod
269 + def append(msg):
270 + writemsg_stdout(msg, noiselevel=-1)
271 +
272 msg.append("\b\b \n[ Results for search key : " + \
273 bold(self.searchkey) + " ]\n")
274 - msg.append("[ Applications found : " + \
275 - bold(str(self.mlen)) + " ]\n\n")
276 - vardb = self.vartree.dbapi
277 + vardb = self._vardb
278 metadata_keys = set(Package.metadata_keys)
279 metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"])
280 metadata_keys = tuple(metadata_keys)
281 - for mtype in self.matches:
282 - for match,masked in self.matches[mtype]:
283 +
284 + if self.searchkey is None:
285 + # Handle results added via addCP
286 + addCP_matches = []
287 + for mytype, (match, masked) in self.matches.items():
288 + addCP_matches.append(mytype, match, masked)
289 + iterator = iter(addCP_matches)
290 +
291 + else:
292 + # Do a normal search
293 + iterator = self._iter_search()
294 +
295 + for mtype, match, masked in iterator:
296 + self.mlen += 1
297 full_package = None
298 if mtype == "pkg":
299 full_package = self._xmatch(
300 @@ -367,12 +407,26 @@ class search(object):
301 + " " + desc + "\n")
302 msg.append(" " + darkgreen("License:") + \
303 " " + license + "\n\n")
304 - writemsg_stdout(''.join(msg), noiselevel=-1)
305 +
306 + msg.append("[ Applications found : " + \
307 + bold(str(self.mlen)) + " ]\n\n")
308 +
309 + # This method can be called multiple times, so
310 + # reset the match count for the next call. Don't
311 + # reset it at the beginning of this method, since
312 + # that would lose modifications from the addCP
313 + # method.
314 + self.mlen = 0
315 +
316 #
317 # private interface
318 #
319 def getInstallationStatus(self,package):
320 - installed_package = self.vartree.dep_bestmatch(package)
321 + installed_package = self._vardb.match(package)
322 + if installed_package:
323 + installed_package = installed_package[-1]
324 + else:
325 + installed_package = ""
326 result = ""
327 version = self.getVersion(installed_package,search.VERSION_RELEASE)
328 if len(version) > 0:
329 @@ -391,4 +445,3 @@ class search(object):
330 else:
331 result = ""
332 return result
333 -
334 --
335 2.0.4

Replies