1 |
This updated patch adds --search-index < y | n >. I'll be maintaining |
2 |
this patch in the following branch: |
3 |
|
4 |
https://github.com/zmedico/portage/tree/bug_525718 |
5 |
|
6 |
From 2aca92f664fd2ff669b77b38a49b06fafbc66b8d Mon Sep 17 00:00:00 2001 |
7 |
From: Zac Medico <zmedico@g.o> |
8 |
Date: Fri, 17 Oct 2014 17:38:59 -0700 |
9 |
Subject: [PATCH] emerge --search: use description index |
10 |
|
11 |
This adds an egencache --update-pkg-desc-index action which generates |
12 |
a plain-text index of package names, versions, and descriptions. The |
13 |
index can then be used to optimize emerge --search / --searchdesc |
14 |
actions. If the package description index is missing from a particular |
15 |
repository, then all metadata for that repository is obtained using the |
16 |
normal portdbapi.aux_get method. |
17 |
|
18 |
Searching of installed packages is optimized to take advantage of |
19 |
vardbapi._aux_cache, which is backed by vardb_metadata.pickle. |
20 |
See the IndexedVardb docstring for some more details. |
21 |
|
22 |
For users that would like to modify ebuilds in a repository without |
23 |
running egencache afterwards, the new emerge --search-index < y | n > |
24 |
option can be used to get non-indexed search. Alternatively, the user |
25 |
could simply remove the stale index file, in order to disable the |
26 |
search index for a particular repository. |
27 |
|
28 |
X-Gentoo-Bug: 525718 |
29 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 |
30 |
--- |
31 |
bin/egencache | 43 ++++++++++- |
32 |
man/egencache.1 | 4 + |
33 |
man/emerge.1 | 8 ++ |
34 |
man/portage.5 | 6 ++ |
35 |
pym/_emerge/actions.py | 3 +- |
36 |
pym/_emerge/main.py | 5 ++ |
37 |
pym/_emerge/search.py | 198 +++++++++++++++++++++++++++++++++++++++++++++---- |
38 |
7 files changed, 250 insertions(+), 17 deletions(-) |
39 |
|
40 |
diff --git a/bin/egencache b/bin/egencache |
41 |
index e366058..90d5e68 100755 |
42 |
--- a/bin/egencache |
43 |
+++ b/bin/egencache |
44 |
@@ -57,7 +57,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler |
45 |
from portage.util._eventloop.global_event_loop import global_event_loop |
46 |
from portage import cpv_getkey |
47 |
from portage.dep import Atom, isjustname |
48 |
-from portage.versions import pkgsplit, vercmp |
49 |
+from portage.versions import pkgsplit, vercmp, _pkg_str |
50 |
|
51 |
try: |
52 |
from xml.etree import ElementTree |
53 |
@@ -91,6 +91,9 @@ def parse_args(args): |
54 |
actions.add_argument("--update-changelogs", |
55 |
action="store_true", |
56 |
help="update the ChangeLog files from SCM logs") |
57 |
+ actions.add_argument("--update-pkg-desc-index", |
58 |
+ action="store_true", |
59 |
+ help="update package description index") |
60 |
actions.add_argument("--update-manifests", |
61 |
action="store_true", |
62 |
help="update manifests") |
63 |
@@ -451,6 +454,35 @@ class GenCache(object): |
64 |
if hasattr(trg_cache, '_prune_empty_dirs'): |
65 |
trg_cache._prune_empty_dirs() |
66 |
|
67 |
+class GenPkgDescIndex(object): |
68 |
+ def __init__(self, portdb, output_file): |
69 |
+ self.returncode = os.EX_OK |
70 |
+ self._portdb = portdb |
71 |
+ self._output_file = output_file |
72 |
+ |
73 |
+ def run(self): |
74 |
+ |
75 |
+ portage.util.ensure_dirs(os.path.dirname(self._output_file)) |
76 |
+ f = portage.util.atomic_ofstream(self._output_file, |
77 |
+ encoding=_encodings["repo.content"]) |
78 |
+ |
79 |
+ portdb = self._portdb |
80 |
+ for cp in portdb.cp_all(): |
81 |
+ pkgs = portdb.cp_list(cp) |
82 |
+ if not pkgs: |
83 |
+ continue |
84 |
+ desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"]) |
85 |
+ |
86 |
+ if len(pkgs) == 1: |
87 |
+ output = "%s: %s\n" % (pkgs[0], desc) |
88 |
+ else: |
89 |
+ output = "%s,%s: %s\n" % (pkgs[0], |
90 |
+ ",".join(_pkg_str(cpv).version |
91 |
+ for cpv in pkgs[1:]), desc) |
92 |
+ f.write(output) |
93 |
+ |
94 |
+ f.close() |
95 |
+ |
96 |
class GenUseLocalDesc(object): |
97 |
def __init__(self, portdb, output=None, |
98 |
preserve_comments=False): |
99 |
@@ -893,7 +925,8 @@ def egencache_main(args): |
100 |
local_config=False, env=env) |
101 |
|
102 |
if not (options.update or options.update_use_local_desc or |
103 |
- options.update_changelogs or options.update_manifests): |
104 |
+ options.update_changelogs or options.update_manifests or |
105 |
+ options.update_pkg_desc_index): |
106 |
parser.error('No action specified') |
107 |
return 1 |
108 |
|
109 |
@@ -1057,6 +1090,12 @@ def egencache_main(args): |
110 |
else: |
111 |
ret.append(scheduler.returncode) |
112 |
|
113 |
+ if options.update_pkg_desc_index: |
114 |
+ gen_index = GenPkgDescIndex(portdb, os.path.join( |
115 |
+ repo_config.location, "metadata", "pkg_desc_index")) |
116 |
+ gen_index.run() |
117 |
+ ret.append(gen_index.returncode) |
118 |
+ |
119 |
if options.update_use_local_desc: |
120 |
gen_desc = GenUseLocalDesc(portdb, |
121 |
output=options.uld_output, |
122 |
diff --git a/man/egencache.1 b/man/egencache.1 |
123 |
index f71feb3..3a3197f 100644 |
124 |
--- a/man/egencache.1 |
125 |
+++ b/man/egencache.1 |
126 |
@@ -19,6 +19,10 @@ for the details on package atom syntax. |
127 |
.BR "\-\-update\-changelogs" |
128 |
Update the ChangeLog files from SCM logs (supported only in git repos). |
129 |
.TP |
130 |
+.BR "\-\-update\-pkg\-desc\-index" |
131 |
+Update the package description index which is located at |
132 |
+\fImetadata/pkg_desc_index\fR in the repository. |
133 |
+.TP |
134 |
.BR "\-\-update\-use\-local\-desc" |
135 |
Update the \fIprofiles/use.local.desc\fR file from metadata.xml. |
136 |
.TP |
137 |
diff --git a/man/emerge.1 b/man/emerge.1 |
138 |
index 2264b58..efd5d41 100644 |
139 |
--- a/man/emerge.1 |
140 |
+++ b/man/emerge.1 |
141 |
@@ -790,6 +790,14 @@ If ebuilds using EAPIs which \fIdo not\fR support \fBHDEPEND\fR are built in |
142 |
the same \fBemerge\fR run as those using EAPIs which \fIdo\fR support |
143 |
\fBHDEPEND\fR, this option affects only the former. |
144 |
.TP |
145 |
+.BR "\-\-search\-index < y | n >" |
146 |
+Enable or disable indexed search for search actions. This option is |
147 |
+enabled by default. The search index needs to be regenerated by |
148 |
+\fBegencache\fR(1) after changes are made to a repository (see the |
149 |
+\fB\-\-update\-pkg\-desc\-index\fR action). This setting can be added |
150 |
+to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later |
151 |
+overridden via the command line. |
152 |
+.TP |
153 |
.BR "\-\-select [ y | n ] (\-w short option)" |
154 |
Add specified packages to the world set (inverse of |
155 |
\fB\-\-oneshot\fR). This is useful if you want to |
156 |
diff --git a/man/portage.5 b/man/portage.5 |
157 |
index e399f0f..26856d1 100644 |
158 |
--- a/man/portage.5 |
159 |
+++ b/man/portage.5 |
160 |
@@ -75,6 +75,7 @@ user\-defined package sets |
161 |
.BR /usr/portage/metadata/ |
162 |
.nf |
163 |
layout.conf |
164 |
+pkg_desc_index |
165 |
.fi |
166 |
.TP |
167 |
.BR /usr/portage/profiles/ |
168 |
@@ -1110,6 +1111,11 @@ cache\-formats = md5-dict pms |
169 |
profile\-formats = portage-2 |
170 |
.fi |
171 |
.RE |
172 |
+.TP |
173 |
+.BR pkg_desc_index |
174 |
+This is an index of packages and descriptions which may be generated |
175 |
+by \fBegencache\fR(1) in order to optimize \fBemerge\fR(1) search |
176 |
+actions. |
177 |
.RE |
178 |
.TP |
179 |
.BR /usr/portage/profiles/ |
180 |
diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py |
181 |
index 4e8b83b..a81212c 100644 |
182 |
--- a/pym/_emerge/actions.py |
183 |
+++ b/pym/_emerge/actions.py |
184 |
@@ -2015,7 +2015,8 @@ def action_search(root_config, myopts, myfiles, spinner): |
185 |
searchinstance = search(root_config, |
186 |
spinner, "--searchdesc" in myopts, |
187 |
"--quiet" not in myopts, "--usepkg" in myopts, |
188 |
- "--usepkgonly" in myopts) |
189 |
+ "--usepkgonly" in myopts, |
190 |
+ search_index = myopts.get("--search-index", "y") != "n") |
191 |
for mysearch in myfiles: |
192 |
try: |
193 |
searchinstance.execute(mysearch) |
194 |
diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py |
195 |
index 3883f72..d403b36 100644 |
196 |
--- a/pym/_emerge/main.py |
197 |
+++ b/pym/_emerge/main.py |
198 |
@@ -616,6 +616,11 @@ def parse_opts(tmpcmdline, silent=False): |
199 |
"choices" :("True", "rdeps") |
200 |
}, |
201 |
|
202 |
+ "--search-index": { |
203 |
+ "help": "Enable or disable indexed search (enabled by default)", |
204 |
+ "choices": y_or_n |
205 |
+ }, |
206 |
+ |
207 |
"--select": { |
208 |
"shortopt" : "-w", |
209 |
"help" : "add specified packages to the world set " + \ |
210 |
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py |
211 |
index 4b0fd9f..007abf2 100644 |
212 |
--- a/pym/_emerge/search.py |
213 |
+++ b/pym/_emerge/search.py |
214 |
@@ -3,13 +3,17 @@ |
215 |
|
216 |
from __future__ import print_function |
217 |
|
218 |
+import io |
219 |
import re |
220 |
import portage |
221 |
-from portage import os |
222 |
+from portage import os, _encodings |
223 |
from portage.dbapi.porttree import _parse_uri_map |
224 |
+from portage.dep import Atom |
225 |
+from portage.exception import InvalidData |
226 |
from portage.localization import localized_size |
227 |
from portage.output import bold, bold as white, darkgreen, green, red |
228 |
from portage.util import writemsg_stdout |
229 |
+from portage.versions import _pkg_str |
230 |
|
231 |
from _emerge.Package import Package |
232 |
|
233 |
@@ -25,12 +29,11 @@ class search(object): |
234 |
# public interface |
235 |
# |
236 |
def __init__(self, root_config, spinner, searchdesc, |
237 |
- verbose, usepkg, usepkgonly): |
238 |
+ verbose, usepkg, usepkgonly, search_index=True): |
239 |
"""Searches the available and installed packages for the supplied search key. |
240 |
The list of available and installed packages is created at object instantiation. |
241 |
This makes successive searches faster.""" |
242 |
self.settings = root_config.settings |
243 |
- self.vartree = root_config.trees["vartree"] |
244 |
self.spinner = spinner |
245 |
self.verbose = verbose |
246 |
self.searchdesc = searchdesc |
247 |
@@ -45,6 +48,10 @@ class search(object): |
248 |
bindb = root_config.trees["bintree"].dbapi |
249 |
vardb = root_config.trees["vartree"].dbapi |
250 |
|
251 |
+ if search_index: |
252 |
+ portdb = IndexedPortdb(portdb) |
253 |
+ vardb = IndexedVardb(vardb) |
254 |
+ |
255 |
if not usepkgonly and portdb._have_root_eclass_dir: |
256 |
self._dbs.append(portdb) |
257 |
|
258 |
@@ -53,6 +60,7 @@ class search(object): |
259 |
|
260 |
self._dbs.append(vardb) |
261 |
self._portdb = portdb |
262 |
+ self._vardb = vardb |
263 |
|
264 |
def _spinner_update(self): |
265 |
if self.spinner: |
266 |
@@ -97,7 +105,7 @@ class search(object): |
267 |
return {} |
268 |
|
269 |
def _visible(self, db, cpv, metadata): |
270 |
- installed = db is self.vartree.dbapi |
271 |
+ installed = db is self._vardb |
272 |
built = installed or db is not self._portdb |
273 |
pkg_type = "ebuild" |
274 |
if installed: |
275 |
@@ -208,6 +216,20 @@ class search(object): |
276 |
masked=1 |
277 |
self.matches["pkg"].append([package,masked]) |
278 |
elif self.searchdesc: # DESCRIPTION searching |
279 |
+ # Check for DESCRIPTION match first, so that we can skip |
280 |
+ # the expensive visibility check if it doesn't match. |
281 |
+ full_package = portage.best( |
282 |
+ self._xmatch("match-all", package)) |
283 |
+ try: |
284 |
+ full_desc = self._aux_get( |
285 |
+ full_package, ["DESCRIPTION"])[0] |
286 |
+ except KeyError: |
287 |
+ portage.writemsg( |
288 |
+ "emerge: search: aux_get() failed, skipping\n", |
289 |
+ noiselevel=-1) |
290 |
+ continue |
291 |
+ if not self.searchre.search(full_desc): |
292 |
+ continue |
293 |
full_package = self._xmatch("bestmatch-visible", package) |
294 |
if not full_package: |
295 |
#no match found; we don't want to query description |
296 |
@@ -217,14 +239,8 @@ class search(object): |
297 |
continue |
298 |
else: |
299 |
masked=1 |
300 |
- try: |
301 |
- full_desc = self._aux_get( |
302 |
- full_package, ["DESCRIPTION"])[0] |
303 |
- except KeyError: |
304 |
- print("emerge: search: aux_get() failed, skipping") |
305 |
- continue |
306 |
- if self.searchre.search(full_desc): |
307 |
- self.matches["desc"].append([full_package,masked]) |
308 |
+ |
309 |
+ self.matches["desc"].append((full_package, masked)) |
310 |
|
311 |
self.sdict = self.setconfig.getSets() |
312 |
for setname in self.sdict: |
313 |
@@ -262,7 +278,7 @@ class search(object): |
314 |
bold(self.searchkey) + " ]\n") |
315 |
msg.append("[ Applications found : " + \ |
316 |
bold(str(self.mlen)) + " ]\n\n") |
317 |
- vardb = self.vartree.dbapi |
318 |
+ vardb = self._vardb |
319 |
metadata_keys = set(Package.metadata_keys) |
320 |
metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) |
321 |
metadata_keys = tuple(metadata_keys) |
322 |
@@ -372,7 +388,11 @@ class search(object): |
323 |
# private interface |
324 |
# |
325 |
def getInstallationStatus(self,package): |
326 |
- installed_package = self.vartree.dep_bestmatch(package) |
327 |
+ installed_package = self._vardb.match(package) |
328 |
+ if installed_package: |
329 |
+ installed_package = installed_package[-1] |
330 |
+ else: |
331 |
+ installed_package = "" |
332 |
result = "" |
333 |
version = self.getVersion(installed_package,search.VERSION_RELEASE) |
334 |
if len(version) > 0: |
335 |
@@ -392,3 +412,153 @@ class search(object): |
336 |
result = "" |
337 |
return result |
338 |
|
339 |
+ |
340 |
+class IndexedPortdb(object): |
341 |
+ """ |
342 |
+ A portdbapi interface that uses a package description index to |
343 |
+ improve performance. If the description index is missing for a |
344 |
+ particular repository, then all metadata for that repository is |
345 |
+ obtained using the normal portdbapi.aux_get method. |
346 |
+ """ |
347 |
+ def __init__(self, portdb): |
348 |
+ self._portdb = portdb |
349 |
+ self.cpv_exists = portdb.cpv_exists |
350 |
+ self.getFetchMap = portdb.getFetchMap |
351 |
+ self.findname = portdb.findname |
352 |
+ self._aux_cache_keys = portdb._aux_cache_keys |
353 |
+ self._have_root_eclass_dir = portdb._have_root_eclass_dir |
354 |
+ self._cpv_sort_ascending = portdb._cpv_sort_ascending |
355 |
+ self._desc_cache = None |
356 |
+ self._cp_map = None |
357 |
+ |
358 |
+ def _init_index(self): |
359 |
+ cp_map = {} |
360 |
+ desc_cache = {} |
361 |
+ for repo_path in self._portdb.porttrees: |
362 |
+ outside_repo = os.path.join(self._portdb.depcachedir, |
363 |
+ repo_path.lstrip(os.sep)) |
364 |
+ for parent_dir in (repo_path, outside_repo): |
365 |
+ file_path = os.path.join(parent_dir, |
366 |
+ "metadata", "pkg_desc_index") |
367 |
+ |
368 |
+ try: |
369 |
+ with io.open(file_path, |
370 |
+ encoding=_encodings["repo.content"]) as f: |
371 |
+ for line in f: |
372 |
+ pkgs, desc = line.split(":", 1) |
373 |
+ desc = desc.strip() |
374 |
+ pkgs = pkgs.split(",") |
375 |
+ if not pkgs[0]: |
376 |
+ continue |
377 |
+ try: |
378 |
+ pkg = _pkg_str(pkgs[0]) |
379 |
+ except InvalidData: |
380 |
+ continue |
381 |
+ cp_list = cp_map.get(pkg.cp) |
382 |
+ if cp_list is None: |
383 |
+ cp_list = [] |
384 |
+ cp_map[pkg.cp] = cp_list |
385 |
+ cp_list.append(pkg) |
386 |
+ for ver in pkgs[1:]: |
387 |
+ try: |
388 |
+ cp_list.append( |
389 |
+ _pkg_str(pkg.cp + "-" + ver)) |
390 |
+ except InvalidData: |
391 |
+ pass |
392 |
+ for cpv in cp_list: |
393 |
+ desc_cache[cpv] = desc |
394 |
+ except IOError: |
395 |
+ pass |
396 |
+ else: |
397 |
+ break |
398 |
+ else: |
399 |
+ # No descriptions index was found, so populate |
400 |
+ # cp_map the slow way. |
401 |
+ for cp in self._portdb.cp_all(trees=[repo_path]): |
402 |
+ cp_list = cp_map.get(cp) |
403 |
+ if cp_list is None: |
404 |
+ cp_list = [] |
405 |
+ cp_map[cp] = cp_list |
406 |
+ for cpv in self._portdb.cp_list(cp, mytree=repo_path): |
407 |
+ if cpv not in cp_list: |
408 |
+ cp_list.append(_pkg_str(cpv)) |
409 |
+ |
410 |
+ self._desc_cache = desc_cache |
411 |
+ self._cp_map = cp_map |
412 |
+ |
413 |
+ def cp_all(self): |
414 |
+ if self._cp_map is None: |
415 |
+ self._init_index() |
416 |
+ return list(self._cp_map) |
417 |
+ |
418 |
+ def match(self, atom): |
419 |
+ if not isinstance(atom, Atom): |
420 |
+ atom = Atom(atom) |
421 |
+ cp_list = self._cp_map.get(atom.cp) |
422 |
+ if cp_list is None: |
423 |
+ return [] |
424 |
+ self._portdb._cpv_sort_ascending(cp_list) |
425 |
+ return portage.match_from_list(atom, cp_list) |
426 |
+ |
427 |
+ def aux_get(self, cpv, attrs, myrepo = None): |
428 |
+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION": |
429 |
+ try: |
430 |
+ return [self._desc_cache[cpv]] |
431 |
+ except KeyError: |
432 |
+ pass |
433 |
+ return self._portdb.aux_get(cpv, attrs) |
434 |
+ |
435 |
+ |
436 |
+class IndexedVardb(object): |
437 |
+ """ |
438 |
+ A vardbapi interface that sacrifices validation in order to |
439 |
+ improve performance. It takes advantage of vardbapi._aux_cache, |
440 |
+ which is backed by vardb_metadata.pickle. Since _aux_cache is |
441 |
+ not updated for every single merge/unmerge (see |
442 |
+ _aux_cache_threshold), the list of packages is obtained directly |
443 |
+ from the real vardbapi instance. If a package is missing from |
444 |
+ _aux_cache, then its metadata is obtained using the normal |
445 |
+ (validated) vardbapi.aux_get method. |
446 |
+ """ |
447 |
+ def __init__(self, vardb): |
448 |
+ self._vardb = vardb |
449 |
+ self._aux_cache_keys = vardb._aux_cache_keys |
450 |
+ self._cpv_sort_ascending = vardb._cpv_sort_ascending |
451 |
+ self._cp_map = {} |
452 |
+ self.cpv_exists = vardb.cpv_exists |
453 |
+ |
454 |
+ def cp_all(self): |
455 |
+ if self._cp_map: |
456 |
+ return list(self._cp_map) |
457 |
+ cp_map = self._cp_map |
458 |
+ for cpv in self._vardb.cpv_all(): |
459 |
+ cp = portage.cpv_getkey(cpv) |
460 |
+ if cp is not None: |
461 |
+ cp_list = cp_map.get(cp) |
462 |
+ if cp_list is None: |
463 |
+ cp_list = [] |
464 |
+ cp_map[cp] = cp_list |
465 |
+ cp_list.append(_pkg_str(cpv)) |
466 |
+ return list(cp_map) |
467 |
+ |
468 |
+ def match(self, atom): |
469 |
+ if not isinstance(atom, Atom): |
470 |
+ atom = Atom(atom) |
471 |
+ cp_list = self._cp_map.get(atom.cp) |
472 |
+ if cp_list is None: |
473 |
+ return [] |
474 |
+ self._vardb._cpv_sort_ascending(cp_list) |
475 |
+ return portage.match_from_list(atom, cp_list) |
476 |
+ |
477 |
+ def aux_get(self, cpv, attrs, myrepo = None): |
478 |
+ pkg_data = self._vardb._aux_cache["packages"].get(cpv) |
479 |
+ if not isinstance(pkg_data, tuple) or \ |
480 |
+ len(pkg_data) != 2 or \ |
481 |
+ not isinstance(pkg_data[1], dict): |
482 |
+ pkg_data = None |
483 |
+ if pkg_data is None: |
484 |
+ # It may be missing from _aux_cache due to |
485 |
+ # _aux_cache_threshold. |
486 |
+ return self._vardb.aux_get(cpv, attrs) |
487 |
+ metadata = pkg_data[1] |
488 |
+ return [metadata.get(k, "") for k in attrs] |
489 |
-- |
490 |
2.0.4 |