1 |
This adds an egencache --update-pkg-desc-index action which generates |
2 |
a plain-text index of package names, versions, and descriptions. The |
3 |
index can then be used to optimize emerge --search / --searchdesc |
4 |
actions. If the package description index is missing from a particular |
5 |
repository, then all metadata for that repository is obtained using the |
6 |
normal portdbapi.aux_get method. |
7 |
|
8 |
Searching of installed packages is optimized to take advantage of |
9 |
vardbapi._aux_cache, which is backed by vardb_metadata.pickle. |
10 |
See the IndexedVardb docstring for some more details. |
11 |
|
12 |
X-Gentoo-Bug: 525718 |
13 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 |
14 |
--- |
15 |
bin/egencache | 43 ++++++++++- |
16 |
man/egencache.1 | 4 ++ |
17 |
man/portage.5 | 6 ++ |
18 |
pym/_emerge/search.py | 196 ++++++++++++++++++++++++++++++++++++++++++++++---- |
19 |
4 files changed, 232 insertions(+), 17 deletions(-) |
20 |
|
21 |
diff --git a/bin/egencache b/bin/egencache |
22 |
index e366058..90d5e68 100755 |
23 |
--- a/bin/egencache |
24 |
+++ b/bin/egencache |
25 |
@@ -57,7 +57,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler |
26 |
from portage.util._eventloop.global_event_loop import global_event_loop |
27 |
from portage import cpv_getkey |
28 |
from portage.dep import Atom, isjustname |
29 |
-from portage.versions import pkgsplit, vercmp |
30 |
+from portage.versions import pkgsplit, vercmp, _pkg_str |
31 |
|
32 |
try: |
33 |
from xml.etree import ElementTree |
34 |
@@ -91,6 +91,9 @@ def parse_args(args): |
35 |
actions.add_argument("--update-changelogs", |
36 |
action="store_true", |
37 |
help="update the ChangeLog files from SCM logs") |
38 |
+ actions.add_argument("--update-pkg-desc-index", |
39 |
+ action="store_true", |
40 |
+ help="update package description index") |
41 |
actions.add_argument("--update-manifests", |
42 |
action="store_true", |
43 |
help="update manifests") |
44 |
@@ -451,6 +454,35 @@ class GenCache(object): |
45 |
if hasattr(trg_cache, '_prune_empty_dirs'): |
46 |
trg_cache._prune_empty_dirs() |
47 |
|
48 |
+class GenPkgDescIndex(object): |
49 |
+ def __init__(self, portdb, output_file): |
50 |
+ self.returncode = os.EX_OK |
51 |
+ self._portdb = portdb |
52 |
+ self._output_file = output_file |
53 |
+ |
54 |
+ def run(self): |
55 |
+ |
56 |
+ portage.util.ensure_dirs(os.path.dirname(self._output_file)) |
57 |
+ f = portage.util.atomic_ofstream(self._output_file, |
58 |
+ encoding=_encodings["repo.content"]) |
59 |
+ |
60 |
+ portdb = self._portdb |
61 |
+ for cp in portdb.cp_all(): |
62 |
+ pkgs = portdb.cp_list(cp) |
63 |
+ if not pkgs: |
64 |
+ continue |
65 |
+ desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"]) |
66 |
+ |
67 |
+ if len(pkgs) == 1: |
68 |
+ output = "%s: %s\n" % (pkgs[0], desc) |
69 |
+ else: |
70 |
+ output = "%s,%s: %s\n" % (pkgs[0], |
71 |
+ ",".join(_pkg_str(cpv).version |
72 |
+ for cpv in pkgs[1:]), desc) |
73 |
+ f.write(output) |
74 |
+ |
75 |
+ f.close() |
76 |
+ |
77 |
class GenUseLocalDesc(object): |
78 |
def __init__(self, portdb, output=None, |
79 |
preserve_comments=False): |
80 |
@@ -893,7 +925,8 @@ def egencache_main(args): |
81 |
local_config=False, env=env) |
82 |
|
83 |
if not (options.update or options.update_use_local_desc or |
84 |
- options.update_changelogs or options.update_manifests): |
85 |
+ options.update_changelogs or options.update_manifests or |
86 |
+ options.update_pkg_desc_index): |
87 |
parser.error('No action specified') |
88 |
return 1 |
89 |
|
90 |
@@ -1057,6 +1090,12 @@ def egencache_main(args): |
91 |
else: |
92 |
ret.append(scheduler.returncode) |
93 |
|
94 |
+ if options.update_pkg_desc_index: |
95 |
+ gen_index = GenPkgDescIndex(portdb, os.path.join( |
96 |
+ repo_config.location, "metadata", "pkg_desc_index")) |
97 |
+ gen_index.run() |
98 |
+ ret.append(gen_index.returncode) |
99 |
+ |
100 |
if options.update_use_local_desc: |
101 |
gen_desc = GenUseLocalDesc(portdb, |
102 |
output=options.uld_output, |
103 |
diff --git a/man/egencache.1 b/man/egencache.1 |
104 |
index f71feb3..3a3197f 100644 |
105 |
--- a/man/egencache.1 |
106 |
+++ b/man/egencache.1 |
107 |
@@ -19,6 +19,10 @@ for the details on package atom syntax. |
108 |
.BR "\-\-update\-changelogs" |
109 |
Update the ChangeLog files from SCM logs (supported only in git repos). |
110 |
.TP |
111 |
+.BR "\-\-update\-pkg\-desc\-index" |
112 |
+Update the package description index which is located at |
113 |
+\fImetadata/pkg_desc_index\fR in the repository. |
114 |
+.TP |
115 |
.BR "\-\-update\-use\-local\-desc" |
116 |
Update the \fIprofiles/use.local.desc\fR file from metadata.xml. |
117 |
.TP |
118 |
diff --git a/man/portage.5 b/man/portage.5 |
119 |
index e399f0f..26856d1 100644 |
120 |
--- a/man/portage.5 |
121 |
+++ b/man/portage.5 |
122 |
@@ -75,6 +75,7 @@ user\-defined package sets |
123 |
.BR /usr/portage/metadata/ |
124 |
.nf |
125 |
layout.conf |
126 |
+pkg_desc_index |
127 |
.fi |
128 |
.TP |
129 |
.BR /usr/portage/profiles/ |
130 |
@@ -1110,6 +1111,11 @@ cache\-formats = md5-dict pms |
131 |
profile\-formats = portage-2 |
132 |
.fi |
133 |
.RE |
134 |
+.TP |
135 |
+.BR pkg_desc_index |
136 |
+This is an index of packages and descriptions which may be generated |
137 |
+by \fBegencache\fR(1) in order to optimize \fBemerge\fR(1) search |
138 |
+actions. |
139 |
.RE |
140 |
.TP |
141 |
.BR /usr/portage/profiles/ |
142 |
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py |
143 |
index 4b0fd9f..bf15f11 100644 |
144 |
--- a/pym/_emerge/search.py |
145 |
+++ b/pym/_emerge/search.py |
146 |
@@ -3,13 +3,17 @@ |
147 |
|
148 |
from __future__ import print_function |
149 |
|
150 |
+import io |
151 |
import re |
152 |
import portage |
153 |
-from portage import os |
154 |
+from portage import os, _encodings |
155 |
from portage.dbapi.porttree import _parse_uri_map |
156 |
+from portage.dep import Atom |
157 |
+from portage.exception import InvalidData |
158 |
from portage.localization import localized_size |
159 |
from portage.output import bold, bold as white, darkgreen, green, red |
160 |
from portage.util import writemsg_stdout |
161 |
+from portage.versions import _pkg_str |
162 |
|
163 |
from _emerge.Package import Package |
164 |
|
165 |
@@ -30,7 +34,6 @@ class search(object): |
166 |
The list of available and installed packages is created at object instantiation. |
167 |
This makes successive searches faster.""" |
168 |
self.settings = root_config.settings |
169 |
- self.vartree = root_config.trees["vartree"] |
170 |
self.spinner = spinner |
171 |
self.verbose = verbose |
172 |
self.searchdesc = searchdesc |
173 |
@@ -41,9 +44,9 @@ class search(object): |
174 |
|
175 |
self._dbs = [] |
176 |
|
177 |
- portdb = root_config.trees["porttree"].dbapi |
178 |
+ portdb = IndexedPortdb(root_config.trees["porttree"].dbapi) |
179 |
bindb = root_config.trees["bintree"].dbapi |
180 |
- vardb = root_config.trees["vartree"].dbapi |
181 |
+ vardb = IndexedVardb(root_config.trees["vartree"].dbapi) |
182 |
|
183 |
if not usepkgonly and portdb._have_root_eclass_dir: |
184 |
self._dbs.append(portdb) |
185 |
@@ -53,6 +56,7 @@ class search(object): |
186 |
|
187 |
self._dbs.append(vardb) |
188 |
self._portdb = portdb |
189 |
+ self._vardb = vardb |
190 |
|
191 |
def _spinner_update(self): |
192 |
if self.spinner: |
193 |
@@ -97,7 +101,7 @@ class search(object): |
194 |
return {} |
195 |
|
196 |
def _visible(self, db, cpv, metadata): |
197 |
- installed = db is self.vartree.dbapi |
198 |
+ installed = db is self._vardb |
199 |
built = installed or db is not self._portdb |
200 |
pkg_type = "ebuild" |
201 |
if installed: |
202 |
@@ -208,6 +212,20 @@ class search(object): |
203 |
masked=1 |
204 |
self.matches["pkg"].append([package,masked]) |
205 |
elif self.searchdesc: # DESCRIPTION searching |
206 |
+ # Check for DESCRIPTION match first, so that we can skip |
207 |
+ # the expensive visibility check if it doesn't match. |
208 |
+ full_package = portage.best( |
209 |
+ self._xmatch("match-all", package)) |
210 |
+ try: |
211 |
+ full_desc = self._aux_get( |
212 |
+ full_package, ["DESCRIPTION"])[0] |
213 |
+ except KeyError: |
214 |
+ portage.writemsg( |
215 |
+ "emerge: search: aux_get() failed, skipping\n", |
216 |
+ noiselevel=-1) |
217 |
+ continue |
218 |
+ if not self.searchre.search(full_desc): |
219 |
+ continue |
220 |
full_package = self._xmatch("bestmatch-visible", package) |
221 |
if not full_package: |
222 |
#no match found; we don't want to query description |
223 |
@@ -217,14 +235,8 @@ class search(object): |
224 |
continue |
225 |
else: |
226 |
masked=1 |
227 |
- try: |
228 |
- full_desc = self._aux_get( |
229 |
- full_package, ["DESCRIPTION"])[0] |
230 |
- except KeyError: |
231 |
- print("emerge: search: aux_get() failed, skipping") |
232 |
- continue |
233 |
- if self.searchre.search(full_desc): |
234 |
- self.matches["desc"].append([full_package,masked]) |
235 |
+ |
236 |
+ self.matches["desc"].append((full_package, masked)) |
237 |
|
238 |
self.sdict = self.setconfig.getSets() |
239 |
for setname in self.sdict: |
240 |
@@ -262,7 +274,7 @@ class search(object): |
241 |
bold(self.searchkey) + " ]\n") |
242 |
msg.append("[ Applications found : " + \ |
243 |
bold(str(self.mlen)) + " ]\n\n") |
244 |
- vardb = self.vartree.dbapi |
245 |
+ vardb = self._vardb |
246 |
metadata_keys = set(Package.metadata_keys) |
247 |
metadata_keys.update(["DESCRIPTION", "HOMEPAGE", "LICENSE", "SRC_URI"]) |
248 |
metadata_keys = tuple(metadata_keys) |
249 |
@@ -372,7 +384,11 @@ class search(object): |
250 |
# private interface |
251 |
# |
252 |
def getInstallationStatus(self,package): |
253 |
- installed_package = self.vartree.dep_bestmatch(package) |
254 |
+ installed_package = self._vardb.match(package) |
255 |
+ if installed_package: |
256 |
+ installed_package = installed_package[-1] |
257 |
+ else: |
258 |
+ installed_package = "" |
259 |
result = "" |
260 |
version = self.getVersion(installed_package,search.VERSION_RELEASE) |
261 |
if len(version) > 0: |
262 |
@@ -392,3 +408,153 @@ class search(object): |
263 |
result = "" |
264 |
return result |
265 |
|
266 |
+ |
267 |
+class IndexedPortdb(object): |
268 |
+ """ |
269 |
+ A portdbapi interface that uses a package description index to |
270 |
+ improve performance. If the description index is missing for a |
271 |
+ particular repository, then all metadata for that repository is |
272 |
+ obtained using the normal portdbapi.aux_get method. |
273 |
+ """ |
274 |
+ def __init__(self, portdb): |
275 |
+ self._portdb = portdb |
276 |
+ self.cpv_exists = portdb.cpv_exists |
277 |
+ self.getFetchMap = portdb.getFetchMap |
278 |
+ self.findname = portdb.findname |
279 |
+ self._aux_cache_keys = portdb._aux_cache_keys |
280 |
+ self._have_root_eclass_dir = portdb._have_root_eclass_dir |
281 |
+ self._cpv_sort_ascending = portdb._cpv_sort_ascending |
282 |
+ self._desc_cache = None |
283 |
+ self._cp_map = None |
284 |
+ |
285 |
+ def _init_index(self): |
286 |
+ cp_map = {} |
287 |
+ desc_cache = {} |
288 |
+ for repo_path in self._portdb.porttrees: |
289 |
+ outside_repo = os.path.join(self._portdb.depcachedir, |
290 |
+ repo_path.lstrip(os.sep)) |
291 |
+ for parent_dir in (repo_path, outside_repo): |
292 |
+ file_path = os.path.join(parent_dir, |
293 |
+ "metadata", "pkg_desc_index") |
294 |
+ |
295 |
+ try: |
296 |
+ with io.open(file_path, |
297 |
+ encoding=_encodings["repo.content"]) as f: |
298 |
+ for line in f: |
299 |
+ pkgs, desc = line.split(":", 1) |
300 |
+ desc = desc.strip() |
301 |
+ pkgs = pkgs.split(",") |
302 |
+ if not pkgs[0]: |
303 |
+ continue |
304 |
+ try: |
305 |
+ pkg = _pkg_str(pkgs[0]) |
306 |
+ except InvalidData: |
307 |
+ continue |
308 |
+ cp_list = cp_map.get(pkg.cp) |
309 |
+ if cp_list is None: |
310 |
+ cp_list = [] |
311 |
+ cp_map[pkg.cp] = cp_list |
312 |
+ cp_list.append(pkg) |
313 |
+ for ver in pkgs[1:]: |
314 |
+ try: |
315 |
+ cp_list.append( |
316 |
+ _pkg_str(pkg.cp + "-" + ver)) |
317 |
+ except InvalidData: |
318 |
+ pass |
319 |
+ for cpv in cp_list: |
320 |
+ desc_cache[cpv] = desc |
321 |
+ except IOError: |
322 |
+ pass |
323 |
+ else: |
324 |
+ break |
325 |
+ else: |
326 |
+ # No descriptions index was found, so populate |
327 |
+ # cp_map the slow way. |
328 |
+ for cp in self._portdb.cp_all(trees=[repo_path]): |
329 |
+ cp_list = cp_map.get(cp) |
330 |
+ if cp_list is None: |
331 |
+ cp_list = [] |
332 |
+ cp_map[cp] = cp_list |
333 |
+ for cpv in self._portdb.cp_list(cp, mytree=repo_path): |
334 |
+ if cpv not in cp_list: |
335 |
+ cp_list.append(_pkg_str(cpv)) |
336 |
+ |
337 |
+ self._desc_cache = desc_cache |
338 |
+ self._cp_map = cp_map |
339 |
+ |
340 |
+ def cp_all(self): |
341 |
+ if self._cp_map is None: |
342 |
+ self._init_index() |
343 |
+ return list(self._cp_map) |
344 |
+ |
345 |
+ def match(self, atom): |
346 |
+ if not isinstance(atom, Atom): |
347 |
+ atom = Atom(atom) |
348 |
+ cp_list = self._cp_map.get(atom.cp) |
349 |
+ if cp_list is None: |
350 |
+ return [] |
351 |
+ self._portdb._cpv_sort_ascending(cp_list) |
352 |
+ return portage.match_from_list(atom, cp_list) |
353 |
+ |
354 |
+ def aux_get(self, cpv, attrs, myrepo = None): |
355 |
+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION": |
356 |
+ try: |
357 |
+ return [self._desc_cache[cpv]] |
358 |
+ except KeyError: |
359 |
+ pass |
360 |
+ return self._portdb.aux_get(cpv, attrs) |
361 |
+ |
362 |
+ |
363 |
+class IndexedVardb(object): |
364 |
+ """ |
365 |
+ A vardbapi interface that sacrifices validation in order to |
366 |
+ improve performance. It takes advantage of vardbapi._aux_cache, |
367 |
+ which is backed by vardb_metadata.pickle. Since _aux_cache is |
368 |
+ not updated for every single merge/unmerge (see |
369 |
+ _aux_cache_threshold), the list of packages is obtained directly |
370 |
+ from the real vardbapi instance. If a package is missing from |
371 |
+ _aux_cache, then its metadata is obtained using the normal |
372 |
+ (validated) vardbapi.aux_get method. |
373 |
+ """ |
374 |
+ def __init__(self, vardb): |
375 |
+ self._vardb = vardb |
376 |
+ self._aux_cache_keys = vardb._aux_cache_keys |
377 |
+ self._cpv_sort_ascending = vardb._cpv_sort_ascending |
378 |
+ self._cp_map = {} |
379 |
+ self.cpv_exists = vardb.cpv_exists |
380 |
+ |
381 |
+ def cp_all(self): |
382 |
+ if self._cp_map: |
383 |
+ return list(self._cp_map) |
384 |
+ cp_map = self._cp_map |
385 |
+ for cpv in self._vardb.cpv_all(): |
386 |
+ cp = portage.cpv_getkey(cpv) |
387 |
+ if cp is not None: |
388 |
+ cp_list = cp_map.get(cp) |
389 |
+ if cp_list is None: |
390 |
+ cp_list = [] |
391 |
+ cp_map[cp] = cp_list |
392 |
+ cp_list.append(_pkg_str(cpv)) |
393 |
+ return list(cp_map) |
394 |
+ |
395 |
+ def match(self, atom): |
396 |
+ if not isinstance(atom, Atom): |
397 |
+ atom = Atom(atom) |
398 |
+ cp_list = self._cp_map.get(atom.cp) |
399 |
+ if cp_list is None: |
400 |
+ return [] |
401 |
+ self._vardb._cpv_sort_ascending(cp_list) |
402 |
+ return portage.match_from_list(atom, cp_list) |
403 |
+ |
404 |
+ def aux_get(self, cpv, attrs, myrepo = None): |
405 |
+ pkg_data = self._vardb._aux_cache["packages"].get(cpv) |
406 |
+ if not isinstance(pkg_data, tuple) or \ |
407 |
+ len(pkg_data) != 2 or \ |
408 |
+ not isinstance(pkg_data[1], dict): |
409 |
+ pkg_data = None |
410 |
+ if pkg_data is None: |
411 |
+ # It may be missing from _aux_cache due to |
412 |
+ # _aux_cache_threshold. |
413 |
+ return self._vardb.aux_get(cpv, attrs) |
414 |
+ metadata = pkg_data[1] |
415 |
+ return [metadata.get(k, "") for k in attrs] |
416 |
-- |
417 |
2.0.4 |