1 |
On Sun, 19 Oct 2014 14:51:05 -0700 |
2 |
Zac Medico <zmedico@g.o> wrote: |
3 |
|
4 |
> This updated patch changes the index format to use spaces instead of |
5 |
> commas, for readability. This example given in man/portage.5: |
6 |
> |
7 |
> sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor |
8 |
> sys-apps/usleep 0.1: A wrapper for usleep |
9 |
> |
10 |
> @@ -1057,6 +1086,12 @@ def egencache_main(args): |
11 |
> |
12 |
... |
13 |
/snip |
14 |
|
15 |
All of the above looks good to me. |
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
> b/pym/_emerge/search.py index 4b0fd9f..37fee20 100644 |
22 |
> --- a/pym/_emerge/search.py |
23 |
> +++ b/pym/_emerge/search.py |
24 |
> @@ -3,13 +3,17 @@ |
25 |
> |
26 |
> from __future__ import print_function |
27 |
> |
28 |
> +import io |
29 |
> import re |
30 |
> import portage |
31 |
> -from portage import os |
32 |
> +from portage import os, _encodings |
33 |
> from portage.dbapi.porttree import _parse_uri_map |
34 |
> +from portage.dep import Atom |
35 |
> +from portage.exception import InvalidAtom, InvalidData |
36 |
> from portage.localization import localized_size |
37 |
> from portage.output import bold, bold as white, darkgreen, green, |
38 |
> red from portage.util import writemsg_stdout |
39 |
> +from portage.versions import _pkg_str |
40 |
> |
41 |
> from _emerge.Package import Package |
42 |
> |
43 |
> @@ -25,12 +29,11 @@ class search(object): |
44 |
> # public interface |
45 |
> # |
46 |
> def __init__(self, root_config, spinner, searchdesc, |
47 |
> - verbose, usepkg, usepkgonly): |
48 |
> + verbose, usepkg, usepkgonly, search_index = True): |
49 |
> """Searches the available and installed packages for |
50 |
> the supplied search key. The list of available and installed packages |
51 |
> is created at object instantiation. This makes successive searches |
52 |
> faster.""" self.settings = root_config.settings |
53 |
> - self.vartree = root_config.trees["vartree"] |
54 |
> self.spinner = spinner |
55 |
> self.verbose = verbose |
56 |
> self.searchdesc = searchdesc |
57 |
> @@ -45,6 +48,10 @@ class search(object): |
58 |
> bindb = root_config.trees["bintree"].dbapi |
59 |
> vardb = root_config.trees["vartree"].dbapi |
60 |
> |
61 |
> + if search_index: |
62 |
> + portdb = IndexedPortdb(portdb) |
63 |
> + vardb = IndexedVardb(vardb) |
64 |
> + |
65 |
> if not usepkgonly and portdb._have_root_eclass_dir: |
66 |
> self._dbs.append(portdb) |
67 |
> |
68 |
> @@ -53,6 +60,7 @@ class search(object): |
69 |
> |
70 |
> self._dbs.append(vardb) |
71 |
> self._portdb = portdb |
72 |
> + self._vardb = vardb |
73 |
> |
74 |
> def _spinner_update(self): |
75 |
> if self.spinner: |
76 |
> @@ -97,7 +105,7 @@ class search(object): |
77 |
> return {} |
78 |
> |
79 |
> def _visible(self, db, cpv, metadata): |
80 |
> - installed = db is self.vartree.dbapi |
81 |
> + installed = db is self._vardb |
82 |
> built = installed or db is not self._portdb |
83 |
> pkg_type = "ebuild" |
84 |
> if installed: |
85 |
> @@ -208,6 +216,22 @@ class search(object): |
86 |
> masked=1 |
87 |
> self.matches["pkg"].append([package,masked]) |
88 |
> elif self.searchdesc: # DESCRIPTION searching |
89 |
> + # Check for DESCRIPTION match first, |
90 |
> so that we can skip |
91 |
> + # the expensive visiblity check if |
92 |
> it doesn't match. |
93 |
> + full_package = |
94 |
> self._xmatch("match-all", package) |
95 |
> + if not full_package: |
96 |
> + continue |
97 |
> + full_package = full_package[-1] |
98 |
> + try: |
99 |
> + full_desc = self._aux_get( |
100 |
> + full_package, |
101 |
> ["DESCRIPTION"])[0] |
102 |
> + except KeyError: |
103 |
> + portage.writemsg( |
104 |
> + "emerge: search: |
105 |
> aux_get() failed, skipping\n", |
106 |
> + noiselevel=-1) |
107 |
> + continue |
108 |
> + if not |
109 |
> self.searchre.search(full_desc): |
110 |
> + continue |
111 |
> full_package = |
112 |
> self._xmatch("bestmatch-visible", package) if not full_package: |
113 |
> #no match found; we don't |
114 |
> want to query description @@ -217,14 +241,8 @@ class search(object): |
115 |
> continue |
116 |
> else: |
117 |
> masked=1 |
118 |
> - try: |
119 |
> - full_desc = self._aux_get( |
120 |
> - full_package, |
121 |
> ["DESCRIPTION"])[0] |
122 |
> - except KeyError: |
123 |
> - print("emerge: search: |
124 |
> aux_get() failed, skipping") |
125 |
> - continue |
126 |
> - if self.searchre.search(full_desc): |
127 |
> - |
128 |
> self.matches["desc"].append([full_package,masked]) + |
129 |
> + |
130 |
> self.matches["desc"].append((full_package, masked)) |
131 |
> self.sdict = self.setconfig.getSets() |
132 |
> for setname in self.sdict: |
133 |
> @@ -262,7 +280,7 @@ class search(object): |
134 |
> bold(self.searchkey) + " ]\n") |
135 |
> msg.append("[ Applications found : " + \ |
136 |
> bold(str(self.mlen)) + " ]\n\n") |
137 |
> - vardb = self.vartree.dbapi |
138 |
> + vardb = self._vardb |
139 |
> metadata_keys = set(Package.metadata_keys) |
140 |
> metadata_keys.update(["DESCRIPTION", "HOMEPAGE", |
141 |
> "LICENSE", "SRC_URI"]) metadata_keys = tuple(metadata_keys) |
142 |
> @@ -372,7 +390,11 @@ class search(object): |
143 |
> # private interface |
144 |
> # |
145 |
> def getInstallationStatus(self,package): |
146 |
> - installed_package = |
147 |
> self.vartree.dep_bestmatch(package) |
148 |
> + installed_package = self._vardb.match(package) |
149 |
> + if installed_package: |
150 |
> + installed_package = installed_package[-1] |
151 |
> + else: |
152 |
> + installed_package = "" |
153 |
> result = "" |
154 |
> version = |
155 |
> self.getVersion(installed_package,search.VERSION_RELEASE) if |
156 |
> len(version) > 0: @@ -392,3 +414,160 @@ class search(object): |
157 |
> result = "" |
158 |
> return result |
159 |
> |
160 |
|
161 |
|
162 |
|
163 |
|
164 |
|
165 |
|
166 |
|
167 |
|
168 |
What I wonder is why the following two classes aren't in the portage |
169 |
namespace. There is far too much logic embedded in the _emerge |
170 |
namespace. They most probably belong under the portage/dbapi subpkg. Looking at |
171 |
them, they do look very similar to the portdbapi and vardbapi classes. |
172 |
They are just stripped down and optimised for this data. They also |
173 |
don't seem to use any _emerge specific namespace modules that I saw. |
174 |
|
175 |
Perhaps with a file name of index.py or indexers.py. |
176 |
|
177 |
|
178 |
> + |
179 |
> +class IndexedPortdb(object): |
180 |
> + """ |
181 |
> + A portdbapi interface that uses a package description index |
182 |
> to |
183 |
|
184 |
|
185 |
*** See ^^^ even the second word of the class description seems |
186 |
to agree with me :) |
187 |
|
188 |
|
189 |
> + improve performance. If the description index is missing for |
190 |
> a |
191 |
> + particular repository, then all metadata for that repository |
192 |
> is |
193 |
> + obtained using the normal pordbapi.aux_get method. |
194 |
> + """ |
195 |
> + def __init__(self, portdb): |
196 |
> + self._portdb = portdb |
197 |
> + self.cpv_exists = portdb.cpv_exists |
198 |
> + self.getFetchMap = portdb.getFetchMap |
199 |
> + self.findname = portdb.findname |
200 |
> + self._aux_cache_keys = portdb._aux_cache_keys |
201 |
> + self._have_root_eclass_dir = |
202 |
> portdb._have_root_eclass_dir |
203 |
> + self._cpv_sort_ascending = portdb._cpv_sort_ascending |
204 |
> + self._desc_cache = None |
205 |
> + self._cp_map = None |
206 |
> + |
207 |
> + def _init_index(self): |
208 |
> + cp_map = {} |
209 |
> + desc_cache = {} |
210 |
> + for repo_path in self._portdb.porttrees: |
211 |
> + outside_repo = |
212 |
> os.path.join(self._portdb.depcachedir, |
213 |
> + repo_path.lstrip(os.sep)) |
214 |
> + for parent_dir in (repo_path, outside_repo): |
215 |
> + file_path = os.path.join(parent_dir, |
216 |
> + "metadata", "pkg_desc_index") |
217 |
> + |
218 |
> + try: |
219 |
> + with io.open(file_path, |
220 |
> + |
221 |
> encoding=_encodings["repo.content"]) as f: |
222 |
> + for line in f: |
223 |
> + try: |
224 |
> + |
225 |
> pkgs, desc = line.split(":", 1) |
226 |
> + except |
227 |
> ValueError: |
228 |
> + |
229 |
> continue |
230 |
> + desc = |
231 |
> desc.strip() |
232 |
> + try: |
233 |
> + cp, |
234 |
> pkgs = pkgs.split(" ", 1) |
235 |
> + except |
236 |
> ValueError: |
237 |
> + |
238 |
> continue |
239 |
> + if not cp: |
240 |
> + |
241 |
> continue |
242 |
> + try: |
243 |
> + atom |
244 |
> = Atom(cp) |
245 |
> + except |
246 |
> InvalidAtom: |
247 |
> + |
248 |
> continue |
249 |
> + if cp != |
250 |
> atom.cp: |
251 |
> + |
252 |
> continue |
253 |
> + cp_list = |
254 |
> cp_map.get(cp) |
255 |
> + if cp_list |
256 |
> is None: |
257 |
> + |
258 |
> cp_list = [] |
259 |
> + |
260 |
> cp_map[cp] = cp_list |
261 |
> + for ver in |
262 |
> pkgs.split(): |
263 |
> + try: |
264 |
> + |
265 |
> cpv = _pkg_str(cp + "-" + ver) |
266 |
> + |
267 |
> except InvalidData: |
268 |
> + |
269 |
> pass |
270 |
> + else: |
271 |
> + |
272 |
> cp_list.append(cpv) |
273 |
> + |
274 |
> desc_cache[cpv] = desc |
275 |
> + except IOError: |
276 |
> + pass |
277 |
> + else: |
278 |
> + break |
279 |
> + else: |
280 |
> + # No descriptions index was found, |
281 |
> so populate |
282 |
> + # cp_map the slow way. |
283 |
> + for cp in |
284 |
> self._portdb.cp_all(trees=[repo_path]): |
285 |
> + cp_list = cp_map.get(cp) |
286 |
> + if cp_list is None: |
287 |
> + cp_list = [] |
288 |
> + cp_map[cp] = cp_list |
289 |
> + for cpv in |
290 |
> self._portdb.cp_list(cp, mytree=repo_path): |
291 |
> + if cpv not in |
292 |
> cp_list: |
293 |
> + |
294 |
> cp_list.append(_pkg_str(cpv)) + |
295 |
> + self._desc_cache = desc_cache |
296 |
> + self._cp_map = cp_map |
297 |
> + |
298 |
> + def cp_all(self): |
299 |
> + if self._cp_map is None: |
300 |
> + self._init_index() |
301 |
> + return list(self._cp_map) |
302 |
> + |
303 |
> + def match(self, atom): |
304 |
> + if not isinstance(atom, Atom): |
305 |
> + atom = Atom(atom) |
306 |
> + cp_list = self._cp_map.get(atom.cp) |
307 |
> + if cp_list is None: |
308 |
> + return [] |
309 |
> + self._portdb._cpv_sort_ascending(cp_list) |
310 |
> + return portage.match_from_list(atom, cp_list) |
311 |
> + |
312 |
> + def aux_get(self, cpv, attrs, myrepo = None): |
313 |
> + if len(attrs) == 1 and attrs[0] == "DESCRIPTION": |
314 |
> + try: |
315 |
> + return [self._desc_cache[cpv]] |
316 |
> + except KeyError: |
317 |
> + pass |
318 |
> + return self._portdb.aux_get(cpv, attrs) |
319 |
> + |
320 |
> + |
321 |
> +class IndexedVardb(object): |
322 |
> + """ |
323 |
> + A vardbapi interface that sacrifices validation in order to |
324 |
> + improve performance. It takes advantage of |
325 |
> vardbdbapi._aux_cache, |
326 |
> + which is backed by vdb_metadata.pickle. Since _aux_cache is |
327 |
> + not updated for every single merge/unmerge (see |
328 |
> + _aux_cache_threshold), the list of packages is obtained |
329 |
> directly |
330 |
> + from the real vardbapi instance. If a package is missing from |
331 |
> + _aux_cache, then its metadata is obtained using the normal |
332 |
> + (validated) vardbapi.aux_get method. |
333 |
> + """ |
334 |
> + def __init__(self, vardb): |
335 |
> + self._vardb = vardb |
336 |
> + self._aux_cache_keys = vardb._aux_cache_keys |
337 |
> + self._cpv_sort_ascending = vardb._cpv_sort_ascending |
338 |
> + self._cp_map = {} |
339 |
> + self.cpv_exists = vardb.cpv_exists |
340 |
> + |
341 |
> + def cp_all(self): |
342 |
> + if self._cp_map: |
343 |
> + return list(self._cp_map) |
344 |
> + cp_map = self._cp_map |
345 |
> + for cpv in self._vardb.cpv_all(): |
346 |
> + cp = portage.cpv_getkey(cpv) |
347 |
> + if cp is not None: |
348 |
> + cp_list = cp_map.get(cp) |
349 |
> + if cp_list is None: |
350 |
> + cp_list = [] |
351 |
> + cp_map[cp] = cp_list |
352 |
> + cp_list.append(_pkg_str(cpv)) |
353 |
> + return list(cp_map) |
354 |
> + |
355 |
> + def match(self, atom): |
356 |
> + if not isinstance(atom, Atom): |
357 |
> + atom = Atom(atom) |
358 |
> + cp_list = self._cp_map.get(atom.cp) |
359 |
> + if cp_list is None: |
360 |
> + return [] |
361 |
> + self._vardb._cpv_sort_ascending(cp_list) |
362 |
> + return portage.match_from_list(atom, cp_list) |
363 |
> + |
364 |
> + def aux_get(self, cpv, attrs, myrepo = None): |
365 |
> + pkg_data = |
366 |
> self._vardb._aux_cache["packages"].get(cpv) |
367 |
> + if not isinstance(pkg_data, tuple) or \ |
368 |
> + len(pkg_data) != 2 or \ |
369 |
> + not isinstance(pkg_data[1], dict): |
370 |
> + pkg_data = None |
371 |
> + if pkg_data is None: |
372 |
> + # It may be missing from _aux_cache due to |
373 |
> + # _aux_cache_threshold. |
374 |
> + return self._vardb.aux_get(cpv, attrs) |
375 |
> + metadata = pkg_data[1] |
376 |
> + return [metadata.get(k, "") for k in attrs] |
377 |
|
378 |
|
379 |
Otherwise it looks good. |
380 |
-- |
381 |
Brian Dolbec <dolsen> |