Gentoo Archives: gentoo-portage-dev

From: Brian Dolbec <dolsen@g.o>
To: gentoo-portage-dev@l.g.o
Subject: Re: [gentoo-portage-dev] Re: [PATCH] emerge --search: use description index
Date: Thu, 23 Oct 2014 08:56:02
Message-Id: 20141023015515.26ca0c7e.dolsen@gentoo.org
In Reply to: [gentoo-portage-dev] Re: [PATCH] emerge --search: use description index by Zac Medico
1 On Sun, 19 Oct 2014 14:51:05 -0700
2 Zac Medico <zmedico@g.o> wrote:
3
4 > This updated patch changes the index format to use spaces instead of
5 > commas, for readability. This example given in man/portage.5:
6 >
7 > sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor
8 > sys-apps/usleep 0.1: A wrapper for usleep
9 >
10 > Hopeful057,6 +1086,12 @@ def egencache_main(args):
11 >
12 ...
13 /snip
14
15 All the above looks good to me
16
17
18
19
20
21 > b/pym/_emerge/search.py index 4b0fd9f..37fee20 100644
22 > --- a/pym/_emerge/search.py
23 > +++ b/pym/_emerge/search.py
24 > @@ -3,13 +3,17 @@
25 >
26 > from __future__ import print_function
27 >
28 > +import io
29 > import re
30 > import portage
31 > -from portage import os
32 > +from portage import os, _encodings
33 > from portage.dbapi.porttree import _parse_uri_map
34 > +from portage.dep import Atom
35 > +from portage.exception import InvalidAtom, InvalidData
36 > from portage.localization import localized_size
37 > from portage.output import bold, bold as white, darkgreen, green,
38 > red from portage.util import writemsg_stdout
39 > +from portage.versions import _pkg_str
40 >
41 > from _emerge.Package import Package
42 >
43 > @@ -25,12 +29,11 @@ class search(object):
44 > # public interface
45 > #
46 > def __init__(self, root_config, spinner, searchdesc,
47 > - verbose, usepkg, usepkgonly):
48 > + verbose, usepkg, usepkgonly, search_index = True):
49 > """Searches the available and installed packages for
50 > the supplied search key. The list of available and installed packages
51 > is created at object instantiation. This makes successive searches
52 > faster.""" self.settings = root_config.settings
53 > - self.vartree = root_config.trees["vartree"]
54 > self.spinner = spinner
55 > self.verbose = verbose
56 > self.searchdesc = searchdesc
57 > @@ -45,6 +48,10 @@ class search(object):
58 > bindb = root_config.trees["bintree"].dbapi
59 > vardb = root_config.trees["vartree"].dbapi
60 >
61 > + if search_index:
62 > + portdb = IndexedPortdb(portdb)
63 > + vardb = IndexedVardb(vardb)
64 > +
65 > if not usepkgonly and portdb._have_root_eclass_dir:
66 > self._dbs.append(portdb)
67 >
68 > @@ -53,6 +60,7 @@ class search(object):
69 >
70 > self._dbs.append(vardb)
71 > self._portdb = portdb
72 > + self._vardb = vardb
73 >
74 > def _spinner_update(self):
75 > if self.spinner:
76 > @@ -97,7 +105,7 @@ class search(object):
77 > return {}
78 >
79 > def _visible(self, db, cpv, metadata):
80 > - installed = db is self.vartree.dbapi
81 > + installed = db is self._vardb
82 > built = installed or db is not self._portdb
83 > pkg_type = "ebuild"
84 > if installed:
85 > @@ -208,6 +216,22 @@ class search(object):
86 > masked=1
87 > self.matches["pkg"].append([package,masked])
88 > elif self.searchdesc: # DESCRIPTION searching
89 > + # Check for DESCRIPTION match first,
90 > so that we can skip
91 > + # the expensive visiblity check if
92 > it doesn't match.
93 > + full_package =
94 > self._xmatch("match-all", package)
95 > + if not full_package:
96 > + continue
97 > + full_package = full_package[-1]
98 > + try:
99 > + full_desc = self._aux_get(
100 > + full_package,
101 > ["DESCRIPTION"])[0]
102 > + except KeyError:
103 > + portage.writemsg(
104 > + "emerge: search:
105 > aux_get() failed, skipping\n",
106 > + noiselevel=-1)
107 > + continue
108 > + if not
109 > self.searchre.search(full_desc):
110 > + continue
111 > full_package =
112 > self._xmatch("bestmatch-visible", package) if not full_package:
113 > #no match found; we don't
114 > want to query description @@ -217,14 +241,8 @@ class search(object):
115 > continue
116 > else:
117 > masked=1
118 > - try:
119 > - full_desc = self._aux_get(
120 > - full_package,
121 > ["DESCRIPTION"])[0]
122 > - except KeyError:
123 > - print("emerge: search:
124 > aux_get() failed, skipping")
125 > - continue
126 > - if self.searchre.search(full_desc):
127 > -
128 > self.matches["desc"].append([full_package,masked]) +
129 > +
130 > self.matches["desc"].append((full_package, masked))
131 > self.sdict = self.setconfig.getSets()
132 > for setname in self.sdict:
133 > @@ -262,7 +280,7 @@ class search(object):
134 > bold(self.searchkey) + " ]\n")
135 > msg.append("[ Applications found : " + \
136 > bold(str(self.mlen)) + " ]\n\n")
137 > - vardb = self.vartree.dbapi
138 > + vardb = self._vardb
139 > metadata_keys = set(Package.metadata_keys)
140 > metadata_keys.update(["DESCRIPTION", "HOMEPAGE",
141 > "LICENSE", "SRC_URI"]) metadata_keys = tuple(metadata_keys)
142 > @@ -372,7 +390,11 @@ class search(object):
143 > # private interface
144 > #
145 > def getInstallationStatus(self,package):
146 > - installed_package =
147 > self.vartree.dep_bestmatch(package)
148 > + installed_package = self._vardb.match(package)
149 > + if installed_package:
150 > + installed_package = installed_package[-1]
151 > + else:
152 > + installed_package = ""
153 > result = ""
154 > version =
155 > self.getVersion(installed_package,search.VERSION_RELEASE) if
156 > len(version) > 0: @@ -392,3 +414,160 @@ class search(object):
157 > result = ""
158 > return result
159 >
160
161
162
163
164
165
166
167
168 What I wonder is why the following two classes aren't in the portage
169 namespace. There is far too much logic embedded in the _emerge
170 namespace. They most probably belong under the portage/dbapi subpkg. Looking at
171 them, they do look very similar to the portdbapi and vardbapi classes.
172 They are just stripped down and optimised for this data. They also
173 don't seem to use any _emerge-specific namespace modules that I saw.
174
175 Perhaps they could go in a file named index.py or indexers.py.
176
177
178 > +
179 > +class IndexedPortdb(object):
180 > + """
181 > + A portdbapi interface that uses a package description index
182 > to
183
184
185 *** See ^^^ even the second word of the class description seems
186 to agree with me :)
187
188
189 > + improve performance. If the description index is missing for
190 > a
191 > + particular repository, then all metadata for that repository
192 > is
193 > + obtained using the normal pordbapi.aux_get method.
194 > + """
195 > + def __init__(self, portdb):
196 > + self._portdb = portdb
197 > + self.cpv_exists = portdb.cpv_exists
198 > + self.getFetchMap = portdb.getFetchMap
199 > + self.findname = portdb.findname
200 > + self._aux_cache_keys = portdb._aux_cache_keys
201 > + self._have_root_eclass_dir =
202 > portdb._have_root_eclass_dir
203 > + self._cpv_sort_ascending = portdb._cpv_sort_ascending
204 > + self._desc_cache = None
205 > + self._cp_map = None
206 > +
207 > + def _init_index(self):
208 > + cp_map = {}
209 > + desc_cache = {}
210 > + for repo_path in self._portdb.porttrees:
211 > + outside_repo =
212 > os.path.join(self._portdb.depcachedir,
213 > + repo_path.lstrip(os.sep))
214 > + for parent_dir in (repo_path, outside_repo):
215 > + file_path = os.path.join(parent_dir,
216 > + "metadata", "pkg_desc_index")
217 > +
218 > + try:
219 > + with io.open(file_path,
220 > +
221 > encoding=_encodings["repo.content"]) as f:
222 > + for line in f:
223 > + try:
224 > +
225 > pkgs, desc = line.split(":", 1)
226 > + except
227 > ValueError:
228 > +
229 > continue
230 > + desc =
231 > desc.strip()
232 > + try:
233 > + cp,
234 > pkgs = pkgs.split(" ", 1)
235 > + except
236 > ValueError:
237 > +
238 > continue
239 > + if not cp:
240 > +
241 > continue
242 > + try:
243 > + atom
244 > = Atom(cp)
245 > + except
246 > InvalidAtom:
247 > +
248 > continue
249 > + if cp !=
250 > atom.cp:
251 > +
252 > continue
253 > + cp_list =
254 > cp_map.get(cp)
255 > + if cp_list
256 > is None:
257 > +
258 > cp_list = []
259 > +
260 > cp_map[cp] = cp_list
261 > + for ver in
262 > pkgs.split():
263 > + try:
264 > +
265 > cpv = _pkg_str(cp + "-" + ver)
266 > +
267 > except InvalidData:
268 > +
269 > pass
270 > + else:
271 > +
272 > cp_list.append(cpv)
273 > +
274 > desc_cache[cpv] = desc
275 > + except IOError:
276 > + pass
277 > + else:
278 > + break
279 > + else:
280 > + # No descriptions index was found,
281 > so populate
282 > + # cp_map the slow way.
283 > + for cp in
284 > self._portdb.cp_all(trees=[repo_path]):
285 > + cp_list = cp_map.get(cp)
286 > + if cp_list is None:
287 > + cp_list = []
288 > + cp_map[cp] = cp_list
289 > + for cpv in
290 > self._portdb.cp_list(cp, mytree=repo_path):
291 > + if cpv not in
292 > cp_list:
293 > +
294 > cp_list.append(_pkg_str(cpv)) +
295 > + self._desc_cache = desc_cache
296 > + self._cp_map = cp_map
297 > +
298 > + def cp_all(self):
299 > + if self._cp_map is None:
300 > + self._init_index()
301 > + return list(self._cp_map)
302 > +
303 > + def match(self, atom):
304 > + if not isinstance(atom, Atom):
305 > + atom = Atom(atom)
306 > + cp_list = self._cp_map.get(atom.cp)
307 > + if cp_list is None:
308 > + return []
309 > + self._portdb._cpv_sort_ascending(cp_list)
310 > + return portage.match_from_list(atom, cp_list)
311 > +
312 > + def aux_get(self, cpv, attrs, myrepo = None):
313 > + if len(attrs) == 1 and attrs[0] == "DESCRIPTION":
314 > + try:
315 > + return [self._desc_cache[cpv]]
316 > + except KeyError:
317 > + pass
318 > + return self._portdb.aux_get(cpv, attrs)
319 > +
320 > +
321 > +class IndexedVardb(object):
322 > + """
323 > + A vardbapi interface that sacrifices validation in order to
324 > + improve performance. It takes advantage of
325 > vardbdbapi._aux_cache,
326 > + which is backed by vdb_metadata.pickle. Since _aux_cache is
327 > + not updated for every single merge/unmerge (see
328 > + _aux_cache_threshold), the list of packages is obtained
329 > directly
330 > + from the real vardbapi instance. If a package is missing from
331 > + _aux_cache, then its metadata is obtained using the normal
332 > + (validated) vardbapi.aux_get method.
333 > + """
334 > + def __init__(self, vardb):
335 > + self._vardb = vardb
336 > + self._aux_cache_keys = vardb._aux_cache_keys
337 > + self._cpv_sort_ascending = vardb._cpv_sort_ascending
338 > + self._cp_map = {}
339 > + self.cpv_exists = vardb.cpv_exists
340 > +
341 > + def cp_all(self):
342 > + if self._cp_map:
343 > + return list(self._cp_map)
344 > + cp_map = self._cp_map
345 > + for cpv in self._vardb.cpv_all():
346 > + cp = portage.cpv_getkey(cpv)
347 > + if cp is not None:
348 > + cp_list = cp_map.get(cp)
349 > + if cp_list is None:
350 > + cp_list = []
351 > + cp_map[cp] = cp_list
352 > + cp_list.append(_pkg_str(cpv))
353 > + return list(cp_map)
354 > +
355 > + def match(self, atom):
356 > + if not isinstance(atom, Atom):
357 > + atom = Atom(atom)
358 > + cp_list = self._cp_map.get(atom.cp)
359 > + if cp_list is None:
360 > + return []
361 > + self._vardb._cpv_sort_ascending(cp_list)
362 > + return portage.match_from_list(atom, cp_list)
363 > +
364 > + def aux_get(self, cpv, attrs, myrepo = None):
365 > + pkg_data =
366 > self._vardb._aux_cache["packages"].get(cpv)
367 > + if not isinstance(pkg_data, tuple) or \
368 > + len(pkg_data) != 2 or \
369 > + not isinstance(pkg_data[1], dict):
370 > + pkg_data = None
371 > + if pkg_data is None:
372 > + # It may be missing from _aux_cache due to
373 > + # _aux_cache_threshold.
374 > + return self._vardb.aux_get(cpv, attrs)
375 > + metadata = pkg_data[1]
376 > + return [metadata.get(k, "") for k in attrs]
377
378
379 Otherwise it looks good.
380 --
381 Brian Dolbec <dolsen>

Replies