Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 3/5 v2] Add IndexedPortdb class.
Date: Tue, 04 Nov 2014 05:07:36
Message-Id: 1415077647-13708-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 3/5] Add IndexedPortdb class. by Zac Medico
1 The IndexedPortdb class uses pkg_desc_index to optimize searches for
2 package names and descriptions. If the package description index is
3 missing from a particular repository, then all metadata for that
4 repository is obtained using the normal portdbapi.aux_get method.
5
6 This class only implements a subset of portdbapi functionality that is
7 useful for searching pkg_desc_index incrementally. For this reason,
8 the cp_all method returns an ordered iterator instead of a list, so
9 that search results can be displayed incrementally.
10
11 X-Gentoo-Bug: 525718
12 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
13 ---
14 This updated patch has some optimizations in _init_index that should improve
15 performance for cases where some repositories are not indexed.
16
17 pym/portage/dbapi/IndexedPortdb.py | 153 +++++++++++++++++++++++++++++++++++++
18 1 file changed, 153 insertions(+)
19 create mode 100644 pym/portage/dbapi/IndexedPortdb.py
20
21 diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py
22 new file mode 100644
23 index 0000000..e95ff4b
24 --- /dev/null
25 +++ b/pym/portage/dbapi/IndexedPortdb.py
26 @@ -0,0 +1,153 @@
27 +# Copyright 2014 Gentoo Foundation
28 +# Distributed under the terms of the GNU General Public License v2
29 +
30 +import errno
31 +import io
32 +import functools
33 +import operator
34 +import os
35 +
36 +import portage
37 +from portage import _encodings
38 +from portage.dep import Atom
39 +from portage.exception import FileNotFound
40 +from portage.cache.index.IndexStreamIterator import IndexStreamIterator
41 +from portage.cache.index.pkg_desc_index import \
42 + pkg_desc_index_line_read, pkg_desc_index_node
43 +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
44 +from portage.versions import _pkg_str
45 +
class IndexedPortdb(object):
    """
    A portdbapi interface that uses a package description index to
    improve performance. If the description index is missing for a
    particular repository, then all metadata for that repository is
    obtained using the normal portdbapi.aux_get method.

    For performance reasons, the match method only supports package
    name and version constraints. For the same reason, the xmatch
    method is not implemented.

    The index is loaded lazily: nothing is read until the iterator
    returned by cp_all() is consumed, or until match() is called
    without an index having been started.
    """

    # Attributes of the wrapped portdb that are re-exported unchanged
    # on this object by __init__.
    _copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
        '_aux_cache_keys', '_cpv_sort_ascending',
        '_have_root_eclass_dir')

    def __init__(self, portdb):
        """
        @param portdb: the portdb instance to wrap; it must provide
            every attribute named in _copy_attrs
        """
        self._portdb = portdb

        for k in self._copy_attrs:
            setattr(self, k, getattr(portdb, k))

        # Both stay None until _init_index() starts running.
        self._desc_cache = None
        self._cp_map = None

    def _init_index(self):
        """
        Generator that populates self._cp_map (cp -> list of cpvs) and
        self._desc_cache (cpv -> description) while yielding each cp
        the first time it is encountered.

        Because this is a generator, the two maps are only installed
        on self once iteration begins, and they are filled
        incrementally as the caller consumes the yielded values.
        """
        cp_map = {}
        desc_cache = {}
        self._desc_cache = desc_cache
        self._cp_map = cp_map
        index_missing = []

        streams = []
        for repo_path in self._portdb.porttrees:
            # The index may live inside the repository, or in a
            # mirror of the repository path below depcachedir.
            outside_repo = os.path.join(self._portdb.depcachedir,
                repo_path.lstrip(os.sep))
            filenames = []
            for parent_dir in (repo_path, outside_repo):
                filenames.append(os.path.join(parent_dir,
                    "metadata", "pkg_desc_index"))

            repo_name = self._portdb.getRepositoryName(repo_path)

            try:
                f = None
                for filename in filenames:
                    try:
                        f = io.open(filename,
                            encoding=_encodings["repo.content"])
                    except IOError as e:
                        # A missing or stale file just means "try the
                        # next candidate"; anything else is an error.
                        if e.errno not in (errno.ENOENT, errno.ESTALE):
                            raise
                    else:
                        break

                if f is None:
                    raise FileNotFound(filename)

                streams.append(iter(IndexStreamIterator(f,
                    functools.partial(pkg_desc_index_line_read,
                    repo=repo_name))))
            except FileNotFound:
                index_missing.append(repo_path)

        if index_missing:

            def _non_indexed_stream():
                # Repositories without an index are served from the
                # wrapped portdb; no description is available for
                # them here, so desc is None.
                for cp in self._portdb.cp_all(trees=index_missing):
                    cp_list = self._portdb.cp_list(
                        cp, mytree=index_missing)
                    yield pkg_desc_index_node(cp,
                        tuple(_pkg_str(cpv) for cpv in cp_list),
                        None)

            streams.append(_non_indexed_stream())

        if streams:
            if len(streams) == 1:
                # Single stream: every node forms its own group.
                cp_group_iter = ([node] for node in streams[0])
            else:
                # NOTE(review): presumably this merges nodes that
                # share a cp across the streams -- confirm against
                # MultiIterGroupBy.
                cp_group_iter = MultiIterGroupBy(streams,
                    key=operator.attrgetter("cp"))

            for cp_group in cp_group_iter:
                cp = cp_group[0].cp

                is_new_cp = False
                cp_list = cp_map.get(cp)
                if cp_list is None:
                    is_new_cp = True
                    cp_list = []
                    cp_map[cp] = cp_list

                for entry in cp_group:
                    cp_list.extend(entry.cpv_list)
                    if entry.desc is not None:
                        for cpv in entry.cpv_list:
                            desc_cache[cpv] = entry.desc

                # Only report a cp once, after its data is in the
                # maps, so callers can query it immediately.
                if is_new_cp:
                    yield cp

    def cp_all(self):
        """
        Returns an ordered iterator instead of a list, so that search
        results can be displayed incrementally.

        If the index has not been started yet, the returned iterator
        builds it as it is consumed; otherwise the keys currently
        held in the index are returned in sorted order.
        """
        if self._cp_map is None:
            return self._init_index()
        return iter(sorted(self._cp_map))

    def match(self, atom):
        """
        For performance reasons, only package name and version
        constraints are supported.

        @param atom: an Atom instance, or a string that is converted
            to one
        @return: the result of portage.match_from_list for the cpvs
            known for atom.cp, or an empty list if atom.cp is not in
            the index
        """
        if not isinstance(atom, Atom):
            atom = Atom(atom)

        if self._cp_map is None:
            # match() was called before cp_all() was ever iterated.
            # Build the whole index now instead of failing with an
            # AttributeError on None.
            for _ in self._init_index():
                pass

        cp_list = self._cp_map.get(atom.cp)
        if cp_list is None:
            return []
        # Sorts the stored list in place.
        self._portdb._cpv_sort_ascending(cp_list)
        return portage.match_from_list(atom, cp_list)

    def aux_get(self, cpv, attrs, myrepo=None):
        """
        Return metadata values for cpv. A request for exactly
        ["DESCRIPTION"] is answered from the description cache when
        the cpv is present there; everything else is delegated to the
        wrapped portdb.

        NOTE(review): myrepo is accepted but not used, neither for
        the cache lookup nor for the delegated call -- confirm that
        callers do not rely on it.
        """
        # The cache is None until the index has been started; in that
        # case fall through to the wrapped portdb rather than raising
        # a TypeError.
        if (self._desc_cache is not None and
            len(attrs) == 1 and attrs[0] == "DESCRIPTION"):
            try:
                return [self._desc_cache[cpv]]
            except KeyError:
                pass
        return self._portdb.aux_get(cpv, attrs)
180 --
181 2.0.4

Replies

Subject Author
[gentoo-portage-dev] [PATCH 3/5 v3] Add IndexedPortdb class. Zac Medico <zmedico@g.o>