Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 3/5 v3] Add IndexedPortdb class.
Date: Tue, 04 Nov 2014 20:34:53
Message-Id: 1415133274-24929-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 3/5 v2] Add IndexedPortdb class. by Zac Medico
1 The IndexedPortdb class uses pkg_desc_index to optimize searches for
2 package names and descriptions. If the package description index is
3 missing from a particular repository, then all metadata for that
4 repository is obtained using the normal portdbapi.aux_get method.
5
6 This class only implements a subset of portdbapi functionality that is
7 useful for searching pkg_desc_index incrementally. For this reason,
8 the cp_all method returns an ordered iterator instead of a list, so
9 that search results can be displayed incrementally.
10
11 X-Gentoo-Bug: 525718
12 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
13 ---
14 This updated patch optimizes IndexedPortdb to avoid unnecessary cp_list calls
15 for repositories that are not indexed. Now IndexedPortdb performs almost as well
16 as the regular portdbapi for the case where no repositories are indexed.
17
18 pym/portage/dbapi/IndexedPortdb.py | 165 +++++++++++++++++++++++++++++++++++++
19 1 file changed, 165 insertions(+)
20 create mode 100644 pym/portage/dbapi/IndexedPortdb.py
21
22 diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py
23 new file mode 100644
24 index 0000000..fc431a2
25 --- /dev/null
26 +++ b/pym/portage/dbapi/IndexedPortdb.py
27 @@ -0,0 +1,165 @@
28 +# Copyright 2014 Gentoo Foundation
29 +# Distributed under the terms of the GNU General Public License v2
30 +
31 +import errno
32 +import io
33 +import functools
34 +import operator
35 +import os
36 +
37 +import portage
38 +from portage import _encodings
39 +from portage.dep import Atom
40 +from portage.exception import FileNotFound
41 +from portage.cache.index.IndexStreamIterator import IndexStreamIterator
42 +from portage.cache.index.pkg_desc_index import \
43 + pkg_desc_index_line_read, pkg_desc_index_node
44 +from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy
45 +from portage.versions import _pkg_str
46 +
class IndexedPortdb(object):
    """
    A portdbapi interface that uses a package description index to
    improve performance. If the description index is missing for a
    particular repository, then all metadata for that repository is
    obtained using the normal portdbapi.aux_get method.

    For performance reasons, the match method only supports package
    name and version constraints. For the same reason, the xmatch
    method is not implemented.

    The index is loaded lazily: iterating over the result of cp_all
    fills the internal maps one package name at a time, immediately
    before that name is yielded.
    """

    # Attributes of the wrapped portdb that are re-exported unchanged
    # on this object.
    _copy_attrs = ('cpv_exists', 'findname', 'getFetchMap',
        '_aux_cache_keys', '_cpv_sort_ascending',
        '_have_root_eclass_dir')

    def __init__(self, portdb):
        """
        Wrap portdb, which must provide every attribute listed in
        _copy_attrs. No index file is read until cp_all (or match)
        is used.
        """
        self._portdb = portdb

        for k in self._copy_attrs:
            setattr(self, k, getattr(portdb, k))

        # cpv -> DESCRIPTION, filled in by _init_index.
        self._desc_cache = None
        # cp -> list of cpvs, filled in by _init_index.
        self._cp_map = None
        # cp -> list of unindexed repository paths whose cp_list
        # result has not been merged into _cp_map yet.
        self._unindexed_cp_map = None

    def _open_index(self, repo_path):
        """
        Open the pkg_desc_index file that belongs to repo_path.

        The file inside the repository's own metadata directory is
        tried first, then the one at the mirrored path below the
        portdb's depcachedir. Returns the open text file, or None if
        neither file exists. Any IOError other than ENOENT or ESTALE
        is propagated.
        """
        outside_repo = os.path.join(self._portdb.depcachedir,
            repo_path.lstrip(os.sep))
        for parent_dir in (repo_path, outside_repo):
            filename = os.path.join(parent_dir,
                "metadata", "pkg_desc_index")
            try:
                return io.open(filename,
                    encoding=_encodings["repo.content"])
            except IOError as e:
                if e.errno not in (errno.ENOENT, errno.ESTALE):
                    raise
        return None

    def _iter_unindexed(self, trees):
        """
        Generate a placeholder index node (empty cpv list, no
        description) for every package name found in the given
        unindexed repositories.
        """
        for cp in self._portdb.cp_all(trees=trees):
            # Don't call cp_list yet, since it's a waste if the
            # package name does not match the current search. The
            # match method performs that lookup on demand.
            self._unindexed_cp_map[cp] = trees
            yield pkg_desc_index_node(cp, (), None)

    def _init_index(self):
        """
        Generator that builds the index and yields each package name
        (cp) the first time it is encountered.

        One stream of index nodes is created per repository that has
        a pkg_desc_index file, plus a single stream that covers all
        repositories without one. The nodes are grouped by cp, and
        _cp_map and _desc_cache are updated for a group right before
        its cp is yielded.
        """
        cp_map = {}
        desc_cache = {}
        self._desc_cache = desc_cache
        self._cp_map = cp_map
        index_missing = []

        streams = []
        for repo_path in self._portdb.porttrees:
            repo_name = self._portdb.getRepositoryName(repo_path)
            f = self._open_index(repo_path)
            if f is None:
                index_missing.append(repo_path)
                continue
            # NOTE(review): the file is handed over to
            # IndexStreamIterator and never closed here; presumably
            # the iterator closes it when exhausted -- confirm.
            streams.append(iter(IndexStreamIterator(f,
                functools.partial(pkg_desc_index_line_read,
                repo=repo_name))))

        if index_missing:
            self._unindexed_cp_map = {}
            streams.append(self._iter_unindexed(index_missing))

        if not streams:
            return

        if len(streams) == 1:
            # Nothing to merge, so every node is its own group.
            cp_group_iter = ([node] for node in streams[0])
        else:
            cp_group_iter = MultiIterGroupBy(streams,
                key=operator.attrgetter("cp"))

        for cp_group in cp_group_iter:
            cp = cp_group[0].cp
            cp_list = cp_map.get(cp)
            is_new = cp_list is None
            if is_new:
                cp_list = []
                cp_map[cp] = cp_list

            for entry in cp_group:
                cp_list.extend(entry.cpv_list)
                if entry.desc is not None:
                    for cpv in entry.cpv_list:
                        desc_cache[cpv] = entry.desc

            if is_new:
                yield cp

    def cp_all(self):
        """
        Returns an ordered iterator instead of a list, so that search
        results can be displayed incrementally.

        While the index has not been started yet this returns the
        generator that builds it; afterwards it returns an iterator
        over the sorted package names collected so far.
        """
        if self._cp_map is None:
            return self._init_index()
        return iter(sorted(self._cp_map))

    def match(self, atom):
        """
        For performance reasons, only package name and version
        constraints are supported.

        Returns the list produced by portage.match_from_list for the
        versions known for atom.cp, or an empty list if that package
        name is not in the index.
        """
        if not isinstance(atom, Atom):
            atom = Atom(atom)

        if self._cp_map is None:
            # match was called before cp_all started the index, so
            # build it completely now instead of failing on None.
            for _ in self._init_index():
                pass

        cp_list = self._cp_map.get(atom.cp)
        if cp_list is None:
            return []

        if self._unindexed_cp_map is not None:
            try:
                # pop, so the unindexed repositories are queried only
                # once per package name.
                unindexed = self._unindexed_cp_map.pop(atom.cp)
            except KeyError:
                pass
            else:
                cp_list.extend(self._portdb.cp_list(atom.cp,
                    mytree=unindexed))

        # The return value is unused, so this is expected to sort
        # cp_list in place.
        self._portdb._cpv_sort_ascending(cp_list)
        return portage.match_from_list(atom, cp_list)

    def aux_get(self, cpv, attrs, myrepo=None):
        """
        Return the metadata values for cpv that are named in attrs.

        A request for only DESCRIPTION is answered from the index
        cache when possible. Everything else, including a cache miss
        or a not yet initialized index, is delegated to the wrapped
        portdb together with myrepo.
        """
        if (self._desc_cache is not None and
            len(attrs) == 1 and attrs[0] == "DESCRIPTION"):
            try:
                return [self._desc_cache[cpv]]
            except KeyError:
                pass
        return self._portdb.aux_get(cpv, attrs, myrepo=myrepo)
193 --
194 2.0.4