1 |
The IndexedPortdb class uses pkg_desc_index to optimize searches for |
2 |
package names and descriptions. If the package description index is |
3 |
missing from a particular repository, then all metadata for that |
4 |
repository is obtained using the normal portdbapi.aux_get method. |
5 |
|
6 |
This class only implements a subset of portdbapi functionality that is |
7 |
useful for searching pkg_desc_index incrementally. For this reason, |
8 |
the cp_all method returns an ordered iterator instead of a list, so |
9 |
that search results can be displayed incrementally. |
10 |
|
11 |
X-Gentoo-Bug: 525718 |
12 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 |
13 |
--- |
14 |
This updated patch optimizes IndexedPortdb to avoid unnecessary cp_list calls |
15 |
for repositories that are not indexed. Now IndexedPortdb performs almost as well |
16 |
as the regular portdbapi for the case where no repositories are indexed. |
17 |
|
18 |
pym/portage/dbapi/IndexedPortdb.py | 165 +++++++++++++++++++++++++++++++++++++ |
19 |
1 file changed, 165 insertions(+) |
20 |
create mode 100644 pym/portage/dbapi/IndexedPortdb.py |
21 |
|
22 |
diff --git a/pym/portage/dbapi/IndexedPortdb.py b/pym/portage/dbapi/IndexedPortdb.py |
23 |
new file mode 100644 |
24 |
index 0000000..fc431a2 |
25 |
--- /dev/null |
26 |
+++ b/pym/portage/dbapi/IndexedPortdb.py |
27 |
@@ -0,0 +1,165 @@ |
28 |
+# Copyright 2014 Gentoo Foundation |
29 |
+# Distributed under the terms of the GNU General Public License v2 |
30 |
+ |
31 |
+import errno |
32 |
+import io |
33 |
+import functools |
34 |
+import operator |
35 |
+import os |
36 |
+ |
37 |
+import portage |
38 |
+from portage import _encodings |
39 |
+from portage.dep import Atom |
40 |
+from portage.exception import FileNotFound |
41 |
+from portage.cache.index.IndexStreamIterator import IndexStreamIterator |
42 |
+from portage.cache.index.pkg_desc_index import \ |
43 |
+ pkg_desc_index_line_read, pkg_desc_index_node |
44 |
+from portage.util.iterators.MultiIterGroupBy import MultiIterGroupBy |
45 |
+from portage.versions import _pkg_str |
46 |
+ |
47 |
+class IndexedPortdb(object): |
48 |
+ """ |
49 |
+ A portdbapi interface that uses a package description index to |
50 |
+ improve performance. If the description index is missing for a |
51 |
+ particular repository, then all metadata for that repository is |
52 |
+ obtained using the normal portdbapi.aux_get method. |
53 |
+ |
54 |
+ For performance reasons, the match method only supports package |
55 |
+ name and version constraints. For the same reason, the xmatch |
56 |
+ method is not implemented. |
57 |
+ """ |
58 |
+ |
59 |
+ _copy_attrs = ('cpv_exists', 'findname', 'getFetchMap', |
60 |
+ '_aux_cache_keys', '_cpv_sort_ascending', |
61 |
+ '_have_root_eclass_dir') |
62 |
+ |
63 |
+ def __init__(self, portdb): |
64 |
+ |
65 |
+ self._portdb = portdb |
66 |
+ |
67 |
+ for k in self._copy_attrs: |
68 |
+ setattr(self, k, getattr(portdb, k)) |
69 |
+ |
70 |
+ self._desc_cache = None |
71 |
+ self._cp_map = None |
72 |
+ self._unindexed_cp_map = None |
73 |
+ |
74 |
+ def _init_index(self): |
75 |
+ |
76 |
+ cp_map = {} |
77 |
+ desc_cache = {} |
78 |
+ self._desc_cache = desc_cache |
79 |
+ self._cp_map = cp_map |
80 |
+ index_missing = [] |
81 |
+ |
82 |
+ streams = [] |
83 |
+ for repo_path in self._portdb.porttrees: |
84 |
+ outside_repo = os.path.join(self._portdb.depcachedir, |
85 |
+ repo_path.lstrip(os.sep)) |
86 |
+ filenames = [] |
87 |
+ for parent_dir in (repo_path, outside_repo): |
88 |
+ filenames.append(os.path.join(parent_dir, |
89 |
+ "metadata", "pkg_desc_index")) |
90 |
+ |
91 |
+ repo_name = self._portdb.getRepositoryName(repo_path) |
92 |
+ |
93 |
+ try: |
94 |
+ f = None |
95 |
+ for filename in filenames: |
96 |
+ try: |
97 |
+ f = io.open(filename, |
98 |
+ encoding=_encodings["repo.content"]) |
99 |
+ except IOError as e: |
100 |
+ if e.errno not in (errno.ENOENT, errno.ESTALE): |
101 |
+ raise |
102 |
+ else: |
103 |
+ break |
104 |
+ |
105 |
+ if f is None: |
106 |
+ raise FileNotFound(filename) |
107 |
+ |
108 |
+ streams.append(iter(IndexStreamIterator(f, |
109 |
+ functools.partial(pkg_desc_index_line_read, |
110 |
+ repo = repo_name)))) |
111 |
+ except FileNotFound: |
112 |
+ index_missing.append(repo_path) |
113 |
+ |
114 |
+ if index_missing: |
115 |
+ self._unindexed_cp_map = {} |
116 |
+ |
117 |
+ class _NonIndexedStream(object): |
118 |
+ def __iter__(self_): |
119 |
+ for cp in self._portdb.cp_all( |
120 |
+ trees = index_missing): |
121 |
+ # Don't call cp_list yet, since it's a waste |
122 |
+ # if the package name does not match the current |
123 |
+ # search. |
124 |
+ self._unindexed_cp_map[cp] = index_missing |
125 |
+ yield pkg_desc_index_node(cp, (), None) |
126 |
+ |
127 |
+ streams.append(iter(_NonIndexedStream())) |
128 |
+ |
129 |
+ if streams: |
130 |
+ if len(streams) == 1: |
131 |
+ cp_group_iter = ([node] for node in streams[0]) |
132 |
+ else: |
133 |
+ cp_group_iter = MultiIterGroupBy(streams, |
134 |
+ key = operator.attrgetter("cp")) |
135 |
+ |
136 |
+ for cp_group in cp_group_iter: |
137 |
+ |
138 |
+ new_cp = None |
139 |
+ cp_list = cp_map.get(cp_group[0].cp) |
140 |
+ if cp_list is None: |
141 |
+ new_cp = cp_group[0].cp |
142 |
+ cp_list = [] |
143 |
+ cp_map[cp_group[0].cp] = cp_list |
144 |
+ |
145 |
+ for entry in cp_group: |
146 |
+ cp_list.extend(entry.cpv_list) |
147 |
+ if entry.desc is not None: |
148 |
+ for cpv in entry.cpv_list: |
149 |
+ desc_cache[cpv] = entry.desc |
150 |
+ |
151 |
+ if new_cp is not None: |
152 |
+ yield cp_group[0].cp |
153 |
+ |
154 |
+ def cp_all(self): |
155 |
+ """ |
156 |
+ Returns an ordered iterator instead of a list, so that search |
157 |
+ results can be displayed incrementally. |
158 |
+ """ |
159 |
+ if self._cp_map is None: |
160 |
+ return self._init_index() |
161 |
+ return iter(sorted(self._cp_map)) |
162 |
+ |
163 |
+ def match(self, atom): |
164 |
+ """ |
165 |
+ For performance reasons, only package name and version |
166 |
+ constraints are supported. |
167 |
+ """ |
168 |
+ if not isinstance(atom, Atom): |
169 |
+ atom = Atom(atom) |
170 |
+ cp_list = self._cp_map.get(atom.cp) |
171 |
+ if cp_list is None: |
172 |
+ return [] |
173 |
+ |
174 |
+ if self._unindexed_cp_map is not None: |
175 |
+ try: |
176 |
+ unindexed = self._unindexed_cp_map.pop(atom.cp) |
177 |
+ except KeyError: |
178 |
+ pass |
179 |
+ else: |
180 |
+ cp_list.extend(self._portdb.cp_list(atom.cp, |
181 |
+ mytree = unindexed)) |
182 |
+ |
183 |
+ self._portdb._cpv_sort_ascending(cp_list) |
184 |
+ return portage.match_from_list(atom, cp_list) |
185 |
+ |
186 |
+ def aux_get(self, cpv, attrs, myrepo = None): |
187 |
+ if len(attrs) == 1 and attrs[0] == "DESCRIPTION": |
188 |
+ try: |
189 |
+ return [self._desc_cache[cpv]] |
190 |
+ except KeyError: |
191 |
+ pass |
192 |
+ return self._portdb.aux_get(cpv, attrs) |
193 |
-- |
194 |
2.0.4 |