1 |
This IndexStreamIterator class can be used together with the |
2 |
pkg_desc_index_line_read function to read an index file incrementally |
3 |
as a stream. |
4 |
|
5 |
The MultiIterGroupBy class can be used to iterate over multiple |
6 |
IndexStreamIterator instances at once, incrementally grouping results |
7 |
for a particular package from multiple indices, while limiting the |
8 |
amount of any given index that must be in memory at once. |
9 |
|
10 |
Both of these classes are used by the IndexedPortdb class in the next |
11 |
patch of this series. |
12 |
|
13 |
X-Gentoo-Bug: 525718 |
14 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 |
15 |
--- |
16 |
This updated patch includes a logic fix to ensure that all buffered |
17 |
objects are yielded when all of the iterators are exhausted. |
18 |
|
19 |
pym/portage/cache/index/IndexStreamIterator.py | 27 +++++++++ |
20 |
pym/portage/util/iterators/MultiIterGroupBy.py | 82 ++++++++++++++++++++++++++ |
21 |
pym/portage/util/iterators/__init__.py | 2 + |
22 |
3 files changed, 111 insertions(+) |
23 |
create mode 100644 pym/portage/cache/index/IndexStreamIterator.py |
24 |
create mode 100644 pym/portage/util/iterators/MultiIterGroupBy.py |
25 |
create mode 100644 pym/portage/util/iterators/__init__.py |
26 |
|
27 |
diff --git a/pym/portage/cache/index/IndexStreamIterator.py b/pym/portage/cache/index/IndexStreamIterator.py |
28 |
new file mode 100644 |
29 |
index 0000000..972aee1 |
30 |
--- /dev/null |
31 |
+++ b/pym/portage/cache/index/IndexStreamIterator.py |
32 |
@@ -0,0 +1,27 @@ |
33 |
+# Copyright 2014 Gentoo Foundation |
34 |
+# Distributed under the terms of the GNU General Public License v2 |
35 |
+ |
36 |
class IndexStreamIterator(object):
    """
    Incrementally parse a line-oriented file object into nodes.

    Each line read from the file is handed to the parser callable, and
    every non-None result is yielded. The file is closed when iteration
    finishes, raises, or the generator is closed.
    """

    def __init__(self, f, parser):
        """
        @param f: open file object; it is iterated line by line and
            its close() method is called when the stream is done
        @param parser: callable that takes one line and returns a
            node, or None if the line should be skipped
        """
        self.parser = parser
        self._file = f

    def close(self):
        """
        Close the underlying file if it is still open. Calling this
        more than once is harmless.
        """
        if self._file is not None:
            self._file.close()
            self._file = None

    def __iter__(self):
        """
        Yield each non-None node produced by the parser, then close
        the file. Iterating a stream that has already been closed
        yields nothing.
        """
        # After close() the file reference is None; behave like an
        # exhausted stream rather than failing with a TypeError.
        if self._file is None:
            return

        try:
            for line in self._file:
                node = self.parser(line)
                if node is not None:
                    yield node
        finally:
            # Runs on normal exhaustion, on error, and when the
            # generator itself is closed.
            self.close()
60 |
diff --git a/pym/portage/util/iterators/MultiIterGroupBy.py b/pym/portage/util/iterators/MultiIterGroupBy.py |
61 |
new file mode 100644 |
62 |
index 0000000..ece7a4c |
63 |
--- /dev/null |
64 |
+++ b/pym/portage/util/iterators/MultiIterGroupBy.py |
65 |
@@ -0,0 +1,82 @@ |
66 |
+# Copyright 2014 Gentoo Foundation |
67 |
+# Distributed under the terms of the GNU General Public License v2 |
68 |
+ |
69 |
class MultiIterGroupBy(object):
    """
    This class functions similarly to the itertools.groupby function,
    except that it takes multiple source iterators as input. The source
    iterators must yield objects in sorted order. A group is yielded as
    soon as the progress of all iterators reaches a state which
    guarantees that there cannot be any remaining (unseen) elements of
    the group. This is useful for incremental display of grouped search
    results.

    A group is released once its key is <= the smallest key read in the
    current pass, so each source is expected to yield a given key at
    most once.
    """

    def __init__(self, iterators, key = None):
        """
        @param iterators: iterable of sources (iterators or iterables),
            each yielding objects in sorted key order
        @param key: optional callable mapping an object to its group
            key; the object itself is the key when omitted
        """
        self._iterators = iterators
        self._key = key

    def __iter__(self):
        """
        Yield lists of objects that share a key, in ascending key
        order. Within a group, objects appear in the order they were
        read from the sources.
        """
        # Obtain a real iterator from every source exactly once. The
        # inner loop below is left with "break" and re-entered on a
        # later pass, which only resumes correctly on an iterator; a
        # plain iterable (e.g. a list) would restart from the
        # beginning each time.
        iterators = [iter(source) for source in self._iterators]
        # progress[i] is the key most recently read from iterators[i].
        progress = [None] * len(iterators)

        key_map = {}
        eof = []
        key_getter = self._key
        if key_getter is None:
            key_getter = lambda x: x
        max_progress = None

        while iterators:
            min_progress = None
            for i, index in enumerate(iterators):

                if max_progress is not None and \
                    max_progress == progress[i] and \
                    min_progress is not None and \
                    max_progress != min_progress:
                    # This one has the most progress,
                    # so allow the others to catch up.
                    continue

                for entry in index:
                    # Compute the key once per entry.
                    entry_key = key_getter(entry)
                    progress[i] = entry_key
                    key_group = key_map.get(entry_key)
                    if key_group is None:
                        key_group = []
                        key_map[entry_key] = key_group

                    key_group.append(entry)

                    if min_progress is None or \
                        entry_key < min_progress:
                        min_progress = entry_key

                    if max_progress is None or \
                        entry_key >= max_progress:
                        max_progress = entry_key
                        # This one has the most progress,
                        # so allow the others to catch up.
                        break

                else:
                    # The loop ended without a break, so this
                    # source is exhausted.
                    eof.append(i)

            if eof:
                # Delete from the highest position down so that the
                # remaining positions stay valid.
                for i in reversed(eof):
                    del iterators[i]
                    del progress[i]
                del eof[:]

            # Once every source is exhausted, flush everything that is
            # still buffered. Otherwise release only the keys that no
            # source read from in this pass can still produce.
            yield_these = [k for k in key_map
                if not iterators or k <= min_progress]

            if yield_these:
                yield_these.sort()
                for k in yield_these:
                    yield key_map.pop(k)
148 |
diff --git a/pym/portage/util/iterators/__init__.py b/pym/portage/util/iterators/__init__.py |
149 |
new file mode 100644 |
150 |
index 0000000..7cd880e |
151 |
--- /dev/null |
152 |
+++ b/pym/portage/util/iterators/__init__.py |
153 |
@@ -0,0 +1,2 @@ |
154 |
+# Copyright 2014 Gentoo Foundation |
155 |
+# Distributed under the terms of the GNU General Public License v2 |
156 |
-- |
157 |
2.0.4 |