1 |
This IndexStreamIterator class can be used together with the |
2 |
pkg_desc_index_line_read function to read an index file incrementally |
3 |
as a stream. |
4 |
|
5 |
The MultiIterGroupBy class can be used to iterate over multiple |
6 |
IndexStreamIterator instances at once, incrementally grouping results |
7 |
for a particular package from multiple indices, while limiting the |
8 |
amount of any given index that must be in memory at once. |
9 |
|
10 |
Both of these classes are used by the IndexedPortdb class in the next |
11 |
patch of this series. |
12 |
|
13 |
X-Gentoo-Bug: 525718 |
14 |
X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718 |
15 |
--- |
16 |
pym/portage/cache/index/IndexStreamIterator.py | 27 +++++++++ |
17 |
pym/portage/util/iterators/MultiIterGroupBy.py | 82 ++++++++++++++++++++++++++ |
18 |
pym/portage/util/iterators/__init__.py | 2 + |
19 |
3 files changed, 111 insertions(+) |
20 |
create mode 100644 pym/portage/cache/index/IndexStreamIterator.py |
21 |
create mode 100644 pym/portage/util/iterators/MultiIterGroupBy.py |
22 |
create mode 100644 pym/portage/util/iterators/__init__.py |
23 |
|
24 |
diff --git a/pym/portage/cache/index/IndexStreamIterator.py b/pym/portage/cache/index/IndexStreamIterator.py |
25 |
new file mode 100644 |
26 |
index 0000000..972aee1 |
27 |
--- /dev/null |
28 |
+++ b/pym/portage/cache/index/IndexStreamIterator.py |
29 |
@@ -0,0 +1,27 @@ |
30 |
+# Copyright 2014 Gentoo Foundation |
31 |
+# Distributed under the terms of the GNU General Public License v2 |
32 |
+ |
33 |
class IndexStreamIterator(object):
    """
    Iterate over a file-like object line by line, passing each line
    through a parser callable and yielding every non-None result.

    The underlying file is closed once iteration finishes (or the
    generator returned by __iter__ is closed or fails), after which
    the internal file reference is reset to None.
    """

    def __init__(self, f, parser):
        """
        @param f: an open, line-iterable object providing a close() method
        @param parser: callable invoked with each line; a return value
            of None means the line is skipped
        """
        self.parser = parser
        self._file = f

    def close(self):
        """
        Close the underlying file, if it has not been closed already.
        Calling this more than once is harmless.
        """
        stream = self._file
        if stream is None:
            # Already closed; nothing left to release.
            return
        stream.close()
        self._file = None

    def __iter__(self):
        """
        Generator yielding the parsed form of each line for which the
        parser returned something other than None.
        """
        try:
            for raw_line in self._file:
                parsed = self.parser(raw_line)
                if parsed is None:
                    continue
                yield parsed
        finally:
            # Runs on exhaustion, on error, and when the generator
            # itself is closed before being exhausted.
            self.close()
57 |
diff --git a/pym/portage/util/iterators/MultiIterGroupBy.py b/pym/portage/util/iterators/MultiIterGroupBy.py |
58 |
new file mode 100644 |
59 |
index 0000000..d4e62ad |
60 |
--- /dev/null |
61 |
+++ b/pym/portage/util/iterators/MultiIterGroupBy.py |
62 |
@@ -0,0 +1,82 @@ |
63 |
+# Copyright 2014 Gentoo Foundation |
64 |
+# Distributed under the terms of the GNU General Public License v2 |
65 |
+ |
66 |
class MultiIterGroupBy(object):
    """
    This class functions similarly to the itertools.groupby function,
    except that it takes multiple source iterators as input. The source
    iterators must yield objects in sorted order. A group is yielded as
    soon as the progress of all iterators reaches a state which
    guarantees that there can not be any remaining (unseen) elements of
    the group. This is useful for incremental display of grouped search
    results.

    Keys must be hashable and mutually orderable. A group is released
    as soon as every live source has reached its key, so a single
    source that repeats a key may have that key's entries split across
    more than one yielded group.
    """

    def __init__(self, iterators, key=None):
        """
        @param iterators: iterable of source iterables, each yielding
            entries in ascending key order
        @param key: optional callable mapping an entry to its grouping
            key; by default the entry itself is the key
        """
        self._iterators = iterators
        self._key = key

    def __iter__(self):
        """
        Generator yielding one list of entries per key, in ascending
        key order.
        """
        key_getter = self._key
        if key_getter is None:
            key_getter = lambda x: x

        # One [iterator, progress] pair per source. iter() is called
        # exactly once per source, so re-iterable inputs (lists, or
        # objects whose __iter__ returns a fresh generator) are not
        # restarted or torn down between rounds. progress is the key of
        # the entry most recently pulled from that source (None until
        # the first entry has been pulled).
        trackers = [[iter(source), None] for source in self._iterators]

        # key -> list of buffered entries that share that key
        key_map = {}
        min_progress = None

        while trackers:
            alive = []
            for tracker in trackers:
                progress = tracker[1]
                if progress is not None and progress != min_progress:
                    # This one is ahead of the slowest source,
                    # so allow the others to catch up.
                    alive.append(tracker)
                    continue

                # In order to limit buffering, pull only a single
                # entry from each lagging source per round.
                try:
                    entry = next(tracker[0])
                except StopIteration:
                    # Exhausted sources no longer hold back any group.
                    continue

                entry_key = key_getter(entry)
                tracker[1] = entry_key
                key_map.setdefault(entry_key, []).append(entry)
                alive.append(tracker)

            trackers = alive

            if trackers:
                # Sorted sources can never go back below their current
                # progress, so every key up to the slowest source's
                # progress is ready.
                min_progress = min(tracker[1] for tracker in trackers)
                yield_these = sorted(
                    k for k in key_map if k <= min_progress)
            else:
                # Every source is exhausted: flush whatever is still
                # buffered instead of comparing keys against None.
                yield_these = sorted(key_map)

            for k in yield_these:
                yield key_map.pop(k)
145 |
diff --git a/pym/portage/util/iterators/__init__.py b/pym/portage/util/iterators/__init__.py |
146 |
new file mode 100644 |
147 |
index 0000000..7cd880e |
148 |
--- /dev/null |
149 |
+++ b/pym/portage/util/iterators/__init__.py |
150 |
@@ -0,0 +1,2 @@ |
151 |
+# Copyright 2014 Gentoo Foundation |
152 |
+# Distributed under the terms of the GNU General Public License v2 |
153 |
-- |
154 |
2.0.4 |