Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy.
Date: Sun, 02 Nov 2014 00:18:39
Message-Id: 1414887509-21930-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy. by Zac Medico
1 This IndexStreamIterator class can be used together with the
2 pkg_desc_index_line_read function to read an index file incrementally
3 as a stream.
4
5 The MultiIterGroupBy class can be used to iterate over multiple
6 IndexStreamIterator instances at once, incrementally grouping results
7 for a particular package from multiple indices, while limiting the
8 amount of any given index that must be in memory at once.
9
10 Both of these classes are used by the IndexedPortdb class in the next
11 patch of this series.
12
13 X-Gentoo-Bug: 525718
14 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
15 ---
16 This updated patch includes a logic fix to ensure that all buffered
17 objects are yielded when all of the iterators are exhausted.
18
19 pym/portage/cache/index/IndexStreamIterator.py | 27 +++++++++
20 pym/portage/util/iterators/MultiIterGroupBy.py | 82 ++++++++++++++++++++++++++
21 pym/portage/util/iterators/__init__.py | 2 +
22 3 files changed, 111 insertions(+)
23 create mode 100644 pym/portage/cache/index/IndexStreamIterator.py
24 create mode 100644 pym/portage/util/iterators/MultiIterGroupBy.py
25 create mode 100644 pym/portage/util/iterators/__init__.py
26
27 diff --git a/pym/portage/cache/index/IndexStreamIterator.py b/pym/portage/cache/index/IndexStreamIterator.py
28 new file mode 100644
29 index 0000000..972aee1
30 --- /dev/null
31 +++ b/pym/portage/cache/index/IndexStreamIterator.py
32 @@ -0,0 +1,27 @@
33 +# Copyright 2014 Gentoo Foundation
34 +# Distributed under the terms of the GNU General Public License v2
35 +
class IndexStreamIterator(object):
    """
    Incrementally parse an index file, one line at a time.

    Each line read from the file object is passed to the parser, and
    every non-None result is yielded. The file object is closed as soon
    as iteration ends, whether by exhaustion, by an exception, or by
    the generator being closed.
    """

    def __init__(self, f, parser):
        """
        @param f: an open file-like object which yields lines when
            iterated and has a close() method
        @param parser: a callable which takes one line and returns a
            parsed node, or None if the line should be skipped
        """
        self.parser = parser
        self._file = f

    def close(self):
        """
        Close the underlying file object. Calling this more than once
        is harmless, since the file reference is dropped after the
        first call.
        """
        if self._file is not None:
            self._file.close()
            self._file = None

    def __iter__(self):
        """
        Yield the parsed node for each line that the parser accepts.

        If the stream has already been closed, then nothing is yielded
        (rather than failing with a TypeError from iterating None).
        """
        f = self._file
        if f is None:
            # Already closed or fully consumed: behave as an empty stream.
            return

        try:
            for line in f:
                node = self.parser(line)
                if node is not None:
                    yield node
        finally:
            # Release the file even if the consumer stops early.
            self.close()
60 diff --git a/pym/portage/util/iterators/MultiIterGroupBy.py b/pym/portage/util/iterators/MultiIterGroupBy.py
61 new file mode 100644
62 index 0000000..ece7a4c
63 --- /dev/null
64 +++ b/pym/portage/util/iterators/MultiIterGroupBy.py
65 @@ -0,0 +1,82 @@
66 +# Copyright 2014 Gentoo Foundation
67 +# Distributed under the terms of the GNU General Public License v2
68 +
class MultiIterGroupBy(object):
    """
    Group entries from several sorted sources by key, incrementally.

    This class functions similarly to the itertools.groupby function,
    except that it takes multiple source iterators as input. The source
    iterators must yield objects in sorted order. A group is yielded as
    soon as the progress of all iterators reaches a state which
    guarantees that there cannot be any remaining (unseen) elements of
    the group. This is useful for incremental display of grouped search
    results.

    Each group is yielded as a list of entries which share the same
    key. Keys must be hashable and mutually orderable.
    """

    def __init__(self, iterators, key=None):
        """
        @param iterators: an iterable of sources, each of which yields
            entries sorted by key
        @param key: an optional function which maps an entry to its
            grouping key (the entry itself is the key when this is None)
        """
        self._iterators = iterators
        self._key = key

    def __iter__(self):
        """
        Yield one list of entries per group, in ascending key order.
        """
        # Obtain a real iterator for every source exactly once, so that
        # breaking out of the inner loop below and re-entering it later
        # resumes where it left off. Without this, a re-iterable source
        # (such as a list) would restart from its first element on every
        # pass. Building a new list also means that the container given
        # to the constructor is never mutated and need not be a list.
        iterators = [iter(source) for source in self._iterators]
        # progress[i] is the key of the last entry read from iterators[i].
        progress = [None] * len(iterators)

        # Buffered groups which have not been yielded yet.
        key_map = {}
        eof = []
        key_getter = self._key
        if key_getter is None:
            key_getter = lambda x: x
        max_progress = None

        while iterators:
            # Smallest key read during this pass; every group with a
            # key up to this one is yielded at the end of the pass.
            min_progress = None
            for i, index in enumerate(iterators):

                if max_progress is not None and \
                    max_progress == progress[i] and \
                    min_progress is not None and \
                    max_progress != min_progress:
                    # This one has the most progress,
                    # so allow the others to catch up.
                    continue

                for entry in index:
                    entry_key = key_getter(entry)
                    progress[i] = entry_key
                    key_map.setdefault(entry_key, []).append(entry)

                    if min_progress is None or \
                        entry_key < min_progress:
                        min_progress = entry_key

                    if max_progress is None or \
                        entry_key >= max_progress:
                        max_progress = entry_key
                        # This one has the most progress,
                        # so allow the others to catch up.
                        break

                else:
                    # The inner loop ended without a break, which
                    # means that this iterator is exhausted.
                    eof.append(i)

            if eof:
                # Delete from the end so that the remaining indices
                # stay valid.
                for i in reversed(eof):
                    del iterators[i]
                    del progress[i]
                del eof[:]

            if iterators:
                yield_these = [k for k in key_map if k <= min_progress]
            else:
                # Every iterator is exhausted, so flush all of the
                # groups that are still buffered.
                yield_these = list(key_map)

            yield_these.sort()
            for k in yield_these:
                yield key_map.pop(k)
148 diff --git a/pym/portage/util/iterators/__init__.py b/pym/portage/util/iterators/__init__.py
149 new file mode 100644
150 index 0000000..7cd880e
151 --- /dev/null
152 +++ b/pym/portage/util/iterators/__init__.py
153 @@ -0,0 +1,2 @@
154 +# Copyright 2014 Gentoo Foundation
155 +# Distributed under the terms of the GNU General Public License v2
156 --
157 2.0.4