Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 2/5] Add IndexStreamIterator and MultiIterGroupBy.
Date: Sat, 01 Nov 2014 22:46:42
Message-Id: 1414881983-19877-3-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] by Zac Medico
1 This IndexStreamIterator class can be used together with the
2 pkg_desc_index_line_read function to read an index file incrementally
3 as a stream.
4
5 The MultiIterGroupBy class can be used to iterate over multiple
6 IndexStreamIterator instances at once, incrementally grouping results
7 for a particular package from multiple indices, while limiting the
8 amount of any given index that must be in memory at once.
9
10 Both of these classes are used by the IndexedPortdb class in the next
11 patch of this series.
12
13 X-Gentoo-Bug: 525718
14 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
15 ---
16 pym/portage/cache/index/IndexStreamIterator.py | 27 +++++++++
17 pym/portage/util/iterators/MultiIterGroupBy.py | 82 ++++++++++++++++++++++++++
18 pym/portage/util/iterators/__init__.py | 2 +
19 3 files changed, 111 insertions(+)
20 create mode 100644 pym/portage/cache/index/IndexStreamIterator.py
21 create mode 100644 pym/portage/util/iterators/MultiIterGroupBy.py
22 create mode 100644 pym/portage/util/iterators/__init__.py
23
24 diff --git a/pym/portage/cache/index/IndexStreamIterator.py b/pym/portage/cache/index/IndexStreamIterator.py
25 new file mode 100644
26 index 0000000..972aee1
27 --- /dev/null
28 +++ b/pym/portage/cache/index/IndexStreamIterator.py
29 @@ -0,0 +1,27 @@
30 +# Copyright 2014 Gentoo Foundation
31 +# Distributed under the terms of the GNU General Public License v2
32 +
class IndexStreamIterator(object):
    """
    Incrementally parse an index file as a stream of nodes.

    Every line read from the file is handed to the parser callable, and
    each non-None result is yielded. The file is closed when iteration
    finishes (normally, by an exception, or when the generator created
    by __iter__ is finalized), so only a single pass over the file is
    possible.
    """

    def __init__(self, f, parser):
        """
        @param f: an open file object (any iterable of lines that also
            provides a close method)
        @param parser: callable which accepts one line and returns a
            node, or None if the line does not produce a node
        """
        self.parser = parser
        self._file = f

    def close(self):
        """
        Close the underlying file. Calling this more than once is
        harmless, since the file reference is dropped after the first
        call.
        """
        if self._file is not None:
            self._file.close()
            self._file = None

    def __iter__(self):
        """
        Yield each non-None node which the parser returns for the lines
        of the file, and close the file afterwards. If the file has
        already been closed, then nothing is yielded.
        """
        if self._file is None:
            # Already closed or fully consumed: behave like an exhausted
            # stream instead of failing with "NoneType is not iterable".
            return

        try:
            for line in self._file:
                node = self.parser(line)
                if node is not None:
                    yield node
        finally:
            self.close()
57 diff --git a/pym/portage/util/iterators/MultiIterGroupBy.py b/pym/portage/util/iterators/MultiIterGroupBy.py
58 new file mode 100644
59 index 0000000..d4e62ad
60 --- /dev/null
61 +++ b/pym/portage/util/iterators/MultiIterGroupBy.py
62 @@ -0,0 +1,82 @@
63 +# Copyright 2014 Gentoo Foundation
64 +# Distributed under the terms of the GNU General Public License v2
65 +
class MultiIterGroupBy(object):
    """
    This class functions similarly to the itertools.groupby function,
    except that it takes multiple source iterators as input. The source
    iterators must yield objects in sorted order (according to the key
    function). A group is yielded as soon as the progress of all
    iterators reaches a state which guarantees that there can not be
    any remaining (unseen) elements of the group. This is useful for
    incremental display of grouped search results.

    At most one entry per source is buffered beyond the group which is
    currently being assembled.
    """

    def __init__(self, iterators, key = None):
        """
        @param iterators: iterable of source iterables, each of which
            yields entries in ascending key order
        @param key: callable which returns the grouping key for an
            entry (keys must be orderable), or None in order to use the
            entries themselves as keys
        """
        self._iterators = iterators
        self._key = key

    def __iter__(self):
        """
        Yield one list of entries per distinct key, in ascending key
        order. Within a group, entries are ordered by the position of
        their source in the iterators argument, and entries from the
        same source keep their original relative order.
        """
        # Imported here so that this block does not depend on any
        # module-level import.
        import heapq

        key_getter = self._key
        if key_getter is None:
            key_getter = lambda x: x

        # Heap of (key, source_id, entry, source). There is never more
        # than one pending entry per source, so source_id always breaks
        # ties between equal keys and the entries themselves are never
        # compared with each other.
        pending = []

        def advance(source_id, source):
            # Buffer the next entry of this source, if it has one.
            try:
                entry = next(source)
            except StopIteration:
                return
            heapq.heappush(pending,
                (key_getter(entry), source_id, entry, source))

        for source_id, source in enumerate(self._iterators):
            # iter() returns an iterator unchanged, and turns any other
            # iterable into something that can be resumed entry by entry.
            advance(source_id, iter(source))

        group = []
        group_key = None
        while pending:
            entry_key, source_id, entry, source = heapq.heappop(pending)
            if group and entry_key != group_key:
                # The smallest unread key of every source is now beyond
                # group_key, so this group is complete.
                yield group
                group = []
            group_key = entry_key
            group.append(entry)
            advance(source_id, source)

        # All sources are exhausted, so flush the final group.
        if group:
            yield group
145 diff --git a/pym/portage/util/iterators/__init__.py b/pym/portage/util/iterators/__init__.py
146 new file mode 100644
147 index 0000000..7cd880e
148 --- /dev/null
149 +++ b/pym/portage/util/iterators/__init__.py
150 @@ -0,0 +1,2 @@
151 +# Copyright 2014 Gentoo Foundation
152 +# Distributed under the terms of the GNU General Public License v2
153 --
154 2.0.4

Replies