Gentoo Archives: gentoo-commits

From: Zac Medico <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage:master commit in: lib/portage/tests/util/, lib/portage/util/
Date: Sun, 20 Jan 2019 23:31:15
Message-Id: 1548025048.eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c.zmedico@gentoo
1 commit: eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c
2 Author: Zac Medico <zmedico <AT> gentoo <DOT> org>
3 AuthorDate: Sat Jan 19 08:39:47 2019 +0000
4 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
5 CommitDate: Sun Jan 20 22:57:28 2019 +0000
6 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=eb2674c1
7
8 INSTALL_MASK: index patterns anchored with leading slash (bug 675826)
9
10 For scalability, use a tree structure to index patterns that are
11 anchored with a leading slash.
12
13 Patterns anchored with leading slash are indexed by leading non-glob
14 components, making it possible to minimize the number of fnmatch
15 calls. For example:
16
17 /foo*/bar -> {'.': ['/foo*/bar']}
18
19 /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
20
21 /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
22
23 Bug: https://bugs.gentoo.org/675826
24 Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>
25
26 lib/portage/tests/util/test_install_mask.py | 36 +++++++++++++
27 lib/portage/util/install_mask.py | 84 ++++++++++++++++++++++++++---
28 2 files changed, 113 insertions(+), 7 deletions(-)
29
30 diff --git a/lib/portage/tests/util/test_install_mask.py b/lib/portage/tests/util/test_install_mask.py
31 index f651eb4b7..6a29db79a 100644
32 --- a/lib/portage/tests/util/test_install_mask.py
33 +++ b/lib/portage/tests/util/test_install_mask.py
34 @@ -119,6 +119,42 @@ class InstallMaskTestCase(TestCase):
35 ),
36 )
37 ),
38 + (
39 + '/usr/share/locale '
40 + '-/usr/share/locale/en* '
41 + '-/usr/share/locale/kf5_all_languages '
42 + '-/usr/share/locale/locale.alias',
43 + (
44 + (
45 + 'usr/share/locale/en',
46 + False,
47 + ),
48 + (
49 + 'usr/share/locale/en_GB',
50 + False,
51 + ),
52 + (
53 + 'usr/share/locale/en/kf5_all_languages',
54 + False,
55 + ),
56 + (
57 + 'usr/share/locale/locale.alias',
58 + False,
59 + ),
60 + (
61 + 'usr/share/locale/es',
62 + True,
63 + ),
64 + (
65 + 'usr/share/locale/fr',
66 + True,
67 + ),
68 + (
69 + 'usr/share/locale',
70 + True,
71 + ),
72 + )
73 + ),
74 )
75
76 for install_mask_str, paths in cases:
77
78 diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py
79 index 32627eb05..a8c0cbda5 100644
80 --- a/lib/portage/util/install_mask.py
81 +++ b/lib/portage/util/install_mask.py
82 @@ -3,8 +3,11 @@
83
84 __all__ = ['install_mask_dir', 'InstallMask']
85
86 +import collections
87 import errno
88 import fnmatch
89 +import functools
90 +import operator
91 import sys
92
93 from portage import os, _unicode_decode
94 @@ -18,13 +21,82 @@ else:
95 _unicode = unicode
96
97
98 +def _defaultdict_tree():
99 + return collections.defaultdict(_defaultdict_tree)
100 +
101 +
102 +_pattern = collections.namedtuple('_pattern', (
103 + 'orig_index',
104 + 'is_inclusive',
105 + 'pattern',
106 + 'leading_slash',
107 +))
108 +
109 +
110 class InstallMask(object):
111 def __init__(self, install_mask):
112 """
113 @param install_mask: INSTALL_MASK value
114 @type install_mask: str
115 """
116 - self._install_mask = install_mask.split()
117 + # Patterns not anchored with leading slash
118 + self._unanchored = []
119 +
120 + # Patterns anchored with leading slash are indexed by leading
121 + # non-glob components, making it possible to minimize the
122 + # number of fnmatch calls. For example:
123 + # /foo*/bar -> {'.': ['/foo*/bar']}
124 + # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
125 + # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
126 + self._anchored = _defaultdict_tree()
127 + for orig_index, pattern in enumerate(install_mask.split()):
128 + # if pattern starts with -, possibly exclude this path
129 + is_inclusive = not pattern.startswith('-')
130 + if not is_inclusive:
131 + pattern = pattern[1:]
132 + pattern_obj = _pattern(orig_index, is_inclusive, pattern, pattern.startswith('/'))
133 + # absolute path pattern
134 + if pattern_obj.leading_slash:
135 + current_dir = self._anchored
136 + for component in list(filter(None, pattern.split('/'))):
137 + if '*' in component:
138 + break
139 + else:
140 + current_dir = current_dir[component]
141 + current_dir.setdefault('.', []).append(pattern_obj)
142 +
143 + # filename
144 + else:
145 + self._unanchored.append(pattern_obj)
146 +
147 + def _iter_relevant_patterns(self, path):
148 + """
149 + Iterate over patterns that may be relevant for the given path.
150 +
151 + Patterns anchored with leading / are indexed by leading
152 + non-glob components, making it possible to minimize the
153 + number of fnmatch calls.
154 + """
155 + current_dir = self._anchored
156 + components = list(filter(None, path.split('/')))
157 + patterns = []
158 + patterns.extend(current_dir.get('.', []))
159 + for component in components:
160 + next_dir = current_dir.get(component, None)
161 + if next_dir is None:
162 + break
163 + current_dir = next_dir
164 + patterns.extend(current_dir.get('.', []))
165 +
166 + if patterns:
167 + # Sort by original pattern index, since order matters for
168 + # non-inclusive patterns.
169 + patterns.extend(self._unanchored)
170 + if any(not pattern.is_inclusive for pattern in patterns):
171 + patterns.sort(key=operator.attrgetter('orig_index'))
172 + return iter(patterns)
173 +
174 + return iter(self._unanchored)
175
176 def match(self, path):
177 """
178 @@ -34,13 +106,11 @@ class InstallMask(object):
179 @return: True if path matches INSTALL_MASK, False otherwise
180 """
181 ret = False
182 - for pattern in self._install_mask:
183 - # if pattern starts with -, possibly exclude this path
184 - is_inclusive = not pattern.startswith('-')
185 - if not is_inclusive:
186 - pattern = pattern[1:]
187 +
188 + for pattern_obj in self._iter_relevant_patterns(path):
189 + is_inclusive, pattern = pattern_obj.is_inclusive, pattern_obj.pattern
190 # absolute path pattern
191 - if pattern.startswith('/'):
192 + if pattern_obj.leading_slash:
193 # handle trailing slash for explicit directory match
194 if path.endswith('/'):
195 pattern = pattern.rstrip('/') + '/'