1 |
commit: eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c |
2 |
Author: Zac Medico <zmedico <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Jan 19 08:39:47 2019 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Sun Jan 20 22:57:28 2019 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=eb2674c1 |
7 |
|
8 |
INSTALL_MASK: index patterns anchored with leading slash (bug 675826) |
9 |
|
10 |
For scalability, use a tree structure to index patterns that are |
11 |
anchored with a leading slash. |
12 |
|
13 |
Patterns anchored with leading slash are indexed by leading non-glob |
14 |
components, making it possible to minimize the number of fnmatch |
15 |
calls. For example: |
16 |
|
17 |
/foo*/bar -> {'.': ['/foo*/bar']} |
18 |
|
19 |
/foo/bar* -> {'foo': {'.': ['/foo/bar*']}} |
20 |
|
21 |
/foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}} |
22 |
|
23 |
Bug: https://bugs.gentoo.org/675826 |
24 |
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org> |
25 |
|
26 |
lib/portage/tests/util/test_install_mask.py | 36 +++++++++++++ |
27 |
lib/portage/util/install_mask.py | 84 ++++++++++++++++++++++++++--- |
28 |
2 files changed, 113 insertions(+), 7 deletions(-) |
29 |
|
30 |
diff --git a/lib/portage/tests/util/test_install_mask.py b/lib/portage/tests/util/test_install_mask.py |
31 |
index f651eb4b7..6a29db79a 100644 |
32 |
--- a/lib/portage/tests/util/test_install_mask.py |
33 |
+++ b/lib/portage/tests/util/test_install_mask.py |
34 |
@@ -119,6 +119,42 @@ class InstallMaskTestCase(TestCase): |
35 |
), |
36 |
) |
37 |
), |
38 |
+ ( |
39 |
+ '/usr/share/locale ' |
40 |
+ '-/usr/share/locale/en* ' |
41 |
+ '-/usr/share/locale/kf5_all_languages ' |
42 |
+ '-/usr/share/locale/locale.alias', |
43 |
+ ( |
44 |
+ ( |
45 |
+ 'usr/share/locale/en', |
46 |
+ False, |
47 |
+ ), |
48 |
+ ( |
49 |
+ 'usr/share/locale/en_GB', |
50 |
+ False, |
51 |
+ ), |
52 |
+ ( |
53 |
+ 'usr/share/locale/en/kf5_all_languages', |
54 |
+ False, |
55 |
+ ), |
56 |
+ ( |
57 |
+ 'usr/share/locale/locale.alias', |
58 |
+ False, |
59 |
+ ), |
60 |
+ ( |
61 |
+ 'usr/share/locale/es', |
62 |
+ True, |
63 |
+ ), |
64 |
+ ( |
65 |
+ 'usr/share/locale/fr', |
66 |
+ True, |
67 |
+ ), |
68 |
+ ( |
69 |
+ 'usr/share/locale', |
70 |
+ True, |
71 |
+ ), |
72 |
+ ) |
73 |
+ ), |
74 |
) |
75 |
|
76 |
for install_mask_str, paths in cases: |
77 |
|
78 |
diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py |
79 |
index 32627eb05..a8c0cbda5 100644 |
80 |
--- a/lib/portage/util/install_mask.py |
81 |
+++ b/lib/portage/util/install_mask.py |
82 |
@@ -3,8 +3,11 @@ |
83 |
|
84 |
__all__ = ['install_mask_dir', 'InstallMask'] |
85 |
|
86 |
+import collections |
87 |
import errno |
88 |
import fnmatch |
89 |
+import functools |
90 |
+import operator |
91 |
import sys |
92 |
|
93 |
from portage import os, _unicode_decode |
94 |
@@ -18,13 +21,82 @@ else: |
95 |
_unicode = unicode |
96 |
|
97 |
|
98 |
+def _defaultdict_tree(): |
99 |
+ return collections.defaultdict(_defaultdict_tree) |
100 |
+ |
101 |
+ |
102 |
+_pattern = collections.namedtuple('_pattern', ( |
103 |
+ 'orig_index', |
104 |
+ 'is_inclusive', |
105 |
+ 'pattern', |
106 |
+ 'leading_slash', |
107 |
+)) |
108 |
+ |
109 |
+ |
110 |
class InstallMask(object): |
111 |
def __init__(self, install_mask): |
112 |
""" |
113 |
@param install_mask: INSTALL_MASK value |
114 |
@type install_mask: str |
115 |
""" |
116 |
- self._install_mask = install_mask.split() |
117 |
+ # Patterns not anchored with leading slash |
118 |
+ self._unanchored = [] |
119 |
+ |
120 |
+ # Patterns anchored with leading slash are indexed by leading |
121 |
+ # non-glob components, making it possible to minimize the |
122 |
+ # number of fnmatch calls. For example: |
123 |
+ # /foo*/bar -> {'.': ['/foo*/bar']} |
124 |
+ # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}} |
125 |
+ # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}} |
126 |
+ self._anchored = _defaultdict_tree() |
127 |
+ for orig_index, pattern in enumerate(install_mask.split()): |
128 |
+ # if pattern starts with -, possibly exclude this path |
129 |
+ is_inclusive = not pattern.startswith('-') |
130 |
+ if not is_inclusive: |
131 |
+ pattern = pattern[1:] |
132 |
+ pattern_obj = _pattern(orig_index, is_inclusive, pattern, pattern.startswith('/')) |
133 |
+ # absolute path pattern |
134 |
+ if pattern_obj.leading_slash: |
135 |
+ current_dir = self._anchored |
136 |
+ for component in list(filter(None, pattern.split('/'))): |
137 |
+ if '*' in component: |
138 |
+ break |
139 |
+ else: |
140 |
+ current_dir = current_dir[component] |
141 |
+ current_dir.setdefault('.', []).append(pattern_obj) |
142 |
+ |
143 |
+ # filename |
144 |
+ else: |
145 |
+ self._unanchored.append(pattern_obj) |
146 |
+ |
147 |
+ def _iter_relevant_patterns(self, path): |
148 |
+ """ |
149 |
+ Iterate over patterns that may be relevant for the given path. |
150 |
+ |
151 |
+ Patterns anchored with leading / are indexed by leading |
152 |
+ non-glob components, making it possible to minimize the |
153 |
+ number of fnmatch calls. |
154 |
+ """ |
155 |
+ current_dir = self._anchored |
156 |
+ components = list(filter(None, path.split('/'))) |
157 |
+ patterns = [] |
158 |
+ patterns.extend(current_dir.get('.', [])) |
159 |
+ for component in components: |
160 |
+ next_dir = current_dir.get(component, None) |
161 |
+ if next_dir is None: |
162 |
+ break |
163 |
+ current_dir = next_dir |
164 |
+ patterns.extend(current_dir.get('.', [])) |
165 |
+ |
166 |
+ if patterns: |
167 |
+ # Sort by original pattern index, since order matters for |
168 |
+ # non-inclusive patterns. |
169 |
+ patterns.extend(self._unanchored) |
170 |
+ if any(not pattern.is_inclusive for pattern in patterns): |
171 |
+ patterns.sort(key=operator.attrgetter('orig_index')) |
172 |
+ return iter(patterns) |
173 |
+ |
174 |
+ return iter(self._unanchored) |
175 |
|
176 |
def match(self, path): |
177 |
""" |
178 |
@@ -34,13 +106,11 @@ class InstallMask(object): |
179 |
@return: True if path matches INSTALL_MASK, False otherwise |
180 |
""" |
181 |
ret = False |
182 |
- for pattern in self._install_mask: |
183 |
- # if pattern starts with -, possibly exclude this path |
184 |
- is_inclusive = not pattern.startswith('-') |
185 |
- if not is_inclusive: |
186 |
- pattern = pattern[1:] |
187 |
+ |
188 |
+ for pattern_obj in self._iter_relevant_patterns(path): |
189 |
+ is_inclusive, pattern = pattern_obj.is_inclusive, pattern_obj.pattern |
190 |
# absolute path pattern |
191 |
- if pattern.startswith('/'): |
192 |
+ if pattern_obj.leading_slash: |
193 |
# handle trailing slash for explicit directory match |
194 |
if path.endswith('/'): |
195 |
pattern = pattern.rstrip('/') + '/' |