1 |
For scalability, use a tree structure to index patterns that are |
2 |
anchored with a leading slash. |
3 |
|
4 |
Patterns anchored with leading slash are indexed by leading non-glob |
5 |
components, making it possible to minimize the number of fnmatch |
6 |
calls. For example: |
7 |
|
8 |
/foo*/bar -> {'.': ['/foo*/bar']} |
9 |
|
10 |
/foo/bar* -> {'foo': {'.': ['/foo/bar*']}} |
11 |
|
12 |
/foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}} |
13 |
|
14 |
Bug: https://bugs.gentoo.org/675826 |
15 |
Signed-off-by: Zac Medico <zmedico@g.o> |
16 |
--- |
17 |
lib/portage/util/install_mask.py | 78 +++++++++++++++++++++++++++++--- |
18 |
1 file changed, 72 insertions(+), 6 deletions(-) |
19 |
|
20 |
diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py |
21 |
index 32627eb05..84305644c 100644 |
22 |
--- a/lib/portage/util/install_mask.py |
23 |
+++ b/lib/portage/util/install_mask.py |
24 |
@@ -3,8 +3,11 @@ |
25 |
|
26 |
__all__ = ['install_mask_dir', 'InstallMask'] |
27 |
|
28 |
+import collections |
29 |
import errno |
30 |
import fnmatch |
31 |
+import functools |
32 |
+import operator |
33 |
import sys |
34 |
|
35 |
from portage import os, _unicode_decode |
36 |
@@ -18,13 +21,76 @@ else: |
37 |
_unicode = unicode |
38 |
|
39 |
|
40 |
+def _defaultdict_tree(): |
41 |
+ return collections.defaultdict(_defaultdict_tree) |
42 |
+ |
43 |
+ |
44 |
+_pattern = collections.namedtuple('_pattern', ('orig_index', 'is_inclusive', 'pattern')) |
45 |
+ |
46 |
+ |
47 |
class InstallMask(object): |
48 |
def __init__(self, install_mask): |
49 |
""" |
50 |
@param install_mask: INSTALL_MASK value |
51 |
@type install_mask: str |
52 |
""" |
53 |
- self._install_mask = install_mask.split() |
54 |
+ # Patterns not anchored with leading slash |
55 |
+ self._unanchored = [] |
56 |
+ |
57 |
+ # Patterns anchored with leading slash are indexed by leading |
58 |
+ # non-glob components, making it possible to minimize the |
59 |
+ # number of fnmatch calls. For example: |
60 |
+ # /foo*/bar -> {'.': ['/foo*/bar']} |
61 |
+ # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}} |
62 |
+ # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}} |
63 |
+ self._anchored = _defaultdict_tree() |
64 |
+ for orig_index, pattern in enumerate(install_mask.split()): |
65 |
+ # if pattern starts with -, possibly exclude this path |
66 |
+ is_inclusive = not pattern.startswith('-') |
67 |
+ if not is_inclusive: |
68 |
+ pattern = pattern[1:] |
69 |
+ pattern_obj = _pattern(orig_index, is_inclusive, pattern) |
70 |
+ # absolute path pattern |
71 |
+ if pattern.startswith('/'): |
72 |
+ current_dir = self._anchored |
73 |
+ for component in list(filter(None, pattern.split('/'))): |
74 |
+ if '*' in component: |
75 |
+ break |
76 |
+ else: |
77 |
+ current_dir = current_dir[component] |
78 |
+ current_dir.setdefault('.', []).append(pattern_obj) |
79 |
+ |
80 |
+ # filename |
81 |
+ else: |
82 |
+ self._unanchored.append(pattern_obj) |
83 |
+ |
84 |
+ def _iter_relevant_patterns(self, path): |
85 |
+ """ |
86 |
+ Iterate over patterns that may be relevant for the given path. |
87 |
+ |
88 |
+ Patterns anchored with leading / are indexed by leading |
89 |
+ non-glob components, making it possible to minimize the |
90 |
+ number of fnmatch calls. |
91 |
+ """ |
92 |
+ current_dir = self._anchored |
93 |
+ components = list(filter(None, path.split('/'))) |
94 |
+ patterns = [] |
95 |
+ patterns.extend(current_dir.get('.', [])) |
96 |
+ for component in components: |
97 |
+ next_dir = current_dir.get(component, None) |
98 |
+ if next_dir is None: |
99 |
+ break |
100 |
+ current_dir = next_dir |
101 |
+ patterns.extend(current_dir.get('.', [])) |
102 |
+ |
103 |
+ if patterns: |
104 |
+ # Sort by original pattern index, since order matters for |
105 |
+ # non-inclusive patterns. |
106 |
+ patterns.extend(self._unanchored) |
107 |
+ patterns.sort(key=operator.attrgetter('orig_index')) |
108 |
+ return iter(patterns) |
109 |
+ |
110 |
+ return iter(self._unanchored) |
111 |
|
112 |
def match(self, path): |
113 |
""" |
114 |
@@ -34,11 +100,10 @@ class InstallMask(object): |
115 |
@return: True if path matches INSTALL_MASK, False otherwise |
116 |
""" |
117 |
ret = False |
118 |
- for pattern in self._install_mask: |
119 |
- # if pattern starts with -, possibly exclude this path |
120 |
- is_inclusive = not pattern.startswith('-') |
121 |
- if not is_inclusive: |
122 |
- pattern = pattern[1:] |
123 |
+ |
124 |
+ for pattern_obj in self._iter_relevant_patterns(path): |
125 |
+ is_inclusive = pattern_obj.is_inclusive |
126 |
+ pattern = pattern_obj.pattern |
127 |
# absolute path pattern |
128 |
if pattern.startswith('/'): |
129 |
# handle trailing slash for explicit directory match |
130 |
@@ -49,6 +114,7 @@ class InstallMask(object): |
131 |
if (fnmatch.fnmatch(path, pattern[1:]) |
132 |
or fnmatch.fnmatch(path, pattern[1:].rstrip('/') + '/*')): |
133 |
ret = is_inclusive |
134 |
+ |
135 |
# filename |
136 |
else: |
137 |
if fnmatch.fnmatch(os.path.basename(path), pattern): |
138 |
-- |
139 |
2.18.1 |