Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] INSTALL_MASK: index patterns anchored with leading slash (bug 675826)
Date: Sun, 20 Jan 2019 11:27:14
Message-Id: 20190120112453.28751-1-zmedico@gentoo.org
1 For scalability, use a tree structure to index patterns that are
2 anchored with a leading slash.
3
4 Patterns anchored with leading slash are indexed by leading non-glob
5 components, making it possible to minimize the number of fnmatch
6 calls. For example:
7
8 /foo*/bar -> {'.': ['/foo*/bar']}
9
10 /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
11
12 /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
13
14 Bug: https://bugs.gentoo.org/675826
15 Signed-off-by: Zac Medico <zmedico@g.o>
16 ---
17 lib/portage/util/install_mask.py | 78 +++++++++++++++++++++++++++++---
18 1 file changed, 72 insertions(+), 6 deletions(-)
19
20 diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py
21 index 32627eb05..84305644c 100644
22 --- a/lib/portage/util/install_mask.py
23 +++ b/lib/portage/util/install_mask.py
24 @@ -3,8 +3,11 @@
25
26 __all__ = ['install_mask_dir', 'InstallMask']
27
28 +import collections
29 import errno
30 import fnmatch
31 +import functools
32 +import operator
33 import sys
34
35 from portage import os, _unicode_decode
36 @@ -18,13 +21,76 @@ else:
37 _unicode = unicode
38
39
40 +def _defaultdict_tree():
41 + return collections.defaultdict(_defaultdict_tree)
42 +
43 +
44 +_pattern = collections.namedtuple('_pattern', ('orig_index', 'is_inclusive', 'pattern'))
45 +
46 +
47 class InstallMask(object):
48 def __init__(self, install_mask):
49 """
50 @param install_mask: INSTALL_MASK value
51 @type install_mask: str
52 """
53 - self._install_mask = install_mask.split()
54 + # Patterns not anchored with leading slash
55 + self._unanchored = []
56 +
57 + # Patterns anchored with leading slash are indexed by leading
58 + # non-glob components, making it possible to minimize the
59 + # number of fnmatch calls. For example:
60 + # /foo*/bar -> {'.': ['/foo*/bar']}
61 + # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
62 + # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
63 + self._anchored = _defaultdict_tree()
64 + for orig_index, pattern in enumerate(install_mask.split()):
65 + # if pattern starts with -, possibly exclude this path
66 + is_inclusive = not pattern.startswith('-')
67 + if not is_inclusive:
68 + pattern = pattern[1:]
69 + pattern_obj = _pattern(orig_index, is_inclusive, pattern)
70 + # absolute path pattern
71 + if pattern.startswith('/'):
72 + current_dir = self._anchored
73 + for component in list(filter(None, pattern.split('/'))):
74 + if '*' in component:
75 + break
76 + else:
77 + current_dir = current_dir[component]
78 + current_dir.setdefault('.', []).append(pattern_obj)
79 +
80 + # filename
81 + else:
82 + self._unanchored.append(pattern_obj)
83 +
84 + def _iter_relevant_patterns(self, path):
85 + """
86 + Iterate over patterns that may be relevant for the given path.
87 +
88 + Patterns anchored with leading / are indexed by leading
89 + non-glob components, making it possible to minimize the
90 + number of fnmatch calls.
91 + """
92 + current_dir = self._anchored
93 + components = list(filter(None, path.split('/')))
94 + patterns = []
95 + patterns.extend(current_dir.get('.', []))
96 + for component in components:
97 + next_dir = current_dir.get(component, None)
98 + if next_dir is None:
99 + break
100 + current_dir = next_dir
101 + patterns.extend(current_dir.get('.', []))
102 +
103 + if patterns:
104 + # Sort by original pattern index, since order matters for
105 + # non-inclusive patterns.
106 + patterns.extend(self._unanchored)
107 + patterns.sort(key=operator.attrgetter('orig_index'))
108 + return iter(patterns)
109 +
110 + return iter(self._unanchored)
111
112 def match(self, path):
113 """
114 @@ -34,11 +100,10 @@ class InstallMask(object):
115 @return: True if path matches INSTALL_MASK, False otherwise
116 """
117 ret = False
118 - for pattern in self._install_mask:
119 - # if pattern starts with -, possibly exclude this path
120 - is_inclusive = not pattern.startswith('-')
121 - if not is_inclusive:
122 - pattern = pattern[1:]
123 +
124 + for pattern_obj in self._iter_relevant_patterns(path):
125 + is_inclusive = pattern_obj.is_inclusive
126 + pattern = pattern_obj.pattern
127 # absolute path pattern
128 if pattern.startswith('/'):
129 # handle trailing slash for explicit directory match
130 @@ -49,6 +114,7 @@ class InstallMask(object):
131 if (fnmatch.fnmatch(path, pattern[1:])
132 or fnmatch.fnmatch(path, pattern[1:].rstrip('/') + '/*')):
133 ret = is_inclusive
134 +
135 # filename
136 else:
137 if fnmatch.fnmatch(os.path.basename(path), pattern):
138 --
139 2.18.1