1 |
Author: zmedico |
2 |
Date: 2008-06-09 13:55:06 +0000 (Mon, 09 Jun 2008) |
3 |
New Revision: 10609 |
4 |
|
5 |
Modified: |
6 |
main/trunk/bin/portageq |
7 |
main/trunk/pym/_emerge/__init__.py |
8 |
main/trunk/pym/portage/dbapi/vartree.py |
9 |
Log: |
10 |
Add CONTENTS indexing support for optimization of owner lookups. The |
11 |
vardbapi cache maintains a hash table (inside vdb_metadata.pickle) |
12 |
that serves to index package contents by mapping the basename of a file |
13 |
to a list of possible packages that own it. This is used to optimize |
14 |
owner lookups by narrowing the search down to a smaller number of |
15 |
packages. It increases the size of vdb_metadata.pickle by approximately |
16 |
30% and it's used in the following cases: |
17 |
|
18 |
* When an unexpected file collision occurs (whether |
19 |
or not collision-protect is enabled) |
20 |
|
21 |
* `emerge <filename>` |
22 |
|
23 |
* `portageq owners` |
24 |
|
25 |
The |
26 |
|
27 |
|
28 |
Modified: main/trunk/bin/portageq |
29 |
=================================================================== |
30 |
--- main/trunk/bin/portageq 2008-06-08 20:23:14 UTC (rev 10608) |
31 |
+++ main/trunk/bin/portageq 2008-06-09 13:55:06 UTC (rev 10609) |
32 |
@@ -188,27 +188,22 @@ |
33 |
return 2 |
34 |
files.append(f[len(root):]) |
35 |
|
36 |
- found_owner = False |
37 |
- for cpv in vardb.cpv_all(): |
38 |
- cat, pkg = catsplit(cpv) |
39 |
- mylink = dblink(cat, pkg, root, settings, vartree=vardb.vartree) |
40 |
- myfiles = [] |
41 |
- for f in files: |
42 |
- if mylink.isowner(f, root): |
43 |
- myfiles.append(f) |
44 |
- if myfiles: |
45 |
- found_owner = True |
46 |
- sys.stdout.write("%s\n" % cpv) |
47 |
- for f in myfiles: |
48 |
- sys.stdout.write("\t%s\n" % \ |
49 |
- os.path.join(root, f.lstrip(os.path.sep))) |
50 |
- sys.stdout.flush() |
51 |
- if not found_owner: |
52 |
- sys.stderr.write("None of the installed packages claim the file(s).\n") |
53 |
- sys.stderr.flush() |
54 |
- return 1 |
55 |
- return 0 |
56 |
+ owners = vardb._owners.get_owners(files) |
57 |
|
58 |
+ for pkg, owned_files in owners.iteritems(): |
59 |
+ cpv = pkg.mycpv |
60 |
+ sys.stdout.write("%s\n" % cpv) |
61 |
+ for f in sorted(owned_files): |
62 |
+ sys.stdout.write("\t%s\n" % \ |
63 |
+ os.path.join(root, f.lstrip(os.path.sep))) |
64 |
+ if owners: |
65 |
+ sys.stdout.flush() |
66 |
+ return 0 |
67 |
+ |
68 |
+ sys.stderr.write("None of the installed packages claim the file(s).\n") |
69 |
+ sys.stderr.flush() |
70 |
+ return 1 |
71 |
+ |
72 |
owners.uses_root = True |
73 |
|
74 |
def best_visible(argv): |
75 |
|
76 |
Modified: main/trunk/pym/_emerge/__init__.py |
77 |
=================================================================== |
78 |
--- main/trunk/pym/_emerge/__init__.py 2008-06-08 20:23:14 UTC (rev 10608) |
79 |
+++ main/trunk/pym/_emerge/__init__.py 2008-06-09 13:55:06 UTC (rev 10609) |
80 |
@@ -2560,6 +2560,7 @@ |
81 |
myroot = self.target_root |
82 |
dbs = self._filtered_trees[myroot]["dbs"] |
83 |
vardb = self.trees[myroot]["vartree"].dbapi |
84 |
+ real_vardb = self._trees_orig[myroot]["vartree"].dbapi |
85 |
portdb = self.trees[myroot]["porttree"].dbapi |
86 |
bindb = self.trees[myroot]["bintree"].dbapi |
87 |
pkgsettings = self.pkgsettings[myroot] |
88 |
@@ -2638,16 +2639,12 @@ |
89 |
" $ROOT.\n") % x, noiselevel=-1) |
90 |
return 0, [] |
91 |
relative_path = x[len(myroot):] |
92 |
- vartree = self._trees_orig[myroot]["vartree"] |
93 |
owner_cpv = None |
94 |
- for cpv in vardb.cpv_all(): |
95 |
- self.spinner.update() |
96 |
- cat, pf = portage.catsplit(cpv) |
97 |
- if portage.dblink(cat, pf, myroot, |
98 |
- pkgsettings, vartree=vartree).isowner( |
99 |
- relative_path, myroot): |
100 |
- owner_cpv = cpv |
101 |
- break |
102 |
+ for pkg, relative_path in \ |
103 |
+ real_vardb._owners.iter_owners([relative_path]): |
104 |
+ owner_cpv = pkg.mycpv |
105 |
+ break |
106 |
+ |
107 |
if owner_cpv is None: |
108 |
portage.writemsg(("\n\n!!! '%s' is not claimed " + \ |
109 |
"by any package.\n") % x, noiselevel=-1) |
110 |
|
111 |
Modified: main/trunk/pym/portage/dbapi/vartree.py |
112 |
=================================================================== |
113 |
--- main/trunk/pym/portage/dbapi/vartree.py 2008-06-08 20:23:14 UTC (rev 10608) |
114 |
+++ main/trunk/pym/portage/dbapi/vartree.py 2008-06-09 13:55:06 UTC (rev 10609) |
115 |
@@ -238,6 +238,9 @@ |
116 |
_excluded_dirs = re.compile(r'^(\..*|-MERGING-.*|' + \ |
117 |
"|".join(_excluded_dirs) + r')$') |
118 |
|
119 |
+ _aux_cache_version = "1" |
120 |
+ _owners_cache_version = "1" |
121 |
+ |
122 |
# Number of uncached packages to trigger cache update, since |
123 |
# it's wasteful to update it for every vdb change. |
124 |
_aux_cache_threshold = 5 |
125 |
@@ -275,8 +278,7 @@ |
126 |
"EAPI", "HOMEPAGE", "IUSE", "KEYWORDS", |
127 |
"LICENSE", "PDEPEND", "PROVIDE", "RDEPEND", |
128 |
"repository", "RESTRICT" , "SLOT", "USE"]) |
129 |
- self._aux_cache = None |
130 |
- self._aux_cache_version = "1" |
131 |
+ self._aux_cache_obj = None |
132 |
self._aux_cache_filename = os.path.join(self.root, |
133 |
CACHE_PATH.lstrip(os.path.sep), "vdb_metadata.pickle") |
134 |
self._counter_path = os.path.join(root, |
135 |
@@ -290,6 +292,7 @@ |
136 |
self.plib_registry = None |
137 |
|
138 |
self.linkmap = LinkageMap(self) |
139 |
+ self._owners = self._owners_db(self) |
140 |
|
141 |
def getpath(self, mykey, filename=None): |
142 |
rValue = os.path.join(self.root, VDB_PATH, mykey) |
143 |
@@ -562,6 +565,7 @@ |
144 |
if self._aux_cache is not None and \ |
145 |
len(self._aux_cache["modified"]) >= self._aux_cache_threshold and \ |
146 |
secpass >= 2: |
147 |
+ self._owners.populate() # index any unindexed contents |
148 |
valid_nodes = set(self.cpv_all()) |
149 |
for cpv in self._aux_cache["packages"].keys(): |
150 |
if cpv not in valid_nodes: |
151 |
@@ -577,6 +581,54 @@ |
152 |
pass |
153 |
self._aux_cache["modified"] = set() |
154 |
|
155 |
+ @property |
156 |
+ def _aux_cache(self): |
157 |
+ if self._aux_cache_obj is None: |
158 |
+ self._aux_cache_init() |
159 |
+ return self._aux_cache_obj |
160 |
+ |
161 |
+ def _aux_cache_init(self): |
162 |
+ try: |
163 |
+ f = open(self._aux_cache_filename) |
164 |
+ mypickle = cPickle.Unpickler(f) |
165 |
+ mypickle.find_global = None |
166 |
+ aux_cache = mypickle.load() |
167 |
+ f.close() |
168 |
+ del f |
169 |
+ except (IOError, OSError, EOFError, cPickle.UnpicklingError), e: |
170 |
+ if isinstance(e, cPickle.UnpicklingError): |
171 |
+ writemsg("!!! Error loading '%s': %s\n" % \ |
172 |
+ (self._aux_cache_filename, str(e)), noiselevel=-1) |
173 |
+ del e |
174 |
+ |
175 |
+ if not aux_cache or \ |
176 |
+ not isinstance(aux_cache, dict) or \ |
177 |
+ aux_cache.get("version") != self._aux_cache_version or \ |
178 |
+ not aux_cache.get("packages"): |
179 |
+ aux_cache = {"version": self._aux_cache_version} |
180 |
+ aux_cache["packages"] = {} |
181 |
+ |
182 |
+ owners = aux_cache.get("owners") |
183 |
+ if owners is not None: |
184 |
+ if not isinstance(owners, dict): |
185 |
+ owners = None |
186 |
+ elif "version" not in owners: |
187 |
+ owners = None |
188 |
+ elif owners["version"] != self._owners_cache_version: |
189 |
+ owners = None |
190 |
+ elif "base_names" not in owners: |
191 |
+ owners = None |
192 |
+ |
193 |
+ if owners is None: |
194 |
+ owners = { |
195 |
+ "base_names" : {}, |
196 |
+ "version" : self._owners_cache_version |
197 |
+ } |
198 |
+ aux_cache["owners"] = owners |
199 |
+ |
200 |
+ aux_cache["modified"] = set() |
201 |
+ self._aux_cache_obj = aux_cache |
202 |
+ |
203 |
def aux_get(self, mycpv, wants): |
204 |
"""This automatically caches selected keys that are frequently needed |
205 |
by emerge for dependency calculations. The cached metadata is |
206 |
@@ -601,26 +653,6 @@ |
207 |
cache_these = set(self._aux_cache_keys) |
208 |
cache_these.update(cache_these_wants) |
209 |
|
210 |
- if self._aux_cache is None: |
211 |
- try: |
212 |
- f = open(self._aux_cache_filename) |
213 |
- mypickle = cPickle.Unpickler(f) |
214 |
- mypickle.find_global = None |
215 |
- self._aux_cache = mypickle.load() |
216 |
- f.close() |
217 |
- del f |
218 |
- except (IOError, OSError, EOFError, cPickle.UnpicklingError), e: |
219 |
- if isinstance(e, cPickle.UnpicklingError): |
220 |
- writemsg("!!! Error loading '%s': %s\n" % \ |
221 |
- (self._aux_cache_filename, str(e)), noiselevel=-1) |
222 |
- del e |
223 |
- if not self._aux_cache or \ |
224 |
- not isinstance(self._aux_cache, dict) or \ |
225 |
- self._aux_cache.get("version") != self._aux_cache_version or \ |
226 |
- not self._aux_cache.get("packages"): |
227 |
- self._aux_cache = {"version": self._aux_cache_version} |
228 |
- self._aux_cache["packages"] = {} |
229 |
- self._aux_cache["modified"] = set() |
230 |
mydir = self.getpath(mycpv) |
231 |
mydir_stat = None |
232 |
try: |
233 |
@@ -801,6 +833,173 @@ |
234 |
write_atomic(self._counter_path, str(counter)) |
235 |
return counter |
236 |
|
237 |
+ def _dblink(self, cpv): |
238 |
+ category, pf = catsplit(cpv) |
239 |
+ return dblink(category, pf, self.root, |
240 |
+ self.settings, vartree=self.vartree) |
241 |
+ |
242 |
+ class _owners_cache(object): |
243 |
+ """ |
244 |
+ This class maintains an hash table that serves to index package |
245 |
+ contents by mapping the basename of file to a list of possible |
246 |
+ packages that own it. This is used to optimize owner lookups |
247 |
+ by narrowing the search down to a smaller number of packages. |
248 |
+ """ |
249 |
+ try: |
250 |
+ from hashlib import md5 as _new_hash |
251 |
+ except ImportError: |
252 |
+ from md5 import new as _new_hash |
253 |
+ |
254 |
+ _hash_bits = 16 |
255 |
+ _hex_chars = _hash_bits / 4 |
256 |
+ |
257 |
+ def __init__(self, vardb): |
258 |
+ self._vardb = vardb |
259 |
+ |
260 |
+ def add(self, cpv): |
261 |
+ root_len = len(self._vardb.root) |
262 |
+ contents = self._vardb._dblink(cpv).getcontents() |
263 |
+ pkg_hash = self._hash_pkg(cpv) |
264 |
+ if not contents: |
265 |
+ # Empty path is a code used to represent empty contents. |
266 |
+ self._add_path("", pkg_hash) |
267 |
+ for x in contents: |
268 |
+ relative_path = x[root_len:] |
269 |
+ self._add_path(x, pkg_hash) |
270 |
+ self._vardb._aux_cache["modified"].add(cpv) |
271 |
+ |
272 |
+ def _add_path(self, path, pkg_hash): |
273 |
+ """ |
274 |
+ Empty path is a code that represents empty contents. |
275 |
+ """ |
276 |
+ if path: |
277 |
+ name = os.path.basename(path.rstrip(os.path.sep)) |
278 |
+ if not name: |
279 |
+ return |
280 |
+ else: |
281 |
+ name = path |
282 |
+ name_hash = self._hash_str(name) |
283 |
+ base_names = self._vardb._aux_cache["owners"]["base_names"] |
284 |
+ pkgs = base_names.get(name_hash) |
285 |
+ if pkgs is None: |
286 |
+ pkgs = {} |
287 |
+ base_names[name_hash] = pkgs |
288 |
+ pkgs[pkg_hash] = None |
289 |
+ |
290 |
+ def _hash_str(self, s): |
291 |
+ h = self._new_hash() |
292 |
+ h.update(s) |
293 |
+ h = h.hexdigest() |
294 |
+ h = h[-self._hex_chars:] |
295 |
+ h = int(h, 16) |
296 |
+ return h |
297 |
+ |
298 |
+ def _hash_pkg(self, cpv): |
299 |
+ counter, mtime = self._vardb.aux_get( |
300 |
+ cpv, ["COUNTER", "_mtime_"]) |
301 |
+ try: |
302 |
+ counter = int(counter) |
303 |
+ except ValueError: |
304 |
+ counter = 0 |
305 |
+ return (cpv, counter, mtime) |
306 |
+ |
307 |
+ class _owners_db(object): |
308 |
+ |
309 |
+ def __init__(self, vardb): |
310 |
+ self._vardb = vardb |
311 |
+ |
312 |
+ def populate(self): |
313 |
+ self._populate() |
314 |
+ |
315 |
+ def _populate(self): |
316 |
+ owners_cache = vardbapi._owners_cache(self._vardb) |
317 |
+ cached_hashes = set() |
318 |
+ base_names = self._vardb._aux_cache["owners"]["base_names"] |
319 |
+ |
320 |
+ # Take inventory of all cached package hashes. |
321 |
+ for hash_values in base_names.itervalues(): |
322 |
+ cached_hashes.update(hash_values) |
323 |
+ |
324 |
+ # Create sets of valid package hashes and uncached packages. |
325 |
+ uncached_pkgs = set() |
326 |
+ hash_pkg = owners_cache._hash_pkg |
327 |
+ valid_pkg_hashes = set() |
328 |
+ for cpv in self._vardb.cpv_all(): |
329 |
+ hash_value = hash_pkg(cpv) |
330 |
+ valid_pkg_hashes.add(hash_value) |
331 |
+ if hash_value not in cached_hashes: |
332 |
+ uncached_pkgs.add(cpv) |
333 |
+ |
334 |
+ # Cache any missing packages. |
335 |
+ for cpv in uncached_pkgs: |
336 |
+ owners_cache.add(cpv) |
337 |
+ |
338 |
+ # Delete any stale cache. |
339 |
+ stale_hashes = cached_hashes.difference(valid_pkg_hashes) |
340 |
+ if stale_hashes: |
341 |
+ for base_name_hash, bucket in base_names.items(): |
342 |
+ for hash_value in stale_hashes.intersection(bucket): |
343 |
+ del bucket[hash_value] |
344 |
+ if not bucket: |
345 |
+ del base_names[base_name_hash] |
346 |
+ |
347 |
+ return owners_cache |
348 |
+ |
349 |
+ def get_owners(self, path_iter): |
350 |
+ """ |
351 |
+ @return the owners as a dblink -> set(files) mapping. |
352 |
+ """ |
353 |
+ owners = {} |
354 |
+ for owner, f in self.iter_owners(path_iter): |
355 |
+ owned_files = owners.get(owner) |
356 |
+ if owned_files is None: |
357 |
+ owned_files = set() |
358 |
+ owners[owner] = owned_files |
359 |
+ owned_files.add(f) |
360 |
+ return owners |
361 |
+ |
362 |
+ def iter_owners(self, path_iter): |
363 |
+ """ |
364 |
+ Iterate over tuples of (dblink, path). In order to avoid |
365 |
+ consuming too many resources for too much time, resources |
366 |
+ are only allocated for the duration of a given iter_owners() |
367 |
+ call. Therefore, to maximize reuse of resources when searching |
368 |
+ for multiple files, it's best to search for them all in a single |
369 |
+ call. |
370 |
+ """ |
371 |
+ |
372 |
+ owners_cache = self._populate() |
373 |
+ |
374 |
+ vardb = self._vardb |
375 |
+ root = vardb.root |
376 |
+ hash_pkg = owners_cache._hash_pkg |
377 |
+ hash_str = owners_cache._hash_str |
378 |
+ base_names = self._vardb._aux_cache["owners"]["base_names"] |
379 |
+ |
380 |
+ dblink_cache = {} |
381 |
+ |
382 |
+ def dblink(cpv): |
383 |
+ x = dblink_cache.get(cpv) |
384 |
+ if x is None: |
385 |
+ x = self._vardb._dblink(cpv) |
386 |
+ dblink_cache[cpv] = x |
387 |
+ return x |
388 |
+ |
389 |
+ for path in path_iter: |
390 |
+ name = os.path.basename(path.rstrip(os.path.sep)) |
391 |
+ if not name: |
392 |
+ continue |
393 |
+ |
394 |
+ name_hash = hash_str(name) |
395 |
+ pkgs = base_names.get(name_hash) |
396 |
+ if pkgs is not None: |
397 |
+ for hash_value in pkgs: |
398 |
+ cpv, counter, mtime = hash_value |
399 |
+ if hash_pkg(cpv) != hash_value: |
400 |
+ continue |
401 |
+ if dblink(cpv).isowner(path, root): |
402 |
+ yield dblink(cpv), path |
403 |
+ |
404 |
class vartree(object): |
405 |
"this tree will scan a var/db/pkg database located at root (passed to init)" |
406 |
def __init__(self, root="/", virtual=None, clone=None, categories=None, |
407 |
@@ -2202,36 +2401,30 @@ |
408 |
|
409 |
eerror(msg) |
410 |
|
411 |
- if collision_protect: |
412 |
+ msg = [] |
413 |
+ msg.append("") |
414 |
+ msg.append("Searching all installed" + \ |
415 |
+ " packages for file collisions...") |
416 |
+ msg.append("") |
417 |
+ msg.append("Press Ctrl-C to Stop") |
418 |
+ msg.append("") |
419 |
+ eerror(msg) |
420 |
+ |
421 |
+ owners = self.vartree.dbapi._owners.get_owners(files) |
422 |
+ self.vartree.dbapi.flush_cache() |
423 |
+ |
424 |
+ for pkg, owned_files in owners.iteritems(): |
425 |
+ cpv = pkg.mycpv |
426 |
msg = [] |
427 |
- msg.append("") |
428 |
- msg.append("Searching all installed" + \ |
429 |
- " packages for file collisions...") |
430 |
- msg.append("") |
431 |
- msg.append("Press Ctrl-C to Stop") |
432 |
- msg.append("") |
433 |
+ msg.append("%s" % cpv) |
434 |
+ for f in sorted(owned_files): |
435 |
+ msg.append("\t%s" % os.path.join(destroot, |
436 |
+ f.lstrip(os.path.sep))) |
437 |
eerror(msg) |
438 |
- |
439 |
- found_owner = False |
440 |
- for cpv in self.vartree.dbapi.cpv_all(): |
441 |
- cat, pkg = catsplit(cpv) |
442 |
- mylink = dblink(cat, pkg, destroot, self.settings, |
443 |
- vartree=self.vartree) |
444 |
- mycollisions = [] |
445 |
- for f in collisions: |
446 |
- if mylink.isowner(f, destroot): |
447 |
- mycollisions.append(f) |
448 |
- if mycollisions: |
449 |
- found_owner = True |
450 |
- msg = [] |
451 |
- msg.append("%s" % cpv) |
452 |
- for f in mycollisions: |
453 |
- msg.append("\t%s" % os.path.join(destroot, |
454 |
- f.lstrip(os.path.sep))) |
455 |
- eerror(msg) |
456 |
- if not found_owner: |
457 |
- eerror(["None of the installed" + \ |
458 |
- " packages claim the file(s)."]) |
459 |
+ if not owners: |
460 |
+ eerror(["None of the installed" + \ |
461 |
+ " packages claim the file(s)."]) |
462 |
+ if collision_protect: |
463 |
return 1 |
464 |
|
465 |
writemsg_stdout(">>> Merging %s to %s\n" % (self.mycpv, destroot)) |
466 |
|
467 |
-- |
468 |
gentoo-commits@l.g.o mailing list |