Gentoo Archives: gentoo-commits

From: "Zac Medico (zmedico)" <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] portage r11447 - main/trunk/pym/portage/dbapi
Date: Fri, 22 Aug 2008 06:21:31
Message-Id: E1KWQ1X-0004tM-Qd@stork.gentoo.org
1 Author: zmedico
2 Date: 2008-08-22 06:21:26 +0000 (Fri, 22 Aug 2008)
3 New Revision: 11447
4
5 Modified:
6 main/trunk/pym/portage/dbapi/vartree.py
7 Log:
8 Optimize LinkageMap to use tuples of device and inode numbers from stat
9 calls, instead of paths from realpath, as unique keys for identification
10 of files. This is the same approach used by dblink.isowner() for cases
11 in which path comparison fails due to symlinks.
12
13 Thanks to Lucian Poston <lucianposton@×××××.com> for submitting this patch
14 (along with the missing-rebuild package set which I haven't merged yet). These
15 patches are hosted in the following location:
16
17 http://repo.or.cz/w/revdep-rebuild-reimplementation.git?a=tree;h=refs/heads/rc3;hb=refs/heads/rc3
18
19
20 Modified: main/trunk/pym/portage/dbapi/vartree.py
21 ===================================================================
22 --- main/trunk/pym/portage/dbapi/vartree.py 2008-08-21 19:49:56 UTC (rev 11446)
23 +++ main/trunk/pym/portage/dbapi/vartree.py 2008-08-22 06:21:26 UTC (rev 11447)
24 @@ -143,10 +143,12 @@
25 self._dbapi = vardbapi
26 self._libs = {}
27 self._obj_properties = {}
28 - self._defpath = getlibpaths()
29 -
30 + self._defpath = set(getlibpaths())
31 + self._obj_key_cache = {}
32 +
33 def rebuild(self, include_file=None):
34 libs = {}
35 + obj_key_cache = {}
36 obj_properties = {}
37 lines = []
38 for cpv in self._dbapi.cpv_all():
39 @@ -176,29 +178,61 @@
40 # insufficient field length
41 continue
42 arch = fields[0]
43 - obj = os.path.realpath(fields[1])
44 + obj = fields[1]
45 + obj_key = self._generateObjKey(obj)
46 soname = fields[2]
47 - path = filter(None, fields[3].replace(
48 + path = set([normalize_path(x)
49 + for x in filter(None, fields[3].replace(
50 "${ORIGIN}", os.path.dirname(obj)).replace(
51 - "$ORIGIN", os.path.dirname(obj)).split(":"))
52 + "$ORIGIN", os.path.dirname(obj)).split(":"))])
53 needed = filter(None, fields[4].split(","))
54 if soname:
55 - libs.setdefault(soname, {arch: {"providers": [], "consumers": []}})
56 - libs[soname].setdefault(arch, {"providers": [], "consumers": []})
57 - libs[soname][arch]["providers"].append(obj)
58 + libs.setdefault(soname, \
59 + {arch: {"providers": set(), "consumers": set()}})
60 + libs[soname].setdefault(arch, \
61 + {"providers": set(), "consumers": set()})
62 + libs[soname][arch]["providers"].add(obj_key)
63 for x in needed:
64 - libs.setdefault(x, {arch: {"providers": [], "consumers": []}})
65 - libs[x].setdefault(arch, {"providers": [], "consumers": []})
66 - libs[x][arch]["consumers"].append(obj)
67 - obj_properties[obj] = (arch, needed, path, soname)
68 -
69 + libs.setdefault(x, \
70 + {arch: {"providers": set(), "consumers": set()}})
71 + libs[x].setdefault(arch, {"providers": set(), "consumers": set()})
72 + libs[x][arch]["consumers"].add(obj_key)
73 + obj_key_cache.setdefault(obj, obj_key)
74 + # All object paths are added into the obj_properties tuple
75 + obj_properties.setdefault(obj_key, \
76 + (arch, needed, path, soname, set()))[4].add(obj)
77 +
78 self._libs = libs
79 self._obj_properties = obj_properties
80 + self._obj_key_cache = obj_key_cache
81
82 - def listBrokenBinaries(self):
83 + def _generateObjKey(self, obj):
84 """
85 + Generate obj key for a given object.
86 +
87 + @param obj: path to an existing file
88 + @type obj: string (example: '/usr/bin/bar')
89 + @rtype: 2-tuple of longs if obj exists. string if obj does not exist.
90 + @return:
91 + 1. 2-tuple of obj's inode and device from a stat call, if obj exists.
92 + 2. realpath of object if obj does not exist.
93 +
94 + """
95 + try:
96 + obj_st = os.stat(obj)
97 + except OSError:
98 + # Use the realpath as the key if the file does not exist on the
99 + # filesystem.
100 + return os.path.realpath(obj)
101 + # Return a tuple of the device and inode.
102 + return (obj_st.st_dev, obj_st.st_ino)
103 +
104 + def listBrokenBinaries(self, debug=False):
105 + """
106 Find binaries and their needed sonames, which have no providers.
107
108 + @param debug: Boolean to enable debug output
109 + @type debug: Boolean
110 @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
111 @return: The return value is an object -> set-of-sonames mapping, where
112 object is a broken binary and the set consists of sonames needed by
113 @@ -208,65 +242,66 @@
114 class LibraryCache(object):
115
116 """
117 - Caches sonames and realpaths associated with paths.
118 + Caches properties associated with paths.
119
120 The purpose of this class is to prevent multiple calls of
121 - os.path.realpath and os.path.isfile on the same paths.
122 + _generateObjKey on the same paths.
123
124 """
125
126 def __init__(cache_self):
127 cache_self.cache = {}
128
129 - def get(cache_self, path):
130 + def get(cache_self, obj):
131 """
132 - Caches and returns the soname and realpath for a path.
133 + Caches and returns properties associated with an object.
134
135 - @param path: absolute path (can be symlink)
136 - @type path: string (example: '/usr/lib/libfoo.so')
137 - @rtype: 3-tuple with types (string or None, string, boolean)
138 - @return: 3-tuple with the following components:
139 - 1. soname as a string or None if it does not exist,
140 - 2. realpath as a string,
141 - 3. the result of os.path.isfile(realpath)
142 - (example: ('libfoo.so.1', '/usr/lib/libfoo.so.1.5.1', True))
143 + @param obj: absolute path (can be symlink)
144 + @type obj: string (example: '/usr/lib/libfoo.so')
145 + @rtype: 4-tuple with types
146 + (string or None, string or None, 2-tuple, Boolean)
147 + @return: 4-tuple with the following components:
148 + 1. arch as a string or None if it does not exist,
149 + 2. soname as a string or None if it does not exist,
150 + 3. obj_key as 2-tuple,
151 + 4. Boolean representing whether the object exists.
152 + (example: ('X86_64', 'libfoo.so.1', (123L, 456L), True))
153
154 """
155 - if path in cache_self.cache:
156 - return cache_self.cache[path]
157 + if obj in cache_self.cache:
158 + return cache_self.cache[obj]
159 else:
160 - realpath = os.path.realpath(path)
161 + if obj in self._obj_key_cache:
162 + obj_key = self._obj_key_cache.get(obj)
163 + else:
164 + obj_key = self._generateObjKey(obj)
165 # Check that the library exists on the filesystem.
166 - if os.path.isfile(realpath):
167 - # Get the soname from LinkageMap._obj_properties if it
168 - # exists. Otherwise, None.
169 - soname = self._obj_properties.get(realpath, (None,)*3)[3]
170 - # Both path and realpath are cached and the result is
171 - # returned.
172 - cache_self.cache.setdefault(realpath, \
173 - (soname, realpath, True))
174 - return cache_self.cache.setdefault(path, \
175 - (soname, realpath, True))
176 + if isinstance(obj_key, tuple):
177 + # Get the arch and soname from LinkageMap._obj_properties if
178 + # it exists. Otherwise, None.
179 + arch, _, _, soname, _ = \
180 + self._obj_properties.get(obj_key, (None,)*5)
181 + return cache_self.cache.setdefault(obj, \
182 + (arch, soname, obj_key, True))
183 else:
184 - # realpath is not cached here, because the majority of cases
185 - # where realpath is not a file, path is the same as realpath.
186 - # Thus storing twice slows down the cache performance.
187 - return cache_self.cache.setdefault(path, \
188 - (None, realpath, False))
189 + return cache_self.cache.setdefault(obj, \
190 + (None, None, obj_key, False))
191
192 - debug = False
193 rValue = {}
194 cache = LibraryCache()
195 providers = self.listProviders()
196
197 - # Iterate over all binaries and their providers.
198 - for obj, sonames in providers.items():
199 + # Iterate over all obj_keys and their providers.
200 + for obj_key, sonames in providers.items():
201 + arch, _, path, _, objs = self._obj_properties[obj_key]
202 + path = path.union(self._defpath)
203 # Iterate over each needed soname and the set of library paths that
204 # fulfill the soname to determine if the dependency is broken.
205 for soname, libraries in sonames.items():
206 # validLibraries is used to store libraries, which satisfy soname,
207 # so if no valid libraries are found, the soname is not satisfied
208 - # for obj. Thus obj must be emerged.
209 + # for obj_key. If unsatisfied, objects associated with obj_key
210 + # must be emerged.
211 validLibraries = set()
212 # It could be the case that the library to satisfy the soname is
213 # not in the obj's runpath, but a symlink to the library is (eg
214 @@ -274,67 +309,60 @@
215 # does not catalog symlinks, broken or missing symlinks may go
216 # unnoticed. As a result of these cases, check that a file with
217 # the same name as the soname exists in obj's runpath.
218 - path = self._obj_properties[obj][2] + self._defpath
219 - for d in path:
220 - cachedSoname, cachedRealpath, cachedExists = \
221 - cache.get(os.path.join(d, soname))
222 - # Check that the this library provides the needed soname. Doing
223 + # XXX If we catalog symlinks in LinkageMap, this could be improved.
224 + for directory in path:
225 + cachedArch, cachedSoname, cachedKey, cachedExists = \
226 + cache.get(os.path.join(directory, soname))
227 + # Check that this library provides the needed soname. Doing
228 # this, however, will cause consumers of libraries missing
229 # sonames to be unnecessarily emerged. (eg libmix.so)
230 - if cachedSoname == soname:
231 - validLibraries.add(cachedRealpath)
232 - if debug and cachedRealpath not in libraries:
233 + if cachedSoname == soname and cachedArch == arch:
234 + validLibraries.add(cachedKey)
235 + if debug and cachedKey not in \
236 + set(map(self._obj_key_cache.get, libraries)):
237 + # XXX This is most often due to soname symlinks not in
238 + # a library's directory. We could catalog symlinks in
239 + # LinkageMap to avoid checking for this edge case here.
240 print "Found provider outside of findProviders:", \
241 - os.path.join(d, soname), "->", cachedRealpath
242 + os.path.join(directory, soname), "->", \
243 + self._obj_properties[cachedKey][4], libraries
244 # A valid library has been found, so there is no need to
245 # continue.
246 break
247 - if debug and cachedRealpath in self._obj_properties:
248 + if debug and cachedArch == arch and \
249 + cachedKey in self._obj_properties:
250 print "Broken symlink or missing/bad soname:", \
251 - os.path.join(d, soname), '->', cachedRealpath, \
252 - "with soname", cachedSoname, "but expecting", soname
253 + os.path.join(directory, soname), '->', \
254 + self._obj_properties[cachedKey], "with soname", \
255 + cachedSoname, "but expecting", soname
256 # This conditional checks if there are no libraries to satisfy the
257 # soname (empty set).
258 if not validLibraries:
259 - rValue.setdefault(obj, set()).add(soname)
260 + for obj in objs:
261 + rValue.setdefault(obj, set()).add(soname)
262 # If no valid libraries have been found by this point, then
263 # there are no files named with the soname within obj's runpath,
264 # but if there are libraries (from the providers mapping), it is
265 - # likely that symlinks or the actual libraries are missing.
266 - # Thus possible symlinks and missing libraries are added to
267 - # rValue in order to emerge corrupt library packages.
268 + # likely that soname symlinks or the actual libraries are
269 + # missing or broken. Thus those libraries are added to rValue
270 + # in order to emerge corrupt library packages.
271 for lib in libraries:
272 - cachedSoname, cachedRealpath, cachedExists = cache.get(lib)
273 - if not cachedExists:
274 - # The library's package needs to be emerged to repair the
275 - # missing library.
276 - rValue.setdefault(lib, set()).add(soname)
277 - else:
278 - # A library providing the soname exists in the obj's
279 - # runpath, but no file named as the soname exists, so add
280 - # the path constructed from the lib's directory and the
281 - # soname to rValue to fix cases of vanishing (or modified)
282 - # symlinks. This path is not guaranteed to exist, but it
283 - # follows the symlink convention found in the majority of
284 - # packages.
285 - rValue.setdefault(os.path.join(os.path.dirname(lib), \
286 - soname), set()).add(soname)
287 + rValue.setdefault(lib, set()).add(soname)
288 if debug:
289 - if not cachedExists:
290 + if not os.path.isfile(lib):
291 print "Missing library:", lib
292 else:
293 print "Possibly missing symlink:", \
294 os.path.join(os.path.dirname(lib), soname)
295 -
296 return rValue
297
298 def listProviders(self):
299 """
300 - Find the providers for all binaries.
301 + Find the providers for all object keys in LinkageMap.
302
303 @rtype: dict (example:
304 - {'/usr/bin/foo': {'libbar.so': set(['/lib/libbar.so.1.5'])}})
305 - @return: The return value is an object -> providers mapping, where
306 + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
307 + @return: The return value is an object key -> providers mapping, where
308 providers is a mapping of soname -> set-of-library-paths returned
309 from the findProviders method.
310
311 @@ -342,118 +370,188 @@
312 rValue = {}
313 if not self._libs:
314 self.rebuild()
315 - # Iterate over all binaries within LinkageMap.
316 - for obj in self._obj_properties:
317 - rValue.setdefault(obj, self.findProviders(obj))
318 + # Iterate over all object keys within LinkageMap.
319 + for obj_key in self._obj_properties:
320 + rValue.setdefault(obj_key, self.findProviders(obj_key=obj_key))
321 return rValue
322
323 def isMasterLink(self, obj):
324 + """
325 + Determine whether an object is a master link.
326 +
327 + @param obj: absolute path to an object
328 + @type obj: string (example: '/usr/bin/foo')
329 + @rtype: Boolean
330 + @return:
331 + 1. True if obj is a master link
332 + 2. False if obj is not a master link
333 +
334 + """
335 basename = os.path.basename(obj)
336 - if obj not in self._obj_properties:
337 - obj = os.path.realpath(obj)
338 - if obj not in self._obj_properties:
339 - raise KeyError("%s not in object list" % obj)
340 - soname = self._obj_properties[obj][3]
341 + obj_key = self._generateObjKey(obj)
342 + if obj_key not in self._obj_properties:
343 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
344 + soname = self._obj_properties[obj_key][3]
345 return (len(basename) < len(soname))
346 -
347 +
348 def listLibraryObjects(self):
349 + """
350 + Return a list of library objects.
351 +
352 + Known limitation: library objects lacking an soname are not included.
353 +
354 + @rtype: list of strings
355 + @return: list of paths to all providers
356 +
357 + """
358 rValue = []
359 if not self._libs:
360 self.rebuild()
361 for soname in self._libs:
362 for arch in self._libs[soname]:
363 - rValue.extend(self._libs[soname][arch]["providers"])
364 + for obj_key in self._libs[soname][arch]["providers"]:
365 + rValue.extend(self._obj_properties[obj_key][4])
366 return rValue
367
368 def getSoname(self, obj):
369 + """
370 + Return the soname associated with an object.
371 +
372 + @param obj: absolute path to an object
373 + @type obj: string (example: '/usr/bin/bar')
374 + @rtype: string
375 + @return: soname as a string
376 +
377 + """
378 if not self._libs:
379 self.rebuild()
380 - if obj not in self._obj_properties:
381 - obj = os.path.realpath(obj)
382 - if obj not in self._obj_properties:
383 - raise KeyError("%s not in object list" % obj)
384 - arch, needed, path, soname = self._obj_properties[obj]
385 - return soname
386 + if obj not in self._obj_key_cache:
387 + raise KeyError("%s not in object list" % obj)
388 + return self._obj_properties[self._obj_key_cache[obj]][3]
389
390 - def findProviders(self, obj):
391 + def findProviders(self, obj=None, obj_key=None):
392 + """
393 + Find providers for an object or object key.
394 +
395 + This method should be called with either an obj or obj_key. If called
396 + with both, the obj_key is ignored. If called with neither, KeyError is
397 + raised as if an invalid obj was passed.
398 +
399 + In some cases, not all valid libraries are returned. This may occur when
400 + an soname symlink referencing a library is in an object's runpath while
401 + the actual library is not.
402 +
403 + @param obj: absolute path to an object
404 + @type obj: string (example: '/usr/bin/bar')
405 + @param obj_key: key from LinkageMap._generateObjKey
406 + @type obj_key: 2-tuple of longs or string
407 + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
408 + @return: The return value is a soname -> set-of-library-paths, where
409 + set-of-library-paths satisfy soname.
410 +
411 + """
412 + rValue = {}
413 +
414 if not self._libs:
415 self.rebuild()
416
417 - realpath_cache = {}
418 - def realpath(p):
419 - real_path = realpath_cache.get(p)
420 - if real_path is None:
421 - real_path = os.path.realpath(p)
422 - realpath_cache[p] = real_path
423 - return real_path
424 + # Determine the obj_key from the arguments.
425 + if obj is not None:
426 + obj_key = self._obj_key_cache.get(obj)
427 + if obj_key not in self._obj_properties:
428 + obj_key = self._generateObjKey(obj)
429 + if obj_key not in self._obj_properties:
430 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
431 + elif obj_key not in self._obj_properties:
432 + raise KeyError("%s not in object list" % obj_key)
433
434 - rValue = {}
435 - if obj not in self._obj_properties:
436 - obj = realpath(obj)
437 - if obj not in self._obj_properties:
438 - raise KeyError("%s not in object list" % obj)
439 - arch, needed, path, soname = self._obj_properties[obj]
440 - path = path[:]
441 - path.extend(self._defpath)
442 - path = set(realpath(x) for x in path)
443 - for x in needed:
444 - rValue[x] = set()
445 - if x not in self._libs or arch not in self._libs[x]:
446 + arch, needed, path, _, _ = self._obj_properties[obj_key]
447 + path = path.union(self._defpath)
448 + for soname in needed:
449 + rValue[soname] = set()
450 + if soname not in self._libs or arch not in self._libs[soname]:
451 continue
452 - for y in self._libs[x][arch]["providers"]:
453 - if x[0] == os.sep and realpath(x) == realpath(y):
454 - rValue[x].add(y)
455 - elif realpath(os.path.dirname(y)) in path:
456 - rValue[x].add(y)
457 + # For each potential provider of the soname, add it to rValue if it
458 + # resides in the obj's runpath.
459 + for provider_key in self._libs[soname][arch]["providers"]:
460 + providers = self._obj_properties[provider_key][4]
461 + for provider in providers:
462 + if os.path.dirname(provider) in path:
463 + rValue[soname].add(provider)
464 return rValue
465 -
466 - def findConsumers(self, obj):
467 +
468 + def findConsumers(self, obj=None, obj_key=None):
469 + """
470 + Find consumers of an object or object key.
471 +
472 + This method should be called with either an obj or obj_key. If called
473 + with both, the obj_key is ignored. If called with neither, KeyError is
474 + raised as if an invalid obj was passed.
475 +
476 + In some cases, not all consumers are returned. This may occur when
477 + an soname symlink referencing a library is in an object's runpath while
478 + the actual library is not.
479 +
480 + @param obj: absolute path to an object
481 + @type obj: string (example: '/usr/bin/bar')
482 + @param obj_key: key from LinkageMap._generateObjKey
483 + @type obj_key: 2-tuple of longs or string
484 + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
485 + @return: The return value is the set of paths of all objects that
486 + consume the given object.
487 +
488 + """
489 + rValue = set()
490 +
491 if not self._libs:
492 self.rebuild()
493
494 - realpath_cache = {}
495 - def realpath(p):
496 - real_path = realpath_cache.get(p)
497 - if real_path is None:
498 - real_path = os.path.realpath(p)
499 - realpath_cache[p] = real_path
500 - return real_path
501 + # Determine the obj_key and the set of objects matching the arguments.
502 + if obj is not None:
503 + objs = set([obj])
504 + obj_key = self._obj_key_cache.get(obj)
505 + if obj_key not in self._obj_properties:
506 + obj_key = self._generateObjKey(obj)
507 + if obj_key not in self._obj_properties:
508 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
509 + else:
510 + if obj_key not in self._obj_properties:
511 + raise KeyError("%s not in object list" % obj_key)
512 + objs = self._obj_properties[obj_key][4]
513
514 - if obj not in self._obj_properties:
515 - obj = realpath(obj)
516 - if obj not in self._obj_properties:
517 - raise KeyError("%s not in object list" % obj)
518 + # Determine the directory(ies) from the set of objects.
519 + objs_dirs = set([os.path.dirname(x) for x in objs])
520
521 # If there is another version of this lib with the
522 # same soname and the master link points to that
523 # other version, this lib will be shadowed and won't
524 # have any consumers.
525 - arch, needed, path, soname = self._obj_properties[obj]
526 - obj_dir = os.path.dirname(obj)
527 - master_link = os.path.join(obj_dir, soname)
528 - try:
529 - master_st = os.stat(master_link)
530 - obj_st = os.stat(obj)
531 - except OSError:
532 - pass
533 - else:
534 - if (obj_st.st_dev, obj_st.st_ino) != \
535 - (master_st.st_dev, master_st.st_ino):
536 - return set()
537 + if obj is not None:
538 + soname = self._obj_properties[obj_key][3]
539 + obj_dir = os.path.dirname(obj)
540 + master_link = os.path.join(obj_dir, soname)
541 + try:
542 + master_st = os.stat(master_link)
543 + obj_st = os.stat(obj)
544 + except OSError:
545 + pass
546 + else:
547 + if (obj_st.st_dev, obj_st.st_ino) != \
548 + (master_st.st_dev, master_st.st_ino):
549 + return set()
550
551 - rValue = set()
552 - for soname in self._libs:
553 - for arch in self._libs[soname]:
554 - if obj in self._libs[soname][arch]["providers"]:
555 - for x in self._libs[soname][arch]["consumers"]:
556 - path = self._obj_properties[x][2]
557 - path = [realpath(y) for y in path+self._defpath]
558 - if soname[0] == os.sep and realpath(soname) == realpath(obj):
559 - rValue.add(x)
560 - elif realpath(obj_dir) in path:
561 - rValue.add(x)
562 + arch, _, _, soname, _ = self._obj_properties[obj_key]
563 + if soname in self._libs and arch in self._libs[soname]:
564 + # For each potential consumer, add it to rValue if an object from the
565 + # arguments resides in the consumer's runpath.
566 + for consumer_key in self._libs[soname][arch]["consumers"]:
567 + _, _, path, _, consumer_objs = \
568 + self._obj_properties[consumer_key]
569 + path = path.union(self._defpath)
570 + if objs_dirs.intersection(path):
571 + rValue.update(consumer_objs)
572 return rValue
573 -
574 +
575 class vardbapi(dbapi):
576
577 _excluded_dirs = ["CVS", "lost+found"]