Gentoo Archives: gentoo-commits

From: "Fabian Groffen (grobian)" <grobian@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] portage r11464 - main/branches/prefix/pym/portage/dbapi
Date: Sun, 24 Aug 2008 15:16:49
Message-Id: E1KXHKf-0007UY-13@stork.gentoo.org
1 Author: grobian
2 Date: 2008-08-24 15:16:43 +0000 (Sun, 24 Aug 2008)
3 New Revision: 11464
4
5 Modified:
6 main/branches/prefix/pym/portage/dbapi/vartree.py
7 Log:
8 Merged from trunk -r11446:11448
9
10 |11447 |Optimize LinkageMap to use tuples of device and inode numbers from stat calls, instead of paths from |
11 |zmedico|realpath, as unique keys for identification of files. This is the same approach used by |
12 | |dblink.isowner() for cases in which path comparison fails due to symlinks. Thanks to Lucian Poston |
13 | |<lucianposton@×××××.com> for submitting this patch (along with the missing-rebuild package set which |
14 | |I haven't merged yet). These patches are hosted in the following location: |
15 | |http://repo.or.cz/w/revdep-rebuild-reimplementation.git?a=tree;h=refs/heads/rc3;hb=refs/heads/rc3 |
16
17 |11448 |Update comment, from Lucian Poston's git repo. |
18 |zmedico| |
19 Merged from trunk -r11454:11455
20
21 | 11455 | More LinkageMap enhancements from Lucian Poston: * Added |
22 | zmedico | _ObjectKey helper class to LinkageMap. (commit |
23 | | eac5528887656abec65fc3a825506187397482e4) * Minor change to |
24 | | docstrings. (commit |
25 | | adde422145d81f25b4024eac1e78b80e1b4a4531) |
26
27
28 Ported above commits to the LinkageMapMachO
29
30
31 Modified: main/branches/prefix/pym/portage/dbapi/vartree.py
32 ===================================================================
33 --- main/branches/prefix/pym/portage/dbapi/vartree.py 2008-08-24 13:26:44 UTC (rev 11463)
34 +++ main/branches/prefix/pym/portage/dbapi/vartree.py 2008-08-24 15:16:43 UTC (rev 11464)
35 @@ -139,14 +139,74 @@
36 return rValue
37
38 class LinkageMap(object):
39 +
40 + """Models dynamic linker dependencies."""
41 +
42 def __init__(self, vardbapi):
43 self._dbapi = vardbapi
44 self._libs = {}
45 self._obj_properties = {}
46 - self._defpath = getlibpaths()
47 -
48 + self._defpath = set(getlibpaths())
49 + self._obj_key_cache = {}
50 +
51 + class _ObjectKey(object):
52 +
53 + """Helper class used as _obj_properties keys for objects."""
54 +
55 + def __init__(self, object):
56 + """
57 + This takes a path to an object.
58 +
59 + @param object: path to a file
60 + @type object: string (example: '/usr/bin/bar')
61 +
62 + """
63 + self._key = self._generate_object_key(object)
64 +
65 + def __hash__(self):
66 + return hash(self._key)
67 +
68 + def __eq__(self, other):
69 + return self._key == other._key
70 +
71 + def _generate_object_key(self, object):
72 + """
73 + Generate object key for a given object.
74 +
75 + @param object: path to a file
76 + @type object: string (example: '/usr/bin/bar')
77 + @rtype: 2-tuple of types (long, int) if object exists. string if
78 + object does not exist.
79 + @return:
80 + 1. 2-tuple of object's inode and device from a stat call, if object
81 + exists.
82 + 2. realpath of object if object does not exist.
83 +
84 + """
85 + try:
86 + object_stat = os.stat(object)
87 + except OSError:
88 + # Use the realpath as the key if the file does not exists on the
89 + # filesystem.
90 + return os.path.realpath(object)
91 + # Return a tuple of the device and inode.
92 + return (object_stat.st_dev, object_stat.st_ino)
93 +
94 + def file_exists(self):
95 + """
96 + Determine if the file for this key exists on the filesystem.
97 +
98 + @rtype: Boolean
99 + @return:
100 + 1. True if the file exists.
101 + 2. False if the file does not exist or is a broken symlink.
102 +
103 + """
104 + return isinstance(self._key, tuple)
105 +
106 def rebuild(self, include_file=None):
107 libs = {}
108 + obj_key_cache = {}
109 obj_properties = {}
110 lines = []
111 for cpv in self._dbapi.cpv_all():
112 @@ -176,97 +236,109 @@
113 # insufficient field length
114 continue
115 arch = fields[0]
116 - obj = os.path.realpath(fields[1])
117 + obj = fields[1]
118 + obj_key = self._ObjectKey(obj)
119 soname = fields[2]
120 - path = filter(None, fields[3].replace(
121 + path = set([normalize_path(x)
122 + for x in filter(None, fields[3].replace(
123 "${ORIGIN}", os.path.dirname(obj)).replace(
124 - "$ORIGIN", os.path.dirname(obj)).split(":"))
125 + "$ORIGIN", os.path.dirname(obj)).split(":"))])
126 needed = filter(None, fields[4].split(","))
127 if soname:
128 - libs.setdefault(soname, {arch: {"providers": [], "consumers": []}})
129 - libs[soname].setdefault(arch, {"providers": [], "consumers": []})
130 - libs[soname][arch]["providers"].append(obj)
131 + libs.setdefault(soname, \
132 + {arch: {"providers": set(), "consumers": set()}})
133 + libs[soname].setdefault(arch, \
134 + {"providers": set(), "consumers": set()})
135 + libs[soname][arch]["providers"].add(obj_key)
136 for x in needed:
137 - libs.setdefault(x, {arch: {"providers": [], "consumers": []}})
138 - libs[x].setdefault(arch, {"providers": [], "consumers": []})
139 - libs[x][arch]["consumers"].append(obj)
140 - obj_properties[obj] = (arch, needed, path, soname)
141 -
142 + libs.setdefault(x, \
143 + {arch: {"providers": set(), "consumers": set()}})
144 + libs[x].setdefault(arch, {"providers": set(), "consumers": set()})
145 + libs[x][arch]["consumers"].add(obj_key)
146 + obj_key_cache.setdefault(obj, obj_key)
147 + # All object paths are added into the obj_properties tuple
148 + obj_properties.setdefault(obj_key, \
149 + (arch, needed, path, soname, set()))[4].add(obj)
150 +
151 self._libs = libs
152 self._obj_properties = obj_properties
153 + self._obj_key_cache = obj_key_cache
154
155 - def listBrokenBinaries(self):
156 + def listBrokenBinaries(self, debug=False):
157 """
158 Find binaries and their needed sonames, which have no providers.
159
160 + @param debug: Boolean to enable debug output
161 + @type debug: Boolean
162 @rtype: dict (example: {'/usr/bin/foo': set(['libbar.so'])})
163 @return: The return value is an object -> set-of-sonames mapping, where
164 object is a broken binary and the set consists of sonames needed by
165 object that have no corresponding libraries to fulfill the dependency.
166
167 """
168 - class LibraryCache(object):
169 + class _LibraryCache(object):
170
171 """
172 - Caches sonames and realpaths associated with paths.
173 + Caches properties associated with paths.
174
175 - The purpose of this class is to prevent multiple calls of
176 - os.path.realpath and os.path.isfile on the same paths.
177 + The purpose of this class is to prevent multiple instances of
178 + _ObjectKey for the same paths.
179
180 """
181
182 def __init__(cache_self):
183 cache_self.cache = {}
184
185 - def get(cache_self, path):
186 + def get(cache_self, obj):
187 """
188 - Caches and returns the soname and realpath for a path.
189 + Caches and returns properties associated with an object.
190
191 - @param path: absolute path (can be symlink)
192 - @type path: string (example: '/usr/lib/libfoo.so')
193 - @rtype: 3-tuple with types (string or None, string, boolean)
194 - @return: 3-tuple with the following components:
195 - 1. soname as a string or None if it does not exist,
196 - 2. realpath as a string,
197 - 3. the result of os.path.isfile(realpath)
198 - (example: ('libfoo.so.1', '/usr/lib/libfoo.so.1.5.1', True))
199 + @param obj: absolute path (can be symlink)
200 + @type obj: string (example: '/usr/lib/libfoo.so')
201 + @rtype: 4-tuple with types
202 + (string or None, string or None, 2-tuple, Boolean)
203 + @return: 4-tuple with the following components:
204 + 1. arch as a string or None if it does not exist,
205 + 2. soname as a string or None if it does not exist,
206 + 3. obj_key as 2-tuple,
207 + 4. Boolean representing whether the object exists.
208 + (example: ('libfoo.so.1', (123L, 456L), True))
209
210 """
211 - if path in cache_self.cache:
212 - return cache_self.cache[path]
213 + if obj in cache_self.cache:
214 + return cache_self.cache[obj]
215 else:
216 - realpath = os.path.realpath(path)
217 + if obj in self._obj_key_cache:
218 + obj_key = self._obj_key_cache.get(obj)
219 + else:
220 + obj_key = self._ObjectKey(obj)
221 # Check that the library exists on the filesystem.
222 - if os.path.isfile(realpath):
223 - # Get the soname from LinkageMap._obj_properties if it
224 - # exists. Otherwise, None.
225 - soname = self._obj_properties.get(realpath, (None,)*3)[3]
226 - # Both path and realpath are cached and the result is
227 - # returned.
228 - cache_self.cache.setdefault(realpath, \
229 - (soname, realpath, True))
230 - return cache_self.cache.setdefault(path, \
231 - (soname, realpath, True))
232 + if obj_key.file_exists():
233 + # Get the arch and soname from LinkageMap._obj_properties if
234 + # it exists. Otherwise, None.
235 + arch, _, _, soname, _ = \
236 + self._obj_properties.get(obj_key, (None,)*5)
237 + return cache_self.cache.setdefault(obj, \
238 + (arch, soname, obj_key, True))
239 else:
240 - # realpath is not cached here, because the majority of cases
241 - # where realpath is not a file, path is the same as realpath.
242 - # Thus storing twice slows down the cache performance.
243 - return cache_self.cache.setdefault(path, \
244 - (None, realpath, False))
245 + return cache_self.cache.setdefault(obj, \
246 + (None, None, obj_key, False))
247
248 - debug = False
249 rValue = {}
250 - cache = LibraryCache()
251 + cache = _LibraryCache()
252 providers = self.listProviders()
253
254 - # Iterate over all binaries and their providers.
255 - for obj, sonames in providers.items():
256 + # Iterate over all obj_keys and their providers.
257 + for obj_key, sonames in providers.items():
258 + arch, _, path, _, objs = self._obj_properties[obj_key]
259 + path = path.union(self._defpath)
260 # Iterate over each needed soname and the set of library paths that
261 # fulfill the soname to determine if the dependency is broken.
262 for soname, libraries in sonames.items():
263 # validLibraries is used to store libraries, which satisfy soname,
264 # so if no valid libraries are found, the soname is not satisfied
265 - # for obj. Thus obj must be emerged.
266 + # for obj_key. If unsatisfied, objects associated with obj_key
267 + # must be emerged.
268 validLibraries = set()
269 # It could be the case that the library to satisfy the soname is
270 # not in the obj's runpath, but a symlink to the library is (eg
271 @@ -274,67 +346,60 @@
272 # does not catalog symlinks, broken or missing symlinks may go
273 # unnoticed. As a result of these cases, check that a file with
274 # the same name as the soname exists in obj's runpath.
275 - path = self._obj_properties[obj][2] + self._defpath
276 - for d in path:
277 - cachedSoname, cachedRealpath, cachedExists = \
278 - cache.get(os.path.join(d, soname))
279 - # Check that the this library provides the needed soname. Doing
280 + # XXX If we catalog symlinks in LinkageMap, this could be improved.
281 + for directory in path:
282 + cachedArch, cachedSoname, cachedKey, cachedExists = \
283 + cache.get(os.path.join(directory, soname))
284 + # Check that this library provides the needed soname. Doing
285 # this, however, will cause consumers of libraries missing
286 # sonames to be unnecessarily emerged. (eg libmix.so)
287 - if cachedSoname == soname:
288 - validLibraries.add(cachedRealpath)
289 - if debug and cachedRealpath not in libraries:
290 + if cachedSoname == soname and cachedArch == arch:
291 + validLibraries.add(cachedKey)
292 + if debug and cachedKey not in \
293 + set(map(self._obj_key_cache.get, libraries)):
294 + # XXX This is most often due to soname symlinks not in
295 + # a library's directory. We could catalog symlinks in
296 + # LinkageMap to avoid checking for this edge case here.
297 print "Found provider outside of findProviders:", \
298 - os.path.join(d, soname), "->", cachedRealpath
299 + os.path.join(directory, soname), "->", \
300 + self._obj_properties[cachedKey][4], libraries
301 # A valid library has been found, so there is no need to
302 # continue.
303 break
304 - if debug and cachedRealpath in self._obj_properties:
305 + if debug and cachedArch == arch and \
306 + cachedKey in self._obj_properties:
307 print "Broken symlink or missing/bad soname:", \
308 - os.path.join(d, soname), '->', cachedRealpath, \
309 - "with soname", cachedSoname, "but expecting", soname
310 + os.path.join(directory, soname), '->', \
311 + self._obj_properties[cachedKey], "with soname", \
312 + cachedSoname, "but expecting", soname
313 # This conditional checks if there are no libraries to satisfy the
314 # soname (empty set).
315 if not validLibraries:
316 - rValue.setdefault(obj, set()).add(soname)
317 + for obj in objs:
318 + rValue.setdefault(obj, set()).add(soname)
319 # If no valid libraries have been found by this point, then
320 # there are no files named with the soname within obj's runpath,
321 # but if there are libraries (from the providers mapping), it is
322 - # likely that symlinks or the actual libraries are missing.
323 - # Thus possible symlinks and missing libraries are added to
324 - # rValue in order to emerge corrupt library packages.
325 + # likely that soname symlinks or the actual libraries are
326 + # missing or broken. Thus those libraries are added to rValue
327 + # in order to emerge corrupt library packages.
328 for lib in libraries:
329 - cachedSoname, cachedRealpath, cachedExists = cache.get(lib)
330 - if not cachedExists:
331 - # The library's package needs to be emerged to repair the
332 - # missing library.
333 - rValue.setdefault(lib, set()).add(soname)
334 - else:
335 - # A library providing the soname exists in the obj's
336 - # runpath, but no file named as the soname exists, so add
337 - # the path constructed from the lib's directory and the
338 - # soname to rValue to fix cases of vanishing (or modified)
339 - # symlinks. This path is not guaranteed to exist, but it
340 - # follows the symlink convention found in the majority of
341 - # packages.
342 - rValue.setdefault(os.path.join(os.path.dirname(lib), \
343 - soname), set()).add(soname)
344 + rValue.setdefault(lib, set()).add(soname)
345 if debug:
346 - if not cachedExists:
347 + if not os.path.isfile(lib):
348 print "Missing library:", lib
349 else:
350 print "Possibly missing symlink:", \
351 os.path.join(os.path.dirname(lib), soname)
352 -
353 return rValue
354
355 def listProviders(self):
356 """
357 - Find the providers for all binaries.
358 + Find the providers for all object keys in LinkageMap.
359
360 @rtype: dict (example:
361 - {'/usr/bin/foo': {'libbar.so': set(['/lib/libbar.so.1.5'])}})
362 - @return: The return value is an object -> providers mapping, where
363 + {(123L, 456L): {'libbar.so': set(['/lib/libbar.so.1.5'])}})
364 + @return: The return value is an object key -> providers mapping, where
365 providers is a mapping of soname -> set-of-library-paths returned
366 from the findProviders method.
367
368 @@ -342,126 +407,257 @@
369 rValue = {}
370 if not self._libs:
371 self.rebuild()
372 - # Iterate over all binaries within LinkageMap.
373 - for obj in self._obj_properties:
374 - rValue.setdefault(obj, self.findProviders(obj))
375 + # Iterate over all object keys within LinkageMap.
376 + for obj_key in self._obj_properties:
377 + rValue.setdefault(obj_key, self.findProviders(obj_key))
378 return rValue
379
380 def isMasterLink(self, obj):
381 + """
382 + Determine whether an object is a master link.
383 +
384 + @param obj: absolute path to an object
385 + @type obj: string (example: '/usr/bin/foo')
386 + @rtype: Boolean
387 + @return:
388 + 1. True if obj is a master link
389 + 2. False if obj is not a master link
390 +
391 + """
392 basename = os.path.basename(obj)
393 - if obj not in self._obj_properties:
394 - obj = os.path.realpath(obj)
395 - if obj not in self._obj_properties:
396 - raise KeyError("%s not in object list" % obj)
397 - soname = self._obj_properties[obj][3]
398 + obj_key = self._ObjectKey(obj)
399 + if obj_key not in self._obj_properties:
400 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
401 + soname = self._obj_properties[obj_key][3]
402 return (len(basename) < len(soname))
403 -
404 +
405 def listLibraryObjects(self):
406 + """
407 + Return a list of library objects.
408 +
409 + Known limitation: library objects lacking an soname are not included.
410 +
411 + @rtype: list of strings
412 + @return: list of paths to all providers
413 +
414 + """
415 rValue = []
416 if not self._libs:
417 self.rebuild()
418 for soname in self._libs:
419 for arch in self._libs[soname]:
420 - rValue.extend(self._libs[soname][arch]["providers"])
421 + for obj_key in self._libs[soname][arch]["providers"]:
422 + rValue.extend(self._obj_properties[obj_key][4])
423 return rValue
424
425 def getSoname(self, obj):
426 + """
427 + Return the soname associated with an object.
428 +
429 + @param obj: absolute path to an object
430 + @type obj: string (example: '/usr/bin/bar')
431 + @rtype: string
432 + @return: soname as a string
433 +
434 + """
435 if not self._libs:
436 self.rebuild()
437 - if obj not in self._obj_properties:
438 - obj = os.path.realpath(obj)
439 - if obj not in self._obj_properties:
440 - raise KeyError("%s not in object list" % obj)
441 - arch, needed, path, soname = self._obj_properties[obj]
442 - return soname
443 + if obj not in self._obj_key_cache:
444 + raise KeyError("%s not in object list" % obj)
445 + return self._obj_properties[self._obj_key_cache[obj]][3]
446
447 def findProviders(self, obj):
448 + """
449 + Find providers for an object or object key.
450 +
451 + This method may be called with a key from _obj_properties.
452 +
453 + In some cases, not all valid libraries are returned. This may occur when
454 + an soname symlink referencing a library is in an object's runpath while
455 + the actual library is not. We should consider cataloging symlinks within
456 + LinkageMap as this would avoid those cases and would be a better model of
457 + library dependencies (since the dynamic linker actually searches for
458 + files named with the soname in the runpaths).
459 +
460 + @param obj: absolute path to an object or a key from _obj_properties
461 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
462 + @rtype: dict (example: {'libbar.so': set(['/lib/libbar.so.1.5'])})
463 + @return: The return value is a soname -> set-of-library-paths, where
464 + set-of-library-paths satisfy soname.
465 +
466 + """
467 + rValue = {}
468 +
469 if not self._libs:
470 self.rebuild()
471
472 - realpath_cache = {}
473 - def realpath(p):
474 - real_path = realpath_cache.get(p)
475 - if real_path is None:
476 - real_path = os.path.realpath(p)
477 - realpath_cache[p] = real_path
478 - return real_path
479 + # Determine the obj_key from the arguments.
480 + if isinstance(obj, self._ObjectKey):
481 + obj_key = obj
482 + if obj_key not in self._obj_properties:
483 + raise KeyError("%s not in object list" % obj_key)
484 + else:
485 + obj_key = self._obj_key_cache.get(obj)
486 + if obj_key not in self._obj_properties:
487 + obj_key = self._ObjectKey(obj)
488 + if obj_key not in self._obj_properties:
489 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
490
491 - rValue = {}
492 - if obj not in self._obj_properties:
493 - obj = realpath(obj)
494 - if obj not in self._obj_properties:
495 - raise KeyError("%s not in object list" % obj)
496 - arch, needed, path, soname = self._obj_properties[obj]
497 - path = path[:]
498 - path.extend(self._defpath)
499 - path = set(realpath(x) for x in path)
500 - for x in needed:
501 - rValue[x] = set()
502 - if x not in self._libs or arch not in self._libs[x]:
503 + arch, needed, path, _, _ = self._obj_properties[obj_key]
504 + path = path.union(self._defpath)
505 + for soname in needed:
506 + rValue[soname] = set()
507 + if soname not in self._libs or arch not in self._libs[soname]:
508 continue
509 - for y in self._libs[x][arch]["providers"]:
510 - if x[0] == os.sep and realpath(x) == realpath(y):
511 - rValue[x].add(y)
512 - elif realpath(os.path.dirname(y)) in path:
513 - rValue[x].add(y)
514 + # For each potential provider of the soname, add it to rValue if it
515 + # resides in the obj's runpath.
516 + for provider_key in self._libs[soname][arch]["providers"]:
517 + providers = self._obj_properties[provider_key][4]
518 + for provider in providers:
519 + if os.path.dirname(provider) in path:
520 + rValue[soname].add(provider)
521 return rValue
522 -
523 +
524 def findConsumers(self, obj):
525 + """
526 + Find consumers of an object or object key.
527 +
528 + This method may be called with a key from _obj_properties. If this
529 + method is going to be called with an object key, to avoid not catching
530 + shadowed libraries, do not pass new _ObjectKey instances to this method.
531 + Instead pass the obj as a string.
532 +
533 + In some cases, not all consumers are returned. This may occur when
534 + an soname symlink referencing a library is in an object's runpath while
535 + the actual library is not.
536 +
537 + @param obj: absolute path to an object or a key from _obj_properties
538 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
539 + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
540 + @return: The return value is a soname -> set-of-library-paths, where
541 + set-of-library-paths satisfy soname.
542 +
543 + """
544 + rValue = set()
545 +
546 if not self._libs:
547 self.rebuild()
548
549 - realpath_cache = {}
550 - def realpath(p):
551 - real_path = realpath_cache.get(p)
552 - if real_path is None:
553 - real_path = os.path.realpath(p)
554 - realpath_cache[p] = real_path
555 - return real_path
556 + # Determine the obj_key and the set of objects matching the arguments.
557 + if isinstance(obj, self._ObjectKey):
558 + obj_key = obj
559 + if obj_key not in self._obj_properties:
560 + raise KeyError("%s not in object list" % obj_key)
561 + objs = self._obj_properties[obj_key][4]
562 + else:
563 + objs = set([obj])
564 + obj_key = self._obj_key_cache.get(obj)
565 + if obj_key not in self._obj_properties:
566 + obj_key = self._ObjectKey(obj)
567 + if obj_key not in self._obj_properties:
568 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
569
570 - if obj not in self._obj_properties:
571 - obj = realpath(obj)
572 - if obj not in self._obj_properties:
573 - raise KeyError("%s not in object list" % obj)
574 + # Determine the directory(ies) from the set of objects.
575 + objs_dirs = set([os.path.dirname(x) for x in objs])
576
577 # If there is another version of this lib with the
578 # same soname and the master link points to that
579 # other version, this lib will be shadowed and won't
580 # have any consumers.
581 - arch, needed, path, soname = self._obj_properties[obj]
582 - obj_dir = os.path.dirname(obj)
583 - master_link = os.path.join(obj_dir, soname)
584 - try:
585 - master_st = os.stat(master_link)
586 - obj_st = os.stat(obj)
587 - except OSError:
588 - pass
589 - else:
590 - if (obj_st.st_dev, obj_st.st_ino) != \
591 - (master_st.st_dev, master_st.st_ino):
592 - return set()
593 + if not isinstance(obj, self._ObjectKey):
594 + soname = self._obj_properties[obj_key][3]
595 + obj_dir = os.path.dirname(obj)
596 + master_link = os.path.join(obj_dir, soname)
597 + try:
598 + master_st = os.stat(master_link)
599 + obj_st = os.stat(obj)
600 + except OSError:
601 + pass
602 + else:
603 + if (obj_st.st_dev, obj_st.st_ino) != \
604 + (master_st.st_dev, master_st.st_ino):
605 + return set()
606
607 - rValue = set()
608 - for soname in self._libs:
609 - for arch in self._libs[soname]:
610 - if obj in self._libs[soname][arch]["providers"]:
611 - for x in self._libs[soname][arch]["consumers"]:
612 - path = self._obj_properties[x][2]
613 - path = [realpath(y) for y in path+self._defpath]
614 - if soname[0] == os.sep and realpath(soname) == realpath(obj):
615 - rValue.add(x)
616 - elif realpath(obj_dir) in path:
617 - rValue.add(x)
618 + arch, _, _, soname, _ = self._obj_properties[obj_key]
619 + if soname in self._libs and arch in self._libs[soname]:
620 + # For each potential consumer, add it to rValue if an object from the
621 + # arguments resides in the consumer's runpath.
622 + for consumer_key in self._libs[soname][arch]["consumers"]:
623 + _, _, path, _, consumer_objs = \
624 + self._obj_properties[consumer_key]
625 + path = path.union(self._defpath)
626 + if objs_dirs.intersection(path):
627 + rValue.update(consumer_objs)
628 return rValue
629 -
630 +
631 class LinkageMapMachO(object):
632 +
633 + """Models dynamic linker dependencies."""
634 +
635 def __init__(self, vardbapi):
636 self._dbapi = vardbapi
637 self._libs = {}
638 self._obj_properties = {}
639 + self._obj_key_cache = {}
640
641 + class _ObjectKey(object):
642 +
643 + """Helper class used as _obj_properties keys for objects."""
644 +
645 + def __init__(self, object):
646 + """
647 + This takes a path to an object.
648 +
649 + @param object: path to a file
650 + @type object: string (example: '/usr/bin/bar')
651 +
652 + """
653 + self._key = self._generate_object_key(object)
654 +
655 + def __hash__(self):
656 + return hash(self._key)
657 +
658 + def __eq__(self, other):
659 + return self._key == other._key
660 +
661 + def _generate_object_key(self, object):
662 + """
663 + Generate object key for a given object.
664 +
665 + @param object: path to a file
666 + @type object: string (example: '/usr/bin/bar')
667 + @rtype: 2-tuple of types (long, int) if object exists. string if
668 + object does not exist.
669 + @return:
670 + 1. 2-tuple of object's inode and device from a stat call, if object
671 + exists.
672 + 2. realpath of object if object does not exist.
673 +
674 + """
675 + try:
676 + object_stat = os.stat(object)
677 + except OSError:
678 + # Use the realpath as the key if the file does not exists on the
679 + # filesystem.
680 + return os.path.realpath(object)
681 + # Return a tuple of the device and inode.
682 + return (object_stat.st_dev, object_stat.st_ino)
683 +
684 + def file_exists(self):
685 + """
686 + Determine if the file for this key exists on the filesystem.
687 +
688 + @rtype: Boolean
689 + @return:
690 + 1. True if the file exists.
691 + 2. False if the file does not exist or is a broken symlink.
692 +
693 + """
694 + return isinstance(self._key, tuple)
695 +
696 def rebuild(self, include_file=None):
697 libs = {}
698 + obj_key_cache = {}
699 obj_properties = {}
700 lines = []
701 for cpv in self._dbapi.cpv_all():
702 @@ -514,7 +710,8 @@
703
704 # Linking an object to a library is registered by recording
705 # the install_name of the library in the object.
706 - obj = os.path.realpath(fields[0])
707 + obj = fields[0]
708 + obj_key = self._ObjectKey(obj)
709 install_name = os.path.normpath(fields[1])
710 needed = filter(None, fields[2].split(","))
711
712 @@ -524,20 +721,29 @@
713 # an absolute path, consumers are those objects that
714 # reference the install_name
715 if install_name:
716 - libs.setdefault(install_name, {"providers": [], "consumers": []})
717 - libs[install_name]["providers"].append(obj)
718 + libs.setdefault(install_name, \
719 + {"providers": set(), "consumers": set()})
720 + libs[install_name]["providers"].append(obj_key)
721 for x in needed:
722 - libs.setdefault(x, {"providers": [], "consumers": []})
723 - libs[x]["consumers"].append(obj)
724 + libs.setdefault(x, \
725 + {"providers": set(), "consumers": set()})
726 + libs[x]["consumers"].append(obj_key)
727 obj_properties[obj] = (needed, install_name)
728 + obj_key_cache.setdefault(obj, obj_key)
729 + # All object paths are added into the obj_properties tuple
730 + obj_properties.setdefault(obj_key, \
731 + (needed, install_name, set()))[2].add(obj)
732
733 self._libs = libs
734 self._obj_properties = obj_properties
735 + self._obj_key_cache = obj_key_cache
736
737 - def listBrokenBinaries(self):
738 + def listBrokenBinaries(self, debug=False):
739 """
740 Find binaries and their needed install_names, which have no providers.
741
742 + @param debug: Boolean to enable debug output
743 + @type debug: Boolean
744 @rtype: dict (example: {'/usr/bin/foo': set(['/usr/lib/libbar.dylib'])})
745 @return: The return value is an object -> set-of-install_names
746 mapping, where object is a broken binary and the set
747 @@ -545,71 +751,70 @@
748 corresponding libraries to fulfill the dependency.
749
750 """
751 - class LibraryCache(object):
752 + class _LibraryCache(object):
753
754 """
755 - Caches install_names and realpaths associated with paths.
756 + Caches properties associated with paths.
757
758 - The purpose of this class is to prevent multiple calls of
759 - os.path.realpath and os.path.isfile on the same paths.
760 + The purpose of this class is to prevent multiple instances of
761 + _ObjectKey for the same paths.
762
763 """
764
765 def __init__(cache_self):
766 cache_self.cache = {}
767
768 - def get(cache_self, path):
769 + def get(cache_self, obj):
770 """
771 - Caches and returns the install_name and realpath for a path.
772 + Caches and returns properties associated with an object.
773
774 - @param path: absolute path (can be symlink)
775 - @type path: string (example: '/usr/lib/libfoo.dylib')
776 - @rtype: 3-tuple with types (string or None, string, boolean)
777 - @return: 3-tuple with the following components:
778 - 1. soname as a string or None if it does not exist,
779 - 2. realpath as a string,
780 - 3. the result of os.path.isfile(realpath)
781 - (example: ('/usr/lib/libfoo.1.dylib', '/usr/lib/libfoo.1.5.1.dylib', True))
782 + @param obj: absolute path (can be symlink)
783 + @type obj: string (example: '/usr/lib/libfoo.dylib')
784 + @rtype: 4-tuple with types
785 + (string or None, string or None, 2-tuple, Boolean)
786 + @return: 4-tuple with the following components:
787 + 1. arch as a string or None if it does not exist,
788 + 2. soname as a string or None if it does not exist,
789 + 3. obj_key as 2-tuple,
790 + 4. Boolean representing whether the object exists.
791 + (example: ('libfoo.1.dylib', (123L, 456L), True))
792
793 """
794 - if path in cache_self.cache:
795 - return cache_self.cache[path]
796 + if obj in cache_self.cache:
797 + return cache_self.cache[obj]
798 else:
799 - realpath = os.path.realpath(path)
800 + if obj in self._obj_key_cache:
801 + obj_key = self._obj_key_cache.get(obj)
802 + else:
803 + obj_key = self._ObjectKey(obj)
804 # Check that the library exists on the filesystem.
805 - if os.path.isfile(realpath):
806 - # Get the install_name from
807 - # LinkageMap._obj_properties if it exists.
808 - # Otherwise, None.
809 - install_name = self._obj_properties.get(realpath, (None,)*2)[1]
810 - # Both path and realpath are cached and the result is
811 - # returned.
812 - cache_self.cache.setdefault(realpath, \
813 - (install_name, realpath, True))
814 - return cache_self.cache.setdefault(path, \
815 - (install_name, realpath, True))
816 + if obj_key.file_exists():
817 + # Get the install_name from LinkageMapMachO._obj_properties if
818 + # it exists. Otherwise, None.
819 + _, install_name, _ = \
820 + self._obj_properties.get(obj_key, (None,)*3)
821 + return cache_self.cache.setdefault(obj, \
822 + (install_name, obj_key, True))
823 else:
824 - # realpath is not cached here, because the majority of cases
825 - # where realpath is not a file, path is the same as realpath.
826 - # Thus storing twice slows down the cache performance.
827 - return cache_self.cache.setdefault(path, \
828 - (None, realpath, False))
829 + return cache_self.cache.setdefault(obj, \
830 + (None, obj_key, False))
831
832 - debug = False
833 rValue = {}
834 - cache = LibraryCache()
835 + cache = _LibraryCache()
836 providers = self.listProviders()
837
838 - # Iterate over all binaries and their providers.
839 - for obj, install_names in providers.items():
840 + # Iterate over all obj_keys and their providers.
841 + for obj_key, install_names in providers.items():
842 + _, _, objs = self._obj_properties[obj_key]
843 # Iterate over each needed install_name and the set of
844 # library paths that fulfill the install_name to determine
845 # if the dependency is broken.
846 for install_name, libraries in install_names.items():
847 # validLibraries is used to store libraries, which
848 # satisfy install_name, so if no valid libraries are
849 - # found, the install_name is not satisfied for obj.
850 - # Thus obj must be emerged.
851 + # found, the install_name is not satisfied for obj_key.
852 + # If unsatisfied, objects associated with obj_key must
853 + # be emerged.
854 validLibrary = None
855 cachedInstallname, cachedRealpath, cachedExists = \
856 cache.get(install_name)
857 @@ -621,54 +826,38 @@
858 if debug and cachedRealpath not in libraries:
859 print "Found provider outside of findProviders:", \
860 install_name, "->", cachedRealpath
861 - if debug and cachedRealpath in self._obj_properties:
862 + if debug and cachedKey in self._obj_properties:
863 print "Broken symlink or missing/bad install_name:", \
864 install_name, '->', cachedRealpath, \
865 "with install_name", cachedInstallname, "but expecting", install_name
866 # This conditional checks if there are no libraries to
867 # satisfy the install_name (empty set).
868 if not validLibrary:
869 - rValue.setdefault(obj, set()).add(install_name)
870 + for obj in objs:
871 + rValue.setdefault(obj, set()).add(install_name)
872 # If no valid libraries have been found by this
873 # point, then the install_name does not exist in the
874 # filesystem, but if there are libraries (from the
875 - # providers mapping), it is likely that symlinks or
876 - # the actual libraries are missing. Thus possible
877 - # symlinks and missing libraries are added to rValue
878 + # providers mapping), it is likely that soname
879 + # symlinks or the actual libraries are missing or
880 + # broken. Thus those libraries are added to rValue
881 # in order to emerge corrupt library packages.
882 for lib in libraries:
883 - cachedInstallname, cachedRealpath, cachedExists = cache.get(lib)
884 - if not cachedExists:
885 - # The library's package needs to be emerged to repair the
886 - # missing library.
887 - rValue.setdefault(lib, set()).add(install_name)
888 - else:
889 - # A library providing the install_name
890 - # exists in the obj's runpath, but no file
891 - # named as the install_name exists, so add
892 - # the path and the install_name to rValue to
893 - # fix cases of vanishing (or modified)
894 - # symlinks. This path is not guaranteed to
895 - # exist, but it follows the symlink
896 - # convention found in the majority of
897 - # packages.
898 - rValue.setdefault(install_name,
899 - set()).add(install_name)
900 + rValue.setdefault(lib, set()).add(install_name)
901 if debug:
902 - if not cachedExists:
903 + if not os.path.isfile(lib):
904 print "Missing library:", lib
905 else:
906 print "Possibly missing symlink:", \
907 install_name
908 -
909 return rValue
910
911 def listProviders(self):
912 """
913 - Find the providers for all binaries.
914 + Find the providers for all object keys in LinkageMap.
915
916 @rtype: dict (example:
917 - {'/usr/bin/foo': {'libbar.dylib': set(['/lib/libbar.1.5.dylib'])}})
918 + {(123L, 456L): {'libbar.dylib': set(['/lib/libbar.1.5.dylib'])}})
919 @return: The return value is an object -> providers mapping, where
920 providers is a mapping of install_name ->
921 set-of-library-paths returned from the findProviders method.
922 @@ -677,86 +866,179 @@
923 rValue = {}
924 if not self._libs:
925 self.rebuild()
926 - # Iterate over all binaries within LinkageMap.
927 - for obj in self._obj_properties.keys():
928 - rValue.setdefault(obj, self.findProviders(obj))
929 + # Iterate over all binaries within LinkageMapMachO.
930 + for obj_key in self._obj_properties:
931 + rValue.setdefault(obj_key, self.findProviders(obj_key))
932 return rValue
933
934 def isMasterLink(self, obj):
935 + """
936 + Determine whether an object is a master link.
937 +
938 + @param obj: absolute path to an object
939 + @type obj: string (example: '/usr/bin/foo')
940 + @rtype: Boolean
941 + @return:
942 + 1. True if obj is a master link
943 + 2. False if obj is not a master link
944 +
945 + """
946 basename = os.path.basename(obj)
947 - if os.path.normpath(obj) not in self._obj_properties:
948 - obj = os.path.realpath(obj)
949 - if obj not in self._obj_properties:
950 - raise KeyError("%s not in object list" % obj)
951 - install_name = self._obj_properties[obj][1]
952 + if obj_key not in self._obj_properties:
953 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
954 + install_name = self._obj_properties[obj_key][1]
955 return (len(basename) < len(os.path.basename(install_name)))
956 -
957 +
958 def listLibraryObjects(self):
959 + """
960 + Return a list of library objects.
961 +
962 + Known limitation: library objects lacking an soname are not included.
963 +
964 + @rtype: list of strings
965 + @return: list of paths to all providers
966 +
967 + """
968 rValue = []
969 if not self._libs:
970 self.rebuild()
971 for install_name in self._libs:
972 - rValue.extend(self._libs[install_name]["providers"])
973 + for obj_key in self._libs[install_name]["providers"]:
974 + rValue.extend(self._obj_properties[obj_key][2])
975 return rValue
976
977 def getSoname(self, obj):
978 + """
979 + Return the soname associated with an object.
980 +
981 + @param obj: absolute path to an object
982 + @type obj: string (example: '/usr/bin/bar')
983 + @rtype: string
984 + @return: soname as a string
985 +
986 + """
987 if not self._libs:
988 self.rebuild()
989 - if obj not in self._obj_properties:
990 - obj = os.path.realpath(obj)
991 - if obj not in self._obj_properties:
992 - raise KeyError("%s not in object list" % obj)
993 - install_name = self._obj_properties[obj][1]
994 - return install_name
995 + if obj not in self._obj_key_cache:
996 + raise KeyError("%s not in object list" % obj)
997 + return self._obj_properties[self._obj_key_cache[obj]][1]
998
999 - # the missing documentation, part X.
1000 - # This function appears to return all (valid) providers for all
1001 - # needed entries that the given object has.
1002 def findProviders(self, obj):
1003 + """
1004 + Find providers for an object or object key.
1005 +
1006 + This method may be called with a key from _obj_properties.
1007 +
1008 + In some cases, not all valid libraries are returned. This may occur when
1009 + an soname symlink referencing a library is in an object's runpath while
1010 + the actual library is not. We should consider cataloging symlinks within
1011 + LinkageMap as this would avoid those cases and would be a better model of
1012 + library dependencies (since the dynamic linker actually searches for
1013 + files named with the soname in the runpaths).
1014 +
1015 + @param obj: absolute path to an object or a key from _obj_properties
1016 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
1017 + @rtype: dict (example: {'libbar.dylib': set(['/lib/libbar.1.5.dylib'])})
1018 + @return: The return value is an install_name -> set-of-library-paths mapping, where
1019 + set-of-library-paths satisfy install_name.
1020 +
1021 + """
1022 + rValue = {}
1023 +
1024 if not self._libs:
1025 self.rebuild()
1026
1027 - realpath_cache = {}
1028 - def realpath(p):
1029 - real_path = realpath_cache.get(p)
1030 - if real_path is None:
1031 - real_path = os.path.realpath(p)
1032 - realpath_cache[p] = real_path
1033 - return real_path
1034 + # Determine the obj_key from the arguments.
1035 + if isinstance(obj, self._ObjectKey):
1036 + obj_key = obj
1037 + if obj_key not in self._obj_properties:
1038 + raise KeyError("%s not in object list" % obj_key)
1039 + else:
1040 + obj_key = self._obj_key_cache.get(obj)
1041 + if obj_key not in self._obj_properties:
1042 + obj_key = self._ObjectKey(obj)
1043 + if obj_key not in self._obj_properties:
1044 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
1045
1046 - obj = os.path.normpath(obj)
1047 - rValue = {}
1048 - if obj not in self._obj_properties:
1049 - obj = realpath(obj)
1050 - if obj not in self._obj_properties:
1051 - raise KeyError("%s not in object list" % obj)
1052 - needed, install_name = self._obj_properties[obj]
1053 -
1054 - for x in needed:
1055 - rValue[x] = set()
1056 - if x not in self._libs:
1057 + needed, install_name, _ = self._obj_properties[obj_key]
1058 + for install_name in needed:
1059 + rValue[install_name] = set()
1060 + if install_name not in self._libs:
1061 continue
1062 - for y in self._libs[x]["providers"]:
1063 - if realpath(x) == realpath(y):
1064 - rValue[x].add(y)
1065 + # For each potential provider of the install_name, add it to
1066 + # rValue if it exists. (Should be one)
1067 + for provider_key in self._libs[install_name]["providers"]:
1068 + providers = self._obj_properties[provider_key][2]
1069 + for provider in providers:
1070 + if os.path.exists(provider):
1071 + rValue[install_name].add(provider)
1072 return rValue
1073 -
1074 +
1075 def findConsumers(self, obj):
1076 + """
1077 + Find consumers of an object or object key.
1078 +
1079 + This method may be called with a key from _obj_properties. If this
1080 + method is going to be called with an object key, to avoid not catching
1081 + shadowed libraries, do not pass new _ObjectKey instances to this method.
1082 + Instead pass the obj as a string.
1083 +
1084 + In some cases, not all consumers are returned. This may occur when
1085 + an soname symlink referencing a library is in an object's runpath while
1086 + the actual library is not.
1087 +
1088 + @param obj: absolute path to an object or a key from _obj_properties
1089 + @type obj: string (example: '/usr/bin/bar') or _ObjectKey
1090 + @rtype: set of strings (example: set(['/bin/foo', '/usr/bin/bar']))
1091 + @return: The return value is a set of paths to the objects registered as
1092 + consumers of the install_name of obj.
1093 +
1094 + """
1095 + rValue = set()
1096 +
1097 if not self._libs:
1098 self.rebuild()
1099
1100 - obj = os.path.normpath(obj)
1101 - if obj not in self._obj_properties:
1102 - obj = os.path.realpath(obj)
1103 - if obj not in self._obj_properties:
1104 - raise KeyError("%s not in object list" % obj)
1105 - rValue = set()
1106 - for install_name in self._libs:
1107 - if obj in self._libs[install_name]["providers"]:
1108 - for x in self._libs[install_name]["consumers"]:
1109 - rValue.add(x)
1110 + # Determine the obj_key and the set of objects matching the arguments.
1111 + if isinstance(obj, self._ObjectKey):
1112 + obj_key = obj
1113 + if obj_key not in self._obj_properties:
1114 + raise KeyError("%s not in object list" % obj_key)
1115 + objs = self._obj_properties[obj_key][2]
1116 + else:
1117 + objs = set([obj])
1118 + obj_key = self._obj_key_cache.get(obj)
1119 + if obj_key not in self._obj_properties:
1120 + obj_key = self._ObjectKey(obj)
1121 + if obj_key not in self._obj_properties:
1122 + raise KeyError("%s (%s) not in object list" % (obj_key, obj))
1123 +
1124 + # If there is another version of this lib with the
1125 + # same soname and the master link points to that
1126 + # other version, this lib will be shadowed and won't
1127 + # have any consumers.
1128 + if not isinstance(obj, self._ObjectKey):
1129 + master_link = self._obj_properties[obj_key][1]
1130 + try:
1131 + master_st = os.stat(master_link)
1132 + obj_st = os.stat(obj)
1133 + except OSError:
1134 + pass
1135 + else:
1136 + if (obj_st.st_dev, obj_st.st_ino) != \
1137 + (master_st.st_dev, master_st.st_ino):
1138 + return set()
1139 +
1140 + _, install_name, _ = self._obj_properties[obj_key]
1141 + if install_name in self._libs:
1142 + # For each potential consumer, add it to rValue if an object from the
1143 + # arguments resides in the consumer's runpath.
1144 + for consumer_key in self._libs[install_name]["consumers"]:
1145 + _, _, consumer_objs = \
1146 + self._obj_properties[consumer_key]
1147 + rValue.update(consumer_objs)
1148 return rValue
1149 -
1150 +
1151 class vardbapi(dbapi):
1152
1153 _excluded_dirs = ["CVS", "lost+found"]