Gentoo Archives: gentoo-portage-dev

From: Brian Harring <ferringb@×××××.com>
To: gentoo-portage-dev@l.g.o
Subject: [gentoo-portage-dev] Re: r5975 - FEATURES=preserve-libs
Date: Sat, 17 Feb 2007 14:29:08
Message-Id: 20070217142723.GB22681@seldon
1 Realize you didn't want comments upon the implementation, but tough
2 cookies, already reviewed it; sucker's in svn mainline anyway, thus
3 it's fair game.
4
5 > Modified: main/trunk/pym/portage/dbapi/vartree.py
6 > ===================================================================
7 > --- main/trunk/pym/portage/dbapi/vartree.py 2007-02-17 04:17:14 UTC (rev 5974)
8 > +++ main/trunk/pym/portage/dbapi/vartree.py 2007-02-17 08:53:35 UTC (rev 5975)
9 > @@ -523,6 +523,39 @@
10 > write_atomic(cpath, str(counter))
11 > return counter
12 >
13 > + def get_library_map(self):
14 > + """ Read the global library->consumer map for this vdb instance """
15 > + mapfilename = self.getpath(".NEEDED")
16 > + if not os.path.exists(mapfilename):
17 > + self.update_library_map()
18 > + rValue = {}
19 > + for l in open(mapfilename, "r").read().split("\n"):
20
21 for l in open(mapfilename):...
22 # no point in building the list in memory when you're just itering.
23
24 > + mysplit = l.split()
25 > + if len(mysplit) > 1:
26 > + rValue[mysplit[0]] = mysplit[1].split(",")
27
28 # testing for this is stupid; your update code doesn't write
29 # empty entries, nor should it be allowed in the file.
30
rValue[mysplit[0]] = mysplit[1].split(',')
32
33 > + return rValue
34 > +
35 > + def update_library_map(self):
36 > + """ Update the global library->consumer map for this vdb instance. """
37 > + mapfilename = self.getpath(".NEEDED")
38 > + obj_dict = {}
39 > + for cpv in self.cpv_all():
40 > + needed_list = self.aux_get(cpv, ["NEEDED"])[0]
41 > + for l in needed_list.split("\n"):
42 > + mysplit = l.split()
43 > + if len(mysplit) < 2:
44 > + continue
45 > + libs = mysplit[1].split(",")
46 > + for lib in libs:
47 > + if not obj_dict.has_key(lib):
48
49 # __contains__ is your friend.
50 # has_key is the slower, slightly retarded older brother.
51 # avoid the older brother.
52
53 if lib in obj_dict:
54 ...
55
56 > + obj_dict[lib] = [mysplit[0]]
57 > + else:
58 > + obj_dict[lib].append(mysplit[0])
59
60 # alternative, conciser form.
61 obj_dict.setdefault(lib, []).append(mysplit[0])
62
63
64 > + mapfile = open(mapfilename, "w")
65 > + for lib in obj_dict.keys():
66 > + mapfile.write(lib+" "+",".join(obj_dict[lib])+"\n")
67
# per the norm, stop using keys(). You're not mutating the dict, thus
# no reason for the list creation unless you *want* to make portage
# slower than it is already.
# use iteritems further, instead of forcing a lookup for every single
# item when you're already itering the dict.
73
74 for lib, revs in obj_dict.iteritems():
75 mapfile.write("%s %s\n" % (lib, ','.join(revs)))
76
77
78 > + mapfile.close()
79 > +
80 > class vartree(object):
81 > "this tree will scan a var/db/pkg database located at root (passed to init)"
82 > def __init__(self, root="/", virtual=None, clone=None, categories=None,
83 > @@ -952,6 +985,9 @@
84 > doebuild(myebuildpath, "cleanrm", self.myroot, self.settings,
85 > tree="vartree", mydbapi=self.vartree.dbapi,
86 > vartree=self.vartree)
87 > +
88 > + # regenerate reverse NEEDED map
89 > + self.vartree.dbapi.update_library_map()
90 >
91 > finally:
92 > if builddir_lock:
93 > @@ -1162,6 +1198,7 @@
94 >
95 > This function does the following:
96 >
97 > + Preserve old libraries that are still used.
98 > Collision Protection.
99 > calls doebuild(mydo=pkg_preinst)
100 > Merges the package to the livefs
101 > @@ -1197,6 +1234,10 @@
102 > if not os.path.exists(self.dbcatdir):
103 > os.makedirs(self.dbcatdir)
104 >
105 > + myfilelist = listdir(srcroot, recursive=1, filesonly=1, followSymlinks=True)
106 > + mysymlinks = filter(os.path.islink, listdir(srcroot, recursive=1, filesonly=0, followSymlinks=False))
107 > + myfilelist.extend(mysymlinks)
108 > +
109
# I hope this feature is really worth it, since previously, the
# two walks of ${D} occurred only if FEATURES=collision-protect.
112 # via this change, even if FEATURES="-collision-protect -preserve-libs",
113 # the user now gets to enjoy the two additional walks.
114 #
115 # low mem system with large ${D}, just jacked up the runtime a fair
116 # bit via that regression.
117 #
118
119
120 > otherversions = []
121 > for v in self.vartree.dbapi.cp_list(self.mysplit[0]):
122 > otherversions.append(v.split("/")[1])
123 > @@ -1209,17 +1250,59 @@
124 > catsplit(slot_matches[0])[1], destroot, self.settings,
125 > vartree=self.vartree)
126 >
127 > + # Preserve old libs if they are still in use
128 > + if slot_matches and "preserve-libs" in self.settings.features:
129 > + # read global reverse NEEDED map
130 > + libmap = self.vartree.dbapi.get_library_map()
131 > +
132 > + # get list of libraries from old package instance
133 > + old_contents = self._installed_instance.getcontents().keys()
134 > + old_libs = set([os.path.basename(x) for x in old_contents]).intersection(libmap.keys())
135 > +
136 > + # get list of libraries from new package instance
137 > + mylibs = set([os.path.basename(x) for x in myfilelist]).intersection(libmap.keys())
138 > +
139 > + # check which libs are present in the old, but not the new package instance
140 > + preserve_libs = old_libs.difference(mylibs)
141
142 # offhand, way too many sets generated here.
143 from itertools import izip, imap
144
145 def f(iterable):
146 return imap(os.path.basename, iterable)
147
148 libmap = self.vartree.dbapi.get_library_map()
149 contents = self._installed_instance.getcontents()
150 old_basename = dict(izip(f(contents), contents))
151 del contents
152 preserve_libs = set(libmap)
153 preserve_libs.intersection_update(old_basename)
154 preserve_libs.difference_update(f(myfilelist))
155
# ... roughly. the code's fairly horrible to follow the purpose of,
# considering the following behaviour
158
159 > + # ignore any libs that are only internally used by the package
160 > + for lib in preserve_libs.copy():
161 > + old_contents_without_libs = [x for x in old_contents if os.path.basename(x) not in preserve_libs]
162 > + if len(set(libmap[lib]).intersection(old_contents_without_libs)) == len(libmap[lib]):
163 > + preserve_libs.remove(lib)
164 > +
165 > + # get the real paths for the libs
166 > + preserve_paths = [x for x in old_contents if os.path.basename(x) in preserve_libs]
167
168 # you're just itering over it, thus use list instead of copy.
169
170 for lib in list(preserve_libs):
171 old_contents = [v for k,v in old_basename.iteritems() if k not in preserve_libs]
172 if not set(libmap[lib]).difference(old_contents):
173 preserve_libs.remove(lib)
174
175 preserve_paths = [v for k, v in old_basename.iteritems() if k in preserve_libs]
del old_basename, libmap
177
178 # further, that code looks buggy; sets are unordered, and NEEDED isn't
179 # recursive; mildly problematic since lib deps are a graph.
180 # order of processing, lib1 requires lib2, lib2 will be pruned.
181 # if lib1 is processed prior to lib2, lib1 lives on while lib2 gets
182 # pruned; if lib2 processed prior to lib1, things are hunky dory.
183 # since you're not ordering it prior (plus using unordered sets),
184 # potential is there.
185
186 > + # inject files that should be preserved into our image dir
187 > + import shutil
188 > + for x in preserve_paths:
189 > + print "injecting %s into %s" % (x, srcroot)
190 > + mydir = os.path.join(srcroot, os.path.dirname(x))
191 > + if not os.path.exists(mydir):
192 > + os.makedirs(mydir)
193 > +
194 > + # resolve symlinks and extend preserve list
195 > + # NOTE: we're extending the list in the loop to emulate recursion to
196 > + # also get indirect symlinks
197 > + if os.path.islink(x):
198 > + linktarget = os.readlink(x)
199 > + os.symlink(linktarget, os.path.join(srcroot, x.lstrip(os.sep)))
200
201 stat it once, and examine the object in memory.
202 further, this will explode if the symlink exists in ${D} already with
203 an OSError/EEXIST
204
205 > + if linktarget[0] != os.sep:
206 > + linktarget = os.path.join(os.path.dirname(x), linktarget)
207 > + preserve_paths.append(linktarget)
208 > + else:
209 > + shutil.copy2(x, os.path.join(srcroot, x.lstrip(os.sep)))
210 > +
211 > +
212 > # check for package collisions
213 > if "collision-protect" in self.settings.features:
214 > collision_ignore = set([normalize_path(myignore) for myignore in \
215 > self.settings.get("COLLISION_IGNORE", "").split()])
216 > - myfilelist = listdir(srcroot, recursive=1, filesonly=1, followSymlinks=False)
217 >
218 > # the linkcheck only works if we are in srcroot
219 > mycwd = os.getcwd()
220 > os.chdir(srcroot)
221 > - mysymlinks = filter(os.path.islink, listdir(srcroot, recursive=1, filesonly=0, followSymlinks=False))
222 > - myfilelist.extend(mysymlinks)
223 > mysymlinked_directories = [s + os.path.sep for s in mysymlinks]
224 > del mysymlinks
225 >
226 > @@ -1442,6 +1525,9 @@
227 > writedict(cfgfiledict, conf_mem_file)
228 > del conf_mem_file
229 >
230 > + # regenerate reverse NEEDED map
231 > + self.vartree.dbapi.update_library_map()
232 > +
233
That should be collapsed to scale sanely rather than accessing
NEEDED/.NEEDED every single merge.
236
237 Again, low mem systems, or just crappy IO following a compile that
238 sucked down a decent amount of memory (c++ apps anyone?), this is
239 going to be a nasty hit everytime around.
240
Upshot: get .NEEDED the hell out of the vdb entry, which is already
too much of a dumping ground that folks are trying to standardize.
243
244
245 Patch needs work, ignoring whether this is a good idea...
246
247 ~harring