Gentoo Archives: gentoo-commits

From: "Zac Medico (zmedico)" <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] portage r13441 - in main/branches/2.1.6: bin pym/portage/cache
Date: Thu, 30 Apr 2009 06:53:02
Message-Id: E1LzQ8j-0004sv-3G@stork.gentoo.org
1 Author: zmedico
2 Date: 2009-04-30 06:53:00 +0000 (Thu, 30 Apr 2009)
3 New Revision: 13441
4
5 Modified:
6 main/branches/2.1.6/bin/egencache
7 main/branches/2.1.6/pym/portage/cache/cache_errors.py
8 main/branches/2.1.6/pym/portage/cache/metadata.py
9 Log:
10 Add a new egencache --rsync option which enables a stat collision workaround
11 for cases in which the content of a cache entry changes and neither the file
12 mtime nor size changes (preventing rsync from detecting changes). See bug
13 #139134. This option should only be needed for distribution via something
14 like rsync, which relies on timestamps and file sizes to detect changes. It's
15 not needed with git since that uses a more thorough mechanism which allows it
16 to detect changed inode numbers (described in racy-git.txt in the git
17 technical docs). (trunk r13262)
18
19 Modified: main/branches/2.1.6/bin/egencache
20 ===================================================================
21 --- main/branches/2.1.6/bin/egencache 2009-04-30 06:52:39 UTC (rev 13440)
22 +++ main/branches/2.1.6/bin/egencache 2009-04-30 06:53:00 UTC (rev 13441)
23 @@ -24,7 +24,7 @@
24 import os
25 import portage
26 import _emerge
27 -from portage.cache.cache_errors import CacheError
28 +from portage.cache.cache_errors import CacheError, StatCollision
29 from portage.util import writemsg_level
30
31 def parse_args(args):
32 @@ -46,6 +46,10 @@
33 action="store",
34 help="max load allowed when spawning multiple jobs",
35 dest="load_average")
36 + parser.add_option("--rsync",
37 + action="store_true",
38 + help="enable rsync stat collision workaround " + \
39 + "for bug 139134 (use with --update)")
40 options, args = parser.parse_args(args)
41
42 if not options.update:
43 @@ -73,7 +77,8 @@
44 return options, args
45
46 class GenCache(object):
47 - def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None):
48 + def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None,
49 + rsync=False):
50 self._portdb = portdb
51 # We can globally cleanse stale cache only if we
52 # iterate over every single cp.
53 @@ -90,22 +95,47 @@
54 metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule")
55 self._trg_cache = metadbmodule(portdb.porttree_root,
56 "metadata/cache", portage.auxdbkeys[:])
57 + if rsync:
58 + self._trg_cache.raise_stat_collision = True
59 self._existing_nodes = set()
60
61 def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata):
62 self._existing_nodes.add(cpv)
63 if metadata is not None:
64 - # TODO: Implement a workaround for bug 139134 here. The cache
65 - # should be able to optionally raise an exception in order to
66 - # indicate any mtime + size collisions that will prevent rsync
67 - # from detecting changes. These exceptions will be handled by
68 - # bumping the mtime on the ebuild (and the corresponding cache
69 - # entry).
70 if metadata.get('EAPI') == '0':
71 del metadata['EAPI']
72 try:
73 - self._trg_cache[cpv] = metadata
74 + try:
75 + self._trg_cache[cpv] = metadata
76 + except StatCollision, sc:
77 + # If the content of a cache entry changes and neither the
78 + # file mtime nor size changes, it will prevent rsync from
79 + # detecting changes. Cache backends may raise this
80 + # exception from _setitem() if they detect this type of stat
81 + # collision. These exceptions are handled by bumping the
82 + # mtime on the ebuild (and the corresponding cache entry).
83 + # See bug #139134.
84 + max_mtime = sc.mtime
85 + for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems():
86 + if max_mtime < ec_mtime:
87 + max_mtime = ec_mtime
88 + if max_mtime == sc.mtime:
89 + max_mtime += 1
90 + max_mtime = long(max_mtime)
91 + try:
92 + os.utime(ebuild_path, (max_mtime, max_mtime))
93 + except OSError, e:
94 + self.returncode |= 1
95 + writemsg_level(
96 + "%s writing target: %s\n" % (cpv, e),
97 + level=logging.ERROR, noiselevel=-1)
98 + else:
99 + metadata['_mtime_'] = max_mtime
100 + self._trg_cache[cpv] = metadata
101 + self._portdb.auxdb[repo_path][cpv] = metadata
102 +
103 except CacheError, ce:
104 + self.returncode |= 1
105 writemsg_level(
106 "%s writing target: %s\n" % (cpv, ce),
107 level=logging.ERROR, noiselevel=-1)
108 @@ -195,7 +225,8 @@
109
110 gen_cache = GenCache(portdb, cp_iter=cp_iter,
111 max_jobs=options.jobs,
112 - max_load=options.load_average)
113 + max_load=options.load_average,
114 + rsync=options.rsync)
115 gen_cache.run()
116 return gen_cache.returncode
117
118
119 Modified: main/branches/2.1.6/pym/portage/cache/cache_errors.py
120 ===================================================================
121 --- main/branches/2.1.6/pym/portage/cache/cache_errors.py 2009-04-30 06:52:39 UTC (rev 13440)
122 +++ main/branches/2.1.6/pym/portage/cache/cache_errors.py 2009-04-30 06:53:00 UTC (rev 13441)
123 @@ -39,3 +39,25 @@
124 self.info = info
125 def __str__(self):
126 return "cache is non-modifiable"+str(self.info)
127 +
128 +class StatCollision(CacheError):
129 + """
130 + If the content of a cache entry changes and neither the file mtime nor
131 + size changes, it will prevent rsync from detecting changes. Cache backends
132 + may raise this exception from _setitem() if they detect this type of stat
133 + collision. See bug #139134.
134 + """
135 + def __init__(self, key, filename, mtime, size):
136 + self.key = key
137 + self.filename = filename
138 + self.mtime = mtime
139 + self.size = size
140 +
141 + def __str__(self):
142 + return "%s has stat collision with size %s and mtime %s" % \
143 + (self.key, self.size, self.mtime)
144 +
145 + def __repr__(self):
146 + return "portage.cache.cache_errors.StatCollision(%s)" % \
147 + (', '.join((repr(self.key), repr(self.filename),
148 + repr(self.mtime), repr(self.size))),)
149
150 Modified: main/branches/2.1.6/pym/portage/cache/metadata.py
151 ===================================================================
152 --- main/branches/2.1.6/pym/portage/cache/metadata.py 2009-04-30 06:52:39 UTC (rev 13440)
153 +++ main/branches/2.1.6/pym/portage/cache/metadata.py 2009-04-30 06:53:00 UTC (rev 13441)
154 @@ -3,7 +3,7 @@
155 # License: GPL2
156 # $Id$
157
158 -import errno, os, re
159 +import errno, os, re, sys
160 from portage.cache import cache_errors, flat_hash
161 import portage.eclass_cache
162 from portage.cache.template import reconstruct_eclasses
163 @@ -30,6 +30,7 @@
164 super(database, self).__init__(location, *args, **config)
165 self.location = os.path.join(loc, "metadata","cache")
166 self.ec = portage.eclass_cache.cache(loc)
167 + self.raise_stat_collision = False
168
169 def _parse_data(self, data, cpv):
170 _hashed_re_match = self._hashed_re.match
171 @@ -73,31 +74,63 @@
172 values = ProtectedDict(values)
173 values["INHERITED"] = ' '.join(sorted(values["_eclasses_"]))
174
175 + new_content = []
176 + for k in self.auxdbkey_order:
177 + new_content.append(unicode(values.get(k, ''), errors='replace'))
178 + new_content.append(u'\n')
179 + for i in xrange(magic_line_count - len(self.auxdbkey_order)):
180 + new_content.append(u'\n')
181 + new_content = u''.join(new_content)
182 + new_content = new_content.encode(
183 + sys.getdefaultencoding(), 'backslashreplace')
184 +
185 + new_fp = os.path.join(self.location, cpv)
186 + try:
187 + f = open(new_fp, 'rb')
188 + except EnvironmentError:
189 + pass
190 + else:
191 + try:
192 + try:
193 + existing_st = os.fstat(f.fileno())
194 + existing_content = f.read()
195 + finally:
196 + f.close()
197 + except EnvironmentError:
198 + pass
199 + else:
200 + existing_mtime = long(existing_st.st_mtime)
201 + if values['_mtime_'] == existing_mtime and \
202 + existing_content == new_content:
203 + return
204 +
205 + if self.raise_stat_collision and \
206 + values['_mtime_'] == existing_mtime and \
207 + len(new_content) == existing_st.st_size:
208 + raise cache_errors.StatCollision(cpv, new_fp,
209 + existing_mtime, existing_st.st_size)
210 +
211 s = cpv.rfind("/")
212 fp = os.path.join(self.location,cpv[:s],
213 ".update.%i.%s" % (os.getpid(), cpv[s+1:]))
214 try:
215 - myf = open(fp, "w")
216 + myf = open(fp, 'wb')
217 except EnvironmentError, e:
218 if errno.ENOENT == e.errno:
219 try:
220 self._ensure_dirs(cpv)
221 - myf = open(fp, "w")
222 + myf = open(fp, 'wb')
223 except EnvironmentError, e:
224 raise cache_errors.CacheCorruption(cpv, e)
225 else:
226 raise cache_errors.CacheCorruption(cpv, e)
227
228 try:
229 - for k in self.auxdbkey_order:
230 - myf.write(values.get(k, "") + "\n")
231 - for i in xrange(magic_line_count - len(self.auxdbkey_order)):
232 - myf.write("\n")
233 + myf.write(new_content)
234 finally:
235 myf.close()
236 self._ensure_access(fp, mtime=values["_mtime_"])
237
238 - new_fp = os.path.join(self.location, cpv)
239 try:
240 os.rename(fp, new_fp)
241 except EnvironmentError, e: