1 |
Author: zmedico |
2 |
Date: 2009-04-30 06:53:00 +0000 (Thu, 30 Apr 2009) |
3 |
New Revision: 13441 |
4 |
|
5 |
Modified: |
6 |
main/branches/2.1.6/bin/egencache |
7 |
main/branches/2.1.6/pym/portage/cache/cache_errors.py |
8 |
main/branches/2.1.6/pym/portage/cache/metadata.py |
9 |
Log: |
10 |
Add a new egencache --rsync option which enables a stat collision workaround |
11 |
for cases in which the content of a cache entry changes and neither the file |
12 |
mtime nor size changes (preventing rsync from detecting changes). See bug |
13 |
#139134. This option should only be needed for distribution via something |
14 |
like rsync, which relies on timestamps and file sizes to detect changes. It's |
15 |
not needed with git since that uses a more thorough mechanism which allows it |
16 |
to detect changed inode numbers (described in racy-git.txt in the git |
17 |
technical docs). (trunk r13262) |
18 |
|
19 |
Modified: main/branches/2.1.6/bin/egencache |
20 |
=================================================================== |
21 |
--- main/branches/2.1.6/bin/egencache 2009-04-30 06:52:39 UTC (rev 13440) |
22 |
+++ main/branches/2.1.6/bin/egencache 2009-04-30 06:53:00 UTC (rev 13441) |
23 |
@@ -24,7 +24,7 @@ |
24 |
import os |
25 |
import portage |
26 |
import _emerge |
27 |
-from portage.cache.cache_errors import CacheError |
28 |
+from portage.cache.cache_errors import CacheError, StatCollision |
29 |
from portage.util import writemsg_level |
30 |
|
31 |
def parse_args(args): |
32 |
@@ -46,6 +46,10 @@ |
33 |
action="store", |
34 |
help="max load allowed when spawning multiple jobs", |
35 |
dest="load_average") |
36 |
+ parser.add_option("--rsync", |
37 |
+ action="store_true", |
38 |
+ help="enable rsync stat collision workaround " + \ |
39 |
+ "for bug 139134 (use with --update)") |
40 |
options, args = parser.parse_args(args) |
41 |
|
42 |
if not options.update: |
43 |
@@ -73,7 +77,8 @@ |
44 |
return options, args |
45 |
|
46 |
class GenCache(object): |
47 |
- def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None): |
48 |
+ def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None, |
49 |
+ rsync=False): |
50 |
self._portdb = portdb |
51 |
# We can globally cleanse stale cache only if we |
52 |
# iterate over every single cp. |
53 |
@@ -90,22 +95,47 @@ |
54 |
metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule") |
55 |
self._trg_cache = metadbmodule(portdb.porttree_root, |
56 |
"metadata/cache", portage.auxdbkeys[:]) |
57 |
+ if rsync: |
58 |
+ self._trg_cache.raise_stat_collision = True |
59 |
self._existing_nodes = set() |
60 |
|
61 |
def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata): |
62 |
self._existing_nodes.add(cpv) |
63 |
if metadata is not None: |
64 |
- # TODO: Implement a workaround for bug 139134 here. The cache |
65 |
- # should be able to optionally raise an exception in order to |
66 |
- # indicate any mtime + size collisions that will prevent rsync |
67 |
- # from detecting changes. These exceptions will be handled by |
68 |
- # bumping the mtime on the ebuild (and the corresponding cache |
69 |
- # entry). |
70 |
if metadata.get('EAPI') == '0': |
71 |
del metadata['EAPI'] |
72 |
try: |
73 |
- self._trg_cache[cpv] = metadata |
74 |
+ try: |
75 |
+ self._trg_cache[cpv] = metadata |
76 |
+ except StatCollision, sc: |
77 |
+ # If the content of a cache entry changes and neither the |
78 |
+ # file mtime nor size changes, it will prevent rsync from |
79 |
+ # detecting changes. Cache backends may raise this |
80 |
+ # exception from _setitem() if they detect this type of stat |
81 |
+			# collision. These exceptions are handled by bumping the |
82 |
+ # mtime on the ebuild (and the corresponding cache entry). |
83 |
+ # See bug #139134. |
84 |
+ max_mtime = sc.mtime |
85 |
+ for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems(): |
86 |
+ if max_mtime < ec_mtime: |
87 |
+ max_mtime = ec_mtime |
88 |
+ if max_mtime == sc.mtime: |
89 |
+ max_mtime += 1 |
90 |
+ max_mtime = long(max_mtime) |
91 |
+ try: |
92 |
+ os.utime(ebuild_path, (max_mtime, max_mtime)) |
93 |
+ except OSError, e: |
94 |
+ self.returncode |= 1 |
95 |
+ writemsg_level( |
96 |
+ "%s writing target: %s\n" % (cpv, e), |
97 |
+ level=logging.ERROR, noiselevel=-1) |
98 |
+ else: |
99 |
+ metadata['_mtime_'] = max_mtime |
100 |
+ self._trg_cache[cpv] = metadata |
101 |
+ self._portdb.auxdb[repo_path][cpv] = metadata |
102 |
+ |
103 |
except CacheError, ce: |
104 |
+ self.returncode |= 1 |
105 |
writemsg_level( |
106 |
"%s writing target: %s\n" % (cpv, ce), |
107 |
level=logging.ERROR, noiselevel=-1) |
108 |
@@ -195,7 +225,8 @@ |
109 |
|
110 |
gen_cache = GenCache(portdb, cp_iter=cp_iter, |
111 |
max_jobs=options.jobs, |
112 |
- max_load=options.load_average) |
113 |
+ max_load=options.load_average, |
114 |
+ rsync=options.rsync) |
115 |
gen_cache.run() |
116 |
return gen_cache.returncode |
117 |
|
118 |
|
119 |
Modified: main/branches/2.1.6/pym/portage/cache/cache_errors.py |
120 |
=================================================================== |
121 |
--- main/branches/2.1.6/pym/portage/cache/cache_errors.py 2009-04-30 06:52:39 UTC (rev 13440) |
122 |
+++ main/branches/2.1.6/pym/portage/cache/cache_errors.py 2009-04-30 06:53:00 UTC (rev 13441) |
123 |
@@ -39,3 +39,25 @@ |
124 |
self.info = info |
125 |
def __str__(self): |
126 |
return "cache is non-modifiable"+str(self.info) |
127 |
+ |
128 |
+class StatCollision(CacheError): |
129 |
+ """ |
130 |
+ If the content of a cache entry changes and neither the file mtime nor |
131 |
+ size changes, it will prevent rsync from detecting changes. Cache backends |
132 |
+ may raise this exception from _setitem() if they detect this type of stat |
133 |
+ collision. See bug #139134. |
134 |
+ """ |
135 |
+ def __init__(self, key, filename, mtime, size): |
136 |
+ self.key = key |
137 |
+ self.filename = filename |
138 |
+ self.mtime = mtime |
139 |
+ self.size = size |
140 |
+ |
141 |
+ def __str__(self): |
142 |
+ return "%s has stat collision with size %s and mtime %s" % \ |
143 |
+ (self.key, self.size, self.mtime) |
144 |
+ |
145 |
+ def __repr__(self): |
146 |
+ return "portage.cache.cache_errors.StatCollision(%s)" % \ |
147 |
+ (', '.join((repr(self.key), repr(self.filename), |
148 |
+ repr(self.mtime), repr(self.size))),) |
149 |
|
150 |
Modified: main/branches/2.1.6/pym/portage/cache/metadata.py |
151 |
=================================================================== |
152 |
--- main/branches/2.1.6/pym/portage/cache/metadata.py 2009-04-30 06:52:39 UTC (rev 13440) |
153 |
+++ main/branches/2.1.6/pym/portage/cache/metadata.py 2009-04-30 06:53:00 UTC (rev 13441) |
154 |
@@ -3,7 +3,7 @@ |
155 |
# License: GPL2 |
156 |
# $Id$ |
157 |
|
158 |
-import errno, os, re |
159 |
+import errno, os, re, sys |
160 |
from portage.cache import cache_errors, flat_hash |
161 |
import portage.eclass_cache |
162 |
from portage.cache.template import reconstruct_eclasses |
163 |
@@ -30,6 +30,7 @@ |
164 |
super(database, self).__init__(location, *args, **config) |
165 |
self.location = os.path.join(loc, "metadata","cache") |
166 |
self.ec = portage.eclass_cache.cache(loc) |
167 |
+ self.raise_stat_collision = False |
168 |
|
169 |
def _parse_data(self, data, cpv): |
170 |
_hashed_re_match = self._hashed_re.match |
171 |
@@ -73,31 +74,63 @@ |
172 |
values = ProtectedDict(values) |
173 |
values["INHERITED"] = ' '.join(sorted(values["_eclasses_"])) |
174 |
|
175 |
+ new_content = [] |
176 |
+ for k in self.auxdbkey_order: |
177 |
+ new_content.append(unicode(values.get(k, ''), errors='replace')) |
178 |
+ new_content.append(u'\n') |
179 |
+ for i in xrange(magic_line_count - len(self.auxdbkey_order)): |
180 |
+ new_content.append(u'\n') |
181 |
+ new_content = u''.join(new_content) |
182 |
+ new_content = new_content.encode( |
183 |
+ sys.getdefaultencoding(), 'backslashreplace') |
184 |
+ |
185 |
+ new_fp = os.path.join(self.location, cpv) |
186 |
+ try: |
187 |
+ f = open(new_fp, 'rb') |
188 |
+ except EnvironmentError: |
189 |
+ pass |
190 |
+ else: |
191 |
+ try: |
192 |
+ try: |
193 |
+ existing_st = os.fstat(f.fileno()) |
194 |
+ existing_content = f.read() |
195 |
+ finally: |
196 |
+ f.close() |
197 |
+ except EnvironmentError: |
198 |
+ pass |
199 |
+ else: |
200 |
+ existing_mtime = long(existing_st.st_mtime) |
201 |
+ if values['_mtime_'] == existing_mtime and \ |
202 |
+ existing_content == new_content: |
203 |
+ return |
204 |
+ |
205 |
+ if self.raise_stat_collision and \ |
206 |
+ values['_mtime_'] == existing_mtime and \ |
207 |
+ len(new_content) == existing_st.st_size: |
208 |
+ raise cache_errors.StatCollision(cpv, new_fp, |
209 |
+ existing_mtime, existing_st.st_size) |
210 |
+ |
211 |
s = cpv.rfind("/") |
212 |
fp = os.path.join(self.location,cpv[:s], |
213 |
".update.%i.%s" % (os.getpid(), cpv[s+1:])) |
214 |
try: |
215 |
- myf = open(fp, "w") |
216 |
+ myf = open(fp, 'wb') |
217 |
except EnvironmentError, e: |
218 |
if errno.ENOENT == e.errno: |
219 |
try: |
220 |
self._ensure_dirs(cpv) |
221 |
- myf = open(fp, "w") |
222 |
+ myf = open(fp, 'wb') |
223 |
except EnvironmentError, e: |
224 |
raise cache_errors.CacheCorruption(cpv, e) |
225 |
else: |
226 |
raise cache_errors.CacheCorruption(cpv, e) |
227 |
|
228 |
try: |
229 |
- for k in self.auxdbkey_order: |
230 |
- myf.write(values.get(k, "") + "\n") |
231 |
- for i in xrange(magic_line_count - len(self.auxdbkey_order)): |
232 |
- myf.write("\n") |
233 |
+ myf.write(new_content) |
234 |
finally: |
235 |
myf.close() |
236 |
self._ensure_access(fp, mtime=values["_mtime_"]) |
237 |
|
238 |
- new_fp = os.path.join(self.location, cpv) |
239 |
try: |
240 |
os.rename(fp, new_fp) |
241 |
except EnvironmentError, e: |