1 |
Author: zmedico |
2 |
Date: 2009-03-31 20:14:03 +0000 (Tue, 31 Mar 2009) |
3 |
New Revision: 13262 |
4 |
|
5 |
Modified: |
6 |
main/trunk/bin/egencache |
7 |
main/trunk/pym/portage/cache/cache_errors.py |
8 |
main/trunk/pym/portage/cache/metadata.py |
9 |
Log: |
10 |
Add a new egencache --rsync option which enables a stat collision workaround |
11 |
for cases in which the content of a cache entry changes and neither the file |
12 |
mtime nor size changes (preventing rsync from detecting changes). See bug |
13 |
#139134. This option should only be needed for distribution via something |
14 |
like rsync, which relies on timestamps and file sizes to detect changes. It's |
15 |
not needed with git since that uses a more thorough mechanism which allows it |
16 |
to detect changed inode numbers (described in racy-git.txt in the git |
17 |
technical docs). |
18 |
|
19 |
|
20 |
Modified: main/trunk/bin/egencache |
21 |
=================================================================== |
22 |
--- main/trunk/bin/egencache 2009-03-31 16:46:32 UTC (rev 13261) |
23 |
+++ main/trunk/bin/egencache 2009-03-31 20:14:03 UTC (rev 13262) |
24 |
@@ -24,7 +24,7 @@ |
25 |
import os |
26 |
import portage |
27 |
import _emerge |
28 |
-from portage.cache.cache_errors import CacheError |
29 |
+from portage.cache.cache_errors import CacheError, StatCollision |
30 |
from portage.util import writemsg_level |
31 |
|
32 |
def parse_args(args): |
33 |
@@ -46,6 +46,10 @@ |
34 |
action="store", |
35 |
help="max load allowed when spawning multiple jobs", |
36 |
dest="load_average") |
37 |
+ parser.add_option("--rsync", |
38 |
+ action="store_true", |
39 |
+ help="enable rsync stat collision workaround " + \ |
40 |
+ "for bug 139134 (use with --update)") |
41 |
options, args = parser.parse_args(args) |
42 |
|
43 |
if not options.update: |
44 |
@@ -73,7 +77,8 @@ |
45 |
return options, args |
46 |
|
47 |
class GenCache(object): |
48 |
- def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None): |
49 |
+ def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None, |
50 |
+ rsync=False): |
51 |
self._portdb = portdb |
52 |
# We can globally cleanse stale cache only if we |
53 |
# iterate over every single cp. |
54 |
@@ -90,22 +95,47 @@ |
55 |
metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule") |
56 |
self._trg_cache = metadbmodule(portdb.porttree_root, |
57 |
"metadata/cache", portage.auxdbkeys[:]) |
58 |
+ if rsync: |
59 |
+ self._trg_cache.raise_stat_collision = True |
60 |
self._existing_nodes = set() |
61 |
|
62 |
def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata): |
63 |
self._existing_nodes.add(cpv) |
64 |
if metadata is not None: |
65 |
- # TODO: Implement a workaround for bug 139134 here. The cache |
66 |
- # should be able to optionally raise an exception in order to |
67 |
- # indicate any mtime + size collisions that will prevent rsync |
68 |
- # from detecting changes. These exceptions will be handled by |
69 |
- # bumping the mtime on the ebuild (and the corresponding cache |
70 |
- # entry). |
71 |
if metadata.get('EAPI') == '0': |
72 |
del metadata['EAPI'] |
73 |
try: |
74 |
- self._trg_cache[cpv] = metadata |
75 |
+ try: |
76 |
+ self._trg_cache[cpv] = metadata |
77 |
+ except StatCollision, sc: |
78 |
+ # If the content of a cache entry changes and neither the |
79 |
+ # file mtime nor size changes, it will prevent rsync from |
80 |
+ # detecting changes. Cache backends may raise this |
81 |
+ # exception from _setitem() if they detect this type of stat |
82 |
+ # collision. These exceptions are handled by bumping the |
83 |
+ # mtime on the ebuild (and the corresponding cache entry). |
84 |
+ # See bug #139134. |
85 |
+ max_mtime = sc.mtime |
86 |
+ for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems(): |
87 |
+ if max_mtime < ec_mtime: |
88 |
+ max_mtime = ec_mtime |
89 |
+ if max_mtime == sc.mtime: |
90 |
+ max_mtime += 1 |
91 |
+ max_mtime = long(max_mtime) |
92 |
+ try: |
93 |
+ os.utime(ebuild_path, (max_mtime, max_mtime)) |
94 |
+ except OSError, e: |
95 |
+ self.returncode |= 1 |
96 |
+ writemsg_level( |
97 |
+ "%s writing target: %s\n" % (cpv, e), |
98 |
+ level=logging.ERROR, noiselevel=-1) |
99 |
+ else: |
100 |
+ metadata['_mtime_'] = max_mtime |
101 |
+ self._trg_cache[cpv] = metadata |
102 |
+ self._portdb.auxdb[repo_path][cpv] = metadata |
103 |
+ |
104 |
except CacheError, ce: |
105 |
+ self.returncode |= 1 |
106 |
writemsg_level( |
107 |
"%s writing target: %s\n" % (cpv, ce), |
108 |
level=logging.ERROR, noiselevel=-1) |
109 |
@@ -195,7 +225,8 @@ |
110 |
|
111 |
gen_cache = GenCache(portdb, cp_iter=cp_iter, |
112 |
max_jobs=options.jobs, |
113 |
- max_load=options.load_average) |
114 |
+ max_load=options.load_average, |
115 |
+ rsync=options.rsync) |
116 |
gen_cache.run() |
117 |
return gen_cache.returncode |
118 |
|
119 |
|
120 |
Modified: main/trunk/pym/portage/cache/cache_errors.py |
121 |
=================================================================== |
122 |
--- main/trunk/pym/portage/cache/cache_errors.py 2009-03-31 16:46:32 UTC (rev 13261) |
123 |
+++ main/trunk/pym/portage/cache/cache_errors.py 2009-03-31 20:14:03 UTC (rev 13262) |
124 |
@@ -39,3 +39,25 @@ |
125 |
self.info = info |
126 |
def __str__(self): |
127 |
return "cache is non-modifiable"+str(self.info) |
128 |
+ |
129 |
+class StatCollision(CacheError): |
130 |
+ """ |
131 |
+ If the content of a cache entry changes and neither the file mtime nor |
132 |
+ size changes, it will prevent rsync from detecting changes. Cache backends |
133 |
+ may raise this exception from _setitem() if they detect this type of stat |
134 |
+ collision. See bug #139134. |
135 |
+ """ |
136 |
+ def __init__(self, key, filename, mtime, size): |
137 |
+ self.key = key |
138 |
+ self.filename = filename |
139 |
+ self.mtime = mtime |
140 |
+ self.size = size |
141 |
+ |
142 |
+ def __str__(self): |
143 |
+ return "%s has stat collision with size %s and mtime %s" % \ |
144 |
+ (self.key, self.size, self.mtime) |
145 |
+ |
146 |
+ def __repr__(self): |
147 |
+ return "portage.cache.cache_errors.StatCollision(%s)" % \ |
148 |
+ (', '.join((repr(self.key), repr(self.filename), |
149 |
+ repr(self.mtime), repr(self.size))),) |
150 |
|
151 |
Modified: main/trunk/pym/portage/cache/metadata.py |
152 |
=================================================================== |
153 |
--- main/trunk/pym/portage/cache/metadata.py 2009-03-31 16:46:32 UTC (rev 13261) |
154 |
+++ main/trunk/pym/portage/cache/metadata.py 2009-03-31 20:14:03 UTC (rev 13262) |
155 |
@@ -3,7 +3,7 @@ |
156 |
# License: GPL2 |
157 |
# $Id$ |
158 |
|
159 |
-import errno, os, re |
160 |
+import errno, os, re, sys |
161 |
from portage.cache import cache_errors, flat_hash |
162 |
import portage.eclass_cache |
163 |
from portage.cache.template import reconstruct_eclasses |
164 |
@@ -30,6 +30,7 @@ |
165 |
super(database, self).__init__(location, *args, **config) |
166 |
self.location = os.path.join(loc, "metadata","cache") |
167 |
self.ec = portage.eclass_cache.cache(loc) |
168 |
+ self.raise_stat_collision = False |
169 |
|
170 |
def _parse_data(self, data, cpv): |
171 |
_hashed_re_match = self._hashed_re.match |
172 |
@@ -73,31 +74,63 @@ |
173 |
values = ProtectedDict(values) |
174 |
values["INHERITED"] = ' '.join(sorted(values["_eclasses_"])) |
175 |
|
176 |
+ new_content = [] |
177 |
+ for k in self.auxdbkey_order: |
178 |
+ new_content.append(unicode(values.get(k, ''), errors='replace')) |
179 |
+ new_content.append(u'\n') |
180 |
+ for i in xrange(magic_line_count - len(self.auxdbkey_order)): |
181 |
+ new_content.append(u'\n') |
182 |
+ new_content = u''.join(new_content) |
183 |
+ new_content = new_content.encode( |
184 |
+ sys.getdefaultencoding(), 'backslashreplace') |
185 |
+ |
186 |
+ new_fp = os.path.join(self.location, cpv) |
187 |
+ try: |
188 |
+ f = open(new_fp, 'rb') |
189 |
+ except EnvironmentError: |
190 |
+ pass |
191 |
+ else: |
192 |
+ try: |
193 |
+ try: |
194 |
+ existing_st = os.fstat(f.fileno()) |
195 |
+ existing_content = f.read() |
196 |
+ finally: |
197 |
+ f.close() |
198 |
+ except EnvironmentError: |
199 |
+ pass |
200 |
+ else: |
201 |
+ existing_mtime = long(existing_st.st_mtime) |
202 |
+ if values['_mtime_'] == existing_mtime and \ |
203 |
+ existing_content == new_content: |
204 |
+ return |
205 |
+ |
206 |
+ if self.raise_stat_collision and \ |
207 |
+ values['_mtime_'] == existing_mtime and \ |
208 |
+ len(new_content) == existing_st.st_size: |
209 |
+ raise cache_errors.StatCollision(cpv, new_fp, |
210 |
+ existing_mtime, existing_st.st_size) |
211 |
+ |
212 |
s = cpv.rfind("/") |
213 |
fp = os.path.join(self.location,cpv[:s], |
214 |
".update.%i.%s" % (os.getpid(), cpv[s+1:])) |
215 |
try: |
216 |
- myf = open(fp, "w") |
217 |
+ myf = open(fp, 'wb') |
218 |
except EnvironmentError, e: |
219 |
if errno.ENOENT == e.errno: |
220 |
try: |
221 |
self._ensure_dirs(cpv) |
222 |
- myf = open(fp, "w") |
223 |
+ myf = open(fp, 'wb') |
224 |
except EnvironmentError, e: |
225 |
raise cache_errors.CacheCorruption(cpv, e) |
226 |
else: |
227 |
raise cache_errors.CacheCorruption(cpv, e) |
228 |
|
229 |
try: |
230 |
- for k in self.auxdbkey_order: |
231 |
- myf.write(values.get(k, "") + "\n") |
232 |
- for i in xrange(magic_line_count - len(self.auxdbkey_order)): |
233 |
- myf.write("\n") |
234 |
+ myf.write(new_content) |
235 |
finally: |
236 |
myf.close() |
237 |
self._ensure_access(fp, mtime=values["_mtime_"]) |
238 |
|
239 |
- new_fp = os.path.join(self.location, cpv) |
240 |
try: |
241 |
os.rename(fp, new_fp) |
242 |
except EnvironmentError, e: |