Gentoo Archives: gentoo-commits

From: "Zac Medico (zmedico)" <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] portage r13262 - in main/trunk: bin pym/portage/cache
Date: Tue, 31 Mar 2009 20:14:06
Message-Id: E1LokLU-0003tm-2a@stork.gentoo.org
1 Author: zmedico
2 Date: 2009-03-31 20:14:03 +0000 (Tue, 31 Mar 2009)
3 New Revision: 13262
4
5 Modified:
6 main/trunk/bin/egencache
7 main/trunk/pym/portage/cache/cache_errors.py
8 main/trunk/pym/portage/cache/metadata.py
9 Log:
10 Add a new egencache --rsync option which enables a stat collision workaround
11 for cases in which the content of a cache entry changes and neither the file
12 mtime nor size changes (preventing rsync from detecting changes). See bug
13 #139134. This option should only be needed for distribution via something
14 like rsync, which relies on timestamps and file sizes to detect changes. It's
15 not needed with git since that uses a more thorough mechanism which allows it
16 to detect changed inode numbers (described in racy-git.txt in the git
17 technical docs).
18
19
20 Modified: main/trunk/bin/egencache
21 ===================================================================
22 --- main/trunk/bin/egencache 2009-03-31 16:46:32 UTC (rev 13261)
23 +++ main/trunk/bin/egencache 2009-03-31 20:14:03 UTC (rev 13262)
24 @@ -24,7 +24,7 @@
25 import os
26 import portage
27 import _emerge
28 -from portage.cache.cache_errors import CacheError
29 +from portage.cache.cache_errors import CacheError, StatCollision
30 from portage.util import writemsg_level
31
32 def parse_args(args):
33 @@ -46,6 +46,10 @@
34 action="store",
35 help="max load allowed when spawning multiple jobs",
36 dest="load_average")
37 + parser.add_option("--rsync",
38 + action="store_true",
39 + help="enable rsync stat collision workaround " + \
40 + "for bug 139134 (use with --update)")
41 options, args = parser.parse_args(args)
42
43 if not options.update:
44 @@ -73,7 +77,8 @@
45 return options, args
46
47 class GenCache(object):
48 - def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None):
49 + def __init__(self, portdb, cp_iter=None, max_jobs=None, max_load=None,
50 + rsync=False):
51 self._portdb = portdb
52 # We can globally cleanse stale cache only if we
53 # iterate over every single cp.
54 @@ -90,22 +95,47 @@
55 metadbmodule = portdb.mysettings.load_best_module("portdbapi.metadbmodule")
56 self._trg_cache = metadbmodule(portdb.porttree_root,
57 "metadata/cache", portage.auxdbkeys[:])
58 + if rsync:
59 + self._trg_cache.raise_stat_collision = True
60 self._existing_nodes = set()
61
62 def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata):
63 self._existing_nodes.add(cpv)
64 if metadata is not None:
65 - # TODO: Implement a workaround for bug 139134 here. The cache
66 - # should be able to optionally raise an exception in order to
67 - # indicate any mtime + size collisions that will prevent rsync
68 - # from detecting changes. These exceptions will be handled by
69 - # bumping the mtime on the ebuild (and the corresponding cache
70 - # entry).
71 if metadata.get('EAPI') == '0':
72 del metadata['EAPI']
73 try:
74 - self._trg_cache[cpv] = metadata
75 + try:
76 + self._trg_cache[cpv] = metadata
77 + except StatCollision, sc:
78 + # If the content of a cache entry changes and neither the
79 + # file mtime nor size changes, it will prevent rsync from
80 + # detecting changes. Cache backends may raise this
81 + # exception from _setitem() if they detect this type of stat
82 + # collision. These exceptions are handled by bumping the
83 + # mtime on the ebuild (and the corresponding cache entry).
84 + # See bug #139134.
85 + max_mtime = sc.mtime
86 + for ec, (loc, ec_mtime) in metadata['_eclasses_'].iteritems():
87 + if max_mtime < ec_mtime:
88 + max_mtime = ec_mtime
89 + if max_mtime == sc.mtime:
90 + max_mtime += 1
91 + max_mtime = long(max_mtime)
92 + try:
93 + os.utime(ebuild_path, (max_mtime, max_mtime))
94 + except OSError, e:
95 + self.returncode |= 1
96 + writemsg_level(
97 + "%s writing target: %s\n" % (cpv, e),
98 + level=logging.ERROR, noiselevel=-1)
99 + else:
100 + metadata['_mtime_'] = max_mtime
101 + self._trg_cache[cpv] = metadata
102 + self._portdb.auxdb[repo_path][cpv] = metadata
103 +
104 except CacheError, ce:
105 + self.returncode |= 1
106 writemsg_level(
107 "%s writing target: %s\n" % (cpv, ce),
108 level=logging.ERROR, noiselevel=-1)
109 @@ -195,7 +225,8 @@
110
111 gen_cache = GenCache(portdb, cp_iter=cp_iter,
112 max_jobs=options.jobs,
113 - max_load=options.load_average)
114 + max_load=options.load_average,
115 + rsync=options.rsync)
116 gen_cache.run()
117 return gen_cache.returncode
118
119
120 Modified: main/trunk/pym/portage/cache/cache_errors.py
121 ===================================================================
122 --- main/trunk/pym/portage/cache/cache_errors.py 2009-03-31 16:46:32 UTC (rev 13261)
123 +++ main/trunk/pym/portage/cache/cache_errors.py 2009-03-31 20:14:03 UTC (rev 13262)
124 @@ -39,3 +39,25 @@
125 self.info = info
126 def __str__(self):
127 return "cache is non-modifiable"+str(self.info)
128 +
129 +class StatCollision(CacheError):
130 + """
131 + If the content of a cache entry changes and neither the file mtime nor
132 + size changes, it will prevent rsync from detecting changes. Cache backends
133 + may raise this exception from _setitem() if they detect this type of stat
134 + collision. See bug #139134.
135 + """
136 + def __init__(self, key, filename, mtime, size):
137 + self.key = key
138 + self.filename = filename
139 + self.mtime = mtime
140 + self.size = size
141 +
142 + def __str__(self):
143 + return "%s has stat collision with size %s and mtime %s" % \
144 + (self.key, self.size, self.mtime)
145 +
146 + def __repr__(self):
147 + return "portage.cache.cache_errors.StatCollision(%s)" % \
148 + (', '.join((repr(self.key), repr(self.filename),
149 + repr(self.mtime), repr(self.size))),)
150
151 Modified: main/trunk/pym/portage/cache/metadata.py
152 ===================================================================
153 --- main/trunk/pym/portage/cache/metadata.py 2009-03-31 16:46:32 UTC (rev 13261)
154 +++ main/trunk/pym/portage/cache/metadata.py 2009-03-31 20:14:03 UTC (rev 13262)
155 @@ -3,7 +3,7 @@
156 # License: GPL2
157 # $Id$
158
159 -import errno, os, re
160 +import errno, os, re, sys
161 from portage.cache import cache_errors, flat_hash
162 import portage.eclass_cache
163 from portage.cache.template import reconstruct_eclasses
164 @@ -30,6 +30,7 @@
165 super(database, self).__init__(location, *args, **config)
166 self.location = os.path.join(loc, "metadata","cache")
167 self.ec = portage.eclass_cache.cache(loc)
168 + self.raise_stat_collision = False
169
170 def _parse_data(self, data, cpv):
171 _hashed_re_match = self._hashed_re.match
172 @@ -73,31 +74,63 @@
173 values = ProtectedDict(values)
174 values["INHERITED"] = ' '.join(sorted(values["_eclasses_"]))
175
176 + new_content = []
177 + for k in self.auxdbkey_order:
178 + new_content.append(unicode(values.get(k, ''), errors='replace'))
179 + new_content.append(u'\n')
180 + for i in xrange(magic_line_count - len(self.auxdbkey_order)):
181 + new_content.append(u'\n')
182 + new_content = u''.join(new_content)
183 + new_content = new_content.encode(
184 + sys.getdefaultencoding(), 'backslashreplace')
185 +
186 + new_fp = os.path.join(self.location, cpv)
187 + try:
188 + f = open(new_fp, 'rb')
189 + except EnvironmentError:
190 + pass
191 + else:
192 + try:
193 + try:
194 + existing_st = os.fstat(f.fileno())
195 + existing_content = f.read()
196 + finally:
197 + f.close()
198 + except EnvironmentError:
199 + pass
200 + else:
201 + existing_mtime = long(existing_st.st_mtime)
202 + if values['_mtime_'] == existing_mtime and \
203 + existing_content == new_content:
204 + return
205 +
206 + if self.raise_stat_collision and \
207 + values['_mtime_'] == existing_mtime and \
208 + len(new_content) == existing_st.st_size:
209 + raise cache_errors.StatCollision(cpv, new_fp,
210 + existing_mtime, existing_st.st_size)
211 +
212 s = cpv.rfind("/")
213 fp = os.path.join(self.location,cpv[:s],
214 ".update.%i.%s" % (os.getpid(), cpv[s+1:]))
215 try:
216 - myf = open(fp, "w")
217 + myf = open(fp, 'wb')
218 except EnvironmentError, e:
219 if errno.ENOENT == e.errno:
220 try:
221 self._ensure_dirs(cpv)
222 - myf = open(fp, "w")
223 + myf = open(fp, 'wb')
224 except EnvironmentError, e:
225 raise cache_errors.CacheCorruption(cpv, e)
226 else:
227 raise cache_errors.CacheCorruption(cpv, e)
228
229 try:
230 - for k in self.auxdbkey_order:
231 - myf.write(values.get(k, "") + "\n")
232 - for i in xrange(magic_line_count - len(self.auxdbkey_order)):
233 - myf.write("\n")
234 + myf.write(new_content)
235 finally:
236 myf.close()
237 self._ensure_access(fp, mtime=values["_mtime_"])
238
239 - new_fp = os.path.join(self.location, cpv)
240 try:
241 os.rename(fp, new_fp)
242 except EnvironmentError, e: