1 |
Add a boolean sync-rcu repos.conf setting that behaves as follows: |
2 |
|
3 |
Enable read-copy-update (RCU) behavior for sync operations. The |
4 |
current latest immutable version of a repository will be referenced |
5 |
by a symlink found where the repository would normally be located |
6 |
(see the location setting). Repository consumers should resolve |
7 |
the canonical path of this symlink before attempting to access |
8 |
the repository, and all operations should be read-only, since |
9 |
the repository is considered immutable. Updates occur by atomic |
10 |
replacement of the symlink, which causes new consumers to use the |
11 |
new immutable version, while any earlier consumers continue to |
12 |
use the canonical path that was resolved earlier. This option |
13 |
requires sync-allow-hardlinks and sync-rcu-store-dir options to |
14 |
be enabled, and currently also requires that sync-type is set |
15 |
to rsync. This option is disabled by default, since the symlink |
16 |
usage would require special handling for scenarios involving bind |
17 |
mounts and chroots. |
18 |
|
19 |
Bug: https://bugs.gentoo.org/662070 |
20 |
--- |
21 |
lib/portage/repository/config.py | 36 +++- |
22 |
lib/portage/repository/storage/hardlink_rcu.py | 251 +++++++++++++++++++++++++ |
23 |
lib/portage/sync/syncbase.py | 4 +- |
24 |
man/portage.5 | 35 ++++ |
25 |
4 files changed, 323 insertions(+), 3 deletions(-) |
26 |
create mode 100644 lib/portage/repository/storage/hardlink_rcu.py |
27 |
|
28 |
diff --git a/lib/portage/repository/config.py b/lib/portage/repository/config.py |
29 |
index f790f9392..8cdc2a696 100644 |
30 |
--- a/lib/portage/repository/config.py |
31 |
+++ b/lib/portage/repository/config.py |
32 |
@@ -84,7 +84,7 @@ class RepoConfig(object): |
33 |
'profile_formats', 'sign_commit', 'sign_manifest', 'strict_misc_digests', |
34 |
'sync_depth', 'sync_hooks_only_on_change', |
35 |
'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest', |
36 |
- 'update_changelog', '_eapis_banned', '_eapis_deprecated', |
37 |
+ 'update_changelog', 'user_location', '_eapis_banned', '_eapis_deprecated', |
38 |
'_masters_orig', 'module_specific_options', 'manifest_required_hashes', |
39 |
'sync_allow_hardlinks', |
40 |
'sync_openpgp_key_path', |
41 |
@@ -93,6 +93,10 @@ class RepoConfig(object): |
42 |
'sync_openpgp_key_refresh_retry_delay_exp_base', |
43 |
'sync_openpgp_key_refresh_retry_delay_mult', |
44 |
'sync_openpgp_key_refresh_retry_overall_timeout', |
45 |
+ 'sync_rcu', |
46 |
+ 'sync_rcu_store_dir', |
47 |
+ 'sync_rcu_spare_snapshots', |
48 |
+ 'sync_rcu_ttl_days', |
49 |
) |
50 |
|
51 |
def __init__(self, name, repo_opts, local_config=True): |
52 |
@@ -198,6 +202,22 @@ class RepoConfig(object): |
53 |
'sync_openpgp_key_refresh_retry_overall_timeout'): |
54 |
setattr(self, k, repo_opts.get(k.replace('_', '-'), None)) |
55 |
|
56 |
+ self.sync_rcu = repo_opts.get( |
57 |
+ 'sync-rcu', 'false').lower() in ('true', 'yes') |
58 |
+ |
59 |
+ self.sync_rcu_store_dir = repo_opts.get('sync-rcu-store-dir') |
60 |
+ |
61 |
+ for k in ('sync-rcu-spare-snapshots', 'sync-rcu-ttl-days'): |
62 |
+ v = repo_opts.get(k, '').strip() or None |
63 |
+ if v: |
64 |
+ try: |
65 |
+ v = int(v) |
66 |
+ except (OverflowError, ValueError): |
67 |
+ writemsg(_("!!! Invalid %s setting for repo" |
68 |
+ " %s: %s\n") % (k, name, v), noiselevel=-1) |
69 |
+ v = None |
70 |
+ setattr(self, k.replace('-', '_'), v) |
71 |
+ |
72 |
self.module_specific_options = {} |
73 |
|
74 |
# Not implemented. |
75 |
@@ -206,9 +226,14 @@ class RepoConfig(object): |
76 |
format = format.strip() |
77 |
self.format = format |
78 |
|
79 |
+ self.user_location = None |
80 |
location = repo_opts.get('location') |
81 |
if location is not None and location.strip(): |
82 |
if os.path.isdir(location) or portage._sync_mode: |
83 |
+ # The user_location is required for sync-rcu support, |
84 |
+ # since it manages a symlink which resides at that |
85 |
+ # location (and realpath is irreversible). |
86 |
+ self.user_location = location |
87 |
location = os.path.realpath(location) |
88 |
else: |
89 |
location = None |
90 |
@@ -542,6 +567,10 @@ class RepoConfigLoader(object): |
91 |
'sync_openpgp_key_refresh_retry_delay_exp_base', |
92 |
'sync_openpgp_key_refresh_retry_delay_mult', |
93 |
'sync_openpgp_key_refresh_retry_overall_timeout', |
94 |
+ 'sync_rcu', |
95 |
+ 'sync_rcu_store_dir', |
96 |
+ 'sync_rcu_spare_snapshots', |
97 |
+ 'sync_rcu_ttl_days', |
98 |
'sync_type', 'sync_umask', 'sync_uri', 'sync_user', |
99 |
'module_specific_options'): |
100 |
v = getattr(repos_conf_opts, k, None) |
101 |
@@ -962,7 +991,7 @@ class RepoConfigLoader(object): |
102 |
return repo_name in self.prepos |
103 |
|
104 |
def config_string(self): |
105 |
- bool_keys = ("strict_misc_digests", "sync_allow_hardlinks") |
106 |
+ bool_keys = ("strict_misc_digests", "sync_allow_hardlinks", "sync_rcu") |
107 |
str_or_int_keys = ("auto_sync", "clone_depth", "format", "location", |
108 |
"main_repo", "priority", "sync_depth", "sync_openpgp_key_path", |
109 |
"sync_openpgp_key_refresh_retry_count", |
110 |
@@ -970,6 +999,9 @@ class RepoConfigLoader(object): |
111 |
"sync_openpgp_key_refresh_retry_delay_exp_base", |
112 |
"sync_openpgp_key_refresh_retry_delay_mult", |
113 |
"sync_openpgp_key_refresh_retry_overall_timeout", |
114 |
+ "sync_rcu_store_dir", |
115 |
+ "sync_rcu_spare_snapshots", |
116 |
+ "sync_rcu_ttl_days", |
117 |
"sync_type", "sync_umask", "sync_uri", 'sync_user') |
118 |
str_tuple_keys = ("aliases", "eclass_overrides", "force") |
119 |
repo_config_tuple_keys = ("masters",) |
120 |
diff --git a/lib/portage/repository/storage/hardlink_rcu.py b/lib/portage/repository/storage/hardlink_rcu.py |
121 |
new file mode 100644 |
122 |
index 000000000..80cdbb0d7 |
123 |
--- /dev/null |
124 |
+++ b/lib/portage/repository/storage/hardlink_rcu.py |
125 |
@@ -0,0 +1,251 @@ |
126 |
+# Copyright 2018 Gentoo Foundation |
127 |
+# Distributed under the terms of the GNU General Public License v2 |
128 |
+ |
129 |
+import datetime |
130 |
+ |
131 |
+import portage |
132 |
+from portage import os |
133 |
+from portage.repository.storage.interface import ( |
134 |
+ RepoStorageException, |
135 |
+ RepoStorageInterface, |
136 |
+) |
137 |
+from portage.util.futures import asyncio |
138 |
+from portage.util.futures.compat_coroutine import ( |
139 |
+ coroutine, |
140 |
+ coroutine_return, |
141 |
+) |
142 |
+ |
143 |
+from _emerge.SpawnProcess import SpawnProcess |
144 |
+ |
145 |
+ |
146 |
+class HardlinkRcuRepoStorage(RepoStorageInterface): |
147 |
+    """ |
148 |
+    Enable read-copy-update (RCU) behavior for sync operations. The |
149 |
+    current latest immutable version of a repository will be |
150 |
+    referenced by a symlink found where the repository would normally |
151 |
+    be located. Repository consumers should resolve the canonical |
152 |
+    path of this symlink before attempting to access the repository, |
153 |
+    and all operations should be read-only, since the repository |
154 |
+    is considered immutable. Updates occur by atomic replacement |
155 |
+    of the symlink, which causes new consumers to use the new |
156 |
+    immutable version, while any earlier consumers continue to use |
157 |
+    the canonical path that was resolved earlier. |
158 |
+ |
159 |
+    Performance is better than HardlinkQuarantineRepoStorage, |
160 |
+    since commit involves atomic replacement of a symlink. Since |
161 |
+    the symlink usage would require special handling for scenarios |
162 |
+    involving bind mounts and chroots, this module is not enabled |
163 |
+    by default. |
164 |
+ |
165 |
+    repos.conf parameters: |
166 |
+ |
167 |
+        sync-rcu-store-dir |
168 |
+ |
169 |
+            Directory path reserved for sync-rcu storage. This |
170 |
+            directory must have a unique value for each repository |
171 |
+            (do not set it in the DEFAULT section). This directory |
172 |
+            must not contain any other files or directories aside |
173 |
+            from those that are created automatically when sync-rcu |
174 |
+            is enabled. |
175 |
+ |
176 |
+        sync-rcu-spare-snapshots = 1 |
177 |
+ |
178 |
+            Number of spare snapshots for sync-rcu to retain with |
179 |
+            expired ttl. This protects the previous latest snapshot |
180 |
+            from being removed immediately after a new version |
181 |
+            becomes available, since it might still be used by |
182 |
+            running processes. |
183 |
+ |
184 |
+        sync-rcu-ttl-days = 7 |
185 |
+ |
186 |
+            Number of days for sync-rcu to retain previous immutable |
187 |
+            snapshots of a repository. After the ttl of a particular |
188 |
+            snapshot has expired, it will be removed automatically (the |
189 |
+            latest snapshot is exempt, and sync-rcu-spare-snapshots |
190 |
+            configures the number of previous snapshots that are |
191 |
+            exempt). If the ttl is set too low, then a snapshot could |
192 |
+            expire while it is in use by a running process. |
193 |
+ |
194 |
+    """ |
195 |
+    def __init__(self, repo, spawn_kwargs): |
196 |
+        # Note that repo.location cannot substitute for repo.user_location here, |
197 |
+        # since we manage a symlink that resides at repo.user_location, and |
198 |
+        # repo.location is the irreversible result of realpath(repo.user_location). |
199 |
+        self._user_location = repo.user_location |
200 |
+        self._spawn_kwargs = spawn_kwargs |
201 |
+ |
202 |
+        if not repo.sync_allow_hardlinks: |
203 |
+            raise RepoStorageException("repos.conf sync-rcu setting" |
204 |
+                " for repo '%s' requires that sync-allow-hardlinks be enabled" % repo.name) |
205 |
+ |
206 |
+        # Raise an exception if repo.sync_rcu_store_dir is unset, since the |
207 |
+        # user needs to be aware of this location for bind mount and chroot |
208 |
+        # scenarios |
209 |
+        if not repo.sync_rcu_store_dir: |
210 |
+            raise RepoStorageException("repos.conf sync-rcu setting" |
211 |
+                " for repo '%s' requires that sync-rcu-store-dir be set" % repo.name) |
212 |
+ |
213 |
+        self._storage_location = repo.sync_rcu_store_dir |
214 |
+        # Unset or negative values fall back to the documented defaults |
215 |
+        # (sync-rcu-spare-snapshots = 1, sync-rcu-ttl-days = 7). |
216 |
+        if repo.sync_rcu_spare_snapshots is None or repo.sync_rcu_spare_snapshots < 0: |
217 |
+            self._spare_snapshots = 1 |
218 |
+        else: |
219 |
+            self._spare_snapshots = repo.sync_rcu_spare_snapshots |
220 |
+        if repo.sync_rcu_ttl_days is None or repo.sync_rcu_ttl_days < 0: |
221 |
+            self._ttl_days = 7 |
222 |
+        else: |
223 |
+            self._ttl_days = repo.sync_rcu_ttl_days |
224 |
+        self._update_location = None |
225 |
+        self._latest_symlink = os.path.join(self._storage_location, 'latest') |
226 |
+        self._latest_canonical = os.path.realpath(self._latest_symlink) |
227 |
+        if not os.path.exists(self._latest_canonical) or os.path.islink(self._latest_canonical): |
228 |
+            # It doesn't exist, or it's a broken symlink. |
229 |
+            self._latest_canonical = None |
230 |
+        self._snapshots_dir = os.path.join(self._storage_location, 'snapshots') |
231 |
+ |
232 |
+    @coroutine |
233 |
+    def _check_call(self, cmd, privileged=False): |
234 |
+        """ |
235 |
+        Run cmd and raise RepoStorageException on failure. |
236 |
+ |
237 |
+        @param cmd: command to execute |
238 |
+        @type cmd: list |
239 |
+        @param privileged: run with maximum privileges |
240 |
+        @type privileged: bool |
241 |
+        """ |
242 |
+        if privileged: |
243 |
+            kwargs = dict(fd_pipes=self._spawn_kwargs.get('fd_pipes')) |
244 |
+        else: |
245 |
+            kwargs = self._spawn_kwargs |
246 |
+        p = SpawnProcess(args=cmd, scheduler=asyncio._wrap_loop(), **kwargs) |
247 |
+        p.start() |
248 |
+        if (yield p.async_wait()) != os.EX_OK: |
249 |
+            raise RepoStorageException('command exited with status {}: {}'.\ |
250 |
+                format(p.returncode, ' '.join(cmd))) |
251 |
+ |
252 |
+    @coroutine |
253 |
+    def init_update(self): |
254 |
+        update_location = os.path.join(self._storage_location, 'update') |
255 |
+        yield self._check_call(['rm', '-rf', update_location]) |
256 |
+ |
257 |
+        # This assumes normal umask permissions if it doesn't exist yet. |
258 |
+        portage.util.ensure_dirs(self._storage_location) |
259 |
+ |
260 |
+        if self._latest_canonical is not None: |
261 |
+            portage.util.ensure_dirs(update_location) |
262 |
+            portage.util.apply_stat_permissions(update_location, |
263 |
+                os.stat(self._user_location)) |
264 |
+            # Use rsync --link-dest to hardlink files into update_location, |
265 |
+            # since cp -l is not portable. |
266 |
+            yield self._check_call(['rsync', '-a', '--link-dest', self._latest_canonical, |
267 |
+                self._latest_canonical + '/', update_location + '/']) |
268 |
+ |
269 |
+        elif not os.path.islink(self._user_location): |
270 |
+            yield self._migrate(update_location) |
271 |
+            update_location = (yield self.init_update()) |
272 |
+ |
273 |
+        self._update_location = update_location |
274 |
+ |
275 |
+        coroutine_return(self._update_location) |
276 |
+ |
277 |
+    @coroutine |
278 |
+    def _migrate(self, update_location): |
279 |
+        """ |
280 |
+        When repo.user_location is a normal directory, migrate it to |
281 |
+        storage so that it can be replaced with a symlink. After migration, |
282 |
+        commit the content as the latest snapshot. |
283 |
+        """ |
284 |
+        try: |
285 |
+            os.rename(self._user_location, update_location) |
286 |
+        except OSError: |
287 |
+            portage.util.ensure_dirs(update_location) |
288 |
+            portage.util.apply_stat_permissions(update_location, |
289 |
+                os.stat(self._user_location)) |
290 |
+            # It's probably on a different device, so copy it. |
291 |
+            yield self._check_call(['rsync', '-a', |
292 |
+                self._user_location + '/', update_location + '/']) |
293 |
+ |
294 |
+            # Remove the old copy so that symlink can be created. Run with |
295 |
+            # maximum privileges, since removal requires write access to |
296 |
+            # the parent directory. |
297 |
+            yield self._check_call(['rm', '-rf', self._user_location], privileged=True) |
298 |
+ |
299 |
+        self._update_location = update_location |
300 |
+ |
301 |
+        # Make this copy the latest snapshot |
302 |
+        yield self.commit_update() |
303 |
+ |
304 |
+    @property |
305 |
+    def current_update(self): |
306 |
+        if self._update_location is None: |
307 |
+            raise RepoStorageException('current update does not exist') |
308 |
+        return self._update_location |
309 |
+ |
310 |
+    @coroutine |
311 |
+    def commit_update(self): |
312 |
+        update_location = self.current_update |
313 |
+        self._update_location = None |
314 |
+        try: |
315 |
+            snapshots = [int(name) for name in os.listdir(self._snapshots_dir)] |
316 |
+        except OSError: |
317 |
+            snapshots = [] |
318 |
+            portage.util.ensure_dirs(self._snapshots_dir) |
319 |
+            portage.util.apply_stat_permissions(self._snapshots_dir, |
320 |
+                os.stat(self._storage_location)) |
321 |
+        if snapshots: |
322 |
+            new_id = max(snapshots) + 1 |
323 |
+        else: |
324 |
+            new_id = 1 |
325 |
+        os.rename(update_location, os.path.join(self._snapshots_dir, str(new_id))) |
326 |
+        new_symlink = self._latest_symlink + '.new' |
327 |
+        try: |
328 |
+            os.unlink(new_symlink) |
329 |
+        except OSError: |
330 |
+            pass |
331 |
+        os.symlink('snapshots/{}'.format(new_id), new_symlink) |
332 |
+        os.rename(new_symlink, self._latest_symlink) |
333 |
+ |
334 |
+        try: |
335 |
+            user_location_correct = os.path.samefile(self._user_location, self._latest_symlink) |
336 |
+        except OSError: |
337 |
+            user_location_correct = False |
338 |
+ |
339 |
+        if not user_location_correct: |
340 |
+            new_symlink = self._user_location + '.new' |
341 |
+            try: |
342 |
+                os.unlink(new_symlink) |
343 |
+            except OSError: |
344 |
+                pass |
345 |
+            os.symlink(self._latest_symlink, new_symlink) |
346 |
+            os.rename(new_symlink, self._user_location) |
347 |
+ |
348 |
+        coroutine_return() |
349 |
+        yield None |
350 |
+ |
351 |
+    @coroutine |
352 |
+    def abort_update(self): |
353 |
+        if self._update_location is not None: |
354 |
+            update_location = self._update_location |
355 |
+            self._update_location = None |
356 |
+            yield self._check_call(['rm', '-rf', update_location]) |
357 |
+ |
358 |
+    @coroutine |
359 |
+    def garbage_collection(self): |
360 |
+        snap_ttl = datetime.timedelta(days=self._ttl_days) |
361 |
+        snapshots = sorted(int(name) for name in os.listdir(self._snapshots_dir)) |
362 |
+        # always preserve the latest snapshot, plus the configured spares |
363 |
+        protect_count = self._spare_snapshots + 1 |
364 |
+        while snapshots and protect_count: |
365 |
+            protect_count -= 1 |
366 |
+            snapshots.pop() |
367 |
+        for snap_id in snapshots: |
368 |
+            snap_path = os.path.join(self._snapshots_dir, str(snap_id)) |
369 |
+            try: |
370 |
+                st = os.stat(snap_path) |
371 |
+            except OSError: |
372 |
+                continue |
373 |
+            snap_timestamp = datetime.datetime.utcfromtimestamp(st.st_mtime) |
374 |
+            if (datetime.datetime.utcnow() - snap_timestamp) < snap_ttl: |
375 |
+                continue |
376 |
+            yield self._check_call(['rm', '-rf', snap_path]) |
377 |
diff --git a/lib/portage/sync/syncbase.py b/lib/portage/sync/syncbase.py |
378 |
index 1d2a00b7c..5d9455f93 100644 |
379 |
--- a/lib/portage/sync/syncbase.py |
380 |
+++ b/lib/portage/sync/syncbase.py |
381 |
@@ -86,7 +86,9 @@ class SyncBase(object): |
382 |
@rtype: str |
383 |
@return: name of the selected repo storage constructor |
384 |
''' |
385 |
- if self.repo.sync_allow_hardlinks: |
386 |
+ if self.repo.sync_rcu: |
387 |
+ mod_name = 'portage.repository.storage.hardlink_rcu.HardlinkRcuRepoStorage' |
388 |
+ elif self.repo.sync_allow_hardlinks: |
389 |
mod_name = 'portage.repository.storage.hardlink_quarantine.HardlinkQuarantineRepoStorage' |
390 |
else: |
391 |
mod_name = 'portage.repository.storage.inplace.InplaceRepoStorage' |
392 |
diff --git a/man/portage.5 b/man/portage.5 |
393 |
index cd9d5036d..20f9aae7a 100644 |
394 |
--- a/man/portage.5 |
395 |
+++ b/man/portage.5 |
396 |
@@ -1025,6 +1025,41 @@ If set to true, then sync of a given repository will not trigger postsync |
397 |
hooks unless hooks would have executed for a master repository or the |
398 |
repository has changed since the previous sync operation. |
399 |
.TP |
400 |
+.B sync\-rcu = yes|no |
401 |
+Enable read\-copy\-update (RCU) behavior for sync operations. The current |
402 |
+latest immutable version of a repository will be referenced by a symlink |
403 |
+found where the repository would normally be located (see the \fBlocation\fR |
404 |
+setting). Repository consumers should resolve the canonical path of this |
405 |
+symlink before attempting to access the repository, and all operations should |
406 |
+be read\-only, since the repository is considered immutable. Updates occur |
407 |
+by atomic replacement of the symlink, which causes new consumers to use the |
408 |
+new immutable version, while any earlier consumers continue to use the |
409 |
+canonical path that was resolved earlier. This option requires |
410 |
+sync\-allow\-hardlinks and sync\-rcu\-store\-dir options to be enabled, and |
411 |
+currently also requires that sync\-type is set to rsync. This option is |
412 |
+disabled by default, since the symlink usage would require special handling |
413 |
+for scenarios involving bind mounts and chroots. |
414 |
+.TP |
415 |
+.B sync\-rcu\-store\-dir |
416 |
+Directory path reserved for sync\-rcu storage. This directory must have a |
417 |
+unique value for each repository (do not set it in the DEFAULT section). |
418 |
+This directory must not contain any other files or directories aside from |
419 |
+those that are created automatically when sync\-rcu is enabled. |
420 |
+.TP |
421 |
+.B sync\-rcu\-spare\-snapshots = 1 |
422 |
+Number of spare snapshots for sync\-rcu to retain with expired ttl. This |
423 |
+protects the previous latest snapshot from being removed immediately after |
424 |
+a new version becomes available, since it might still be used by running |
425 |
+processes. |
426 |
+.TP |
427 |
+.B sync\-rcu\-ttl\-days = 7 |
428 |
+Number of days for sync\-rcu to retain previous immutable snapshots of |
429 |
+a repository. After the ttl of a particular snapshot has expired, it |
430 |
+will be removed automatically (the latest snapshot is exempt, and |
431 |
+sync\-rcu\-spare\-snapshots configures the number of previous snapshots |
432 |
+that are exempt). If the ttl is set too low, then a snapshot could |
433 |
+expire while it is in use by a running process. |
434 |
+.TP |
435 |
.B sync\-type |
436 |
Specifies type of synchronization performed by `emerge \-\-sync`. |
437 |
.br |
438 |
-- |
439 |
2.16.4 |