1 |
Sync into a quarantine subdirectory, using the rsync --link-dest option |
2 |
to create hardlinks to identical files in the previous snapshot of the |
3 |
repository. If hardlinks are not supported, then show a warning message |
4 |
and sync directly to the normal repository location. |
5 |
|
6 |
If verification succeeds, then the quarantine subdirectory is synced |
7 |
to the normal repository location, and the quarantine subdirectory |
8 |
is deleted. If verification fails, then the quarantine directory is |
9 |
preserved for purposes of analysis. |
10 |
|
11 |
Even if verification happens to be disabled, the quarantine directory |
12 |
is still useful for making the repository update more atomic, so that |
13 |
it is less likely that normal repository location will be observed in |
14 |
a partially synced state. |
15 |
|
16 |
The new behavior may conflict with configurations that restrict the |
17 |
use of hardlinks, such as overlay filesystems. Therefore, users will |
18 |
have to set "sync-allow-hardlinks = no" in repos.conf if they have |
19 |
a configuration that prevents the use of hardlinks, but this should |
20 |
not be very common. |
21 |
|
22 |
Bug: https://bugs.gentoo.org/660410 |
23 |
--- |
24 |
[PATCH v2] makes it possible to disable the new behavior by setting |
25 |
"sync-allow-hardlinks = no" in repos.conf |
26 |
|
27 |
cnf/repos.conf | 1 + |
28 |
man/portage.5 | 8 +++ |
29 |
pym/portage/repository/config.py | 8 ++- |
30 |
pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++--- |
31 |
4 files changed, 97 insertions(+), 9 deletions(-) |
32 |
|
33 |
diff --git a/cnf/repos.conf b/cnf/repos.conf |
34 |
index 352073cfd5..419f6d1182 100644 |
35 |
--- a/cnf/repos.conf |
36 |
+++ b/cnf/repos.conf |
37 |
@@ -6,6 +6,7 @@ location = /usr/portage |
38 |
sync-type = rsync |
39 |
sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
40 |
auto-sync = yes |
41 |
+sync-allow-hardlinks = yes |
42 |
sync-rsync-verify-jobs = 1 |
43 |
sync-rsync-verify-metamanifest = yes |
44 |
sync-rsync-verify-max-age = 24 |
45 |
diff --git a/man/portage.5 b/man/portage.5 |
46 |
index 5adb07d821..acc80791be 100644 |
47 |
--- a/man/portage.5 |
48 |
+++ b/man/portage.5 |
49 |
@@ -973,6 +973,14 @@ files). Defaults to true. |
50 |
.br |
51 |
Valid values: true, false. |
52 |
.TP |
53 |
+.B sync\-allow\-hardlinks = yes|no |
54 |
+Allow sync plugins to use hardlinks in order to ensure that a repository |
55 |
+remains in a valid state if something goes wrong during the sync operation. |
56 |
+For example, if signature verification fails during a sync operation, |
57 |
+the previous state of the repository will be preserved. This option may |
58 |
+conflict with configurations that restrict the use of hardlinks, such as |
59 |
+overlay filesystems. |
60 |
+.TP |
61 |
.B sync\-cvs\-repo |
62 |
Specifies CVS repository. |
63 |
.TP |
64 |
diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py |
65 |
index 1d897bb903..c7440369c2 100644 |
66 |
--- a/pym/portage/repository/config.py |
67 |
+++ b/pym/portage/repository/config.py |
68 |
@@ -86,6 +86,7 @@ class RepoConfig(object): |
69 |
'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest', |
70 |
'update_changelog', '_eapis_banned', '_eapis_deprecated', |
71 |
'_masters_orig', 'module_specific_options', 'manifest_required_hashes', |
72 |
+ 'sync_allow_hardlinks', |
73 |
'sync_openpgp_key_path', |
74 |
'sync_openpgp_key_refresh_retry_count', |
75 |
'sync_openpgp_key_refresh_retry_delay_max', |
76 |
@@ -188,6 +189,9 @@ class RepoConfig(object): |
77 |
self.strict_misc_digests = repo_opts.get( |
78 |
'strict-misc-digests', 'true').lower() == 'true' |
79 |
|
80 |
+ self.sync_allow_hardlinks = repo_opts.get( |
81 |
+ 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes') |
82 |
+ |
83 |
self.sync_openpgp_key_path = repo_opts.get( |
84 |
'sync-openpgp-key-path', None) |
85 |
|
86 |
@@ -534,6 +538,7 @@ class RepoConfigLoader(object): |
87 |
'clone_depth', 'eclass_overrides', |
88 |
'force', 'masters', 'priority', 'strict_misc_digests', |
89 |
'sync_depth', 'sync_hooks_only_on_change', |
90 |
+ 'sync_allow_hardlinks', |
91 |
'sync_openpgp_key_path', |
92 |
'sync_openpgp_key_refresh_retry_count', |
93 |
'sync_openpgp_key_refresh_retry_delay_max', |
94 |
@@ -962,7 +967,8 @@ class RepoConfigLoader(object): |
95 |
def config_string(self): |
96 |
bool_keys = ("strict_misc_digests",) |
97 |
str_or_int_keys = ("auto_sync", "clone_depth", "format", "location", |
98 |
- "main_repo", "priority", "sync_depth", "sync_openpgp_key_path", |
99 |
+ "main_repo", "priority", "sync_depth", |
100 |
+ "sync_allow_hardlinks", "sync_openpgp_key_path", |
101 |
"sync_openpgp_key_refresh_retry_count", |
102 |
"sync_openpgp_key_refresh_retry_delay_max", |
103 |
"sync_openpgp_key_refresh_retry_delay_exp_base", |
104 |
diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py |
105 |
index 382a1eaaef..59211d2bb8 100644 |
106 |
--- a/pym/portage/sync/modules/rsync/rsync.py |
107 |
+++ b/pym/portage/sync/modules/rsync/rsync.py |
108 |
@@ -11,6 +11,7 @@ import functools |
109 |
import io |
110 |
import re |
111 |
import random |
112 |
+import subprocess |
113 |
import tempfile |
114 |
|
115 |
import portage |
116 |
@@ -61,6 +62,56 @@ class RsyncSync(NewBase): |
117 |
def __init__(self): |
118 |
NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) |
119 |
|
120 |
+ def _select_download_dir(self): |
121 |
+ ''' |
122 |
+ Select and return the download directory. It's desirable to be able |
123 |
+ to create shared hardlinks between the download directory to the |
124 |
+ normal repository, and this is facilitated by making the download |
125 |
+ directory be a subdirectory of the normal repository location |
126 |
+ (ensuring that no mountpoints are crossed). Shared hardlinks are |
127 |
+ created by using the rsync --link-dest option. |
128 |
+ |
129 |
+ Since the download is initially unverified, it is safest to save |
130 |
+ it in a quarantine directory. The quarantine directory is also |
131 |
+ useful for making the repository update more atomic, so that it |
132 |
+ less likely that normal repository location will be observed in |
133 |
+ a partially synced state. |
134 |
+ |
135 |
+ This method returns a quarantine directory if sync-allow-hardlinks |
136 |
+ is enabled in repos.conf, and otherwise it returne the normal |
137 |
+ repository location. |
138 |
+ ''' |
139 |
+ if self.repo.sync_allow_hardlinks: |
140 |
+ return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine') |
141 |
+ else: |
142 |
+ return self.repo.location |
143 |
+ |
144 |
+ def _commit_download(self, download_dir): |
145 |
+ ''' |
146 |
+ Commit changes from download_dir if it does not refer to the |
147 |
+ normal repository location. |
148 |
+ ''' |
149 |
+ exitcode = 0 |
150 |
+ if self.repo.location != download_dir: |
151 |
+ rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts |
152 |
+ rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir)) |
153 |
+ rsynccommand.append('%s/' % download_dir.rstrip('/')) |
154 |
+ rsynccommand.append('%s/' % self.repo.location) |
155 |
+ exitcode = subprocess.call(rsynccommand) |
156 |
+ if exitcode == 0: |
157 |
+ exitcode = self._remove_download(download_dir) |
158 |
+ |
159 |
+ return exitcode |
160 |
+ |
161 |
+ def _remove_download(self, download_dir): |
162 |
+ """ |
163 |
+ Remove download_dir if it does not refer to the normal repository |
164 |
+ location. |
165 |
+ """ |
166 |
+ exitcode = 0 |
167 |
+ if self.repo.location != download_dir: |
168 |
+ exitcode = subprocess.call(['rm', '-rf', download_dir]) |
169 |
+ return exitcode |
170 |
|
171 |
def update(self): |
172 |
'''Internal update function which performs the transfer''' |
173 |
@@ -97,6 +148,9 @@ class RsyncSync(NewBase): |
174 |
self.extra_rsync_opts.extend(portage.util.shlex_split( |
175 |
self.repo.module_specific_options['sync-rsync-extra-opts'])) |
176 |
|
177 |
+ download_dir = self._select_download_dir() |
178 |
+ exitcode = 0 |
179 |
+ |
180 |
# Process GLEP74 verification options. |
181 |
# Default verification to 'no'; it's enabled for ::gentoo |
182 |
# via default repos.conf though. |
183 |
@@ -219,8 +273,10 @@ class RsyncSync(NewBase): |
184 |
self.proto = "file" |
185 |
dosyncuri = syncuri[7:] |
186 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
187 |
- dosyncuri, timestamp, opts) |
188 |
+ dosyncuri, timestamp, opts, download_dir) |
189 |
self._process_exitcode(exitcode, dosyncuri, out, 1) |
190 |
+ if exitcode == 0 and not unchanged: |
191 |
+ self._commit_download(download_dir) |
192 |
return (exitcode, updatecache_flg) |
193 |
|
194 |
retries=0 |
195 |
@@ -352,7 +408,7 @@ class RsyncSync(NewBase): |
196 |
dosyncuri = dosyncuri[6:].replace('/', ':/', 1) |
197 |
|
198 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
199 |
- dosyncuri, timestamp, opts) |
200 |
+ dosyncuri, timestamp, opts, download_dir) |
201 |
if not unchanged: |
202 |
local_state_unchanged = False |
203 |
if is_synced: |
204 |
@@ -369,6 +425,12 @@ class RsyncSync(NewBase): |
205 |
break |
206 |
self._process_exitcode(exitcode, dosyncuri, out, maxretries) |
207 |
|
208 |
+ if local_state_unchanged: |
209 |
+ # The quarantine download_dir is not intended to exist |
210 |
+ # in this case, so refer gemato to the normal repository |
211 |
+ # location. |
212 |
+ download_dir = self.repo.location |
213 |
+ |
214 |
# if synced successfully, verify now |
215 |
if exitcode == 0 and self.verify_metamanifest: |
216 |
if gemato is None: |
217 |
@@ -380,7 +442,7 @@ class RsyncSync(NewBase): |
218 |
# we always verify the Manifest signature, in case |
219 |
# we had to deal with key revocation case |
220 |
m = gemato.recursiveloader.ManifestRecursiveLoader( |
221 |
- os.path.join(self.repo.location, 'Manifest'), |
222 |
+ os.path.join(download_dir, 'Manifest'), |
223 |
verify_openpgp=True, |
224 |
openpgp_env=openpgp_env, |
225 |
max_jobs=self.verify_jobs) |
226 |
@@ -411,7 +473,7 @@ class RsyncSync(NewBase): |
227 |
# if nothing has changed, skip the actual Manifest |
228 |
# verification |
229 |
if not local_state_unchanged: |
230 |
- out.ebegin('Verifying %s' % (self.repo.location,)) |
231 |
+ out.ebegin('Verifying %s' % (download_dir,)) |
232 |
m.assert_directory_verifies() |
233 |
out.eend(0) |
234 |
except GematoException as e: |
235 |
@@ -420,12 +482,16 @@ class RsyncSync(NewBase): |
236 |
level=logging.ERROR, noiselevel=-1) |
237 |
exitcode = 1 |
238 |
|
239 |
+ if exitcode == 0 and not local_state_unchanged: |
240 |
+ exitcode = self._commit_download(download_dir) |
241 |
+ |
242 |
return (exitcode, updatecache_flg) |
243 |
finally: |
244 |
+ if exitcode == 0: |
245 |
+ self._remove_download(download_dir) |
246 |
if openpgp_env is not None: |
247 |
openpgp_env.close() |
248 |
|
249 |
- |
250 |
def _process_exitcode(self, exitcode, syncuri, out, maxretries): |
251 |
if (exitcode==0): |
252 |
pass |
253 |
@@ -561,7 +627,7 @@ class RsyncSync(NewBase): |
254 |
return rsync_opts |
255 |
|
256 |
|
257 |
- def _do_rsync(self, syncuri, timestamp, opts): |
258 |
+ def _do_rsync(self, syncuri, timestamp, opts, download_dir): |
259 |
updatecache_flg = False |
260 |
is_synced = False |
261 |
if timestamp != 0 and "--quiet" not in opts: |
262 |
@@ -686,6 +752,12 @@ class RsyncSync(NewBase): |
263 |
elif (servertimestamp == 0) or (servertimestamp > timestamp): |
264 |
# actual sync |
265 |
command = rsynccommand[:] |
266 |
+ |
267 |
+ if self.repo.location != download_dir: |
268 |
+ # Use shared hardlinks for files that are identical |
269 |
+ # in the previous snapshot of the repository. |
270 |
+ command.append('--link-dest=%s' % self.repo.location) |
271 |
+ |
272 |
submodule_paths = self._get_submodule_paths() |
273 |
if submodule_paths: |
274 |
# The only way to select multiple directories to |
275 |
@@ -696,9 +768,10 @@ class RsyncSync(NewBase): |
276 |
# /./ is special syntax supported with the |
277 |
# rsync --relative option. |
278 |
command.append(syncuri + "/./" + path) |
279 |
- command.append(self.repo.location) |
280 |
else: |
281 |
- command.extend([syncuri + "/", self.repo.location]) |
282 |
+ command.append(syncuri + "/") |
283 |
+ |
284 |
+ command.append(download_dir) |
285 |
|
286 |
exitcode = None |
287 |
try: |
288 |
-- |
289 |
2.13.6 |