1 |
commit: 84822ef7a21494d3f044c2ffa7b112e4d29665ab |
2 |
Author: Zac Medico <zmedico <AT> gentoo <DOT> org> |
3 |
AuthorDate: Thu Jul 5 13:10:43 2018 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Tue Jul 10 05:03:53 2018 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=84822ef7 |
7 |
|
8 |
rsync: quarantine data prior to verification (bug 660410) |
9 |
|
10 |
Sync into a quarantine subdirectory, using the rsync --link-dest option |
11 |
to create hardlinks to identical files in the previous snapshot of the |
12 |
repository. If hardlinks are not supported, then show a warning message |
13 |
and sync directly to the normal repository location. |
14 |
|
15 |
If verification succeeds, then the quarantine subdirectory is synced |
16 |
to the normal repository location, and the quarantine subdirectory |
17 |
is deleted. If verification fails, then the quarantine directory is |
18 |
preserved for purposes of analysis. |
19 |
|
20 |
Even if verification happens to be disabled, the quarantine directory |
21 |
is still useful for making the repository update more atomic, so that |
22 |
it is less likely that the normal repository location will be observed in |
23 |
a partially synced state. |
24 |
|
25 |
The new behavior may conflict with configurations that restrict the |
26 |
use of hardlinks, such as overlay filesystems. Therefore, users will |
27 |
have to set "sync-allow-hardlinks = no" in repos.conf if they have |
28 |
a configuration that prevents the use of hardlinks, but this should |
29 |
not be very common. |
30 |
|
31 |
Bug: https://bugs.gentoo.org/660410 |
32 |
|
33 |
cnf/repos.conf | 1 + |
34 |
man/portage.5 | 8 +++ |
35 |
pym/portage/repository/config.py | 7 ++- |
36 |
pym/portage/sync/modules/rsync/rsync.py | 87 ++++++++++++++++++++++++++++++--- |
37 |
4 files changed, 94 insertions(+), 9 deletions(-) |
38 |
|
39 |
diff --git a/cnf/repos.conf b/cnf/repos.conf |
40 |
index 352073cfd..419f6d118 100644 |
41 |
--- a/cnf/repos.conf |
42 |
+++ b/cnf/repos.conf |
43 |
@@ -6,6 +6,7 @@ location = /usr/portage |
44 |
sync-type = rsync |
45 |
sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
46 |
auto-sync = yes |
47 |
+sync-allow-hardlinks = yes |
48 |
sync-rsync-verify-jobs = 1 |
49 |
sync-rsync-verify-metamanifest = yes |
50 |
sync-rsync-verify-max-age = 24 |
51 |
|
52 |
diff --git a/man/portage.5 b/man/portage.5 |
53 |
index 5adb07d82..acc80791b 100644 |
54 |
--- a/man/portage.5 |
55 |
+++ b/man/portage.5 |
56 |
@@ -973,6 +973,14 @@ files). Defaults to true. |
57 |
.br |
58 |
Valid values: true, false. |
59 |
.TP |
60 |
+.B sync\-allow\-hardlinks = yes|no |
61 |
+Allow sync plugins to use hardlinks in order to ensure that a repository |
62 |
+remains in a valid state if something goes wrong during the sync operation. |
63 |
+For example, if signature verification fails during a sync operation, |
64 |
+the previous state of the repository will be preserved. This option may |
65 |
+conflict with configurations that restrict the use of hardlinks, such as |
66 |
+overlay filesystems. |
67 |
+.TP |
68 |
.B sync\-cvs\-repo |
69 |
Specifies CVS repository. |
70 |
.TP |
71 |
|
72 |
diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py |
73 |
index 1d897bb90..ad7ae9d18 100644 |
74 |
--- a/pym/portage/repository/config.py |
75 |
+++ b/pym/portage/repository/config.py |
76 |
@@ -86,6 +86,7 @@ class RepoConfig(object): |
77 |
'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest', |
78 |
'update_changelog', '_eapis_banned', '_eapis_deprecated', |
79 |
'_masters_orig', 'module_specific_options', 'manifest_required_hashes', |
80 |
+ 'sync_allow_hardlinks', |
81 |
'sync_openpgp_key_path', |
82 |
'sync_openpgp_key_refresh_retry_count', |
83 |
'sync_openpgp_key_refresh_retry_delay_max', |
84 |
@@ -188,6 +189,9 @@ class RepoConfig(object): |
85 |
self.strict_misc_digests = repo_opts.get( |
86 |
'strict-misc-digests', 'true').lower() == 'true' |
87 |
|
88 |
+ self.sync_allow_hardlinks = repo_opts.get( |
89 |
+ 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes') |
90 |
+ |
91 |
self.sync_openpgp_key_path = repo_opts.get( |
92 |
'sync-openpgp-key-path', None) |
93 |
|
94 |
@@ -534,6 +538,7 @@ class RepoConfigLoader(object): |
95 |
'clone_depth', 'eclass_overrides', |
96 |
'force', 'masters', 'priority', 'strict_misc_digests', |
97 |
'sync_depth', 'sync_hooks_only_on_change', |
98 |
+ 'sync_allow_hardlinks', |
99 |
'sync_openpgp_key_path', |
100 |
'sync_openpgp_key_refresh_retry_count', |
101 |
'sync_openpgp_key_refresh_retry_delay_max', |
102 |
@@ -960,7 +965,7 @@ class RepoConfigLoader(object): |
103 |
return repo_name in self.prepos |
104 |
|
105 |
def config_string(self): |
106 |
- bool_keys = ("strict_misc_digests",) |
107 |
+ bool_keys = ("strict_misc_digests", "sync_allow_hardlinks") |
108 |
str_or_int_keys = ("auto_sync", "clone_depth", "format", "location", |
109 |
"main_repo", "priority", "sync_depth", "sync_openpgp_key_path", |
110 |
"sync_openpgp_key_refresh_retry_count", |
111 |
|
112 |
diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py |
113 |
index a715e2818..fb1960a3c 100644 |
114 |
--- a/pym/portage/sync/modules/rsync/rsync.py |
115 |
+++ b/pym/portage/sync/modules/rsync/rsync.py |
116 |
@@ -10,6 +10,7 @@ import datetime |
117 |
import io |
118 |
import re |
119 |
import random |
120 |
+import subprocess |
121 |
import tempfile |
122 |
|
123 |
import portage |
124 |
@@ -58,6 +59,54 @@ class RsyncSync(NewBase): |
125 |
def __init__(self): |
126 |
NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) |
127 |
|
128 |
+ def _select_download_dir(self): |
129 |
+ ''' |
130 |
+ Select and return the download directory. It's desirable to be able |
131 |
+ to create shared hardlinks between the download directory and the |
132 |
+ normal repository, and this is facilitated by making the download |
133 |
+ directory be a subdirectory of the normal repository location |
134 |
+ (ensuring that no mountpoints are crossed). Shared hardlinks are |
135 |
+ created by using the rsync --link-dest option. |
136 |
+ |
137 |
+ Since the download is initially unverified, it is safest to save |
138 |
+ it in a quarantine directory. The quarantine directory is also |
139 |
+ useful for making the repository update more atomic, so that it |
140 |
+ is less likely that the normal repository location will be observed in |
141 |
+ a partially synced state. |
142 |
+ |
143 |
+ This method returns a quarantine directory if sync-allow-hardlinks |
144 |
+ is enabled in repos.conf, and otherwise it returns the normal |
145 |
+ repository location. |
146 |
+ ''' |
147 |
+ if self.repo.sync_allow_hardlinks: |
148 |
+ return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine') |
149 |
+ else: |
150 |
+ return self.repo.location |
151 |
+ |
152 |
+ def _commit_download(self, download_dir): |
153 |
+ ''' |
154 |
+ Commit changes from download_dir if it does not refer to the |
155 |
+ normal repository location. |
156 |
+ ''' |
157 |
+ exitcode = 0 |
158 |
+ if self.repo.location != download_dir: |
159 |
+ rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts |
160 |
+ rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir)) |
161 |
+ rsynccommand.append('%s/' % download_dir.rstrip('/')) |
162 |
+ rsynccommand.append('%s/' % self.repo.location) |
163 |
+ exitcode = subprocess.call(rsynccommand) |
164 |
+ |
165 |
+ return exitcode |
166 |
+ |
167 |
+ def _remove_download(self, download_dir): |
168 |
+ """ |
169 |
+ Remove download_dir if it does not refer to the normal repository |
170 |
+ location. |
171 |
+ """ |
172 |
+ exitcode = 0 |
173 |
+ if self.repo.location != download_dir: |
174 |
+ exitcode = subprocess.call(['rm', '-rf', download_dir]) |
175 |
+ return exitcode |
176 |
|
177 |
def update(self): |
178 |
'''Internal update function which performs the transfer''' |
179 |
@@ -94,6 +143,9 @@ class RsyncSync(NewBase): |
180 |
self.extra_rsync_opts.extend(portage.util.shlex_split( |
181 |
self.repo.module_specific_options['sync-rsync-extra-opts'])) |
182 |
|
183 |
+ download_dir = self._select_download_dir() |
184 |
+ exitcode = 0 |
185 |
+ |
186 |
# Process GLEP74 verification options. |
187 |
# Default verification to 'no'; it's enabled for ::gentoo |
188 |
# via default repos.conf though. |
189 |
@@ -188,8 +240,10 @@ class RsyncSync(NewBase): |
190 |
self.proto = "file" |
191 |
dosyncuri = syncuri[7:] |
192 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
193 |
- dosyncuri, timestamp, opts) |
194 |
+ dosyncuri, timestamp, opts, download_dir) |
195 |
self._process_exitcode(exitcode, dosyncuri, out, 1) |
196 |
+ if exitcode == 0 and not unchanged: |
197 |
+ self._commit_download(download_dir) |
198 |
return (exitcode, updatecache_flg) |
199 |
|
200 |
retries=0 |
201 |
@@ -321,7 +375,7 @@ class RsyncSync(NewBase): |
202 |
dosyncuri = dosyncuri[6:].replace('/', ':/', 1) |
203 |
|
204 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
205 |
- dosyncuri, timestamp, opts) |
206 |
+ dosyncuri, timestamp, opts, download_dir) |
207 |
if not unchanged: |
208 |
local_state_unchanged = False |
209 |
if is_synced: |
210 |
@@ -338,6 +392,12 @@ class RsyncSync(NewBase): |
211 |
break |
212 |
self._process_exitcode(exitcode, dosyncuri, out, maxretries) |
213 |
|
214 |
+ if local_state_unchanged: |
215 |
+ # The quarantine download_dir is not intended to exist |
216 |
+ # in this case, so refer gemato to the normal repository |
217 |
+ # location. |
218 |
+ download_dir = self.repo.location |
219 |
+ |
220 |
# if synced successfully, verify now |
221 |
if exitcode == 0 and self.verify_metamanifest: |
222 |
if gemato is None: |
223 |
@@ -349,7 +409,7 @@ class RsyncSync(NewBase): |
224 |
# we always verify the Manifest signature, in case |
225 |
# we had to deal with key revocation case |
226 |
m = gemato.recursiveloader.ManifestRecursiveLoader( |
227 |
- os.path.join(self.repo.location, 'Manifest'), |
228 |
+ os.path.join(download_dir, 'Manifest'), |
229 |
verify_openpgp=True, |
230 |
openpgp_env=openpgp_env, |
231 |
max_jobs=self.verify_jobs) |
232 |
@@ -380,7 +440,7 @@ class RsyncSync(NewBase): |
233 |
# if nothing has changed, skip the actual Manifest |
234 |
# verification |
235 |
if not local_state_unchanged: |
236 |
- out.ebegin('Verifying %s' % (self.repo.location,)) |
237 |
+ out.ebegin('Verifying %s' % (download_dir,)) |
238 |
m.assert_directory_verifies() |
239 |
out.eend(0) |
240 |
except GematoException as e: |
241 |
@@ -389,12 +449,16 @@ class RsyncSync(NewBase): |
242 |
level=logging.ERROR, noiselevel=-1) |
243 |
exitcode = 1 |
244 |
|
245 |
+ if exitcode == 0 and not local_state_unchanged: |
246 |
+ exitcode = self._commit_download(download_dir) |
247 |
+ |
248 |
return (exitcode, updatecache_flg) |
249 |
finally: |
250 |
+ if exitcode == 0: |
251 |
+ self._remove_download(download_dir) |
252 |
if openpgp_env is not None: |
253 |
openpgp_env.close() |
254 |
|
255 |
- |
256 |
def _process_exitcode(self, exitcode, syncuri, out, maxretries): |
257 |
if (exitcode==0): |
258 |
pass |
259 |
@@ -530,7 +594,7 @@ class RsyncSync(NewBase): |
260 |
return rsync_opts |
261 |
|
262 |
|
263 |
- def _do_rsync(self, syncuri, timestamp, opts): |
264 |
+ def _do_rsync(self, syncuri, timestamp, opts, download_dir): |
265 |
updatecache_flg = False |
266 |
is_synced = False |
267 |
if timestamp != 0 and "--quiet" not in opts: |
268 |
@@ -655,6 +719,12 @@ class RsyncSync(NewBase): |
269 |
elif (servertimestamp == 0) or (servertimestamp > timestamp): |
270 |
# actual sync |
271 |
command = rsynccommand[:] |
272 |
+ |
273 |
+ if self.repo.location != download_dir: |
274 |
+ # Use shared hardlinks for files that are identical |
275 |
+ # in the previous snapshot of the repository. |
276 |
+ command.append('--link-dest=%s' % self.repo.location) |
277 |
+ |
278 |
submodule_paths = self._get_submodule_paths() |
279 |
if submodule_paths: |
280 |
# The only way to select multiple directories to |
281 |
@@ -665,9 +735,10 @@ class RsyncSync(NewBase): |
282 |
# /./ is special syntax supported with the |
283 |
# rsync --relative option. |
284 |
command.append(syncuri + "/./" + path) |
285 |
- command.append(self.repo.location) |
286 |
else: |
287 |
- command.extend([syncuri + "/", self.repo.location]) |
288 |
+ command.append(syncuri + "/") |
289 |
+ |
290 |
+ command.append(download_dir) |
291 |
|
292 |
exitcode = None |
293 |
try: |