1 |
Sync into a quarantine subdirectory, using the rsync --link-dest option |
2 |
to create hardlinks to identical files in the previous snapshot of the |
3 |
repository. If hardlinks are not supported, then show a warning message |
4 |
and sync directly to the normal repository location. |
5 |
|
6 |
If verification succeeds, then the quarantine subdirectory is synced |
7 |
to the normal repository location, and the quarantine subdirectory |
8 |
is deleted. If verification fails, then the quarantine directory is |
9 |
preserved for purposes of analysis. |
10 |
|
11 |
Even if verification happens to be disabled, the quarantine directory |
12 |
is still useful for making the repository update more atomic, so that |
13 |
it is less likely that normal repository location will be observed in |
14 |
a partially synced state. |
15 |
|
16 |
Bug: https://bugs.gentoo.org/660410 |
17 |
--- |
18 |
pym/portage/sync/modules/rsync/rsync.py | 101 +++++++++++++++++++++++++++++--- |
19 |
1 file changed, 93 insertions(+), 8 deletions(-) |
20 |
|
21 |
diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py |
22 |
index 382a1eaaef..9334c8a791 100644 |
23 |
--- a/pym/portage/sync/modules/rsync/rsync.py |
24 |
+++ b/pym/portage/sync/modules/rsync/rsync.py |
25 |
@@ -11,6 +11,7 @@ import functools |
26 |
import io |
27 |
import re |
28 |
import random |
29 |
+import subprocess |
30 |
import tempfile |
31 |
|
32 |
import portage |
33 |
@@ -61,6 +62,68 @@ class RsyncSync(NewBase): |
34 |
def __init__(self): |
35 |
NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) |
36 |
|
37 |
+ def _select_download_dir(self): |
38 |
+ ''' |
39 |
+ Select and return the download directory. It's desirable to be able |
40 |
+ to create shared hardlinks between the download directory to the |
41 |
+ normal repository, and this is facilitated by making the download |
42 |
+ directory be a subdirectory of the normal repository location |
43 |
+ (ensuring that no mountpoints are crossed). Shared hardlinks are |
44 |
+ created by using the rsync --link-dest option. |
45 |
+ |
46 |
+ Since the download is initially unverified, it is safest to save |
47 |
+ it in a quarantine directory. The quarantine directory is also |
48 |
+ useful for making the repository update more atomic, so that it |
49 |
+ less likely that normal repository location will be observed in |
50 |
+ a partially synced state. |
51 |
+ |
52 |
+ This method tests if it is possible to create hardlinks in the |
53 |
+ repository directory, and if that fails then it issues a warning |
54 |
+ message and returns the normal repository location. |
55 |
+ ''' |
56 |
+ with tempfile.NamedTemporaryFile(dir=self.repo.location, |
57 |
+ prefix='.tmp', suffix='-portage-hardlink-test') as f: |
58 |
+ hardlink = f.name + '-hardlink' |
59 |
+ try: |
60 |
+ os.link(f.name, hardlink) |
61 |
+ except OSError as e: |
62 |
+ writemsg_level("!!! Syncing directly to '%s' because hardlink creation failed: %s\n" % |
63 |
+ (self.repo.location, e), level=logging.WARNING, noiselevel=-1) |
64 |
+ return self.repo.location |
65 |
+ finally: |
66 |
+ try: |
67 |
+ os.unlink(hardlink) |
68 |
+ except OSError: |
69 |
+ pass |
70 |
+ |
71 |
+ return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine') |
72 |
+ |
73 |
+ def _commit_download(self, download_dir): |
74 |
+ ''' |
75 |
+ Commit changes from download_dir if it does not refer to the |
76 |
+ normal repository location. |
77 |
+ ''' |
78 |
+ exitcode = 0 |
79 |
+ if self.repo.location != download_dir: |
80 |
+ rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts |
81 |
+ rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir)) |
82 |
+ rsynccommand.append('%s/' % download_dir.rstrip('/')) |
83 |
+ rsynccommand.append('%s/' % self.repo.location) |
84 |
+ exitcode = subprocess.call(rsynccommand) |
85 |
+ if exitcode == 0: |
86 |
+ exitcode = self._remove_download(download_dir) |
87 |
+ |
88 |
+ return exitcode |
89 |
+ |
90 |
+ def _remove_download(self, download_dir): |
91 |
+ """ |
92 |
+ Remove download_dir if it does not refer to the normal repository |
93 |
+ location. |
94 |
+ """ |
95 |
+ exitcode = 0 |
96 |
+ if self.repo.location != download_dir: |
97 |
+ exitcode = subprocess.call(['rm', '-rf', download_dir]) |
98 |
+ return exitcode |
99 |
|
100 |
def update(self): |
101 |
'''Internal update function which performs the transfer''' |
102 |
@@ -97,6 +160,9 @@ class RsyncSync(NewBase): |
103 |
self.extra_rsync_opts.extend(portage.util.shlex_split( |
104 |
self.repo.module_specific_options['sync-rsync-extra-opts'])) |
105 |
|
106 |
+ download_dir = self._select_download_dir() |
107 |
+ exitcode = 0 |
108 |
+ |
109 |
# Process GLEP74 verification options. |
110 |
# Default verification to 'no'; it's enabled for ::gentoo |
111 |
# via default repos.conf though. |
112 |
@@ -219,8 +285,10 @@ class RsyncSync(NewBase): |
113 |
self.proto = "file" |
114 |
dosyncuri = syncuri[7:] |
115 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
116 |
- dosyncuri, timestamp, opts) |
117 |
+ dosyncuri, timestamp, opts, download_dir) |
118 |
self._process_exitcode(exitcode, dosyncuri, out, 1) |
119 |
+ if exitcode == 0 and not unchanged: |
120 |
+ self._commit_download(download_dir) |
121 |
return (exitcode, updatecache_flg) |
122 |
|
123 |
retries=0 |
124 |
@@ -352,7 +420,7 @@ class RsyncSync(NewBase): |
125 |
dosyncuri = dosyncuri[6:].replace('/', ':/', 1) |
126 |
|
127 |
unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
128 |
- dosyncuri, timestamp, opts) |
129 |
+ dosyncuri, timestamp, opts, download_dir) |
130 |
if not unchanged: |
131 |
local_state_unchanged = False |
132 |
if is_synced: |
133 |
@@ -369,6 +437,12 @@ class RsyncSync(NewBase): |
134 |
break |
135 |
self._process_exitcode(exitcode, dosyncuri, out, maxretries) |
136 |
|
137 |
+ if local_state_unchanged: |
138 |
+ # The quarantine download_dir is not intended to exist |
139 |
+ # in this case, so refer gemato to the normal repository |
140 |
+ # location. |
141 |
+ download_dir = self.repo.location |
142 |
+ |
143 |
# if synced successfully, verify now |
144 |
if exitcode == 0 and self.verify_metamanifest: |
145 |
if gemato is None: |
146 |
@@ -380,7 +454,7 @@ class RsyncSync(NewBase): |
147 |
# we always verify the Manifest signature, in case |
148 |
# we had to deal with key revocation case |
149 |
m = gemato.recursiveloader.ManifestRecursiveLoader( |
150 |
- os.path.join(self.repo.location, 'Manifest'), |
151 |
+ os.path.join(download_dir, 'Manifest'), |
152 |
verify_openpgp=True, |
153 |
openpgp_env=openpgp_env, |
154 |
max_jobs=self.verify_jobs) |
155 |
@@ -411,7 +485,7 @@ class RsyncSync(NewBase): |
156 |
# if nothing has changed, skip the actual Manifest |
157 |
# verification |
158 |
if not local_state_unchanged: |
159 |
- out.ebegin('Verifying %s' % (self.repo.location,)) |
160 |
+ out.ebegin('Verifying %s' % (download_dir,)) |
161 |
m.assert_directory_verifies() |
162 |
out.eend(0) |
163 |
except GematoException as e: |
164 |
@@ -420,12 +494,16 @@ class RsyncSync(NewBase): |
165 |
level=logging.ERROR, noiselevel=-1) |
166 |
exitcode = 1 |
167 |
|
168 |
+ if exitcode == 0 and not local_state_unchanged: |
169 |
+ exitcode = self._commit_download(download_dir) |
170 |
+ |
171 |
return (exitcode, updatecache_flg) |
172 |
finally: |
173 |
+ if exitcode == 0: |
174 |
+ self._remove_download(download_dir) |
175 |
if openpgp_env is not None: |
176 |
openpgp_env.close() |
177 |
|
178 |
- |
179 |
def _process_exitcode(self, exitcode, syncuri, out, maxretries): |
180 |
if (exitcode==0): |
181 |
pass |
182 |
@@ -561,7 +639,7 @@ class RsyncSync(NewBase): |
183 |
return rsync_opts |
184 |
|
185 |
|
186 |
- def _do_rsync(self, syncuri, timestamp, opts): |
187 |
+ def _do_rsync(self, syncuri, timestamp, opts, download_dir): |
188 |
updatecache_flg = False |
189 |
is_synced = False |
190 |
if timestamp != 0 and "--quiet" not in opts: |
191 |
@@ -686,6 +764,12 @@ class RsyncSync(NewBase): |
192 |
elif (servertimestamp == 0) or (servertimestamp > timestamp): |
193 |
# actual sync |
194 |
command = rsynccommand[:] |
195 |
+ |
196 |
+ if self.repo.location != download_dir: |
197 |
+ # Use shared hardlinks for files that are identical |
198 |
+ # in the previous snapshot of the repository. |
199 |
+ command.append('--link-dest=%s' % self.repo.location) |
200 |
+ |
201 |
submodule_paths = self._get_submodule_paths() |
202 |
if submodule_paths: |
203 |
# The only way to select multiple directories to |
204 |
@@ -696,9 +780,10 @@ class RsyncSync(NewBase): |
205 |
# /./ is special syntax supported with the |
206 |
# rsync --relative option. |
207 |
command.append(syncuri + "/./" + path) |
208 |
- command.append(self.repo.location) |
209 |
else: |
210 |
- command.extend([syncuri + "/", self.repo.location]) |
211 |
+ command.append(syncuri + "/") |
212 |
+ |
213 |
+ command.append(download_dir) |
214 |
|
215 |
exitcode = None |
216 |
try: |
217 |
-- |
218 |
2.13.6 |