Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH] rsync: quarantine data prior to verification (bug 660410)
Date: Thu, 05 Jul 2018 18:04:13
Message-Id: 20180705180342.18250-1-zmedico@gentoo.org
1 Sync into a quarantine subdirectory, using the rsync --link-dest option
2 to create hardlinks to identical files in the previous snapshot of the
3 repository. If hardlinks are not supported, then show a warning message
4 and sync directly to the normal repository location.
5
6 If verification succeeds, then the quarantine subdirectory is synced
7 to the normal repository location, and the quarantine subdirectory
8 is deleted. If verification fails, then the quarantine directory is
9 preserved for purposes of analysis.
10
11 Even if verification happens to be disabled, the quarantine directory
12 is still useful for making the repository update more atomic, so that
13 it is less likely that normal repository location will be observed in
14 a partially synced state.
15
16 Bug: https://bugs.gentoo.org/660410
17 ---
18 pym/portage/sync/modules/rsync/rsync.py | 101 +++++++++++++++++++++++++++++---
19 1 file changed, 93 insertions(+), 8 deletions(-)
20
21 diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py
22 index 382a1eaaef..9334c8a791 100644
23 --- a/pym/portage/sync/modules/rsync/rsync.py
24 +++ b/pym/portage/sync/modules/rsync/rsync.py
25 @@ -11,6 +11,7 @@ import functools
26 import io
27 import re
28 import random
29 +import subprocess
30 import tempfile
31
32 import portage
33 @@ -61,6 +62,68 @@ class RsyncSync(NewBase):
34 def __init__(self):
35 NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
36
37 + def _select_download_dir(self):
38 + '''
39 + Select and return the download directory. It's desirable to be able
40 + to create shared hardlinks between the download directory to the
41 + normal repository, and this is facilitated by making the download
42 + directory be a subdirectory of the normal repository location
43 + (ensuring that no mountpoints are crossed). Shared hardlinks are
44 + created by using the rsync --link-dest option.
45 +
46 + Since the download is initially unverified, it is safest to save
47 + it in a quarantine directory. The quarantine directory is also
48 + useful for making the repository update more atomic, so that it
49 + less likely that normal repository location will be observed in
50 + a partially synced state.
51 +
52 + This method tests if it is possible to create hardlinks in the
53 + repository directory, and if that fails then it issues a warning
54 + message and returns the normal repository location.
55 + '''
56 + with tempfile.NamedTemporaryFile(dir=self.repo.location,
57 + prefix='.tmp', suffix='-portage-hardlink-test') as f:
58 + hardlink = f.name + '-hardlink'
59 + try:
60 + os.link(f.name, hardlink)
61 + except OSError as e:
62 + writemsg_level("!!! Syncing directly to '%s' because hardlink creation failed: %s\n" %
63 + (self.repo.location, e), level=logging.WARNING, noiselevel=-1)
64 + return self.repo.location
65 + finally:
66 + try:
67 + os.unlink(hardlink)
68 + except OSError:
69 + pass
70 +
71 + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine')
72 +
73 + def _commit_download(self, download_dir):
74 + '''
75 + Commit changes from download_dir if it does not refer to the
76 + normal repository location.
77 + '''
78 + exitcode = 0
79 + if self.repo.location != download_dir:
80 + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts
81 + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir))
82 + rsynccommand.append('%s/' % download_dir.rstrip('/'))
83 + rsynccommand.append('%s/' % self.repo.location)
84 + exitcode = subprocess.call(rsynccommand)
85 + if exitcode == 0:
86 + exitcode = self._remove_download(download_dir)
87 +
88 + return exitcode
89 +
90 + def _remove_download(self, download_dir):
91 + """
92 + Remove download_dir if it does not refer to the normal repository
93 + location.
94 + """
95 + exitcode = 0
96 + if self.repo.location != download_dir:
97 + exitcode = subprocess.call(['rm', '-rf', download_dir])
98 + return exitcode
99
100 def update(self):
101 '''Internal update function which performs the transfer'''
102 @@ -97,6 +160,9 @@ class RsyncSync(NewBase):
103 self.extra_rsync_opts.extend(portage.util.shlex_split(
104 self.repo.module_specific_options['sync-rsync-extra-opts']))
105
106 + download_dir = self._select_download_dir()
107 + exitcode = 0
108 +
109 # Process GLEP74 verification options.
110 # Default verification to 'no'; it's enabled for ::gentoo
111 # via default repos.conf though.
112 @@ -219,8 +285,10 @@ class RsyncSync(NewBase):
113 self.proto = "file"
114 dosyncuri = syncuri[7:]
115 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
116 - dosyncuri, timestamp, opts)
117 + dosyncuri, timestamp, opts, download_dir)
118 self._process_exitcode(exitcode, dosyncuri, out, 1)
119 + if exitcode == 0 and not unchanged:
120 + self._commit_download(download_dir)
121 return (exitcode, updatecache_flg)
122
123 retries=0
124 @@ -352,7 +420,7 @@ class RsyncSync(NewBase):
125 dosyncuri = dosyncuri[6:].replace('/', ':/', 1)
126
127 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
128 - dosyncuri, timestamp, opts)
129 + dosyncuri, timestamp, opts, download_dir)
130 if not unchanged:
131 local_state_unchanged = False
132 if is_synced:
133 @@ -369,6 +437,12 @@ class RsyncSync(NewBase):
134 break
135 self._process_exitcode(exitcode, dosyncuri, out, maxretries)
136
137 + if local_state_unchanged:
138 + # The quarantine download_dir is not intended to exist
139 + # in this case, so refer gemato to the normal repository
140 + # location.
141 + download_dir = self.repo.location
142 +
143 # if synced successfully, verify now
144 if exitcode == 0 and self.verify_metamanifest:
145 if gemato is None:
146 @@ -380,7 +454,7 @@ class RsyncSync(NewBase):
147 # we always verify the Manifest signature, in case
148 # we had to deal with key revocation case
149 m = gemato.recursiveloader.ManifestRecursiveLoader(
150 - os.path.join(self.repo.location, 'Manifest'),
151 + os.path.join(download_dir, 'Manifest'),
152 verify_openpgp=True,
153 openpgp_env=openpgp_env,
154 max_jobs=self.verify_jobs)
155 @@ -411,7 +485,7 @@ class RsyncSync(NewBase):
156 # if nothing has changed, skip the actual Manifest
157 # verification
158 if not local_state_unchanged:
159 - out.ebegin('Verifying %s' % (self.repo.location,))
160 + out.ebegin('Verifying %s' % (download_dir,))
161 m.assert_directory_verifies()
162 out.eend(0)
163 except GematoException as e:
164 @@ -420,12 +494,16 @@ class RsyncSync(NewBase):
165 level=logging.ERROR, noiselevel=-1)
166 exitcode = 1
167
168 + if exitcode == 0 and not local_state_unchanged:
169 + exitcode = self._commit_download(download_dir)
170 +
171 return (exitcode, updatecache_flg)
172 finally:
173 + if exitcode == 0:
174 + self._remove_download(download_dir)
175 if openpgp_env is not None:
176 openpgp_env.close()
177
178 -
179 def _process_exitcode(self, exitcode, syncuri, out, maxretries):
180 if (exitcode==0):
181 pass
182 @@ -561,7 +639,7 @@ class RsyncSync(NewBase):
183 return rsync_opts
184
185
186 - def _do_rsync(self, syncuri, timestamp, opts):
187 + def _do_rsync(self, syncuri, timestamp, opts, download_dir):
188 updatecache_flg = False
189 is_synced = False
190 if timestamp != 0 and "--quiet" not in opts:
191 @@ -686,6 +764,12 @@ class RsyncSync(NewBase):
192 elif (servertimestamp == 0) or (servertimestamp > timestamp):
193 # actual sync
194 command = rsynccommand[:]
195 +
196 + if self.repo.location != download_dir:
197 + # Use shared hardlinks for files that are identical
198 + # in the previous snapshot of the repository.
199 + command.append('--link-dest=%s' % self.repo.location)
200 +
201 submodule_paths = self._get_submodule_paths()
202 if submodule_paths:
203 # The only way to select multiple directories to
204 @@ -696,9 +780,10 @@ class RsyncSync(NewBase):
205 # /./ is special syntax supported with the
206 # rsync --relative option.
207 command.append(syncuri + "/./" + path)
208 - command.append(self.repo.location)
209 else:
210 - command.extend([syncuri + "/", self.repo.location])
211 + command.append(syncuri + "/")
212 +
213 + command.append(download_dir)
214
215 exitcode = None
216 try:
217 --
218 2.13.6

Replies