Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH v2] rsync: quarantine data prior to verification (bug 660410)
Date: Sun, 08 Jul 2018 01:59:36
Message-Id: 20180708015552.22154-1-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] rsync: quarantine data prior to verification (bug 660410) by Zac Medico
1 Sync into a quarantine subdirectory, using the rsync --link-dest option
2 to create hardlinks to identical files in the previous snapshot of the
3 repository. If hardlinks are not supported, then show a warning message
4 and sync directly to the normal repository location.
5
6 If verification succeeds, then the quarantine subdirectory is synced
7 to the normal repository location, and the quarantine subdirectory
8 is deleted. If verification fails, then the quarantine directory is
9 preserved for purposes of analysis.
10
11 Even if verification happens to be disabled, the quarantine directory
12 is still useful for making the repository update more atomic, so that
13 it is less likely that normal repository location will be observed in
14 a partially synced state.
15
16 The new behavior may conflict with configurations that restrict the
17 use of hardlinks, such as overlay filesystems. Therefore, users will
18 have to set "sync-allow-hardlinks = no" in repos.conf if they have
19 a configuration that prevents the use of hardlinks, but this should
20 not be very common.
21
22 Bug: https://bugs.gentoo.org/660410
23 ---
24 [PATCH v2] makes it possible to disable the new behavior by setting
25 "sync-allow-hardlinks = no" in repos.conf
26
27 cnf/repos.conf | 1 +
28 man/portage.5 | 8 +++
29 pym/portage/repository/config.py | 8 ++-
30 pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++---
31 4 files changed, 97 insertions(+), 9 deletions(-)
32
33 diff --git a/cnf/repos.conf b/cnf/repos.conf
34 index 352073cfd5..419f6d1182 100644
35 --- a/cnf/repos.conf
36 +++ b/cnf/repos.conf
37 @@ -6,6 +6,7 @@ location = /usr/portage
38 sync-type = rsync
39 sync-uri = rsync://rsync.gentoo.org/gentoo-portage
40 auto-sync = yes
41 +sync-allow-hardlinks = yes
42 sync-rsync-verify-jobs = 1
43 sync-rsync-verify-metamanifest = yes
44 sync-rsync-verify-max-age = 24
45 diff --git a/man/portage.5 b/man/portage.5
46 index 5adb07d821..acc80791be 100644
47 --- a/man/portage.5
48 +++ b/man/portage.5
49 @@ -973,6 +973,14 @@ files). Defaults to true.
50 .br
51 Valid values: true, false.
52 .TP
53 +.B sync\-allow\-hardlinks = yes|no
54 +Allow sync plugins to use hardlinks in order to ensure that a repository
55 +remains in a valid state if something goes wrong during the sync operation.
56 +For example, if signature verification fails during a sync operation,
57 +the previous state of the repository will be preserved. This option may
58 +conflict with configurations that restrict the use of hardlinks, such as
59 +overlay filesystems.
60 +.TP
61 .B sync\-cvs\-repo
62 Specifies CVS repository.
63 .TP
64 diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py
65 index 1d897bb903..c7440369c2 100644
66 --- a/pym/portage/repository/config.py
67 +++ b/pym/portage/repository/config.py
68 @@ -86,6 +86,7 @@ class RepoConfig(object):
69 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest',
70 'update_changelog', '_eapis_banned', '_eapis_deprecated',
71 '_masters_orig', 'module_specific_options', 'manifest_required_hashes',
72 + 'sync_allow_hardlinks',
73 'sync_openpgp_key_path',
74 'sync_openpgp_key_refresh_retry_count',
75 'sync_openpgp_key_refresh_retry_delay_max',
76 @@ -188,6 +189,9 @@ class RepoConfig(object):
77 self.strict_misc_digests = repo_opts.get(
78 'strict-misc-digests', 'true').lower() == 'true'
79
80 + self.sync_allow_hardlinks = repo_opts.get(
81 + 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes')
82 +
83 self.sync_openpgp_key_path = repo_opts.get(
84 'sync-openpgp-key-path', None)
85
86 @@ -534,6 +538,7 @@ class RepoConfigLoader(object):
87 'clone_depth', 'eclass_overrides',
88 'force', 'masters', 'priority', 'strict_misc_digests',
89 'sync_depth', 'sync_hooks_only_on_change',
90 + 'sync_allow_hardlinks',
91 'sync_openpgp_key_path',
92 'sync_openpgp_key_refresh_retry_count',
93 'sync_openpgp_key_refresh_retry_delay_max',
94 @@ -962,7 +967,8 @@ class RepoConfigLoader(object):
95 def config_string(self):
96 bool_keys = ("strict_misc_digests",)
97 str_or_int_keys = ("auto_sync", "clone_depth", "format", "location",
98 - "main_repo", "priority", "sync_depth", "sync_openpgp_key_path",
99 + "main_repo", "priority", "sync_depth",
100 + "sync_allow_hardlinks", "sync_openpgp_key_path",
101 "sync_openpgp_key_refresh_retry_count",
102 "sync_openpgp_key_refresh_retry_delay_max",
103 "sync_openpgp_key_refresh_retry_delay_exp_base",
104 diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py
105 index 382a1eaaef..59211d2bb8 100644
106 --- a/pym/portage/sync/modules/rsync/rsync.py
107 +++ b/pym/portage/sync/modules/rsync/rsync.py
108 @@ -11,6 +11,7 @@ import functools
109 import io
110 import re
111 import random
112 +import subprocess
113 import tempfile
114
115 import portage
116 @@ -61,6 +62,56 @@ class RsyncSync(NewBase):
117 def __init__(self):
118 NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
119
120 + def _select_download_dir(self):
121 + '''
122 + Select and return the download directory. It's desirable to be able
123 + to create shared hardlinks between the download directory to the
124 + normal repository, and this is facilitated by making the download
125 + directory be a subdirectory of the normal repository location
126 + (ensuring that no mountpoints are crossed). Shared hardlinks are
127 + created by using the rsync --link-dest option.
128 +
129 + Since the download is initially unverified, it is safest to save
130 + it in a quarantine directory. The quarantine directory is also
131 + useful for making the repository update more atomic, so that it
132 + less likely that normal repository location will be observed in
133 + a partially synced state.
134 +
135 + This method returns a quarantine directory if sync-allow-hardlinks
136 + is enabled in repos.conf, and otherwise it returne the normal
137 + repository location.
138 + '''
139 + if self.repo.sync_allow_hardlinks:
140 + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine')
141 + else:
142 + return self.repo.location
143 +
144 + def _commit_download(self, download_dir):
145 + '''
146 + Commit changes from download_dir if it does not refer to the
147 + normal repository location.
148 + '''
149 + exitcode = 0
150 + if self.repo.location != download_dir:
151 + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts
152 + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir))
153 + rsynccommand.append('%s/' % download_dir.rstrip('/'))
154 + rsynccommand.append('%s/' % self.repo.location)
155 + exitcode = subprocess.call(rsynccommand)
156 + if exitcode == 0:
157 + exitcode = self._remove_download(download_dir)
158 +
159 + return exitcode
160 +
161 + def _remove_download(self, download_dir):
162 + """
163 + Remove download_dir if it does not refer to the normal repository
164 + location.
165 + """
166 + exitcode = 0
167 + if self.repo.location != download_dir:
168 + exitcode = subprocess.call(['rm', '-rf', download_dir])
169 + return exitcode
170
171 def update(self):
172 '''Internal update function which performs the transfer'''
173 @@ -97,6 +148,9 @@ class RsyncSync(NewBase):
174 self.extra_rsync_opts.extend(portage.util.shlex_split(
175 self.repo.module_specific_options['sync-rsync-extra-opts']))
176
177 + download_dir = self._select_download_dir()
178 + exitcode = 0
179 +
180 # Process GLEP74 verification options.
181 # Default verification to 'no'; it's enabled for ::gentoo
182 # via default repos.conf though.
183 @@ -219,8 +273,10 @@ class RsyncSync(NewBase):
184 self.proto = "file"
185 dosyncuri = syncuri[7:]
186 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
187 - dosyncuri, timestamp, opts)
188 + dosyncuri, timestamp, opts, download_dir)
189 self._process_exitcode(exitcode, dosyncuri, out, 1)
190 + if exitcode == 0 and not unchanged:
191 + self._commit_download(download_dir)
192 return (exitcode, updatecache_flg)
193
194 retries=0
195 @@ -352,7 +408,7 @@ class RsyncSync(NewBase):
196 dosyncuri = dosyncuri[6:].replace('/', ':/', 1)
197
198 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
199 - dosyncuri, timestamp, opts)
200 + dosyncuri, timestamp, opts, download_dir)
201 if not unchanged:
202 local_state_unchanged = False
203 if is_synced:
204 @@ -369,6 +425,12 @@ class RsyncSync(NewBase):
205 break
206 self._process_exitcode(exitcode, dosyncuri, out, maxretries)
207
208 + if local_state_unchanged:
209 + # The quarantine download_dir is not intended to exist
210 + # in this case, so refer gemato to the normal repository
211 + # location.
212 + download_dir = self.repo.location
213 +
214 # if synced successfully, verify now
215 if exitcode == 0 and self.verify_metamanifest:
216 if gemato is None:
217 @@ -380,7 +442,7 @@ class RsyncSync(NewBase):
218 # we always verify the Manifest signature, in case
219 # we had to deal with key revocation case
220 m = gemato.recursiveloader.ManifestRecursiveLoader(
221 - os.path.join(self.repo.location, 'Manifest'),
222 + os.path.join(download_dir, 'Manifest'),
223 verify_openpgp=True,
224 openpgp_env=openpgp_env,
225 max_jobs=self.verify_jobs)
226 @@ -411,7 +473,7 @@ class RsyncSync(NewBase):
227 # if nothing has changed, skip the actual Manifest
228 # verification
229 if not local_state_unchanged:
230 - out.ebegin('Verifying %s' % (self.repo.location,))
231 + out.ebegin('Verifying %s' % (download_dir,))
232 m.assert_directory_verifies()
233 out.eend(0)
234 except GematoException as e:
235 @@ -420,12 +482,16 @@ class RsyncSync(NewBase):
236 level=logging.ERROR, noiselevel=-1)
237 exitcode = 1
238
239 + if exitcode == 0 and not local_state_unchanged:
240 + exitcode = self._commit_download(download_dir)
241 +
242 return (exitcode, updatecache_flg)
243 finally:
244 + if exitcode == 0:
245 + self._remove_download(download_dir)
246 if openpgp_env is not None:
247 openpgp_env.close()
248
249 -
250 def _process_exitcode(self, exitcode, syncuri, out, maxretries):
251 if (exitcode==0):
252 pass
253 @@ -561,7 +627,7 @@ class RsyncSync(NewBase):
254 return rsync_opts
255
256
257 - def _do_rsync(self, syncuri, timestamp, opts):
258 + def _do_rsync(self, syncuri, timestamp, opts, download_dir):
259 updatecache_flg = False
260 is_synced = False
261 if timestamp != 0 and "--quiet" not in opts:
262 @@ -686,6 +752,12 @@ class RsyncSync(NewBase):
263 elif (servertimestamp == 0) or (servertimestamp > timestamp):
264 # actual sync
265 command = rsynccommand[:]
266 +
267 + if self.repo.location != download_dir:
268 + # Use shared hardlinks for files that are identical
269 + # in the previous snapshot of the repository.
270 + command.append('--link-dest=%s' % self.repo.location)
271 +
272 submodule_paths = self._get_submodule_paths()
273 if submodule_paths:
274 # The only way to select multiple directories to
275 @@ -696,9 +768,10 @@ class RsyncSync(NewBase):
276 # /./ is special syntax supported with the
277 # rsync --relative option.
278 command.append(syncuri + "/./" + path)
279 - command.append(self.repo.location)
280 else:
281 - command.extend([syncuri + "/", self.repo.location])
282 + command.append(syncuri + "/")
283 +
284 + command.append(download_dir)
285
286 exitcode = None
287 try:
288 --
289 2.13.6

Replies