Gentoo Archives: gentoo-portage-dev

From: Fabian Groffen <grobian@g.o>
To: gentoo-portage-dev@l.g.o
Subject: Re: [gentoo-portage-dev] [PATCH v2] rsync: quarantine data prior to verification (bug 660410)
Date: Sun, 08 Jul 2018 08:48:14
Message-Id: 20180708084801.GA38116@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH v2] rsync: quarantine data prior to verification (bug 660410) by Zac Medico
1 Would it be worth having a flag to keep long standing sync behaviour for
2 situations where either diskspace, or verification results in problems
3 (like bug #657320)?
4
5 Fabian
6
7 On 07-07-2018 18:55:52 -0700, Zac Medico wrote:
8 > Sync into a quarantine subdirectory, using the rsync --link-dest option
9 > to create hardlinks to identical files in the previous snapshot of the
10 > repository. If hardlinks are not supported, then show a warning message
11 > and sync directly to the normal repository location.
12 >
13 > If verification succeeds, then the quarantine subdirectory is synced
14 > to the normal repository location, and the quarantine subdirectory
15 > is deleted. If verification fails, then the quarantine directory is
16 > preserved for purposes of analysis.
17 >
18 > Even if verification happens to be disabled, the quarantine directory
19 > is still useful for making the repository update more atomic, so that
20 > it is less likely that normal repository location will be observed in
21 > a partially synced state.
22 >
23 > The new behavior may conflict with configurations that restrict the
24 > use of hardlinks, such as overlay filesystems. Therefore, users will
25 > have to set "sync-allow-hardlinks = no" in repos.conf if they have
26 > a configuration that prevents the use of hardlinks, but this should
27 > not be very common.
28 >
29 > Bug: https://bugs.gentoo.org/660410
30 > ---
31 > [PATCH v2] makes it possible to disable the new behavior by setting
32 > "sync-allow-hardlinks = no" in repos.conf
33 >
34 > cnf/repos.conf | 1 +
35 > man/portage.5 | 8 +++
36 > pym/portage/repository/config.py | 8 ++-
37 > pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++---
38 > 4 files changed, 97 insertions(+), 9 deletions(-)
39 >
40 > diff --git a/cnf/repos.conf b/cnf/repos.conf
41 > index 352073cfd5..419f6d1182 100644
42 > --- a/cnf/repos.conf
43 > +++ b/cnf/repos.conf
44 > @@ -6,6 +6,7 @@ location = /usr/portage
45 > sync-type = rsync
46 > sync-uri = rsync://rsync.gentoo.org/gentoo-portage
47 > auto-sync = yes
48 > +sync-allow-hardlinks = yes
49 > sync-rsync-verify-jobs = 1
50 > sync-rsync-verify-metamanifest = yes
51 > sync-rsync-verify-max-age = 24
52 > diff --git a/man/portage.5 b/man/portage.5
53 > index 5adb07d821..acc80791be 100644
54 > --- a/man/portage.5
55 > +++ b/man/portage.5
56 > @@ -973,6 +973,14 @@ files). Defaults to true.
57 > .br
58 > Valid values: true, false.
59 > .TP
60 > +.B sync\-allow\-hardlinks = yes|no
61 > +Allow sync plugins to use hardlinks in order to ensure that a repository
62 > +remains in a valid state if something goes wrong during the sync operation.
63 > +For example, if signature verification fails during a sync operation,
64 > +the previous state of the repository will be preserved. This option may
65 > +conflict with configurations that restrict the use of hardlinks, such as
66 > +overlay filesystems.
67 > +.TP
68 > .B sync\-cvs\-repo
69 > Specifies CVS repository.
70 > .TP
71 > diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py
72 > index 1d897bb903..c7440369c2 100644
73 > --- a/pym/portage/repository/config.py
74 > +++ b/pym/portage/repository/config.py
75 > @@ -86,6 +86,7 @@ class RepoConfig(object):
76 > 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest',
77 > 'update_changelog', '_eapis_banned', '_eapis_deprecated',
78 > '_masters_orig', 'module_specific_options', 'manifest_required_hashes',
79 > + 'sync_allow_hardlinks',
80 > 'sync_openpgp_key_path',
81 > 'sync_openpgp_key_refresh_retry_count',
82 > 'sync_openpgp_key_refresh_retry_delay_max',
83 > @@ -188,6 +189,9 @@ class RepoConfig(object):
84 > self.strict_misc_digests = repo_opts.get(
85 > 'strict-misc-digests', 'true').lower() == 'true'
86 >
87 > + self.sync_allow_hardlinks = repo_opts.get(
88 > + 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes')
89 > +
90 > self.sync_openpgp_key_path = repo_opts.get(
91 > 'sync-openpgp-key-path', None)
92 >
93 > @@ -534,6 +538,7 @@ class RepoConfigLoader(object):
94 > 'clone_depth', 'eclass_overrides',
95 > 'force', 'masters', 'priority', 'strict_misc_digests',
96 > 'sync_depth', 'sync_hooks_only_on_change',
97 > + 'sync_allow_hardlinks',
98 > 'sync_openpgp_key_path',
99 > 'sync_openpgp_key_refresh_retry_count',
100 > 'sync_openpgp_key_refresh_retry_delay_max',
101 > @@ -962,7 +967,8 @@ class RepoConfigLoader(object):
102 > def config_string(self):
103 > bool_keys = ("strict_misc_digests",)
104 > str_or_int_keys = ("auto_sync", "clone_depth", "format", "location",
105 > - "main_repo", "priority", "sync_depth", "sync_openpgp_key_path",
106 > + "main_repo", "priority", "sync_depth",
107 > + "sync_allow_hardlinks", "sync_openpgp_key_path",
108 > "sync_openpgp_key_refresh_retry_count",
109 > "sync_openpgp_key_refresh_retry_delay_max",
110 > "sync_openpgp_key_refresh_retry_delay_exp_base",
111 > diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py
112 > index 382a1eaaef..59211d2bb8 100644
113 > --- a/pym/portage/sync/modules/rsync/rsync.py
114 > +++ b/pym/portage/sync/modules/rsync/rsync.py
115 > @@ -11,6 +11,7 @@ import functools
116 > import io
117 > import re
118 > import random
119 > +import subprocess
120 > import tempfile
121 >
122 > import portage
123 > @@ -61,6 +62,56 @@ class RsyncSync(NewBase):
124 > def __init__(self):
125 > NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
126 >
127 > + def _select_download_dir(self):
128 > + '''
129 > + Select and return the download directory. It's desirable to be able
130 > + to create shared hardlinks between the download directory to the
131 > + normal repository, and this is facilitated by making the download
132 > + directory be a subdirectory of the normal repository location
133 > + (ensuring that no mountpoints are crossed). Shared hardlinks are
134 > + created by using the rsync --link-dest option.
135 > +
136 > + Since the download is initially unverified, it is safest to save
137 > + it in a quarantine directory. The quarantine directory is also
138 > + useful for making the repository update more atomic, so that it
139 > + less likely that normal repository location will be observed in
140 > + a partially synced state.
141 > +
142 > + This method returns a quarantine directory if sync-allow-hardlinks
143 > + is enabled in repos.conf, and otherwise it returne the normal
144 > + repository location.
145 > + '''
146 > + if self.repo.sync_allow_hardlinks:
147 > + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine')
148 > + else:
149 > + return self.repo.location
150 > +
151 > + def _commit_download(self, download_dir):
152 > + '''
153 > + Commit changes from download_dir if it does not refer to the
154 > + normal repository location.
155 > + '''
156 > + exitcode = 0
157 > + if self.repo.location != download_dir:
158 > + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts
159 > + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir))
160 > + rsynccommand.append('%s/' % download_dir.rstrip('/'))
161 > + rsynccommand.append('%s/' % self.repo.location)
162 > + exitcode = subprocess.call(rsynccommand)
163 > + if exitcode == 0:
164 > + exitcode = self._remove_download(download_dir)
165 > +
166 > + return exitcode
167 > +
168 > + def _remove_download(self, download_dir):
169 > + """
170 > + Remove download_dir if it does not refer to the normal repository
171 > + location.
172 > + """
173 > + exitcode = 0
174 > + if self.repo.location != download_dir:
175 > + exitcode = subprocess.call(['rm', '-rf', download_dir])
176 > + return exitcode
177 >
178 > def update(self):
179 > '''Internal update function which performs the transfer'''
180 > @@ -97,6 +148,9 @@ class RsyncSync(NewBase):
181 > self.extra_rsync_opts.extend(portage.util.shlex_split(
182 > self.repo.module_specific_options['sync-rsync-extra-opts']))
183 >
184 > + download_dir = self._select_download_dir()
185 > + exitcode = 0
186 > +
187 > # Process GLEP74 verification options.
188 > # Default verification to 'no'; it's enabled for ::gentoo
189 > # via default repos.conf though.
190 > @@ -219,8 +273,10 @@ class RsyncSync(NewBase):
191 > self.proto = "file"
192 > dosyncuri = syncuri[7:]
193 > unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
194 > - dosyncuri, timestamp, opts)
195 > + dosyncuri, timestamp, opts, download_dir)
196 > self._process_exitcode(exitcode, dosyncuri, out, 1)
197 > + if exitcode == 0 and not unchanged:
198 > + self._commit_download(download_dir)
199 > return (exitcode, updatecache_flg)
200 >
201 > retries=0
202 > @@ -352,7 +408,7 @@ class RsyncSync(NewBase):
203 > dosyncuri = dosyncuri[6:].replace('/', ':/', 1)
204 >
205 > unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
206 > - dosyncuri, timestamp, opts)
207 > + dosyncuri, timestamp, opts, download_dir)
208 > if not unchanged:
209 > local_state_unchanged = False
210 > if is_synced:
211 > @@ -369,6 +425,12 @@ class RsyncSync(NewBase):
212 > break
213 > self._process_exitcode(exitcode, dosyncuri, out, maxretries)
214 >
215 > + if local_state_unchanged:
216 > + # The quarantine download_dir is not intended to exist
217 > + # in this case, so refer gemato to the normal repository
218 > + # location.
219 > + download_dir = self.repo.location
220 > +
221 > # if synced successfully, verify now
222 > if exitcode == 0 and self.verify_metamanifest:
223 > if gemato is None:
224 > @@ -380,7 +442,7 @@ class RsyncSync(NewBase):
225 > # we always verify the Manifest signature, in case
226 > # we had to deal with key revocation case
227 > m = gemato.recursiveloader.ManifestRecursiveLoader(
228 > - os.path.join(self.repo.location, 'Manifest'),
229 > + os.path.join(download_dir, 'Manifest'),
230 > verify_openpgp=True,
231 > openpgp_env=openpgp_env,
232 > max_jobs=self.verify_jobs)
233 > @@ -411,7 +473,7 @@ class RsyncSync(NewBase):
234 > # if nothing has changed, skip the actual Manifest
235 > # verification
236 > if not local_state_unchanged:
237 > - out.ebegin('Verifying %s' % (self.repo.location,))
238 > + out.ebegin('Verifying %s' % (download_dir,))
239 > m.assert_directory_verifies()
240 > out.eend(0)
241 > except GematoException as e:
242 > @@ -420,12 +482,16 @@ class RsyncSync(NewBase):
243 > level=logging.ERROR, noiselevel=-1)
244 > exitcode = 1
245 >
246 > + if exitcode == 0 and not local_state_unchanged:
247 > + exitcode = self._commit_download(download_dir)
248 > +
249 > return (exitcode, updatecache_flg)
250 > finally:
251 > + if exitcode == 0:
252 > + self._remove_download(download_dir)
253 > if openpgp_env is not None:
254 > openpgp_env.close()
255 >
256 > -
257 > def _process_exitcode(self, exitcode, syncuri, out, maxretries):
258 > if (exitcode==0):
259 > pass
260 > @@ -561,7 +627,7 @@ class RsyncSync(NewBase):
261 > return rsync_opts
262 >
263 >
264 > - def _do_rsync(self, syncuri, timestamp, opts):
265 > + def _do_rsync(self, syncuri, timestamp, opts, download_dir):
266 > updatecache_flg = False
267 > is_synced = False
268 > if timestamp != 0 and "--quiet" not in opts:
269 > @@ -686,6 +752,12 @@ class RsyncSync(NewBase):
270 > elif (servertimestamp == 0) or (servertimestamp > timestamp):
271 > # actual sync
272 > command = rsynccommand[:]
273 > +
274 > + if self.repo.location != download_dir:
275 > + # Use shared hardlinks for files that are identical
276 > + # in the previous snapshot of the repository.
277 > + command.append('--link-dest=%s' % self.repo.location)
278 > +
279 > submodule_paths = self._get_submodule_paths()
280 > if submodule_paths:
281 > # The only way to select multiple directories to
282 > @@ -696,9 +768,10 @@ class RsyncSync(NewBase):
283 > # /./ is special syntax supported with the
284 > # rsync --relative option.
285 > command.append(syncuri + "/./" + path)
286 > - command.append(self.repo.location)
287 > else:
288 > - command.extend([syncuri + "/", self.repo.location])
289 > + command.append(syncuri + "/")
290 > +
291 > + command.append(download_dir)
292 >
293 > exitcode = None
294 > try:
295 > --
296 > 2.13.6
297 >
298 >
299
300 --
301 Fabian Groffen
302 Gentoo on a different level

Attachments

File name MIME type
signature.asc application/pgp-signature

Replies