Gentoo Archives: gentoo-commits

From: Zac Medico <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage:master commit in: pym/portage/sync/modules/rsync/, cnf/, pym/portage/repository/, man/
Date: Tue, 10 Jul 2018 06:38:08
Message-Id: 1531199033.84822ef7a21494d3f044c2ffa7b112e4d29665ab.zmedico@gentoo
1 commit: 84822ef7a21494d3f044c2ffa7b112e4d29665ab
2 Author: Zac Medico <zmedico <AT> gentoo <DOT> org>
3 AuthorDate: Thu Jul 5 13:10:43 2018 +0000
4 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
5 CommitDate: Tue Jul 10 05:03:53 2018 +0000
6 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=84822ef7
7
8 rsync: quarantine data prior to verification (bug 660410)
9
10 Sync into a quarantine subdirectory, using the rsync --link-dest option
11 to create hardlinks to identical files in the previous snapshot of the
12 repository. If hardlinks are not supported, then show a warning message
13 and sync directly to the normal repository location.
14
15 If verification succeeds, then the quarantine subdirectory is synced
16 to the normal repository location, and the quarantine subdirectory
17 is deleted. If verification fails, then the quarantine directory is
18 preserved for purposes of analysis.
19
20 Even if verification happens to be disabled, the quarantine directory
21 is still useful for making the repository update more atomic, so that
22 it is less likely that the normal repository location will be observed in
23 a partially synced state.
24
25 The new behavior may conflict with configurations that restrict the
26 use of hardlinks, such as overlay filesystems. Therefore, users will
27 have to set "sync-allow-hardlinks = no" in repos.conf if they have
28 a configuration that prevents the use of hardlinks, but this should
29 not be very common.
30
31 Bug: https://bugs.gentoo.org/660410
32
33 cnf/repos.conf | 1 +
34 man/portage.5 | 8 +++
35 pym/portage/repository/config.py | 7 ++-
36 pym/portage/sync/modules/rsync/rsync.py | 87 ++++++++++++++++++++++++++++++---
37 4 files changed, 94 insertions(+), 9 deletions(-)
38
39 diff --git a/cnf/repos.conf b/cnf/repos.conf
40 index 352073cfd..419f6d118 100644
41 --- a/cnf/repos.conf
42 +++ b/cnf/repos.conf
43 @@ -6,6 +6,7 @@ location = /usr/portage
44 sync-type = rsync
45 sync-uri = rsync://rsync.gentoo.org/gentoo-portage
46 auto-sync = yes
47 +sync-allow-hardlinks = yes
48 sync-rsync-verify-jobs = 1
49 sync-rsync-verify-metamanifest = yes
50 sync-rsync-verify-max-age = 24
51
52 diff --git a/man/portage.5 b/man/portage.5
53 index 5adb07d82..acc80791b 100644
54 --- a/man/portage.5
55 +++ b/man/portage.5
56 @@ -973,6 +973,14 @@ files). Defaults to true.
57 .br
58 Valid values: true, false.
59 .TP
60 +.B sync\-allow\-hardlinks = yes|no
61 +Allow sync plugins to use hardlinks in order to ensure that a repository
62 +remains in a valid state if something goes wrong during the sync operation.
63 +For example, if signature verification fails during a sync operation,
64 +the previous state of the repository will be preserved. This option may
65 +conflict with configurations that restrict the use of hardlinks, such as
66 +overlay filesystems.
67 +.TP
68 .B sync\-cvs\-repo
69 Specifies CVS repository.
70 .TP
71
72 diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py
73 index 1d897bb90..ad7ae9d18 100644
74 --- a/pym/portage/repository/config.py
75 +++ b/pym/portage/repository/config.py
76 @@ -86,6 +86,7 @@ class RepoConfig(object):
77 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest',
78 'update_changelog', '_eapis_banned', '_eapis_deprecated',
79 '_masters_orig', 'module_specific_options', 'manifest_required_hashes',
80 + 'sync_allow_hardlinks',
81 'sync_openpgp_key_path',
82 'sync_openpgp_key_refresh_retry_count',
83 'sync_openpgp_key_refresh_retry_delay_max',
84 @@ -188,6 +189,9 @@ class RepoConfig(object):
85 self.strict_misc_digests = repo_opts.get(
86 'strict-misc-digests', 'true').lower() == 'true'
87
88 + self.sync_allow_hardlinks = repo_opts.get(
89 + 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes')
90 +
91 self.sync_openpgp_key_path = repo_opts.get(
92 'sync-openpgp-key-path', None)
93
94 @@ -534,6 +538,7 @@ class RepoConfigLoader(object):
95 'clone_depth', 'eclass_overrides',
96 'force', 'masters', 'priority', 'strict_misc_digests',
97 'sync_depth', 'sync_hooks_only_on_change',
98 + 'sync_allow_hardlinks',
99 'sync_openpgp_key_path',
100 'sync_openpgp_key_refresh_retry_count',
101 'sync_openpgp_key_refresh_retry_delay_max',
102 @@ -960,7 +965,7 @@ class RepoConfigLoader(object):
103 return repo_name in self.prepos
104
105 def config_string(self):
106 - bool_keys = ("strict_misc_digests",)
107 + bool_keys = ("strict_misc_digests", "sync_allow_hardlinks")
108 str_or_int_keys = ("auto_sync", "clone_depth", "format", "location",
109 "main_repo", "priority", "sync_depth", "sync_openpgp_key_path",
110 "sync_openpgp_key_refresh_retry_count",
111
112 diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py
113 index a715e2818..fb1960a3c 100644
114 --- a/pym/portage/sync/modules/rsync/rsync.py
115 +++ b/pym/portage/sync/modules/rsync/rsync.py
116 @@ -10,6 +10,7 @@ import datetime
117 import io
118 import re
119 import random
120 +import subprocess
121 import tempfile
122
123 import portage
124 @@ -58,6 +59,54 @@ class RsyncSync(NewBase):
125 def __init__(self):
126 NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
127
128 + def _select_download_dir(self):
129 + '''
130 + Select and return the download directory. It's desirable to be able
131 + to create shared hardlinks between the download directory and the
132 + normal repository, and this is facilitated by making the download
133 + directory be a subdirectory of the normal repository location
134 + (ensuring that no mountpoints are crossed). Shared hardlinks are
135 + created by using the rsync --link-dest option.
136 +
137 + Since the download is initially unverified, it is safest to save
138 + it in a quarantine directory. The quarantine directory is also
139 + useful for making the repository update more atomic, so that it
140 + is less likely that the normal repository location will be observed in
141 + a partially synced state.
142 +
143 + This method returns a quarantine directory if sync-allow-hardlinks
144 + is enabled in repos.conf, and otherwise it returns the normal
145 + repository location.
146 + '''
147 + if self.repo.sync_allow_hardlinks:
148 + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine')
149 + else:
150 + return self.repo.location
151 +
152 + def _commit_download(self, download_dir):
153 + '''
154 + Commit changes from download_dir if it does not refer to the
155 + normal repository location.
156 + '''
157 + exitcode = 0
158 + if self.repo.location != download_dir:
159 + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts
160 + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir))
161 + rsynccommand.append('%s/' % download_dir.rstrip('/'))
162 + rsynccommand.append('%s/' % self.repo.location)
163 + exitcode = subprocess.call(rsynccommand)
164 +
165 + return exitcode
166 +
167 + def _remove_download(self, download_dir):
168 + """
169 + Remove download_dir if it does not refer to the normal repository
170 + location.
171 + """
172 + exitcode = 0
173 + if self.repo.location != download_dir:
174 + exitcode = subprocess.call(['rm', '-rf', download_dir])
175 + return exitcode
176
177 def update(self):
178 '''Internal update function which performs the transfer'''
179 @@ -94,6 +143,9 @@ class RsyncSync(NewBase):
180 self.extra_rsync_opts.extend(portage.util.shlex_split(
181 self.repo.module_specific_options['sync-rsync-extra-opts']))
182
183 + download_dir = self._select_download_dir()
184 + exitcode = 0
185 +
186 # Process GLEP74 verification options.
187 # Default verification to 'no'; it's enabled for ::gentoo
188 # via default repos.conf though.
189 @@ -188,8 +240,10 @@ class RsyncSync(NewBase):
190 self.proto = "file"
191 dosyncuri = syncuri[7:]
192 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
193 - dosyncuri, timestamp, opts)
194 + dosyncuri, timestamp, opts, download_dir)
195 self._process_exitcode(exitcode, dosyncuri, out, 1)
196 + if exitcode == 0 and not unchanged:
197 + self._commit_download(download_dir)
198 return (exitcode, updatecache_flg)
199
200 retries=0
201 @@ -321,7 +375,7 @@ class RsyncSync(NewBase):
202 dosyncuri = dosyncuri[6:].replace('/', ':/', 1)
203
204 unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync(
205 - dosyncuri, timestamp, opts)
206 + dosyncuri, timestamp, opts, download_dir)
207 if not unchanged:
208 local_state_unchanged = False
209 if is_synced:
210 @@ -338,6 +392,12 @@ class RsyncSync(NewBase):
211 break
212 self._process_exitcode(exitcode, dosyncuri, out, maxretries)
213
214 + if local_state_unchanged:
215 + # The quarantine download_dir is not intended to exist
216 + # in this case, so refer gemato to the normal repository
217 + # location.
218 + download_dir = self.repo.location
219 +
220 # if synced successfully, verify now
221 if exitcode == 0 and self.verify_metamanifest:
222 if gemato is None:
223 @@ -349,7 +409,7 @@ class RsyncSync(NewBase):
224 # we always verify the Manifest signature, in case
225 # we had to deal with key revocation case
226 m = gemato.recursiveloader.ManifestRecursiveLoader(
227 - os.path.join(self.repo.location, 'Manifest'),
228 + os.path.join(download_dir, 'Manifest'),
229 verify_openpgp=True,
230 openpgp_env=openpgp_env,
231 max_jobs=self.verify_jobs)
232 @@ -380,7 +440,7 @@ class RsyncSync(NewBase):
233 # if nothing has changed, skip the actual Manifest
234 # verification
235 if not local_state_unchanged:
236 - out.ebegin('Verifying %s' % (self.repo.location,))
237 + out.ebegin('Verifying %s' % (download_dir,))
238 m.assert_directory_verifies()
239 out.eend(0)
240 except GematoException as e:
241 @@ -389,12 +449,16 @@ class RsyncSync(NewBase):
242 level=logging.ERROR, noiselevel=-1)
243 exitcode = 1
244
245 + if exitcode == 0 and not local_state_unchanged:
246 + exitcode = self._commit_download(download_dir)
247 +
248 return (exitcode, updatecache_flg)
249 finally:
250 + if exitcode == 0:
251 + self._remove_download(download_dir)
252 if openpgp_env is not None:
253 openpgp_env.close()
254
255 -
256 def _process_exitcode(self, exitcode, syncuri, out, maxretries):
257 if (exitcode==0):
258 pass
259 @@ -530,7 +594,7 @@ class RsyncSync(NewBase):
260 return rsync_opts
261
262
263 - def _do_rsync(self, syncuri, timestamp, opts):
264 + def _do_rsync(self, syncuri, timestamp, opts, download_dir):
265 updatecache_flg = False
266 is_synced = False
267 if timestamp != 0 and "--quiet" not in opts:
268 @@ -655,6 +719,12 @@ class RsyncSync(NewBase):
269 elif (servertimestamp == 0) or (servertimestamp > timestamp):
270 # actual sync
271 command = rsynccommand[:]
272 +
273 + if self.repo.location != download_dir:
274 + # Use shared hardlinks for files that are identical
275 + # in the previous snapshot of the repository.
276 + command.append('--link-dest=%s' % self.repo.location)
277 +
278 submodule_paths = self._get_submodule_paths()
279 if submodule_paths:
280 # The only way to select multiple directories to
281 @@ -665,9 +735,10 @@ class RsyncSync(NewBase):
282 # /./ is special syntax supported with the
283 # rsync --relative option.
284 command.append(syncuri + "/./" + path)
285 - command.append(self.repo.location)
286 else:
287 - command.extend([syncuri + "/", self.repo.location])
288 + command.append(syncuri + "/")
289 +
290 + command.append(download_dir)
291
292 exitcode = None
293 try: