1 |
Would it be worth having a flag to keep long standing sync behaviour for |
2 |
situations where either diskspace, or verification results in problems |
3 |
(like bug #657320)? |
4 |
|
5 |
Fabian |
6 |
|
7 |
On 07-07-2018 18:55:52 -0700, Zac Medico wrote: |
8 |
> Sync into a quarantine subdirectory, using the rsync --link-dest option |
9 |
> to create hardlinks to identical files in the previous snapshot of the |
10 |
> repository. If hardlinks are not supported, then show a warning message |
11 |
> and sync directly to the normal repository location. |
12 |
> |
13 |
> If verification succeeds, then the quarantine subdirectory is synced |
14 |
> to the normal repository location, and the quarantine subdirectory |
15 |
> is deleted. If verification fails, then the quarantine directory is |
16 |
> preserved for purposes of analysis. |
17 |
> |
18 |
> Even if verification happens to be disabled, the quarantine directory |
19 |
> is still useful for making the repository update more atomic, so that |
20 |
> it is less likely that normal repository location will be observed in |
21 |
> a partially synced state. |
22 |
> |
23 |
> The new behavior may conflict with configurations that restrict the |
24 |
> use of hardlinks, such as overlay filesystems. Therefore, users will |
25 |
> have to set "sync-allow-hardlinks = no" in repos.conf if they have |
26 |
> a configuration that prevents the use of hardlinks, but this should |
27 |
> not be very common. |
28 |
> |
29 |
> Bug: https://bugs.gentoo.org/660410 |
30 |
> --- |
31 |
> [PATCH v2] makes it possible to disable the new behavior by setting |
32 |
> "sync-allow-hardlinks = no" in repos.conf |
33 |
> |
34 |
> cnf/repos.conf | 1 + |
35 |
> man/portage.5 | 8 +++ |
36 |
> pym/portage/repository/config.py | 8 ++- |
37 |
> pym/portage/sync/modules/rsync/rsync.py | 89 ++++++++++++++++++++++++++++++--- |
38 |
> 4 files changed, 97 insertions(+), 9 deletions(-) |
39 |
> |
40 |
> diff --git a/cnf/repos.conf b/cnf/repos.conf |
41 |
> index 352073cfd5..419f6d1182 100644 |
42 |
> --- a/cnf/repos.conf |
43 |
> +++ b/cnf/repos.conf |
44 |
> @@ -6,6 +6,7 @@ location = /usr/portage |
45 |
> sync-type = rsync |
46 |
> sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
47 |
> auto-sync = yes |
48 |
> +sync-allow-hardlinks = yes |
49 |
> sync-rsync-verify-jobs = 1 |
50 |
> sync-rsync-verify-metamanifest = yes |
51 |
> sync-rsync-verify-max-age = 24 |
52 |
> diff --git a/man/portage.5 b/man/portage.5 |
53 |
> index 5adb07d821..acc80791be 100644 |
54 |
> --- a/man/portage.5 |
55 |
> +++ b/man/portage.5 |
56 |
> @@ -973,6 +973,14 @@ files). Defaults to true. |
57 |
> .br |
58 |
> Valid values: true, false. |
59 |
> .TP |
60 |
> +.B sync\-allow\-hardlinks = yes|no |
61 |
> +Allow sync plugins to use hardlinks in order to ensure that a repository |
62 |
> +remains in a valid state if something goes wrong during the sync operation. |
63 |
> +For example, if signature verification fails during a sync operation, |
64 |
> +the previous state of the repository will be preserved. This option may |
65 |
> +conflict with configurations that restrict the use of hardlinks, such as |
66 |
> +overlay filesystems. |
67 |
> +.TP |
68 |
> .B sync\-cvs\-repo |
69 |
> Specifies CVS repository. |
70 |
> .TP |
71 |
> diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py |
72 |
> index 1d897bb903..c7440369c2 100644 |
73 |
> --- a/pym/portage/repository/config.py |
74 |
> +++ b/pym/portage/repository/config.py |
75 |
> @@ -86,6 +86,7 @@ class RepoConfig(object): |
76 |
> 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 'thin_manifest', |
77 |
> 'update_changelog', '_eapis_banned', '_eapis_deprecated', |
78 |
> '_masters_orig', 'module_specific_options', 'manifest_required_hashes', |
79 |
> + 'sync_allow_hardlinks', |
80 |
> 'sync_openpgp_key_path', |
81 |
> 'sync_openpgp_key_refresh_retry_count', |
82 |
> 'sync_openpgp_key_refresh_retry_delay_max', |
83 |
> @@ -188,6 +189,9 @@ class RepoConfig(object): |
84 |
> self.strict_misc_digests = repo_opts.get( |
85 |
> 'strict-misc-digests', 'true').lower() == 'true' |
86 |
> |
87 |
> + self.sync_allow_hardlinks = repo_opts.get( |
88 |
> + 'sync-allow-hardlinks', 'true').lower() in ('true', 'yes') |
89 |
> + |
90 |
> self.sync_openpgp_key_path = repo_opts.get( |
91 |
> 'sync-openpgp-key-path', None) |
92 |
> |
93 |
> @@ -534,6 +538,7 @@ class RepoConfigLoader(object): |
94 |
> 'clone_depth', 'eclass_overrides', |
95 |
> 'force', 'masters', 'priority', 'strict_misc_digests', |
96 |
> 'sync_depth', 'sync_hooks_only_on_change', |
97 |
> + 'sync_allow_hardlinks', |
98 |
> 'sync_openpgp_key_path', |
99 |
> 'sync_openpgp_key_refresh_retry_count', |
100 |
> 'sync_openpgp_key_refresh_retry_delay_max', |
101 |
> @@ -962,7 +967,8 @@ class RepoConfigLoader(object): |
102 |
> def config_string(self): |
103 |
> bool_keys = ("strict_misc_digests",) |
104 |
> str_or_int_keys = ("auto_sync", "clone_depth", "format", "location", |
105 |
> - "main_repo", "priority", "sync_depth", "sync_openpgp_key_path", |
106 |
> + "main_repo", "priority", "sync_depth", |
107 |
> + "sync_allow_hardlinks", "sync_openpgp_key_path", |
108 |
> "sync_openpgp_key_refresh_retry_count", |
109 |
> "sync_openpgp_key_refresh_retry_delay_max", |
110 |
> "sync_openpgp_key_refresh_retry_delay_exp_base", |
111 |
> diff --git a/pym/portage/sync/modules/rsync/rsync.py b/pym/portage/sync/modules/rsync/rsync.py |
112 |
> index 382a1eaaef..59211d2bb8 100644 |
113 |
> --- a/pym/portage/sync/modules/rsync/rsync.py |
114 |
> +++ b/pym/portage/sync/modules/rsync/rsync.py |
115 |
> @@ -11,6 +11,7 @@ import functools |
116 |
> import io |
117 |
> import re |
118 |
> import random |
119 |
> +import subprocess |
120 |
> import tempfile |
121 |
> |
122 |
> import portage |
123 |
> @@ -61,6 +62,56 @@ class RsyncSync(NewBase): |
124 |
> def __init__(self): |
125 |
> NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) |
126 |
> |
127 |
> + def _select_download_dir(self): |
128 |
> + ''' |
129 |
> + Select and return the download directory. It's desirable to be able |
130 |
> + to create shared hardlinks between the download directory to the |
131 |
> + normal repository, and this is facilitated by making the download |
132 |
> + directory be a subdirectory of the normal repository location |
133 |
> + (ensuring that no mountpoints are crossed). Shared hardlinks are |
134 |
> + created by using the rsync --link-dest option. |
135 |
> + |
136 |
> + Since the download is initially unverified, it is safest to save |
137 |
> + it in a quarantine directory. The quarantine directory is also |
138 |
> + useful for making the repository update more atomic, so that it |
139 |
> + less likely that normal repository location will be observed in |
140 |
> + a partially synced state. |
141 |
> + |
142 |
> + This method returns a quarantine directory if sync-allow-hardlinks |
143 |
> + is enabled in repos.conf, and otherwise it returne the normal |
144 |
> + repository location. |
145 |
> + ''' |
146 |
> + if self.repo.sync_allow_hardlinks: |
147 |
> + return os.path.join(self.repo.location, '.tmp-unverified-download-quarantine') |
148 |
> + else: |
149 |
> + return self.repo.location |
150 |
> + |
151 |
> + def _commit_download(self, download_dir): |
152 |
> + ''' |
153 |
> + Commit changes from download_dir if it does not refer to the |
154 |
> + normal repository location. |
155 |
> + ''' |
156 |
> + exitcode = 0 |
157 |
> + if self.repo.location != download_dir: |
158 |
> + rsynccommand = [self.bin_command] + self.rsync_opts + self.extra_rsync_opts |
159 |
> + rsynccommand.append('--exclude=/%s' % os.path.basename(download_dir)) |
160 |
> + rsynccommand.append('%s/' % download_dir.rstrip('/')) |
161 |
> + rsynccommand.append('%s/' % self.repo.location) |
162 |
> + exitcode = subprocess.call(rsynccommand) |
163 |
> + if exitcode == 0: |
164 |
> + exitcode = self._remove_download(download_dir) |
165 |
> + |
166 |
> + return exitcode |
167 |
> + |
168 |
> + def _remove_download(self, download_dir): |
169 |
> + """ |
170 |
> + Remove download_dir if it does not refer to the normal repository |
171 |
> + location. |
172 |
> + """ |
173 |
> + exitcode = 0 |
174 |
> + if self.repo.location != download_dir: |
175 |
> + exitcode = subprocess.call(['rm', '-rf', download_dir]) |
176 |
> + return exitcode |
177 |
> |
178 |
> def update(self): |
179 |
> '''Internal update function which performs the transfer''' |
180 |
> @@ -97,6 +148,9 @@ class RsyncSync(NewBase): |
181 |
> self.extra_rsync_opts.extend(portage.util.shlex_split( |
182 |
> self.repo.module_specific_options['sync-rsync-extra-opts'])) |
183 |
> |
184 |
> + download_dir = self._select_download_dir() |
185 |
> + exitcode = 0 |
186 |
> + |
187 |
> # Process GLEP74 verification options. |
188 |
> # Default verification to 'no'; it's enabled for ::gentoo |
189 |
> # via default repos.conf though. |
190 |
> @@ -219,8 +273,10 @@ class RsyncSync(NewBase): |
191 |
> self.proto = "file" |
192 |
> dosyncuri = syncuri[7:] |
193 |
> unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
194 |
> - dosyncuri, timestamp, opts) |
195 |
> + dosyncuri, timestamp, opts, download_dir) |
196 |
> self._process_exitcode(exitcode, dosyncuri, out, 1) |
197 |
> + if exitcode == 0 and not unchanged: |
198 |
> + self._commit_download(download_dir) |
199 |
> return (exitcode, updatecache_flg) |
200 |
> |
201 |
> retries=0 |
202 |
> @@ -352,7 +408,7 @@ class RsyncSync(NewBase): |
203 |
> dosyncuri = dosyncuri[6:].replace('/', ':/', 1) |
204 |
> |
205 |
> unchanged, is_synced, exitcode, updatecache_flg = self._do_rsync( |
206 |
> - dosyncuri, timestamp, opts) |
207 |
> + dosyncuri, timestamp, opts, download_dir) |
208 |
> if not unchanged: |
209 |
> local_state_unchanged = False |
210 |
> if is_synced: |
211 |
> @@ -369,6 +425,12 @@ class RsyncSync(NewBase): |
212 |
> break |
213 |
> self._process_exitcode(exitcode, dosyncuri, out, maxretries) |
214 |
> |
215 |
> + if local_state_unchanged: |
216 |
> + # The quarantine download_dir is not intended to exist |
217 |
> + # in this case, so refer gemato to the normal repository |
218 |
> + # location. |
219 |
> + download_dir = self.repo.location |
220 |
> + |
221 |
> # if synced successfully, verify now |
222 |
> if exitcode == 0 and self.verify_metamanifest: |
223 |
> if gemato is None: |
224 |
> @@ -380,7 +442,7 @@ class RsyncSync(NewBase): |
225 |
> # we always verify the Manifest signature, in case |
226 |
> # we had to deal with key revocation case |
227 |
> m = gemato.recursiveloader.ManifestRecursiveLoader( |
228 |
> - os.path.join(self.repo.location, 'Manifest'), |
229 |
> + os.path.join(download_dir, 'Manifest'), |
230 |
> verify_openpgp=True, |
231 |
> openpgp_env=openpgp_env, |
232 |
> max_jobs=self.verify_jobs) |
233 |
> @@ -411,7 +473,7 @@ class RsyncSync(NewBase): |
234 |
> # if nothing has changed, skip the actual Manifest |
235 |
> # verification |
236 |
> if not local_state_unchanged: |
237 |
> - out.ebegin('Verifying %s' % (self.repo.location,)) |
238 |
> + out.ebegin('Verifying %s' % (download_dir,)) |
239 |
> m.assert_directory_verifies() |
240 |
> out.eend(0) |
241 |
> except GematoException as e: |
242 |
> @@ -420,12 +482,16 @@ class RsyncSync(NewBase): |
243 |
> level=logging.ERROR, noiselevel=-1) |
244 |
> exitcode = 1 |
245 |
> |
246 |
> + if exitcode == 0 and not local_state_unchanged: |
247 |
> + exitcode = self._commit_download(download_dir) |
248 |
> + |
249 |
> return (exitcode, updatecache_flg) |
250 |
> finally: |
251 |
> + if exitcode == 0: |
252 |
> + self._remove_download(download_dir) |
253 |
> if openpgp_env is not None: |
254 |
> openpgp_env.close() |
255 |
> |
256 |
> - |
257 |
> def _process_exitcode(self, exitcode, syncuri, out, maxretries): |
258 |
> if (exitcode==0): |
259 |
> pass |
260 |
> @@ -561,7 +627,7 @@ class RsyncSync(NewBase): |
261 |
> return rsync_opts |
262 |
> |
263 |
> |
264 |
> - def _do_rsync(self, syncuri, timestamp, opts): |
265 |
> + def _do_rsync(self, syncuri, timestamp, opts, download_dir): |
266 |
> updatecache_flg = False |
267 |
> is_synced = False |
268 |
> if timestamp != 0 and "--quiet" not in opts: |
269 |
> @@ -686,6 +752,12 @@ class RsyncSync(NewBase): |
270 |
> elif (servertimestamp == 0) or (servertimestamp > timestamp): |
271 |
> # actual sync |
272 |
> command = rsynccommand[:] |
273 |
> + |
274 |
> + if self.repo.location != download_dir: |
275 |
> + # Use shared hardlinks for files that are identical |
276 |
> + # in the previous snapshot of the repository. |
277 |
> + command.append('--link-dest=%s' % self.repo.location) |
278 |
> + |
279 |
> submodule_paths = self._get_submodule_paths() |
280 |
> if submodule_paths: |
281 |
> # The only way to select multiple directories to |
282 |
> @@ -696,9 +768,10 @@ class RsyncSync(NewBase): |
283 |
> # /./ is special syntax supported with the |
284 |
> # rsync --relative option. |
285 |
> command.append(syncuri + "/./" + path) |
286 |
> - command.append(self.repo.location) |
287 |
> else: |
288 |
> - command.extend([syncuri + "/", self.repo.location]) |
289 |
> + command.append(syncuri + "/") |
290 |
> + |
291 |
> + command.append(download_dir) |
292 |
> |
293 |
> exitcode = None |
294 |
> try: |
295 |
> -- |
296 |
> 2.13.6 |
297 |
> |
298 |
> |
299 |
|
300 |
-- |
301 |
Fabian Groffen |
302 |
Gentoo on a different level |