1 |
The squashdelta module provides syncing via SquashFS snapshots. For the |
2 |
initial sync, a complete snapshot is fetched and placed in |
3 |
/var/cache/portage/squashfs. On subsequent sync operations, deltas are |
4 |
fetched from the mirror and used to reconstruct the newest snapshot. |
5 |
|
6 |
The distfile fetching logic is reused to fetch the remote files |
7 |
and verify their checksums. Additionally, the sha512sum.txt file should |
8 |
be OpenPGP-verified after fetching but this is currently unimplemented. |
9 |
|
10 |
After fetching, Portage tries to (re-)mount the SquashFS in the repository |
11 |
location. |
12 |
--- |
13 |
cnf/repos.conf | 4 + |
14 |
pym/portage/sync/modules/squashdelta/README | 124 +++++++++++ |
15 |
pym/portage/sync/modules/squashdelta/__init__.py | 37 ++++ |
16 |
.../sync/modules/squashdelta/squashdelta.py | 231 +++++++++++++++++++++ |
17 |
4 files changed, 396 insertions(+) |
18 |
create mode 100644 pym/portage/sync/modules/squashdelta/README |
19 |
create mode 100644 pym/portage/sync/modules/squashdelta/__init__.py |
20 |
create mode 100644 pym/portage/sync/modules/squashdelta/squashdelta.py |
21 |
|
22 |
diff --git a/cnf/repos.conf b/cnf/repos.conf |
23 |
index 1ca98ca..062fc0d 100644 |
24 |
--- a/cnf/repos.conf |
25 |
+++ b/cnf/repos.conf |
26 |
@@ -6,3 +6,7 @@ location = /usr/portage |
27 |
sync-type = rsync |
28 |
sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
29 |
auto-sync = yes |
30 |
+ |
31 |
+# for daily squashfs snapshots |
32 |
+#sync-type = squashdelta |
33 |
+#sync-uri = mirror://gentoo/../snapshots/squashfs |
34 |
diff --git a/pym/portage/sync/modules/squashdelta/README b/pym/portage/sync/modules/squashdelta/README |
35 |
new file mode 100644 |
36 |
index 0000000..994ae6d |
37 |
--- /dev/null |
38 |
+++ b/pym/portage/sync/modules/squashdelta/README |
39 |
@@ -0,0 +1,124 @@ |
40 |
+================== |
41 |
+ squashdelta-sync |
42 |
+================== |
43 |
+ |
44 |
+Introduction |
45 |
+============ |
46 |
+ |
47 |
+Squashdelta-sync provides the squashfs syncing module for Portage. |
48 |
+When used as sync-type for the repository, it fetches the complete |
49 |
+repository snapshot on initial sync, and then uses squashdeltas to |
50 |
+efficiently update it. |
51 |
+ |
52 |
+While initially intended for the daily snapshot of the Gentoo |
53 |
+repository, the module is designed with flexibility in mind. It can be |
54 |
+used to sync any repository, without enforcing any specific snapshotting |
55 |
+interval or versioning rules. However, each snapshot version identifier |
56 |
+must be unique in the scope of the repository. |
57 |
+ |
58 |
+ |
59 |
+Technical hosting details |
60 |
+========================= |
61 |
+ |
62 |
+The snapshot hosting needs to provide the following files: |
63 |
+ |
64 |
+1. the current (newest) full SquashFS snapshot of the repository, |
65 |
+ and optionally M past snapshots, |
66 |
+ |
67 |
+2. the deltas from N past snapshots to the current snapshot, |
68 |
+ |
69 |
+3. a ``sha512sum.txt`` file containing SHA-512 checksums of all hosted |
70 |
+ files, optionally OpenPGP-signed. |
71 |
+ |
72 |
+The following naming schemes are used for the snapshots and deltas, |
73 |
+respectively:: |
74 |
+ |
75 |
+ ${repo_name}-${version}.sqfs |
76 |
+ ${repo_name}-${old_version}-${new_version}.sqdelta |
77 |
+ |
78 |
+where: |
79 |
+ |
80 |
+* ``${repo_name}`` is the repository name (as specified |
81 |
+ in ``repos.conf``), |
82 |
+* ``${version}`` specifies the snapshot version, |
83 |
+* ``${old_version}`` specifies the snapshot version which the delta |
84 |
+ updates from, |
85 |
+* ``${new_version}`` specifies the snapshot version which the delta |
86 |
+ updates to. |
87 |
+ |
88 |
+Version can be an arbitrary string. It does not need to be incremental, |
89 |
+however each version must be unique in the repository scope. |
90 |
+For example, the version can be a date, a revision number or a commit |
91 |
+hash. |
92 |
+ |
93 |
+The ``sha512sum.txt`` uses the format used by the GNU coreutils |
94 |
+``sha512sum`` program. That is, it contains one or more lines consisting |
95 |
+of hexadecimal SHA-512 checksum followed by whitespace, followed by |
96 |
+a filename. Lines not matching that format should be ignored. |
97 |
+ |
98 |
+Optionally, the ``sha512sum.txt`` may be OpenPGP-signed. In that case, |
99 |
+the file conforms to the ASCII-armored OpenPGP message format, with |
100 |
+the checksums being stored in the message body. |
101 |
+ |
102 |
+Additionally, the ``sha512sum.txt`` needs to contain an additional line |
103 |
+containing the following string:: |
104 |
+ |
105 |
+ Current: ${repo_name}-${version} |
106 |
+ |
107 |
+This line states the current (newest) snapshot version. If snapshots for multiple |
108 |
+repositories are provided in the same directory (using the same |
109 |
+``sha512sum.txt`` file), this line can occur multiple times or list |
110 |
+multiple snapshots, whitespace-separated. In order not to introduce |
111 |
+stray lines in the file, it is recommended to embed this information |
112 |
+in the OpenPGP comment field. |
113 |
+ |
114 |
+An example script generating daily deltas for a repository can be found |
115 |
+in the squashdelta-daily-gen_ repository. |
116 |
+ |
117 |
+.. _squashdelta-daily-gen: https://bitbucket.org/mgorny/squashdelta-daily-gen |
118 |
+ |
119 |
+ |
120 |
+Technical syncing details |
121 |
+========================= |
122 |
+ |
123 |
+When performing a sync, the script first fetches the ``sha512sum.txt`` |
124 |
+and processes it in order to determine the list of files available |
125 |
+on the mirror. It should be noted that the script will never use |
126 |
+a snapshot or delta that is not listed there. If the file is |
127 |
+OpenPGP-signed, the signature should be verified (not yet implemented). |
128 |
+ |
129 |
+The script scans the ``sha512sum.txt`` for a line containing |
130 |
+the following string (case-insensitive):: |
131 |
+ |
132 |
+ Current: |
133 |
+ |
134 |
+The text following this string is split on whitespace, and the resulting |
135 |
+tokens are parsed as snapshot names. The one matching the current |
136 |
+repository name is used to determine the current (newest) snapshot |
137 |
+version. |
138 |
+ |
139 |
+Afterwards, the script scans the local cache directory for the following |
140 |
+symlink:: |
141 |
+ |
142 |
+ ${repo_name}-current.sqfs |
143 |
+ |
144 |
+If the symlink exists, the file it points to is assumed to be |
145 |
+the current (newest) local snapshot. Otherwise, the script assumes |
146 |
+initial sync. |
147 |
+ |
148 |
+On initial sync, the script fetches the newest snapshot from the mirror |
149 |
+and places it inside the cache directory. The snapshot checksum is verified |
150 |
+using ``sha512sum.txt`` and the ``${repo_name}-current.sqfs`` symlink is |
151 |
+created. |
152 |
+ |
153 |
+On update, the script scans the file list for a delta transforming |
154 |
+the current local snapshot to the newest remote snapshot. If such |
155 |
+a delta is found, it is fetched, verified and applied to obtain |
156 |
+the new snapshot. Afterwards, the resulting snapshot checksum is |
157 |
+verified and the ``${repo_name}-current.sqfs`` symlink is updated. |
158 |
+ |
159 |
+If no delta matches the version pair, it is assumed that the system is |
160 |
+outdated beyond available deltas and a new snapshot is fetched instead |
161 |
+(as on initial sync). |
162 |
+ |
163 |
+.. vim:ft=rst |
164 |
diff --git a/pym/portage/sync/modules/squashdelta/__init__.py b/pym/portage/sync/modules/squashdelta/__init__.py |
165 |
new file mode 100644 |
166 |
index 0000000..680835c |
167 |
--- /dev/null |
168 |
+++ b/pym/portage/sync/modules/squashdelta/__init__.py |
169 |
@@ -0,0 +1,37 @@ |
170 |
+# vim:fileencoding=utf-8:noet |
171 |
+# (c) 2015 Michał Górny <mgorny@g.o> |
172 |
+# Distributed under the terms of the GNU General Public License v2 |
173 |
+ |
174 |
+from portage.sync.config_checks import CheckSyncConfig |
175 |
+ |
176 |
+ |
177 |
+DEFAULT_CACHE_LOCATION = '/var/cache/portage/squashfs' |
178 |
+ |
179 |
+ |
180 |
class CheckSquashDeltaConfig(CheckSyncConfig):
    """Configuration checker for the squashdelta sync module.

    Extends ``CheckSyncConfig`` by registering one additional check,
    ``check_cache_location``.
    """

    def __init__(self, repo, logger):
        """Initialise the base checker and register the extra check name."""
        CheckSyncConfig.__init__(self, repo, logger)
        self.checks.append('check_cache_location')

    def check_cache_location(self):
        """Placeholder check for the snapshot cache location (no-op)."""
        # TODO: make it configurable when Portage is fixed to support
        # arbitrary config variables
        return None
189 |
+ |
190 |
+ |
191 |
# Module descriptor: names the sync class, lists the functions it exposes
# (with descriptions) and points at the configuration validator.
module_spec = {
    "name": "squashdelta",
    "description": "Syncing SquashFS images using SquashDeltas",
    "provides": {
        "squashdelta-module": {
            "name": "squashdelta",
            "class": "SquashDeltaSync",
            "description": "Syncing SquashFS images using SquashDeltas",
            "functions": ["sync"],
            "func_desc": {
                "sync": "Performs the sync of the repository",
            },
            "validate_config": CheckSquashDeltaConfig,
        },
    },
}
207 |
diff --git a/pym/portage/sync/modules/squashdelta/squashdelta.py b/pym/portage/sync/modules/squashdelta/squashdelta.py |
208 |
new file mode 100644 |
209 |
index 0000000..796a5f0 |
210 |
--- /dev/null |
211 |
+++ b/pym/portage/sync/modules/squashdelta/squashdelta.py |
212 |
@@ -0,0 +1,231 @@ |
213 |
+# vim:fileencoding=utf-8:noet |
214 |
+# (c) 2015 Michał Górny <mgorny@g.o> |
215 |
+# Distributed under the terms of the GNU General Public License v2 |
216 |
+ |
217 |
+import errno |
218 |
+import io |
219 |
+import logging |
220 |
+import os |
221 |
+import os.path |
222 |
+import re |
223 |
+ |
224 |
+import portage |
225 |
+from portage.package.ebuild.fetch import fetch |
226 |
+from portage.sync.syncbase import SyncBase |
227 |
+ |
228 |
+from . import DEFAULT_CACHE_LOCATION |
229 |
+ |
230 |
+ |
231 |
class SquashDeltaError(Exception):
    """Raised when a step of the squashdelta sync fails."""
233 |
+ |
234 |
+ |
235 |
class SquashDeltaSync(SyncBase):
    """Sync a repository from SquashFS snapshots and squashdeltas.

    On each sync the ``sha512sum.txt`` index is fetched from the mirror
    and parsed.  If a local snapshot exists and a matching delta is listed,
    the delta is fetched and merged; otherwise the full snapshot is
    fetched.  The ``<repo>-current.sqfs`` symlink is then updated and the
    snapshot is (re-)mounted at the repository location.
    """

    short_desc = "Repository syncing using SquashFS deltas"

    @staticmethod
    def name():
        """Return the name of this sync class."""
        return "SquashDeltaSync"

    def __init__(self):
        super(SquashDeltaSync, self).__init__(
            'squashmerge', 'dev-util/squashmerge')
        # The snapshot-name pattern depends on self.repo, which is only
        # expected to be available after sync() has called self._kwargs();
        # it is therefore compiled in _configure().
        self.repo_re = None

    def _configure(self):
        """Derive per-sync settings from the repository configuration.

        Sets ``my_settings`` (a clone of the settings with DISTDIR pointing
        at the cache), ``cache_location``, ``base_uri`` and ``repo_re``.
        """
        self.my_settings = portage.config(clone=self.settings)
        self.cache_location = DEFAULT_CACHE_LOCATION

        # override fetching location
        self.my_settings['DISTDIR'] = self.cache_location

        # make sure we append paths correctly
        self.base_uri = self.repo.sync_uri
        if not self.base_uri.endswith('/'):
            self.base_uri += '/'

        # escape the repository name so that it is matched literally
        self.repo_re = re.compile(re.escape(self.repo.name) + '-(.*)$')

    def _fetch(self, fn, **kwargs):
        """Fetch ``fn`` from the mirror into the cache directory.

        Extra keyword arguments are passed through to ``fetch()``.

        Returns True on success.
        Raises SquashDeltaError if ``fetch()`` reports failure.
        """
        # disable implicit mirrors support since it relies on file
        # being in distfiles/
        kwargs['try_mirrors'] = 0
        if not fetch([self.base_uri + fn], self.my_settings, **kwargs):
            raise SquashDeltaError('Unable to fetch ' + fn)
        return True

    def _openpgp_verify(self, data):
        """Verify the OpenPGP signature of ``sha512sum.txt``.

        ``data`` is the list of lines read from the file.  Verification is
        not implemented yet, so this always returns True; a warning is
        logged when the ``webrsync-gpg`` feature requests verification.
        """
        if 'webrsync-gpg' in self.my_settings.features:
            # TODO: OpenPGP signature verification
            # raise SquashDeltaError (or return False) if it fails
            logging.warning('OpenPGP verification of sha512sum.txt '
                'is not implemented yet, skipping')
        return True

    def _parse_sha512sum(self, path):
        """Parse the ``sha512sum.txt`` file at ``path``.

        Returns a tuple ``(snapshots, digests)`` where ``snapshots`` is the
        list of tokens following any ``Current:`` tag, and ``digests`` maps
        each listed filename to ``{'size': None, 'SHA512': <hex digest>}``.

        Raises SquashDeltaError if OpenPGP verification fails.
        """
        with io.open(path, 'r', encoding='utf8') as f:
            data = f.readlines()

        if not self._openpgp_verify(data):
            logging.error('OpenPGP verification failed for sha512sum.txt')
            raise SquashDeltaError()

        # current tag
        current_re = re.compile('current:', re.IGNORECASE)
        # checksum
        checksum_re = re.compile(r'^([a-f0-9]{128})\s+(.*)$', re.IGNORECASE)

        snapshots = []
        digests = {}
        for line in data:
            m = current_re.search(line)
            if m:
                snapshots.extend(line[m.end():].split())
            m = checksum_re.match(line)
            if m:
                digests[m.group(2)] = {
                    'size': None,
                    'SHA512': m.group(1),
                }

        return (snapshots, digests)

    def _find_newest_snapshot(self, snapshots):
        """Pick this repository's entry from the ``Current:`` tokens.

        Returns ``(filename, version, path)`` for the newest snapshot.
        Raises SquashDeltaError if no token matches the repository name.
        """
        for s in snapshots:
            m = self.repo_re.match(s)
            if m:
                new_snapshot = m.group(0) + '.sqfs'
                new_version = m.group(1)
                break
        else:
            logging.error('Unable to find current snapshot in sha512sum.txt')
            raise SquashDeltaError()

        new_path = os.path.join(self.cache_location, new_snapshot)
        return (new_snapshot, new_version, new_path)

    def _find_local_snapshot(self, current_path):
        """Resolve the local snapshot from the ``-current`` symlink.

        Returns ``(filename, version, path)``, or ``('', '', '')`` when the
        symlink cannot be read or its target is not a snapshot of this
        repository.
        """
        try:
            old_snapshot = os.readlink(current_path)
        except OSError:
            return ('', '', '')

        m = self.repo_re.match(old_snapshot)
        if not m or not old_snapshot.endswith('.sqfs'):
            logging.warning('Unrecognized -current symlink target: %s',
                old_snapshot)
            return ('', '', '')

        # group(1) still carries the file suffix; strip it
        old_version = m.group(1)[:-len('.sqfs')]
        old_path = os.path.join(self.cache_location, old_snapshot)
        return (old_snapshot, old_version, old_path)

    def _try_delta(self, old_version, new_version, old_path, new_path, my_digests):
        """Try to build the new snapshot from the old one using a delta.

        Returns the path of the applied delta file, or None when no delta
        for the version pair is listed in ``my_digests``.

        Raises SquashDeltaError if the merge tool is unavailable, or if
        fetching or merging the delta fails.
        """
        expected_delta = '%s-%s-%s.sqdelta' % (
            self.repo.name, old_version, new_version)
        if expected_delta not in my_digests:
            logging.warning('No delta for %s->%s, fetching new snapshot.',
                old_version, new_version)
            return None

        # check for the merge tool before downloading anything
        if not self.has_bin:
            raise SquashDeltaError()

        delta_path = os.path.join(self.cache_location, expected_delta)
        # _fetch() raises SquashDeltaError on failure
        self._fetch(expected_delta, digests=my_digests)

        ret = portage.process.spawn([self.bin_command,
            old_path, delta_path, new_path], **self.spawn_kwargs)
        if ret != os.EX_OK:
            logging.error('Merging the delta failed')
            raise SquashDeltaError()
        return delta_path

    def _update_symlink(self, new_snapshot, current_path):
        """Point the ``-current`` symlink at ``new_snapshot``.

        Raises SquashDeltaError if ``ln`` exits with a failure status.
        """
        # using external ln for two reasons:
        # 1. clean --force (unlike python's unlink+symlink)
        # 2. easy userpriv (otherwise we'd have to lchown())
        ret = portage.process.spawn(
            ['ln', '-s', '-f', new_snapshot, current_path],
            **self.spawn_kwargs)
        if ret != os.EX_OK:
            logging.error('Unable to set -current symlink')
            raise SquashDeltaError()

    def _cleanup(self, path):
        """Remove ``path``; failures are logged as warnings, not raised."""
        try:
            os.unlink(path)
        except OSError as e:
            logging.warning('Unable to clean up %s: %s', path, e)

    def _update_mount(self, current_path):
        """(Re-)mount the current snapshot at the repository location.

        Failures to unmount or mount are logged as warnings only.
        """
        mount_cmd = ['mount', current_path, self.repo.location]
        can_mount = True
        if os.path.ismount(self.repo.location):
            # need to umount old snapshot
            ret = portage.process.spawn(['umount', '-l', self.repo.location])
            if ret != os.EX_OK:
                logging.warning('Unable to unmount old SquashFS after update')
                can_mount = False
        else:
            try:
                os.makedirs(self.repo.location)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

        if can_mount:
            ret = portage.process.spawn(mount_cmd)
            if ret != os.EX_OK:
                logging.warning('Unable to (re-)mount SquashFS after update')

    def sync(self, **kwargs):
        """Perform the sync of the repository.

        Returns ``(0, True)`` on success and ``(1, False)`` on failure.
        """
        self._kwargs(kwargs)

        try:
            self._configure()

            # fetch sha512sum.txt; drop any stale copy first
            sha512_path = os.path.join(self.cache_location, 'sha512sum.txt')
            try:
                os.unlink(sha512_path)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    logging.error('Unable to unlink sha512sum.txt')
                    return (1, False)
            self._fetch('sha512sum.txt')

            snapshots, my_digests = self._parse_sha512sum(sha512_path)

            current_path = os.path.join(self.cache_location,
                self.repo.name + '-current.sqfs')
            new_snapshot, new_version, new_path = (
                self._find_newest_snapshot(snapshots))
            old_snapshot, old_version, old_path = (
                self._find_local_snapshot(current_path))

            # stays None unless a delta was actually applied
            delta_path = None
            if old_version:
                if old_version == new_version:
                    logging.info('Snapshot up-to-date, verifying integrity.')
                else:
                    delta_path = self._try_delta(old_version, new_version,
                        old_path, new_path, my_digests)
                # pass-through to verification and cleanup

            # fetch full snapshot or verify the one we have
            self._fetch(new_snapshot, digests=my_digests)

            # create/update -current symlink
            self._update_symlink(new_snapshot, current_path)

            # remove old snapshot; old_version is '' when there was none
            if old_version and old_version != new_version:
                self._cleanup(old_path)
            if delta_path is not None:
                self._cleanup(delta_path)
            self._cleanup(sha512_path)

            self._update_mount(current_path)

            return (0, True)
        except SquashDeltaError:
            return (1, False)
444 |
-- |
445 |
2.3.5 |