1 |
On Sun, 5 Apr 2015 12:08:31 +0200 |
2 |
Michał Górny <mgorny@g.o> wrote: |
3 |
|
4 |
> The squashdelta module provides syncing via SquashFS snapshots. For |
5 |
> the initial sync, a complete snapshot is fetched and placed in |
6 |
> /var/cache/portage/squashfs. On subsequent sync operations, deltas are |
7 |
> fetched from the mirror and used to reconstruct the newest snapshot. |
8 |
> |
9 |
> The distfile fetching logic is reused to fetch the remote files |
10 |
> and verify their checksums. Additionally, the sha512sum.txt file |
11 |
> should be OpenPGP-verified after fetching but this is currently |
12 |
> unimplemented. |
13 |
> |
14 |
> After fetching, Portage tries to (re-)mount the SquashFS in repository |
15 |
> location. |
16 |
> --- |
17 |
> cnf/repos.conf | 4 + |
18 |
> pym/portage/sync/modules/squashdelta/README | 124 |
19 |
> +++++++++++++ pym/portage/sync/modules/squashdelta/__init__.py | |
20 |
> 37 ++++ .../sync/modules/squashdelta/squashdelta.py | 192 |
21 |
> +++++++++++++++++++++ 4 files changed, 357 insertions(+) |
22 |
> create mode 100644 pym/portage/sync/modules/squashdelta/README |
23 |
> create mode 100644 pym/portage/sync/modules/squashdelta/__init__.py |
24 |
> create mode 100644 |
25 |
> pym/portage/sync/modules/squashdelta/squashdelta.py |
26 |
> |
27 |
> diff --git a/cnf/repos.conf b/cnf/repos.conf |
28 |
> index 1ca98ca..062fc0d 100644 |
29 |
> --- a/cnf/repos.conf |
30 |
> +++ b/cnf/repos.conf |
31 |
> @@ -6,3 +6,7 @@ location = /usr/portage |
32 |
> sync-type = rsync |
33 |
> sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
34 |
> auto-sync = yes |
35 |
> + |
36 |
> +# for daily squashfs snapshots |
37 |
> +#sync-type = squashdelta |
38 |
> +#sync-uri = mirror://gentoo/../snapshots/squashfs |
39 |
> |
40 |
|
41 |
<snip> |
42 |
|
43 |
> diff --git a/pym/portage/sync/modules/squashdelta/__init__.py |
44 |
> b/pym/portage/sync/modules/squashdelta/__init__.py new file mode |
45 |
> 100644 index 0000000..1a17dea |
46 |
> --- /dev/null |
47 |
> +++ b/pym/portage/sync/modules/squashdelta/__init__.py |
48 |
> @@ -0,0 +1,37 @@ |
49 |
> +# vim:fileencoding=utf-8:noet |
50 |
> +# (c) 2015 Michał Górny <mgorny@g.o> |
51 |
> +# Distributed under the terms of the GNU General Public License v2 |
52 |
> + |
53 |
> +from portage.sync.config_checks import CheckSyncConfig |
54 |
> + |
55 |
> + |
56 |
> +DEFAULT_CACHE_LOCATION = '/var/cache/portage/squashfs' |
57 |
> + |
58 |
> + |
59 |
> +class CheckSquashDeltaConfig(CheckSyncConfig): |
60 |
> + def __init__(self, repo, logger): |
61 |
> + CheckSyncConfig.__init__(self, repo, logger) |
62 |
> + self.checks.append('check_cache_location') |
63 |
> + |
64 |
> + def check_cache_location(self): |
65 |
> + # TODO: make it configurable when Portage is fixed |
66 |
> to support |
67 |
> + # arbitrary config variables |
68 |
> + pass |
69 |
> + |
70 |
> + |
71 |
> +module_spec = { |
72 |
> + 'name': 'squashdelta', |
73 |
> + 'description': 'Syncing SquashFS images using SquashDeltas', |
74 |
> + 'provides': { |
75 |
> + 'squashdelta-module': { |
76 |
> + 'name': "squashdelta", |
77 |
> + 'class': "SquashDeltaSync", |
78 |
> + 'description': 'Syncing SquashFS images |
79 |
> using SquashDeltas', |
80 |
> + 'functions': ['sync', 'new', 'exists'], |
81 |
> + 'func_desc': { |
82 |
> + 'sync': 'Performs the sync of the |
83 |
> repository', |
84 |
> + }, |
85 |
> + 'validate_config': CheckSquashDeltaConfig, |
86 |
> + } |
87 |
> + } |
88 |
> +} |
89 |
> diff --git a/pym/portage/sync/modules/squashdelta/squashdelta.py |
90 |
> b/pym/portage/sync/modules/squashdelta/squashdelta.py new file mode |
91 |
> 100644 index 0000000..a0dfc46 |
92 |
> --- /dev/null |
93 |
> +++ b/pym/portage/sync/modules/squashdelta/squashdelta.py |
94 |
> @@ -0,0 +1,192 @@ |
95 |
> +# vim:fileencoding=utf-8:noet |
96 |
> +# (c) 2015 Michał Górny <mgorny@g.o> |
97 |
> +# Distributed under the terms of the GNU General Public License v2 |
98 |
> + |
99 |
> +import errno |
100 |
> +import io |
101 |
> +import logging |
102 |
> +import os |
103 |
> +import os.path |
104 |
> +import re |
105 |
> + |
106 |
> +import portage |
107 |
> +from portage.package.ebuild.fetch import fetch |
108 |
> +from portage.sync.syncbase import SyncBase |
109 |
> + |
110 |
> +from . import DEFAULT_CACHE_LOCATION |
111 |
> + |
112 |
> + |
113 |
> +class SquashDeltaSync(SyncBase): |
114 |
|
115 |
|
116 |
OK, I see a small mistake. You are subclassing SyncBase, which does not |
117 |
stub out a new() and you do not define one here. But you export a new() |
118 |
in the module-spec above. |
119 |
|
120 |
|
121 |
> + short_desc = "Repository syncing using SquashFS deltas" |
122 |
> + |
123 |
> + @staticmethod |
124 |
> + def name(): |
125 |
> + return "SquashDeltaSync" |
126 |
> + |
127 |
> + def __init__(self): |
128 |
> + super(SquashDeltaSync, self).__init__( |
129 |
> + 'squashmerge', |
130 |
> 'dev-util/squashmerge') + |
131 |
> + def sync(self, **kwargs): |
132 |
> + self._kwargs(kwargs) |
133 |
> + my_settings = portage.config(clone = self.settings) |
134 |
> + cache_location = DEFAULT_CACHE_LOCATION |
135 |
> + |
136 |
> + # override fetching location |
137 |
> + my_settings['DISTDIR'] = cache_location |
138 |
> + |
139 |
> + # make sure we append paths correctly |
140 |
> + base_uri = self.repo.sync_uri |
141 |
> + if not base_uri.endswith('/'): |
142 |
> + base_uri += '/' |
143 |
> + |
144 |
> + def my_fetch(fn, **kwargs): |
145 |
> + kwargs['try_mirrors'] = 0 |
146 |
> + return fetch([base_uri + fn], my_settings, |
147 |
> **kwargs) + |
148 |
> + # fetch sha512sum.txt |
149 |
> + sha512_path = os.path.join(cache_location, |
150 |
> 'sha512sum.txt') |
151 |
> + try: |
152 |
> + os.unlink(sha512_path) |
153 |
> + except OSError: |
154 |
> + pass |
155 |
> + if not my_fetch('sha512sum.txt'): |
156 |
> + return (1, False) |
157 |
> + |
158 |
> + if 'webrsync-gpg' in my_settings.features: |
159 |
> + # TODO: GPG signature verification |
160 |
> + pass |
161 |
> + |
162 |
> + # sha512sum.txt parsing |
163 |
> + with io.open(sha512_path, 'r', encoding='utf8') as f: |
164 |
> + data = f.readlines() |
165 |
> + |
166 |
> + repo_re = re.compile(self.repo.name + '-(.*)$') |
167 |
> + # current tag |
168 |
> + current_re = re.compile('current:', re.IGNORECASE) |
169 |
> + # checksum |
170 |
> + checksum_re = re.compile('^([a-f0-9]{128})\s+(.*)$', |
171 |
> re.IGNORECASE) + |
172 |
> + def iter_snapshots(lines): |
173 |
> + for l in lines: |
174 |
> + m = current_re.search(l) |
175 |
> + if m: |
176 |
> + for s in l[m.end():].split(): |
177 |
> + yield s |
178 |
> + |
179 |
> + def iter_checksums(lines): |
180 |
> + for l in lines: |
181 |
> + m = checksum_re.match(l) |
182 |
> + if m: |
183 |
> + yield (m.group(2), { |
184 |
> + 'size': None, |
185 |
> + 'SHA512': m.group(1), |
186 |
> + }) |
187 |
> + |
188 |
> + # look for current indicator |
189 |
> + for s in iter_snapshots(data): |
190 |
> + m = repo_re.match(s) |
191 |
> + if m: |
192 |
> + new_snapshot = m.group(0) + '.sqfs' |
193 |
> + new_version = m.group(1) |
194 |
> + break |
195 |
> + else: |
196 |
> + logging.error('Unable to find current |
197 |
> snapshot in sha512sum.txt') |
198 |
> + return (1, False) |
199 |
> + new_path = os.path.join(cache_location, new_snapshot) |
200 |
> + |
201 |
> + # get digests |
202 |
> + my_digests = dict(iter_checksums(data)) |
203 |
> + |
204 |
> + # try to find a local snapshot |
205 |
> + old_version = None |
206 |
> + current_path = os.path.join(cache_location, |
207 |
> + self.repo.name + '-current.sqfs') |
208 |
> + try: |
209 |
> + old_snapshot = os.readlink(current_path) |
210 |
> + except OSError: |
211 |
> + pass |
212 |
> + else: |
213 |
> + m = repo_re.match(old_snapshot) |
214 |
> + if m and old_snapshot.endswith('.sqfs'): |
215 |
> + old_version = m.group(1)[:-5] |
216 |
> + old_path = |
217 |
> os.path.join(cache_location, old_snapshot) + |
218 |
> + if old_version is not None: |
219 |
> + if old_version == new_version: |
220 |
> + logging.info('Snapshot up-to-date, |
221 |
> verifying integrity.') |
222 |
> + else: |
223 |
> + # attempt to update |
224 |
> + delta_path = None |
225 |
> + expected_delta = '%s-%s-%s.sqdelta' |
226 |
> % ( |
227 |
> + self.repo.name, |
228 |
> old_version, new_version) |
229 |
> + if expected_delta not in my_digests: |
230 |
> + logging.warning('No delta |
231 |
> for %s->%s, fetching new snapshot.' |
232 |
> + % |
233 |
> (old_version, new_version)) |
234 |
> + else: |
235 |
> + delta_path = |
236 |
> os.path.join(cache_location, expected_delta) + |
237 |
> + if not |
238 |
> my_fetch(expected_delta, digests = my_digests): |
239 |
> + return (4, False) |
240 |
> + if not self.has_bin: |
241 |
> + return (5, False) |
242 |
> + |
243 |
> + ret = |
244 |
> portage.process.spawn([self.bin_command, |
245 |
> + old_path, |
246 |
> delta_path, new_path], **self.spawn_kwargs) |
247 |
> + if ret != os.EX_OK: |
248 |
> + |
249 |
> logging.error('Merging the delta failed') |
250 |
> + return (6, False) |
251 |
> + |
252 |
> + # pass-through to |
253 |
> verification and cleanup + |
254 |
> + # fetch full snapshot or verify the one we have |
255 |
> + if not my_fetch(new_snapshot, digests = my_digests): |
256 |
> + return (2, False) |
257 |
> + |
258 |
> + # create/update -current symlink |
259 |
> + # using external ln for two reasons: |
260 |
> + # 1. clean --force (unlike python's unlink+symlink) |
261 |
> + # 2. easy userpriv (otherwise we'd have to lchown()) |
262 |
> + ret = portage.process.spawn(['ln', '-s', '-f', |
263 |
> new_snapshot, current_path], |
264 |
> + **self.spawn_kwargs) |
265 |
> + if ret != os.EX_OK: |
266 |
> + logging.error('Unable to set -current |
267 |
> symlink') |
268 |
> + retrurn (3, False) |
269 |
> + |
270 |
> + # remove old snapshot |
271 |
> + if old_version is not None and old_version != |
272 |
> new_version: |
273 |
> + try: |
274 |
> + os.unlink(old_path) |
275 |
> + except OSError as e: |
276 |
> + logging.warning('Unable to unlink |
277 |
> old snapshot: ' + str(e)) |
278 |
> + if delta_path is not None: |
279 |
> + try: |
280 |
> + os.unlink(delta_path) |
281 |
> + except OSError as e: |
282 |
> + logging.warning('Unable to |
283 |
> unlink old delta: ' + str(e)) |
284 |
> + try: |
285 |
> + os.unlink(sha512_path) |
286 |
> + except OSError as e: |
287 |
> + logging.warning('Unable to unlink |
288 |
> sha512sum.txt: ' + str(e)) + |
289 |
> + mount_cmd = ['mount', current_path, |
290 |
> self.repo.location] |
291 |
> + can_mount = True |
292 |
> + if os.path.ismount(self.repo.location): |
293 |
> + # need to umount old snapshot |
294 |
> + ret = portage.process.spawn(['umount', '-l', |
295 |
> self.repo.location]) |
296 |
> + if ret != os.EX_OK: |
297 |
> + logging.warning('Unable to unmount |
298 |
> old SquashFS after update') |
299 |
> + can_mount = False |
300 |
> + else: |
301 |
> + try: |
302 |
> + os.makedirs(self.repo.location) |
303 |
> + except OSError as e: |
304 |
> + if e.errno != errno.EEXIST: |
305 |
> + raise |
306 |
> + |
307 |
> + if can_mount: |
308 |
> + ret = portage.process.spawn(mount_cmd) |
309 |
> + if ret != os.EX_OK: |
310 |
> + logging.warning('Unable to |
311 |
> (re-)mount SquashFS after update') + |
312 |
> + return (0, True) |
313 |
|
314 |
Overall the code itself looks decent. Aside from the small mistake |
315 |
mentioned inline, my only concern is the sheer size of the sync(). It |
316 |
is 162 lines and embeds 2 private functions. This code could easily be |
317 |
broken up into several smaller task functions. It would make reading |
318 |
the main sync() logic easier as well as the smaller task sections. I |
319 |
am not a fan of the long-winded functions and scripts present in |
320 |
portage (this by no means is in the same category as many of those). |
321 |
But I certainly don't want to let more of that in if I can help it, and |
321 |
I aim to reduce it while I'm the lead. |
323 |
|
324 |
|
325 |
Ok, so the only data variable you wanted to add to the repos.conf was |
326 |
the cache location? |
327 |
|
328 |
I'll work on adding the gkeys integration in the gkeys branch I started |
329 |
for the GPG verification. I see no point in porting the code from |
329 |
emerge-webrsync's Bash to Python only for it to be replaced by gkeys in the |
331 |
very near future. Please stub out a function & call for it when you |
332 |
address the above issues. I'll fill in the code for it. |
333 |
|
334 |
-- |
335 |
Brian Dolbec <dolsen> |