1 |
Dnia 2015-04-16, o godz. 10:38:22 |
2 |
Brian Dolbec <dolsen@g.o> napisał(a): |
3 |
|
4 |
> On Sun, 5 Apr 2015 12:08:31 +0200 |
5 |
> Michał Górny <mgorny@g.o> wrote: |
6 |
> |
7 |
> > The squashdelta module provides syncing via SquashFS snapshots. For |
8 |
> > the initial sync, a complete snapshot is fetched and placed in |
9 |
> > /var/cache/portage/squashfs. On subsequent sync operations, deltas are |
10 |
> > fetched from the mirror and used to reconstruct the newest snapshot. |
11 |
> > |
12 |
> > The distfile fetching logic is reused to fetch the remote files |
13 |
> > and verify their checksums. Additionally, the sha512sum.txt file |
14 |
> > should be OpenPGP-verified after fetching but this is currently |
15 |
> > unimplemented. |
16 |
> > |
17 |
> > After fetching, Portage tries to (re-)mount the SquashFS in repository |
18 |
> > location. |
19 |
> > --- |
20 |
> > cnf/repos.conf | 4 + |
21 |
> > pym/portage/sync/modules/squashdelta/README | 124 |
22 |
> > +++++++++++++ pym/portage/sync/modules/squashdelta/__init__.py | |
23 |
> > 37 ++++ .../sync/modules/squashdelta/squashdelta.py | 192 |
24 |
> > +++++++++++++++++++++ 4 files changed, 357 insertions(+) |
25 |
> > create mode 100644 pym/portage/sync/modules/squashdelta/README |
26 |
> > create mode 100644 pym/portage/sync/modules/squashdelta/__init__.py |
27 |
> > create mode 100644 |
28 |
> > pym/portage/sync/modules/squashdelta/squashdelta.py |
29 |
> > |
30 |
> > diff --git a/cnf/repos.conf b/cnf/repos.conf |
31 |
> > index 1ca98ca..062fc0d 100644 |
32 |
> > --- a/cnf/repos.conf |
33 |
> > +++ b/cnf/repos.conf |
34 |
> > @@ -6,3 +6,7 @@ location = /usr/portage |
35 |
> > sync-type = rsync |
36 |
> > sync-uri = rsync://rsync.gentoo.org/gentoo-portage |
37 |
> > auto-sync = yes |
38 |
> > + |
39 |
> > +# for daily squashfs snapshots |
40 |
> > +#sync-type = squashdelta |
41 |
> > +#sync-uri = mirror://gentoo/../snapshots/squashfs |
42 |
> > |
43 |
> |
44 |
> <snip> |
45 |
> |
46 |
> > diff --git a/pym/portage/sync/modules/squashdelta/__init__.py |
47 |
> > b/pym/portage/sync/modules/squashdelta/__init__.py new file mode |
48 |
> > 100644 index 0000000..1a17dea |
49 |
> > --- /dev/null |
50 |
> > +++ b/pym/portage/sync/modules/squashdelta/__init__.py |
51 |
> > @@ -0,0 +1,37 @@ |
52 |
> > +# vim:fileencoding=utf-8:noet |
53 |
> > +# (c) 2015 Michał Górny <mgorny@g.o> |
54 |
> > +# Distributed under the terms of the GNU General Public License v2 |
55 |
> > + |
56 |
> > +from portage.sync.config_checks import CheckSyncConfig |
57 |
> > + |
58 |
> > + |
59 |
> > +DEFAULT_CACHE_LOCATION = '/var/cache/portage/squashfs' |
60 |
> > + |
61 |
> > + |
62 |
> > +class CheckSquashDeltaConfig(CheckSyncConfig): |
63 |
> > + def __init__(self, repo, logger): |
64 |
> > + CheckSyncConfig.__init__(self, repo, logger) |
65 |
> > + self.checks.append('check_cache_location') |
66 |
> > + |
67 |
> > + def check_cache_location(self): |
68 |
> > + # TODO: make it configurable when Portage is fixed |
69 |
> > to support |
70 |
> > + # arbitrary config variables |
71 |
> > + pass |
72 |
> > + |
73 |
> > + |
74 |
> > +module_spec = { |
75 |
> > + 'name': 'squashdelta', |
76 |
> > + 'description': 'Syncing SquashFS images using SquashDeltas', |
77 |
> > + 'provides': { |
78 |
> > + 'squashdelta-module': { |
79 |
> > + 'name': "squashdelta", |
80 |
> > + 'class': "SquashDeltaSync", |
81 |
> > + 'description': 'Syncing SquashFS images |
82 |
> > using SquashDeltas', |
83 |
> > + 'functions': ['sync', 'new', 'exists'], |
84 |
> > + 'func_desc': { |
85 |
> > + 'sync': 'Performs the sync of the |
86 |
> > repository', |
87 |
> > + }, |
88 |
> > + 'validate_config': CheckSquashDeltaConfig, |
89 |
> > + } |
90 |
> > + } |
91 |
> > +} |
92 |
> > diff --git a/pym/portage/sync/modules/squashdelta/squashdelta.py |
93 |
> > b/pym/portage/sync/modules/squashdelta/squashdelta.py new file mode |
94 |
> > 100644 index 0000000..a0dfc46 |
95 |
> > --- /dev/null |
96 |
> > +++ b/pym/portage/sync/modules/squashdelta/squashdelta.py |
97 |
> > @@ -0,0 +1,192 @@ |
98 |
> > +# vim:fileencoding=utf-8:noet |
99 |
> > +# (c) 2015 Michał Górny <mgorny@g.o> |
100 |
> > +# Distributed under the terms of the GNU General Public License v2 |
101 |
> > + |
102 |
> > +import errno |
103 |
> > +import io |
104 |
> > +import logging |
105 |
> > +import os |
106 |
> > +import os.path |
107 |
> > +import re |
108 |
> > + |
109 |
> > +import portage |
110 |
> > +from portage.package.ebuild.fetch import fetch |
111 |
> > +from portage.sync.syncbase import SyncBase |
112 |
> > + |
113 |
> > +from . import DEFAULT_CACHE_LOCATION |
114 |
> > + |
115 |
> > + |
116 |
> > +class SquashDeltaSync(SyncBase): |
117 |
> |
118 |
> |
119 |
> OK, I see a small mistake. You are subclassing SyncBase which does not |
120 |
> stub out a new() and you do not define one here. But you export a new() |
121 |
> in the module-spec above. |
122 |
|
123 |
Fixed. I removed them from [func_desc], and apparently forgot |
124 |
to do so from [functions]. |
125 |
|
126 |
> > + short_desc = "Repository syncing using SquashFS deltas" |
127 |
> > + |
128 |
> > + @staticmethod |
129 |
> > + def name(): |
130 |
> > + return "SquashDeltaSync" |
131 |
> > + |
132 |
> > + def __init__(self): |
133 |
> > + super(SquashDeltaSync, self).__init__( |
134 |
> > + 'squashmerge', |
135 |
> > 'dev-util/squashmerge') + |
136 |
> > + def sync(self, **kwargs): |
137 |
> > + self._kwargs(kwargs) |
138 |
> > + my_settings = portage.config(clone = self.settings) |
139 |
> > + cache_location = DEFAULT_CACHE_LOCATION |
140 |
> > + |
141 |
> > + # override fetching location |
142 |
> > + my_settings['DISTDIR'] = cache_location |
143 |
> > + |
144 |
> > + # make sure we append paths correctly |
145 |
> > + base_uri = self.repo.sync_uri |
146 |
> > + if not base_uri.endswith('/'): |
147 |
> > + base_uri += '/' |
148 |
> > + |
149 |
> > + def my_fetch(fn, **kwargs): |
150 |
> > + kwargs['try_mirrors'] = 0 |
151 |
> > + return fetch([base_uri + fn], my_settings, |
152 |
> > **kwargs) + |
153 |
> > + # fetch sha512sum.txt |
154 |
> > + sha512_path = os.path.join(cache_location, |
155 |
> > 'sha512sum.txt') |
156 |
> > + try: |
157 |
> > + os.unlink(sha512_path) |
158 |
> > + except OSError: |
159 |
> > + pass |
160 |
> > + if not my_fetch('sha512sum.txt'): |
161 |
> > + return (1, False) |
162 |
> > + |
163 |
> > + if 'webrsync-gpg' in my_settings.features: |
164 |
> > + # TODO: GPG signature verification |
165 |
> > + pass |
166 |
> > + |
167 |
> > + # sha512sum.txt parsing |
168 |
> > + with io.open(sha512_path, 'r', encoding='utf8') as f: |
169 |
> > + data = f.readlines() |
170 |
> > + |
171 |
> > + repo_re = re.compile(self.repo.name + '-(.*)$') |
172 |
> > + # current tag |
173 |
> > + current_re = re.compile('current:', re.IGNORECASE) |
174 |
> > + # checksum |
175 |
> > + checksum_re = re.compile('^([a-f0-9]{128})\s+(.*)$', |
176 |
> > re.IGNORECASE) + |
177 |
> > + def iter_snapshots(lines): |
178 |
> > + for l in lines: |
179 |
> > + m = current_re.search(l) |
180 |
> > + if m: |
181 |
> > + for s in l[m.end():].split(): |
182 |
> > + yield s |
183 |
> > + |
184 |
> > + def iter_checksums(lines): |
185 |
> > + for l in lines: |
186 |
> > + m = checksum_re.match(l) |
187 |
> > + if m: |
188 |
> > + yield (m.group(2), { |
189 |
> > + 'size': None, |
190 |
> > + 'SHA512': m.group(1), |
191 |
> > + }) |
192 |
> > + |
193 |
> > + # look for current indicator |
194 |
> > + for s in iter_snapshots(data): |
195 |
> > + m = repo_re.match(s) |
196 |
> > + if m: |
197 |
> > + new_snapshot = m.group(0) + '.sqfs' |
198 |
> > + new_version = m.group(1) |
199 |
> > + break |
200 |
> > + else: |
201 |
> > + logging.error('Unable to find current |
202 |
> > snapshot in sha512sum.txt') |
203 |
> > + return (1, False) |
204 |
> > + new_path = os.path.join(cache_location, new_snapshot) |
205 |
> > + |
206 |
> > + # get digests |
207 |
> > + my_digests = dict(iter_checksums(data)) |
208 |
> > + |
209 |
> > + # try to find a local snapshot |
210 |
> > + old_version = None |
211 |
> > + current_path = os.path.join(cache_location, |
212 |
> > + self.repo.name + '-current.sqfs') |
213 |
> > + try: |
214 |
> > + old_snapshot = os.readlink(current_path) |
215 |
> > + except OSError: |
216 |
> > + pass |
217 |
> > + else: |
218 |
> > + m = repo_re.match(old_snapshot) |
219 |
> > + if m and old_snapshot.endswith('.sqfs'): |
220 |
> > + old_version = m.group(1)[:-5] |
221 |
> > + old_path = |
222 |
> > os.path.join(cache_location, old_snapshot) + |
223 |
> > + if old_version is not None: |
224 |
> > + if old_version == new_version: |
225 |
> > + logging.info('Snapshot up-to-date, |
226 |
> > verifying integrity.') |
227 |
> > + else: |
228 |
> > + # attempt to update |
229 |
> > + delta_path = None |
230 |
> > + expected_delta = '%s-%s-%s.sqdelta' |
231 |
> > % ( |
232 |
> > + self.repo.name, |
233 |
> > old_version, new_version) |
234 |
> > + if expected_delta not in my_digests: |
235 |
> > + logging.warning('No delta |
236 |
> > for %s->%s, fetching new snapshot.' |
237 |
> > + % |
238 |
> > (old_version, new_version)) |
239 |
> > + else: |
240 |
> > + delta_path = |
241 |
> > os.path.join(cache_location, expected_delta) + |
242 |
> > + if not |
243 |
> > my_fetch(expected_delta, digests = my_digests): |
244 |
> > + return (4, False) |
245 |
> > + if not self.has_bin: |
246 |
> > + return (5, False) |
247 |
> > + |
248 |
> > + ret = |
249 |
> > portage.process.spawn([self.bin_command, |
250 |
> > + old_path, |
251 |
> > delta_path, new_path], **self.spawn_kwargs) |
252 |
> > + if ret != os.EX_OK: |
253 |
> > + |
254 |
> > logging.error('Merging the delta failed') |
255 |
> > + return (6, False) |
256 |
> > + |
257 |
> > + # pass-through to |
258 |
> > verification and cleanup + |
259 |
> > + # fetch full snapshot or verify the one we have |
260 |
> > + if not my_fetch(new_snapshot, digests = my_digests): |
261 |
> > + return (2, False) |
262 |
> > + |
263 |
> > + # create/update -current symlink |
264 |
> > + # using external ln for two reasons: |
265 |
> > + # 1. clean --force (unlike python's unlink+symlink) |
266 |
> > + # 2. easy userpriv (otherwise we'd have to lchown()) |
267 |
> > + ret = portage.process.spawn(['ln', '-s', '-f', |
268 |
> > new_snapshot, current_path], |
269 |
> > + **self.spawn_kwargs) |
270 |
> > + if ret != os.EX_OK: |
271 |
> > + logging.error('Unable to set -current |
272 |
> > symlink') |
273 |
> > + retrurn (3, False) |
274 |
> > + |
275 |
> > + # remove old snapshot |
276 |
> > + if old_version is not None and old_version != |
277 |
> > new_version: |
278 |
> > + try: |
279 |
> > + os.unlink(old_path) |
280 |
> > + except OSError as e: |
281 |
> > + logging.warning('Unable to unlink |
282 |
> > old snapshot: ' + str(e)) |
283 |
> > + if delta_path is not None: |
284 |
> > + try: |
285 |
> > + os.unlink(delta_path) |
286 |
> > + except OSError as e: |
287 |
> > + logging.warning('Unable to |
288 |
> > unlink old delta: ' + str(e)) |
289 |
> > + try: |
290 |
> > + os.unlink(sha512_path) |
291 |
> > + except OSError as e: |
292 |
> > + logging.warning('Unable to unlink |
293 |
> > sha512sum.txt: ' + str(e)) + |
294 |
> > + mount_cmd = ['mount', current_path, |
295 |
> > self.repo.location] |
296 |
> > + can_mount = True |
297 |
> > + if os.path.ismount(self.repo.location): |
298 |
> > + # need to umount old snapshot |
299 |
> > + ret = portage.process.spawn(['umount', '-l', |
300 |
> > self.repo.location]) |
301 |
> > + if ret != os.EX_OK: |
302 |
> > + logging.warning('Unable to unmount |
303 |
> > old SquashFS after update') |
304 |
> > + can_mount = False |
305 |
> > + else: |
306 |
> > + try: |
307 |
> > + os.makedirs(self.repo.location) |
308 |
> > + except OSError as e: |
309 |
> > + if e.errno != errno.EEXIST: |
310 |
> > + raise |
311 |
> > + |
312 |
> > + if can_mount: |
313 |
> > + ret = portage.process.spawn(mount_cmd) |
314 |
> > + if ret != os.EX_OK: |
315 |
> > + logging.warning('Unable to |
316 |
> > (re-)mount SquashFS after update') + |
317 |
> > + return (0, True) |
318 |
> |
319 |
> Overall the code itself looks decent. Aside from the small mistake |
320 |
> mentioned inline, my only concern is the sheer size of the sync(). It |
321 |
> is 162 lines and embeds 2 private functions. This code could easily be |
322 |
> broken up into several smaller task functions. It would make reading |
323 |
> the main sync() logic easier as well as the smaller task sections. I |
324 |
> am not a fan of the long winded functions and scripts present in |
325 |
> portage (this by no means is in the same category as many of those). |
326 |
> But I certainly don't want to let more of that in if I can help it. And |
327 |
> aim to reduce it while I'm the lead. |
328 |
|
329 |
Will try. |
330 |
|
331 |
> Ok, so the only data variable you wanted to add to the repos.conf was |
332 |
> the cache location? |
333 |
|
334 |
Yes, right now just that, I think. Maybe in the future we'd use |
335 |
per-repo OpenPGP verification switch. Something like: |
336 |
|
337 |
openpgp-verify = [no|yes|required] |
338 |
|
339 |
With 'yes' meaning 'verify if signed', and 'required' meaning 'refuse |
340 |
if not signed'. |
341 |
|
342 |
> I'll work on adding the gkeys integration in the gkeys branch I started |
343 |
> for the gpg verification. I see no point in porting the code from |
344 |
> emerge-webrsync's bash to python only to be replaced by gkeys in the |
345 |
> very near future. Please stub out a function & call for it when you |
346 |
> address the above issues. I'll fill in the code for it. |
347 |
|
348 |
Ok. |
349 |
|
350 |
-- |
351 |
Best regards, |
352 |
Michał Górny |