Gentoo Archives: gentoo-commits

From: Zac Medico <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage:master commit in: pym/portage/cache/
Date: Wed, 13 Jul 2016 11:32:16
Message-Id: 1468409374.9abbda7d054761ae6c333d3e6d420632b9658b6d.zmedico@gentoo
1 commit: 9abbda7d054761ae6c333d3e6d420632b9658b6d
2 Author: Zac Medico <zmedico <AT> gentoo <DOT> org>
3 AuthorDate: Sun Jul 10 06:11:41 2016 +0000
4 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
5 CommitDate: Wed Jul 13 11:29:34 2016 +0000
6 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=9abbda7d
7
8 portage.cache: write md5 instead of mtime (bug 568934)
9
10 Change cache modules to write md5 in cache entries, instead of mtime.
11 Since portage-2.2.27, the relevant cache modules have had the ability
12 to read cache entries containing either md5 or mtime; therefore this
13 change is backward-compatible with portage-2.2.27 and later.
14
15 Also fix the reconstruct_eclasses function to raise CacheCorruption
16 when the specified chf_type is md5 and the cache entry contains mtime
17 data, and optimize __getitem__ to skip reconstruct_eclasses calls when
18 the entry appears to have a different chf_type.
19
20 X-Gentoo-Bug: 568934
21 X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
22 Acked-by: Alexander Berntsen <bernalex <AT> gentoo.org>
23
24 pym/portage/cache/anydbm.py | 4 ++--
25 pym/portage/cache/flat_hash.py | 4 ++--
26 pym/portage/cache/sqlite.py | 4 ++--
27 pym/portage/cache/template.py | 36 ++++++++++++++++++++++++++++++++----
28 4 files changed, 38 insertions(+), 10 deletions(-)
29
30 diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py
31 index 80d24e5..88d85b0 100644
32 --- a/pym/portage/cache/anydbm.py
33 +++ b/pym/portage/cache/anydbm.py
34 @@ -36,8 +36,8 @@ from portage.cache import cache_errors
35
36 class database(fs_template.FsBased):
37
38 - validation_chf = 'mtime'
39 - chf_types = ('mtime', 'md5')
40 + validation_chf = 'md5'
41 + chf_types = ('md5', 'mtime')
42
43 autocommits = True
44 cleanse_keys = True
45
46 diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py
47 index cca0f10..3a899c0 100644
48 --- a/pym/portage/cache/flat_hash.py
49 +++ b/pym/portage/cache/flat_hash.py
50 @@ -163,5 +163,5 @@ class md5_database(database):
51
52
53 class mtime_md5_database(database):
54 - validation_chf = 'mtime'
55 - chf_types = ('mtime', 'md5')
56 + validation_chf = 'md5'
57 + chf_types = ('md5', 'mtime')
58
59 diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py
60 index 32e4076..69150f6 100644
61 --- a/pym/portage/cache/sqlite.py
62 +++ b/pym/portage/cache/sqlite.py
63 @@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000:
64
65 class database(fs_template.FsBased):
66
67 - validation_chf = 'mtime'
68 - chf_types = ('mtime', 'md5')
69 + validation_chf = 'md5'
70 + chf_types = ('md5', 'mtime')
71
72 autocommits = False
73 synchronous = False
74
75 diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py
76 index a7c6de0..8662d85 100644
77 --- a/pym/portage/cache/template.py
78 +++ b/pym/portage/cache/template.py
79 @@ -54,6 +54,15 @@ class database(object):
80
81 if self.serialize_eclasses and "_eclasses_" in d:
82 for chf_type in chf_types:
83 + if '_%s_' % chf_type not in d:
84 + # Skip the reconstruct_eclasses call, since it's
85 + # a waste of time if it contains a different chf_type
86 + # than the current one. In the past, it was possible
87 + # for reconstruct_eclasses called with chf_type='md5'
88 + # to "successfully" return invalid data here, because
89 + # it was unable to distinguish between md5 data and
90 + # mtime data.
91 + continue
92 try:
93 d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
94 chf_type, paths=self.store_eclass_paths)
95 @@ -62,6 +71,9 @@ class database(object):
96 raise
97 else:
98 break
99 + else:
100 + raise cache_errors.CacheCorruption(cpv,
101 + 'entry does not contain a recognized chf_type')
102
103 elif "_eclasses_" not in d:
104 d["_eclasses_"] = {}
105 @@ -310,6 +322,23 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True):
106 for k, v in sorted(eclass_dict.items(), key=_keysorter))
107
108
109 +def _md5_deserializer(md5):
110 + """
111 + Without this validation, it's possible for reconstruct_eclasses to
112 + mistakenly interpret mtime data as md5 data, and return an invalid
113 + data structure containing strings where ints are expected.
114 + """
115 + if len(md5) != 32:
116 + raise ValueError('expected 32 hex digits')
117 + return md5
118 +
119 +
120 +_chf_deserializers = {
121 + 'md5': _md5_deserializer,
122 + 'mtime': long,
123 +}
124 +
125 +
126 def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
127 """returns a dict when handed a string generated by serialize_eclasses"""
128 eclasses = eclass_string.rstrip().lstrip().split("\t")
129 @@ -317,9 +346,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
130 # occasionally this occurs in the fs backends. they suck.
131 return {}
132
133 - converter = _unicode
134 - if chf_type == 'mtime':
135 - converter = long
136 + converter = _chf_deserializers.get(chf_type, lambda x: x)
137
138 if paths:
139 if len(eclasses) % 3 != 0:
140 @@ -340,6 +367,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
141 raise cache_errors.CacheCorruption(cpv,
142 "_eclasses_ was of invalid len %i" % len(eclasses))
143 except ValueError:
144 - raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed")
145 + raise cache_errors.CacheCorruption(cpv,
146 + "_eclasses_ not valid for chf_type {}".format(chf_type))
147 del eclasses
148 return d