1 |
Change cache modules to write md5 in cache entries, instead of mtime. |
2 |
Since portage-2.2.27, the relevant cache modules have had the ability |
3 |
to read cache entries containing either md5 or mtime, therefore this |
4 |
change is backward-compatible with portage-2.2.27 and later. |
5 |
|
6 |
Also fix the reconstruct_eclasses function to raise CacheCorruption |
7 |
when the specified chf_type is md5 and the cache entry contains mtime |
8 |
data, and optimize __getitem__ to skip reconstruct_eclasses calls when |
9 |
the entry appears to have a different chf_type. |
10 |
|
11 |
X-Gentoo-Bug: 568934 |
12 |
X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934 |
13 |
--- |
14 |
[PATCH v4] adds some comments to clarify the purposes of the __getitem__ |
15 |
optimization and the _md5_deserializer validation |
16 |
|
17 |
pym/portage/cache/anydbm.py | 4 ++-- |
18 |
pym/portage/cache/flat_hash.py | 4 ++-- |
19 |
pym/portage/cache/sqlite.py | 4 ++-- |
20 |
pym/portage/cache/template.py | 36 ++++++++++++++++++++++++++++++++---- |
21 |
4 files changed, 38 insertions(+), 10 deletions(-) |
22 |
|
23 |
diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py |
24 |
index 80d24e5..88d85b0 100644 |
25 |
--- a/pym/portage/cache/anydbm.py |
26 |
+++ b/pym/portage/cache/anydbm.py |
27 |
@@ -36,8 +36,8 @@ from portage.cache import cache_errors |
28 |
|
29 |
class database(fs_template.FsBased): |
30 |
|
31 |
- validation_chf = 'mtime' |
32 |
- chf_types = ('mtime', 'md5') |
33 |
+ validation_chf = 'md5' |
34 |
+ chf_types = ('md5', 'mtime') |
35 |
|
36 |
autocommits = True |
37 |
cleanse_keys = True |
38 |
diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py |
39 |
index cca0f10..3a899c0 100644 |
40 |
--- a/pym/portage/cache/flat_hash.py |
41 |
+++ b/pym/portage/cache/flat_hash.py |
42 |
@@ -163,5 +163,5 @@ class md5_database(database): |
43 |
|
44 |
|
45 |
class mtime_md5_database(database): |
46 |
- validation_chf = 'mtime' |
47 |
- chf_types = ('mtime', 'md5') |
48 |
+ validation_chf = 'md5' |
49 |
+ chf_types = ('md5', 'mtime') |
50 |
diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py |
51 |
index 32e4076..69150f6 100644 |
52 |
--- a/pym/portage/cache/sqlite.py |
53 |
+++ b/pym/portage/cache/sqlite.py |
54 |
@@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000: |
55 |
|
56 |
class database(fs_template.FsBased): |
57 |
|
58 |
- validation_chf = 'mtime' |
59 |
- chf_types = ('mtime', 'md5') |
60 |
+ validation_chf = 'md5' |
61 |
+ chf_types = ('md5', 'mtime') |
62 |
|
63 |
autocommits = False |
64 |
synchronous = False |
65 |
diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py |
66 |
index a7c6de0..8662d85 100644 |
67 |
--- a/pym/portage/cache/template.py |
68 |
+++ b/pym/portage/cache/template.py |
69 |
@@ -54,6 +54,15 @@ class database(object): |
70 |
|
71 |
if self.serialize_eclasses and "_eclasses_" in d: |
72 |
for chf_type in chf_types: |
73 |
+ if '_%s_' % chf_type not in d: |
74 |
+ # Skip the reconstruct_eclasses call, since it's |
75 |
+ # a waste of time if the entry contains a different chf_type |
76 |
+ # than the current one. In the past, it was possible |
77 |
+ # for reconstruct_eclasses called with chf_type='md5' |
78 |
+ # to "successfully" return invalid data here, because |
79 |
+ # it was unable to distinguish between md5 data and |
80 |
+ # mtime data. |
81 |
+ continue |
82 |
try: |
83 |
d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], |
84 |
chf_type, paths=self.store_eclass_paths) |
85 |
@@ -62,6 +71,9 @@ class database(object): |
86 |
raise |
87 |
else: |
88 |
break |
89 |
+ else: |
90 |
+ raise cache_errors.CacheCorruption(cpv, |
91 |
+ 'entry does not contain a recognized chf_type') |
92 |
|
93 |
elif "_eclasses_" not in d: |
94 |
d["_eclasses_"] = {} |
95 |
@@ -310,6 +322,23 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): |
96 |
for k, v in sorted(eclass_dict.items(), key=_keysorter)) |
97 |
|
98 |
|
99 |
+def _md5_deserializer(md5): |
100 |
+ """ |
101 |
+ Without this validation, it's possible for reconstruct_eclasses to |
102 |
+ mistakenly interpret mtime data as md5 data, and return an invalid |
103 |
+ data structure containing strings where ints are expected. |
104 |
+ """ |
105 |
+ if len(md5) != 32: |
106 |
+ raise ValueError('expected 32 hex digits') |
107 |
+ return md5 |
108 |
+ |
109 |
+ |
110 |
+_chf_deserializers = { |
111 |
+ 'md5': _md5_deserializer, |
112 |
+ 'mtime': long, |
113 |
+} |
114 |
+ |
115 |
+ |
116 |
def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
117 |
"""returns a dict when handed a string generated by serialize_eclasses""" |
118 |
eclasses = eclass_string.rstrip().lstrip().split("\t") |
119 |
@@ -317,9 +346,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
120 |
# occasionally this occurs in the fs backends. they suck. |
121 |
return {} |
122 |
|
123 |
- converter = _unicode |
124 |
- if chf_type == 'mtime': |
125 |
- converter = long |
126 |
+ converter = _chf_deserializers.get(chf_type, lambda x: x) |
127 |
|
128 |
if paths: |
129 |
if len(eclasses) % 3 != 0: |
130 |
@@ -340,6 +367,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
131 |
raise cache_errors.CacheCorruption(cpv, |
132 |
"_eclasses_ was of invalid len %i" % len(eclasses)) |
133 |
except ValueError: |
134 |
- raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed") |
135 |
+ raise cache_errors.CacheCorruption(cpv, |
136 |
+ "_eclasses_ not valid for chf_type {}".format(chf_type)) |
137 |
del eclasses |
138 |
return d |
139 |
-- |
140 |
2.7.4 |