1 |
commit: 9abbda7d054761ae6c333d3e6d420632b9658b6d |
2 |
Author: Zac Medico <zmedico <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sun Jul 10 06:11:41 2016 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Wed Jul 13 11:29:34 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=9abbda7d |
7 |
|
8 |
portage.cache: write md5 instead of mtime (bug 568934) |
9 |
|
10 |
Change cache modules to write md5 in cache entries, instead of mtime. |
11 |
Since portage-2.2.27, the relevant cache modules have had the ability |
12 |
to read cache entries containing either md5 or mtime, therefore this |
13 |
change is backward-compatible with portage-2.2.27 and later. |
14 |
|
15 |
Also fix the reconstruct_eclasses function to raise CacheCorruption |
16 |
when the specified chf_type is md5 and the cache entry contains mtime |
17 |
data, and optimize __getitem__ to skip reconstruct_eclasses calls when |
18 |
the entry appears to have a different chf_type. |
19 |
|
20 |
X-Gentoo-Bug: 568934 |
21 |
X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934 |
22 |
Acked-by: Alexander Berntsen <bernalex <AT> gentoo.org> |
23 |
|
24 |
pym/portage/cache/anydbm.py | 4 ++-- |
25 |
pym/portage/cache/flat_hash.py | 4 ++-- |
26 |
pym/portage/cache/sqlite.py | 4 ++-- |
27 |
pym/portage/cache/template.py | 36 ++++++++++++++++++++++++++++++++---- |
28 |
4 files changed, 38 insertions(+), 10 deletions(-) |
29 |
|
30 |
diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py |
31 |
index 80d24e5..88d85b0 100644 |
32 |
--- a/pym/portage/cache/anydbm.py |
33 |
+++ b/pym/portage/cache/anydbm.py |
34 |
@@ -36,8 +36,8 @@ from portage.cache import cache_errors |
35 |
|
36 |
class database(fs_template.FsBased): |
37 |
|
38 |
- validation_chf = 'mtime' |
39 |
- chf_types = ('mtime', 'md5') |
40 |
+ validation_chf = 'md5' |
41 |
+ chf_types = ('md5', 'mtime') |
42 |
|
43 |
autocommits = True |
44 |
cleanse_keys = True |
45 |
|
46 |
diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py |
47 |
index cca0f10..3a899c0 100644 |
48 |
--- a/pym/portage/cache/flat_hash.py |
49 |
+++ b/pym/portage/cache/flat_hash.py |
50 |
@@ -163,5 +163,5 @@ class md5_database(database): |
51 |
|
52 |
|
53 |
class mtime_md5_database(database): |
54 |
- validation_chf = 'mtime' |
55 |
- chf_types = ('mtime', 'md5') |
56 |
+ validation_chf = 'md5' |
57 |
+ chf_types = ('md5', 'mtime') |
58 |
|
59 |
diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py |
60 |
index 32e4076..69150f6 100644 |
61 |
--- a/pym/portage/cache/sqlite.py |
62 |
+++ b/pym/portage/cache/sqlite.py |
63 |
@@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000: |
64 |
|
65 |
class database(fs_template.FsBased): |
66 |
|
67 |
- validation_chf = 'mtime' |
68 |
- chf_types = ('mtime', 'md5') |
69 |
+ validation_chf = 'md5' |
70 |
+ chf_types = ('md5', 'mtime') |
71 |
|
72 |
autocommits = False |
73 |
synchronous = False |
74 |
|
75 |
diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py |
76 |
index a7c6de0..8662d85 100644 |
77 |
--- a/pym/portage/cache/template.py |
78 |
+++ b/pym/portage/cache/template.py |
79 |
@@ -54,6 +54,15 @@ class database(object): |
80 |
|
81 |
if self.serialize_eclasses and "_eclasses_" in d: |
82 |
for chf_type in chf_types: |
83 |
+ if '_%s_' % chf_type not in d: |
84 |
+ # Skip the reconstruct_eclasses call, since it's |
85 |
+ # a waste of time if it contains a different chf_type |
86 |
+ # than the current one. In the past, it was possible |
87 |
+ # for reconstruct_eclasses called with chf_type='md5' |
88 |
+ # to "successfully" return invalid data here, because |
89 |
+ # it was unable to distinguish between md5 data and |
90 |
+ # mtime data. |
91 |
+ continue |
92 |
try: |
93 |
d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], |
94 |
chf_type, paths=self.store_eclass_paths) |
95 |
@@ -62,6 +71,9 @@ class database(object): |
96 |
raise |
97 |
else: |
98 |
break |
99 |
+ else: |
100 |
+ raise cache_errors.CacheCorruption(cpv, |
101 |
+ 'entry does not contain a recognized chf_type') |
102 |
|
103 |
elif "_eclasses_" not in d: |
104 |
d["_eclasses_"] = {} |
105 |
@@ -310,6 +322,23 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): |
106 |
for k, v in sorted(eclass_dict.items(), key=_keysorter)) |
107 |
|
108 |
|
109 |
+def _md5_deserializer(md5): |
110 |
+ """ |
111 |
+ Without this validation, it's possible for reconstruct_eclasses to |
112 |
+ mistakenly interpret mtime data as md5 data, and return an invalid |
113 |
+ data structure containing strings where ints are expected. |
114 |
+ """ |
115 |
+ if len(md5) != 32: |
116 |
+ raise ValueError('expected 32 hex digits') |
117 |
+ return md5 |
118 |
+ |
119 |
+ |
120 |
+_chf_deserializers = { |
121 |
+ 'md5': _md5_deserializer, |
122 |
+ 'mtime': long, |
123 |
+} |
124 |
+ |
125 |
+ |
126 |
def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
127 |
"""returns a dict when handed a string generated by serialize_eclasses""" |
128 |
eclasses = eclass_string.rstrip().lstrip().split("\t") |
129 |
@@ -317,9 +346,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
130 |
# occasionally this occurs in the fs backends. they suck. |
131 |
return {} |
132 |
|
133 |
- converter = _unicode |
134 |
- if chf_type == 'mtime': |
135 |
- converter = long |
136 |
+ converter = _chf_deserializers.get(chf_type, lambda x: x) |
137 |
|
138 |
if paths: |
139 |
if len(eclasses) % 3 != 0: |
140 |
@@ -340,6 +367,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): |
141 |
raise cache_errors.CacheCorruption(cpv, |
142 |
"_eclasses_ was of invalid len %i" % len(eclasses)) |
143 |
except ValueError: |
144 |
- raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed") |
145 |
+ raise cache_errors.CacheCorruption(cpv, |
146 |
+ "_eclasses_ not valid for chf_type {}".format(chf_type)) |
147 |
del eclasses |
148 |
return d |