Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH v3] portage.cache: write md5 instead of mtime (bug 568934)
Date: Sun, 10 Jul 2016 20:18:22
Message-Id: 1468181882-24038-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] portage.cache: write md5 instead of mtime (bug 568934) by Zac Medico
1 Change cache modules to write md5 in cache entries, instead of mtime.
2 Since portage-2.2.27, the relevant cache modules have had the ability
3 to read cache entries containing either md5 or mtime, therefore this
4 change is backward-compatible with portage-2.2.27 and later.
5
6 Also fix the reconstruct_eclasses function to raise CacheCorruption
7 when the specified chf_type is md5 and the cache entry contains mtime
8 data, and optimize __getitem__ to skip reconstruct_eclasses calls when
9 the entry appears to have a different chf_type.
10
11 X-Gentoo-Bug: 568934
12 X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
13 ---
14 [PATCH v3] fixes the __getitem__ optimization to ensure that
15 CacheCorruption is raised if a cache entry does not contain a
16 recognized chf_type.
17
18 pym/portage/cache/anydbm.py | 4 ++--
19 pym/portage/cache/flat_hash.py | 4 ++--
20 pym/portage/cache/sqlite.py | 4 ++--
21 pym/portage/cache/template.py | 26 ++++++++++++++++++++++----
22 4 files changed, 28 insertions(+), 10 deletions(-)
23
24 diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py
25 index 80d24e5..88d85b0 100644
26 --- a/pym/portage/cache/anydbm.py
27 +++ b/pym/portage/cache/anydbm.py
28 @@ -36,8 +36,8 @@ from portage.cache import cache_errors
29
30 class database(fs_template.FsBased):
31
32 - validation_chf = 'mtime'
33 - chf_types = ('mtime', 'md5')
34 + validation_chf = 'md5'
35 + chf_types = ('md5', 'mtime')
36
37 autocommits = True
38 cleanse_keys = True
39 diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py
40 index cca0f10..3a899c0 100644
41 --- a/pym/portage/cache/flat_hash.py
42 +++ b/pym/portage/cache/flat_hash.py
43 @@ -163,5 +163,5 @@ class md5_database(database):
44
45
46 class mtime_md5_database(database):
47 - validation_chf = 'mtime'
48 - chf_types = ('mtime', 'md5')
49 + validation_chf = 'md5'
50 + chf_types = ('md5', 'mtime')
51 diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py
52 index 32e4076..69150f6 100644
53 --- a/pym/portage/cache/sqlite.py
54 +++ b/pym/portage/cache/sqlite.py
55 @@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000:
56
57 class database(fs_template.FsBased):
58
59 - validation_chf = 'mtime'
60 - chf_types = ('mtime', 'md5')
61 + validation_chf = 'md5'
62 + chf_types = ('md5', 'mtime')
63
64 autocommits = False
65 synchronous = False
66 diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py
67 index a7c6de0..d292eed 100644
68 --- a/pym/portage/cache/template.py
69 +++ b/pym/portage/cache/template.py
70 @@ -54,6 +54,10 @@ class database(object):
71
72 if self.serialize_eclasses and "_eclasses_" in d:
73 for chf_type in chf_types:
74 + if '_%s_' % chf_type not in d:
75 + # Skip the reconstruct_eclasses call, since this
76 + # entry appears to have a different chf_type.
77 + continue
78 try:
79 d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
80 chf_type, paths=self.store_eclass_paths)
81 @@ -62,6 +66,9 @@ class database(object):
82 raise
83 else:
84 break
85 + else:
86 + raise cache_errors.CacheCorruption(cpv,
87 + 'entry does not contain a recognized chf_type')
88
89 elif "_eclasses_" not in d:
90 d["_eclasses_"] = {}
91 @@ -310,6 +317,18 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True):
92 for k, v in sorted(eclass_dict.items(), key=_keysorter))
93
94
95 +def _md5_deserializer(md5):
96 + if len(md5) != 32:
97 + raise ValueError('expected 32 hex digits')
98 + return md5
99 +
100 +
101 +_chf_deserializers = {
102 + 'md5': _md5_deserializer,
103 + 'mtime': long,
104 +}
105 +
106 +
107 def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
108 """returns a dict when handed a string generated by serialize_eclasses"""
109 eclasses = eclass_string.rstrip().lstrip().split("\t")
110 @@ -317,9 +336,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
111 # occasionally this occurs in the fs backends. they suck.
112 return {}
113
114 - converter = _unicode
115 - if chf_type == 'mtime':
116 - converter = long
117 + converter = _chf_deserializers.get(chf_type, lambda x: x)
118
119 if paths:
120 if len(eclasses) % 3 != 0:
121 @@ -340,6 +357,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
122 raise cache_errors.CacheCorruption(cpv,
123 "_eclasses_ was of invalid len %i" % len(eclasses))
124 except ValueError:
125 - raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed")
126 + raise cache_errors.CacheCorruption(cpv,
127 + "_eclasses_ not valid for chf_type {}".format(chf_type))
128 del eclasses
129 return d
130 --
131 2.7.4

Replies