Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH v2] portage.cache: write md5 instead of mtime (bug 568934)
Date: Sun, 10 Jul 2016 19:45:27
Message-Id: 1468179892-23288-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] portage.cache: write md5 instead of mtime (bug 568934) by Zac Medico
1 Change cache modules to write md5 in cache entries, instead of mtime.
2 Since portage-2.2.27, the relevant cache modules have had the ability
3 to read cache entries containing either md5 or mtime, so this
4 change is backward-compatible with portage-2.2.27 and later.
5
6 Also fix the reconstruct_eclasses function to raise CacheCorruption
7 when the specified chf_type is md5 and the cache entry contains mtime
8 data, and optimize __getitem__ to skip reconstruct_eclasses calls when
9 the entry appears to have a different chf_type.
10
11 X-Gentoo-Bug: 568934
12 X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
13 ---
14 [PATCH v2] adds a __getitem__ optimization to skip reconstruct_eclasses
15 calls when the entry appears to have a different chf_type.
16
17 pym/portage/cache/anydbm.py | 4 ++--
18 pym/portage/cache/flat_hash.py | 4 ++--
19 pym/portage/cache/sqlite.py | 4 ++--
20 pym/portage/cache/template.py | 23 +++++++++++++++++++----
21 4 files changed, 25 insertions(+), 10 deletions(-)
22
23 diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py
24 index 80d24e5..88d85b0 100644
25 --- a/pym/portage/cache/anydbm.py
26 +++ b/pym/portage/cache/anydbm.py
27 @@ -36,8 +36,8 @@ from portage.cache import cache_errors
28
29 class database(fs_template.FsBased):
30
31 - validation_chf = 'mtime'
32 - chf_types = ('mtime', 'md5')
33 + validation_chf = 'md5'
34 + chf_types = ('md5', 'mtime')
35
36 autocommits = True
37 cleanse_keys = True
38 diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py
39 index cca0f10..3a899c0 100644
40 --- a/pym/portage/cache/flat_hash.py
41 +++ b/pym/portage/cache/flat_hash.py
42 @@ -163,5 +163,5 @@ class md5_database(database):
43
44
45 class mtime_md5_database(database):
46 - validation_chf = 'mtime'
47 - chf_types = ('mtime', 'md5')
48 + validation_chf = 'md5'
49 + chf_types = ('md5', 'mtime')
50 diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py
51 index 32e4076..69150f6 100644
52 --- a/pym/portage/cache/sqlite.py
53 +++ b/pym/portage/cache/sqlite.py
54 @@ -18,8 +18,8 @@ if sys.hexversion >= 0x3000000:
55
56 class database(fs_template.FsBased):
57
58 - validation_chf = 'mtime'
59 - chf_types = ('mtime', 'md5')
60 + validation_chf = 'md5'
61 + chf_types = ('md5', 'mtime')
62
63 autocommits = False
64 synchronous = False
65 diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py
66 index a7c6de0..24d8f8f 100644
67 --- a/pym/portage/cache/template.py
68 +++ b/pym/portage/cache/template.py
69 @@ -54,6 +54,10 @@ class database(object):
70
71 if self.serialize_eclasses and "_eclasses_" in d:
72 for chf_type in chf_types:
73 + if '_%s_' % chf_type not in d:
74 + # Skip the reconstruct_eclasses call, since this
75 + # entry appears to have a different chf_type.
76 + continue
77 try:
78 d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
79 chf_type, paths=self.store_eclass_paths)
80 @@ -310,6 +314,18 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True):
81 for k, v in sorted(eclass_dict.items(), key=_keysorter))
82
83
84 +def _md5_deserializer(md5):
85 + if len(md5) != 32:
86 + raise ValueError('expected 32 hex digits')
87 + return md5
88 +
89 +
90 +_chf_deserializers = {
91 + 'md5': _md5_deserializer,
92 + 'mtime': long,
93 +}
94 +
95 +
96 def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
97 """returns a dict when handed a string generated by serialize_eclasses"""
98 eclasses = eclass_string.rstrip().lstrip().split("\t")
99 @@ -317,9 +333,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
100 # occasionally this occurs in the fs backends. they suck.
101 return {}
102
103 - converter = _unicode
104 - if chf_type == 'mtime':
105 - converter = long
106 + converter = _chf_deserializers.get(chf_type, lambda x: x)
107
108 if paths:
109 if len(eclasses) % 3 != 0:
110 @@ -340,6 +354,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
111 raise cache_errors.CacheCorruption(cpv,
112 "_eclasses_ was of invalid len %i" % len(eclasses))
113 except ValueError:
114 - raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed")
115 + raise cache_errors.CacheCorruption(cpv,
116 + "_eclasses_ not valid for chf_type {}".format(chf_type))
117 del eclasses
118 return d
119 --
120 2.7.4