1 |
On Sat, Jun 27, 2020, at 1:34 AM, Chun-Yu Shei wrote: |
2 |
> According to cProfile, catpkgsplit is called up to 1-5.5 million times |
3 |
> during "emerge -uDvpU --with-bdeps=y @world". Adding a dict to cache its |
4 |
> results reduces the time for this command from 43.53 -> 41.53 seconds -- |
5 |
> a 4.8% speedup. |
6 |
> --- |
7 |
> lib/portage/versions.py | 7 +++++++ |
8 |
> 1 file changed, 7 insertions(+) |
9 |
> |
10 |
> diff --git a/lib/portage/versions.py b/lib/portage/versions.py |
11 |
> index 0c21373cc..ffec316ce 100644 |
12 |
> --- a/lib/portage/versions.py |
13 |
> +++ b/lib/portage/versions.py |
14 |
> @@ -312,6 +312,7 @@ def _pkgsplit(mypkg, eapi=None): |
15 |
> |
16 |
> _cat_re = re.compile('^%s$' % _cat, re.UNICODE) |
17 |
> _missing_cat = 'null' |
18 |
> +_catpkgsplit_cache = {} |
19 |
> |
20 |
> def catpkgsplit(mydata, silent=1, eapi=None): |
21 |
> """ |
22 |
> @@ -331,6 +332,11 @@ def catpkgsplit(mydata, silent=1, eapi=None): |
23 |
> return mydata.cpv_split |
24 |
> except AttributeError: |
25 |
> pass |
26 |
> + |
27 |
> + cache_entry = _catpkgsplit_cache.get(mydata) |
28 |
> + if cache_entry is not None: |
29 |
> + return cache_entry |
30 |
> + |
31 |
> mysplit = mydata.split('/', 1) |
32 |
> p_split = None |
33 |
> if len(mysplit) == 1: |
34 |
> @@ -343,6 +349,7 @@ def catpkgsplit(mydata, silent=1, eapi=None): |
35 |
> if not p_split: |
36 |
> return None |
37 |
> retval = (cat, p_split[0], p_split[1], p_split[2]) |
38 |
> + _catpkgsplit_cache[mydata] = retval |
39 |
> return retval |
40 |
> |
41 |
> class _pkg_str(_unicode): |
42 |
> -- |
43 |
> 2.27.0.212.ge8ba1cc988-goog |
44 |
> |
45 |
|
46 |
There are libraries that provide decorators and similar helpers for caching and memoization. |
47 |
Have you evaluated any of those? One is available in the standard library: |
48 |
https://docs.python.org/dev/library/functools.html#functools.lru_cache |
49 |
|
50 |
I mention this because using an existing memoization decorator instead of a hand-rolled cache dict would improve code clarity. |