Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH 1/5 v2] Add egencache --update-pkg-desc-index action.
Date: Tue, 04 Nov 2014 09:04:01
Message-Id: 1415091834-20792-1-git-send-email-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH 1/5] Add egencache --update-pkg-desc-index action. by Zac Medico
1 This adds an egencache --update-pkg-desc-index action which generates
2 a plain-text index of package names, versions, and descriptions. The
3 index can then be used to optimize emerge --search / --searchdesc
4 actions.
5
6 X-Gentoo-Bug: 525718
7 X-Gentoo-Bug-URL: https://bugs.gentoo.org/show_bug.cgi?id=525718
8 ---
9 This updated patch optimizes pkg_desc_index_line_read to skip package name
10 and version validation. This fixes a performance problem reported by
11 Brian Dolbec.
12
13 bin/egencache | 38 ++++++++++++++++++--
14 man/egencache.1 | 4 +++
15 man/portage.5 | 12 +++++++
16 pym/portage/cache/index/__init__.py | 2 ++
17 pym/portage/cache/index/pkg_desc_index.py | 59 +++++++++++++++++++++++++++++++
18 5 files changed, 113 insertions(+), 2 deletions(-)
19 create mode 100644 pym/portage/cache/index/__init__.py
20 create mode 100644 pym/portage/cache/index/pkg_desc_index.py
21
22 diff --git a/bin/egencache b/bin/egencache
23 index e366058..f97432f 100755
24 --- a/bin/egencache
25 +++ b/bin/egencache
26 @@ -48,6 +48,7 @@ portage._internal_caller = True
27 from portage import os, _encodings, _unicode_encode, _unicode_decode
28 from _emerge.MetadataRegen import MetadataRegen
29 from portage.cache.cache_errors import CacheError, StatCollision
30 +from portage.cache.index.pkg_desc_index import pkg_desc_index_line_format
31 from portage.const import TIMESTAMP_FORMAT
32 from portage.manifest import guessManifestFileType
33 from portage.package.ebuild._parallel_manifest.ManifestScheduler import ManifestScheduler
34 @@ -57,7 +58,7 @@ from portage.util._async.run_main_scheduler import run_main_scheduler
35 from portage.util._eventloop.global_event_loop import global_event_loop
36 from portage import cpv_getkey
37 from portage.dep import Atom, isjustname
38 -from portage.versions import pkgsplit, vercmp
39 +from portage.versions import pkgsplit, vercmp, _pkg_str
40
41 try:
42 from xml.etree import ElementTree
43 @@ -91,6 +92,9 @@ def parse_args(args):
44 actions.add_argument("--update-changelogs",
45 action="store_true",
46 help="update the ChangeLog files from SCM logs")
47 + actions.add_argument("--update-pkg-desc-index",
48 + action="store_true",
49 + help="update package description index")
50 actions.add_argument("--update-manifests",
51 action="store_true",
52 help="update manifests")
53 @@ -451,6 +455,29 @@ class GenCache(object):
54 if hasattr(trg_cache, '_prune_empty_dirs'):
55 trg_cache._prune_empty_dirs()
56
class GenPkgDescIndex(object):
    """
    Generate the plain-text package description index of a repository.

    One line is written per category/package name that has at least
    one version, using pkg_desc_index_line_format() to build the line.
    """

    def __init__(self, portdb, output_file):
        """
        portdb is the database queried through cp_all(), cp_list() and
        aux_get(); output_file is the path of the index to write.
        """
        self.returncode = os.EX_OK
        self._portdb = portdb
        self._output_file = output_file

    def run(self):
        """
        Write the index to the configured output path.

        The parent directory is created if needed. Output goes through
        portage.util.atomic_ofstream; the stream is closed only when
        every entry was written, otherwise it is aborted and the
        original exception propagates to the caller.
        """
        portage.util.ensure_dirs(os.path.dirname(self._output_file))
        f = portage.util.atomic_ofstream(self._output_file,
            encoding=_encodings["repo.content"])

        portdb = self._portdb
        completed = False
        try:
            for cp in portdb.cp_all():
                pkgs = portdb.cp_list(cp)
                if not pkgs:
                    continue
                # The description is taken from the last entry of the
                # list (presumably the highest version; cp_list()
                # ordering is not visible here -- confirm).
                desc, = portdb.aux_get(pkgs[-1], ["DESCRIPTION"])

                f.write(pkg_desc_index_line_format(cp, pkgs, desc))
            completed = True
        finally:
            # A flag plus finally (rather than except/raise) keeps the
            # original exception intact on Python 2 even if the cleanup
            # handles exceptions of its own internally.
            if completed:
                f.close()
            else:
                # NOTE(review): abort() is atomic_ofstream's API for
                # discarding a partial write; verify against
                # portage.util before merging.
                f.abort()
79 +
80 class GenUseLocalDesc(object):
81 def __init__(self, portdb, output=None,
82 preserve_comments=False):
83 @@ -893,7 +920,8 @@ def egencache_main(args):
84 local_config=False, env=env)
85
86 if not (options.update or options.update_use_local_desc or
87 - options.update_changelogs or options.update_manifests):
88 + options.update_changelogs or options.update_manifests or
89 + options.update_pkg_desc_index):
90 parser.error('No action specified')
91 return 1
92
93 @@ -1057,6 +1085,12 @@ def egencache_main(args):
94 else:
95 ret.append(scheduler.returncode)
96
97 + if options.update_pkg_desc_index:
98 + gen_index = GenPkgDescIndex(portdb, os.path.join(
99 + repo_config.location, "metadata", "pkg_desc_index"))
100 + gen_index.run()
101 + ret.append(gen_index.returncode)
102 +
103 if options.update_use_local_desc:
104 gen_desc = GenUseLocalDesc(portdb,
105 output=options.uld_output,
106 diff --git a/man/egencache.1 b/man/egencache.1
107 index f71feb3..3a3197f 100644
108 --- a/man/egencache.1
109 +++ b/man/egencache.1
110 @@ -19,6 +19,10 @@ for the details on package atom syntax.
111 .BR "\-\-update\-changelogs"
112 Update the ChangeLog files from SCM logs (supported only in git repos).
113 .TP
114 +.BR "\-\-update\-pkg\-desc\-index"
115 +Update the package description index which is located at
116 +\fImetadata/pkg_desc_index\fR in the repository.
117 +.TP
118 .BR "\-\-update\-use\-local\-desc"
119 Update the \fIprofiles/use.local.desc\fR file from metadata.xml.
120 .TP
121 diff --git a/man/portage.5 b/man/portage.5
122 index 309e259..f2f5243 100644
123 --- a/man/portage.5
124 +++ b/man/portage.5
125 @@ -76,6 +76,7 @@ user\-defined package sets
126 .BR /usr/portage/metadata/
127 .nf
128 layout.conf
129 +pkg_desc_index
130 .fi
131 .TP
132 .BR /usr/portage/profiles/
133 @@ -1138,6 +1139,17 @@ cache\-formats = md5-dict pms
134 profile\-formats = portage-2
135 .fi
136 .RE
137 +.TP
138 +.BR pkg_desc_index
139 +This is an index of package names, versions, and descriptions which
140 +may be generated by \fBegencache\fR(1) in order to optimize
141 +\fBemerge\fR(1) search actions.
142 +
143 +.I Example:
144 +.nf
145 +sys-apps/sed 4.2 4.2.1 4.2.1-r1 4.2.2: Super-useful stream editor
146 +sys-apps/usleep 0.1: A wrapper for usleep
147 +.fi
148 .RE
149 .TP
150 .BR /usr/portage/profiles/
151 diff --git a/pym/portage/cache/index/__init__.py b/pym/portage/cache/index/__init__.py
152 new file mode 100644
153 index 0000000..7cd880e
154 --- /dev/null
155 +++ b/pym/portage/cache/index/__init__.py
156 @@ -0,0 +1,2 @@
157 +# Copyright 2014 Gentoo Foundation
158 +# Distributed under the terms of the GNU General Public License v2
159 diff --git a/pym/portage/cache/index/pkg_desc_index.py b/pym/portage/cache/index/pkg_desc_index.py
160 new file mode 100644
161 index 0000000..ed2cdf7
162 --- /dev/null
163 +++ b/pym/portage/cache/index/pkg_desc_index.py
164 @@ -0,0 +1,59 @@
165 +# Copyright 2014 Gentoo Foundation
166 +# Distributed under the terms of the GNU General Public License v2
167 +
168 +from __future__ import unicode_literals
169 +
170 +import collections
171 +import sys
172 +
173 +from portage.versions import _pkg_str
174 +
# Text type of the running interpreter: str on Python 3, unicode on
# Python 2. The conditional expression only evaluates the branch it
# selects, so the name "unicode" is never looked up on Python 3.
_unicode = str if sys.hexversion >= 0x3000000 else unicode

# One parsed index line: the category/package name, its package
# entries, and the description text.
pkg_desc_index_node = collections.namedtuple(
    "pkg_desc_index_node", ("cp", "cpv_list", "desc"))
182 +
class pkg_node(_unicode):
    """
    Lightweight string subclass that names a single package version.

    The string value is cp + "-" + version, and the cp, version and
    repo values passed to the constructor are available as attributes.
    Inputs are deliberately not validated, for performance reasons.
    """

    def __new__(cls, cp, version, repo = None):
        # The text value must be fixed here because the string content
        # can no longer be changed once the object exists.
        cpv = cp + "-" + version
        return _unicode.__new__(cls, cpv)

    def __init__(self, cp, version, repo = None):
        # Write straight into the instance dict, since __setattr__
        # rejects every assignment.
        self.__dict__.update(cp=cp, repo=repo, version=version)

    def __setattr__(self, name, value):
        message = "pkg_node instances are immutable"
        raise AttributeError(message, self.__class__, name, value)
200 +
def pkg_desc_index_line_format(cp, pkgs, desc):
    """
    Build one newline-terminated index line.

    The line has the form "<cp> <versions>: <desc>", where <versions>
    is the space-separated list of the version attribute of each entry
    in pkgs, as obtained through _pkg_str.
    """
    versions = [_pkg_str(cpv).version for cpv in pkgs]
    return "%s %s: %s\n" % (cp, " ".join(versions), desc)
205 +
def pkg_desc_index_line_read(line, repo = None):
    """
    Parse one index line into a pkg_desc_index_node.

    The text before the first ":" must hold a category/package name, a
    single space, and then whitespace-separated versions; the text after
    it is the description, returned stripped. Each version becomes a
    pkg_node built from the name, that version, and repo.

    Returns None when the line has no ":" or when the part before the
    ":" contains no space.
    """
    head, colon, desc = line.partition(":")
    if not colon:
        return None
    desc = desc.strip()

    cp, space, versions = head.partition(" ")
    if not space:
        return None

    nodes = tuple(pkg_node(cp, ver, repo) for ver in versions.split())
    return pkg_desc_index_node(cp, nodes, desc)
224 --
225 2.0.4