Gentoo Archives: gentoo-portage-dev

From: "Michał Górny" <mgorny@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Hidehiko Abe <hidehiko@××××××××.org>
Subject: Re: [gentoo-portage-dev] [PATCH] Rewrite doins in python (bug 624526)
Date: Wed, 23 Aug 2017 14:51:43
Message-Id: 1503499896.2219.1.camel@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] Rewrite doins in python (bug 624526) by Zac Medico
1 W dniu pon, 14.08.2017 o godzinie 00∶39 -0700, użytkownik Zac Medico
2 napisał:
3 > From: Hidehiko Abe <hidehiko@××××××××.org>
4 >
5 > doins is written in bash. However, specifically in case that
6 > too many files are installed, it is very slow.
7 > This CL rewrites the script in python for performance.
8 >
9 > BUG=chromium:712659
10 > TEST=time (./setup_board --forace && \
11 > ./build_package --withdev && \
12 > ./build_image --noenable_rootfs_verification test)
13 > ===Before===
14 > real 21m35.445s
15 > user 93m40.588s
16 > sys 21m31.224s
17 >
18 > ===After===
19 > real 17m30.106s
20 > user 94m1.812s
21 > sys 20m13.468s
22 >
23 > Change-Id: Ib10f623961ba316753d58397cff5e72fbc343339
24 > Reviewed-on: https://chromium-review.googlesource.com/559225
25 > X-Chromium-Bug: 712659
26 > X-Chromium-Bug-url: https://bugs.chromium.org/p/chromium/issues/detail?id=712659
27 > X-Gentoo-Bug: 624526
28 > X-Gentoo-Bug-url: https://bugs.gentoo.org/624526
29 > ---
30 > bin/doins.py | 341 +++++++++++++++++++++++++++++++++++++++++++++++
31 > bin/ebuild-helpers/doins | 123 ++---------------
32 > 2 files changed, 353 insertions(+), 111 deletions(-)
33 > create mode 100644 bin/doins.py
34 >
35 > diff --git a/bin/doins.py b/bin/doins.py
36 > new file mode 100644
37 > index 000000000..4a17287ca
38 > --- /dev/null
39 > +++ b/bin/doins.py
40 > @@ -0,0 +1,341 @@
41 > +#!/usr/bin/python -b
42 > +# Copyright 2017 The Chromium OS Authors. All rights reserved.
43 > +# Use of this source code is governed by a BSD-style license that can be
44 > +# found in the LICENSE file.
45 > +
46 > +from __future__ import print_function
47 > +
48 > +import argparse
49 > +import errno
50 > +import grp
51 > +import logging
52 > +import os
53 > +import pwd
54 > +import re
55 > +import shlex
56 > +import shutil
57 > +import stat
58 > +import subprocess
59 > +import sys
60 > +
61 > +from portage.util import movefile
62 > +
63 > +# Change back to original cwd _after_ all imports (bug #469338).
64 > +# See also bin/ebuild-helpers/doins, which sets the cwd.
65 > +os.chdir(os.environ["__PORTAGE_HELPER_CWD"])
66 > +
67 > +_PORTAGE_ACTUAL_DISTDIR = (
68 > + (os.environb if sys.version_info.major >= 3 else os.environ).get(
69 > + b'PORTAGE_ACTUAL_DISTDIR', b'') + b'/')
70 > +
71 > +
72 > +def _parse_install_options(
73 > + options, inprocess_runner_class, subprocess_runner_class):
74 > + """Parses command line arguments for install command."""
75 > + parser = argparse.ArgumentParser()
76 > + parser.add_argument('-g', '--group', default=-1, type=_parse_group)
77 > + parser.add_argument('-o', '--owner', default=-1, type=_parse_user)
78 > + # "install"'s --mode option is complicated. So here is partially
79 > + # supported.
80 > + parser.add_argument('-m', '--mode', type=_parse_mode)
81 > + split_options = shlex.split(options)
82 > + namespace, remaining = parser.parse_known_args(split_options)
83 > + if remaining:
84 > + print('Unknown install options: %s, %r' % (options, remaining),
85 > + file=sys.stderr)
86 > + if os.environ.get('DOINSSTRICTOPTION', '') == '1':
87 > + sys.exit(1)
88 > + print('Continue with falling back to \'install\' '
89 > + 'command execution, which can be slower.',
90 > + file=sys.stderr)
91 > + return subprocess_runner_class(split_options)
92 > + return inprocess_runner_class(namespace)
93 > +
94 > +
95 > +def _parse_group(group):
96 > + """Parses gid."""
97 > + g = grp.getgrnam(group)
98 > + if g:
99 > + return g.gr_gid
100 > + return int(group)
101 > +
102 > +
103 > +def _parse_user(user):
104 > + """Parses uid."""
105 > + u = pwd.getpwnam(user)
106 > + if u:
107 > + return u.pw_uid
108 > + return int(user)
109 > +
110 > +
111 > +def _parse_mode(mode):
112 > + # "install"'s --mode option is complicated. So here is partially
113 > + # supported.
114 > + # In Python 3, the prefix of octal int must be '0o' rather than '0'.
115 > + # So, set base explicitly in that case.
116 > + return int(mode, 8 if re.search(r'^0[0-7]*$', mode) else 0)
117 > +
118 > +
119 > +def _set_attributes(options, path):
120 > + """Sets attributes the file/dir at given |path|.
121 > +
122 > + Args:
123 > + options: object which has |owner|, |group| and |mode| fields.
124 > + |owner| is int value representing uid. Similary |group|
125 > + represents gid.
126 > + If -1 is set, just unchanged.
127 > + |mode| is the bits of permissions.
128 > + path: File/directory path.
129 > + """
130 > + if options.owner != -1 or options.group != -1:
131 > + os.lchown(path, options.owner, options.group)
132 > + if options.mode is not None:
133 > + os.chmod(path, options.mode)
134 > +
135 > +
136 > +class _InsInProcessInstallRunner(object):
137 > + def __init__(self, parsed_options):
138 > + self._parsed_options = parsed_options
139 > + self._copy_xattr = (
140 > + 'xattr' in os.environ.get('FEATURES', '').split())
141 > + if self._copy_xattr:
142 > + self._xattr_exclude = os.environ.get(
143 > + 'PORTAGE_XATTR_EXCLUDE',
144 > + 'security.* system.nfs4_acl')
145 > +
146 > + def run(self, source, dest_dir):
147 > + """Installs a file at |source| into |dest_dir| in process."""
148 > + dest = os.path.join(dest_dir, os.path.basename(source))
149 > + try:
150 > + # TODO: Consider to use portage.util.file_copy.copyfile
151 > + # introduced by
152 > + # https://gitweb.gentoo.org/proj/portage.git/commit/
153 > + # ?id=8ab5c8835931fd9ec098dbf4c5f416eb32e4a3a4
154 > + # after uprev.
155 > + shutil.copyfile(source, dest)
156 > + _set_attributes(self._parsed_options, dest)
157 > + if self._copy_xattr:
158 > + movefile._copyxattr(
159 > + source, dest,
160 > + exclude=self._xattr_exclude)
161 > + except Exception:
162 > + logging.exception(
163 > + 'Failed to copy file: '
164 > + '_parsed_options=%r, source=%r, dest_dir=%r',
165 > + self._parsed_options, source, dest_dir)
166 > + return False
167 > + return True
168 > +
169 > +
170 > +class _InsSubprocessInstallRunner(object):
171 > + def __init__(self, split_options):
172 > + self._split_options = split_options
173 > +
174 > + def run(self, source, dest_dir):
175 > + """Installs a file at |source| into |dest_dir| by 'install'."""
176 > + command = ['install'] + self._split_options + [source, dest_dir]
177 > + return subprocess.call(command) == 0
178 > +
179 > +
180 > +class _DirInProcessInstallRunner(object):
181 > + def __init__(self, parsed_options):
182 > + self._parsed_options = parsed_options
183 > +
184 > + def run(self, dest):
185 > + """Installs a dir into |dest| in process."""
186 > + try:
187 > + os.makedirs(dest)
188 > + except OSError as e:
189 > + if e.errno != errno.EEXIST or not os.path.isdir(dest):
190 > + raise
191 > + _set_attributes(self._parsed_options, dest)
192 > +
193 > +
194 > +class _DirSubprocessInstallRunner(object):
195 > + """Runs real 'install' command to install files or directories."""
196 > +
197 > + def __init__(self, split_options):
198 > + self._split_options = split_options
199 > +
200 > + def run(self, dest):
201 > + """Installs a dir into |dest| by 'install' command."""
202 > + command = ['install', '-d'] + self._split_options + [dest]
203 > + subprocess.call(command)
204 > +
205 > +
206 > +class _InstallRunner(object):
207 > + def __init__(self):
208 > + self._ins_runner = _parse_install_options(
209 > + os.environ.get('INSOPTIONS', ''),
210 > + _InsInProcessInstallRunner,
211 > + _InsSubprocessInstallRunner)
212 > + self._dir_runner = _parse_install_options(
213 > + os.environ.get('DIROPTIONS', ''),
214 > + _DirInProcessInstallRunner,
215 > + _DirSubprocessInstallRunner)
216 > +
217 > + def install_file(self, source, dest_dir):
218 > + """Installs a file at |source| into |dest_dir| directory."""
219 > + return self._ins_runner.run(source, dest_dir)
220 > +
221 > + def install_dir(self, dest):
222 > + """Creates a directory at |dest|."""
223 > + self._dir_runner.run(dest)
224 > +
225 > +
226 > +def _doins(args, install_runner, relpath, source_root):
227 > + """Installs a file as if 'install' command runs.
228 > +
229 > + Installs a file at |source_root|/|relpath| into |args.dest|/|relpath|.
230 > + If |args.preserve_symlinks| is set, creates symlink if the source is a
231 > + symlink.
232 > +
233 > + Args:
234 > + args: parsed arguments. It should have following fields.
235 > + - preserve_symlinks: bool representing whether symlinks
236 > + needs to be preserved.
237 > + - dest: Destination root directory.
238 > + - insoptions: options for 'install' command.
239 > + intall_runner: _InstallRunner instance for file install.
240 > + relpath: Relative path of the file being installed.
241 > + source_root: Source root directory.
242 > +
243 > + Returns: True on success.
244 > + """
245 > + source = os.path.join(source_root, relpath)
246 > + dest = os.path.join(args.dest, relpath)
247 > + if os.path.islink(source):
248 > + # Our fake $DISTDIR contains symlinks that should not be
249 > + # reproduced inside $D. In order to ensure that things like
250 > + # dodoc "$DISTDIR"/foo.pdf work as expected, we dereference
251 > + # symlinked files that refer to absolute paths inside
252 > + # $PORTAGE_ACTUAL_DISTDIR/.
253 > + try:
254 > + if (args.preserve_symlinks and
255 > + not os.readlink(source).startswith(
256 > + _PORTAGE_ACTUAL_DISTDIR)):
257 > + linkto = os.readlink(source)
258 > + shutil.rmtree(dest, ignore_errors=True)
259 > + os.symlink(linkto, dest)
260 > + return True
261 > + except Exception:
262 > + logging.exception(
263 > + 'Failed to create symlink: '
264 > + 'args=%r, relpath=%r, source_root=%r',
265 > + args, relpath, source_root)
266 > + return False
267 > +
268 > + return install_runner.install_file(source, os.path.dirname(dest))
269 > +
270 > +
271 > +def _parse_args():
272 > + """Parses the command line arguments."""
273 > + parser = argparse.ArgumentParser()
274 > + parser.add_argument(
275 > + '--recursive', action='store_true',
276 > + help='If set, installs files recursively. Otherwise, '
277 > + 'just skips directories.')
278 > + parser.add_argument(
279 > + '--preserve_symlinks', action='store_true',
280 > + help='If set, a symlink will be installed as symlink.')
281 > + # If helper is dodoc, it changes the behavior for the directory
282 > + # install without --recursive.
283 > + parser.add_argument('--helper', help='Name of helper.')
284 > + parser.add_argument(
285 > + '--dest',
286 > + help='Destination where the files are installed.')
287 > + parser.add_argument(
288 > + 'sources', nargs='*',
289 > + help='Source file/directory paths to be installed.')
290 > +
291 > + args = parser.parse_args()
292 > +
293 > + # Encode back to the original byte stream. Please see
294 > + # http://bugs.python.org/issue8776.
295 > + if sys.version_info.major >= 3:
296 > + args.dest = os.fsencode(args.dest)
297 > + args.sources = [os.fsencode(source) for source in args.sources]
298 > +
299 > + return args
300 > +
301 > +
302 > +def _install_dir(args, install_runner, source):
303 > + """Installs directory at |source|.
304 > +
305 > + Returns:
306 > + True on success, False on failure, or None on skipped.
307 > + """
308 > + if not args.recursive:
309 > + if args.helper == 'dodoc':
310 > + print('!!! %s: %s is a directory' % (
311 > + args.helper, source),
312 > + file=sys.stderr)
313 > + return False
314 > + # Neither success nor fail. Return None to indicate skipped.
315 > + return None
316 > +
317 > + # Strip trailing '/'s.
318 > + source = source.rstrip(b'/')
319 > + source_root = os.path.dirname(source)
320 > + dest_dir = os.path.join(args.dest, os.path.basename(source))
321 > + install_runner.install_dir(dest_dir)
322 > +
323 > + relpath_list = []
324 > + for dirpath, dirnames, filenames in os.walk(source):
325 > + for dirname in dirnames:
326 > + relpath = os.path.relpath(
327 > + os.path.join(dirpath, dirname),
328 > + source_root)
329 > + dest = os.path.join(args.dest, relpath)
330 > + install_runner.install_dir(dest)
331 > + relpath_list.extend(
332 > + os.path.relpath(
333 > + os.path.join(dirpath, filename), source_root)
334 > + for filename in filenames)
335 > +
336 > + if not relpath_list:
337 > + # NOTE: Even if only an empty directory is installed here, it
338 > + # still counts as success, since an empty directory given as
339 > + # an argument to doins -r should not trigger failure.
340 > + return True
341 > + success = True
342 > + for relpath in relpath_list:
343 > + if not _doins(args, install_runner, relpath, source_root):
344 > + success = False
345 > + return success
346 > +
347 > +
348 > +def main():
349 > + args = _parse_args()
350 > + install_runner = _InstallRunner()
351 > +
352 > + if not os.path.isdir(args.dest):
353 > + install_runner.install_dir(args.dest)
354 > +
355 > + any_success = False
356 > + any_failure = False
357 > + for source in args.sources:
358 > + if (os.path.isdir(source) and
359 > + (not args.preserve_symlinks or
360 > + not os.path.islink(source))):
361 > + ret = _install_dir(args, install_runner, source)
362 > + if ret is None:
363 > + continue
364 > + if ret:
365 > + any_success = True
366 > + else:
367 > + any_failure = True
368 > + else:
369 > + if _doins(
370 > + args, install_runner,
371 > + os.path.basename(source),
372 > + os.path.dirname(source)):
373 > + any_success = True
374 > + else:
375 > + any_failure = True
376 > +
377 > + return 0 if not any_failure and any_success else 1
378 > +
379 > +
380 > +if __name__ == '__main__':
381 > + sys.exit(main())
382 > diff --git a/bin/ebuild-helpers/doins b/bin/ebuild-helpers/doins
383 > index 93052c2ea..de559780c 100755
384 > --- a/bin/ebuild-helpers/doins
385 > +++ b/bin/ebuild-helpers/doins
386 > @@ -24,10 +24,10 @@ if [ $# -lt 1 ] ; then
387 > fi
388 >
389 > if [[ "$1" == "-r" ]] ; then
390 > - DOINSRECUR=y
391 > + RECURSIVE_OPTION='--recursive'
392 > shift
393 > else
394 > - DOINSRECUR=n
395 > + RECURSIVE_OPTION=''
396 > fi
397 >
398 > if ! ___eapi_has_prefix_variables; then
399 > @@ -44,116 +44,17 @@ if [[ ${INSDESTTREE#${ED}} != "${INSDESTTREE}" ]]; then
400 > fi
401 >
402 > if ___eapi_doins_and_newins_preserve_symlinks; then
403 > - PRESERVE_SYMLINKS=y
404 > + SYMLINK_OPTION='--preserve_symlinks'
405 > else
406 > - PRESERVE_SYMLINKS=n
407 > + SYMLINK_OPTION=''
408 > fi
409 >
410 > -export TMP=$(mktemp -d "${T}/.doins_tmp_XXXXXX")
411 > -# Use separate directories to avoid potential name collisions.
412 > -mkdir -p "$TMP"/{1,2}
413 > +# Use safe cwd, avoiding unsafe import for bug #469338.
414 > +export __PORTAGE_HELPER_CWD=${PWD}
415 > +cd "${PORTAGE_PYM_PATH:-/usr/lib/portage/pym}"
416 >
417 > -[[ ! -d ${ED}${INSDESTTREE} ]] && dodir "${INSDESTTREE}"
418 > -
419 > -_doins() {
420 > - local mysrc="$1" mydir="$2" cleanup="" rval
421 > -
422 > - if [ -L "$mysrc" ] ; then
423 > - # Our fake $DISTDIR contains symlinks that should
424 > - # not be reproduced inside $D. In order to ensure
425 > - # that things like dodoc "$DISTDIR"/foo.pdf work
426 > - # as expected, we dereference symlinked files that
427 > - # refer to absolute paths inside
428 > - # $PORTAGE_ACTUAL_DISTDIR/.
429 > - if [ $PRESERVE_SYMLINKS = y ] && \
430 > - ! [[ $(readlink "$mysrc") == "$PORTAGE_ACTUAL_DISTDIR"/* ]] ; then
431 > - rm -rf "${ED}$INSDESTTREE/$mydir/${mysrc##*/}" || return $?
432 > - cp -P "$mysrc" "${ED}$INSDESTTREE/$mydir/${mysrc##*/}"
433 > - return $?
434 > - else
435 > - cp "$mysrc" "$TMP/2/${mysrc##*/}" || return $?
436 > - mysrc="$TMP/2/${mysrc##*/}"
437 > - cleanup=$mysrc
438 > - fi
439 > - fi
440 > -
441 > - install ${INSOPTIONS} "${mysrc}" "${ED}${INSDESTTREE}/${mydir}"
442 > - rval=$?
443 > - [[ -n ${cleanup} ]] && rm -f "${cleanup}"
444 > - [ $rval -ne 0 ] && echo "!!! ${helper}: $mysrc does not exist" 1>&2
445 > - return $rval
446 > -}
447 > -
448 > -_xdoins() {
449 > - local -i failed=0
450 > - while read -r -d $'\0' x ; do
451 > - _doins "$x" "${x%/*}"
452 > - ((failed|=$?))
453 > - done
454 > - return $failed
455 > -}
456 > -
457 > -success=0
458 > -failed=0
459 > -
460 > -for x in "$@" ; do
461 > - if [[ $PRESERVE_SYMLINKS = n && -d $x ]] || \
462 > - [[ $PRESERVE_SYMLINKS = y && -d $x && ! -L $x ]] ; then
463 > - if [ "${DOINSRECUR}" == "n" ] ; then
464 > - if [[ ${helper} == dodoc ]] ; then
465 > - echo "!!! ${helper}: $x is a directory" 1>&2
466 > - ((failed|=1))
467 > - fi
468 > - continue
469 > - fi
470 > -
471 > - while [ "$x" != "${x%/}" ] ; do
472 > - x=${x%/}
473 > - done
474 > - if [ "$x" = "${x%/*}" ] ; then
475 > - pushd "$PWD" >/dev/null
476 > - else
477 > - pushd "${x%/*}" >/dev/null
478 > - fi
479 > - x=${x##*/}
480 > - x_orig=$x
481 > - # Follow any symlinks recursively until we've got
482 > - # a normal directory for 'find' to traverse. The
483 > - # name of the symlink will be used for the name
484 > - # of the installed directory, as discussed in
485 > - # bug #239529.
486 > - while [ -L "$x" ] ; do
487 > - pushd "$(readlink "$x")" >/dev/null
488 > - x=${PWD##*/}
489 > - pushd "${PWD%/*}" >/dev/null
490 > - done
491 > - if [[ $x != $x_orig ]] ; then
492 > - mv "$x" "$TMP/1/$x_orig"
493 > - pushd "$TMP/1" >/dev/null
494 > - fi
495 > - find "$x_orig" -type d -exec dodir "${INSDESTTREE}/{}" \;
496 > - find "$x_orig" \( -type f -or -type l \) -print0 | _xdoins
497 > - if [[ ${PIPESTATUS[1]} -eq 0 ]] ; then
498 > - # NOTE: Even if only an empty directory is installed here, it
499 > - # still counts as success, since an empty directory given as
500 > - # an argument to doins -r should not trigger failure.
501 > - ((success|=1))
502 > - else
503 > - ((failed|=1))
504 > - fi
505 > - if [[ $x != $x_orig ]] ; then
506 > - popd >/dev/null
507 > - mv "$TMP/1/$x_orig" "$x"
508 > - fi
509 > - while popd >/dev/null 2>&1 ; do true ; done
510 > - else
511 > - _doins "${x}"
512 > - if [[ $? -eq 0 ]] ; then
513 > - ((success|=1))
514 > - else
515 > - ((failed|=1))
516 > - fi
517 > - fi
518 > -done
519 > -rm -rf "$TMP"
520 > -[[ $failed -ne 0 || $success -eq 0 ]] && { __helpers_die "${helper} failed"; exit 1; } || exit 0
521 > +"${PORTAGE_PYTHON:-/usr/bin/python}" \
522 > + "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/doins.py \
523 > + ${RECURSIVE_OPTION} ${SYMLINK_OPTION} \
524 > + --helper "${helper}" --dest "${ED}${INSDESTTREE}" "$@" || \
525 > +{ __helpers_die "${helper} failed"; exit 1; }
526
527 To be honest, I don't like the idea of using more Python inside ebuild
528 helpers. But if you're sure this is safe and not going to collide with
529 ebuilds doing random stuff with Python, feel free to proceed with it.
530
531 --
532 Best regards,
533 Michał Górny