Gentoo Archives: gentoo-portage-dev

From: Mike Frysinger <vapier@g.o>
To: gentoo-portage-dev@l.g.o
Subject: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
Date: Fri, 11 May 2012 18:08:45
Message-Id: 1336754399-559-1-git-send-email-vapier@gentoo.org
1 Stealing some ideas from ferringb, add a new API for doing parallel
2 processing in bash, and then deploy this with the stripping and
3 compressing stages.
4
5 When stripping coreutils, which has about 100 ELFs, this brings the
6 strip time down from ~7 seconds to ~0.7 seconds on my system.
7
8 Signed-off-by: Mike Frysinger <vapier@g.o>
9 ---
10 note: I'm not terribly happy with the name "helper-functions.sh", so any
11 better suggestions would be good. I didn't want to use "ebuild-helpers.sh"
12 as that messes up tab completion ;).
13
14 bin/ebuild-helpers/ecompressdir | 30 ++++++++++++++++---
15 bin/ebuild-helpers/prepstrip | 20 ++++++++++---
16 bin/helper-functions.sh | 62 +++++++++++++++++++++++++++++++++++++++
17 3 files changed, 104 insertions(+), 8 deletions(-)
18 create mode 100644 bin/helper-functions.sh
19
20 diff --git a/bin/ebuild-helpers/ecompressdir b/bin/ebuild-helpers/ecompressdir
21 index 17ecd80..a2c9e52 100755
22 --- a/bin/ebuild-helpers/ecompressdir
23 +++ b/bin/ebuild-helpers/ecompressdir
24 @@ -2,7 +2,7 @@
25 # Copyright 1999-2011 Gentoo Foundation
26 # Distributed under the terms of the GNU General Public License v2
27
28 -source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
29 +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh
30
31 if [[ -z $1 ]] ; then
32 helpers_die "${0##*/}: at least one argument needed"
33 @@ -116,6 +116,16 @@ ret=0
34
35 rm -rf "${T}"/ecompress-skip
36
37 +decompressors=(
38 + ".Z" "gunzip -f"
39 + ".gz" "gunzip -f"
40 + ".bz2" "bunzip2 -f"
41 + ".xz" "unxz -f"
42 + ".lzma" "unxz -f"
43 +)
44 +
45 +multijob_init
46 +
47 for dir in "$@" ; do
48 dir=${dir#/}
49 dir="${ED}${dir}"
50 @@ -136,14 +146,26 @@ for dir in "$@" ; do
51 find "${dir}" -type f -name '*.ecompress.file' -print0 | ${XARGS} -0 rm -f
52
53 # not uncommon for packages to compress doc files themselves
54 - funk_up_dir "decompress" ".Z" "gunzip -f"
55 - funk_up_dir "decompress" ".gz" "gunzip -f"
56 - funk_up_dir "decompress" ".bz2" "bunzip2 -f"
57 + for (( d = 0; d < ${#decompressors[@]}; d += 2 )) ; do # decompressors holds (suffix, command) pairs, hence the stride of 2
58 + # It's faster to parallelize at this stage than to try to
59 + # parallelize the compressors. This is because the find|xargs
60 + # ends up launching fewer compressors overall, so the overhead
61 + # of forking children ends up dominating.
62 + (
63 + multijob_child_init
64 + funk_up_dir "decompress" "${decompressors[d]}" "${decompressors[d+1]}"
65 + ) &
66 + multijob_post_fork
67 + : $(( ret |= $? ))
68 + done
69
70 # forcibly break all hard links as some compressors whine about it
71 find "${dir}" -type f -links +1 -exec env file="{}" sh -c \
72 'cp -p "${file}" "${file}.ecompress.break" ; mv -f "${file}.ecompress.break" "${file}"' \;
73
74 + multijob_finish
75 + : $(( ret |= $? ))
76 +
77 # now lets do our work
78 if [[ -n ${suffix} ]] ; then
79 vecho "${0##*/}: $(ecompress --bin) /${actual_dir#${ED}}"
80 diff --git a/bin/ebuild-helpers/prepstrip b/bin/ebuild-helpers/prepstrip
81 index daaa252..09b0333 100755
82 --- a/bin/ebuild-helpers/prepstrip
83 +++ b/bin/ebuild-helpers/prepstrip
84 @@ -1,8 +1,8 @@
85 #!/bin/bash
86 -# Copyright 1999-2011 Gentoo Foundation
87 +# Copyright 1999-2012 Gentoo Foundation
88 # Distributed under the terms of the GNU General Public License v2
89
90 -source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
91 +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh
92
93 # avoid multiple calls to `has`. this creates things like:
94 # FEATURES_foo=false
95 @@ -62,6 +62,8 @@ prepstrip_sources_dir=${EPREFIX}/usr/src/debug/${CATEGORY}/${PF}
96 type -P debugedit >/dev/null && debugedit_found=true || debugedit_found=false
97 debugedit_warned=false
98
99 +multijob_init
100 +
101 unset ${!INODE_*}
102
103 inode_var_name() {
104 @@ -171,6 +173,8 @@ process_elf() {
105 # We want to log already stripped binaries, as this may be a QA violation.
106 # They prevent us from getting the splitdebug data.
107 if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
108 + (
109 + multijob_child_init
110 log=$T/scanelf-already-stripped.log
111 qa_var="QA_PRESTRIPPED_${ARCH/-/_}"
112 [[ -n ${!qa_var} ]] && QA_PRESTRIPPED="${!qa_var}"
113 @@ -193,6 +197,8 @@ if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
114 else
115 rm -f "$log"
116 fi
117 + ) &
118 + multijob_post_fork
119 fi
120
121 # Now we look for unstripped binaries.
122 @@ -205,8 +211,10 @@ do
123 banner=true
124 fi
125
126 - f=$(file "${x}") || continue
127 - [[ -z ${f} ]] && continue
128 + (
129 + multijob_child_init
130 + f=$(file "${x}") || exit 0
131 + [[ -z ${f} ]] && exit 0
132
133 if ! ${SKIP_STRIP} ; then
134 # The noglob funk is to support STRIP_MASK="/*/booga" and to keep
135 @@ -253,6 +261,8 @@ do
136 if ${was_not_writable} ; then
137 chmod u-w "${x}"
138 fi
139 + ) &
140 + multijob_post_fork
141 done
142
143 if [[ -s ${T}/debug.sources ]] && \
144 @@ -274,3 +284,5 @@ then
145 >> "$emptydir"/.keepdir
146 done < <(find "${D}${prepstrip_sources_dir}/" -type d -empty -print0)
147 fi
148 +
149 +multijob_finish
150 diff --git a/bin/helper-functions.sh b/bin/helper-functions.sh
151 new file mode 100644
152 index 0000000..c69a41a
153 --- /dev/null
154 +++ b/bin/helper-functions.sh
155 @@ -0,0 +1,62 @@
156 +#!/bin/bash
157 +# Copyright 1999-2012 Gentoo Foundation
158 +# Distributed under the terms of the GNU General Public License v2
159 +
160 +# For routines we want to use in ebuild-helpers/ but don't want to
161 +# expose to the general ebuild environment.
162 +
163 +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
164 +
165 +#
166 +# API functions for doing parallel processing
167 +#
168 +numjobs() {
169 + # Copied from eutils.eclass:makeopts_jobs(). Prints the number given to -j/--jobs in MAKEOPTS (padded with spaces so the flag can match at either end).
170 + local jobs=$(echo " ${MAKEOPTS} " | \
171 + sed -r -n 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p')
172 + echo ${jobs:-1} # fall back to a single job when MAKEOPTS gives no count
173 +}
174 +
175 +multijob_init() {
176 + # Setup a pipe for children to write their pids and exit statuses to when they finish.
177 + mj_control_pipe=$(mktemp -t multijob.XXXXXX)
178 + rm "${mj_control_pipe}" # mktemp created a regular file; free the name for mkfifo
179 + mkfifo "${mj_control_pipe}"
180 + exec {mj_control_fd}<>"${mj_control_pipe}" # quoted: an unquoted path containing whitespace is an "ambiguous redirect"
181 + rm -f "${mj_control_pipe}" # the fd opened above stays usable after the name is removed
182 +
183 + # See how many children we can fork based on the user's settings.
184 + mj_max_jobs=$(numjobs)
185 + mj_num_jobs=0
186 +}
187 +
188 +multijob_child_init() { # called first inside each backgrounded subshell
189 + trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT # on exit, write "<pid> <exit status>" to the control fd
190 + trap 'exit 1' INT TERM # exit with status 1 on INT/TERM
191 +}
192 +
193 +multijob_finish_one() { # consume one child's report and return that child's exit status
194 + local pid ret
195 + read -r -u ${mj_control_fd} pid ret # reads one "<pid> <status>" line from the control fd; pid is not used further
196 + : $(( --mj_num_jobs )) # one fewer outstanding job
197 + return ${ret}
198 +}
199 +
200 +multijob_finish() { # wait for every outstanding child; returns non-zero if any of them failed
201 + local ret=0
202 + while [[ ${mj_num_jobs} -gt 0 ]] ; do
203 + multijob_finish_one
204 + : $(( ret |= $? )) # OR rather than sum: a sum can wrap to 0 once return truncates it to 8 bits
205 + done
206 + # Let bash clean up its internal child tracking state.
207 + wait
208 + return ${ret}
209 +}
210 +
211 +multijob_post_fork() { # call in the parent right after backgrounding a child
212 + : $(( ++mj_num_jobs ))
213 + if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
214 + multijob_finish_one # at the job limit: block until one child reports in
215 + fi
216 + return $? # status of the child reaped above, or 0 when none was reaped
217 +}
218 --
219 1.7.9.7

Replies