Gentoo Archives: gentoo-dev

From: Mike Frysinger <vapier@g.o>
To: gentoo-dev@l.g.o
Subject: Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
Date: Sun, 03 Jun 2012 05:09:29
Message-Id: 201206030108.30493.vapier@gentoo.org
In Reply to: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash by Mike Frysinger
1 v3
2 -mike
3
4 # Copyright 1999-2012 Gentoo Foundation
5 # Distributed under the terms of the GNU General Public License v2
6 # $Header: $
7
8 # @ECLASS: multiprocessing.eclass
9 # @MAINTAINER:
10 # base-system@g.o
11 # @AUTHOR:
12 # Brian Harring <ferringb@g.o>
13 # Mike Frysinger <vapier@g.o>
14 # @BLURB: parallelization with bash (wtf?)
15 # @DESCRIPTION:
16 # The multiprocessing eclass contains a suite of functions that allow ebuilds
17 # to quickly run things in parallel using shell code.
18 #
19 # It has two modes: pre-fork and post-fork. If you don't want to dive into any
20 # more nuts & bolts, just use the pre-fork mode. For main threads that mostly
21 # spawn children and then wait for them to finish, use the pre-fork mode. For
22 # main threads that do a bit of processing themselves, use the post-fork mode.
23 # You may mix & match them for longer computation loops.
24 # @EXAMPLE:
25 #
26 # @CODE
27 # # First initialize things:
28 # multijob_init
29 #
30 # # Then hash a bunch of files in parallel:
31 # for n in {0..20} ; do
32 # multijob_child_init md5sum data.${n} > sum.${n}
33 # done
34 #
35 # # Then wait for all the children to finish:
36 # multijob_finish
37 # @CODE
38
39 if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
40 ___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"
41
# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
# number as bash normalizes it to [0, 255].  If the flags haven't specified a
# -j flag, then "1" is shown as that is the default `make` uses.  Since there's
# no way to represent infinity, we echo 999 if the user has -j without a number.
# Short options bundled into one word (e.g. -kj4) are recognized too.
makeopts_jobs() {
	# Intentionally unquoted: split ${MAKEOPTS} into words so that all
	# whitespace (including newlines) is normalized to single spaces below.
	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}

	# Declare separately so the assignment does not mask the pipeline status.
	local jobs
	# The input is padded with spaces so every option is delimited by
	# whitespace on both sides.
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?"), which
	# makes the last -j/--jobs on the command line win.
	# First expression: a jobs flag followed by a number -> that number.
	# Second expression: a bare jobs flag -> 999 ("infinite").
	jobs=$(printf ' %s \n' "$*" | sed -r -n \
		-e 's:.*[[:space:]](-[a-z]*j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-[a-z]*j|--jobs)[[:space:]].*:999:p')
	echo "${jobs:-1}"
}
60
# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
# Sets the globals mj_control_fd (fd of the control pipe), mj_max_jobs (the
# job limit as computed by makeopts_jobs from the arguments) and mj_num_jobs
# (reset to 0).  Dies if the control pipe cannot be created or opened.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait"

	# Setup a pipe for children to write their pids to when they finish.
	local pipe="${T}/multijob.pipe"
	# Clear out anything left behind at this path (e.g. by an aborted run);
	# otherwise mkfifo fails and we would open whatever is lying there.
	rm -f "${pipe}"
	mkfifo -m 600 "${pipe}" || die "unable to create fifo '${pipe}'"
	redirect_alloc_fd mj_control_fd "${pipe}" || die "unable to open fifo '${pipe}'"
	# The open fd keeps the pipe alive; the name is no longer needed.
	rm -f "${pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}
81
# @FUNCTION: multijob_child_init
# @USAGE: [--pre|--post] [command to run in background]
# @DESCRIPTION:
# This function has two forms.  Given a command, it runs that command in a
# background subshell and does the job accounting itself.  Given no command,
# it must be the first thing called inside a child you forked yourself: it
# installs an EXIT trap that writes "<pid> <exit status>" to the control pipe
# and makes INT/TERM exit the child with status 1.
#
# The --pre/--post options select which accounting call is made around the
# fork in the first form (multijob_pre_fork or multijob_post_fork); the
# default is --pre.  The first form returns whatever that call returned.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
# # Or if you want to use pre/post fork modes:
# multijob_child_init --pre ls /dev
# multijob_child_init --post ls /dev
#
# # 2nd form: execute multiple stuff in the background (post fork):
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# multijob_post_fork
#
# # 2nd form: execute multiple stuff in the background (pre fork):
# multijob_pre_fork
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# @CODE
multijob_child_init() {
	# Consume an optional leading mode selector.
	local fork_mode="pre"
	if [[ $1 == "--pre" ]] ; then
		shift
	elif [[ $1 == "--post" ]] ; then
		fork_mode="post"
		shift
	fi

	if [[ $# -gt 0 ]] ; then
		# 1st form: fork the command ourselves and do the accounting
		# on the requested side of the fork.
		local rc
		if [[ ${fork_mode} == "pre" ]] ; then
			multijob_pre_fork
			rc=$?
		fi
		(
			multijob_child_init
			"$@"
		) &
		if [[ ${fork_mode} == "post" ]] ; then
			multijob_post_fork
			rc=$?
		fi
		return ${rc}
	fi

	# 2nd form: we are inside the child.  The fd number is baked into the
	# trap string now; the pid and exit status are expanded when it fires.
	trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
	trap 'exit 1' INT TERM
}
136
# @FUNCTION: _multijob_fork
# @INTERNAL
# @USAGE: <pre|post>
# @DESCRIPTION:
# Do the actual book keeping.  Updates mj_num_jobs and, when the job limit
# (mj_max_jobs) has been reached, waits for one child via multijob_finish_one.
# Returns that child's exit status, or 0 if there was no need to wait.
_multijob_fork() {
	[[ $# -eq 1 ]] || die "incorrect number of arguments"

	local ret=0
	# post: the child is already running, so count it before checking
	# the limit; otherwise one job too many would run concurrently.
	[[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
		ret=$?
	fi
	# pre: the child does not exist yet, so only count it after a slot has
	# been freed.  Counting it first would make the parent wait for a
	# child that was never forked (a guaranteed hang with -j1).
	[[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
	return ${ret}
}
153
# @FUNCTION: multijob_pre_fork
# @DESCRIPTION:
# Call this in the parent process right before forking a child process.
# When the parallel limit has been reached, it blocks until one child has
# finished and returns that child's exit status.
multijob_pre_fork() {
	# Any arguments are forwarded so _multijob_fork can reject them.
	_multijob_fork pre "$@"
}
160
# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# Call this in the parent process right after forking a child process.
# When the parallel limit has been reached, it blocks until one child has
# finished and returns that child's exit status.
multijob_post_fork() {
	# Any arguments are forwarded so _multijob_fork can reject them.
	_multijob_fork post "$@"
}
167
# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Block until one child reports completion on the control pipe, decrement
# the running job count and return that child's exit code.
multijob_finish_one() {
	(( $# == 0 )) || die "${FUNCNAME} takes no arguments"

	# Each finished child writes one "<pid> <exit status>" line.
	local child_pid child_status
	read -r -u ${mj_control_fd} child_pid child_status || die
	mj_num_jobs=$(( mj_num_jobs - 1 ))
	return ${child_status}
}
179
# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for every pending child to exit.  The return value is the bitwise or
# of all their exit codes.
multijob_finish() {
	local combined=0
	until [[ ${mj_num_jobs} -le 0 ]] ; do
		multijob_finish_one
		combined=$(( combined | $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# The usage check is deliberately done only after the children have
	# been reaped.
	(( $# == 0 )) || die "${FUNCNAME} takes no arguments"

	return ${combined}
}
198
# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it.  Store the new
# fd in the specified variable.  Useful for the cases where we don't care
# about the exact fd #.  The redirection operator defaults to "<>"
# (open for reading and writing).  In the fallback path used for bash older
# than 4.1, returns 1 if no free fd is found by the time fd 1025 is reached.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	# In both eval calls below, ${file} is left for the eval'd command to
	# expand inside double quotes rather than being pasted into the string,
	# so paths containing quotes or other shell metacharacters are safe.
	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash (4.1+) provides this functionality: {var} makes the
		# shell pick a free fd and store it in var.
		eval "exec {${var}}${redir}\"\${file}\""
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			# Make sure the fd isn't open.  It could be a char device,
			# or a symlink (possibly broken) to something else.
			if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}\"\${file}\"" && break
			fi
			[[ ${fd} -gt 1024 ]] && return 1 # sanity
			: $(( ++fd ))
		done
		# Hand the chosen fd number back through the caller's variable.
		: $(( ${var} = fd ))
	fi
}
226
227 fi

Attachments

File name MIME type
signature.asc application/pgp-signature

Replies