Gentoo Archives: gentoo-dev

From: Mike Frysinger <vapier@g.o>
To: gentoo-dev@l.g.o
Subject: Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
Date: Sat, 02 Jun 2012 19:55:11
Message-Id: 201206021554.04552.vapier@gentoo.org
In Reply to: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash by Mike Frysinger
1 v2
2 -mike
3
4 # Copyright 1999-2012 Gentoo Foundation
5 # Distributed under the terms of the GNU General Public License v2
6 # $Header: $
7
8 # @ECLASS: multiprocessing.eclass
9 # @MAINTAINER:
10 # base-system@g.o
11 # @AUTHOR:
12 # Brian Harring <ferringb@g.o>
13 # Mike Frysinger <vapier@g.o>
14 # @BLURB: parallelization with bash (wtf?)
15 # @DESCRIPTION:
16 # The multiprocessing eclass contains a suite of functions that allow ebuilds
17 # to quickly run things in parallel using shell code.
18 # @EXAMPLE:
19 #
20 # @CODE
21 # # First initialize things:
22 # multijob_init
23 #
24 # # Then hash a bunch of files in parallel:
25 # for n in {0..20} ; do
26 # multijob_child_init md5sum data.${n} > data.${n}.md5
27 # done
28 #
29 # # Then wait for all the children to finish:
30 # multijob_finish
31 # @CODE
32
33 if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
34 ___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"
35
# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# e.g. if the user has MAKEOPTS=-j9, this will echo "9" -- the number is
# printed on stdout rather than returned because bash normalizes return
# values to [0, 255].  The short flag is also recognized when it is bundled
# with other short options (e.g. -kj9).  If the flags haven't specified a -j
# flag, then "1" is shown as that is the default `make` uses.  Since there's
# no way to represent infinity, we echo 999 if the user has -j without a number.
makeopts_jobs() {
	# Quoted on purpose: an unquoted expansion would glob any wildcard
	# characters in MAKEOPTS against the current directory.
	[[ $# -eq 0 ]] && set -- "${MAKEOPTS}"

	# Pad with blanks so every flag is delimited by whitespace on both
	# sides, and fold tabs/newlines into blanks so sed sees a single line.
	local opts=" $* "
	opts=${opts//[[:space:]]/ }

	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	# The first expression handles a flag with a number; once it has
	# rewritten the line to the bare number the second one (flag without
	# a number) can no longer match.
	local jobs
	jobs=$(printf '%s\n' "${opts}" | sed -r -n \
		-e 's:.*[[:space:]](-[a-z]*j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-[a-z]*j|--jobs)[[:space:]].*:999:p')
	echo "${jobs:-1}"
}
54
# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it.  Store the new
# fd in the specified variable.  Useful for the cases where we don't care
# about the exact fd #.  The redirection operator defaults to "<>" (open
# for reading and writing).  <var> must not be one of the local names used
# in here (var, file, redir, fd) or the result is lost when we return.
# Returns non-zero if the redirection failed or no fd could be allocated.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash (4.1+) provides this functionality.
		# ${file} is escaped so that eval expands it (quoted) itself;
		# pasting the name into the command text would break on, or
		# execute, any quote characters it contains.
		eval "exec {${var}}${redir}\"\${file}\""
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			# An fd that is not listed in /dev/fd is not open yet.
			if [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}\"\${file}\"" && break
			fi
			[[ ${fd} -gt 1024 ]] && return 1 # sanity
			: $(( ++fd ))
		done
		: $(( ${var} = fd ))
	fi
}
80
# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
# Any arguments are handed to makeopts_jobs to work out the job limit.
# Sets the globals mj_control_pipe, mj_control_fd, mj_max_jobs and
# mj_num_jobs, and registers `wait` in EBUILD_DEATH_HOOKS.  Dies if the
# control pipe cannot be created or opened.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait"

	# Setup a pipe for children to write their pids to when they finish.
	# Without a working pipe the parent could never collect a child, so
	# failures here are fatal instead of being silently ignored.
	mj_control_pipe="${T}/multijob.pipe"
	mkfifo "${mj_control_pipe}" \
		|| die "${FUNCNAME}: unable to create ${mj_control_pipe}"
	redirect_alloc_fd mj_control_fd "${mj_control_pipe}" \
		|| die "${FUNCNAME}: unable to open ${mj_control_pipe}"
	# The open fd keeps the pipe usable; the name is no longer needed.
	rm -f "${mj_control_pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}
101
# @FUNCTION: multijob_child_init
# @USAGE: [command to run in background]
# @DESCRIPTION:
# This function comes in two flavours.  Given a command line, it runs that
# command in a background subshell and does the parent side bookkeeping for
# you.  Given no arguments, it prepares the current (already forked) child:
# call it first thing in the child, and call multijob_post_fork in the
# parent yourself.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
#
# # 2nd form: execute multiple stuff in the background:
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# echo "YEAH"
# fi
# ) &
# multijob_post_fork
# @CODE
multijob_child_init() {
	if [[ $# -gt 0 ]] ; then
		# 1st form: fork a subshell which sets itself up via the 2nd
		# form and then runs the command; the parent accounts for it.
		( multijob_child_init ; "$@" ) &
		multijob_post_fork
		return
	fi

	# 2nd form: when this child exits, report "<pid> <exit status>" on
	# the control fd.  The fd number is baked into the trap text now.
	trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
	# Turn INT/TERM into a normal exit so the EXIT trap still fires.
	trap 'exit 1' INT TERM
}
132
# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# It counts the new child and, if the parallel limit has been hit, waits
# for one child to finish and returns that child's exit status.  Returns 0
# when there was no need to wait.
multijob_post_fork() {
	if [[ $# -ne 0 ]] ; then
		die "${FUNCNAME} takes no arguments"
	fi

	mj_num_jobs=$(( mj_num_jobs + 1 ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		# At the limit: block until a slot frees up and hand back
		# the status of the child that freed it.
		multijob_finish_one
		return
	fi
	return 0
}
147
# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Block until one child reports in on the control fd, drop it from the
# running job count and return the exit code it reported.
multijob_finish_one() {
	if [[ $# -ne 0 ]] ; then
		die "${FUNCNAME} takes no arguments"
	fi

	# Each finished child writes one "<pid> <exit status>" line.
	local child_pid child_status
	if ! read -r -u ${mj_control_fd} child_pid child_status ; then
		die
	fi
	mj_num_jobs=$(( mj_num_jobs - 1 ))
	return ${child_status}
}
159
# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Collect every child that is still pending and return the bitwise or
# of all the exit codes they reported.
multijob_finish() {
	local combined=0
	while (( mj_num_jobs > 0 )) ; do
		multijob_finish_one
		combined=$(( combined | $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# The argument check is deliberately done only after all the
	# children have been reaped.
	if [[ $# -ne 0 ]] ; then
		die "${FUNCNAME} takes no arguments"
	fi

	return ${combined}
}
178
179 fi

Attachments

File name MIME type
signature.asc application/pgp-signature

Replies