1 |
On Sun, 2018-11-18 at 15:37 +0800, Jason Zaman wrote: |
2 |
> On Sat, Nov 17, 2018 at 11:54:24PM +0100, Michał Górny wrote: |
3 |
> > On Sun, 2018-11-18 at 03:37 +0800, Jason Zaman wrote: |
4 |
> > > Hey all, |
5 |
> > > |
6 |
> > > I've been using Bazel (https://bazel.build/) to build TensorFlow for a |
7 |
> > > while now. Here is a bazel.eclass I'd like to commit to make it easier |
8 |
> > > for packages that use it to build. It's basically bits that I've |
9 |
> > > refactored out of the TensorFlow ebuild that would be useful to other |
10 |
> > > packages as well. I have a bump to sci-libs/tensorflow-1.12.0 prepared |
11 |
> > > that uses this eclass and have tested a full install. |
12 |
> > > |
13 |
> > > -- Jason |
14 |
> > > |
15 |
> > > # Copyright 1999-2018 Jason Zaman |
16 |
> > > # Distributed under the terms of the GNU General Public License v2 |
17 |
> > > |
18 |
> > > # @ECLASS: bazel.eclass |
19 |
> > > # @MAINTAINER: |
20 |
> > > # Jason Zaman <perfinion@g.o> |
21 |
> > > # @AUTHOR: |
22 |
> > > # Jason Zaman <perfinion@g.o> |
23 |
> > > # @BLURB: Utility functions for packages using Bazel Build |
24 |
> > > # @DESCRIPTION: |
25 |
> > > # A utility eclass providing functions to run the Bazel Build system. |
26 |
> > > # |
27 |
> > > # This eclass does not export any phase functions. |
28 |
> > > |
29 |
> > > case "${EAPI:-0}" in |
30 |
> > > 0|1|2|3|4|5|6) |
31 |
> > > die "Unsupported EAPI=${EAPI:-0} (too old) for ${ECLASS}" |
32 |
> > > ;; |
33 |
> > > 7) |
34 |
> > > ;; |
35 |
> > > *) |
36 |
> > > die "Unsupported EAPI=${EAPI} (unknown) for ${ECLASS}" |
37 |
> > > ;; |
38 |
> > > esac |
39 |
> > > |
40 |
> > > if [[ ! ${_BAZEL_ECLASS} ]]; then |
41 |
> > > |
42 |
> > > inherit multiprocessing toolchain-funcs |
43 |
> > > |
44 |
> > > BDEPEND=">=dev-util/bazel-0.19" |
45 |
> > > |
46 |
> > > # @FUNCTION: bazel_get_flags |
47 |
> > > # @DESCRIPTION: |
48 |
> > > # Obtain and print the bazel flags for target and host *FLAGS. |
49 |
> > > # |
50 |
> > > # To add more flags to this, append the flags to the |
51 |
> > > # appropriate variable before calling this function |
52 |
> > > bazel_get_flags() { |
53 |
> > > local i fs=() |
54 |
> > > for i in ${CFLAGS}; do |
55 |
> > > fs+=( "--conlyopt=${i}" ) |
56 |
> > > done |
57 |
> > > for i in ${BUILD_CFLAGS}; do |
58 |
> > > fs+=( "--host_conlyopt=${i}" ) |
59 |
> > > done |
60 |
> > > for i in ${CXXFLAGS}; do |
61 |
> > > fs+=( "--cxxopt=${i}" ) |
62 |
> > > done |
63 |
> > > for i in ${BUILD_CXXFLAGS}; do |
64 |
> > > fs+=( "--host_cxxopt=${i}" ) |
65 |
> > > done |
66 |
> > > for i in ${CPPFLAGS}; do |
67 |
> > > fs+=( "--conlyopt=${i}" "--cxxopt=${i}" ) |
68 |
> > > done |
69 |
> > > for i in ${BUILD_CPPFLAGS}; do |
70 |
> > > fs+=( "--host_conlyopt=${i}" "--host_cxxopt=${i}" ) |
71 |
> > > done |
72 |
> > > for i in ${LDFLAGS}; do |
73 |
> > > fs+=( "--linkopt=${i}" ) |
74 |
> > > done |
75 |
> > > for i in ${BUILD_LDFLAGS}; do |
76 |
> > > fs+=( "--host_linkopt=${i}" ) |
77 |
> > > done |
78 |
> > > echo "${fs[*]}" |
79 |
> > > } |
80 |
> > > |
81 |
> > > # @FUNCTION: bazel_setup_bazelrc |
82 |
> > > # @DESCRIPTION: |
83 |
> > > # Creates the bazelrc with common options that will be passed |
84 |
> > > # to bazel. This will be called by ebazel automatically so |
85 |
> > > # does not need to be called from the ebuild. |
86 |
> > > bazel_setup_bazelrc() { |
87 |
> > > if [[ -f "${T}/bazelrc" ]]; then |
88 |
> > > return |
89 |
> > > fi |
90 |
> > > |
91 |
> > > # F: fopen_wr |
92 |
> > > # P: /proc/self/setgroups |
93 |
> > > # Even with standalone enabled, the Bazel sandbox binary is run for feature test: |
94 |
> > > # https://github.com/bazelbuild/bazel/blob/7b091c1397a82258e26ab5336df6c8dae1d97384/src/main/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunner.java#L61 |
95 |
> > > # https://github.com/bazelbuild/bazel/blob/76555482873ffcf1d32fb40106f89231b37f850a/src/main/tools/linux-sandbox-pid1.cc#L113 |
96 |
> > > addpredict /proc |
97 |
> > > |
98 |
> > > mkdir -p "${T}/bazel-cache" || die |
99 |
> > > mkdir -p "${T}/bazel-distdir" || die |
100 |
> > > |
101 |
> > > cat > "${T}/bazelrc" <<-EOF || die |
102 |
> > > startup --batch |
103 |
> > |
104 |
> > Maybe indent this stuff to make it stand out from ebuild code. |
105 |
> > |
106 |
> > > |
107 |
> > > # dont strip HOME, portage sets a temp per-package dir |
108 |
> > > build --action_env HOME |
109 |
> > > |
110 |
> > > # make bazel respect MAKEOPTS |
111 |
> > > build --jobs=$(makeopts_jobs) |
112 |
> > > build --compilation_mode=opt --host_compilation_mode=opt |
113 |
> > > |
114 |
> > > # FLAGS |
115 |
> > > build $(bazel_get_flags) |
116 |
> > > |
117 |
> > > # Use standalone strategy to deactivate the bazel sandbox, since it |
118 |
> > > # conflicts with FEATURES=sandbox. |
119 |
> > > build --spawn_strategy=standalone --genrule_strategy=standalone |
120 |
> > > test --spawn_strategy=standalone --genrule_strategy=standalone |
121 |
> > > |
122 |
> > > build --strip=never |
123 |
> > > build --verbose_failures --noshow_loading_progress |
124 |
> > > test --verbose_test_summary --verbose_failures --noshow_loading_progress |
125 |
> > > |
126 |
> > > # make bazel only fetch distfiles from the cache |
127 |
> > > fetch --repository_cache="${T}/bazel-cache/" --distdir="${T}/bazel-distdir/" |
128 |
> > > build --repository_cache="${T}/bazel-cache/" --distdir="${T}/bazel-distdir/" |
129 |
> > > |
130 |
> > > build --define=PREFIX=${EPREFIX%/}/usr |
131 |
> > > build --define=LIBDIR=\$(PREFIX)/$(get_libdir) |
132 |
> > > |
133 |
> > > EOF |
134 |
> > > |
135 |
> > > tc-is-cross-compiler || \ |
136 |
> > > echo "build --nodistinct_host_configuration" >> "${T}/bazelrc" || die |
137 |
> > |
138 |
> > Don't do || chains, they are unreadable. |
139 |
> |
140 |
> ok |
141 |
> |
142 |
> > > } |
143 |
> > > |
144 |
> > > # @FUNCTION: ebazel |
145 |
> > > # @USAGE: [<args>...] |
146 |
> > > # @DESCRIPTION: |
147 |
> > > # Run bazel with the bazelrc and output_base. |
148 |
> > > # |
149 |
> > > # If $MULTIBUILD_VARIANT is set, this will make an output_base |
150 |
> > > # specific to that variant. |
151 |
> > > # bazel_setup_bazelrc will be called and the created bazelrc |
152 |
> > > # will be passed to bazel. |
153 |
> > > # |
154 |
> > > # Will automatically die if bazel does not exit cleanly. |
155 |
> > > ebazel() { |
156 |
> > > bazel_setup_bazelrc |
157 |
> > > |
158 |
> > > # Use different build folders for each multibuild variant. |
159 |
> > > local base_suffix="${MULTIBUILD_VARIANT+-}${MULTIBUILD_VARIANT}" |
160 |
> > |
161 |
> > Any reason not to use BUILD_DIR instead of reinventing it? |
162 |
> |
163 |
> Isnt $BUILD_DIR $S by default? output_base is a bazel thing with a weird |
164 |
> structure and has nothing to do with the source tree. Doing it this way |
165 |
> meant python_foreach_impl was easy. but I might be confused and will |
166 |
> look into it again and see if it can be better. this way means with and |
167 |
> without multibuild just works tho. |
168 |
|
169 |
There's no default. It's defined by the few eclasses such as multibuild |
170 |
and cmake-utils. You just need to default if it other eclasses don't |
171 |
set it. |
172 |
|
173 |
> |
174 |
> > > local output_base="${WORKDIR}/bazel-base${base_suffix}" |
175 |
> > > mkdir -p "${output_base}" || die |
176 |
> > > |
177 |
> > > einfo Running: bazel --output_base="${output_base}" "$@" |
178 |
> > > bazel --bazelrc="${T}/bazelrc" --output_base="${output_base}" $@ || die "ebazel $@" |
179 |
> > |
180 |
> > The common practice is to echo >&2 it. Also, you output different |
181 |
> > arguments than you execute which is going to confuse the hell out of |
182 |
> > users who'll end up having to debug this. You can use a trick like |
183 |
> > the following to avoid typing args twice: |
184 |
> > |
185 |
> > set -- bazel --bazelrc... |
186 |
> > echo "${*}" >&2 |
187 |
> > "${@}" || die ... |
188 |
> |
189 |
> Oh, forgot that trick. yeah I'll do that. |
190 |
> |
191 |
> > > } |
192 |
> > > |
193 |
> > > # @FUNCTION: bazel_load_distfiles |
194 |
> > > # @USAGE: <distfiles>... |
195 |
> > > # @DESCRIPTION: |
196 |
> > > # Populate the bazel distdir to fetch from since it cannot use |
197 |
> > > # the network. Bazel looks in distdir but will only look for the |
198 |
> > > # original filename, not the possibly renamed one that portage |
199 |
> > > # downloaded. If the line has -> we to rename it back. This also |
200 |
> > > # handles use-conditionals that SRC_URI does. |
201 |
> > |
202 |
> > Why oh why do you have to implement custom parser for the ebuild syntax? |
203 |
> > That's just asking for horrible failures. |
204 |
> |
205 |
> Yeah ... so thats the problem with bazel and the main reason for the |
206 |
> eclass. Bazel has this idea that downloading and unpacking random |
207 |
> tarballs during build is okay. Now bazel has an option when it needs to |
208 |
> fetch something it will look in the dir --distdir= points to instead and |
209 |
> read the file instead of trying to fetch from the (sandboxed thus |
210 |
> non-existent) internet. But bazel only knows the original filenames not |
211 |
> the names that portage renamed things to so I need to rename them back |
212 |
> to the original names. |
213 |
> |
214 |
> The other option would be to have ebuilds maintain a second list of urls |
215 |
> on top of the SRC_URI one which seems way more of a maintenance burden, |
216 |
> especially when use-flags are involved. So all I really do here is strip |
217 |
> the use? and ()'s and take the list of urls and put them in the |
218 |
> bazel-distdir with the original filenames. |
219 |
> |
220 |
> If you can think of a better way to accomplish that I'd love to hear it |
221 |
> tho. I'll add more comments about what it does but bazel is weird and |
222 |
> just utterly fails if it cant read the tarballs. And unpacking things |
223 |
> myself doesnt work because of the weird file structure bazel uses so |
224 |
> knowing where to put them is non-trivial. |
225 |
|
226 |
What if the original URL goes down and somebody replaces 'foo -> bar' |
227 |
with 'mirror://gentoo/bar'? |
228 |
|
229 |
> |
230 |
> > > # |
231 |
> > > # Example: |
232 |
> > > # @CODE |
233 |
> > > # bazel_external_uris="http://a/file-2.0.tgz |
234 |
> > > # python? ( http://b/1.0.tgz -> foo-1.0.tgz )" |
235 |
> > > # SRC_URI="http://c/${PV}.tgz |
236 |
> > > # ${bazel_external_uris}" |
237 |
> > > # |
238 |
> > > # src_unpack() { |
239 |
> > > # unpack ${PV}.tgz |
240 |
> > > # bazel_load_distfiles "${bazel_external_uris}" |
241 |
> > > # } |
242 |
> > > # @CODE |
243 |
> > > bazel_load_distfiles() { |
244 |
> > > local src dst uri rename |
245 |
> > > |
246 |
> > > [[ "$@" ]] || die "Missing args" |
247 |
> > > mkdir -p "${T}/bazel-distdir" || die |
248 |
> > > |
249 |
> > > while read uri rename dst; do |
250 |
> > > src="${uri##*/}" |
251 |
> > > [[ -z $src ]] && continue |
252 |
> > |
253 |
> > Please use ${foo} syntax in ebuilds, consistently. |
254 |
> |
255 |
> ok |
256 |
> |
257 |
> > > if [[ "$rename" != "->" ]]; then |
258 |
> > > dst="${src}" |
259 |
> > > fi |
260 |
> > > |
261 |
> > > [[ ${A} =~ ${dst} ]] || continue |
262 |
> > |
263 |
> > Why are you doing regex match here? Last I checked, we didn't use |
264 |
> > regular expressions in SRC_URI. |
265 |
> |
266 |
> It was to skip this one if its not in $A (eg disabled with a use-flag) |
267 |
> I'll change it to == *${dst}* instead then. |
268 |
> |
269 |
> > > |
270 |
> > > if [[ "$dst" == "$src" ]]; then |
271 |
> > > einfo "Copying $dst to bazel distdir ..." |
272 |
> > > else |
273 |
> > > einfo "Copying $dst to bazel distdir $src ..." |
274 |
> > |
275 |
> > Are you using src and dst to mean the opposite? |
276 |
> |
277 |
> No, this is right. its SRC_URI="src -> dst" so I need to rename dst back |
278 |
> so its named src. |
279 |
> |
280 |
> > > fi |
281 |
> > > dst="$(readlink -f "${DISTDIR}/${dst}")" |
282 |
> > |
283 |
> > Looks like you are hardcoding hacks for implementation details which |
284 |
> > indicates whatever you're doing is a very bad idea, and is going to fail |
285 |
> > whenever the implementation is subtly different than what you've worked |
286 |
> > around so far. |
287 |
> |
288 |
> I can skip this line then no problem. It will just end up a link to a |
289 |
> link but thats fine. |
290 |
> |
291 |
> > > ln -s "${dst}" "${T}/bazel-distdir/${src}" || die |
292 |
> > > done <<< "$(sed -re 's/!?[A-Za-z]+\?\s+\(\s*//g; s/\s+\)//g' <<< "$@")" |
293 |
> > |
294 |
> > Please don't use horribly unreadable sed expressions. This just means |
295 |
> > that whoever will have to touch this eclass in the future will wish you |
296 |
> > were never recruited. |
297 |
> |
298 |
> I will split it up and add more comments. It just removes "use? (", |
299 |
> "!use? (", and " )". If I don't remove the useflag bits first then figuring out |
300 |
> if its a rename or not becomes a lot more complicated. If you have |
301 |
> other ideas im all ears. |
302 |
|
303 |
The syntax is token-oriented, so just tokenize it and use a state |
304 |
machine to parse it if you must. |
305 |
|
306 |
> |
307 |
> |
308 |
> > |
309 |
> > > } |
310 |
> > > |
311 |
> > > _BAZEL_ECLASS=1 |
312 |
> > > fi |
313 |
> > > |
314 |
> > > |
315 |
> > > |
316 |
> > |
317 |
> > -- |
318 |
> > Best regards, |
319 |
> > Michał Górny |
320 |
> |
321 |
> |
322 |
> |
323 |
|
324 |
-- |
325 |
Best regards, |
326 |
Michał Górny |