Gentoo Archives: gentoo-commits

From: Andrew Ammerlaan <andrewammerlaan@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-util/Tensile/, dev-util/Tensile/files/
Date: Sat, 05 Mar 2022 20:52:04
Message-Id: 1646513512.db81ee2a9a65c0aaebb4df461f8be388c66cdd16.andrewammerlaan@gentoo
1 commit: db81ee2a9a65c0aaebb4df461f8be388c66cdd16
2 Author: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org>
3 AuthorDate: Sat Mar 5 20:49:57 2022 +0000
4 Commit: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org>
5 CommitDate: Sat Mar 5 20:51:52 2022 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=db81ee2a
7
8 dev-util/Tensile: add new package, a dependency of rocBLAS
9
10 Package-Manager: Portage-3.0.30, Repoman-3.0.3
11 Signed-off-by: Andrew Ammerlaan <andrewammerlaan <AT> gentoo.org>
12
13 dev-util/Tensile/Manifest | 1 +
14 dev-util/Tensile/Tensile-4.3.0.ebuild | 74 +++++++++++++++++
15 ...ensile-4.3.0-hsaco-compile-specified-arch.patch | 96 ++++++++++++++++++++++
16 .../files/Tensile-4.3.0-output-commands.patch | 23 ++++++
17 dev-util/Tensile/metadata.xml | 11 +++
18 5 files changed, 205 insertions(+)
19
20 diff --git a/dev-util/Tensile/Manifest b/dev-util/Tensile/Manifest
21 new file mode 100644
22 index 000000000000..fd4cac044af8
23 --- /dev/null
24 +++ b/dev-util/Tensile/Manifest
25 @@ -0,0 +1 @@
26 +DIST rocm-Tensile-4.3.0.tar.gz 14250149 BLAKE2B 030138eaca2a0aadd96801e6dbd72e510716dd90553ef3795c5e04e00a34a05ecae82b24f755e4033a4acfcdb1cf26291da1e7902bb090f89a010d403e832beb SHA512 126db0b413c716fba8d5be9bff7a44fd1badacbf32f3db8d0db649819177db37ebd56fd22dd3c809655f5d29675be115e698cd10bc3d0b4b23878ae3726fce47
27
28 diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild
29 new file mode 100644
30 index 000000000000..cf33fb19cf10
31 --- /dev/null
32 +++ b/dev-util/Tensile/Tensile-4.3.0.ebuild
33 @@ -0,0 +1,74 @@
34 +# Copyright 1999-2022 Gentoo Authors
35 +# Distributed under the terms of the GNU General Public License v2
36 +
37 +EAPI=8
38 +
39 +PYTHON_COMPAT=( python3_{8..10} )
40 +DISTUTILS_USE_PEP517=setuptools
41 +inherit distutils-r1
42 +
43 +DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
44 +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile"
45 +SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
46 +S="${WORKDIR}/${PN}-rocm-${PV}"
47 +
48 +LICENSE="MIT"
49 +KEYWORDS="~amd64"
50 +SLOT="0/$(ver_cut 1-2)"
51 +
52 +# Not compatible with recent versions of pytest
53 +RESTRICT="test"
54 +
55 +RDEPEND="${PYTHON_DEPS}
56 + dev-python/pyyaml[${PYTHON_USEDEP}]
57 + dev-python/msgpack[${PYTHON_USEDEP}]
58 +"
59 +DEPEND="${RDEPEND}
60 + dev-util/hip:${SLOT}
61 +"
62 +BDEPEND="test? (
63 + dev-util/rocminfo:${SLOT}
64 +)"
65 +
66 +PATCHES=(
67 + "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream; drop this patch in versions after 4.3.0
68 + "${FILESDIR}/Tensile-4.3.0-output-commands.patch"
69 +)
70 +
71 +CMAKE_USE_DIR="${WORKDIR}/Source"
72 +
73 +distutils_enable_tests pytest
74 +
75 +src_prepare() {
76 + distutils-r1_src_prepare
77 +
78 + mv ${PN}/Source "${WORKDIR}"/ || die
79 + sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
80 + -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die
81 + sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
82 + -i "${WORKDIR}"/Source/CMakeLists.txt || die
83 +
84 + sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \
85 + -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \
86 + -i ${PN}/Common.py || die
87 +
88 + sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \
89 + -i ${PN}/__init__.py || die
90 +}
91 +
92 +src_test() {
93 + ROCM_PATH="${EPREFIX}/usr/" distutils-r1_src_test
94 +}
95 +
96 +src_install() {
97 + distutils-r1_src_install
98 +
99 + # Move the cmake files to the correct directory
100 + mkdir -p "${ED}/usr/$(get_libdir)/cmake/${PN}" || die
101 + mv "${ED}/usr/cmake/"* "${ED}/usr/$(get_libdir)/cmake/${PN}" || die
102 + rm -r "${ED}/usr/cmake" || die
103 +
104 + insinto /usr/share/${PN}
105 + doins -r "${WORKDIR}"/Source/*
106 + dosym . /usr/share/${PN}/Source
107 +}
108
109 diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
110 new file mode 100644
111 index 000000000000..8e6753781a2a
112 --- /dev/null
113 +++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
114 @@ -0,0 +1,96 @@
115 +https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395
116 +https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398
117 +
118 +--- a/Tensile/TensileCreateLibrary.py
119 ++++ b/Tensile/TensileCreateLibrary.py
120 +@@ -136,6 +136,35 @@ def which(p):
121 + return candidate
122 + return None
123 +
124 ++def splitArchs():
125 ++ # Helper for architecture
126 ++ def isSupported(arch):
127 ++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
128 ++ globalParameters["AsmCaps"][arch]["SupportedSource"]
129 ++
130 ++ if ";" in globalParameters["Architecture"]:
131 ++ wantedArchs = globalParameters["Architecture"].split(";")
132 ++ else:
133 ++ wantedArchs = globalParameters["Architecture"].split("_")
134 ++ archs = []
135 ++ cmdlineArchs = []
136 ++ if "all" in wantedArchs:
137 ++ for arch in globalParameters['SupportedISA']:
138 ++ if isSupported(arch):
139 ++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
140 ++ if (arch == (9,0,10)):
141 ++ archs += [gfxName(arch) + '-xnack+']
142 ++ cmdlineArchs += [gfxName(arch) + ':xnack+']
143 ++ archs += [gfxName(arch) + '-xnack-']
144 ++ cmdlineArchs += [gfxName(arch) + ':xnack-']
145 ++ else:
146 ++ archs += [gfxName(arch)]
147 ++ cmdlineArchs += [gfxName(arch)]
148 ++ else:
149 ++ for arch in wantedArchs:
150 ++ archs += [re.sub(":", "-", arch)]
151 ++ cmdlineArchs += [arch]
152 ++ return archs, cmdlineArchs
153 +
154 + def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
155 + buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp'))
156 +@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
157 + objectFilename = base + '.o'
158 + soFilename = base + '.so'
159 +
160 +- def isSupported(arch):
161 +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
162 +- globalParameters["AsmCaps"][arch]["SupportedSource"]
163 +-
164 + if (CxxCompiler == "hipcc"):
165 +- archs = []
166 +- cmdlineArchs = []
167 +- for arch in globalParameters['SupportedISA']:
168 +- if isSupported(arch):
169 +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
170 +- if (arch == (9,0,10)):
171 +- archs += [gfxName(arch) + '-xnack+']
172 +- cmdlineArchs += [gfxName(arch) + ':xnack+']
173 +- archs += [gfxName(arch) + '-xnack-']
174 +- cmdlineArchs += [gfxName(arch) + ':xnack-']
175 +- else:
176 +- archs += [gfxName(arch)]
177 +- cmdlineArchs += [gfxName(arch)]
178 ++ archs, cmdlineArchs = splitArchs()
179 +
180 + archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]
181 +
182 +@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
183 + sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source'])
184 + asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly'])
185 +
186 +- # Helper for architecture
187 +- def isSupported(arch):
188 +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
189 +- globalParameters["AsmCaps"][arch]["SupportedSource"]
190 +-
191 + # Build a list of kernel object names.
192 + for kernel in sourceKernels:
193 + sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)]
194 +@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
195 +
196 + # Source based kernels are built for all supported architectures
197 + if (cxxCompiler == 'hipcc'):
198 +- sourceArchs = []
199 +- for arch in globalParameters['SupportedISA']:
200 +- if isSupported(arch):
201 +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
202 +- if (arch == (9,0,10)):
203 +- sourceArchs += [gfxName(arch) + '-xnack+']
204 +- sourceArchs += [gfxName(arch) + '-xnack-']
205 +- else:
206 +- sourceArchs += [gfxName(arch)]
207 ++ sourceArchs, _ = splitArchs()
208 + else:
209 + raise RuntimeError("Unknown compiler %s" % cxxCompiler)
210 +
211
212 diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
213 new file mode 100644
214 index 000000000000..be5a4db21429
215 --- /dev/null
216 +++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
217 @@ -0,0 +1,23 @@
218 +diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake
219 +--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800
220 ++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800
221 +@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles
222 + COMMENT "Generating Tensile Libraries"
223 + OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS}
224 + COMMAND ${CommandLine}
225 ++ USES_TERMINAL
226 + )
227 +
228 + set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE)
229 +diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py
230 +--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800
231 ++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800
232 +@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0
233 + globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both
234 + globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info
235 + globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest
236 +-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc)
237 ++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc)
238 + globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them.
239 +
240 + # TODO - remove this when NewClient is mainstream
241
242 diff --git a/dev-util/Tensile/metadata.xml b/dev-util/Tensile/metadata.xml
243 new file mode 100644
244 index 000000000000..9bbebec502ca
245 --- /dev/null
246 +++ b/dev-util/Tensile/metadata.xml
247 @@ -0,0 +1,11 @@
248 +<?xml version="1.0" encoding="UTF-8"?>
249 +<!DOCTYPE pkgmetadata SYSTEM 'http://www.gentoo.org/dtd/metadata.dtd'>
250 +<pkgmetadata>
251 + <maintainer type="project">
252 + <email>sci@g.o</email>
253 + <name>Science Project</name>
254 + </maintainer>
255 + <upstream>
256 + <remote-id type="github">ROCmSoftwarePlatform/Tensile</remote-id>
257 + </upstream>
258 +</pkgmetadata>