1 |
commit: db81ee2a9a65c0aaebb4df461f8be388c66cdd16 |
2 |
Author: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Mar 5 20:49:57 2022 +0000 |
4 |
Commit: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat Mar 5 20:51:52 2022 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=db81ee2a |
7 |
|
8 |
dev-util/Tensile: add new dependency of rocBLAS |
9 |
|
10 |
Package-Manager: Portage-3.0.30, Repoman-3.0.3 |
11 |
Signed-off-by: Andrew Ammerlaan <andrewammerlaan <AT> gentoo.org> |
12 |
|
13 |
dev-util/Tensile/Manifest | 1 + |
14 |
dev-util/Tensile/Tensile-4.3.0.ebuild | 74 +++++++++++++++++ |
15 |
...ensile-4.3.0-hsaco-compile-specified-arch.patch | 96 ++++++++++++++++++++++ |
16 |
.../files/Tensile-4.3.0-output-commands.patch | 23 ++++++ |
17 |
dev-util/Tensile/metadata.xml | 11 +++ |
18 |
5 files changed, 205 insertions(+) |
19 |
|
20 |
diff --git a/dev-util/Tensile/Manifest b/dev-util/Tensile/Manifest |
21 |
new file mode 100644 |
22 |
index 000000000000..fd4cac044af8 |
23 |
--- /dev/null |
24 |
+++ b/dev-util/Tensile/Manifest |
25 |
@@ -0,0 +1 @@ |
26 |
+DIST rocm-Tensile-4.3.0.tar.gz 14250149 BLAKE2B 030138eaca2a0aadd96801e6dbd72e510716dd90553ef3795c5e04e00a34a05ecae82b24f755e4033a4acfcdb1cf26291da1e7902bb090f89a010d403e832beb SHA512 126db0b413c716fba8d5be9bff7a44fd1badacbf32f3db8d0db649819177db37ebd56fd22dd3c809655f5d29675be115e698cd10bc3d0b4b23878ae3726fce47 |
27 |
|
28 |
diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild |
29 |
new file mode 100644 |
30 |
index 000000000000..cf33fb19cf10 |
31 |
--- /dev/null |
32 |
+++ b/dev-util/Tensile/Tensile-4.3.0.ebuild |
33 |
@@ -0,0 +1,74 @@ |
34 |
+# Copyright 1999-2022 Gentoo Authors |
35 |
+# Distributed under the terms of the GNU General Public License v2 |
36 |
+ |
37 |
+EAPI=8 |
38 |
+ |
39 |
+PYTHON_COMPAT=( python3_{8..10} ) |
40 |
+DISTUTILS_USE_PEP517=setuptools |
41 |
+inherit distutils-r1 |
42 |
+ |
43 |
+DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions" |
44 |
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile" |
45 |
+SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz" |
46 |
+S="${WORKDIR}/${PN}-rocm-${PV}" |
47 |
+ |
48 |
+LICENSE="MIT" |
49 |
+KEYWORDS="~amd64" |
50 |
+SLOT="0/$(ver_cut 1-2)" |
51 |
+ |
52 |
+# Not compatible with recent versions of pytest |
53 |
+RESTRICT="test" |
54 |
+ |
55 |
+RDEPEND="${PYTHON_DEPS} |
56 |
+ dev-python/pyyaml[${PYTHON_USEDEP}] |
57 |
+ dev-python/msgpack[${PYTHON_USEDEP}] |
58 |
+" |
59 |
+DEPEND="${RDEPEND} |
60 |
+ dev-util/hip:${SLOT} |
61 |
+" |
62 |
+BDEPEND="test? ( |
63 |
+ dev-util/rocminfo:${SLOT} |
64 |
+)" |
65 |
+ |
66 |
+PATCHES=( |
67 |
+ "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream, should be removed after 4.3.0 |
68 |
+ "${FILESDIR}/Tensile-4.3.0-output-commands.patch" |
69 |
+) |
70 |
+ |
71 |
+CMAKE_USE_DIR="${WORKDIR}/Source" |
72 |
+ |
73 |
+distutils_enable_tests pytest |
74 |
+ |
75 |
+src_prepare() { |
76 |
+ distutils-r1_src_prepare |
77 |
+ |
78 |
+ mv ${PN}/Source "${WORKDIR}"/ || die |
79 |
+ sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \ |
80 |
+ -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die |
81 |
+ sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \ |
82 |
+ -i "${WORKDIR}"/Source/CMakeLists.txt || die |
83 |
+ |
84 |
+ sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \ |
85 |
+ -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \ |
86 |
+ -i ${PN}/Common.py || die |
87 |
+ |
88 |
+ sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \ |
89 |
+ -i ${PN}/__init__.py || die |
90 |
+} |
91 |
+ |
92 |
+src_test() { |
93 |
+ ROCM_PATH="${EPREFIX}/usr/" distutils-r1_src_test |
94 |
+} |
95 |
+ |
96 |
+src_install() { |
97 |
+ distutils-r1_src_install |
98 |
+ |
99 |
+ # Move the cmake files to the correct directory |
100 |
+ mkdir -p "${ED}/usr/$(get_libdir)/cmake/${PN}" || die |
101 |
+ mv "${ED}/usr/cmake/"* "${ED}/usr/$(get_libdir)/cmake/${PN}" || die |
102 |
+ rm -r "${ED}/usr/cmake" || die |
103 |
+ |
104 |
+ insinto /usr/share/${PN} |
105 |
+ doins -r "${WORKDIR}"/Source/* |
106 |
+ dosym . /usr/share/${PN}/Source |
107 |
+} |
108 |
|
109 |
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch |
110 |
new file mode 100644 |
111 |
index 000000000000..8e6753781a2a |
112 |
--- /dev/null |
113 |
+++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch |
114 |
@@ -0,0 +1,96 @@ |
115 |
+https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395 |
116 |
+https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398 |
117 |
+ |
118 |
+--- a/Tensile/TensileCreateLibrary.py |
119 |
++++ b/Tensile/TensileCreateLibrary.py |
120 |
+@@ -136,6 +136,35 @@ def which(p): |
121 |
+ return candidate |
122 |
+ return None |
123 |
+ |
124 |
++def splitArchs(): |
125 |
++ # Helper for architecture |
126 |
++ def isSupported(arch): |
127 |
++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
128 |
++ globalParameters["AsmCaps"][arch]["SupportedSource"] |
129 |
++ |
130 |
++ if ";" in globalParameters["Architecture"]: |
131 |
++ wantedArchs = globalParameters["Architecture"].split(";") |
132 |
++ else: |
133 |
++ wantedArchs = globalParameters["Architecture"].split("_") |
134 |
++ archs = [] |
135 |
++ cmdlineArchs = [] |
136 |
++ if "all" in wantedArchs: |
137 |
++ for arch in globalParameters['SupportedISA']: |
138 |
++ if isSupported(arch): |
139 |
++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
140 |
++ if (arch == (9,0,10)): |
141 |
++ archs += [gfxName(arch) + '-xnack+'] |
142 |
++ cmdlineArchs += [gfxName(arch) + ':xnack+'] |
143 |
++ archs += [gfxName(arch) + '-xnack-'] |
144 |
++ cmdlineArchs += [gfxName(arch) + ':xnack-'] |
145 |
++ else: |
146 |
++ archs += [gfxName(arch)] |
147 |
++ cmdlineArchs += [gfxName(arch)] |
148 |
++ else: |
149 |
++ for arch in wantedArchs: |
150 |
++ archs += [re.sub(":", "-", arch)] |
151 |
++ cmdlineArchs += [arch] |
152 |
++ return archs, cmdlineArchs |
153 |
+ |
154 |
+ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): |
155 |
+ buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp')) |
156 |
+@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): |
157 |
+ objectFilename = base + '.o' |
158 |
+ soFilename = base + '.so' |
159 |
+ |
160 |
+- def isSupported(arch): |
161 |
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
162 |
+- globalParameters["AsmCaps"][arch]["SupportedSource"] |
163 |
+- |
164 |
+ if (CxxCompiler == "hipcc"): |
165 |
+- archs = [] |
166 |
+- cmdlineArchs = [] |
167 |
+- for arch in globalParameters['SupportedISA']: |
168 |
+- if isSupported(arch): |
169 |
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
170 |
+- if (arch == (9,0,10)): |
171 |
+- archs += [gfxName(arch) + '-xnack+'] |
172 |
+- cmdlineArchs += [gfxName(arch) + ':xnack+'] |
173 |
+- archs += [gfxName(arch) + '-xnack-'] |
174 |
+- cmdlineArchs += [gfxName(arch) + ':xnack-'] |
175 |
+- else: |
176 |
+- archs += [gfxName(arch)] |
177 |
+- cmdlineArchs += [gfxName(arch)] |
178 |
++ archs, cmdlineArchs = splitArchs() |
179 |
+ |
180 |
+ archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs] |
181 |
+ |
182 |
+@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl |
183 |
+ sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source']) |
184 |
+ asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly']) |
185 |
+ |
186 |
+- # Helper for architecture |
187 |
+- def isSupported(arch): |
188 |
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
189 |
+- globalParameters["AsmCaps"][arch]["SupportedSource"] |
190 |
+- |
191 |
+ # Build a list of kernel object names. |
192 |
+ for kernel in sourceKernels: |
193 |
+ sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)] |
194 |
+@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl |
195 |
+ |
196 |
+ # Source based kernels are built for all supported architectures |
197 |
+ if (cxxCompiler == 'hipcc'): |
198 |
+- sourceArchs = [] |
199 |
+- for arch in globalParameters['SupportedISA']: |
200 |
+- if isSupported(arch): |
201 |
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
202 |
+- if (arch == (9,0,10)): |
203 |
+- sourceArchs += [gfxName(arch) + '-xnack+'] |
204 |
+- sourceArchs += [gfxName(arch) + '-xnack-'] |
205 |
+- else: |
206 |
+- sourceArchs += [gfxName(arch)] |
207 |
++ sourceArchs, _ = splitArchs() |
208 |
+ else: |
209 |
+ raise RuntimeError("Unknown compiler %s" % cxxCompiler) |
210 |
+ |
211 |
|
212 |
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch |
213 |
new file mode 100644 |
214 |
index 000000000000..be5a4db21429 |
215 |
--- /dev/null |
216 |
+++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch |
217 |
@@ -0,0 +1,23 @@ |
218 |
+diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake |
219 |
+--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800 |
220 |
++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800 |
221 |
+@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles |
222 |
+ COMMENT "Generating Tensile Libraries" |
223 |
+ OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS} |
224 |
+ COMMAND ${CommandLine} |
225 |
++ USES_TERMINAL |
226 |
+ ) |
227 |
+ |
228 |
+ set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE) |
229 |
+diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py |
230 |
+--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800 |
231 |
++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800 |
232 |
+@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0 |
233 |
+ globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both |
234 |
+ globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info |
235 |
+ globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest |
236 |
+-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc) |
237 |
++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc) |
238 |
+ globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them. |
239 |
+ |
240 |
+ # TODO - remove this when NewClient is mainstream |
241 |
|
242 |
diff --git a/dev-util/Tensile/metadata.xml b/dev-util/Tensile/metadata.xml |
243 |
new file mode 100644 |
244 |
index 000000000000..9bbebec502ca |
245 |
--- /dev/null |
246 |
+++ b/dev-util/Tensile/metadata.xml |
247 |
@@ -0,0 +1,11 @@ |
248 |
+<?xml version="1.0" encoding="UTF-8"?> |
249 |
+<!DOCTYPE pkgmetadata SYSTEM 'http://www.gentoo.org/dtd/metadata.dtd'> |
250 |
+<pkgmetadata> |
251 |
+ <maintainer type="project"> |
252 |
+ <email>sci@g.o</email> |
253 |
+ <name>Science Project</name> |
254 |
+ </maintainer> |
255 |
+ <upstream> |
256 |
+ <remote-id type="github">ROCmSoftwarePlatform/Tensile</remote-id> |
257 |
+ </upstream> |
258 |
+</pkgmetadata> |