Gentoo Archives: gentoo-commits

From: Andrew Ammerlaan <andrewammerlaan@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/sci:master commit in: dev-util/Tensile/files/, dev-util/Tensile/
Date: Sat, 05 Mar 2022 14:44:03
Message-Id: 1646491332.f1b7b457296505b40a3ff204fed5d605086e39e6.andrewammerlaan@gentoo
1 commit: f1b7b457296505b40a3ff204fed5d605086e39e6
2 Author: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org>
3 AuthorDate: Sat Mar 5 14:41:24 2022 +0000
4 Commit: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org>
5 CommitDate: Sat Mar 5 14:42:12 2022 +0000
6 URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=f1b7b457
7
8 dev-util/Tensile: add version 4.3.0
9
10 https://github.com/gentoo/gentoo/pull/23450
11
12 Package-Manager: Portage-3.0.30, Repoman-3.0.3
13 Signed-off-by: Andrew Ammerlaan <andrewammerlaan <AT> gentoo.org>
14
15 dev-util/Tensile/Tensile-4.3.0.ebuild | 74 +++++++++++++++++
16 ...ensile-4.3.0-hsaco-compile-specified-arch.patch | 96 ++++++++++++++++++++++
17 .../files/Tensile-4.3.0-output-commands.patch | 23 ++++++
18 3 files changed, 193 insertions(+)
19
20 diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild
21 new file mode 100644
22 index 000000000..bfd86ecea
23 --- /dev/null
24 +++ b/dev-util/Tensile/Tensile-4.3.0.ebuild
25 @@ -0,0 +1,74 @@
26 +# Copyright 1999-2022 Gentoo Authors
27 +# Distributed under the terms of the GNU General Public License v2
28 +
29 +EAPI=8
30 +
31 +PYTHON_COMPAT=( python3_{8..10} )
32 +DISTUTILS_USE_PEP517=setuptools
33 +inherit distutils-r1
34 +
35 +DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
36 +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile"
37 +SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
38 +S="${WORKDIR}/${PN}-rocm-${PV}"
39 +
40 +LICENSE="MIT"
41 +KEYWORDS="~amd64"
42 +SLOT="0"
43 +
44 +# Not compatible with recent versions of pytest
45 +RESTRICT="test"
46 +
47 +RDEPEND="${PYTHON_DEPS}
48 + dev-python/pyyaml[${PYTHON_USEDEP}]
49 + dev-python/msgpack[${PYTHON_USEDEP}]
50 +"
51 +DEPEND="${RDEPEND}
52 + dev-util/hip:${SLOT}
53 +"
54 +BDEPEND="test? (
55 + dev-util/rocminfo:${SLOT}
56 +)"
57 +
58 +PATCHES=(
59 + "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream; drop this patch in versions after 4.3.0
60 + "${FILESDIR}/Tensile-4.3.0-output-commands.patch"
61 +)
62 +
63 +CMAKE_USE_DIR="${WORKDIR}/Source"
64 +
65 +distutils_enable_tests pytest
66 +
67 +src_prepare() {
68 + distutils-r1_src_prepare
69 +
70 + mv ${PN}/Source "${WORKDIR}"/ || die
71 + sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
72 + -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die
73 + sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
74 + -i "${WORKDIR}"/Source/CMakeLists.txt || die
75 +
76 + sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \
77 + -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \
78 + -i ${PN}/Common.py || die
79 +
80 + sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \
81 + -i ${PN}/__init__.py || die
82 +}
83 +
84 +src_test() {
85 + ROCM_PATH="${EPREFIX}/usr/" distutils-r1_src_test
86 +}
87 +
88 +src_install() {
89 + distutils-r1_src_install
90 +
91 + # Move the cmake files to the correct directory
92 + mkdir -p "${ED}/usr/$(get_libdir)/cmake/${PN}" || die
93 + mv "${ED}/usr/cmake/"* "${ED}/usr/$(get_libdir)/cmake/${PN}" || die
94 + rm -r "${ED}/usr/cmake" || die
95 +
96 + insinto /usr/share/${PN}
97 + doins -r "${WORKDIR}"/Source/*
98 + dosym . /usr/share/${PN}/Source
99 +}
100
101 diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
102 new file mode 100644
103 index 000000000..8e6753781
104 --- /dev/null
105 +++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch
106 @@ -0,0 +1,96 @@
107 +https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395
108 +https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398
109 +
110 +--- a/Tensile/TensileCreateLibrary.py
111 ++++ b/Tensile/TensileCreateLibrary.py
112 +@@ -136,6 +136,35 @@ def which(p):
113 + return candidate
114 + return None
115 +
116 ++def splitArchs():
117 ++ # Helper: true only when AsmCaps marks the arch as both SupportedISA and SupportedSource
118 ++ def isSupported(arch):
119 ++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
120 ++ globalParameters["AsmCaps"][arch]["SupportedSource"]
121 ++
122 ++ if ";" in globalParameters["Architecture"]:
123 ++ wantedArchs = globalParameters["Architecture"].split(";")
124 ++ else:
125 ++ wantedArchs = globalParameters["Architecture"].split("_")
126 ++ archs = []
127 ++ cmdlineArchs = []
128 ++ if "all" in wantedArchs:
129 ++ for arch in globalParameters['SupportedISA']:
130 ++ if isSupported(arch):
131 ++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
132 ++ if (arch == (9,0,10)):
133 ++ archs += [gfxName(arch) + '-xnack+']
134 ++ cmdlineArchs += [gfxName(arch) + ':xnack+']
135 ++ archs += [gfxName(arch) + '-xnack-']
136 ++ cmdlineArchs += [gfxName(arch) + ':xnack-']
137 ++ else:
138 ++ archs += [gfxName(arch)]
139 ++ cmdlineArchs += [gfxName(arch)]
140 ++ else:
141 ++ for arch in wantedArchs:
142 ++ archs += [re.sub(":", "-", arch)]
143 ++ cmdlineArchs += [arch]
144 ++ return archs, cmdlineArchs
145 +
146 + def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
147 + buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp'))
148 +@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile):
149 + objectFilename = base + '.o'
150 + soFilename = base + '.so'
151 +
152 +- def isSupported(arch):
153 +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
154 +- globalParameters["AsmCaps"][arch]["SupportedSource"]
155 +-
156 + if (CxxCompiler == "hipcc"):
157 +- archs = []
158 +- cmdlineArchs = []
159 +- for arch in globalParameters['SupportedISA']:
160 +- if isSupported(arch):
161 +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
162 +- if (arch == (9,0,10)):
163 +- archs += [gfxName(arch) + '-xnack+']
164 +- cmdlineArchs += [gfxName(arch) + ':xnack+']
165 +- archs += [gfxName(arch) + '-xnack-']
166 +- cmdlineArchs += [gfxName(arch) + ':xnack-']
167 +- else:
168 +- archs += [gfxName(arch)]
169 +- cmdlineArchs += [gfxName(arch)]
170 ++ archs, cmdlineArchs = splitArchs()
171 +
172 + archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]
173 +
174 +@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
175 + sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source'])
176 + asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly'])
177 +
178 +- # Helper for architecture
179 +- def isSupported(arch):
180 +- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \
181 +- globalParameters["AsmCaps"][arch]["SupportedSource"]
182 +-
183 + # Build a list of kernel object names.
184 + for kernel in sourceKernels:
185 + sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)]
186 +@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl
187 +
188 + # Source based kernels are built for all supported architectures
189 + if (cxxCompiler == 'hipcc'):
190 +- sourceArchs = []
191 +- for arch in globalParameters['SupportedISA']:
192 +- if isSupported(arch):
193 +- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)):
194 +- if (arch == (9,0,10)):
195 +- sourceArchs += [gfxName(arch) + '-xnack+']
196 +- sourceArchs += [gfxName(arch) + '-xnack-']
197 +- else:
198 +- sourceArchs += [gfxName(arch)]
199 ++ sourceArchs, _ = splitArchs()
200 + else:
201 + raise RuntimeError("Unknown compiler %s" % cxxCompiler)
202 +
203
204 diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
205 new file mode 100644
206 index 000000000..be5a4db21
207 --- /dev/null
208 +++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch
209 @@ -0,0 +1,23 @@
210 +diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake
211 +--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800
212 ++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800
213 +@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles
214 + COMMENT "Generating Tensile Libraries"
215 + OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS}
216 + COMMAND ${CommandLine}
217 ++ USES_TERMINAL
218 + )
219 +
220 + set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE)
221 +diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py
222 +--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800
223 ++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800
224 +@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0
225 + globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both
226 + globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info
227 + globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest
228 +-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc)
229 ++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc)
230 + globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them.
231 +
232 + # TODO - remove this when NewClient is mainstream