1 |
commit: f1b7b457296505b40a3ff204fed5d605086e39e6 |
2 |
Author: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sat Mar 5 14:41:24 2022 +0000 |
4 |
Commit: Andrew Ammerlaan <andrewammerlaan <AT> gentoo <DOT> org> |
5 |
CommitDate: Sat Mar 5 14:42:12 2022 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=f1b7b457 |
7 |
|
8 |
dev-util/Tensile: add version 4.3.0 |
9 |
|
10 |
https://github.com/gentoo/gentoo/pull/23450 |
11 |
|
12 |
Package-Manager: Portage-3.0.30, Repoman-3.0.3 |
13 |
Signed-off-by: Andrew Ammerlaan <andrewammerlaan <AT> gentoo.org> |
14 |
|
15 |
dev-util/Tensile/Tensile-4.3.0.ebuild | 74 +++++++++++++++++ |
16 |
...ensile-4.3.0-hsaco-compile-specified-arch.patch | 96 ++++++++++++++++++++++ |
17 |
.../files/Tensile-4.3.0-output-commands.patch | 23 ++++++ |
18 |
3 files changed, 193 insertions(+) |
19 |
|
20 |
diff --git a/dev-util/Tensile/Tensile-4.3.0.ebuild b/dev-util/Tensile/Tensile-4.3.0.ebuild |
21 |
new file mode 100644 |
22 |
index 000000000..bfd86ecea |
23 |
--- /dev/null |
24 |
+++ b/dev-util/Tensile/Tensile-4.3.0.ebuild |
25 |
@@ -0,0 +1,74 @@ |
26 |
+# Copyright 1999-2022 Gentoo Authors |
27 |
+# Distributed under the terms of the GNU General Public License v2 |
28 |
+ |
29 |
+EAPI=8 |
30 |
+ |
31 |
+PYTHON_COMPAT=( python3_{8..10} ) |
32 |
+DISTUTILS_USE_PEP517=setuptools |
33 |
+inherit distutils-r1 |
34 |
+ |
35 |
+DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions" |
36 |
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/Tensile" |
37 |
+SRC_URI="https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz" |
38 |
+S="${WORKDIR}/${PN}-rocm-${PV}" |
39 |
+ |
40 |
+LICENSE="MIT" |
41 |
+KEYWORDS="~amd64" |
42 |
+SLOT="0" |
43 |
+ |
44 |
+# The test suite is not compatible with recent versions of pytest |
45 |
+RESTRICT="test" |
46 |
+ |
47 |
+RDEPEND="${PYTHON_DEPS} |
48 |
+ dev-python/pyyaml[${PYTHON_USEDEP}] |
49 |
+ dev-python/msgpack[${PYTHON_USEDEP}] |
50 |
+" |
51 |
+DEPEND="${RDEPEND} |
52 |
+ dev-util/hip:${SLOT} |
53 |
+" |
54 |
+BDEPEND="test? ( |
55 |
+ dev-util/rocminfo:${SLOT} |
56 |
+)" |
57 |
+ |
58 |
+PATCHES=( |
59 |
+ "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream, should remove after 4.3.0 |
60 |
+ "${FILESDIR}/Tensile-4.3.0-output-commands.patch" |
61 |
+) |
62 |
+ |
63 |
+CMAKE_USE_DIR="${WORKDIR}/Source" |
64 |
+ |
65 |
+distutils_enable_tests pytest |
66 |
+ |
67 |
+src_prepare() { |
68 |
+ distutils-r1_src_prepare |
69 |
+ |
70 |
+ mv ${PN}/Source "${WORKDIR}"/ || die |
71 |
+ sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \ |
72 |
+ -i "${WORKDIR}"/Source/cmake/FindROCmSMI.cmake || die |
73 |
+ sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \ |
74 |
+ -i "${WORKDIR}"/Source/CMakeLists.txt || die |
75 |
+ |
76 |
+ sed -e "/HipClangVersion/s/0,0,0/$(ver_rs 1-3 ,)/" \ |
77 |
+ -e "/SourcePath/s,os\.path\.join.*$,\"${EPREFIX}/usr/share/${PN}\"," \ |
78 |
+ -i ${PN}/Common.py || die |
79 |
+ |
80 |
+ sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile\", end='')|" \ |
81 |
+ -i ${PN}/__init__.py || die |
82 |
+} |
83 |
+ |
84 |
+src_test() { |
85 |
+ ROCM_PATH="${EPREFIX}/usr/" distutils-r1_src_test |
86 |
+} |
87 |
+ |
88 |
+src_install() { |
89 |
+ distutils-r1_src_install |
90 |
+ |
91 |
+ # Move the cmake files to the correct directory |
92 |
+ mkdir -p "${ED}/usr/$(get_libdir)/cmake/${PN}" || die |
93 |
+ mv "${ED}/usr/cmake/"* "${ED}/usr/$(get_libdir)/cmake/${PN}" || die |
94 |
+ rm -r "${ED}/usr/cmake" || die |
95 |
+ |
96 |
+ insinto /usr/share/${PN} |
97 |
+ doins -r "${WORKDIR}"/Source/* |
98 |
+ dosym . /usr/share/${PN}/Source |
99 |
+} |
100 |
|
101 |
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch |
102 |
new file mode 100644 |
103 |
index 000000000..8e6753781 |
104 |
--- /dev/null |
105 |
+++ b/dev-util/Tensile/files/Tensile-4.3.0-hsaco-compile-specified-arch.patch |
106 |
@@ -0,0 +1,96 @@ |
107 |
+https://github.com/ROCmSoftwarePlatform/Tensile/issues/1395 |
108 |
+https://github.com/ROCmSoftwarePlatform/Tensile/pull/1398 |
109 |
+ |
110 |
+--- a/Tensile/TensileCreateLibrary.py |
111 |
++++ b/Tensile/TensileCreateLibrary.py |
112 |
+@@ -136,6 +136,35 @@ def which(p): |
113 |
+ return candidate |
114 |
+ return None |
115 |
+ |
116 |
++def splitArchs(): |
117 |
++ # Helper: true only when AsmCaps marks the arch as both SupportedISA and SupportedSource |
118 |
++ def isSupported(arch): |
119 |
++ return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
120 |
++ globalParameters["AsmCaps"][arch]["SupportedSource"] |
121 |
++ |
122 |
++ if ";" in globalParameters["Architecture"]: |
123 |
++ wantedArchs = globalParameters["Architecture"].split(";") |
124 |
++ else: |
125 |
++ wantedArchs = globalParameters["Architecture"].split("_") |
126 |
++ archs = [] |
127 |
++ cmdlineArchs = [] |
128 |
++ if "all" in wantedArchs: |
129 |
++ for arch in globalParameters['SupportedISA']: |
130 |
++ if isSupported(arch): |
131 |
++ if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
132 |
++ if (arch == (9,0,10)): |
133 |
++ archs += [gfxName(arch) + '-xnack+'] |
134 |
++ cmdlineArchs += [gfxName(arch) + ':xnack+'] |
135 |
++ archs += [gfxName(arch) + '-xnack-'] |
136 |
++ cmdlineArchs += [gfxName(arch) + ':xnack-'] |
137 |
++ else: |
138 |
++ archs += [gfxName(arch)] |
139 |
++ cmdlineArchs += [gfxName(arch)] |
140 |
++ else: |
141 |
++ for arch in wantedArchs: |
142 |
++ archs += [re.sub(":", "-", arch)] |
143 |
++ cmdlineArchs += [arch] |
144 |
++ return archs, cmdlineArchs |
145 |
+ |
146 |
+ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): |
147 |
+ buildPath = ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp')) |
148 |
+@@ -149,24 +178,8 @@ def buildSourceCodeObjectFile(CxxCompiler, outputPath, kernelFile): |
149 |
+ objectFilename = base + '.o' |
150 |
+ soFilename = base + '.so' |
151 |
+ |
152 |
+- def isSupported(arch): |
153 |
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
154 |
+- globalParameters["AsmCaps"][arch]["SupportedSource"] |
155 |
+- |
156 |
+ if (CxxCompiler == "hipcc"): |
157 |
+- archs = [] |
158 |
+- cmdlineArchs = [] |
159 |
+- for arch in globalParameters['SupportedISA']: |
160 |
+- if isSupported(arch): |
161 |
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
162 |
+- if (arch == (9,0,10)): |
163 |
+- archs += [gfxName(arch) + '-xnack+'] |
164 |
+- cmdlineArchs += [gfxName(arch) + ':xnack+'] |
165 |
+- archs += [gfxName(arch) + '-xnack-'] |
166 |
+- cmdlineArchs += [gfxName(arch) + ':xnack-'] |
167 |
+- else: |
168 |
+- archs += [gfxName(arch)] |
169 |
+- cmdlineArchs += [gfxName(arch)] |
170 |
++ archs, cmdlineArchs = splitArchs() |
171 |
+ |
172 |
+ archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs] |
173 |
+ |
174 |
+@@ -1063,11 +1076,6 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl |
175 |
+ sourceKernels = list([k for k in kernels if k['KernelLanguage'] == 'Source']) |
176 |
+ asmKernels = list([k for k in kernels if k['KernelLanguage'] == 'Assembly']) |
177 |
+ |
178 |
+- # Helper for architecture |
179 |
+- def isSupported(arch): |
180 |
+- return globalParameters["AsmCaps"][arch]["SupportedISA"] and \ |
181 |
+- globalParameters["AsmCaps"][arch]["SupportedSource"] |
182 |
+- |
183 |
+ # Build a list of kernel object names. |
184 |
+ for kernel in sourceKernels: |
185 |
+ sourceKernelNames += [kernelWriterSource.getKernelFileBase(kernel)] |
186 |
+@@ -1081,15 +1089,7 @@ def buildObjectFileNames(solutionWriter, kernelWriterSource, kernelWriterAssembl |
187 |
+ |
188 |
+ # Source based kernels are built for all supported architectures |
189 |
+ if (cxxCompiler == 'hipcc'): |
190 |
+- sourceArchs = [] |
191 |
+- for arch in globalParameters['SupportedISA']: |
192 |
+- if isSupported(arch): |
193 |
+- if (arch == (9,0,6) or arch == (9,0,8) or arch == (9,0,10)): |
194 |
+- if (arch == (9,0,10)): |
195 |
+- sourceArchs += [gfxName(arch) + '-xnack+'] |
196 |
+- sourceArchs += [gfxName(arch) + '-xnack-'] |
197 |
+- else: |
198 |
+- sourceArchs += [gfxName(arch)] |
199 |
++ sourceArchs, _ = splitArchs() |
200 |
+ else: |
201 |
+ raise RuntimeError("Unknown compiler %s" % cxxCompiler) |
202 |
+ |
203 |
|
204 |
diff --git a/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch |
205 |
new file mode 100644 |
206 |
index 000000000..be5a4db21 |
207 |
--- /dev/null |
208 |
+++ b/dev-util/Tensile/files/Tensile-4.3.0-output-commands.patch |
209 |
@@ -0,0 +1,23 @@ |
210 |
+diff --color -uprN orig/Tensile/cmake/TensileConfig.cmake Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake |
211 |
+--- orig/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:00.115478470 +0800 |
212 |
++++ Tensile-rocm-4.3.0/Tensile/cmake/TensileConfig.cmake 2021-08-18 17:48:49.963478801 +0800 |
213 |
+@@ -234,6 +234,7 @@ function(TensileCreateLibraryFiles |
214 |
+ COMMENT "Generating Tensile Libraries" |
215 |
+ OUTPUT ${Tensile_EMBED_LIBRARY_SOURCE};${Tensile_MANIFEST_CONTENTS} |
216 |
+ COMMAND ${CommandLine} |
217 |
++ USES_TERMINAL |
218 |
+ ) |
219 |
+ |
220 |
+ set("${Tensile_VAR_PREFIX}_ALL_FILES" ${Tensile_MANIFEST_CONTENTS} PARENT_SCOPE) |
221 |
+diff --color -uprN orig/Tensile/Common.py Tensile-rocm-4.3.0/Tensile/Common.py |
222 |
+--- orig/Tensile/Common.py 2021-08-18 17:48:00.075478470 +0800 |
223 |
++++ Tensile-rocm-4.3.0/Tensile/Common.py 2021-08-18 17:48:23.287478624 +0800 |
224 |
+@@ -179,7 +179,7 @@ globalParameters["PrintTensorD"] = 0 |
225 |
+ globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 0x1=after init; 0x2=after copy-back; 0x3=both |
226 |
+ globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info |
227 |
+ globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest |
228 |
+-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc) |
229 |
++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc) |
230 |
+ globalParameters["DumpTensors"] = False # If True, dump tensors to binary files instead of printing them. |
231 |
+ |
232 |
+ # TODO - remove this when NewClient is mainstream |