Gentoo Archives: gentoo-commits

From: Benda XU <heroxbd@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: sci-libs/rocSOLVER/, sci-libs/miopen/, sci-libs/miopen/files/
Date: Thu, 22 Sep 2022 01:08:59
Message-Id: 1663808893.7fab2f99183ab9524f07182af5efecea9e817d8e.heroxbd@gentoo
1 commit: 7fab2f99183ab9524f07182af5efecea9e817d8e
2 Author: Yiyang Wu <xgreenlandforwyy <AT> gmail <DOT> com>
3 AuthorDate: Tue Aug 23 10:26:01 2022 +0000
4 Commit: Benda XU <heroxbd <AT> gentoo <DOT> org>
5 CommitDate: Thu Sep 22 01:08:13 2022 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=7fab2f99
7
8 sci-libs/miopen: bump to 5.1.3, switch to vanilla clang-14
9
10 Compared to 5.0.2, 5.1.3 also has:
11 1. corrected dependencies
12 2. compilation errors fixed for gcc-12 (although ROCm-5.1.3 on clang-14
13 is incompatible with gcc-12, the patch would be useful when using
14 clang-15)
15 3. deprecation of clang-ocl
16 4. cmake_src_prepare moved to the front, because src_prepare needs
17 BUILD_DIR
18 5. fix invalid metadata issue found by using vanilla clang
19 6. enable test on specific arch
20
21 Closes: https://github.com/gentoo/gentoo/pull/27219
22 Bugs: https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731
23 Signed-off-by: Yiyang Wu <xgreenlandforwyy <AT> gmail.com>
24 Signed-off-by: Benda Xu <heroxbd <AT> gentoo.org>
25
26 sci-libs/miopen/Manifest | 1 +
27 ....3-avoid-metadata-error-for-vanilla-clang.patch | 183 ++++++++++
28 .../files/miopen-5.1.3-deprecate-clang-ocl.patch | 55 +++
29 sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch | 397 +++++++++++++++++++++
30 .../miopen/files/miopen-5.1.3-include-array.patch | 12 +
31 sci-libs/miopen/files/miopen-5.1.3-no-strip.patch | 17 +
32 sci-libs/miopen/miopen-5.1.3.ebuild | 109 ++++++
33 sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild | 1 -
34 8 files changed, 774 insertions(+), 1 deletion(-)
35
36 diff --git a/sci-libs/miopen/Manifest b/sci-libs/miopen/Manifest
37 index 2a8dbb55fead..c987c1548233 100644
38 --- a/sci-libs/miopen/Manifest
39 +++ b/sci-libs/miopen/Manifest
40 @@ -1,2 +1,3 @@
41 DIST MIOpen-4.3.0.tar.gz 59405686 BLAKE2B fe91bd91a74023866883d6d0a2a8071a8fe40c4cff2fb4ef58fc6e343a05ac2a731f73e657f4d183ade4e5b7c1fbbe41f3f032918f6e50cb713073aee8d97dc5 SHA512 a8615b7738acfbc7f68d9417b0746c62630d2b48fb8485fafba4db65a4b277a8230f601d249d7e54f89ba25c14176429ca76ad8365a437b09d836b031b0c4fbb
42 DIST MIOpen-5.0.2.tar.gz 76294827 BLAKE2B 7b2a1f0e675793aee4a0fa2a270caac8332cda36c8f04cee483cc2882ed987b6e676e9c24a1acf4976a16a10f922b1a6263470b419aa88a29cfcb6d6d4b4cc29 SHA512 a581b45220797904db3e4dd3840f2ef96085f00baf8187c5ab574325a66da4f599dee6496457bb1cc32825b57a13fb0ef35a2ef1bd2a5f449c7e7b9fa64b27d1
43 +DIST MIOpen-5.1.3.tar.gz 88118329 BLAKE2B d24722ffc5f5dab6d6a1de2ce34193ad2f25c9a2562e38c52e010a29870f01d9ea1c56970ba0601a088c8286e97958ee95d0da27fc8082126dd2ebe5ccb36b70 SHA512 a14e28cfcb12e5061e0e7b999ef3e67fa0a0e897e31bc50e7288b8a23eb1791312e33d3b697021c2b654ccc065ae1b046c1cfd77ba8e04b0f3e87e9cc0626dcd
44
45 diff --git a/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch b/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch
46 new file mode 100644
47 index 000000000000..3dca20f1fb12
48 --- /dev/null
49 +++ b/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch
50 @@ -0,0 +1,183 @@
51 +Together with the find-sed command in the 5.1.3 ebuild, this fixes
52 +https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731
53 +index 71b2cabf1..60e7fab6e 100644
54 +--- a/src/kernels/Conv_Winograd_v13_3_12_epilogue.inc
55 ++++ b/src/kernels/Conv_Winograd_v13_3_12_epilogue.inc
56 +@@ -76,7 +76,7 @@ amdhsa.kernels:
57 + .max_flat_workgroup_size: \wg_x
58 + .wavefront_size: 64
59 + .args:
60 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
61 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
62 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
63 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
64 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
65 +diff --git a/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc b/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc
66 +index 36d47c862..f8f677ec6 100644
67 +--- a/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc
68 ++++ b/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc
69 +@@ -76,7 +76,7 @@ amdhsa.kernels:
70 + .max_flat_workgroup_size: \wg_x
71 + .wavefront_size: 64
72 + .args:
73 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
74 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
75 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
76 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
77 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
78 +diff --git a/src/kernels/Conv_Winograd_v21_1_3_metadata.inc b/src/kernels/Conv_Winograd_v21_1_3_metadata.inc
79 +index deff81e84..ed47abea7 100644
80 +--- a/src/kernels/Conv_Winograd_v21_1_3_metadata.inc
81 ++++ b/src/kernels/Conv_Winograd_v21_1_3_metadata.inc
82 +@@ -51,7 +51,7 @@ amdhsa.kernels:
83 + .max_flat_workgroup_size: \wg_x
84 + .wavefront_size: 64
85 + .args:
86 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
87 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
88 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
89 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
90 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
91 +diff --git a/src/kernels/conv1x1u.s b/src/kernels/conv1x1u.s
92 +index 5dc213546..c890d45a4 100644
93 +--- a/src/kernels/conv1x1u.s
94 ++++ b/src/kernels/conv1x1u.s
95 +@@ -1076,7 +1076,7 @@ amdhsa.kernels:
96 + .max_flat_workgroup_size: \wg_x
97 + .wavefront_size: 64
98 + .args:
99 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
100 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
101 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
102 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
103 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
104 +diff --git a/src/kernels/conv1x1u_bias_activ.s b/src/kernels/conv1x1u_bias_activ.s
105 +index 1675e819a..6bbdd9936 100644
106 +--- a/src/kernels/conv1x1u_bias_activ.s
107 ++++ b/src/kernels/conv1x1u_bias_activ.s
108 +@@ -1230,7 +1230,7 @@ amdhsa.kernels:
109 + .max_flat_workgroup_size: \wg_x
110 + .wavefront_size: 64
111 + .args:
112 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
113 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
114 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
115 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
116 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
117 +diff --git a/src/kernels/conv1x1u_stride2.s b/src/kernels/conv1x1u_stride2.s
118 +index c5ea1e90c..6241edcf7 100644
119 +--- a/src/kernels/conv1x1u_stride2.s
120 ++++ b/src/kernels/conv1x1u_stride2.s
121 +@@ -1162,7 +1162,7 @@ amdhsa.kernels:
122 + .max_flat_workgroup_size: \wg_x
123 + .wavefront_size: 64
124 + .args:
125 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
126 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
127 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
128 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
129 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
130 +diff --git a/src/kernels/conv1x1wrw.s b/src/kernels/conv1x1wrw.s
131 +index b13b6ffa4..eb63f17c6 100644
132 +--- a/src/kernels/conv1x1wrw.s
133 ++++ b/src/kernels/conv1x1wrw.s
134 +@@ -1243,7 +1243,7 @@ amdhsa.kernels:
135 + .max_flat_workgroup_size: \wg_x
136 + .wavefront_size: 64
137 + .args:
138 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
139 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
140 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
141 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
142 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
143 +diff --git a/src/kernels/conv3x3wrw.s b/src/kernels/conv3x3wrw.s
144 +index a3f73aeae..b6fb1632c 100755
145 +--- a/src/kernels/conv3x3wrw.s
146 ++++ b/src/kernels/conv3x3wrw.s
147 +@@ -1033,7 +1033,7 @@ amdhsa.kernels:
148 + .max_flat_workgroup_size: \wg_x
149 + .wavefront_size: 64
150 + .args:
151 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
152 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
153 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
154 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
155 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
156 +diff --git a/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc
157 +index 358772e63..b27ad5284 100644
158 +--- a/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc
159 ++++ b/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc
160 +@@ -76,7 +76,7 @@ amdhsa.kernels:
161 + .max_flat_workgroup_size: \wg_x
162 + .wavefront_size: 64
163 + .args:
164 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
165 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
166 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
167 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
168 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
169 +diff --git a/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc
170 +index d3296969a..f873ce153 100644
171 +--- a/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc
172 ++++ b/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc
173 +@@ -76,7 +76,7 @@ amdhsa.kernels:
174 + .max_flat_workgroup_size: \wg_x
175 + .wavefront_size: 64
176 + .args:
177 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
178 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
179 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
180 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
181 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
182 +diff --git a/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc
183 +index a253cc0f9..1582d002c 100644
184 +--- a/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc
185 ++++ b/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc
186 +@@ -76,7 +76,7 @@ amdhsa.kernels:
187 + .max_flat_workgroup_size: \wg_x
188 + .wavefront_size: 64
189 + .args:
190 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
191 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
192 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
193 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
194 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
195 +diff --git a/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc
196 +index e40ac1f60..78495e024 100644
197 +--- a/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc
198 ++++ b/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc
199 +@@ -76,7 +76,7 @@ amdhsa.kernels:
200 + .max_flat_workgroup_size: \wg_x
201 + .wavefront_size: 64
202 + .args:
203 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
204 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
205 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
206 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
207 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
208 +diff --git a/src/kernels/xform_bidirect_winograd_code.inc b/src/kernels/xform_bidirect_winograd_code.inc
209 +index 724845f49..d03314ef5 100644
210 +--- a/src/kernels/xform_bidirect_winograd_code.inc
211 ++++ b/src/kernels/xform_bidirect_winograd_code.inc
212 +@@ -1566,7 +1566,7 @@ amdhsa.kernels:
213 + .max_flat_workgroup_size: \wg_x
214 + .wavefront_size: 64
215 + .args:
216 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
217 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
218 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
219 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
220 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
221 +diff --git a/src/kernels/xform_metadata.inc b/src/kernels/xform_metadata.inc
222 +index 960a9a2d8..83b736bee 100644
223 +--- a/src/kernels/xform_metadata.inc
224 ++++ b/src/kernels/xform_metadata.inc
225 +@@ -74,7 +74,7 @@ amdhsa.kernels:
226 + .max_flat_workgroup_size: \wg_x
227 + .wavefront_size: 64
228 + .args:
229 +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N }
230 ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X }
231 + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C }
232 + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H }
233 + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W }
234
235 diff --git a/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch b/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch
236 new file mode 100644
237 index 000000000000..93c66fd3ef05
238 --- /dev/null
239 +++ b/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch
240 @@ -0,0 +1,55 @@
241 +This is a cherry-picked PR on 5.1.3, which replaces clang-ocl with clang
242 +From 98f001dfe61208af04ecf7690023efd772ee7d43 Mon Sep 17 00:00:00 2001
243 +From: Jehandad Khan <jahandad@×××××.com>
244 +Date: Tue, 19 Jul 2022 17:24:05 -0500
245 +Subject: [PATCH] Remove clang-ocl and replace with clang
246 +
247 +---
248 + CMakeLists.txt | 7 +------
249 + README.md | 1 -
250 + src/hipoc/hipoc_program.cpp | 7 ++++++-
251 + 3 files changed, 7 insertions(+), 8 deletions(-)
252 +
253 +Index: MIOpen-rocm-5.1.3/CMakeLists.txt
254 +===================================================================
255 +--- MIOpen-rocm-5.1.3.orig/CMakeLists.txt
256 ++++ MIOpen-rocm-5.1.3/CMakeLists.txt
257 +@@ -241,7 +241,7 @@ if( MIOPEN_BACKEND STREQUAL "HIP" OR MIO
258 + # miopentensile default off
259 + set(MIOPEN_USE_MIOPENTENSILE OFF CACHE BOOL "")
260 +
261 +- find_program(HIP_OC_COMPILER clang-ocl
262 ++ find_program(HIP_OC_COMPILER clang
263 + PATH_SUFFIXES bin
264 + PATHS
265 + /opt/rocm
266 +Index: MIOpen-rocm-5.1.3/README.md
267 +===================================================================
268 +--- MIOpen-rocm-5.1.3.orig/README.md
269 ++++ MIOpen-rocm-5.1.3/README.md
270 +@@ -14,7 +14,6 @@ MIOpen supports two programming models -
271 + * OpenCL - OpenCL libraries and header files
272 + * HIP -
273 + * HIP and HCC libraries and header files
274 +- * [clang-ocl](https://github.com/RadeonOpenCompute/clang-ocl) -- **required**
275 + * [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) to enable various functionalities including transposed and dilated convolutions. This is optional on the HIP backend. Users can enable this library using the cmake configuration flag `-DMIOPEN_USE_MIOPENGEMM=On`.
276 + * ROCm cmake modules can be installed from [here](https://github.com/RadeonOpenCompute/rocm-cmake)
277 + * [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library
278 +Index: MIOpen-rocm-5.1.3/src/hipoc/hipoc_program.cpp
279 +===================================================================
280 +--- MIOpen-rocm-5.1.3.orig/src/hipoc/hipoc_program.cpp
281 ++++ MIOpen-rocm-5.1.3/src/hipoc/hipoc_program.cpp
282 +@@ -255,7 +255,12 @@ void HIPOCProgramImpl::BuildCodeObjectIn
283 + if(miopen::IsEnabled(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP{}))
284 + params += " -mwavefrontsize64 -mcumode";
285 + WriteFile(src, dir->path / filename);
286 +- dir->Execute(HIP_OC_COMPILER, params + " " + filename + " -o " + hsaco_file.string());
287 ++ params += " -target amdgcn-amd-amdhsa -x cl -D__AMD__=1 -O3";
288 ++ params += " -cl-kernel-arg-info -cl-denorms-are-zero";
289 ++ params += " -cl-std=CL1.2 -mllvm -amdgpu-early-inline-all";
290 ++ params += " -mllvm -amdgpu-internalize-symbols ";
291 ++ params += " " + filename + " -o " + hsaco_file.string();
292 ++ dir->Execute(HIP_OC_COMPILER, params);
293 + }
294 + if(!boost::filesystem::exists(hsaco_file))
295 + MIOPEN_THROW("Cant find file: " + hsaco_file.string());
296
297 diff --git a/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch b/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch
298 new file mode 100644
299 index 000000000000..42041f2414ed
300 --- /dev/null
301 +++ b/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch
302 @@ -0,0 +1,397 @@
303 +Enable gfx1031 support
304 +======================
305 +--- MIOpen-rocm-5.1.3/fin/src/include/conv_fin.hpp 2022-05-08 14:08:05.000000000 +0800
306 ++++ gfx1031/fin/src/include/conv_fin.hpp 2022-09-01 18:36:18.630980283 +0800
307 +@@ -111,6 +111,10 @@ class ConvFin : public Fin
308 + {
309 + assert(num_cu == 72 || num_cu == 36);
310 + }
311 ++ else if(arch == "gfx1031")
312 ++ {
313 ++ assert(num_cu == 40 || num_cu == 20);
314 ++ }
315 + else if(arch == "gfx90a")
316 + {
317 + assert(num_cu == 110);
318 +Only in MIOpen-rocm-5.1.3: patches
319 +diff --color -upr MIOpen-rocm-5.1.3/src/composable_kernel/composable_kernel/include/utility/config.hpp gfx1031/src/composable_kernel/composable_kernel/include/utility/config.hpp
320 +--- MIOpen-rocm-5.1.3/src/composable_kernel/composable_kernel/include/utility/config.hpp 2022-05-08 14:08:05.000000000 +0800
321 ++++ gfx1031/src/composable_kernel/composable_kernel/include/utility/config.hpp 2022-09-01 18:36:18.634980274 +0800
322 +@@ -13,7 +13,7 @@
323 + // GPU target
324 + // should enable one and only one GPU target
325 + #if !(defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \
326 +- defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1030))
327 ++ defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031))
328 + #error Need to define (only) one GPU target
329 + #endif
330 +
331 +@@ -29,7 +29,7 @@
332 + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \
333 + defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A)
334 + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
335 +-#elif defined(CK_AMD_GPU_GFX1030)
336 ++#elif (defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031))
337 + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
338 + #endif
339 +
340 +@@ -37,7 +37,7 @@
341 + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900)
342 + #define CK_USE_AMD_V_MAC_F32
343 + #elif defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90a) || \
344 +- defined(CK_AMD_GPU_GFX1030)
345 ++ (defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031))
346 + #define CK_USE_AMD_V_FMAC_F32
347 + #define CK_USE_AMD_V_DOT2_F32_F16
348 + #define CK_USE_AMD_V_DOT4_I32_I8
349 +diff --color -upr MIOpen-rocm-5.1.3/src/include/miopen/solver/ck_utility_common.hpp gfx1031/src/include/miopen/solver/ck_utility_common.hpp
350 +--- MIOpen-rocm-5.1.3/src/include/miopen/solver/ck_utility_common.hpp 2022-05-08 14:08:05.000000000 +0800
351 ++++ gfx1031/src/include/miopen/solver/ck_utility_common.hpp 2022-09-01 18:36:18.638980266 +0800
352 +@@ -54,5 +54,6 @@ static inline bool is_ck_supported_hardw
353 + StartsWith(handle.GetDeviceName(), "gfx908") ||
354 + StartsWith(handle.GetDeviceName(), "gfx90a") ||
355 +- StartsWith(handle.GetDeviceName(), "gfx1030");
356 ++ StartsWith(handle.GetDeviceName(), "gfx1030") ||
357 ++ StartsWith(handle.GetDeviceName(), "gfx1031");
358 + }
359 +
360 +@@ -83,6 +84,8 @@ static inline auto get_ck_common_compile
361 + compiler_flag << " -DCK_AMD_GPU_GFX90A";
362 + else if(StartsWith(device_name, "gfx1030"))
363 + compiler_flag << " -DCK_AMD_GPU_GFX1030";
364 ++ else if(StartsWith(device_name, "gfx1031"))
365 ++ compiler_flag << " -DCK_AMD_GPU_GFX1031";
366 +
367 + // buffer atomic-fadd
368 + compiler_flag << " -DCK_USE_AMD_BUFFER_ATOMIC_FADD="
369 +diff --color -upr MIOpen-rocm-5.1.3/src/include/miopen/solver/implicitgemm_util.hpp gfx1031/src/include/miopen/solver/implicitgemm_util.hpp
370 +--- MIOpen-rocm-5.1.3/src/include/miopen/solver/implicitgemm_util.hpp 2022-05-08 14:08:05.000000000 +0800
371 ++++ gfx1031/src/include/miopen/solver/implicitgemm_util.hpp 2022-09-01 18:36:18.638980266 +0800
372 +@@ -469,7 +469,7 @@ static inline bool is_use_amd_buffer_loa
373 + {
374 + #if WORKAROUND_MIOPEN_ISSUE_557
375 + const auto device_name = ctx.GetStream().GetDeviceName();
376 +- return !StartsWith(device_name, "gfx1030");
377 ++ return !StartsWith(device_name, "gfx1030") && !StartsWith(device_name, "gfx1031");
378 + #else
379 + return true;
380 + #endif
381 +@@ -478,7 +478,7 @@ static inline bool is_use_amd_buffer_loa
382 + static inline bool is_use_v_fmac_f32(const ConvolutionContext& ctx)
383 + {
384 + const auto device_name = ctx.GetStream().GetDeviceName();
385 +- return StartsWith(device_name, "gfx1030");
386 ++ return StartsWith(device_name, "gfx1030") || StartsWith(device_name, "gfx1031");
387 + }
388 +
389 + static inline bool support_amd_buffer_atomic_fadd(const std::string& device_name)
390 +@@ -599,7 +599,8 @@ static inline bool IsComposableKernelSup
391 + StartsWith(c.GetStream().GetDeviceName(), "gfx906") ||
392 + StartsWith(c.GetStream().GetDeviceName(), "gfx908") ||
393 + StartsWith(c.GetStream().GetDeviceName(), "gfx90a") ||
394 +- StartsWith(c.GetStream().GetDeviceName(), "gfx1030");
395 ++ StartsWith(c.GetStream().GetDeviceName(), "gfx1030")||
396 ++ StartsWith(c.GetStream().GetDeviceName(), "gfx1031");
397 + }
398 +
399 + // greatest common divisor, aka highest common factor
400 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/batchnorm_functions.h gfx1031/src/kernels/batchnorm_functions.h
401 +--- MIOpen-rocm-5.1.3/src/kernels/batchnorm_functions.h 2022-05-08 14:08:05.000000000 +0800
402 ++++ gfx1031/src/kernels/batchnorm_functions.h 2022-09-01 18:36:18.858979772 +0800
403 +@@ -159,6 +159,10 @@
404 + #define MIO_BN_GFX1030 0
405 + #endif
406 +
407 ++#ifndef MIO_BN_GFX1031
408 ++#define MIO_BN_GFX1031 0
409 ++#endif
410 ++
411 + #define UNUSED __attribute__((__unused__))
412 +
413 + #if(MIO_BN_VARIANT != 4)
414 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdPerAct.cl gfx1031/src/kernels/MIOpenBatchNormActivBwdPerAct.cl
415 +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdPerAct.cl 2022-05-08 14:08:05.000000000 +0800
416 ++++ gfx1031/src/kernels/MIOpenBatchNormActivBwdPerAct.cl 2022-09-01 18:36:18.858979772 +0800
417 +@@ -34,7 +34,7 @@
418 + #endif
419 +
420 + #define MIOPEN_USE_AMDGCN 0
421 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1
422 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1
423 + #undef MIOPEN_USE_AMDGCN
424 + #define MIOPEN_USE_AMDGCN 1
425 + #endif
426 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdSpatial.cl gfx1031/src/kernels/MIOpenBatchNormActivBwdSpatial.cl
427 +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdSpatial.cl 2022-05-08 14:08:05.000000000 +0800
428 ++++ gfx1031/src/kernels/MIOpenBatchNormActivBwdSpatial.cl 2022-09-01 18:36:18.858979772 +0800
429 +@@ -32,7 +32,7 @@
430 + #endif
431 +
432 + #define MIOPEN_USE_AMDGCN 0
433 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1
434 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1
435 + #undef MIOPEN_USE_AMDGCN
436 + #define MIOPEN_USE_AMDGCN 1
437 + #endif
438 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl gfx1031/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl
439 +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl 2022-05-08 14:08:05.000000000 +0800
440 ++++ gfx1031/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl 2022-09-01 18:36:18.858979772 +0800
441 +@@ -33,7 +33,7 @@
442 + #endif
443 +
444 + #define MIOPEN_USE_AMDGCN 0
445 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1
446 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1
447 + #undef MIOPEN_USE_AMDGCN
448 + #define MIOPEN_USE_AMDGCN 1
449 + #endif
450 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormBwdSpatial.cl gfx1031/src/kernels/MIOpenBatchNormBwdSpatial.cl
451 +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormBwdSpatial.cl 2022-05-08 14:08:05.000000000 +0800
452 ++++ gfx1031/src/kernels/MIOpenBatchNormBwdSpatial.cl 2022-09-01 18:36:18.858979772 +0800
453 +@@ -33,7 +33,7 @@
454 + #endif
455 +
456 + #define MIOPEN_USE_AMDGCN 0
457 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1
458 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1
459 + #undef MIOPEN_USE_AMDGCN
460 + #define MIOPEN_USE_AMDGCN 1
461 + #endif
462 +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl gfx1031/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl
463 +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl 2022-05-08 14:08:05.000000000 +0800
464 ++++ gfx1031/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl 2022-09-01 18:36:18.858979772 +0800
465 +@@ -33,7 +33,7 @@
466 + #endif
467 +
468 + #define MIOPEN_USE_AMDGCN 0
469 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1
470 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1
471 + #undef MIOPEN_USE_AMDGCN
472 + #define MIOPEN_USE_AMDGCN 1
473 + #endif
474 +diff --color -upr MIOpen-rocm-5.1.3/src/md_graph.cpp gfx1031/src/md_graph.cpp
475 +--- MIOpen-rocm-5.1.3/src/md_graph.cpp 2022-05-08 14:08:05.000000000 +0800
476 ++++ gfx1031/src/md_graph.cpp 2022-09-01 18:36:18.630980283 +0800
477 +@@ -738,8 +738,8 @@ void FusionMDGraph::InitConv(FusionMDGra
478 +
479 + add_v21_wino("gfx9", {"gfx900", "gfx906", "gfx908", "gfx90a"}, 1);
480 + add_v21_wino("gfx9", {"gfx900", "gfx906", "gfx908", "gfx90a"}, 2);
481 +- add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030"}, 1);
482 +- add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030"}, 2);
483 ++ add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030", "gfx1031"}, 1);
484 ++ add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030", "gfx1031"}, 2);
485 + }
486 + }
487 +
488 +diff --color -upr MIOpen-rocm-5.1.3/src/ocl/fusionopbiasbnactivocl.cpp gfx1031/src/ocl/fusionopbiasbnactivocl.cpp
489 +--- MIOpen-rocm-5.1.3/src/ocl/fusionopbiasbnactivocl.cpp 2022-05-08 14:08:05.000000000 +0800
490 ++++ gfx1031/src/ocl/fusionopbiasbnactivocl.cpp 2022-09-01 18:36:18.634980274 +0800
491 +@@ -392,7 +392,8 @@ miopenStatus_t BatchNormBwdTrainFusionOp
492 + " -DMIO_BN_USESAVED=" + std::to_string(static_cast<int>(true)) +
493 + " -DMIO_BN_VARIANT=" + std::to_string(variant) +
494 + " -DMIO_BN_CBA_WRITE_INTERMEDIATE=" + std::to_string(0) +
495 +- " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0");
496 ++ " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0") +
497 ++ " -DMIO_BN_GFX1031=" + ((handle.GetDeviceName() == "gfx1031") ? "1" : "0");
498 +
499 + compile_config += add;
500 + MIOPEN_LOG_I2(add);
501 +@@ -607,7 +608,8 @@ miopenStatus_t BatchNormFwdTrainFusionOp
502 + " -DMIO_SAVE_MEAN_VARIANCE=" + (saveBatchStats ? "1" : "0") +
503 + " -DMIO_RUNNING_RESULT=" + ((savePopStats) ? "1" : "0") +
504 + " -DMIO_BN_VARIANT=" + std::to_string(variant) +
505 +- " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0");
506 ++ " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0") +
507 ++ " -DMIO_BN_GFX1031=" + ((handle.GetDeviceName() == "gfx1031") ? "1" : "0");
508 +
509 + compile_config += add;
510 + MIOPEN_LOG_I2(add);
511 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_per_activation.cpp gfx1031/src/solver/batchnorm/backward_per_activation.cpp
512 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_per_activation.cpp 2022-05-08 14:08:05.000000000 +0800
513 ++++ gfx1031/src/solver/batchnorm/backward_per_activation.cpp 2022-09-01 18:36:18.638980266 +0800
514 +@@ -113,6 +113,7 @@ BnBwdTrainingPerActivation::GetSolution(
515 + {"MIO_BN_GRP1", ylocalsize},
516 + {"MIO_BN_GRP2", zlocalsize},
517 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
518 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
519 + };
520 +
521 + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
522 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_multiple.cpp gfx1031/src/solver/batchnorm/backward_spatial_multiple.cpp
523 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_multiple.cpp 2022-05-08 14:08:05.000000000 +0800
524 ++++ gfx1031/src/solver/batchnorm/backward_spatial_multiple.cpp 2022-09-01 18:36:18.638980266 +0800
525 +@@ -210,6 +210,7 @@ ConvSolution BnBwdTrainingSpatialMultipl
526 + {"MIO_BN_GRP1", ylocalsize},
527 + {"MIO_BN_GRP2", zlocalsize},
528 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
529 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
530 + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())},
531 + };
532 +
533 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_single.cpp gfx1031/src/solver/batchnorm/backward_spatial_single.cpp
534 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_single.cpp 2022-05-08 14:08:05.000000000 +0800
535 ++++ gfx1031/src/solver/batchnorm/backward_spatial_single.cpp 2022-09-01 18:36:18.638980266 +0800
536 +@@ -247,6 +247,7 @@ BnBwdTrainingSpatialSingle::GetSolution(
537 +
538 + build_params << KernelBuildParameters{
539 + {"MIO_BN_GFX1030", (handle.GetDeviceName() == "gfx1030") ? "1" : "0"},
540 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
541 + };
542 +
543 + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
544 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_inference.cpp gfx1031/src/solver/batchnorm/forward_inference.cpp
545 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_inference.cpp 2022-05-08 14:08:05.000000000 +0800
546 ++++ gfx1031/src/solver/batchnorm/forward_inference.cpp 2022-09-01 18:36:18.638980266 +0800
547 +@@ -103,6 +103,7 @@ ConvSolution BnFwdInference::GetSolution
548 + {"MIO_BN_GRP1", ylocalsize},
549 + {"MIO_BN_GRP2", zlocalsize},
550 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
551 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
552 + };
553 +
554 + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
555 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_per_activation.cpp gfx1031/src/solver/batchnorm/forward_per_activation.cpp
556 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_per_activation.cpp 2022-05-08 14:08:05.000000000 +0800
557 ++++ gfx1031/src/solver/batchnorm/forward_per_activation.cpp 2022-09-01 18:36:18.638980266 +0800
558 +@@ -105,6 +105,7 @@ BnFwdTrainingPerActivation::GetSolution(
559 + {"MIO_BN_GRP1", ylocalsize},
560 + {"MIO_BN_GRP2", zlocalsize},
561 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
562 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
563 + };
564 +
565 + auto kernel = KernelInfo{};
566 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_multiple.cpp gfx1031/src/solver/batchnorm/forward_spatial_multiple.cpp
567 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_multiple.cpp 2022-05-08 14:08:05.000000000 +0800
568 ++++ gfx1031/src/solver/batchnorm/forward_spatial_multiple.cpp 2022-09-01 18:36:18.638980266 +0800
569 +@@ -177,6 +177,7 @@ ConvSolution BnFwdTrainingSpatialMultipl
570 + {"MIO_BN_GRP1", ylocalsize},
571 + {"MIO_BN_GRP2", zlocalsize},
572 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
573 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
574 + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())},
575 + };
576 +
577 +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_single.cpp gfx1031/src/solver/batchnorm/forward_spatial_single.cpp
578 +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_single.cpp 2022-05-08 14:08:05.000000000 +0800
579 ++++ gfx1031/src/solver/batchnorm/forward_spatial_single.cpp 2022-09-01 18:36:18.638980266 +0800
580 +@@ -211,6 +211,7 @@ BnFwdTrainingSpatialSingle::GetSolution(
581 + {"MIO_BN_GRP1", ylocalsize},
582 + {"MIO_BN_GRP2", zlocalsize},
583 + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
584 ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
585 + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())},
586 + };
587 +
588 +diff --color -upr MIOpen-rocm-5.1.3/src/target_properties.cpp gfx1031/src/target_properties.cpp
589 +--- MIOpen-rocm-5.1.3/src/target_properties.cpp 2022-05-08 14:08:05.000000000 +0800
590 ++++ gfx1031/src/target_properties.cpp 2022-09-01 18:36:18.630980283 +0800
591 +@@ -54,6 +54,7 @@ static std::string GetDeviceNameFromMap(
592 + {"Vega10", "gfx900"},
593 + {"gfx901", "gfx900"},
594 + {"10.3.0 Sienna_Cichlid 18", "gfx1030"},
595 ++ {"10.3.1 Navi_flounder 18", "gfx1031"},
596 + };
597 +
598 + const char* const p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{});
599 +diff --color -upr MIOpen-rocm-5.1.3/test/CMakeLists.txt gfx1031/test/CMakeLists.txt
600 +--- MIOpen-rocm-5.1.3/test/CMakeLists.txt 2022-05-08 14:08:05.000000000 +0800
601 ++++ gfx1031/test/CMakeLists.txt 2022-09-01 18:36:19.022979405 +0800
602 +@@ -38,6 +38,7 @@ option( MIOPEN_TEST_GFX90A "Test on gfx9
603 + option( MIOPEN_TEST_GFX900 "Test on Vega10 (gfx900)" OFF )
604 + option( MIOPEN_TEST_GFX906 "Test on Vega20 (gfx906)" OFF )
605 + option( MIOPEN_TEST_GFX1030 "Test on Navi21 (gfx1030)" OFF )
606 ++option( MIOPEN_TEST_GFX1031 "Test on Navi22 (gfx1031)" OFF )
607 + option( MIOPEN_TEST_GPU_XNACK_ENABLED "Test as if XNACK mode is enabled" OFF )
608 + option( MIOPEN_TEST_CONV Off)
609 + option( MIOPEN_TEST_DEEPBENCH Off)
610 +@@ -74,7 +75,7 @@ endif()
611 + # Also we do not detect GPU when target GPU for testing is specified explicitly.
612 + set(MIOPEN_TEST_GPU_DETECTION_FAILED FALSE)
613 + set(MIOPEN_NO_GPU FALSE)
614 +-if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TEST_GFX906 OR MIOPEN_TEST_GFX908 OR MIOPEN_TEST_GFX90A OR MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_HIP_NOGPU))
615 ++if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TEST_GFX906 OR MIOPEN_TEST_GFX908 OR MIOPEN_TEST_GFX90A OR MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 OR MIOPEN_TEST_HIP_NOGPU))
616 + find_program(ROCMINFO
617 + NAMES rocminfo
618 + PATHS
619 +@@ -96,6 +97,8 @@ if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TES
620 + elseif (NOT ROCMINFO_EXIT_STATUS EQUAL 0)
621 + message(WARNING "ROCMINFO FAILED, GPU TYPE UNKNOWN. Manually set respective MIOPEN_TEST_GFX* CMake variable to specify target GPU for testing.")
622 + set(MIOPEN_TEST_GPU_DETECTION_FAILED TRUE)
623 ++ elseif(ROCMINFO_OUTPUT MATCHES "gfx1031")
624 ++ set(MIOPEN_TEST_GFX1031 ON)
625 + elseif(ROCMINFO_OUTPUT MATCHES "gfx1030")
626 + set(MIOPEN_TEST_GFX1030 ON)
627 + elseif(ROCMINFO_OUTPUT MATCHES "gfx900")
628 +@@ -125,6 +128,7 @@ message(STATUS "MIOPEN_TEST_GFX906 ${MIO
629 + message(STATUS "MIOPEN_TEST_GFX908 ${MIOPEN_TEST_GFX908}")
630 + message(STATUS "MIOPEN_TEST_GFX90A ${MIOPEN_TEST_GFX90A}")
631 + message(STATUS "MIOPEN_TEST_GFX1030 ${MIOPEN_TEST_GFX1030}")
632 ++message(STATUS "MIOPEN_TEST_GFX1031 ${MIOPEN_TEST_GFX1031}")
633 + message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}")
634 + message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}")
635 +
636 +@@ -167,10 +171,10 @@ endmacro()
637 + set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT)
638 + option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT})
639 +
640 +-set_var_to_condition(WORKAROUND_ISSUE_1148_DEFAULT MIOPEN_TEST_GFX1030 AND MIOPEN_TEST_FLOAT)
641 ++set_var_to_condition(WORKAROUND_ISSUE_1148_DEFAULT MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 AND MIOPEN_TEST_FLOAT)
642 + option( WORKAROUND_ISSUE_1148 "" ${WORKAROUND_ISSUE_1148_DEFAULT})
643 +
644 +-set_var_to_condition(WORKAROUND_ISSUE_1334_DEFAULT MIOPEN_TEST_GFX1030 AND MIOPEN_TEST_FLOAT)
645 ++set_var_to_condition(WORKAROUND_ISSUE_1334_DEFAULT MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 AND MIOPEN_TEST_FLOAT)
646 + option( WORKAROUND_ISSUE_1334 "" ${WORKAROUND_ISSUE_1334_DEFAULT})
647 +
648 + if(NOT MIOPEN_TEST_MIOTENSILE)
649 +@@ -216,7 +220,7 @@ if (MIOPEN_NO_GPU)
650 + test_pooling3d test_perfdb)
651 + endif()
652 +
653 +-if(MIOPEN_TEST_GFX1030)
654 ++if(MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031)
655 + if(WORKAROUND_ISSUE_1053 AND MIOPEN_TEST_ALL)
656 + list(APPEND SKIP_TESTS test_lrn_test)
657 + endif()
658 +@@ -439,7 +443,7 @@ endfunction()
659 + # If nothing is specified, the default value is taken.
660 + # Default: FLOAT_ENABLED HALF_DISABLED BF16_DISABLED INT8_DISABLED
661 + #
662 +-# GPU types: GFX900, GFX906, GFX908, GFX90A, GFX1030
663 ++# GPU types: GFX900, GFX906, GFX908, GFX90A, GFX1030, GFX1031
664 + # The option can be enabled or disabled by using '_ENABLED' and '_DISABLED' suffix.
665 + # If nothing is specified, the default value is taken.
666 + # Default: GFX900_ENABLED, GFX906_ENABLED, GFX908_ENABLED, GFX90A_ENABLED, GFX1030_DISABLED
667 +@@ -571,7 +575,7 @@ function(add_custom_test NAME)
668 + set_tests_properties(${NAME} PROPERTIES RUN_SERIAL On)
669 + endif()
670 +
671 +- if( (is_gfx900_check OR is_gfx906_check OR is_gfx908_check OR is_gfx1030_check OR is_gfx90a_check)
672 ++ if( (is_gfx900_check OR is_gfx906_check OR is_gfx908_check OR is_gfx1030_check OR is_gfx1031_check OR is_gfx90a_check)
673 + AND is_full_check
674 + AND is_xnack_on_check
675 + AND (is_miotensile_check AND is_mlir_check)
676 +diff --color -upr MIOpen-rocm-5.1.3/test/handle_test.cpp gfx1031/test/handle_test.cpp
677 +--- MIOpen-rocm-5.1.3/test/handle_test.cpp 2022-05-08 14:08:05.000000000 +0800
678 ++++ gfx1031/test/handle_test.cpp 2022-09-01 18:36:19.018979413 +0800
679 +@@ -234,7 +234,7 @@ void test_warnings(kernel_type_t kern_ty
680 + void test_arch_name()
681 + {
682 + auto&& h = get_handle();
683 +- auto known_arch = {"gfx908", "gfx90a", "gfx906", "gfx900", "gfx803", "gfx1030"};
684 ++ auto known_arch = {"gfx908", "gfx90a", "gfx906", "gfx900", "gfx803", "gfx1030", "gfx1031"};
685 + auto this_arch = h.GetDeviceName();
686 + EXPECT(std::any_of(
687 + known_arch.begin(), known_arch.end(), [&](std::string arch) { return arch == this_arch; }));
688 +diff --color -upr MIOpen-rocm-5.1.3/test/mdgraph.cpp gfx1031/test/mdgraph.cpp
689 +--- MIOpen-rocm-5.1.3/test/mdgraph.cpp 2022-05-08 14:08:05.000000000 +0800
690 ++++ gfx1031/test/mdgraph.cpp 2022-09-01 18:36:19.022979405 +0800
691 +@@ -222,7 +222,7 @@ struct mdgraph_driver : test_driver
692 + auto target = h.GetTargetProperties();
693 +
694 + auto wino_supported_arch = {
695 +- "gfx1030", "gfx1012", "gfx1011", "gfx90a", "gfx908", "gfx906", "gfx900", "gfx803"};
696 ++ "gfx1030", "gfx1031","gfx1012", "gfx1011", "gfx90a", "gfx908", "gfx906", "gfx900", "gfx803"};
697 +
698 + auto is_wino_support = !xnack_enabled &&
699 + !miopen::IsDisabled(MIOPEN_DEBUG_GCN_ASM_KERNELS{}) &&
700
701 diff --git a/sci-libs/miopen/files/miopen-5.1.3-include-array.patch b/sci-libs/miopen/files/miopen-5.1.3-include-array.patch
702 new file mode 100644
703 index 000000000000..fc6a36d22548
704 --- /dev/null
705 +++ b/sci-libs/miopen/files/miopen-5.1.3-include-array.patch
706 @@ -0,0 +1,12 @@
707 +This fixes a compile error with gcc-12's libstdc++
708 +index 1cfb2a72c..0f4feb406 100644
709 +--- a/test/sequences.cpp
710 ++++ b/test/sequences.cpp
711 +@@ -25,6 +25,7 @@
712 + *******************************************************************************/
713 + #include "test.hpp"
714 + #include <miopen/sequences.hpp>
715 ++#include <array>
716 +
717 + namespace miopen {
718 + namespace seq {
719
720 diff --git a/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch b/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch
721 new file mode 100644
722 index 000000000000..0d1d429dbb85
723 --- /dev/null
724 +++ b/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch
725 @@ -0,0 +1,17 @@
726 +Don't strip for release. Let portage handle stripping.
727 +Index: MIOpen-rocm-5.1.3/CMakeLists.txt
728 +===================================================================
729 +--- MIOpen-rocm-5.1.3.orig/CMakeLists.txt
730 ++++ MIOpen-rocm-5.1.3/CMakeLists.txt
731 +@@ -78,11 +78,6 @@ option( BUILD_DEV "Build for development
732 + option(MIOPEN_ENABLE_FIN "Enable the fin driver for MIOpen" OFF)
733 +
734 +
735 +-# Strip symbols for release
736 +-if(NOT WIN32 AND NOT APPLE)
737 +- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s")
738 +- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
739 +-endif()
740 +
741 + rocm_setup_version(VERSION 2.16.0)
742 +
743
744 diff --git a/sci-libs/miopen/miopen-5.1.3.ebuild b/sci-libs/miopen/miopen-5.1.3.ebuild
745 new file mode 100644
746 index 000000000000..a7d8b5083f60
747 --- /dev/null
748 +++ b/sci-libs/miopen/miopen-5.1.3.ebuild
749 @@ -0,0 +1,109 @@
750 +# Copyright 1999-2022 Gentoo Authors
751 +# Distributed under the terms of the GNU General Public License v2
752 +
753 +EAPI=8
754 + # Assigned before inherit; presumably read by rocm.eclass at inherit time - TODO confirm.
755 +ROCM_VERSION=${PV}
756 +
757 +inherit cmake flag-o-matic llvm rocm
758 + # LLVM slot passed to get_llvm_prefix in src_prepare and src_configure.
759 +LLVM_MAX_SLOT=14
760 +
761 +DESCRIPTION="AMD's Machine Intelligence Library"
762 +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/MIOpen"
763 +SRC_URI="https://github.com/ROCmSoftwarePlatform/MIOpen/archive/rocm-${PV}.tar.gz -> MIOpen-${PV}.tar.gz"
764 +
765 +LICENSE="MIT"
766 +KEYWORDS="~amd64"
767 +SLOT="0/$(ver_cut 1-2)"
768 +
769 +IUSE="debug test"
770 +RESTRICT="!test? ( test )"
771 + # Runtime dependencies; rocBLAS is requested in this package's SLOT with ${ROCM_USEDEP}.
772 +RDEPEND="
773 + >=dev-util/hip-5.1.3
774 + >=dev-db/sqlite-3.17
775 + sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}]
776 + >=dev-libs/boost-1.72
777 +"
778 +
779 +DEPEND="${RDEPEND}"
780 +
781 +BDEPEND="dev-libs/half:0/1
782 + dev-util/rocm-cmake
783 +"
784 +
785 +S="${WORKDIR}/MIOpen-rocm-${PV}"
786 + # Patch files under FILESDIR; several carry 4.2.0, 4.3.0 and 5.0.2 version prefixes and are reused here.
787 +PATCHES=(
788 + "${FILESDIR}/${PN}-4.2.0-disable-no-inline-boost.patch"
789 + "${FILESDIR}/${PN}-4.2.0-gcc11-numeric_limits.patch"
790 + "${FILESDIR}/${PN}-5.0.2-strip-xnack-in-flags.patch"
791 + "${FILESDIR}/${PN}-4.3.0-fix-interface-include-in-HIP_COMPILER_FLAGS.patch"
792 + "${FILESDIR}/${PN}-4.3.0-enable-test.patch"
793 + "${FILESDIR}/${PN}-5.1.3-gfx1031.patch"
794 + "${FILESDIR}/${PN}-5.1.3-deprecate-clang-ocl.patch"
795 + "${FILESDIR}/${PN}-5.1.3-no-strip.patch"
796 + "${FILESDIR}/${PN}-5.1.3-include-array.patch"
797 + "${FILESDIR}/${PN}-5.1.3-avoid-metadata-error-for-vanilla-clang.patch"
798 +)
799 +
800 +src_prepare() {
801 + cmake_src_prepare # run first: the test/CMakeLists.txt sed below expands BUILD_DIR
802 + # Top-level CMakeLists.txt: swap the /opt/rocm llvm and hip paths for the system ones, rewrite MIOPEN_INSTALL_DIR, delete "MIOPEN_TIDY_ERRORS ALL".
803 + sed -e "s:/opt/rocm/llvm:$(get_llvm_prefix ${LLVM_MAX_SLOT}) NO_DEFAULT_PATH:" \
804 + -e "s:/opt/rocm/hip:$(hipconfig -p) NO_DEFAULT_PATH:" \
805 + -e '/set( MIOPEN_INSTALL_DIR/s:miopen:${CMAKE_INSTALL_PREFIX}:' \
806 + -e '/MIOPEN_TIDY_ERRORS ALL/d' \
807 + -i CMakeLists.txt || die
808 + # Drop the install symlink call; make the add_test lines build in BUILD_DIR.
809 + sed -e "/rocm_install_symlink_subdir(\${MIOPEN_INSTALL_DIR})/d" -i src/CMakeLists.txt || die
810 + sed -e "/add_test/s:--build \${CMAKE_CURRENT_BINARY_DIR}:--build ${BUILD_DIR}:" -i test/CMakeLists.txt || die
811 + # Replace the device-library path with ${EPREFIX}/usr/lib/amdgcn/bitcode.
812 + sed -e "s:\${AMD_DEVICE_LIBS_PREFIX}/lib:${EPREFIX}/usr/lib/amdgcn/bitcode:" -i cmake/hip-config.cmake || die
813 + # This plus avoid-metadata-error-for-vanilla-clang.patch fixes the bug mentioned
814 + # in https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731
815 + # "{} +" instead of "\;": only then does a failing sed make find exit non-zero, so that die fires.
816 + find src/kernels -name "*.s" -exec \
817 + sed -e "s/.name: n /.name: x /g" -e "s/.name: y /.name: z /g" \
818 + -e "s/.name: y,/.name: z,/g" -i {} + || die
819 +}
820 +
821 +src_configure() {
822 + if ! use debug; then # non-debug: define NDEBUG for C and C++ and build as Release
823 + append-cflags "-DNDEBUG"
824 + append-cxxflags "-DNDEBUG"
825 + CMAKE_BUILD_TYPE="Release"
826 + else
827 + CMAKE_BUILD_TYPE="Debug"
828 + fi
829 +
830 + local mycmakeargs=(
831 + -DCMAKE_SKIP_RPATH=ON
832 + -DAMDGPU_TARGETS="$(get_amdgpu_flags)"
833 + -DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
834 + -DMIOPEN_BACKEND=HIP
835 + -DBoost_USE_STATIC_LIBS=OFF
836 + -DMIOPEN_USE_MLIR=OFF
837 + -DBUILD_TESTS=$(usex test ON OFF)
838 + -DMIOPEN_TEST_ALL=$(usex test ON OFF)
839 + )
840 + local gpu_target # keep the loop variable out of the global ebuild environment
841 + if use test; then # pass -DMIOPEN_TEST_<TARGET>=ON for each word of AMDGPU_TARGETS, upper-cased
842 + for gpu_target in ${AMDGPU_TARGETS}; do
843 + mycmakeargs+=( -DMIOPEN_TEST_${gpu_target^^}=ON )
844 + done
845 + fi
846 +
847 + addpredict /dev/kfd # sandbox: presumably the GPU device nodes are probed during configure - confirm
848 + addpredict /dev/dri/
849 + append-cxxflags "--rocm-path=$(hipconfig -R)"
850 + append-cxxflags "--hip-device-lib-path=${EPREFIX}/usr/lib/amdgcn/bitcode"
851 + CXX="$(get_llvm_prefix ${LLVM_MAX_SLOT})/bin/clang++" cmake_src_configure
852 +}
853 +
854 +src_test() {
855 + check_amdgpu # defined outside this ebuild, presumably in rocm.eclass; assumed to verify GPU access - confirm
856 + export LD_LIBRARY_PATH="${BUILD_DIR}/lib" # point the loader at the lib directory of the build tree
857 + MAKEOPTS=-j1 cmake_src_test # MAKEOPTS is overridden to a single job for this call
858 +}
859
860 diff --git a/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild b/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild
861 index e643014fda76..77092f820467 100644
862 --- a/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild
863 +++ b/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild
864 @@ -18,7 +18,6 @@ SLOT="0/$(ver_cut 1-2)"
865 IUSE="test benchmark"
866 REQUIRED_USE="${ROCM_REQUIRED_USE}"
867
868 -# sci-libs/rocBLAS:${SLOT}
869 RDEPEND="dev-util/hip
870 sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}]
871 =dev-libs/libfmt-8*