1 |
commit: 19e8945bd388537423c2b8290a889f4453edfe08 |
2 |
Author: Sebastien Fabbro <sfabbro <AT> uvic <DOT> ca> |
3 |
AuthorDate: Tue Jul 3 19:31:08 2012 +0000 |
4 |
Commit: Sebastien Fabbro <bicatali <AT> gentoo <DOT> org> |
5 |
CommitDate: Tue Jul 3 19:31:08 2012 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=19e8945b |
7 |
|
8 |
sci-libs/magma: Version bump. Updated patches |
9 |
|
10 |
(Portage version: 2.2.01.20430-prefix/git/Linux x86_64, unsigned Manifest commit) |
11 |
|
12 |
--- |
13 |
sci-libs/magma/ChangeLog | 9 + |
14 |
sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch | 71 -- |
15 |
.../files/magma-1.2.0-duplicate-symbols.patch | 1227 -------------------- |
16 |
.../files/magma-1.2.1-duplicate-symbols.patch | 612 ++++++++++ |
17 |
.../magma/files/magma-1.2.1-no-cuda-driver.patch | 38 + |
18 |
.../{magma-1.2.0.ebuild => magma-1.2.1.ebuild} | 18 +- |
19 |
6 files changed, 667 insertions(+), 1308 deletions(-) |
20 |
|
21 |
diff --git a/sci-libs/magma/ChangeLog b/sci-libs/magma/ChangeLog |
22 |
index 05bcaf9..460bde8 100644 |
23 |
--- a/sci-libs/magma/ChangeLog |
24 |
+++ b/sci-libs/magma/ChangeLog |
25 |
@@ -2,6 +2,15 @@ |
26 |
# Copyright 1999-2012 Gentoo Foundation; Distributed under the GPL v2 |
27 |
# $Header: $ |
28 |
|
29 |
+*magma-1.2.1 (03 Jul 2012) |
30 |
+ |
31 |
+ 03 Jul 2012; Sébastien Fabbro <bicatali@g.o> |
32 |
+ +files/magma-1.2.1-duplicate-symbols.patch, |
33 |
+ +files/magma-1.2.1-no-cuda-driver.patch, +magma-1.2.1.ebuild, |
34 |
+ -files/magma-1.2.0-cblas-dotc.patch, |
35 |
+ -files/magma-1.2.0-duplicate-symbols.patch, -magma-1.2.0.ebuild: |
36 |
+ sci-libs/magma: Version bump. Updated patches |
37 |
+ |
38 |
*magma-1.2.0 (25 Jun 2012) |
39 |
|
40 |
25 Jun 2012; Sébastien Fabbro <bicatali@g.o> |
41 |
|
42 |
diff --git a/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch b/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch |
43 |
deleted file mode 100644 |
44 |
index e2d16a4..0000000 |
45 |
--- a/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch |
46 |
+++ /dev/null |
47 |
@@ -1,71 +0,0 @@ |
48 |
-Description: Use cblas instead of calls to non existing codtc and zdotc C routines |
49 |
-Author: Sebastien Fabbro <bicatali@g.o> |
50 |
- |
51 |
-diff -Nur magma-1.2.0.orig/src/clatrd2.cpp magma-1.2.0/src/clatrd2.cpp |
52 |
---- magma-1.2.0.orig/src/clatrd2.cpp 2012-06-06 17:41:50.000000000 +0100 |
53 |
-+++ magma-1.2.0/src/clatrd2.cpp 2012-06-06 17:45:23.000000000 +0100 |
54 |
-@@ -12,6 +12,7 @@ |
55 |
- |
56 |
- */ |
57 |
- #include "common_magma.h" |
58 |
-+#include <cblas.h> |
59 |
- |
60 |
- #define PRECISION_c |
61 |
- |
62 |
-@@ -270,7 +271,9 @@ |
63 |
- blasf77_cscal(&i, &tau[i - 1], W(0, iw), &ione); |
64 |
- |
65 |
- #if defined(PRECISION_z) || defined(PRECISION_c) |
66 |
-- blasf77_cdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione); |
67 |
-+ cblas_cdotc_sub(i, W(0, iw), ione, A(0, i), ione, &value); |
68 |
-+ |
69 |
-+// blasf77_cdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione); |
70 |
- alpha = tau[i - 1] * -.5f * value; |
71 |
- #else |
72 |
- alpha = tau[i - 1] * -.5f * blasf77_cdotc(&i, W(0, iw), &ione, A(0, i), &ione); |
73 |
-@@ -349,7 +352,10 @@ |
74 |
- W(0, i), &ione, &c_one, W(i+1, i), &ione); |
75 |
- blasf77_cscal(&i_n, &tau[i], W(i+1,i), &ione); |
76 |
- #if defined(PRECISION_z) || defined(PRECISION_c) |
77 |
-- blasf77_cdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
78 |
-+ cblas_cdotc_sub(i_n, W(i +1, i), ione, |
79 |
-+ A(i +1, i), ione, &value); |
80 |
-+ |
81 |
-+ //blasf77_cdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
82 |
- alpha = tau[i]* -.5f * value; |
83 |
- #else |
84 |
- alpha = tau[i]* -.5f* blasf77_cdotc(&i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
85 |
-diff -Nur magma-1.2.0.orig/src/zlatrd2.cpp magma-1.2.0/src/zlatrd2.cpp |
86 |
---- magma-1.2.0.orig/src/zlatrd2.cpp 2012-06-06 17:41:50.000000000 +0100 |
87 |
-+++ magma-1.2.0/src/zlatrd2.cpp 2012-06-06 17:46:33.000000000 +0100 |
88 |
-@@ -12,6 +12,7 @@ |
89 |
- |
90 |
- */ |
91 |
- #include "common_magma.h" |
92 |
-+#include <cblas.h> |
93 |
- |
94 |
- #define PRECISION_z |
95 |
- |
96 |
-@@ -270,7 +271,9 @@ |
97 |
- blasf77_zscal(&i, &tau[i - 1], W(0, iw), &ione); |
98 |
- |
99 |
- #if defined(PRECISION_z) || defined(PRECISION_c) |
100 |
-- blasf77_zdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione); |
101 |
-+ cblas_zdotc_sub(i, W(0, iw), ione, A(0, i), ione, &value); |
102 |
-+ |
103 |
-+// blasf77_zdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione); |
104 |
- alpha = tau[i - 1] * -.5f * value; |
105 |
- #else |
106 |
- alpha = tau[i - 1] * -.5f * blasf77_zdotc(&i, W(0, iw), &ione, A(0, i), &ione); |
107 |
-@@ -349,7 +352,10 @@ |
108 |
- W(0, i), &ione, &c_one, W(i+1, i), &ione); |
109 |
- blasf77_zscal(&i_n, &tau[i], W(i+1,i), &ione); |
110 |
- #if defined(PRECISION_z) || defined(PRECISION_c) |
111 |
-- blasf77_zdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
112 |
-+ cblas_zdotc_sub(i_n, W(i +1, i), ione, |
113 |
-+ A(i +1, i), ione, &value); |
114 |
-+ |
115 |
-+ //blasf77_zdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
116 |
- alpha = tau[i]* -.5f * value; |
117 |
- #else |
118 |
- alpha = tau[i]* -.5f* blasf77_zdotc(&i_n, W(i+1,i), &ione, A(i+1, i), &ione); |
119 |
|
120 |
diff --git a/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch b/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch |
121 |
deleted file mode 100644 |
122 |
index 0627f7f..0000000 |
123 |
--- a/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch |
124 |
+++ /dev/null |
125 |
@@ -1,1227 +0,0 @@ |
126 |
-diff -Nur src.orig/cgeqrf_mgpu-trace.cpp src/cgeqrf_mgpu-trace.cpp |
127 |
---- src.orig/cgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100 |
128 |
-+++ src/cgeqrf_mgpu-trace.cpp 2012-06-25 17:10:21.000000000 +0100 |
129 |
-@@ -16,108 +16,42 @@ |
130 |
- #include <sys/time.h> |
131 |
- #include <assert.h> |
132 |
- |
133 |
--float get_current_cpu_time(void) |
134 |
--{ |
135 |
-- struct timeval time_val; |
136 |
-- |
137 |
-- gettimeofday(&time_val, NULL); |
138 |
-- |
139 |
-- return (float)(time_val.tv_sec) + (float)(time_val.tv_usec) / 1000000.0; |
140 |
--} |
141 |
-+extern float get_current_cpu_time_float(void); |
142 |
- |
143 |
- #define MAX_THREADS 5 |
144 |
- |
145 |
- //#define MAX_EVENTS 163840 |
146 |
- #define MAX_EVENTS 1048576 |
147 |
- |
148 |
--int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
149 |
--float event_start_time [MAX_THREADS] __attribute__ ((aligned (128))); |
150 |
--float event_end_time [MAX_THREADS] __attribute__ ((aligned (128))); |
151 |
--float event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
152 |
--int log_events = 1; |
153 |
-+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
154 |
-+extern float event_start_time_float [MAX_THREADS] __attribute__ ((aligned (128))); |
155 |
-+extern float event_end_time_float [MAX_THREADS] __attribute__ ((aligned (128))); |
156 |
-+extern float event_log_float [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
157 |
-+extern int log_events; |
158 |
- |
159 |
- #define core_cpu_event_start(my_core_id) \ |
160 |
-- event_start_time[my_core_id] = get_current_cpu_time(); \ |
161 |
-+ event_start_time_float[my_core_id] = get_current_cpu_time_float(); \ |
162 |
- |
163 |
- #define core_cpu_event_end(my_core_id) \ |
164 |
-- event_end_time[my_core_id] = get_current_cpu_time(); \ |
165 |
-+ event_end_time_float[my_core_id] = get_current_cpu_time_float(); \ |
166 |
- |
167 |
- #define core_gpu_event_start(my_core_id, e1, e2) \ |
168 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
169 |
-- event_start_time[my_core_id] = ctime/1000.+dtime; \ |
170 |
-+ event_start_time_float[my_core_id] = ctime/1000.+dtime; \ |
171 |
- |
172 |
- #define core_gpu_event_end(my_core_id, e1, e2) \ |
173 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
174 |
-- event_end_time[my_core_id] = ctime/1000.+dtime; \ |
175 |
-+ event_end_time_float[my_core_id] = ctime/1000.+dtime; \ |
176 |
- |
177 |
- #define core_log_event(event, my_core_id) \ |
178 |
-- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
179 |
-- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\ |
180 |
-- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\ |
181 |
-- event_log[my_core_id][event_num[my_core_id]+3] = (event);\ |
182 |
-+ event_log_float[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
183 |
-+ event_log_float[my_core_id][event_num[my_core_id]+1] = event_start_time_float[my_core_id];\ |
184 |
-+ event_log_float[my_core_id][event_num[my_core_id]+2] = event_end_time_float[my_core_id];\ |
185 |
-+ event_log_float[my_core_id][event_num[my_core_id]+3] = (event);\ |
186 |
- event_num[my_core_id] += (log_events << 2); \ |
187 |
- event_num[my_core_id] &= (MAX_EVENTS-1); |
188 |
- |
189 |
--void dump_trace(int cores_num) |
190 |
--{ |
191 |
-- char trace_file_name[32]; |
192 |
-- FILE *trace_file; |
193 |
-- int event; |
194 |
-- int core; |
195 |
-- |
196 |
-- float scale = 100000.0; |
197 |
-- float large = 100.0; |
198 |
-- |
199 |
-- sprintf(trace_file_name, "trace.svg"); |
200 |
-- trace_file = fopen(trace_file_name, "w"); |
201 |
-- assert(trace_file != NULL); |
202 |
-- |
203 |
-- fprintf(trace_file, |
204 |
-- "<?xml version=\"1.0\" standalone=\"no\"?>" |
205 |
-- "<svg version=\"1.1\" baseProfile=\"full\" xmlns=\"http://www.w3.org/2000/svg\" " |
206 |
-- "xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" " |
207 |
-- ">\n" |
208 |
-- " <g font-size=\"20\">\n"); |
209 |
-- |
210 |
-- for (core = 0; core < cores_num; core++) |
211 |
-- for (event = 0; event < event_num[core]; event += 4) |
212 |
-- { |
213 |
-- int thread = event_log[core][event+0]; |
214 |
-- float start = event_log[core][event+1]; |
215 |
-- float end = event_log[core][event+2]; |
216 |
-- int color = event_log[core][event+3]; |
217 |
-- |
218 |
-- start -= event_log[core][2]; |
219 |
-- end -= event_log[core][2]; |
220 |
-- /* |
221 |
-- fprintf(trace_file, |
222 |
-- " " |
223 |
-- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" " |
224 |
-- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n", |
225 |
-- start * scale, |
226 |
-- thread * 100.0, |
227 |
-- (end - start) * scale, |
228 |
-- 90.0, |
229 |
-- color); |
230 |
-- */ |
231 |
-- fprintf(trace_file, |
232 |
-- " " |
233 |
-- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" " |
234 |
-- // "fill=\"#%06x\" />\n", |
235 |
-- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n", |
236 |
-- start * scale, |
237 |
-- thread * (large+20.0), |
238 |
-- (end - start) * scale, |
239 |
-- large, |
240 |
-- color); |
241 |
-- } |
242 |
-- |
243 |
-- fprintf(trace_file, |
244 |
-- " </g>\n" |
245 |
-- "</svg>\n"); |
246 |
-- |
247 |
-- fclose(trace_file); |
248 |
--} |
249 |
-+extern void dump_trace_float(int cores_num); |
250 |
- |
251 |
- //=========================================================================== |
252 |
- |
253 |
-@@ -279,7 +213,7 @@ |
254 |
- core_cpu_event_end(num_gpus); |
255 |
- core_log_event(0x666666, num_gpus); |
256 |
- |
257 |
-- dtime = get_current_cpu_time(); |
258 |
-+ dtime = get_current_cpu_time_float(); |
259 |
- |
260 |
- for(j=0; j<num_gpus; j++){ |
261 |
- magma_setdevice(j); |
262 |
-@@ -536,7 +470,7 @@ |
263 |
- } |
264 |
- |
265 |
- magma_setdevice(cdevice); |
266 |
-- dump_trace(num_gpus+1); |
267 |
-+ dump_trace_float(num_gpus+1); |
268 |
- |
269 |
- return *info; |
270 |
- } /* magma_cgeqrf2_mgpu */ |
271 |
-diff -Nur src.orig/cheevr_gpu.cpp src/cheevr_gpu.cpp |
272 |
---- src.orig/cheevr_gpu.cpp 2012-06-23 21:52:09.000000000 +0100 |
273 |
-+++ src/cheevr_gpu.cpp 2012-06-25 06:59:15.000000000 +0100 |
274 |
-@@ -13,34 +13,6 @@ |
275 |
- #include "common_magma.h" |
276 |
- |
277 |
- /* These interfaces are used for TAU profiling */ |
278 |
--extern "C" { |
279 |
-- void Mylapackf77_cstemr(const char *jobz, const char *range, magma_int_t *n, float *d, float *e, |
280 |
-- float *vl, float *vu, magma_int_t *il, magma_int_t *iu, |
281 |
-- magma_int_t *m, float *w, cuFloatComplex *z, magma_int_t *ldz, |
282 |
-- magma_int_t *nzc, magma_int_t *isuppz, magma_int_t *tryrac, |
283 |
-- float *work, magma_int_t *lwork, magma_int_t *iwork, |
284 |
-- magma_int_t *liwork, magma_int_t *info) |
285 |
-- { |
286 |
-- lapackf77_cstemr(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc, |
287 |
-- isuppz, tryrac, work, lwork, iwork, liwork, info); |
288 |
-- } |
289 |
-- |
290 |
-- void Mylapackf77_cstein(int *n, float *d, float *e, int *m, float *w, int *iblock, |
291 |
-- int *isplit, cuFloatComplex *z, int *ldz, float *work, |
292 |
-- int *iwork, int *ifail, int *info) |
293 |
-- { |
294 |
-- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
295 |
-- } |
296 |
-- |
297 |
-- void Mylapackf77_sstebz(const char *range, const char *order, int *n, float *vl, |
298 |
-- float *vu, int *il, int *iu, float *abstol, |
299 |
-- float *d, float *e, int *m, int *nsplit, float *w, |
300 |
-- int *iblock, int *isplit, float *work, int *iwork, int *info) |
301 |
-- { |
302 |
-- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol, d, e, m, |
303 |
-- nsplit, w, iblock, isplit, work, iwork,info); |
304 |
-- } |
305 |
--} |
306 |
- |
307 |
- extern "C" { |
308 |
- #ifdef ADD_ |
309 |
-@@ -497,7 +469,7 @@ |
310 |
- else |
311 |
- tryrac=0; |
312 |
- |
313 |
-- Mylapackf77_cstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il, |
314 |
-+ lapackf77_cstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il, |
315 |
- &iu, m, &w[1], wz, &ldwz, &n, &isuppz[1], &tryrac, &rwork[indrwk], |
316 |
- &llrwork, &iwork[1], &liwork, info); |
317 |
- |
318 |
-@@ -514,10 +486,10 @@ |
319 |
- printf("B/I\n"); |
320 |
- *info = 0; |
321 |
- |
322 |
-- Mylapackf77_sstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m, |
323 |
-+ lapackf77_sstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m, |
324 |
- &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwo], info); |
325 |
- |
326 |
-- Mylapackf77_cstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp], |
327 |
-+ lapackf77_cstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp], |
328 |
- wz, &ldwz, &rwork[indrwk], &iwork[indiwo], &iwork[indifl], info); |
329 |
- |
330 |
- /* Apply unitary matrix used in reduction to tridiagonal |
331 |
-diff -Nur src.orig/cheevx_gpu.cpp src/cheevx_gpu.cpp |
332 |
---- src.orig/cheevx_gpu.cpp 2012-06-23 21:52:09.000000000 +0100 |
333 |
-+++ src/cheevx_gpu.cpp 2012-06-25 06:59:15.000000000 +0100 |
334 |
-@@ -12,27 +12,6 @@ |
335 |
- */ |
336 |
- #include "common_magma.h" |
337 |
- |
338 |
--/* These interfaces are used for TAU profiling */ |
339 |
--extern"C"{ |
340 |
-- void Mylapackf77_cstein(magma_int_t *n, float *d, float *e, magma_int_t *m, |
341 |
-- float *w, magma_int_t *iblock, magma_int_t *isplit, |
342 |
-- cuFloatComplex *z, magma_int_t *ldz, float *work, |
343 |
-- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info) |
344 |
-- { |
345 |
-- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
346 |
-- } |
347 |
-- |
348 |
-- void Mylapackf77_sstebz(char *range, char *order, magma_int_t *n, float *vl, |
349 |
-- float *vu, magma_int_t *il, magma_int_t *iu, float *abstol, |
350 |
-- float *d, float *e, magma_int_t *m, magma_int_t *nsplit, |
351 |
-- float *w, magma_int_t *iblock, magma_int_t *isplit, |
352 |
-- float *work, magma_int_t *iwork, magma_int_t *info) |
353 |
-- { |
354 |
-- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol, |
355 |
-- d, e, m, nsplit, w, iblock, isplit, work, iwork,info); |
356 |
-- } |
357 |
--} |
358 |
-- |
359 |
- extern "C" magma_int_t |
360 |
- magma_cheevx_gpu(char jobz, char range, char uplo, magma_int_t n, |
361 |
- cuFloatComplex *da, magma_int_t ldda, float vl, float vu, |
362 |
-@@ -404,12 +383,12 @@ |
363 |
- indisp = indibl + n; |
364 |
- indiwk = indisp + n; |
365 |
- |
366 |
-- Mylapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
367 |
-+ lapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
368 |
- &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info); |
369 |
- |
370 |
- if (wantz) { |
371 |
- |
372 |
-- Mylapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
373 |
-+ lapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
374 |
- wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info); |
375 |
- |
376 |
- magma_csetmatrix( n, *m, wz, ldwz, dz, lddz ); |
377 |
-diff -Nur src.orig/chegvd.cpp src/chegvd.cpp |
378 |
---- src.orig/chegvd.cpp 2012-06-23 21:52:09.000000000 +0100 |
379 |
-+++ src/chegvd.cpp 2012-06-25 06:59:15.000000000 +0100 |
380 |
-@@ -13,26 +13,6 @@ |
381 |
- */ |
382 |
- #include "common_magma.h" |
383 |
- |
384 |
--/* This ctrmm interface is used for TAU profiling */ |
385 |
--void Mymagma_ctrmm(char side, char uplo, char trans, char unit, |
386 |
-- magma_int_t n, magma_int_t m, |
387 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
388 |
-- cuFloatComplex *dz, magma_int_t lddz) |
389 |
--{ |
390 |
-- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
391 |
-- magma_device_sync(); |
392 |
--} |
393 |
-- |
394 |
--/* This ctrsm interface is used for TAU profiling */ |
395 |
--void Mymagma_ctrsm(char side, char uplo, char trans, char unit, |
396 |
-- magma_int_t n, magma_int_t m, |
397 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
398 |
-- cuFloatComplex *dz, magma_int_t lddz) |
399 |
--{ |
400 |
-- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
401 |
-- magma_device_sync(); |
402 |
--} |
403 |
-- |
404 |
- extern "C" magma_int_t |
405 |
- magma_chegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, |
406 |
- cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
407 |
-@@ -324,7 +304,7 @@ |
408 |
- *(unsigned char *)trans = MagmaNoTrans; |
409 |
- } |
410 |
- |
411 |
-- Mymagma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
412 |
-+ magma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
413 |
- n, n, c_one, db, lddb, da, ldda); |
414 |
- |
415 |
- } else if (itype == 3) |
416 |
-@@ -337,7 +317,7 @@ |
417 |
- *(unsigned char *)trans = MagmaConjTrans; |
418 |
- } |
419 |
- |
420 |
-- Mymagma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
421 |
-+ magma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
422 |
- n, n, c_one, db, lddb, da, ldda); |
423 |
- } |
424 |
- |
425 |
-diff -Nur src.orig/chegvdx.cpp src/chegvdx.cpp |
426 |
---- src.orig/chegvdx.cpp 2012-06-23 21:52:09.000000000 +0100 |
427 |
-+++ src/chegvdx.cpp 2012-06-25 06:59:15.000000000 +0100 |
428 |
-@@ -12,22 +12,6 @@ |
429 |
- */ |
430 |
- #include "common_magma.h" |
431 |
- |
432 |
--void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
433 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
434 |
-- cuFloatComplex *dz, magma_int_t lddz) |
435 |
--{ |
436 |
-- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
437 |
-- magma_device_sync(); |
438 |
--} |
439 |
-- |
440 |
--void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
441 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
442 |
-- cuFloatComplex *dz, magma_int_t lddz) |
443 |
--{ |
444 |
-- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
445 |
-- magma_device_sync(); |
446 |
--} |
447 |
-- |
448 |
- extern "C" magma_int_t |
449 |
- magma_chegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
450 |
- cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
451 |
-@@ -363,7 +347,7 @@ |
452 |
- *(unsigned char *)trans = MagmaNoTrans; |
453 |
- } |
454 |
- |
455 |
-- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
456 |
-+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
457 |
- |
458 |
- } else if (itype == 3) { |
459 |
- |
460 |
-@@ -375,7 +359,7 @@ |
461 |
- *(unsigned char *)trans = MagmaConjTrans; |
462 |
- } |
463 |
- |
464 |
-- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
465 |
-+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
466 |
- |
467 |
- } |
468 |
- |
469 |
-diff -Nur src.orig/chegvr.cpp src/chegvr.cpp |
470 |
---- src.orig/chegvr.cpp 2012-06-23 21:52:09.000000000 +0100 |
471 |
-+++ src/chegvr.cpp 2012-06-25 06:59:15.000000000 +0100 |
472 |
-@@ -12,24 +12,6 @@ |
473 |
- */ |
474 |
- #include "common_magma.h" |
475 |
- |
476 |
--void Mymagma_ctrmm(char side, char uplo, char trans, char unit, |
477 |
-- magma_int_t n, magma_int_t m, |
478 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
479 |
-- cuFloatComplex *dz, magma_int_t lddz) |
480 |
--{ |
481 |
-- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
482 |
-- magma_device_sync(); |
483 |
--} |
484 |
-- |
485 |
--void Mymagma_ctrsm(char side, char uplo, char trans, char unit, |
486 |
-- magma_int_t n, magma_int_t m, |
487 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
488 |
-- cuFloatComplex *dz, magma_int_t lddz) |
489 |
--{ |
490 |
-- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
491 |
-- magma_device_sync(); |
492 |
--} |
493 |
-- |
494 |
- extern "C" magma_int_t |
495 |
- magma_chegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
496 |
- cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
497 |
-@@ -408,7 +390,7 @@ |
498 |
- *(unsigned char *)trans = MagmaNoTrans; |
499 |
- } |
500 |
- |
501 |
-- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
502 |
-+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
503 |
- db, lddb, dz, lddz); |
504 |
- |
505 |
- } else if (itype == 3) { |
506 |
-@@ -421,7 +403,7 @@ |
507 |
- *(unsigned char *)trans = MagmaConjTrans; |
508 |
- } |
509 |
- |
510 |
-- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
511 |
-+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
512 |
- db, lddb, dz, lddz); |
513 |
- } |
514 |
- |
515 |
-diff -Nur src.orig/chegvx.cpp src/chegvx.cpp |
516 |
---- src.orig/chegvx.cpp 2012-06-23 21:52:09.000000000 +0100 |
517 |
-+++ src/chegvx.cpp 2012-06-25 06:59:15.000000000 +0100 |
518 |
-@@ -12,22 +12,6 @@ |
519 |
- */ |
520 |
- #include "common_magma.h" |
521 |
- |
522 |
--void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
523 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
524 |
-- cuFloatComplex *dz, magma_int_t lddz) |
525 |
--{ |
526 |
-- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
527 |
-- magma_device_sync(); |
528 |
--} |
529 |
-- |
530 |
--void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
531 |
-- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
532 |
-- cuFloatComplex *dz, magma_int_t lddz) |
533 |
--{ |
534 |
-- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
535 |
-- magma_device_sync(); |
536 |
--} |
537 |
-- |
538 |
- extern "C" magma_int_t |
539 |
- magma_chegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
540 |
- cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
541 |
-@@ -330,7 +314,7 @@ |
542 |
- *(unsigned char *)trans = MagmaNoTrans; |
543 |
- } |
544 |
- |
545 |
-- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
546 |
-+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
547 |
- |
548 |
- } else if (itype == 3) { |
549 |
- |
550 |
-@@ -343,7 +327,7 @@ |
551 |
- *(unsigned char *)trans = MagmaConjTrans; |
552 |
- } |
553 |
- |
554 |
-- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
555 |
-+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
556 |
- |
557 |
- } |
558 |
- |
559 |
-diff -Nur src.orig/cstedx_m.cpp src/cstedx_m.cpp |
560 |
---- src.orig/cstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
561 |
-+++ src/cstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
562 |
-@@ -16,10 +16,7 @@ |
563 |
- float* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork, |
564 |
- magma_int_t* info); |
565 |
- |
566 |
-- magma_int_t get_cstedx_smlsize() |
567 |
-- { |
568 |
-- return 25; |
569 |
-- } |
570 |
-+ magma_int_t get_cstedx_smlsize(); |
571 |
- } |
572 |
- |
573 |
- extern "C" magma_int_t |
574 |
-diff -Nur src.orig/dgeqrf_mgpu-trace.cpp src/dgeqrf_mgpu-trace.cpp |
575 |
---- src.orig/dgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100 |
576 |
-+++ src/dgeqrf_mgpu-trace.cpp 2012-06-25 06:59:15.000000000 +0100 |
577 |
-@@ -1,4 +1,4 @@ |
578 |
--/* |
579 |
-+/*5A5A5A |
580 |
- -- MAGMA (version 1.2.0) -- |
581 |
- Univ. of Tennessee, Knoxville |
582 |
- Univ. of California, Berkeley |
583 |
-@@ -16,7 +16,7 @@ |
584 |
- #include <sys/time.h> |
585 |
- #include <assert.h> |
586 |
- |
587 |
--double get_current_cpu_time(void) |
588 |
-+double get_current_cpu_time_double(void) |
589 |
- { |
590 |
- struct timeval time_val; |
591 |
- |
592 |
-@@ -30,35 +30,35 @@ |
593 |
- //#define MAX_EVENTS 163840 |
594 |
- #define MAX_EVENTS 1048576 |
595 |
- |
596 |
--int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
597 |
--double event_start_time [MAX_THREADS] __attribute__ ((aligned (128))); |
598 |
--double event_end_time [MAX_THREADS] __attribute__ ((aligned (128))); |
599 |
--double event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
600 |
--int log_events = 1; |
601 |
-+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
602 |
-+double event_start_time_double [MAX_THREADS] __attribute__ ((aligned (128))); |
603 |
-+double event_end_time_double [MAX_THREADS] __attribute__ ((aligned (128))); |
604 |
-+double event_log_double [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
605 |
-+extern int log_events; |
606 |
- |
607 |
- #define core_cpu_event_start(my_core_id) \ |
608 |
-- event_start_time[my_core_id] = get_current_cpu_time(); \ |
609 |
-+ event_start_time_double[my_core_id] = get_current_cpu_time_double(); \ |
610 |
- |
611 |
- #define core_cpu_event_end(my_core_id) \ |
612 |
-- event_end_time[my_core_id] = get_current_cpu_time(); \ |
613 |
-+ event_end_time_double[my_core_id] = get_current_cpu_time_double(); \ |
614 |
- |
615 |
- #define core_gpu_event_start(my_core_id, e1, e2) \ |
616 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
617 |
-- event_start_time[my_core_id] = ctime/1000.+dtime; \ |
618 |
-+ event_start_time_double[my_core_id] = ctime/1000.+dtime; \ |
619 |
- |
620 |
- #define core_gpu_event_end(my_core_id, e1, e2) \ |
621 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
622 |
-- event_end_time[my_core_id] = ctime/1000.+dtime; \ |
623 |
-+ event_end_time_double[my_core_id] = ctime/1000.+dtime; \ |
624 |
- |
625 |
- #define core_log_event(event, my_core_id) \ |
626 |
-- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
627 |
-- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\ |
628 |
-- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\ |
629 |
-- event_log[my_core_id][event_num[my_core_id]+3] = (event);\ |
630 |
-+ event_log_double[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
631 |
-+ event_log_double[my_core_id][event_num[my_core_id]+1] = event_start_time_double[my_core_id];\ |
632 |
-+ event_log_double[my_core_id][event_num[my_core_id]+2] = event_end_time_double[my_core_id];\ |
633 |
-+ event_log_double[my_core_id][event_num[my_core_id]+3] = (event);\ |
634 |
- event_num[my_core_id] += (log_events << 2); \ |
635 |
- event_num[my_core_id] &= (MAX_EVENTS-1); |
636 |
- |
637 |
--void dump_trace(int cores_num) |
638 |
-+void dump_trace_double(int cores_num) |
639 |
- { |
640 |
- char trace_file_name[32]; |
641 |
- FILE *trace_file; |
642 |
-@@ -82,13 +82,13 @@ |
643 |
- for (core = 0; core < cores_num; core++) |
644 |
- for (event = 0; event < event_num[core]; event += 4) |
645 |
- { |
646 |
-- int thread = event_log[core][event+0]; |
647 |
-- double start = event_log[core][event+1]; |
648 |
-- double end = event_log[core][event+2]; |
649 |
-- int color = event_log[core][event+3]; |
650 |
-+ int thread = event_log_double[core][event+0]; |
651 |
-+ double start = event_log_double[core][event+1]; |
652 |
-+ double end = event_log_double[core][event+2]; |
653 |
-+ int color = event_log_double[core][event+3]; |
654 |
- |
655 |
-- start -= event_log[core][2]; |
656 |
-- end -= event_log[core][2]; |
657 |
-+ start -= event_log_double[core][2]; |
658 |
-+ end -= event_log_double[core][2]; |
659 |
- /* |
660 |
- fprintf(trace_file, |
661 |
- " " |
662 |
-@@ -279,7 +279,7 @@ |
663 |
- core_cpu_event_end(num_gpus); |
664 |
- core_log_event(0x666666, num_gpus); |
665 |
- |
666 |
-- dtime = get_current_cpu_time(); |
667 |
-+ dtime = get_current_cpu_time_double(); |
668 |
- |
669 |
- for(j=0; j<num_gpus; j++){ |
670 |
- magma_setdevice(j); |
671 |
-@@ -536,7 +536,7 @@ |
672 |
- } |
673 |
- |
674 |
- magma_setdevice(cdevice); |
675 |
-- dump_trace(num_gpus+1); |
676 |
-+ dump_trace_double(num_gpus+1); |
677 |
- |
678 |
- return *info; |
679 |
- } /* magma_dgeqrf2_mgpu */ |
680 |
-diff -Nur src.orig/dlaex0_m.cpp src/dlaex0_m.cpp |
681 |
---- src.orig/dlaex0_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
682 |
-+++ src/dlaex0_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
683 |
-@@ -23,10 +23,7 @@ |
684 |
- |
685 |
- int magma_get_dlaex3_m_nb(); |
686 |
- |
687 |
-- magma_int_t get_dlaex0_smlsize() |
688 |
-- { |
689 |
-- return 25; |
690 |
-- } |
691 |
-+ magma_int_t get_dlaex0_smlsize(); |
692 |
- } |
693 |
- |
694 |
- extern "C" magma_int_t |
695 |
-diff -Nur src.orig/dlaex3_m.cpp src/dlaex3_m.cpp |
696 |
---- src.orig/dlaex3_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
697 |
-+++ src/dlaex3_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
698 |
-@@ -36,7 +36,7 @@ |
699 |
- } |
700 |
- |
701 |
- extern"C"{ |
702 |
-- void dvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu) |
703 |
-+ void ddvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu) |
704 |
- { |
705 |
- magma_int_t i; |
706 |
- |
707 |
-@@ -53,7 +53,7 @@ |
708 |
- return; |
709 |
- } |
710 |
- |
711 |
-- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
712 |
-+ void ddirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
713 |
- { |
714 |
- magma_int_t i; |
715 |
- |
716 |
-@@ -364,9 +364,9 @@ |
717 |
- |
718 |
- //compute the lower and upper bound of the non-deflated eigenvectors |
719 |
- if (valeig) |
720 |
-- dvrange(k, d, &iil, &iiu, vl, vu); |
721 |
-+ ddvrange(k, d, &iil, &iiu, vl, vu); |
722 |
- else if (indeig) |
723 |
-- dirange(k, indxq, &iil, &iiu, il, iu); |
724 |
-+ ddirange(k, indxq, &iil, &iiu, il, iu); |
725 |
- else { |
726 |
- iil = 1; |
727 |
- iiu = k; |
728 |
-diff -Nur src.orig/dstedx_m.cpp src/dstedx_m.cpp |
729 |
---- src.orig/dstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
730 |
-+++ src/dstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
731 |
-@@ -26,10 +26,7 @@ |
732 |
- |
733 |
- double lapackf77_dlanst( char* norm, magma_int_t* n, double* d, double* e); |
734 |
- |
735 |
-- magma_int_t get_dstedx_smlsize() |
736 |
-- { |
737 |
-- return 25; |
738 |
-- } |
739 |
-+ magma_int_t get_dstedx_smlsize(); |
740 |
- } |
741 |
- |
742 |
- extern "C" magma_int_t |
743 |
-diff -Nur src.orig/sgeqrf_mgpu-trace.cpp src/sgeqrf_mgpu-trace.cpp |
744 |
---- src.orig/sgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100 |
745 |
-+++ src/sgeqrf_mgpu-trace.cpp 2012-06-25 06:59:15.000000000 +0100 |
746 |
-@@ -16,7 +16,7 @@ |
747 |
- #include <sys/time.h> |
748 |
- #include <assert.h> |
749 |
- |
750 |
--float get_current_cpu_time(void) |
751 |
-+float get_current_cpu_time_float(void) |
752 |
- { |
753 |
- struct timeval time_val; |
754 |
- |
755 |
-@@ -31,34 +31,34 @@ |
756 |
- #define MAX_EVENTS 1048576 |
757 |
- |
758 |
- int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
759 |
--float event_start_time [MAX_THREADS] __attribute__ ((aligned (128))); |
760 |
--float event_end_time [MAX_THREADS] __attribute__ ((aligned (128))); |
761 |
--float event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
762 |
-+float event_start_time_float [MAX_THREADS] __attribute__ ((aligned (128))); |
763 |
-+float event_end_time_float [MAX_THREADS] __attribute__ ((aligned (128))); |
764 |
-+float event_log_float [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
765 |
- int log_events = 1; |
766 |
- |
767 |
- #define core_cpu_event_start(my_core_id) \ |
768 |
-- event_start_time[my_core_id] = get_current_cpu_time(); \ |
769 |
-+ event_start_time_float[my_core_id] = get_current_cpu_time_float(); \ |
770 |
- |
771 |
- #define core_cpu_event_end(my_core_id) \ |
772 |
-- event_end_time[my_core_id] = get_current_cpu_time(); \ |
773 |
-+ event_end_time_float[my_core_id] = get_current_cpu_time_float(); \ |
774 |
- |
775 |
- #define core_gpu_event_start(my_core_id, e1, e2) \ |
776 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
777 |
-- event_start_time[my_core_id] = ctime/1000.+dtime; \ |
778 |
-+ event_start_time_float[my_core_id] = ctime/1000.+dtime; \ |
779 |
- |
780 |
- #define core_gpu_event_end(my_core_id, e1, e2) \ |
781 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
782 |
-- event_end_time[my_core_id] = ctime/1000.+dtime; \ |
783 |
-+ event_end_time_float[my_core_id] = ctime/1000.+dtime; \ |
784 |
- |
785 |
- #define core_log_event(event, my_core_id) \ |
786 |
-- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
787 |
-- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\ |
788 |
-- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\ |
789 |
-- event_log[my_core_id][event_num[my_core_id]+3] = (event);\ |
790 |
-+ event_log_float[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
791 |
-+ event_log_float[my_core_id][event_num[my_core_id]+1] = event_start_time_float[my_core_id];\ |
792 |
-+ event_log_float[my_core_id][event_num[my_core_id]+2] = event_end_time_float[my_core_id];\ |
793 |
-+ event_log_float[my_core_id][event_num[my_core_id]+3] = (event);\ |
794 |
- event_num[my_core_id] += (log_events << 2); \ |
795 |
- event_num[my_core_id] &= (MAX_EVENTS-1); |
796 |
- |
797 |
--void dump_trace(int cores_num) |
798 |
-+void dump_trace_float(int cores_num) |
799 |
- { |
800 |
- char trace_file_name[32]; |
801 |
- FILE *trace_file; |
802 |
-@@ -82,13 +82,13 @@ |
803 |
- for (core = 0; core < cores_num; core++) |
804 |
- for (event = 0; event < event_num[core]; event += 4) |
805 |
- { |
806 |
-- int thread = event_log[core][event+0]; |
807 |
-- float start = event_log[core][event+1]; |
808 |
-- float end = event_log[core][event+2]; |
809 |
-- int color = event_log[core][event+3]; |
810 |
-+ int thread = event_log_float[core][event+0]; |
811 |
-+ float start = event_log_float[core][event+1]; |
812 |
-+ float end = event_log_float[core][event+2]; |
813 |
-+ int color = event_log_float[core][event+3]; |
814 |
- |
815 |
-- start -= event_log[core][2]; |
816 |
-- end -= event_log[core][2]; |
817 |
-+ start -= event_log_float[core][2]; |
818 |
-+ end -= event_log_float[core][2]; |
819 |
- /* |
820 |
- fprintf(trace_file, |
821 |
- " " |
822 |
-@@ -279,7 +279,7 @@ |
823 |
- core_cpu_event_end(num_gpus); |
824 |
- core_log_event(0x666666, num_gpus); |
825 |
- |
826 |
-- dtime = get_current_cpu_time(); |
827 |
-+ dtime = get_current_cpu_time_float(); |
828 |
- |
829 |
- for(j=0; j<num_gpus; j++){ |
830 |
- magma_setdevice(j); |
831 |
-@@ -536,7 +536,7 @@ |
832 |
- } |
833 |
- |
834 |
- magma_setdevice(cdevice); |
835 |
-- dump_trace(num_gpus+1); |
836 |
-+ dump_trace_float(num_gpus+1); |
837 |
- |
838 |
- return *info; |
839 |
- } /* magma_sgeqrf2_mgpu */ |
840 |
-diff -Nur src.orig/slaex0_m.cpp src/slaex0_m.cpp |
841 |
---- src.orig/slaex0_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
842 |
-+++ src/slaex0_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
843 |
-@@ -23,10 +23,7 @@ |
844 |
- |
845 |
- int magma_get_slaex3_m_nb(); |
846 |
- |
847 |
-- magma_int_t get_slaex0_smlsize() |
848 |
-- { |
849 |
-- return 25; |
850 |
-- } |
851 |
-+ magma_int_t get_slaex0_smlsize(); |
852 |
- } |
853 |
- |
854 |
- extern "C" magma_int_t |
855 |
-diff -Nur src.orig/slaex3_m.cpp src/slaex3_m.cpp |
856 |
---- src.orig/slaex3_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
857 |
-+++ src/slaex3_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
858 |
-@@ -36,7 +36,7 @@ |
859 |
- } |
860 |
- |
861 |
- extern"C"{ |
862 |
-- void dvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu) |
863 |
-+ void sdvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu) |
864 |
- { |
865 |
- magma_int_t i; |
866 |
- |
867 |
-@@ -53,7 +53,7 @@ |
868 |
- return; |
869 |
- } |
870 |
- |
871 |
-- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
872 |
-+ void sdirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
873 |
- { |
874 |
- magma_int_t i; |
875 |
- |
876 |
-@@ -364,9 +364,9 @@ |
877 |
- |
878 |
- //compute the lower and upper bound of the non-deflated eigenvectors |
879 |
- if (valeig) |
880 |
-- dvrange(k, d, &iil, &iiu, vl, vu); |
881 |
-+ sdvrange(k, d, &iil, &iiu, vl, vu); |
882 |
- else if (indeig) |
883 |
-- dirange(k, indxq, &iil, &iiu, il, iu); |
884 |
-+ sdirange(k, indxq, &iil, &iiu, il, iu); |
885 |
- else { |
886 |
- iil = 1; |
887 |
- iiu = k; |
888 |
-diff -Nur src.orig/sstedx_m.cpp src/sstedx_m.cpp |
889 |
---- src.orig/sstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
890 |
-+++ src/sstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
891 |
-@@ -26,10 +26,7 @@ |
892 |
- |
893 |
- float lapackf77_slanst( char* norm, magma_int_t* n, float* d, float* e); |
894 |
- |
895 |
-- magma_int_t get_sstedx_smlsize() |
896 |
-- { |
897 |
-- return 25; |
898 |
-- } |
899 |
-+ magma_int_t get_sstedx_smlsize(); |
900 |
- } |
901 |
- |
902 |
- extern "C" magma_int_t |
903 |
-diff -Nur src.orig/zgeqrf_mgpu-trace.cpp src/zgeqrf_mgpu-trace.cpp |
904 |
---- src.orig/zgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100 |
905 |
-+++ src/zgeqrf_mgpu-trace.cpp 2012-06-25 17:20:18.000000000 +0100 |
906 |
-@@ -16,108 +16,42 @@ |
907 |
- #include <sys/time.h> |
908 |
- #include <assert.h> |
909 |
- |
910 |
--double get_current_cpu_time(void) |
911 |
--{ |
912 |
-- struct timeval time_val; |
913 |
-- |
914 |
-- gettimeofday(&time_val, NULL); |
915 |
-- |
916 |
-- return (double)(time_val.tv_sec) + (double)(time_val.tv_usec) / 1000000.0; |
917 |
--} |
918 |
-+extern double get_current_cpu_time_double(void); |
919 |
- |
920 |
- #define MAX_THREADS 5 |
921 |
- |
922 |
- //#define MAX_EVENTS 163840 |
923 |
- #define MAX_EVENTS 1048576 |
924 |
- |
925 |
--int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
926 |
--double event_start_time [MAX_THREADS] __attribute__ ((aligned (128))); |
927 |
--double event_end_time [MAX_THREADS] __attribute__ ((aligned (128))); |
928 |
--double event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
929 |
--int log_events = 1; |
930 |
-+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128))); |
931 |
-+extern double event_start_time_double [MAX_THREADS] __attribute__ ((aligned (128))); |
932 |
-+extern double event_end_time_double [MAX_THREADS] __attribute__ ((aligned (128))); |
933 |
-+extern double event_log_double [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128))); |
934 |
-+extern int log_events; |
935 |
- |
936 |
- #define core_cpu_event_start(my_core_id) \ |
937 |
-- event_start_time[my_core_id] = get_current_cpu_time(); \ |
938 |
-+ event_start_time_double[my_core_id] = get_current_cpu_time_double(); \ |
939 |
- |
940 |
- #define core_cpu_event_end(my_core_id) \ |
941 |
-- event_end_time[my_core_id] = get_current_cpu_time(); \ |
942 |
-+ event_end_time_double[my_core_id] = get_current_cpu_time_double(); \ |
943 |
- |
944 |
- #define core_gpu_event_start(my_core_id, e1, e2) \ |
945 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
946 |
-- event_start_time[my_core_id] = ctime/1000.+dtime; \ |
947 |
-+ event_start_time_double[my_core_id] = ctime/1000.+dtime; \ |
948 |
- |
949 |
- #define core_gpu_event_end(my_core_id, e1, e2) \ |
950 |
- cudaEventElapsedTime(&ctime, e1, e2); \ |
951 |
-- event_end_time[my_core_id] = ctime/1000.+dtime; \ |
952 |
-+ event_end_time_double[my_core_id] = ctime/1000.+dtime; \ |
953 |
- |
954 |
- #define core_log_event(event, my_core_id) \ |
955 |
-- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
956 |
-- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\ |
957 |
-- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\ |
958 |
-- event_log[my_core_id][event_num[my_core_id]+3] = (event);\ |
959 |
-+ event_log_double[my_core_id][event_num[my_core_id]+0] = my_core_id;\ |
960 |
-+ event_log_double[my_core_id][event_num[my_core_id]+1] = event_start_time_double[my_core_id];\ |
961 |
-+ event_log_double[my_core_id][event_num[my_core_id]+2] = event_end_time_double[my_core_id];\ |
962 |
-+ event_log_double[my_core_id][event_num[my_core_id]+3] = (event);\ |
963 |
- event_num[my_core_id] += (log_events << 2); \ |
964 |
- event_num[my_core_id] &= (MAX_EVENTS-1); |
965 |
- |
966 |
--void dump_trace(int cores_num) |
967 |
--{ |
968 |
-- char trace_file_name[32]; |
969 |
-- FILE *trace_file; |
970 |
-- int event; |
971 |
-- int core; |
972 |
-- |
973 |
-- double scale = 100000.0; |
974 |
-- double large = 100.0; |
975 |
-- |
976 |
-- sprintf(trace_file_name, "trace.svg"); |
977 |
-- trace_file = fopen(trace_file_name, "w"); |
978 |
-- assert(trace_file != NULL); |
979 |
-- |
980 |
-- fprintf(trace_file, |
981 |
-- "<?xml version=\"1.0\" standalone=\"no\"?>" |
982 |
-- "<svg version=\"1.1\" baseProfile=\"full\" xmlns=\"http://www.w3.org/2000/svg\" " |
983 |
-- "xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" " |
984 |
-- ">\n" |
985 |
-- " <g font-size=\"20\">\n"); |
986 |
-- |
987 |
-- for (core = 0; core < cores_num; core++) |
988 |
-- for (event = 0; event < event_num[core]; event += 4) |
989 |
-- { |
990 |
-- int thread = event_log[core][event+0]; |
991 |
-- double start = event_log[core][event+1]; |
992 |
-- double end = event_log[core][event+2]; |
993 |
-- int color = event_log[core][event+3]; |
994 |
-- |
995 |
-- start -= event_log[core][2]; |
996 |
-- end -= event_log[core][2]; |
997 |
-- /* |
998 |
-- fprintf(trace_file, |
999 |
-- " " |
1000 |
-- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" " |
1001 |
-- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n", |
1002 |
-- start * scale, |
1003 |
-- thread * 100.0, |
1004 |
-- (end - start) * scale, |
1005 |
-- 90.0, |
1006 |
-- color); |
1007 |
-- */ |
1008 |
-- fprintf(trace_file, |
1009 |
-- " " |
1010 |
-- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" " |
1011 |
-- // "fill=\"#%06x\" />\n", |
1012 |
-- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n", |
1013 |
-- start * scale, |
1014 |
-- thread * (large+20.0), |
1015 |
-- (end - start) * scale, |
1016 |
-- large, |
1017 |
-- color); |
1018 |
-- } |
1019 |
-- |
1020 |
-- fprintf(trace_file, |
1021 |
-- " </g>\n" |
1022 |
-- "</svg>\n"); |
1023 |
-- |
1024 |
-- fclose(trace_file); |
1025 |
--} |
1026 |
-+extern void dump_trace_double(int cores_num); |
1027 |
- |
1028 |
- //=========================================================================== |
1029 |
- |
1030 |
-@@ -279,7 +213,7 @@ |
1031 |
- core_cpu_event_end(num_gpus); |
1032 |
- core_log_event(0x666666, num_gpus); |
1033 |
- |
1034 |
-- dtime = get_current_cpu_time(); |
1035 |
-+ dtime = get_current_cpu_time_double(); |
1036 |
- |
1037 |
- for(j=0; j<num_gpus; j++){ |
1038 |
- magma_setdevice(j); |
1039 |
-@@ -536,7 +470,7 @@ |
1040 |
- } |
1041 |
- |
1042 |
- magma_setdevice(cdevice); |
1043 |
-- dump_trace(num_gpus+1); |
1044 |
-+ dump_trace_double(num_gpus+1); |
1045 |
- |
1046 |
- return *info; |
1047 |
- } /* magma_zgeqrf2_mgpu */ |
1048 |
-diff -Nur src.orig/zheevr_gpu.cpp src/zheevr_gpu.cpp |
1049 |
---- src.orig/zheevr_gpu.cpp 2012-06-23 21:52:09.000000000 +0100 |
1050 |
-+++ src/zheevr_gpu.cpp 2012-06-25 06:59:15.000000000 +0100 |
1051 |
-@@ -12,36 +12,6 @@ |
1052 |
- */ |
1053 |
- #include "common_magma.h" |
1054 |
- |
1055 |
--/* These interfaces are used for TAU profiling */ |
1056 |
--extern "C" { |
1057 |
-- void Mylapackf77_zstemr(const char *jobz, const char *range, magma_int_t *n, double *d, double *e, |
1058 |
-- double *vl, double *vu, magma_int_t *il, magma_int_t *iu, |
1059 |
-- magma_int_t *m, double *w, cuDoubleComplex *z, magma_int_t *ldz, |
1060 |
-- magma_int_t *nzc, magma_int_t *isuppz, magma_int_t *tryrac, |
1061 |
-- double *work, magma_int_t *lwork, magma_int_t *iwork, |
1062 |
-- magma_int_t *liwork, magma_int_t *info) |
1063 |
-- { |
1064 |
-- lapackf77_zstemr(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc, |
1065 |
-- isuppz, tryrac, work, lwork, iwork, liwork, info); |
1066 |
-- } |
1067 |
-- |
1068 |
-- void Mylapackf77_zstein(int *n, double *d, double *e, int *m, double *w, int *iblock, |
1069 |
-- int *isplit, cuDoubleComplex *z, int *ldz, double *work, |
1070 |
-- int *iwork, int *ifail, int *info) |
1071 |
-- { |
1072 |
-- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
1073 |
-- } |
1074 |
-- |
1075 |
-- void Mylapackf77_dstebz(const char *range, const char *order, int *n, double *vl, |
1076 |
-- double *vu, int *il, int *iu, double *abstol, |
1077 |
-- double *d, double *e, int *m, int *nsplit, double *w, |
1078 |
-- int *iblock, int *isplit, double *work, int *iwork, int *info) |
1079 |
-- { |
1080 |
-- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol, d, e, m, |
1081 |
-- nsplit, w, iblock, isplit, work, iwork,info); |
1082 |
-- } |
1083 |
--} |
1084 |
-- |
1085 |
- extern "C" { |
1086 |
- #ifdef ADD_ |
1087 |
- # define lapackf77_ieeeck ieeeck_ |
1088 |
-@@ -497,7 +467,7 @@ |
1089 |
- else |
1090 |
- tryrac=0; |
1091 |
- |
1092 |
-- Mylapackf77_zstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il, |
1093 |
-+ lapackf77_zstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il, |
1094 |
- &iu, m, &w[1], wz, &ldwz, &n, &isuppz[1], &tryrac, &rwork[indrwk], |
1095 |
- &llrwork, &iwork[1], &liwork, info); |
1096 |
- |
1097 |
-@@ -514,10 +484,10 @@ |
1098 |
- printf("B/I\n"); |
1099 |
- *info = 0; |
1100 |
- |
1101 |
-- Mylapackf77_dstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m, |
1102 |
-+ lapackf77_dstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m, |
1103 |
- &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwo], info); |
1104 |
- |
1105 |
-- Mylapackf77_zstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp], |
1106 |
-+ lapackf77_zstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp], |
1107 |
- wz, &ldwz, &rwork[indrwk], &iwork[indiwo], &iwork[indifl], info); |
1108 |
- |
1109 |
- /* Apply unitary matrix used in reduction to tridiagonal |
1110 |
-diff -Nur src.orig/zheevx_gpu.cpp src/zheevx_gpu.cpp |
1111 |
---- src.orig/zheevx_gpu.cpp 2012-06-23 21:52:09.000000000 +0100 |
1112 |
-+++ src/zheevx_gpu.cpp 2012-06-25 06:59:15.000000000 +0100 |
1113 |
-@@ -12,27 +12,6 @@ |
1114 |
- */ |
1115 |
- #include "common_magma.h" |
1116 |
- |
1117 |
--/* These interfaces are used for TAU profiling */ |
1118 |
--extern"C"{ |
1119 |
-- void Mylapackf77_zstein(magma_int_t *n, double *d, double *e, magma_int_t *m, |
1120 |
-- double *w, magma_int_t *iblock, magma_int_t *isplit, |
1121 |
-- cuDoubleComplex *z, magma_int_t *ldz, double *work, |
1122 |
-- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info) |
1123 |
-- { |
1124 |
-- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
1125 |
-- } |
1126 |
-- |
1127 |
-- void Mylapackf77_dstebz(char *range, char *order, magma_int_t *n, double *vl, |
1128 |
-- double *vu, magma_int_t *il, magma_int_t *iu, double *abstol, |
1129 |
-- double *d, double *e, magma_int_t *m, magma_int_t *nsplit, |
1130 |
-- double *w, magma_int_t *iblock, magma_int_t *isplit, |
1131 |
-- double *work, magma_int_t *iwork, magma_int_t *info) |
1132 |
-- { |
1133 |
-- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol, |
1134 |
-- d, e, m, nsplit, w, iblock, isplit, work, iwork,info); |
1135 |
-- } |
1136 |
--} |
1137 |
-- |
1138 |
- extern "C" magma_int_t |
1139 |
- magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n, |
1140 |
- cuDoubleComplex *da, magma_int_t ldda, double vl, double vu, |
1141 |
-@@ -404,12 +383,12 @@ |
1142 |
- indisp = indibl + n; |
1143 |
- indiwk = indisp + n; |
1144 |
- |
1145 |
-- Mylapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1146 |
-+ lapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1147 |
- &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info); |
1148 |
- |
1149 |
- if (wantz) { |
1150 |
- |
1151 |
-- Mylapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1152 |
-+ lapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1153 |
- wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info); |
1154 |
- |
1155 |
- magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz ); |
1156 |
-diff -Nur src.orig/zhegvd.cpp src/zhegvd.cpp |
1157 |
---- src.orig/zhegvd.cpp 2012-06-23 21:52:09.000000000 +0100 |
1158 |
-+++ src/zhegvd.cpp 2012-06-25 06:59:15.000000000 +0100 |
1159 |
-@@ -13,26 +13,6 @@ |
1160 |
- */ |
1161 |
- #include "common_magma.h" |
1162 |
- |
1163 |
--/* This ztrmm interface is used for TAU profiling */ |
1164 |
--void Mymagma_ztrmm(char side, char uplo, char trans, char unit, |
1165 |
-- magma_int_t n, magma_int_t m, |
1166 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1167 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1168 |
--{ |
1169 |
-- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1170 |
-- magma_device_sync(); |
1171 |
--} |
1172 |
-- |
1173 |
--/* This ztrsm interface is used for TAU profiling */ |
1174 |
--void Mymagma_ztrsm(char side, char uplo, char trans, char unit, |
1175 |
-- magma_int_t n, magma_int_t m, |
1176 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1177 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1178 |
--{ |
1179 |
-- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1180 |
-- magma_device_sync(); |
1181 |
--} |
1182 |
-- |
1183 |
- extern "C" magma_int_t |
1184 |
- magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, |
1185 |
- cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1186 |
-@@ -324,7 +304,7 @@ |
1187 |
- *(unsigned char *)trans = MagmaNoTrans; |
1188 |
- } |
1189 |
- |
1190 |
-- Mymagma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1191 |
-+ magma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1192 |
- n, n, c_one, db, lddb, da, ldda); |
1193 |
- |
1194 |
- } else if (itype == 3) |
1195 |
-@@ -337,7 +317,7 @@ |
1196 |
- *(unsigned char *)trans = MagmaConjTrans; |
1197 |
- } |
1198 |
- |
1199 |
-- Mymagma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1200 |
-+ magma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1201 |
- n, n, c_one, db, lddb, da, ldda); |
1202 |
- } |
1203 |
- |
1204 |
-diff -Nur src.orig/zhegvdx.cpp src/zhegvdx.cpp |
1205 |
---- src.orig/zhegvdx.cpp 2012-06-23 21:52:09.000000000 +0100 |
1206 |
-+++ src/zhegvdx.cpp 2012-06-25 06:59:15.000000000 +0100 |
1207 |
-@@ -12,22 +12,6 @@ |
1208 |
- */ |
1209 |
- #include "common_magma.h" |
1210 |
- |
1211 |
--void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1212 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1213 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1214 |
--{ |
1215 |
-- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1216 |
-- magma_device_sync(); |
1217 |
--} |
1218 |
-- |
1219 |
--void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1220 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1221 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1222 |
--{ |
1223 |
-- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1224 |
-- magma_device_sync(); |
1225 |
--} |
1226 |
-- |
1227 |
- extern "C" magma_int_t |
1228 |
- magma_zhegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1229 |
- cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1230 |
-@@ -363,7 +347,7 @@ |
1231 |
- *(unsigned char *)trans = MagmaNoTrans; |
1232 |
- } |
1233 |
- |
1234 |
-- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1235 |
-+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1236 |
- |
1237 |
- } else if (itype == 3) { |
1238 |
- |
1239 |
-@@ -375,7 +359,7 @@ |
1240 |
- *(unsigned char *)trans = MagmaConjTrans; |
1241 |
- } |
1242 |
- |
1243 |
-- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1244 |
-+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1245 |
- |
1246 |
- } |
1247 |
- |
1248 |
-diff -Nur src.orig/zhegvr.cpp src/zhegvr.cpp |
1249 |
---- src.orig/zhegvr.cpp 2012-06-23 21:52:09.000000000 +0100 |
1250 |
-+++ src/zhegvr.cpp 2012-06-25 06:59:15.000000000 +0100 |
1251 |
-@@ -12,24 +12,6 @@ |
1252 |
- */ |
1253 |
- #include "common_magma.h" |
1254 |
- |
1255 |
--void Mymagma_ztrmm(char side, char uplo, char trans, char unit, |
1256 |
-- magma_int_t n, magma_int_t m, |
1257 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1258 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1259 |
--{ |
1260 |
-- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1261 |
-- magma_device_sync(); |
1262 |
--} |
1263 |
-- |
1264 |
--void Mymagma_ztrsm(char side, char uplo, char trans, char unit, |
1265 |
-- magma_int_t n, magma_int_t m, |
1266 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1267 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1268 |
--{ |
1269 |
-- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1270 |
-- magma_device_sync(); |
1271 |
--} |
1272 |
-- |
1273 |
- extern "C" magma_int_t |
1274 |
- magma_zhegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1275 |
- cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1276 |
-@@ -408,7 +390,7 @@ |
1277 |
- *(unsigned char *)trans = MagmaNoTrans; |
1278 |
- } |
1279 |
- |
1280 |
-- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1281 |
-+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1282 |
- db, lddb, dz, lddz); |
1283 |
- |
1284 |
- } else if (itype == 3) { |
1285 |
-@@ -421,7 +403,7 @@ |
1286 |
- *(unsigned char *)trans = MagmaConjTrans; |
1287 |
- } |
1288 |
- |
1289 |
-- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1290 |
-+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1291 |
- db, lddb, dz, lddz); |
1292 |
- } |
1293 |
- |
1294 |
-diff -Nur src.orig/zhegvx.cpp src/zhegvx.cpp |
1295 |
---- src.orig/zhegvx.cpp 2012-06-23 21:52:09.000000000 +0100 |
1296 |
-+++ src/zhegvx.cpp 2012-06-25 06:59:15.000000000 +0100 |
1297 |
-@@ -12,22 +12,6 @@ |
1298 |
- */ |
1299 |
- #include "common_magma.h" |
1300 |
- |
1301 |
--void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1302 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1303 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1304 |
--{ |
1305 |
-- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1306 |
-- magma_device_sync(); |
1307 |
--} |
1308 |
-- |
1309 |
--void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1310 |
-- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1311 |
-- cuDoubleComplex *dz, magma_int_t lddz) |
1312 |
--{ |
1313 |
-- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1314 |
-- magma_device_sync(); |
1315 |
--} |
1316 |
-- |
1317 |
- extern "C" magma_int_t |
1318 |
- magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1319 |
- cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1320 |
-@@ -330,7 +314,7 @@ |
1321 |
- *(unsigned char *)trans = MagmaNoTrans; |
1322 |
- } |
1323 |
- |
1324 |
-- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1325 |
-+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1326 |
- |
1327 |
- } else if (itype == 3) { |
1328 |
- |
1329 |
-@@ -343,7 +327,7 @@ |
1330 |
- *(unsigned char *)trans = MagmaConjTrans; |
1331 |
- } |
1332 |
- |
1333 |
-- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1334 |
-+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1335 |
- |
1336 |
- } |
1337 |
- |
1338 |
-diff -Nur src.orig/zstedx_m.cpp src/zstedx_m.cpp |
1339 |
---- src.orig/zstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100 |
1340 |
-+++ src/zstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100 |
1341 |
-@@ -16,10 +16,7 @@ |
1342 |
- double* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork, |
1343 |
- magma_int_t* info); |
1344 |
- |
1345 |
-- magma_int_t get_zstedx_smlsize() |
1346 |
-- { |
1347 |
-- return 25; |
1348 |
-- } |
1349 |
-+ magma_int_t get_zstedx_smlsize(); |
1350 |
- } |
1351 |
- |
1352 |
- extern "C" magma_int_t |
1353 |
|
1354 |
diff --git a/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch b/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch |
1355 |
new file mode 100644 |
1356 |
index 0000000..8fd2d0d |
1357 |
--- /dev/null |
1358 |
+++ b/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch |
1359 |
@@ -0,0 +1,612 @@ |
1360 |
+diff -Nur magma-1.2.1.orig/src/cheevx_gpu.cpp magma-1.2.1/src/cheevx_gpu.cpp |
1361 |
+--- magma-1.2.1.orig/src/cheevx_gpu.cpp 2012-07-03 11:59:28.923659921 -0700 |
1362 |
++++ magma-1.2.1/src/cheevx_gpu.cpp 2012-07-03 11:59:45.428746349 -0700 |
1363 |
+@@ -12,27 +12,6 @@ |
1364 |
+ */ |
1365 |
+ #include "common_magma.h" |
1366 |
+ |
1367 |
+-/* These interfaces are used for TAU profiling */ |
1368 |
+-extern"C"{ |
1369 |
+- void Mylapackf77_cstein(magma_int_t *n, float *d, float *e, magma_int_t *m, |
1370 |
+- float *w, magma_int_t *iblock, magma_int_t *isplit, |
1371 |
+- cuFloatComplex *z, magma_int_t *ldz, float *work, |
1372 |
+- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info) |
1373 |
+- { |
1374 |
+- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
1375 |
+- } |
1376 |
+- |
1377 |
+- void Mylapackf77_sstebz(char *range, char *order, magma_int_t *n, float *vl, |
1378 |
+- float *vu, magma_int_t *il, magma_int_t *iu, float *abstol, |
1379 |
+- float *d, float *e, magma_int_t *m, magma_int_t *nsplit, |
1380 |
+- float *w, magma_int_t *iblock, magma_int_t *isplit, |
1381 |
+- float *work, magma_int_t *iwork, magma_int_t *info) |
1382 |
+- { |
1383 |
+- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol, |
1384 |
+- d, e, m, nsplit, w, iblock, isplit, work, iwork,info); |
1385 |
+- } |
1386 |
+-} |
1387 |
+- |
1388 |
+ extern "C" magma_int_t |
1389 |
+ magma_cheevx_gpu(char jobz, char range, char uplo, magma_int_t n, |
1390 |
+ cuFloatComplex *da, magma_int_t ldda, float vl, float vu, |
1391 |
+@@ -404,12 +383,12 @@ |
1392 |
+ indisp = indibl + n; |
1393 |
+ indiwk = indisp + n; |
1394 |
+ |
1395 |
+- Mylapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1396 |
++ lapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1397 |
+ &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info); |
1398 |
+ |
1399 |
+ if (wantz) { |
1400 |
+ |
1401 |
+- Mylapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1402 |
++ lapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1403 |
+ wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info); |
1404 |
+ |
1405 |
+ magma_csetmatrix( n, *m, wz, ldwz, dz, lddz ); |
1406 |
+diff -Nur magma-1.2.1.orig/src/chegvd.cpp magma-1.2.1/src/chegvd.cpp |
1407 |
+--- magma-1.2.1.orig/src/chegvd.cpp 2012-07-03 11:59:28.932659966 -0700 |
1408 |
++++ magma-1.2.1/src/chegvd.cpp 2012-07-03 11:59:45.428746349 -0700 |
1409 |
+@@ -13,26 +13,6 @@ |
1410 |
+ */ |
1411 |
+ #include "common_magma.h" |
1412 |
+ |
1413 |
+-/* This ctrmm interface is used for TAU profiling */ |
1414 |
+-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, |
1415 |
+- magma_int_t n, magma_int_t m, |
1416 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1417 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1418 |
+-{ |
1419 |
+- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1420 |
+- magma_device_sync(); |
1421 |
+-} |
1422 |
+- |
1423 |
+-/* This ctrsm interface is used for TAU profiling */ |
1424 |
+-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, |
1425 |
+- magma_int_t n, magma_int_t m, |
1426 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1427 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1428 |
+-{ |
1429 |
+- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1430 |
+- magma_device_sync(); |
1431 |
+-} |
1432 |
+- |
1433 |
+ extern "C" magma_int_t |
1434 |
+ magma_chegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, |
1435 |
+ cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
1436 |
+@@ -324,7 +304,7 @@ |
1437 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1438 |
+ } |
1439 |
+ |
1440 |
+- Mymagma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1441 |
++ magma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1442 |
+ n, n, c_one, db, lddb, da, ldda); |
1443 |
+ |
1444 |
+ } else if (itype == 3) |
1445 |
+@@ -337,7 +317,7 @@ |
1446 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1447 |
+ } |
1448 |
+ |
1449 |
+- Mymagma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1450 |
++ magma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1451 |
+ n, n, c_one, db, lddb, da, ldda); |
1452 |
+ } |
1453 |
+ |
1454 |
+diff -Nur magma-1.2.1.orig/src/chegvdx.cpp magma-1.2.1/src/chegvdx.cpp |
1455 |
+--- magma-1.2.1.orig/src/chegvdx.cpp 2012-07-03 11:59:28.921659909 -0700 |
1456 |
++++ magma-1.2.1/src/chegvdx.cpp 2012-07-03 11:59:45.428746349 -0700 |
1457 |
+@@ -12,22 +12,6 @@ |
1458 |
+ */ |
1459 |
+ #include "common_magma.h" |
1460 |
+ |
1461 |
+-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1462 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1463 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1464 |
+-{ |
1465 |
+- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1466 |
+- magma_device_sync(); |
1467 |
+-} |
1468 |
+- |
1469 |
+-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1470 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1471 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1472 |
+-{ |
1473 |
+- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1474 |
+- magma_device_sync(); |
1475 |
+-} |
1476 |
+- |
1477 |
+ extern "C" magma_int_t |
1478 |
+ magma_chegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1479 |
+ cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
1480 |
+@@ -363,7 +347,7 @@ |
1481 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1482 |
+ } |
1483 |
+ |
1484 |
+- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1485 |
++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1486 |
+ |
1487 |
+ } else if (itype == 3) { |
1488 |
+ |
1489 |
+@@ -375,7 +359,7 @@ |
1490 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1491 |
+ } |
1492 |
+ |
1493 |
+- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1494 |
++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1495 |
+ |
1496 |
+ } |
1497 |
+ |
1498 |
+diff -Nur magma-1.2.1.orig/src/chegvr.cpp magma-1.2.1/src/chegvr.cpp |
1499 |
+--- magma-1.2.1.orig/src/chegvr.cpp 2012-07-03 11:59:28.932659966 -0700 |
1500 |
++++ magma-1.2.1/src/chegvr.cpp 2012-07-03 11:59:45.429746354 -0700 |
1501 |
+@@ -12,24 +12,6 @@ |
1502 |
+ */ |
1503 |
+ #include "common_magma.h" |
1504 |
+ |
1505 |
+-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, |
1506 |
+- magma_int_t n, magma_int_t m, |
1507 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1508 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1509 |
+-{ |
1510 |
+- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1511 |
+- magma_device_sync(); |
1512 |
+-} |
1513 |
+- |
1514 |
+-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, |
1515 |
+- magma_int_t n, magma_int_t m, |
1516 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1517 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1518 |
+-{ |
1519 |
+- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1520 |
+- magma_device_sync(); |
1521 |
+-} |
1522 |
+- |
1523 |
+ extern "C" magma_int_t |
1524 |
+ magma_chegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1525 |
+ cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
1526 |
+@@ -408,7 +390,7 @@ |
1527 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1528 |
+ } |
1529 |
+ |
1530 |
+- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1531 |
++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1532 |
+ db, lddb, dz, lddz); |
1533 |
+ |
1534 |
+ } else if (itype == 3) { |
1535 |
+@@ -421,7 +403,7 @@ |
1536 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1537 |
+ } |
1538 |
+ |
1539 |
+- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1540 |
++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1541 |
+ db, lddb, dz, lddz); |
1542 |
+ } |
1543 |
+ |
1544 |
+diff -Nur magma-1.2.1.orig/src/chegvx.cpp magma-1.2.1/src/chegvx.cpp |
1545 |
+--- magma-1.2.1.orig/src/chegvx.cpp 2012-07-03 11:59:28.923659921 -0700 |
1546 |
++++ magma-1.2.1/src/chegvx.cpp 2012-07-03 11:59:45.429746354 -0700 |
1547 |
+@@ -12,22 +12,6 @@ |
1548 |
+ */ |
1549 |
+ #include "common_magma.h" |
1550 |
+ |
1551 |
+-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1552 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1553 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1554 |
+-{ |
1555 |
+- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1556 |
+- magma_device_sync(); |
1557 |
+-} |
1558 |
+- |
1559 |
+-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1560 |
+- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb, |
1561 |
+- cuFloatComplex *dz, magma_int_t lddz) |
1562 |
+-{ |
1563 |
+- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1564 |
+- magma_device_sync(); |
1565 |
+-} |
1566 |
+- |
1567 |
+ extern "C" magma_int_t |
1568 |
+ magma_chegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1569 |
+ cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb, |
1570 |
+@@ -330,7 +314,7 @@ |
1571 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1572 |
+ } |
1573 |
+ |
1574 |
+- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1575 |
++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1576 |
+ |
1577 |
+ } else if (itype == 3) { |
1578 |
+ |
1579 |
+@@ -343,7 +327,7 @@ |
1580 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1581 |
+ } |
1582 |
+ |
1583 |
+- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1584 |
++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1585 |
+ |
1586 |
+ } |
1587 |
+ |
1588 |
+diff -Nur magma-1.2.1.orig/src/cstedx_m.cpp magma-1.2.1/src/cstedx_m.cpp |
1589 |
+--- magma-1.2.1.orig/src/cstedx_m.cpp 2012-07-03 11:59:28.921659909 -0700 |
1590 |
++++ magma-1.2.1/src/cstedx_m.cpp 2012-07-03 11:59:45.429746354 -0700 |
1591 |
+@@ -16,10 +16,7 @@ |
1592 |
+ float* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork, |
1593 |
+ magma_int_t* info); |
1594 |
+ |
1595 |
+- magma_int_t get_cstedx_smlsize() |
1596 |
+- { |
1597 |
+- return 25; |
1598 |
+- } |
1599 |
++ magma_int_t get_cstedx_smlsize(); |
1600 |
+ } |
1601 |
+ |
1602 |
+ extern "C" magma_int_t |
1603 |
+diff -Nur magma-1.2.1.orig/src/dlaex0_m.cpp magma-1.2.1/src/dlaex0_m.cpp |
1604 |
+--- magma-1.2.1.orig/src/dlaex0_m.cpp 2012-07-03 11:59:28.933659971 -0700 |
1605 |
++++ magma-1.2.1/src/dlaex0_m.cpp 2012-07-03 11:59:45.430746359 -0700 |
1606 |
+@@ -22,10 +22,7 @@ |
1607 |
+ |
1608 |
+ int magma_get_dlaex3_m_nb(); |
1609 |
+ |
1610 |
+- magma_int_t get_dlaex0_smlsize() |
1611 |
+- { |
1612 |
+- return 25; |
1613 |
+- } |
1614 |
++ magma_int_t get_dlaex0_smlsize(); |
1615 |
+ } |
1616 |
+ |
1617 |
+ extern "C" magma_int_t |
1618 |
+diff -Nur magma-1.2.1.orig/src/dlaex3_m.cpp magma-1.2.1/src/dlaex3_m.cpp |
1619 |
+--- magma-1.2.1.orig/src/dlaex3_m.cpp 2012-07-03 11:59:28.929659951 -0700 |
1620 |
++++ magma-1.2.1/src/dlaex3_m.cpp 2012-07-03 11:59:45.430746359 -0700 |
1621 |
+@@ -34,7 +34,7 @@ |
1622 |
+ } |
1623 |
+ |
1624 |
+ extern"C"{ |
1625 |
+- void dvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu) |
1626 |
++ void ddvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu) |
1627 |
+ { |
1628 |
+ magma_int_t i; |
1629 |
+ |
1630 |
+@@ -51,7 +51,7 @@ |
1631 |
+ return; |
1632 |
+ } |
1633 |
+ |
1634 |
+- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
1635 |
++ void ddirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
1636 |
+ { |
1637 |
+ magma_int_t i; |
1638 |
+ |
1639 |
+@@ -362,9 +362,9 @@ |
1640 |
+ |
1641 |
+ //compute the lower and upper bound of the non-deflated eigenvectors |
1642 |
+ if (valeig) |
1643 |
+- dvrange(k, d, &iil, &iiu, vl, vu); |
1644 |
++ ddvrange(k, d, &iil, &iiu, vl, vu); |
1645 |
+ else if (indeig) |
1646 |
+- dirange(k, indxq, &iil, &iiu, il, iu); |
1647 |
++ ddirange(k, indxq, &iil, &iiu, il, iu); |
1648 |
+ else { |
1649 |
+ iil = 1; |
1650 |
+ iiu = k; |
1651 |
+diff -Nur magma-1.2.1.orig/src/dstedx_m.cpp magma-1.2.1/src/dstedx_m.cpp |
1652 |
+--- magma-1.2.1.orig/src/dstedx_m.cpp 2012-07-03 11:59:28.920659903 -0700 |
1653 |
++++ magma-1.2.1/src/dstedx_m.cpp 2012-07-03 11:59:45.430746359 -0700 |
1654 |
+@@ -26,10 +26,7 @@ |
1655 |
+ |
1656 |
+ double lapackf77_dlanst( char* norm, magma_int_t* n, double* d, double* e); |
1657 |
+ |
1658 |
+- magma_int_t get_dstedx_smlsize() |
1659 |
+- { |
1660 |
+- return 25; |
1661 |
+- } |
1662 |
++ magma_int_t get_dstedx_smlsize(); |
1663 |
+ } |
1664 |
+ |
1665 |
+ extern "C" magma_int_t |
1666 |
+diff -Nur magma-1.2.1.orig/src/slaex0_m.cpp magma-1.2.1/src/slaex0_m.cpp |
1667 |
+--- magma-1.2.1.orig/src/slaex0_m.cpp 2012-07-03 11:59:28.931659961 -0700 |
1668 |
++++ magma-1.2.1/src/slaex0_m.cpp 2012-07-03 11:59:45.431746364 -0700 |
1669 |
+@@ -22,10 +22,7 @@ |
1670 |
+ |
1671 |
+ int magma_get_slaex3_m_nb(); |
1672 |
+ |
1673 |
+- magma_int_t get_slaex0_smlsize() |
1674 |
+- { |
1675 |
+- return 25; |
1676 |
+- } |
1677 |
++ magma_int_t get_slaex0_smlsize(); |
1678 |
+ } |
1679 |
+ |
1680 |
+ extern "C" magma_int_t |
1681 |
+diff -Nur magma-1.2.1.orig/src/slaex3_m.cpp magma-1.2.1/src/slaex3_m.cpp |
1682 |
+--- magma-1.2.1.orig/src/slaex3_m.cpp 2012-07-03 11:59:28.920659903 -0700 |
1683 |
++++ magma-1.2.1/src/slaex3_m.cpp 2012-07-03 11:59:45.431746364 -0700 |
1684 |
+@@ -34,7 +34,7 @@ |
1685 |
+ } |
1686 |
+ |
1687 |
+ extern"C"{ |
1688 |
+- void dvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu) |
1689 |
++ void sdvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu) |
1690 |
+ { |
1691 |
+ magma_int_t i; |
1692 |
+ |
1693 |
+@@ -51,7 +51,7 @@ |
1694 |
+ return; |
1695 |
+ } |
1696 |
+ |
1697 |
+- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
1698 |
++ void sdirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu) |
1699 |
+ { |
1700 |
+ magma_int_t i; |
1701 |
+ |
1702 |
+@@ -362,9 +362,9 @@ |
1703 |
+ |
1704 |
+ //compute the lower and upper bound of the non-deflated eigenvectors |
1705 |
+ if (valeig) |
1706 |
+- dvrange(k, d, &iil, &iiu, vl, vu); |
1707 |
++ sdvrange(k, d, &iil, &iiu, vl, vu); |
1708 |
+ else if (indeig) |
1709 |
+- dirange(k, indxq, &iil, &iiu, il, iu); |
1710 |
++ sdirange(k, indxq, &iil, &iiu, il, iu); |
1711 |
+ else { |
1712 |
+ iil = 1; |
1713 |
+ iiu = k; |
1714 |
+diff -Nur magma-1.2.1.orig/src/sstedx_m.cpp magma-1.2.1/src/sstedx_m.cpp |
1715 |
+--- magma-1.2.1.orig/src/sstedx_m.cpp 2012-07-03 11:59:28.932659966 -0700 |
1716 |
++++ magma-1.2.1/src/sstedx_m.cpp 2012-07-03 11:59:45.431746364 -0700 |
1717 |
+@@ -26,10 +26,7 @@ |
1718 |
+ |
1719 |
+ float lapackf77_slanst( char* norm, magma_int_t* n, float* d, float* e); |
1720 |
+ |
1721 |
+- magma_int_t get_sstedx_smlsize() |
1722 |
+- { |
1723 |
+- return 25; |
1724 |
+- } |
1725 |
++ magma_int_t get_sstedx_smlsize(); |
1726 |
+ } |
1727 |
+ |
1728 |
+ extern "C" magma_int_t |
1729 |
+diff -Nur magma-1.2.1.orig/src/zheevx_gpu.cpp magma-1.2.1/src/zheevx_gpu.cpp |
1730 |
+--- magma-1.2.1.orig/src/zheevx_gpu.cpp 2012-07-03 11:59:28.934659976 -0700 |
1731 |
++++ magma-1.2.1/src/zheevx_gpu.cpp 2012-07-03 11:59:45.431746364 -0700 |
1732 |
+@@ -12,27 +12,6 @@ |
1733 |
+ */ |
1734 |
+ #include "common_magma.h" |
1735 |
+ |
1736 |
+-/* These interfaces are used for TAU profiling */ |
1737 |
+-extern"C"{ |
1738 |
+- void Mylapackf77_zstein(magma_int_t *n, double *d, double *e, magma_int_t *m, |
1739 |
+- double *w, magma_int_t *iblock, magma_int_t *isplit, |
1740 |
+- cuDoubleComplex *z, magma_int_t *ldz, double *work, |
1741 |
+- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info) |
1742 |
+- { |
1743 |
+- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info); |
1744 |
+- } |
1745 |
+- |
1746 |
+- void Mylapackf77_dstebz(char *range, char *order, magma_int_t *n, double *vl, |
1747 |
+- double *vu, magma_int_t *il, magma_int_t *iu, double *abstol, |
1748 |
+- double *d, double *e, magma_int_t *m, magma_int_t *nsplit, |
1749 |
+- double *w, magma_int_t *iblock, magma_int_t *isplit, |
1750 |
+- double *work, magma_int_t *iwork, magma_int_t *info) |
1751 |
+- { |
1752 |
+- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol, |
1753 |
+- d, e, m, nsplit, w, iblock, isplit, work, iwork,info); |
1754 |
+- } |
1755 |
+-} |
1756 |
+- |
1757 |
+ extern "C" magma_int_t |
1758 |
+ magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n, |
1759 |
+ cuDoubleComplex *da, magma_int_t ldda, double vl, double vu, |
1760 |
+@@ -404,12 +383,12 @@ |
1761 |
+ indisp = indibl + n; |
1762 |
+ indiwk = indisp + n; |
1763 |
+ |
1764 |
+- Mylapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1765 |
++ lapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m, |
1766 |
+ &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info); |
1767 |
+ |
1768 |
+ if (wantz) { |
1769 |
+ |
1770 |
+- Mylapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1771 |
++ lapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp], |
1772 |
+ wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info); |
1773 |
+ |
1774 |
+ magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz ); |
1775 |
+diff -Nur magma-1.2.1.orig/src/zhegvd.cpp magma-1.2.1/src/zhegvd.cpp |
1776 |
+--- magma-1.2.1.orig/src/zhegvd.cpp 2012-07-03 11:59:28.920659903 -0700 |
1777 |
++++ magma-1.2.1/src/zhegvd.cpp 2012-07-03 11:59:45.432746370 -0700 |
1778 |
+@@ -13,26 +13,6 @@ |
1779 |
+ */ |
1780 |
+ #include "common_magma.h" |
1781 |
+ |
1782 |
+-/* This ztrmm interface is used for TAU profiling */ |
1783 |
+-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, |
1784 |
+- magma_int_t n, magma_int_t m, |
1785 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1786 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1787 |
+-{ |
1788 |
+- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1789 |
+- magma_device_sync(); |
1790 |
+-} |
1791 |
+- |
1792 |
+-/* This ztrsm interface is used for TAU profiling */ |
1793 |
+-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, |
1794 |
+- magma_int_t n, magma_int_t m, |
1795 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1796 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1797 |
+-{ |
1798 |
+- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1799 |
+- magma_device_sync(); |
1800 |
+-} |
1801 |
+- |
1802 |
+ extern "C" magma_int_t |
1803 |
+ magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n, |
1804 |
+ cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1805 |
+@@ -324,7 +304,7 @@ |
1806 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1807 |
+ } |
1808 |
+ |
1809 |
+- Mymagma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1810 |
++ magma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1811 |
+ n, n, c_one, db, lddb, da, ldda); |
1812 |
+ |
1813 |
+ } else if (itype == 3) |
1814 |
+@@ -337,7 +317,7 @@ |
1815 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1816 |
+ } |
1817 |
+ |
1818 |
+- Mymagma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1819 |
++ magma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit, |
1820 |
+ n, n, c_one, db, lddb, da, ldda); |
1821 |
+ } |
1822 |
+ |
1823 |
+diff -Nur magma-1.2.1.orig/src/zhegvdx.cpp magma-1.2.1/src/zhegvdx.cpp |
1824 |
+--- magma-1.2.1.orig/src/zhegvdx.cpp 2012-07-03 11:59:28.922659915 -0700 |
1825 |
++++ magma-1.2.1/src/zhegvdx.cpp 2012-07-03 11:59:45.432746370 -0700 |
1826 |
+@@ -12,22 +12,6 @@ |
1827 |
+ */ |
1828 |
+ #include "common_magma.h" |
1829 |
+ |
1830 |
+-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1831 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1832 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1833 |
+-{ |
1834 |
+- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1835 |
+- magma_device_sync(); |
1836 |
+-} |
1837 |
+- |
1838 |
+-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1839 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1840 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1841 |
+-{ |
1842 |
+- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1843 |
+- magma_device_sync(); |
1844 |
+-} |
1845 |
+- |
1846 |
+ extern "C" magma_int_t |
1847 |
+ magma_zhegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1848 |
+ cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1849 |
+@@ -363,7 +347,7 @@ |
1850 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1851 |
+ } |
1852 |
+ |
1853 |
+- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1854 |
++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1855 |
+ |
1856 |
+ } else if (itype == 3) { |
1857 |
+ |
1858 |
+@@ -375,7 +359,7 @@ |
1859 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1860 |
+ } |
1861 |
+ |
1862 |
+- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1863 |
++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda); |
1864 |
+ |
1865 |
+ } |
1866 |
+ |
1867 |
+diff -Nur magma-1.2.1.orig/src/zhegvr.cpp magma-1.2.1/src/zhegvr.cpp |
1868 |
+--- magma-1.2.1.orig/src/zhegvr.cpp 2012-07-03 11:59:28.930659956 -0700 |
1869 |
++++ magma-1.2.1/src/zhegvr.cpp 2012-07-03 11:59:45.432746370 -0700 |
1870 |
+@@ -12,24 +12,6 @@ |
1871 |
+ */ |
1872 |
+ #include "common_magma.h" |
1873 |
+ |
1874 |
+-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, |
1875 |
+- magma_int_t n, magma_int_t m, |
1876 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1877 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1878 |
+-{ |
1879 |
+- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1880 |
+- magma_device_sync(); |
1881 |
+-} |
1882 |
+- |
1883 |
+-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, |
1884 |
+- magma_int_t n, magma_int_t m, |
1885 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1886 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1887 |
+-{ |
1888 |
+- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1889 |
+- magma_device_sync(); |
1890 |
+-} |
1891 |
+- |
1892 |
+ extern "C" magma_int_t |
1893 |
+ magma_zhegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1894 |
+ cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1895 |
+@@ -408,7 +390,7 @@ |
1896 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1897 |
+ } |
1898 |
+ |
1899 |
+- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1900 |
++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1901 |
+ db, lddb, dz, lddz); |
1902 |
+ |
1903 |
+ } else if (itype == 3) { |
1904 |
+@@ -421,7 +403,7 @@ |
1905 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1906 |
+ } |
1907 |
+ |
1908 |
+- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1909 |
++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, |
1910 |
+ db, lddb, dz, lddz); |
1911 |
+ } |
1912 |
+ |
1913 |
+diff -Nur magma-1.2.1.orig/src/zhegvx.cpp magma-1.2.1/src/zhegvx.cpp |
1914 |
+--- magma-1.2.1.orig/src/zhegvx.cpp 2012-07-03 11:59:28.914659873 -0700 |
1915 |
++++ magma-1.2.1/src/zhegvx.cpp 2012-07-03 11:59:45.433746376 -0700 |
1916 |
+@@ -12,22 +12,6 @@ |
1917 |
+ */ |
1918 |
+ #include "common_magma.h" |
1919 |
+ |
1920 |
+-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1921 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1922 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1923 |
+-{ |
1924 |
+- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1925 |
+- magma_device_sync(); |
1926 |
+-} |
1927 |
+- |
1928 |
+-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m, |
1929 |
+- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb, |
1930 |
+- cuDoubleComplex *dz, magma_int_t lddz) |
1931 |
+-{ |
1932 |
+- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz); |
1933 |
+- magma_device_sync(); |
1934 |
+-} |
1935 |
+- |
1936 |
+ extern "C" magma_int_t |
1937 |
+ magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n, |
1938 |
+ cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb, |
1939 |
+@@ -330,7 +314,7 @@ |
1940 |
+ *(unsigned char *)trans = MagmaNoTrans; |
1941 |
+ } |
1942 |
+ |
1943 |
+- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1944 |
++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1945 |
+ |
1946 |
+ } else if (itype == 3) { |
1947 |
+ |
1948 |
+@@ -343,7 +327,7 @@ |
1949 |
+ *(unsigned char *)trans = MagmaConjTrans; |
1950 |
+ } |
1951 |
+ |
1952 |
+- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1953 |
++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz); |
1954 |
+ |
1955 |
+ } |
1956 |
+ |
1957 |
+diff -Nur magma-1.2.1.orig/src/zstedx_m.cpp magma-1.2.1/src/zstedx_m.cpp |
1958 |
+--- magma-1.2.1.orig/src/zstedx_m.cpp 2012-07-03 11:59:28.921659909 -0700 |
1959 |
++++ magma-1.2.1/src/zstedx_m.cpp 2012-07-03 11:59:45.433746376 -0700 |
1960 |
+@@ -16,10 +16,7 @@ |
1961 |
+ double* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork, |
1962 |
+ magma_int_t* info); |
1963 |
+ |
1964 |
+- magma_int_t get_zstedx_smlsize() |
1965 |
+- { |
1966 |
+- return 25; |
1967 |
+- } |
1968 |
++ magma_int_t get_zstedx_smlsize(); |
1969 |
+ } |
1970 |
+ |
1971 |
+ extern "C" magma_int_t |
1972 |
|
1973 |
diff --git a/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch b/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch |
1974 |
new file mode 100644 |
1975 |
index 0000000..9dc99a8 |
1976 |
--- /dev/null |
1977 |
+++ b/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch |
1978 |
@@ -0,0 +1,38 @@ |
1979 |
+--- magma-1.2.1.orig/testing/lin/Makefile 2012-07-03 11:59:28.875659669 -0700 |
1980 |
++++ magma-1.2.1/testing/lin/Makefile 2012-07-03 12:01:05.546166252 -0700 |
1981 |
+@@ -266,7 +266,7 @@ |
1982 |
+ ../matgen/$(TMGLIB) -o xlintsts \ |
1983 |
+ ../fortran.o \ |
1984 |
+ $(LDOPTS) -L../../lib \ |
1985 |
+- -lcuda -lmagma -lmagmablas \ |
1986 |
++ -lmagma -lmagmablas \ |
1987 |
+ $(LIBDIR) $(LIB) |
1988 |
+ |
1989 |
+ xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST) ../fortran.o |
1990 |
+@@ -274,7 +274,7 @@ |
1991 |
+ ../matgen/$(TMGLIB) -o xlintstc \ |
1992 |
+ ../fortran.o \ |
1993 |
+ $(LDOPTS) -L../../lib \ |
1994 |
+- -lcuda -lmagma -lmagmablas \ |
1995 |
++ -lmagma -lmagmablas \ |
1996 |
+ $(LIBDIR) $(LIB) |
1997 |
+ |
1998 |
+ xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST) ../fortran.o |
1999 |
+@@ -282,7 +282,7 @@ |
2000 |
+ ../matgen/$(TMGLIB) -o xlintstd \ |
2001 |
+ ../fortran.o \ |
2002 |
+ $(LDOPTS) -L../../lib \ |
2003 |
+- -lcuda -lmagma -lmagmablas \ |
2004 |
++ -lmagma -lmagmablas \ |
2005 |
+ $(LIBDIR) $(LIB) |
2006 |
+ |
2007 |
+ xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST) ../fortran.o |
2008 |
+@@ -290,7 +290,7 @@ |
2009 |
+ ../matgen/$(TMGLIB) -o xlintstz \ |
2010 |
+ ../fortran.o \ |
2011 |
+ $(LDOPTS) -L../../lib \ |
2012 |
+- -lcuda -lmagma -lmagmablas -lmagma \ |
2013 |
++ -lmagma -lmagmablas -lmagma \ |
2014 |
+ $(LIBDIR) $(LIB) |
2015 |
+ |
2016 |
+ $(ALINTST): $(FRC) |
2017 |
|
2018 |
diff --git a/sci-libs/magma/magma-1.2.0.ebuild b/sci-libs/magma/magma-1.2.1.ebuild |
2019 |
similarity index 87% |
2020 |
rename from sci-libs/magma/magma-1.2.0.ebuild |
2021 |
rename to sci-libs/magma/magma-1.2.1.ebuild |
2022 |
index 18e16f2..65b822c 100644 |
2023 |
--- a/sci-libs/magma/magma-1.2.0.ebuild |
2024 |
+++ b/sci-libs/magma/magma-1.2.1.ebuild |
2025 |
@@ -3,25 +3,22 @@ |
2026 |
# $Header: $ |
2027 |
|
2028 |
EAPI=4 |
2029 |
-FORTRAN_STANDARD="77 90" |
2030 |
|
2031 |
+FORTRAN_STANDARD="77 90" |
2032 |
inherit eutils fortran-2 toolchain-funcs versionator |
2033 |
|
2034 |
-MYP=${PN}_${PV} |
2035 |
- |
2036 |
DESCRIPTION="Matrix Algebra on GPU and Multicore Architectures" |
2037 |
HOMEPAGE="http://icl.cs.utk.edu/magma/" |
2038 |
-SRC_URI="http://icl.cs.utk.edu/projectsfiles/${PN}/${MYP}.tar.gz" |
2039 |
+SRC_URI="http://icl.cs.utk.edu/projectsfiles/${PN}/${P}.tar.gz" |
2040 |
|
2041 |
LICENSE="BSD" |
2042 |
SLOT="0" |
2043 |
-KEYWORDS="~amd64 ~x86" |
2044 |
+KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux" |
2045 |
IUSE="fermi static-libs tesla" |
2046 |
|
2047 |
RDEPEND="dev-util/nvidia-cuda-toolkit |
2048 |
virtual/cblas |
2049 |
virtual/lapack" |
2050 |
- |
2051 |
DEPEND="${RDEPEND} |
2052 |
virtual/pkgconfig" |
2053 |
|
2054 |
@@ -40,8 +37,8 @@ make_shared_lib() { |
2055 |
|
2056 |
src_prepare() { |
2057 |
epatch \ |
2058 |
- "${FILESDIR}"/${P}-cblas-dotc.patch \ |
2059 |
- "${FILESDIR}"/${P}-duplicate-symbols.patch |
2060 |
+ "${FILESDIR}"/${P}-duplicate-symbols.patch \ |
2061 |
+ "${FILESDIR}"/${P}-no-cuda-driver.patch |
2062 |
|
2063 |
# distributed pc file not so useful so replace it |
2064 |
cat <<-EOF > ${PN}.pc |
2065 |
@@ -53,7 +50,7 @@ src_prepare() { |
2066 |
Version: ${PV} |
2067 |
URL: ${HOMEPAGE} |
2068 |
Libs: -L\${libdir} -lmagma -lmagmablas |
2069 |
- Libs.private: -lm -lpthread -ldl -lcublas -lcudart -lcuda |
2070 |
+ Libs.private: -lm -lpthread -ldl -lcublas -lcudart |
2071 |
Cflags: -I\${includedir} |
2072 |
Requires: cblas lapack |
2073 |
EOF |
2074 |
@@ -70,12 +67,13 @@ src_configure() { |
2075 |
INC = -I${EPREFIX}/opt/cuda/include -DADD_ |
2076 |
OPTS = ${CFLAGS} -fPIC |
2077 |
FOPTS = ${FFLAGS} -fPIC -x f95-cpp-input |
2078 |
+ F77OPTS = ${FFLAGS} -fPIC |
2079 |
NVOPTS = -DADD_ --compiler-options '-fPIC ${CFLAGS}' -DUNIX |
2080 |
LOADER = $(tc-getFC) |
2081 |
LIBBLAS = $(pkg-config --libs cblas) |
2082 |
LIBLAPACK = $(pkg-config --libs lapack) |
2083 |
CUDADIR = ${EPREFIX}/opt/cuda |
2084 |
- LIBCUDA = -L\$(CUDADIR)/$(get_libdir) -lcublas -lcudart -lcuda |
2085 |
+ LIBCUDA = -L\$(CUDADIR)/$(get_libdir) -lcublas -lcudart |
2086 |
LIB = -pthread -lm -ldl \$(LIBCUDA) \$(LIBBLAS) \$(LIBLAPACK) -lstdc++ |
2087 |
EOF |
2088 |
if use fermi; then |