Gentoo Archives: gentoo-commits

From: Sebastien Fabbro <bicatali@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/sci:master commit in: sci-libs/magma/, sci-libs/magma/files/
Date: Tue, 03 Jul 2012 19:35:02
Message-Id: 1341343868.19e8945bd388537423c2b8290a889f4453edfe08.bicatali@gentoo
1 commit: 19e8945bd388537423c2b8290a889f4453edfe08
2 Author: Sebastien Fabbro <sfabbro <AT> uvic <DOT> ca>
3 AuthorDate: Tue Jul 3 19:31:08 2012 +0000
4 Commit: Sebastien Fabbro <bicatali <AT> gentoo <DOT> org>
5 CommitDate: Tue Jul 3 19:31:08 2012 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/sci.git;a=commit;h=19e8945b
7
8 sci-libs/magma: Version bump. Updated patches
9
10 (Portage version: 2.2.01.20430-prefix/git/Linux x86_64, unsigned Manifest commit)
11
12 ---
13 sci-libs/magma/ChangeLog | 9 +
14 sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch | 71 --
15 .../files/magma-1.2.0-duplicate-symbols.patch | 1227 --------------------
16 .../files/magma-1.2.1-duplicate-symbols.patch | 612 ++++++++++
17 .../magma/files/magma-1.2.1-no-cuda-driver.patch | 38 +
18 .../{magma-1.2.0.ebuild => magma-1.2.1.ebuild} | 18 +-
19 6 files changed, 667 insertions(+), 1308 deletions(-)
20
21 diff --git a/sci-libs/magma/ChangeLog b/sci-libs/magma/ChangeLog
22 index 05bcaf9..460bde8 100644
23 --- a/sci-libs/magma/ChangeLog
24 +++ b/sci-libs/magma/ChangeLog
25 @@ -2,6 +2,15 @@
26 # Copyright 1999-2012 Gentoo Foundation; Distributed under the GPL v2
27 # $Header: $
28
29 +*magma-1.2.1 (03 Jul 2012)
30 +
31 + 03 Jul 2012; Sébastien Fabbro <bicatali@g.o>
32 + +files/magma-1.2.1-duplicate-symbols.patch,
33 + +files/magma-1.2.1-no-cuda-driver.patch, +magma-1.2.1.ebuild,
34 + -files/magma-1.2.0-cblas-dotc.patch,
35 + -files/magma-1.2.0-duplicate-symbols.patch, -magma-1.2.0.ebuild:
36 + sci-libs/magma: Version bump. Updated patches
37 +
38 *magma-1.2.0 (25 Jun 2012)
39
40 25 Jun 2012; Sébastien Fabbro <bicatali@g.o>
41
42 diff --git a/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch b/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch
43 deleted file mode 100644
44 index e2d16a4..0000000
45 --- a/sci-libs/magma/files/magma-1.2.0-cblas-dotc.patch
46 +++ /dev/null
47 @@ -1,71 +0,0 @@
48 -Description: Use cblas instead of calls to non existing codtc and zdotc C routines
49 -Author: Sebastien Fabbro <bicatali@g.o>
50 -
51 -diff -Nur magma-1.2.0.orig/src/clatrd2.cpp magma-1.2.0/src/clatrd2.cpp
52 ---- magma-1.2.0.orig/src/clatrd2.cpp 2012-06-06 17:41:50.000000000 +0100
53 -+++ magma-1.2.0/src/clatrd2.cpp 2012-06-06 17:45:23.000000000 +0100
54 -@@ -12,6 +12,7 @@
55 -
56 - */
57 - #include "common_magma.h"
58 -+#include <cblas.h>
59 -
60 - #define PRECISION_c
61 -
62 -@@ -270,7 +271,9 @@
63 - blasf77_cscal(&i, &tau[i - 1], W(0, iw), &ione);
64 -
65 - #if defined(PRECISION_z) || defined(PRECISION_c)
66 -- blasf77_cdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione);
67 -+ cblas_cdotc_sub(i, W(0, iw), ione, A(0, i), ione, &value);
68 -+
69 -+// blasf77_cdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione);
70 - alpha = tau[i - 1] * -.5f * value;
71 - #else
72 - alpha = tau[i - 1] * -.5f * blasf77_cdotc(&i, W(0, iw), &ione, A(0, i), &ione);
73 -@@ -349,7 +352,10 @@
74 - W(0, i), &ione, &c_one, W(i+1, i), &ione);
75 - blasf77_cscal(&i_n, &tau[i], W(i+1,i), &ione);
76 - #if defined(PRECISION_z) || defined(PRECISION_c)
77 -- blasf77_cdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione);
78 -+ cblas_cdotc_sub(i_n, W(i +1, i), ione,
79 -+ A(i +1, i), ione, &value);
80 -+
81 -+ //blasf77_cdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione);
82 - alpha = tau[i]* -.5f * value;
83 - #else
84 - alpha = tau[i]* -.5f* blasf77_cdotc(&i_n, W(i+1,i), &ione, A(i+1, i), &ione);
85 -diff -Nur magma-1.2.0.orig/src/zlatrd2.cpp magma-1.2.0/src/zlatrd2.cpp
86 ---- magma-1.2.0.orig/src/zlatrd2.cpp 2012-06-06 17:41:50.000000000 +0100
87 -+++ magma-1.2.0/src/zlatrd2.cpp 2012-06-06 17:46:33.000000000 +0100
88 -@@ -12,6 +12,7 @@
89 -
90 - */
91 - #include "common_magma.h"
92 -+#include <cblas.h>
93 -
94 - #define PRECISION_z
95 -
96 -@@ -270,7 +271,9 @@
97 - blasf77_zscal(&i, &tau[i - 1], W(0, iw), &ione);
98 -
99 - #if defined(PRECISION_z) || defined(PRECISION_c)
100 -- blasf77_zdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione);
101 -+ cblas_zdotc_sub(i, W(0, iw), ione, A(0, i), ione, &value);
102 -+
103 -+// blasf77_zdotc(&value, &i, W(0, iw), &ione, A(0, i), &ione);
104 - alpha = tau[i - 1] * -.5f * value;
105 - #else
106 - alpha = tau[i - 1] * -.5f * blasf77_zdotc(&i, W(0, iw), &ione, A(0, i), &ione);
107 -@@ -349,7 +352,10 @@
108 - W(0, i), &ione, &c_one, W(i+1, i), &ione);
109 - blasf77_zscal(&i_n, &tau[i], W(i+1,i), &ione);
110 - #if defined(PRECISION_z) || defined(PRECISION_c)
111 -- blasf77_zdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione);
112 -+ cblas_zdotc_sub(i_n, W(i +1, i), ione,
113 -+ A(i +1, i), ione, &value);
114 -+
115 -+ //blasf77_zdotc(&value, &i_n, W(i+1,i), &ione, A(i+1, i), &ione);
116 - alpha = tau[i]* -.5f * value;
117 - #else
118 - alpha = tau[i]* -.5f* blasf77_zdotc(&i_n, W(i+1,i), &ione, A(i+1, i), &ione);
119
120 diff --git a/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch b/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch
121 deleted file mode 100644
122 index 0627f7f..0000000
123 --- a/sci-libs/magma/files/magma-1.2.0-duplicate-symbols.patch
124 +++ /dev/null
125 @@ -1,1227 +0,0 @@
126 -diff -Nur src.orig/cgeqrf_mgpu-trace.cpp src/cgeqrf_mgpu-trace.cpp
127 ---- src.orig/cgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100
128 -+++ src/cgeqrf_mgpu-trace.cpp 2012-06-25 17:10:21.000000000 +0100
129 -@@ -16,108 +16,42 @@
130 - #include <sys/time.h>
131 - #include <assert.h>
132 -
133 --float get_current_cpu_time(void)
134 --{
135 -- struct timeval time_val;
136 --
137 -- gettimeofday(&time_val, NULL);
138 --
139 -- return (float)(time_val.tv_sec) + (float)(time_val.tv_usec) / 1000000.0;
140 --}
141 -+extern float get_current_cpu_time_float(void);
142 -
143 - #define MAX_THREADS 5
144 -
145 - //#define MAX_EVENTS 163840
146 - #define MAX_EVENTS 1048576
147 -
148 --int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
149 --float event_start_time [MAX_THREADS] __attribute__ ((aligned (128)));
150 --float event_end_time [MAX_THREADS] __attribute__ ((aligned (128)));
151 --float event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
152 --int log_events = 1;
153 -+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
154 -+extern float event_start_time_float [MAX_THREADS] __attribute__ ((aligned (128)));
155 -+extern float event_end_time_float [MAX_THREADS] __attribute__ ((aligned (128)));
156 -+extern float event_log_float [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
157 -+extern int log_events;
158 -
159 - #define core_cpu_event_start(my_core_id) \
160 -- event_start_time[my_core_id] = get_current_cpu_time(); \
161 -+ event_start_time_float[my_core_id] = get_current_cpu_time_float(); \
162 -
163 - #define core_cpu_event_end(my_core_id) \
164 -- event_end_time[my_core_id] = get_current_cpu_time(); \
165 -+ event_end_time_float[my_core_id] = get_current_cpu_time_float(); \
166 -
167 - #define core_gpu_event_start(my_core_id, e1, e2) \
168 - cudaEventElapsedTime(&ctime, e1, e2); \
169 -- event_start_time[my_core_id] = ctime/1000.+dtime; \
170 -+ event_start_time_float[my_core_id] = ctime/1000.+dtime; \
171 -
172 - #define core_gpu_event_end(my_core_id, e1, e2) \
173 - cudaEventElapsedTime(&ctime, e1, e2); \
174 -- event_end_time[my_core_id] = ctime/1000.+dtime; \
175 -+ event_end_time_float[my_core_id] = ctime/1000.+dtime; \
176 -
177 - #define core_log_event(event, my_core_id) \
178 -- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\
179 -- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\
180 -- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\
181 -- event_log[my_core_id][event_num[my_core_id]+3] = (event);\
182 -+ event_log_float[my_core_id][event_num[my_core_id]+0] = my_core_id;\
183 -+ event_log_float[my_core_id][event_num[my_core_id]+1] = event_start_time_float[my_core_id];\
184 -+ event_log_float[my_core_id][event_num[my_core_id]+2] = event_end_time_float[my_core_id];\
185 -+ event_log_float[my_core_id][event_num[my_core_id]+3] = (event);\
186 - event_num[my_core_id] += (log_events << 2); \
187 - event_num[my_core_id] &= (MAX_EVENTS-1);
188 -
189 --void dump_trace(int cores_num)
190 --{
191 -- char trace_file_name[32];
192 -- FILE *trace_file;
193 -- int event;
194 -- int core;
195 --
196 -- float scale = 100000.0;
197 -- float large = 100.0;
198 --
199 -- sprintf(trace_file_name, "trace.svg");
200 -- trace_file = fopen(trace_file_name, "w");
201 -- assert(trace_file != NULL);
202 --
203 -- fprintf(trace_file,
204 -- "<?xml version=\"1.0\" standalone=\"no\"?>"
205 -- "<svg version=\"1.1\" baseProfile=\"full\" xmlns=\"http://www.w3.org/2000/svg\" "
206 -- "xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" "
207 -- ">\n"
208 -- " <g font-size=\"20\">\n");
209 --
210 -- for (core = 0; core < cores_num; core++)
211 -- for (event = 0; event < event_num[core]; event += 4)
212 -- {
213 -- int thread = event_log[core][event+0];
214 -- float start = event_log[core][event+1];
215 -- float end = event_log[core][event+2];
216 -- int color = event_log[core][event+3];
217 --
218 -- start -= event_log[core][2];
219 -- end -= event_log[core][2];
220 -- /*
221 -- fprintf(trace_file,
222 -- " "
223 -- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" "
224 -- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n",
225 -- start * scale,
226 -- thread * 100.0,
227 -- (end - start) * scale,
228 -- 90.0,
229 -- color);
230 -- */
231 -- fprintf(trace_file,
232 -- " "
233 -- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" "
234 -- // "fill=\"#%06x\" />\n",
235 -- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n",
236 -- start * scale,
237 -- thread * (large+20.0),
238 -- (end - start) * scale,
239 -- large,
240 -- color);
241 -- }
242 --
243 -- fprintf(trace_file,
244 -- " </g>\n"
245 -- "</svg>\n");
246 --
247 -- fclose(trace_file);
248 --}
249 -+extern void dump_trace_float(int cores_num);
250 -
251 - //===========================================================================
252 -
253 -@@ -279,7 +213,7 @@
254 - core_cpu_event_end(num_gpus);
255 - core_log_event(0x666666, num_gpus);
256 -
257 -- dtime = get_current_cpu_time();
258 -+ dtime = get_current_cpu_time_float();
259 -
260 - for(j=0; j<num_gpus; j++){
261 - magma_setdevice(j);
262 -@@ -536,7 +470,7 @@
263 - }
264 -
265 - magma_setdevice(cdevice);
266 -- dump_trace(num_gpus+1);
267 -+ dump_trace_float(num_gpus+1);
268 -
269 - return *info;
270 - } /* magma_cgeqrf2_mgpu */
271 -diff -Nur src.orig/cheevr_gpu.cpp src/cheevr_gpu.cpp
272 ---- src.orig/cheevr_gpu.cpp 2012-06-23 21:52:09.000000000 +0100
273 -+++ src/cheevr_gpu.cpp 2012-06-25 06:59:15.000000000 +0100
274 -@@ -13,34 +13,6 @@
275 - #include "common_magma.h"
276 -
277 - /* These interfaces are used for TAU profiling */
278 --extern "C" {
279 -- void Mylapackf77_cstemr(const char *jobz, const char *range, magma_int_t *n, float *d, float *e,
280 -- float *vl, float *vu, magma_int_t *il, magma_int_t *iu,
281 -- magma_int_t *m, float *w, cuFloatComplex *z, magma_int_t *ldz,
282 -- magma_int_t *nzc, magma_int_t *isuppz, magma_int_t *tryrac,
283 -- float *work, magma_int_t *lwork, magma_int_t *iwork,
284 -- magma_int_t *liwork, magma_int_t *info)
285 -- {
286 -- lapackf77_cstemr(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc,
287 -- isuppz, tryrac, work, lwork, iwork, liwork, info);
288 -- }
289 --
290 -- void Mylapackf77_cstein(int *n, float *d, float *e, int *m, float *w, int *iblock,
291 -- int *isplit, cuFloatComplex *z, int *ldz, float *work,
292 -- int *iwork, int *ifail, int *info)
293 -- {
294 -- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
295 -- }
296 --
297 -- void Mylapackf77_sstebz(const char *range, const char *order, int *n, float *vl,
298 -- float *vu, int *il, int *iu, float *abstol,
299 -- float *d, float *e, int *m, int *nsplit, float *w,
300 -- int *iblock, int *isplit, float *work, int *iwork, int *info)
301 -- {
302 -- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol, d, e, m,
303 -- nsplit, w, iblock, isplit, work, iwork,info);
304 -- }
305 --}
306 -
307 - extern "C" {
308 - #ifdef ADD_
309 -@@ -497,7 +469,7 @@
310 - else
311 - tryrac=0;
312 -
313 -- Mylapackf77_cstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il,
314 -+ lapackf77_cstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il,
315 - &iu, m, &w[1], wz, &ldwz, &n, &isuppz[1], &tryrac, &rwork[indrwk],
316 - &llrwork, &iwork[1], &liwork, info);
317 -
318 -@@ -514,10 +486,10 @@
319 - printf("B/I\n");
320 - *info = 0;
321 -
322 -- Mylapackf77_sstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m,
323 -+ lapackf77_sstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m,
324 - &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwo], info);
325 -
326 -- Mylapackf77_cstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp],
327 -+ lapackf77_cstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp],
328 - wz, &ldwz, &rwork[indrwk], &iwork[indiwo], &iwork[indifl], info);
329 -
330 - /* Apply unitary matrix used in reduction to tridiagonal
331 -diff -Nur src.orig/cheevx_gpu.cpp src/cheevx_gpu.cpp
332 ---- src.orig/cheevx_gpu.cpp 2012-06-23 21:52:09.000000000 +0100
333 -+++ src/cheevx_gpu.cpp 2012-06-25 06:59:15.000000000 +0100
334 -@@ -12,27 +12,6 @@
335 - */
336 - #include "common_magma.h"
337 -
338 --/* These interfaces are used for TAU profiling */
339 --extern"C"{
340 -- void Mylapackf77_cstein(magma_int_t *n, float *d, float *e, magma_int_t *m,
341 -- float *w, magma_int_t *iblock, magma_int_t *isplit,
342 -- cuFloatComplex *z, magma_int_t *ldz, float *work,
343 -- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
344 -- {
345 -- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
346 -- }
347 --
348 -- void Mylapackf77_sstebz(char *range, char *order, magma_int_t *n, float *vl,
349 -- float *vu, magma_int_t *il, magma_int_t *iu, float *abstol,
350 -- float *d, float *e, magma_int_t *m, magma_int_t *nsplit,
351 -- float *w, magma_int_t *iblock, magma_int_t *isplit,
352 -- float *work, magma_int_t *iwork, magma_int_t *info)
353 -- {
354 -- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol,
355 -- d, e, m, nsplit, w, iblock, isplit, work, iwork,info);
356 -- }
357 --}
358 --
359 - extern "C" magma_int_t
360 - magma_cheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
361 - cuFloatComplex *da, magma_int_t ldda, float vl, float vu,
362 -@@ -404,12 +383,12 @@
363 - indisp = indibl + n;
364 - indiwk = indisp + n;
365 -
366 -- Mylapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
367 -+ lapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
368 - &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
369 -
370 - if (wantz) {
371 -
372 -- Mylapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
373 -+ lapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
374 - wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
375 -
376 - magma_csetmatrix( n, *m, wz, ldwz, dz, lddz );
377 -diff -Nur src.orig/chegvd.cpp src/chegvd.cpp
378 ---- src.orig/chegvd.cpp 2012-06-23 21:52:09.000000000 +0100
379 -+++ src/chegvd.cpp 2012-06-25 06:59:15.000000000 +0100
380 -@@ -13,26 +13,6 @@
381 - */
382 - #include "common_magma.h"
383 -
384 --/* This ctrmm interface is used for TAU profiling */
385 --void Mymagma_ctrmm(char side, char uplo, char trans, char unit,
386 -- magma_int_t n, magma_int_t m,
387 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
388 -- cuFloatComplex *dz, magma_int_t lddz)
389 --{
390 -- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
391 -- magma_device_sync();
392 --}
393 --
394 --/* This ctrsm interface is used for TAU profiling */
395 --void Mymagma_ctrsm(char side, char uplo, char trans, char unit,
396 -- magma_int_t n, magma_int_t m,
397 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
398 -- cuFloatComplex *dz, magma_int_t lddz)
399 --{
400 -- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
401 -- magma_device_sync();
402 --}
403 --
404 - extern "C" magma_int_t
405 - magma_chegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
406 - cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
407 -@@ -324,7 +304,7 @@
408 - *(unsigned char *)trans = MagmaNoTrans;
409 - }
410 -
411 -- Mymagma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
412 -+ magma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
413 - n, n, c_one, db, lddb, da, ldda);
414 -
415 - } else if (itype == 3)
416 -@@ -337,7 +317,7 @@
417 - *(unsigned char *)trans = MagmaConjTrans;
418 - }
419 -
420 -- Mymagma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
421 -+ magma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
422 - n, n, c_one, db, lddb, da, ldda);
423 - }
424 -
425 -diff -Nur src.orig/chegvdx.cpp src/chegvdx.cpp
426 ---- src.orig/chegvdx.cpp 2012-06-23 21:52:09.000000000 +0100
427 -+++ src/chegvdx.cpp 2012-06-25 06:59:15.000000000 +0100
428 -@@ -12,22 +12,6 @@
429 - */
430 - #include "common_magma.h"
431 -
432 --void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
433 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
434 -- cuFloatComplex *dz, magma_int_t lddz)
435 --{
436 -- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
437 -- magma_device_sync();
438 --}
439 --
440 --void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
441 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
442 -- cuFloatComplex *dz, magma_int_t lddz)
443 --{
444 -- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
445 -- magma_device_sync();
446 --}
447 --
448 - extern "C" magma_int_t
449 - magma_chegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
450 - cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
451 -@@ -363,7 +347,7 @@
452 - *(unsigned char *)trans = MagmaNoTrans;
453 - }
454 -
455 -- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
456 -+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
457 -
458 - } else if (itype == 3) {
459 -
460 -@@ -375,7 +359,7 @@
461 - *(unsigned char *)trans = MagmaConjTrans;
462 - }
463 -
464 -- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
465 -+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
466 -
467 - }
468 -
469 -diff -Nur src.orig/chegvr.cpp src/chegvr.cpp
470 ---- src.orig/chegvr.cpp 2012-06-23 21:52:09.000000000 +0100
471 -+++ src/chegvr.cpp 2012-06-25 06:59:15.000000000 +0100
472 -@@ -12,24 +12,6 @@
473 - */
474 - #include "common_magma.h"
475 -
476 --void Mymagma_ctrmm(char side, char uplo, char trans, char unit,
477 -- magma_int_t n, magma_int_t m,
478 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
479 -- cuFloatComplex *dz, magma_int_t lddz)
480 --{
481 -- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
482 -- magma_device_sync();
483 --}
484 --
485 --void Mymagma_ctrsm(char side, char uplo, char trans, char unit,
486 -- magma_int_t n, magma_int_t m,
487 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
488 -- cuFloatComplex *dz, magma_int_t lddz)
489 --{
490 -- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
491 -- magma_device_sync();
492 --}
493 --
494 - extern "C" magma_int_t
495 - magma_chegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
496 - cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
497 -@@ -408,7 +390,7 @@
498 - *(unsigned char *)trans = MagmaNoTrans;
499 - }
500 -
501 -- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
502 -+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
503 - db, lddb, dz, lddz);
504 -
505 - } else if (itype == 3) {
506 -@@ -421,7 +403,7 @@
507 - *(unsigned char *)trans = MagmaConjTrans;
508 - }
509 -
510 -- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
511 -+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
512 - db, lddb, dz, lddz);
513 - }
514 -
515 -diff -Nur src.orig/chegvx.cpp src/chegvx.cpp
516 ---- src.orig/chegvx.cpp 2012-06-23 21:52:09.000000000 +0100
517 -+++ src/chegvx.cpp 2012-06-25 06:59:15.000000000 +0100
518 -@@ -12,22 +12,6 @@
519 - */
520 - #include "common_magma.h"
521 -
522 --void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
523 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
524 -- cuFloatComplex *dz, magma_int_t lddz)
525 --{
526 -- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
527 -- magma_device_sync();
528 --}
529 --
530 --void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
531 -- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
532 -- cuFloatComplex *dz, magma_int_t lddz)
533 --{
534 -- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
535 -- magma_device_sync();
536 --}
537 --
538 - extern "C" magma_int_t
539 - magma_chegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
540 - cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
541 -@@ -330,7 +314,7 @@
542 - *(unsigned char *)trans = MagmaNoTrans;
543 - }
544 -
545 -- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
546 -+ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
547 -
548 - } else if (itype == 3) {
549 -
550 -@@ -343,7 +327,7 @@
551 - *(unsigned char *)trans = MagmaConjTrans;
552 - }
553 -
554 -- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
555 -+ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
556 -
557 - }
558 -
559 -diff -Nur src.orig/cstedx_m.cpp src/cstedx_m.cpp
560 ---- src.orig/cstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100
561 -+++ src/cstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100
562 -@@ -16,10 +16,7 @@
563 - float* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork,
564 - magma_int_t* info);
565 -
566 -- magma_int_t get_cstedx_smlsize()
567 -- {
568 -- return 25;
569 -- }
570 -+ magma_int_t get_cstedx_smlsize();
571 - }
572 -
573 - extern "C" magma_int_t
574 -diff -Nur src.orig/dgeqrf_mgpu-trace.cpp src/dgeqrf_mgpu-trace.cpp
575 ---- src.orig/dgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100
576 -+++ src/dgeqrf_mgpu-trace.cpp 2012-06-25 06:59:15.000000000 +0100
577 -@@ -1,4 +1,4 @@
578 --/*
579 -+/*5A5A5A
580 - -- MAGMA (version 1.2.0) --
581 - Univ. of Tennessee, Knoxville
582 - Univ. of California, Berkeley
583 -@@ -16,7 +16,7 @@
584 - #include <sys/time.h>
585 - #include <assert.h>
586 -
587 --double get_current_cpu_time(void)
588 -+double get_current_cpu_time_double(void)
589 - {
590 - struct timeval time_val;
591 -
592 -@@ -30,35 +30,35 @@
593 - //#define MAX_EVENTS 163840
594 - #define MAX_EVENTS 1048576
595 -
596 --int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
597 --double event_start_time [MAX_THREADS] __attribute__ ((aligned (128)));
598 --double event_end_time [MAX_THREADS] __attribute__ ((aligned (128)));
599 --double event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
600 --int log_events = 1;
601 -+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
602 -+double event_start_time_double [MAX_THREADS] __attribute__ ((aligned (128)));
603 -+double event_end_time_double [MAX_THREADS] __attribute__ ((aligned (128)));
604 -+double event_log_double [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
605 -+extern int log_events;
606 -
607 - #define core_cpu_event_start(my_core_id) \
608 -- event_start_time[my_core_id] = get_current_cpu_time(); \
609 -+ event_start_time_double[my_core_id] = get_current_cpu_time_double(); \
610 -
611 - #define core_cpu_event_end(my_core_id) \
612 -- event_end_time[my_core_id] = get_current_cpu_time(); \
613 -+ event_end_time_double[my_core_id] = get_current_cpu_time_double(); \
614 -
615 - #define core_gpu_event_start(my_core_id, e1, e2) \
616 - cudaEventElapsedTime(&ctime, e1, e2); \
617 -- event_start_time[my_core_id] = ctime/1000.+dtime; \
618 -+ event_start_time_double[my_core_id] = ctime/1000.+dtime; \
619 -
620 - #define core_gpu_event_end(my_core_id, e1, e2) \
621 - cudaEventElapsedTime(&ctime, e1, e2); \
622 -- event_end_time[my_core_id] = ctime/1000.+dtime; \
623 -+ event_end_time_double[my_core_id] = ctime/1000.+dtime; \
624 -
625 - #define core_log_event(event, my_core_id) \
626 -- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\
627 -- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\
628 -- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\
629 -- event_log[my_core_id][event_num[my_core_id]+3] = (event);\
630 -+ event_log_double[my_core_id][event_num[my_core_id]+0] = my_core_id;\
631 -+ event_log_double[my_core_id][event_num[my_core_id]+1] = event_start_time_double[my_core_id];\
632 -+ event_log_double[my_core_id][event_num[my_core_id]+2] = event_end_time_double[my_core_id];\
633 -+ event_log_double[my_core_id][event_num[my_core_id]+3] = (event);\
634 - event_num[my_core_id] += (log_events << 2); \
635 - event_num[my_core_id] &= (MAX_EVENTS-1);
636 -
637 --void dump_trace(int cores_num)
638 -+void dump_trace_double(int cores_num)
639 - {
640 - char trace_file_name[32];
641 - FILE *trace_file;
642 -@@ -82,13 +82,13 @@
643 - for (core = 0; core < cores_num; core++)
644 - for (event = 0; event < event_num[core]; event += 4)
645 - {
646 -- int thread = event_log[core][event+0];
647 -- double start = event_log[core][event+1];
648 -- double end = event_log[core][event+2];
649 -- int color = event_log[core][event+3];
650 -+ int thread = event_log_double[core][event+0];
651 -+ double start = event_log_double[core][event+1];
652 -+ double end = event_log_double[core][event+2];
653 -+ int color = event_log_double[core][event+3];
654 -
655 -- start -= event_log[core][2];
656 -- end -= event_log[core][2];
657 -+ start -= event_log_double[core][2];
658 -+ end -= event_log_double[core][2];
659 - /*
660 - fprintf(trace_file,
661 - " "
662 -@@ -279,7 +279,7 @@
663 - core_cpu_event_end(num_gpus);
664 - core_log_event(0x666666, num_gpus);
665 -
666 -- dtime = get_current_cpu_time();
667 -+ dtime = get_current_cpu_time_double();
668 -
669 - for(j=0; j<num_gpus; j++){
670 - magma_setdevice(j);
671 -@@ -536,7 +536,7 @@
672 - }
673 -
674 - magma_setdevice(cdevice);
675 -- dump_trace(num_gpus+1);
676 -+ dump_trace_double(num_gpus+1);
677 -
678 - return *info;
679 - } /* magma_dgeqrf2_mgpu */
680 -diff -Nur src.orig/dlaex0_m.cpp src/dlaex0_m.cpp
681 ---- src.orig/dlaex0_m.cpp 2012-06-23 21:52:09.000000000 +0100
682 -+++ src/dlaex0_m.cpp 2012-06-25 06:59:15.000000000 +0100
683 -@@ -23,10 +23,7 @@
684 -
685 - int magma_get_dlaex3_m_nb();
686 -
687 -- magma_int_t get_dlaex0_smlsize()
688 -- {
689 -- return 25;
690 -- }
691 -+ magma_int_t get_dlaex0_smlsize();
692 - }
693 -
694 - extern "C" magma_int_t
695 -diff -Nur src.orig/dlaex3_m.cpp src/dlaex3_m.cpp
696 ---- src.orig/dlaex3_m.cpp 2012-06-23 21:52:09.000000000 +0100
697 -+++ src/dlaex3_m.cpp 2012-06-25 06:59:15.000000000 +0100
698 -@@ -36,7 +36,7 @@
699 - }
700 -
701 - extern"C"{
702 -- void dvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu)
703 -+ void ddvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu)
704 - {
705 - magma_int_t i;
706 -
707 -@@ -53,7 +53,7 @@
708 - return;
709 - }
710 -
711 -- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
712 -+ void ddirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
713 - {
714 - magma_int_t i;
715 -
716 -@@ -364,9 +364,9 @@
717 -
718 - //compute the lower and upper bound of the non-deflated eigenvectors
719 - if (valeig)
720 -- dvrange(k, d, &iil, &iiu, vl, vu);
721 -+ ddvrange(k, d, &iil, &iiu, vl, vu);
722 - else if (indeig)
723 -- dirange(k, indxq, &iil, &iiu, il, iu);
724 -+ ddirange(k, indxq, &iil, &iiu, il, iu);
725 - else {
726 - iil = 1;
727 - iiu = k;
728 -diff -Nur src.orig/dstedx_m.cpp src/dstedx_m.cpp
729 ---- src.orig/dstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100
730 -+++ src/dstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100
731 -@@ -26,10 +26,7 @@
732 -
733 - double lapackf77_dlanst( char* norm, magma_int_t* n, double* d, double* e);
734 -
735 -- magma_int_t get_dstedx_smlsize()
736 -- {
737 -- return 25;
738 -- }
739 -+ magma_int_t get_dstedx_smlsize();
740 - }
741 -
742 - extern "C" magma_int_t
743 -diff -Nur src.orig/sgeqrf_mgpu-trace.cpp src/sgeqrf_mgpu-trace.cpp
744 ---- src.orig/sgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100
745 -+++ src/sgeqrf_mgpu-trace.cpp 2012-06-25 06:59:15.000000000 +0100
746 -@@ -16,7 +16,7 @@
747 - #include <sys/time.h>
748 - #include <assert.h>
749 -
750 --float get_current_cpu_time(void)
751 -+float get_current_cpu_time_float(void)
752 - {
753 - struct timeval time_val;
754 -
755 -@@ -31,34 +31,34 @@
756 - #define MAX_EVENTS 1048576
757 -
758 - int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
759 --float event_start_time [MAX_THREADS] __attribute__ ((aligned (128)));
760 --float event_end_time [MAX_THREADS] __attribute__ ((aligned (128)));
761 --float event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
762 -+float event_start_time_float [MAX_THREADS] __attribute__ ((aligned (128)));
763 -+float event_end_time_float [MAX_THREADS] __attribute__ ((aligned (128)));
764 -+float event_log_float [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
765 - int log_events = 1;
766 -
767 - #define core_cpu_event_start(my_core_id) \
768 -- event_start_time[my_core_id] = get_current_cpu_time(); \
769 -+ event_start_time_float[my_core_id] = get_current_cpu_time_float(); \
770 -
771 - #define core_cpu_event_end(my_core_id) \
772 -- event_end_time[my_core_id] = get_current_cpu_time(); \
773 -+ event_end_time_float[my_core_id] = get_current_cpu_time_float(); \
774 -
775 - #define core_gpu_event_start(my_core_id, e1, e2) \
776 - cudaEventElapsedTime(&ctime, e1, e2); \
777 -- event_start_time[my_core_id] = ctime/1000.+dtime; \
778 -+ event_start_time_float[my_core_id] = ctime/1000.+dtime; \
779 -
780 - #define core_gpu_event_end(my_core_id, e1, e2) \
781 - cudaEventElapsedTime(&ctime, e1, e2); \
782 -- event_end_time[my_core_id] = ctime/1000.+dtime; \
783 -+ event_end_time_float[my_core_id] = ctime/1000.+dtime; \
784 -
785 - #define core_log_event(event, my_core_id) \
786 -- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\
787 -- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\
788 -- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\
789 -- event_log[my_core_id][event_num[my_core_id]+3] = (event);\
790 -+ event_log_float[my_core_id][event_num[my_core_id]+0] = my_core_id;\
791 -+ event_log_float[my_core_id][event_num[my_core_id]+1] = event_start_time_float[my_core_id];\
792 -+ event_log_float[my_core_id][event_num[my_core_id]+2] = event_end_time_float[my_core_id];\
793 -+ event_log_float[my_core_id][event_num[my_core_id]+3] = (event);\
794 - event_num[my_core_id] += (log_events << 2); \
795 - event_num[my_core_id] &= (MAX_EVENTS-1);
796 -
797 --void dump_trace(int cores_num)
798 -+void dump_trace_float(int cores_num)
799 - {
800 - char trace_file_name[32];
801 - FILE *trace_file;
802 -@@ -82,13 +82,13 @@
803 - for (core = 0; core < cores_num; core++)
804 - for (event = 0; event < event_num[core]; event += 4)
805 - {
806 -- int thread = event_log[core][event+0];
807 -- float start = event_log[core][event+1];
808 -- float end = event_log[core][event+2];
809 -- int color = event_log[core][event+3];
810 -+ int thread = event_log_float[core][event+0];
811 -+ float start = event_log_float[core][event+1];
812 -+ float end = event_log_float[core][event+2];
813 -+ int color = event_log_float[core][event+3];
814 -
815 -- start -= event_log[core][2];
816 -- end -= event_log[core][2];
817 -+ start -= event_log_float[core][2];
818 -+ end -= event_log_float[core][2];
819 - /*
820 - fprintf(trace_file,
821 - " "
822 -@@ -279,7 +279,7 @@
823 - core_cpu_event_end(num_gpus);
824 - core_log_event(0x666666, num_gpus);
825 -
826 -- dtime = get_current_cpu_time();
827 -+ dtime = get_current_cpu_time_float();
828 -
829 - for(j=0; j<num_gpus; j++){
830 - magma_setdevice(j);
831 -@@ -536,7 +536,7 @@
832 - }
833 -
834 - magma_setdevice(cdevice);
835 -- dump_trace(num_gpus+1);
836 -+ dump_trace_float(num_gpus+1);
837 -
838 - return *info;
839 - } /* magma_sgeqrf2_mgpu */
840 -diff -Nur src.orig/slaex0_m.cpp src/slaex0_m.cpp
841 ---- src.orig/slaex0_m.cpp 2012-06-23 21:52:09.000000000 +0100
842 -+++ src/slaex0_m.cpp 2012-06-25 06:59:15.000000000 +0100
843 -@@ -23,10 +23,7 @@
844 -
845 - int magma_get_slaex3_m_nb();
846 -
847 -- magma_int_t get_slaex0_smlsize()
848 -- {
849 -- return 25;
850 -- }
851 -+ magma_int_t get_slaex0_smlsize();
852 - }
853 -
854 - extern "C" magma_int_t
855 -diff -Nur src.orig/slaex3_m.cpp src/slaex3_m.cpp
856 ---- src.orig/slaex3_m.cpp 2012-06-23 21:52:09.000000000 +0100
857 -+++ src/slaex3_m.cpp 2012-06-25 06:59:15.000000000 +0100
858 -@@ -36,7 +36,7 @@
859 - }
860 -
861 - extern"C"{
862 -- void dvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu)
863 -+ void sdvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu)
864 - {
865 - magma_int_t i;
866 -
867 -@@ -53,7 +53,7 @@
868 - return;
869 - }
870 -
871 -- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
872 -+ void sdirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
873 - {
874 - magma_int_t i;
875 -
876 -@@ -364,9 +364,9 @@
877 -
878 - //compute the lower and upper bound of the non-deflated eigenvectors
879 - if (valeig)
880 -- dvrange(k, d, &iil, &iiu, vl, vu);
881 -+ sdvrange(k, d, &iil, &iiu, vl, vu);
882 - else if (indeig)
883 -- dirange(k, indxq, &iil, &iiu, il, iu);
884 -+ sdirange(k, indxq, &iil, &iiu, il, iu);
885 - else {
886 - iil = 1;
887 - iiu = k;
888 -diff -Nur src.orig/sstedx_m.cpp src/sstedx_m.cpp
889 ---- src.orig/sstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100
890 -+++ src/sstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100
891 -@@ -26,10 +26,7 @@
892 -
893 - float lapackf77_slanst( char* norm, magma_int_t* n, float* d, float* e);
894 -
895 -- magma_int_t get_sstedx_smlsize()
896 -- {
897 -- return 25;
898 -- }
899 -+ magma_int_t get_sstedx_smlsize();
900 - }
901 -
902 - extern "C" magma_int_t
903 -diff -Nur src.orig/zgeqrf_mgpu-trace.cpp src/zgeqrf_mgpu-trace.cpp
904 ---- src.orig/zgeqrf_mgpu-trace.cpp 2012-06-23 21:52:09.000000000 +0100
905 -+++ src/zgeqrf_mgpu-trace.cpp 2012-06-25 17:20:18.000000000 +0100
906 -@@ -16,108 +16,42 @@
907 - #include <sys/time.h>
908 - #include <assert.h>
909 -
910 --double get_current_cpu_time(void)
911 --{
912 -- struct timeval time_val;
913 --
914 -- gettimeofday(&time_val, NULL);
915 --
916 -- return (double)(time_val.tv_sec) + (double)(time_val.tv_usec) / 1000000.0;
917 --}
918 -+extern double get_current_cpu_time_double(void);
919 -
920 - #define MAX_THREADS 5
921 -
922 - //#define MAX_EVENTS 163840
923 - #define MAX_EVENTS 1048576
924 -
925 --int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
926 --double event_start_time [MAX_THREADS] __attribute__ ((aligned (128)));
927 --double event_end_time [MAX_THREADS] __attribute__ ((aligned (128)));
928 --double event_log [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
929 --int log_events = 1;
930 -+extern int event_num [MAX_THREADS] __attribute__ ((aligned (128)));
931 -+extern double event_start_time_double [MAX_THREADS] __attribute__ ((aligned (128)));
932 -+extern double event_end_time_double [MAX_THREADS] __attribute__ ((aligned (128)));
933 -+extern double event_log_double [MAX_THREADS][MAX_EVENTS] __attribute__ ((aligned (128)));
934 -+extern int log_events;
935 -
936 - #define core_cpu_event_start(my_core_id) \
937 -- event_start_time[my_core_id] = get_current_cpu_time(); \
938 -+ event_start_time_double[my_core_id] = get_current_cpu_time_double(); \
939 -
940 - #define core_cpu_event_end(my_core_id) \
941 -- event_end_time[my_core_id] = get_current_cpu_time(); \
942 -+ event_end_time_double[my_core_id] = get_current_cpu_time_double(); \
943 -
944 - #define core_gpu_event_start(my_core_id, e1, e2) \
945 - cudaEventElapsedTime(&ctime, e1, e2); \
946 -- event_start_time[my_core_id] = ctime/1000.+dtime; \
947 -+ event_start_time_double[my_core_id] = ctime/1000.+dtime; \
948 -
949 - #define core_gpu_event_end(my_core_id, e1, e2) \
950 - cudaEventElapsedTime(&ctime, e1, e2); \
951 -- event_end_time[my_core_id] = ctime/1000.+dtime; \
952 -+ event_end_time_double[my_core_id] = ctime/1000.+dtime; \
953 -
954 - #define core_log_event(event, my_core_id) \
955 -- event_log[my_core_id][event_num[my_core_id]+0] = my_core_id;\
956 -- event_log[my_core_id][event_num[my_core_id]+1] = event_start_time[my_core_id];\
957 -- event_log[my_core_id][event_num[my_core_id]+2] = event_end_time[my_core_id];\
958 -- event_log[my_core_id][event_num[my_core_id]+3] = (event);\
959 -+ event_log_double[my_core_id][event_num[my_core_id]+0] = my_core_id;\
960 -+ event_log_double[my_core_id][event_num[my_core_id]+1] = event_start_time_double[my_core_id];\
961 -+ event_log_double[my_core_id][event_num[my_core_id]+2] = event_end_time_double[my_core_id];\
962 -+ event_log_double[my_core_id][event_num[my_core_id]+3] = (event);\
963 - event_num[my_core_id] += (log_events << 2); \
964 - event_num[my_core_id] &= (MAX_EVENTS-1);
965 -
966 --void dump_trace(int cores_num)
967 --{
968 -- char trace_file_name[32];
969 -- FILE *trace_file;
970 -- int event;
971 -- int core;
972 --
973 -- double scale = 100000.0;
974 -- double large = 100.0;
975 --
976 -- sprintf(trace_file_name, "trace.svg");
977 -- trace_file = fopen(trace_file_name, "w");
978 -- assert(trace_file != NULL);
979 --
980 -- fprintf(trace_file,
981 -- "<?xml version=\"1.0\" standalone=\"no\"?>"
982 -- "<svg version=\"1.1\" baseProfile=\"full\" xmlns=\"http://www.w3.org/2000/svg\" "
983 -- "xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" "
984 -- ">\n"
985 -- " <g font-size=\"20\">\n");
986 --
987 -- for (core = 0; core < cores_num; core++)
988 -- for (event = 0; event < event_num[core]; event += 4)
989 -- {
990 -- int thread = event_log[core][event+0];
991 -- double start = event_log[core][event+1];
992 -- double end = event_log[core][event+2];
993 -- int color = event_log[core][event+3];
994 --
995 -- start -= event_log[core][2];
996 -- end -= event_log[core][2];
997 -- /*
998 -- fprintf(trace_file,
999 -- " "
1000 -- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" "
1001 -- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n",
1002 -- start * scale,
1003 -- thread * 100.0,
1004 -- (end - start) * scale,
1005 -- 90.0,
1006 -- color);
1007 -- */
1008 -- fprintf(trace_file,
1009 -- " "
1010 -- "<rect x=\"%.2lf\" y=\"%.0lf\" width=\"%.2lf\" height=\"%.0lf\" "
1011 -- // "fill=\"#%06x\" />\n",
1012 -- "fill=\"#%06x\" stroke=\"#000000\" stroke-width=\"1\"/>\n",
1013 -- start * scale,
1014 -- thread * (large+20.0),
1015 -- (end - start) * scale,
1016 -- large,
1017 -- color);
1018 -- }
1019 --
1020 -- fprintf(trace_file,
1021 -- " </g>\n"
1022 -- "</svg>\n");
1023 --
1024 -- fclose(trace_file);
1025 --}
1026 -+extern void dump_trace_double(int cores_num);
1027 -
1028 - //===========================================================================
1029 -
1030 -@@ -279,7 +213,7 @@
1031 - core_cpu_event_end(num_gpus);
1032 - core_log_event(0x666666, num_gpus);
1033 -
1034 -- dtime = get_current_cpu_time();
1035 -+ dtime = get_current_cpu_time_double();
1036 -
1037 - for(j=0; j<num_gpus; j++){
1038 - magma_setdevice(j);
1039 -@@ -536,7 +470,7 @@
1040 - }
1041 -
1042 - magma_setdevice(cdevice);
1043 -- dump_trace(num_gpus+1);
1044 -+ dump_trace_double(num_gpus+1);
1045 -
1046 - return *info;
1047 - } /* magma_zgeqrf2_mgpu */
1048 -diff -Nur src.orig/zheevr_gpu.cpp src/zheevr_gpu.cpp
1049 ---- src.orig/zheevr_gpu.cpp 2012-06-23 21:52:09.000000000 +0100
1050 -+++ src/zheevr_gpu.cpp 2012-06-25 06:59:15.000000000 +0100
1051 -@@ -12,36 +12,6 @@
1052 - */
1053 - #include "common_magma.h"
1054 -
1055 --/* These interfaces are used for TAU profiling */
1056 --extern "C" {
1057 -- void Mylapackf77_zstemr(const char *jobz, const char *range, magma_int_t *n, double *d, double *e,
1058 -- double *vl, double *vu, magma_int_t *il, magma_int_t *iu,
1059 -- magma_int_t *m, double *w, cuDoubleComplex *z, magma_int_t *ldz,
1060 -- magma_int_t *nzc, magma_int_t *isuppz, magma_int_t *tryrac,
1061 -- double *work, magma_int_t *lwork, magma_int_t *iwork,
1062 -- magma_int_t *liwork, magma_int_t *info)
1063 -- {
1064 -- lapackf77_zstemr(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc,
1065 -- isuppz, tryrac, work, lwork, iwork, liwork, info);
1066 -- }
1067 --
1068 -- void Mylapackf77_zstein(int *n, double *d, double *e, int *m, double *w, int *iblock,
1069 -- int *isplit, cuDoubleComplex *z, int *ldz, double *work,
1070 -- int *iwork, int *ifail, int *info)
1071 -- {
1072 -- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
1073 -- }
1074 --
1075 -- void Mylapackf77_dstebz(const char *range, const char *order, int *n, double *vl,
1076 -- double *vu, int *il, int *iu, double *abstol,
1077 -- double *d, double *e, int *m, int *nsplit, double *w,
1078 -- int *iblock, int *isplit, double *work, int *iwork, int *info)
1079 -- {
1080 -- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol, d, e, m,
1081 -- nsplit, w, iblock, isplit, work, iwork,info);
1082 -- }
1083 --}
1084 --
1085 - extern "C" {
1086 - #ifdef ADD_
1087 - # define lapackf77_ieeeck ieeeck_
1088 -@@ -497,7 +467,7 @@
1089 - else
1090 - tryrac=0;
1091 -
1092 -- Mylapackf77_zstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il,
1093 -+ lapackf77_zstemr(jobz_, range_, &n, &rwork[indrdd], &rwork[indree], &vl, &vu, &il,
1094 - &iu, m, &w[1], wz, &ldwz, &n, &isuppz[1], &tryrac, &rwork[indrwk],
1095 - &llrwork, &iwork[1], &liwork, info);
1096 -
1097 -@@ -514,10 +484,10 @@
1098 - printf("B/I\n");
1099 - *info = 0;
1100 -
1101 -- Mylapackf77_dstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m,
1102 -+ lapackf77_dstebz(range_, "B", &n, &vl, &vu, &il, &iu, &abstol, &rwork[indrd], &rwork[indre], m,
1103 - &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwo], info);
1104 -
1105 -- Mylapackf77_zstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp],
1106 -+ lapackf77_zstein(&n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], &iwork[indisp],
1107 - wz, &ldwz, &rwork[indrwk], &iwork[indiwo], &iwork[indifl], info);
1108 -
1109 - /* Apply unitary matrix used in reduction to tridiagonal
1110 -diff -Nur src.orig/zheevx_gpu.cpp src/zheevx_gpu.cpp
1111 ---- src.orig/zheevx_gpu.cpp 2012-06-23 21:52:09.000000000 +0100
1112 -+++ src/zheevx_gpu.cpp 2012-06-25 06:59:15.000000000 +0100
1113 -@@ -12,27 +12,6 @@
1114 - */
1115 - #include "common_magma.h"
1116 -
1117 --/* These interfaces are used for TAU profiling */
1118 --extern"C"{
1119 -- void Mylapackf77_zstein(magma_int_t *n, double *d, double *e, magma_int_t *m,
1120 -- double *w, magma_int_t *iblock, magma_int_t *isplit,
1121 -- cuDoubleComplex *z, magma_int_t *ldz, double *work,
1122 -- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
1123 -- {
1124 -- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
1125 -- }
1126 --
1127 -- void Mylapackf77_dstebz(char *range, char *order, magma_int_t *n, double *vl,
1128 -- double *vu, magma_int_t *il, magma_int_t *iu, double *abstol,
1129 -- double *d, double *e, magma_int_t *m, magma_int_t *nsplit,
1130 -- double *w, magma_int_t *iblock, magma_int_t *isplit,
1131 -- double *work, magma_int_t *iwork, magma_int_t *info)
1132 -- {
1133 -- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol,
1134 -- d, e, m, nsplit, w, iblock, isplit, work, iwork,info);
1135 -- }
1136 --}
1137 --
1138 - extern "C" magma_int_t
1139 - magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
1140 - cuDoubleComplex *da, magma_int_t ldda, double vl, double vu,
1141 -@@ -404,12 +383,12 @@
1142 - indisp = indibl + n;
1143 - indiwk = indisp + n;
1144 -
1145 -- Mylapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1146 -+ lapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1147 - &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
1148 -
1149 - if (wantz) {
1150 -
1151 -- Mylapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1152 -+ lapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1153 - wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
1154 -
1155 - magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz );
1156 -diff -Nur src.orig/zhegvd.cpp src/zhegvd.cpp
1157 ---- src.orig/zhegvd.cpp 2012-06-23 21:52:09.000000000 +0100
1158 -+++ src/zhegvd.cpp 2012-06-25 06:59:15.000000000 +0100
1159 -@@ -13,26 +13,6 @@
1160 - */
1161 - #include "common_magma.h"
1162 -
1163 --/* This ztrmm interface is used for TAU profiling */
1164 --void Mymagma_ztrmm(char side, char uplo, char trans, char unit,
1165 -- magma_int_t n, magma_int_t m,
1166 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1167 -- cuDoubleComplex *dz, magma_int_t lddz)
1168 --{
1169 -- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1170 -- magma_device_sync();
1171 --}
1172 --
1173 --/* This ztrsm interface is used for TAU profiling */
1174 --void Mymagma_ztrsm(char side, char uplo, char trans, char unit,
1175 -- magma_int_t n, magma_int_t m,
1176 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1177 -- cuDoubleComplex *dz, magma_int_t lddz)
1178 --{
1179 -- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1180 -- magma_device_sync();
1181 --}
1182 --
1183 - extern "C" magma_int_t
1184 - magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
1185 - cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1186 -@@ -324,7 +304,7 @@
1187 - *(unsigned char *)trans = MagmaNoTrans;
1188 - }
1189 -
1190 -- Mymagma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1191 -+ magma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1192 - n, n, c_one, db, lddb, da, ldda);
1193 -
1194 - } else if (itype == 3)
1195 -@@ -337,7 +317,7 @@
1196 - *(unsigned char *)trans = MagmaConjTrans;
1197 - }
1198 -
1199 -- Mymagma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1200 -+ magma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1201 - n, n, c_one, db, lddb, da, ldda);
1202 - }
1203 -
1204 -diff -Nur src.orig/zhegvdx.cpp src/zhegvdx.cpp
1205 ---- src.orig/zhegvdx.cpp 2012-06-23 21:52:09.000000000 +0100
1206 -+++ src/zhegvdx.cpp 2012-06-25 06:59:15.000000000 +0100
1207 -@@ -12,22 +12,6 @@
1208 - */
1209 - #include "common_magma.h"
1210 -
1211 --void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1212 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1213 -- cuDoubleComplex *dz, magma_int_t lddz)
1214 --{
1215 -- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1216 -- magma_device_sync();
1217 --}
1218 --
1219 --void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1220 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1221 -- cuDoubleComplex *dz, magma_int_t lddz)
1222 --{
1223 -- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1224 -- magma_device_sync();
1225 --}
1226 --
1227 - extern "C" magma_int_t
1228 - magma_zhegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1229 - cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1230 -@@ -363,7 +347,7 @@
1231 - *(unsigned char *)trans = MagmaNoTrans;
1232 - }
1233 -
1234 -- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1235 -+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1236 -
1237 - } else if (itype == 3) {
1238 -
1239 -@@ -375,7 +359,7 @@
1240 - *(unsigned char *)trans = MagmaConjTrans;
1241 - }
1242 -
1243 -- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1244 -+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1245 -
1246 - }
1247 -
1248 -diff -Nur src.orig/zhegvr.cpp src/zhegvr.cpp
1249 ---- src.orig/zhegvr.cpp 2012-06-23 21:52:09.000000000 +0100
1250 -+++ src/zhegvr.cpp 2012-06-25 06:59:15.000000000 +0100
1251 -@@ -12,24 +12,6 @@
1252 - */
1253 - #include "common_magma.h"
1254 -
1255 --void Mymagma_ztrmm(char side, char uplo, char trans, char unit,
1256 -- magma_int_t n, magma_int_t m,
1257 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1258 -- cuDoubleComplex *dz, magma_int_t lddz)
1259 --{
1260 -- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1261 -- magma_device_sync();
1262 --}
1263 --
1264 --void Mymagma_ztrsm(char side, char uplo, char trans, char unit,
1265 -- magma_int_t n, magma_int_t m,
1266 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1267 -- cuDoubleComplex *dz, magma_int_t lddz)
1268 --{
1269 -- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1270 -- magma_device_sync();
1271 --}
1272 --
1273 - extern "C" magma_int_t
1274 - magma_zhegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1275 - cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1276 -@@ -408,7 +390,7 @@
1277 - *(unsigned char *)trans = MagmaNoTrans;
1278 - }
1279 -
1280 -- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1281 -+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1282 - db, lddb, dz, lddz);
1283 -
1284 - } else if (itype == 3) {
1285 -@@ -421,7 +403,7 @@
1286 - *(unsigned char *)trans = MagmaConjTrans;
1287 - }
1288 -
1289 -- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1290 -+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1291 - db, lddb, dz, lddz);
1292 - }
1293 -
1294 -diff -Nur src.orig/zhegvx.cpp src/zhegvx.cpp
1295 ---- src.orig/zhegvx.cpp 2012-06-23 21:52:09.000000000 +0100
1296 -+++ src/zhegvx.cpp 2012-06-25 06:59:15.000000000 +0100
1297 -@@ -12,22 +12,6 @@
1298 - */
1299 - #include "common_magma.h"
1300 -
1301 --void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1302 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1303 -- cuDoubleComplex *dz, magma_int_t lddz)
1304 --{
1305 -- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1306 -- magma_device_sync();
1307 --}
1308 --
1309 --void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1310 -- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1311 -- cuDoubleComplex *dz, magma_int_t lddz)
1312 --{
1313 -- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1314 -- magma_device_sync();
1315 --}
1316 --
1317 - extern "C" magma_int_t
1318 - magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1319 - cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1320 -@@ -330,7 +314,7 @@
1321 - *(unsigned char *)trans = MagmaNoTrans;
1322 - }
1323 -
1324 -- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1325 -+ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1326 -
1327 - } else if (itype == 3) {
1328 -
1329 -@@ -343,7 +327,7 @@
1330 - *(unsigned char *)trans = MagmaConjTrans;
1331 - }
1332 -
1333 -- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1334 -+ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1335 -
1336 - }
1337 -
1338 -diff -Nur src.orig/zstedx_m.cpp src/zstedx_m.cpp
1339 ---- src.orig/zstedx_m.cpp 2012-06-23 21:52:09.000000000 +0100
1340 -+++ src/zstedx_m.cpp 2012-06-25 06:59:15.000000000 +0100
1341 -@@ -16,10 +16,7 @@
1342 - double* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork,
1343 - magma_int_t* info);
1344 -
1345 -- magma_int_t get_zstedx_smlsize()
1346 -- {
1347 -- return 25;
1348 -- }
1349 -+ magma_int_t get_zstedx_smlsize();
1350 - }
1351 -
1352 - extern "C" magma_int_t
1353
1354 diff --git a/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch b/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch
1355 new file mode 100644
1356 index 0000000..8fd2d0d
1357 --- /dev/null
1358 +++ b/sci-libs/magma/files/magma-1.2.1-duplicate-symbols.patch
1359 @@ -0,0 +1,612 @@
1360 +diff -Nur magma-1.2.1.orig/src/cheevx_gpu.cpp magma-1.2.1/src/cheevx_gpu.cpp
1361 +--- magma-1.2.1.orig/src/cheevx_gpu.cpp 2012-07-03 11:59:28.923659921 -0700
1362 ++++ magma-1.2.1/src/cheevx_gpu.cpp 2012-07-03 11:59:45.428746349 -0700
1363 +@@ -12,27 +12,6 @@
1364 + */
1365 + #include "common_magma.h"
1366 +
1367 +-/* These interfaces are used for TAU profiling */
1368 +-extern"C"{
1369 +- void Mylapackf77_cstein(magma_int_t *n, float *d, float *e, magma_int_t *m,
1370 +- float *w, magma_int_t *iblock, magma_int_t *isplit,
1371 +- cuFloatComplex *z, magma_int_t *ldz, float *work,
1372 +- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
1373 +- {
1374 +- lapackf77_cstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
1375 +- }
1376 +-
1377 +- void Mylapackf77_sstebz(char *range, char *order, magma_int_t *n, float *vl,
1378 +- float *vu, magma_int_t *il, magma_int_t *iu, float *abstol,
1379 +- float *d, float *e, magma_int_t *m, magma_int_t *nsplit,
1380 +- float *w, magma_int_t *iblock, magma_int_t *isplit,
1381 +- float *work, magma_int_t *iwork, magma_int_t *info)
1382 +- {
1383 +- lapackf77_sstebz(range, order, n, vl, vu, il, iu, abstol,
1384 +- d, e, m, nsplit, w, iblock, isplit, work, iwork,info);
1385 +- }
1386 +-}
1387 +-
1388 + extern "C" magma_int_t
1389 + magma_cheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
1390 + cuFloatComplex *da, magma_int_t ldda, float vl, float vu,
1391 +@@ -404,12 +383,12 @@
1392 + indisp = indibl + n;
1393 + indiwk = indisp + n;
1394 +
1395 +- Mylapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1396 ++ lapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1397 + &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
1398 +
1399 + if (wantz) {
1400 +
1401 +- Mylapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1402 ++ lapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1403 + wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
1404 +
1405 + magma_csetmatrix( n, *m, wz, ldwz, dz, lddz );
1406 +diff -Nur magma-1.2.1.orig/src/chegvd.cpp magma-1.2.1/src/chegvd.cpp
1407 +--- magma-1.2.1.orig/src/chegvd.cpp 2012-07-03 11:59:28.932659966 -0700
1408 ++++ magma-1.2.1/src/chegvd.cpp 2012-07-03 11:59:45.428746349 -0700
1409 +@@ -13,26 +13,6 @@
1410 + */
1411 + #include "common_magma.h"
1412 +
1413 +-/* This ctrmm interface is used for TAU profiling */
1414 +-void Mymagma_ctrmm(char side, char uplo, char trans, char unit,
1415 +- magma_int_t n, magma_int_t m,
1416 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1417 +- cuFloatComplex *dz, magma_int_t lddz)
1418 +-{
1419 +- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1420 +- magma_device_sync();
1421 +-}
1422 +-
1423 +-/* This ctrsm interface is used for TAU profiling */
1424 +-void Mymagma_ctrsm(char side, char uplo, char trans, char unit,
1425 +- magma_int_t n, magma_int_t m,
1426 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1427 +- cuFloatComplex *dz, magma_int_t lddz)
1428 +-{
1429 +- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1430 +- magma_device_sync();
1431 +-}
1432 +-
1433 + extern "C" magma_int_t
1434 + magma_chegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
1435 + cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
1436 +@@ -324,7 +304,7 @@
1437 + *(unsigned char *)trans = MagmaNoTrans;
1438 + }
1439 +
1440 +- Mymagma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1441 ++ magma_ctrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1442 + n, n, c_one, db, lddb, da, ldda);
1443 +
1444 + } else if (itype == 3)
1445 +@@ -337,7 +317,7 @@
1446 + *(unsigned char *)trans = MagmaConjTrans;
1447 + }
1448 +
1449 +- Mymagma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1450 ++ magma_ctrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1451 + n, n, c_one, db, lddb, da, ldda);
1452 + }
1453 +
1454 +diff -Nur magma-1.2.1.orig/src/chegvdx.cpp magma-1.2.1/src/chegvdx.cpp
1455 +--- magma-1.2.1.orig/src/chegvdx.cpp 2012-07-03 11:59:28.921659909 -0700
1456 ++++ magma-1.2.1/src/chegvdx.cpp 2012-07-03 11:59:45.428746349 -0700
1457 +@@ -12,22 +12,6 @@
1458 + */
1459 + #include "common_magma.h"
1460 +
1461 +-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1462 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1463 +- cuFloatComplex *dz, magma_int_t lddz)
1464 +-{
1465 +- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1466 +- magma_device_sync();
1467 +-}
1468 +-
1469 +-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1470 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1471 +- cuFloatComplex *dz, magma_int_t lddz)
1472 +-{
1473 +- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1474 +- magma_device_sync();
1475 +-}
1476 +-
1477 + extern "C" magma_int_t
1478 + magma_chegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1479 + cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
1480 +@@ -363,7 +347,7 @@
1481 + *(unsigned char *)trans = MagmaNoTrans;
1482 + }
1483 +
1484 +- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1485 ++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1486 +
1487 + } else if (itype == 3) {
1488 +
1489 +@@ -375,7 +359,7 @@
1490 + *(unsigned char *)trans = MagmaConjTrans;
1491 + }
1492 +
1493 +- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1494 ++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1495 +
1496 + }
1497 +
1498 +diff -Nur magma-1.2.1.orig/src/chegvr.cpp magma-1.2.1/src/chegvr.cpp
1499 +--- magma-1.2.1.orig/src/chegvr.cpp 2012-07-03 11:59:28.932659966 -0700
1500 ++++ magma-1.2.1/src/chegvr.cpp 2012-07-03 11:59:45.429746354 -0700
1501 +@@ -12,24 +12,6 @@
1502 + */
1503 + #include "common_magma.h"
1504 +
1505 +-void Mymagma_ctrmm(char side, char uplo, char trans, char unit,
1506 +- magma_int_t n, magma_int_t m,
1507 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1508 +- cuFloatComplex *dz, magma_int_t lddz)
1509 +-{
1510 +- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1511 +- magma_device_sync();
1512 +-}
1513 +-
1514 +-void Mymagma_ctrsm(char side, char uplo, char trans, char unit,
1515 +- magma_int_t n, magma_int_t m,
1516 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1517 +- cuFloatComplex *dz, magma_int_t lddz)
1518 +-{
1519 +- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1520 +- magma_device_sync();
1521 +-}
1522 +-
1523 + extern "C" magma_int_t
1524 + magma_chegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1525 + cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
1526 +@@ -408,7 +390,7 @@
1527 + *(unsigned char *)trans = MagmaNoTrans;
1528 + }
1529 +
1530 +- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1531 ++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1532 + db, lddb, dz, lddz);
1533 +
1534 + } else if (itype == 3) {
1535 +@@ -421,7 +403,7 @@
1536 + *(unsigned char *)trans = MagmaConjTrans;
1537 + }
1538 +
1539 +- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1540 ++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1541 + db, lddb, dz, lddz);
1542 + }
1543 +
1544 +diff -Nur magma-1.2.1.orig/src/chegvx.cpp magma-1.2.1/src/chegvx.cpp
1545 +--- magma-1.2.1.orig/src/chegvx.cpp 2012-07-03 11:59:28.923659921 -0700
1546 ++++ magma-1.2.1/src/chegvx.cpp 2012-07-03 11:59:45.429746354 -0700
1547 +@@ -12,22 +12,6 @@
1548 + */
1549 + #include "common_magma.h"
1550 +
1551 +-void Mymagma_ctrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1552 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1553 +- cuFloatComplex *dz, magma_int_t lddz)
1554 +-{
1555 +- magma_ctrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1556 +- magma_device_sync();
1557 +-}
1558 +-
1559 +-void Mymagma_ctrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1560 +- cuFloatComplex alpha, cuFloatComplex *db, magma_int_t lddb,
1561 +- cuFloatComplex *dz, magma_int_t lddz)
1562 +-{
1563 +- magma_ctrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1564 +- magma_device_sync();
1565 +-}
1566 +-
1567 + extern "C" magma_int_t
1568 + magma_chegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1569 + cuFloatComplex *a, magma_int_t lda, cuFloatComplex *b, magma_int_t ldb,
1570 +@@ -330,7 +314,7 @@
1571 + *(unsigned char *)trans = MagmaNoTrans;
1572 + }
1573 +
1574 +- Mymagma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1575 ++ magma_ctrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1576 +
1577 + } else if (itype == 3) {
1578 +
1579 +@@ -343,7 +327,7 @@
1580 + *(unsigned char *)trans = MagmaConjTrans;
1581 + }
1582 +
1583 +- Mymagma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1584 ++ magma_ctrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1585 +
1586 + }
1587 +
1588 +diff -Nur magma-1.2.1.orig/src/cstedx_m.cpp magma-1.2.1/src/cstedx_m.cpp
1589 +--- magma-1.2.1.orig/src/cstedx_m.cpp 2012-07-03 11:59:28.921659909 -0700
1590 ++++ magma-1.2.1/src/cstedx_m.cpp 2012-07-03 11:59:45.429746354 -0700
1591 +@@ -16,10 +16,7 @@
1592 + float* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork,
1593 + magma_int_t* info);
1594 +
1595 +- magma_int_t get_cstedx_smlsize()
1596 +- {
1597 +- return 25;
1598 +- }
1599 ++ magma_int_t get_cstedx_smlsize();
1600 + }
1601 +
1602 + extern "C" magma_int_t
1603 +diff -Nur magma-1.2.1.orig/src/dlaex0_m.cpp magma-1.2.1/src/dlaex0_m.cpp
1604 +--- magma-1.2.1.orig/src/dlaex0_m.cpp 2012-07-03 11:59:28.933659971 -0700
1605 ++++ magma-1.2.1/src/dlaex0_m.cpp 2012-07-03 11:59:45.430746359 -0700
1606 +@@ -22,10 +22,7 @@
1607 +
1608 + int magma_get_dlaex3_m_nb();
1609 +
1610 +- magma_int_t get_dlaex0_smlsize()
1611 +- {
1612 +- return 25;
1613 +- }
1614 ++ magma_int_t get_dlaex0_smlsize();
1615 + }
1616 +
1617 + extern "C" magma_int_t
1618 +diff -Nur magma-1.2.1.orig/src/dlaex3_m.cpp magma-1.2.1/src/dlaex3_m.cpp
1619 +--- magma-1.2.1.orig/src/dlaex3_m.cpp 2012-07-03 11:59:28.929659951 -0700
1620 ++++ magma-1.2.1/src/dlaex3_m.cpp 2012-07-03 11:59:45.430746359 -0700
1621 +@@ -34,7 +34,7 @@
1622 + }
1623 +
1624 + extern"C"{
1625 +- void dvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu)
1626 ++ void ddvrange(magma_int_t k, double *d, magma_int_t *il, magma_int_t *iu, double vl, double vu)
1627 + {
1628 + magma_int_t i;
1629 +
1630 +@@ -51,7 +51,7 @@
1631 + return;
1632 + }
1633 +
1634 +- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
1635 ++ void ddirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
1636 + {
1637 + magma_int_t i;
1638 +
1639 +@@ -362,9 +362,9 @@
1640 +
1641 + //compute the lower and upper bound of the non-deflated eigenvectors
1642 + if (valeig)
1643 +- dvrange(k, d, &iil, &iiu, vl, vu);
1644 ++ ddvrange(k, d, &iil, &iiu, vl, vu);
1645 + else if (indeig)
1646 +- dirange(k, indxq, &iil, &iiu, il, iu);
1647 ++ ddirange(k, indxq, &iil, &iiu, il, iu);
1648 + else {
1649 + iil = 1;
1650 + iiu = k;
1651 +diff -Nur magma-1.2.1.orig/src/dstedx_m.cpp magma-1.2.1/src/dstedx_m.cpp
1652 +--- magma-1.2.1.orig/src/dstedx_m.cpp 2012-07-03 11:59:28.920659903 -0700
1653 ++++ magma-1.2.1/src/dstedx_m.cpp 2012-07-03 11:59:45.430746359 -0700
1654 +@@ -26,10 +26,7 @@
1655 +
1656 + double lapackf77_dlanst( char* norm, magma_int_t* n, double* d, double* e);
1657 +
1658 +- magma_int_t get_dstedx_smlsize()
1659 +- {
1660 +- return 25;
1661 +- }
1662 ++ magma_int_t get_dstedx_smlsize();
1663 + }
1664 +
1665 + extern "C" magma_int_t
1666 +diff -Nur magma-1.2.1.orig/src/slaex0_m.cpp magma-1.2.1/src/slaex0_m.cpp
1667 +--- magma-1.2.1.orig/src/slaex0_m.cpp 2012-07-03 11:59:28.931659961 -0700
1668 ++++ magma-1.2.1/src/slaex0_m.cpp 2012-07-03 11:59:45.431746364 -0700
1669 +@@ -22,10 +22,7 @@
1670 +
1671 + int magma_get_slaex3_m_nb();
1672 +
1673 +- magma_int_t get_slaex0_smlsize()
1674 +- {
1675 +- return 25;
1676 +- }
1677 ++ magma_int_t get_slaex0_smlsize();
1678 + }
1679 +
1680 + extern "C" magma_int_t
1681 +diff -Nur magma-1.2.1.orig/src/slaex3_m.cpp magma-1.2.1/src/slaex3_m.cpp
1682 +--- magma-1.2.1.orig/src/slaex3_m.cpp 2012-07-03 11:59:28.920659903 -0700
1683 ++++ magma-1.2.1/src/slaex3_m.cpp 2012-07-03 11:59:45.431746364 -0700
1684 +@@ -34,7 +34,7 @@
1685 + }
1686 +
1687 + extern"C"{
1688 +- void dvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu)
1689 ++ void sdvrange(magma_int_t k, float *d, magma_int_t *il, magma_int_t *iu, float vl, float vu)
1690 + {
1691 + magma_int_t i;
1692 +
1693 +@@ -51,7 +51,7 @@
1694 + return;
1695 + }
1696 +
1697 +- void dirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
1698 ++ void sdirange(magma_int_t k, magma_int_t* indxq, magma_int_t *iil, magma_int_t *iiu, magma_int_t il, magma_int_t iu)
1699 + {
1700 + magma_int_t i;
1701 +
1702 +@@ -362,9 +362,9 @@
1703 +
1704 + //compute the lower and upper bound of the non-deflated eigenvectors
1705 + if (valeig)
1706 +- dvrange(k, d, &iil, &iiu, vl, vu);
1707 ++ sdvrange(k, d, &iil, &iiu, vl, vu);
1708 + else if (indeig)
1709 +- dirange(k, indxq, &iil, &iiu, il, iu);
1710 ++ sdirange(k, indxq, &iil, &iiu, il, iu);
1711 + else {
1712 + iil = 1;
1713 + iiu = k;
1714 +diff -Nur magma-1.2.1.orig/src/sstedx_m.cpp magma-1.2.1/src/sstedx_m.cpp
1715 +--- magma-1.2.1.orig/src/sstedx_m.cpp 2012-07-03 11:59:28.932659966 -0700
1716 ++++ magma-1.2.1/src/sstedx_m.cpp 2012-07-03 11:59:45.431746364 -0700
1717 +@@ -26,10 +26,7 @@
1718 +
1719 + float lapackf77_slanst( char* norm, magma_int_t* n, float* d, float* e);
1720 +
1721 +- magma_int_t get_sstedx_smlsize()
1722 +- {
1723 +- return 25;
1724 +- }
1725 ++ magma_int_t get_sstedx_smlsize();
1726 + }
1727 +
1728 + extern "C" magma_int_t
1729 +diff -Nur magma-1.2.1.orig/src/zheevx_gpu.cpp magma-1.2.1/src/zheevx_gpu.cpp
1730 +--- magma-1.2.1.orig/src/zheevx_gpu.cpp 2012-07-03 11:59:28.934659976 -0700
1731 ++++ magma-1.2.1/src/zheevx_gpu.cpp 2012-07-03 11:59:45.431746364 -0700
1732 +@@ -12,27 +12,6 @@
1733 + */
1734 + #include "common_magma.h"
1735 +
1736 +-/* These interfaces are used for TAU profiling */
1737 +-extern"C"{
1738 +- void Mylapackf77_zstein(magma_int_t *n, double *d, double *e, magma_int_t *m,
1739 +- double *w, magma_int_t *iblock, magma_int_t *isplit,
1740 +- cuDoubleComplex *z, magma_int_t *ldz, double *work,
1741 +- magma_int_t *iwork, magma_int_t *ifail, magma_int_t *info)
1742 +- {
1743 +- lapackf77_zstein(n, d, e, m, w, iblock, isplit, z, ldz, work, iwork, ifail, info);
1744 +- }
1745 +-
1746 +- void Mylapackf77_dstebz(char *range, char *order, magma_int_t *n, double *vl,
1747 +- double *vu, magma_int_t *il, magma_int_t *iu, double *abstol,
1748 +- double *d, double *e, magma_int_t *m, magma_int_t *nsplit,
1749 +- double *w, magma_int_t *iblock, magma_int_t *isplit,
1750 +- double *work, magma_int_t *iwork, magma_int_t *info)
1751 +- {
1752 +- lapackf77_dstebz(range, order, n, vl, vu, il, iu, abstol,
1753 +- d, e, m, nsplit, w, iblock, isplit, work, iwork,info);
1754 +- }
1755 +-}
1756 +-
1757 + extern "C" magma_int_t
1758 + magma_zheevx_gpu(char jobz, char range, char uplo, magma_int_t n,
1759 + cuDoubleComplex *da, magma_int_t ldda, double vl, double vu,
1760 +@@ -404,12 +383,12 @@
1761 + indisp = indibl + n;
1762 + indiwk = indisp + n;
1763 +
1764 +- Mylapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1765 ++ lapackf77_dstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
1766 + &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
1767 +
1768 + if (wantz) {
1769 +
1770 +- Mylapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1771 ++ lapackf77_zstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
1772 + wz, &ldwz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
1773 +
1774 + magma_zsetmatrix( n, *m, wz, ldwz, dz, lddz );
1775 +diff -Nur magma-1.2.1.orig/src/zhegvd.cpp magma-1.2.1/src/zhegvd.cpp
1776 +--- magma-1.2.1.orig/src/zhegvd.cpp 2012-07-03 11:59:28.920659903 -0700
1777 ++++ magma-1.2.1/src/zhegvd.cpp 2012-07-03 11:59:45.432746370 -0700
1778 +@@ -13,26 +13,6 @@
1779 + */
1780 + #include "common_magma.h"
1781 +
1782 +-/* This ztrmm interface is used for TAU profiling */
1783 +-void Mymagma_ztrmm(char side, char uplo, char trans, char unit,
1784 +- magma_int_t n, magma_int_t m,
1785 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1786 +- cuDoubleComplex *dz, magma_int_t lddz)
1787 +-{
1788 +- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1789 +- magma_device_sync();
1790 +-}
1791 +-
1792 +-/* This ztrsm interface is used for TAU profiling */
1793 +-void Mymagma_ztrsm(char side, char uplo, char trans, char unit,
1794 +- magma_int_t n, magma_int_t m,
1795 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1796 +- cuDoubleComplex *dz, magma_int_t lddz)
1797 +-{
1798 +- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1799 +- magma_device_sync();
1800 +-}
1801 +-
1802 + extern "C" magma_int_t
1803 + magma_zhegvd(magma_int_t itype, char jobz, char uplo, magma_int_t n,
1804 + cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1805 +@@ -324,7 +304,7 @@
1806 + *(unsigned char *)trans = MagmaNoTrans;
1807 + }
1808 +
1809 +- Mymagma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1810 ++ magma_ztrsm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1811 + n, n, c_one, db, lddb, da, ldda);
1812 +
1813 + } else if (itype == 3)
1814 +@@ -337,7 +317,7 @@
1815 + *(unsigned char *)trans = MagmaConjTrans;
1816 + }
1817 +
1818 +- Mymagma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1819 ++ magma_ztrmm(MagmaLeft, uplo_[0], *trans, MagmaNonUnit,
1820 + n, n, c_one, db, lddb, da, ldda);
1821 + }
1822 +
1823 +diff -Nur magma-1.2.1.orig/src/zhegvdx.cpp magma-1.2.1/src/zhegvdx.cpp
1824 +--- magma-1.2.1.orig/src/zhegvdx.cpp 2012-07-03 11:59:28.922659915 -0700
1825 ++++ magma-1.2.1/src/zhegvdx.cpp 2012-07-03 11:59:45.432746370 -0700
1826 +@@ -12,22 +12,6 @@
1827 + */
1828 + #include "common_magma.h"
1829 +
1830 +-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1831 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1832 +- cuDoubleComplex *dz, magma_int_t lddz)
1833 +-{
1834 +- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1835 +- magma_device_sync();
1836 +-}
1837 +-
1838 +-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1839 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1840 +- cuDoubleComplex *dz, magma_int_t lddz)
1841 +-{
1842 +- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1843 +- magma_device_sync();
1844 +-}
1845 +-
1846 + extern "C" magma_int_t
1847 + magma_zhegvdx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1848 + cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1849 +@@ -363,7 +347,7 @@
1850 + *(unsigned char *)trans = MagmaNoTrans;
1851 + }
1852 +
1853 +- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1854 ++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1855 +
1856 + } else if (itype == 3) {
1857 +
1858 +@@ -375,7 +359,7 @@
1859 + *(unsigned char *)trans = MagmaConjTrans;
1860 + }
1861 +
1862 +- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1863 ++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, da, ldda);
1864 +
1865 + }
1866 +
1867 +diff -Nur magma-1.2.1.orig/src/zhegvr.cpp magma-1.2.1/src/zhegvr.cpp
1868 +--- magma-1.2.1.orig/src/zhegvr.cpp 2012-07-03 11:59:28.930659956 -0700
1869 ++++ magma-1.2.1/src/zhegvr.cpp 2012-07-03 11:59:45.432746370 -0700
1870 +@@ -12,24 +12,6 @@
1871 + */
1872 + #include "common_magma.h"
1873 +
1874 +-void Mymagma_ztrmm(char side, char uplo, char trans, char unit,
1875 +- magma_int_t n, magma_int_t m,
1876 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1877 +- cuDoubleComplex *dz, magma_int_t lddz)
1878 +-{
1879 +- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1880 +- magma_device_sync();
1881 +-}
1882 +-
1883 +-void Mymagma_ztrsm(char side, char uplo, char trans, char unit,
1884 +- magma_int_t n, magma_int_t m,
1885 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1886 +- cuDoubleComplex *dz, magma_int_t lddz)
1887 +-{
1888 +- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1889 +- magma_device_sync();
1890 +-}
1891 +-
1892 + extern "C" magma_int_t
1893 + magma_zhegvr(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1894 + cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1895 +@@ -408,7 +390,7 @@
1896 + *(unsigned char *)trans = MagmaNoTrans;
1897 + }
1898 +
1899 +- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1900 ++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1901 + db, lddb, dz, lddz);
1902 +
1903 + } else if (itype == 3) {
1904 +@@ -421,7 +403,7 @@
1905 + *(unsigned char *)trans = MagmaConjTrans;
1906 + }
1907 +
1908 +- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1909 ++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one,
1910 + db, lddb, dz, lddz);
1911 + }
1912 +
1913 +diff -Nur magma-1.2.1.orig/src/zhegvx.cpp magma-1.2.1/src/zhegvx.cpp
1914 +--- magma-1.2.1.orig/src/zhegvx.cpp 2012-07-03 11:59:28.914659873 -0700
1915 ++++ magma-1.2.1/src/zhegvx.cpp 2012-07-03 11:59:45.433746376 -0700
1916 +@@ -12,22 +12,6 @@
1917 + */
1918 + #include "common_magma.h"
1919 +
1920 +-void Mymagma_ztrmm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1921 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1922 +- cuDoubleComplex *dz, magma_int_t lddz)
1923 +-{
1924 +- magma_ztrmm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1925 +- magma_device_sync();
1926 +-}
1927 +-
1928 +-void Mymagma_ztrsm(char side, char uplo, char trans, char unit, magma_int_t n, magma_int_t m,
1929 +- cuDoubleComplex alpha, cuDoubleComplex *db, magma_int_t lddb,
1930 +- cuDoubleComplex *dz, magma_int_t lddz)
1931 +-{
1932 +- magma_ztrsm(side, uplo, trans, unit, n, m, alpha, db, lddb, dz, lddz);
1933 +- magma_device_sync();
1934 +-}
1935 +-
1936 + extern "C" magma_int_t
1937 + magma_zhegvx(magma_int_t itype, char jobz, char range, char uplo, magma_int_t n,
1938 + cuDoubleComplex *a, magma_int_t lda, cuDoubleComplex *b, magma_int_t ldb,
1939 +@@ -330,7 +314,7 @@
1940 + *(unsigned char *)trans = MagmaNoTrans;
1941 + }
1942 +
1943 +- Mymagma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1944 ++ magma_ztrsm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1945 +
1946 + } else if (itype == 3) {
1947 +
1948 +@@ -343,7 +327,7 @@
1949 + *(unsigned char *)trans = MagmaConjTrans;
1950 + }
1951 +
1952 +- Mymagma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1953 ++ magma_ztrmm(MagmaLeft, uplo, *trans, MagmaNonUnit, n, *m, c_one, db, lddb, dz, lddz);
1954 +
1955 + }
1956 +
1957 +diff -Nur magma-1.2.1.orig/src/zstedx_m.cpp magma-1.2.1/src/zstedx_m.cpp
1958 +--- magma-1.2.1.orig/src/zstedx_m.cpp 2012-07-03 11:59:28.921659909 -0700
1959 ++++ magma-1.2.1/src/zstedx_m.cpp 2012-07-03 11:59:45.433746376 -0700
1960 +@@ -16,10 +16,7 @@
1961 + double* work, magma_int_t lwork, magma_int_t* iwork, magma_int_t liwork,
1962 + magma_int_t* info);
1963 +
1964 +- magma_int_t get_zstedx_smlsize()
1965 +- {
1966 +- return 25;
1967 +- }
1968 ++ magma_int_t get_zstedx_smlsize();
1969 + }
1970 +
1971 + extern "C" magma_int_t
1972
1973 diff --git a/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch b/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch
1974 new file mode 100644
1975 index 0000000..9dc99a8
1976 --- /dev/null
1977 +++ b/sci-libs/magma/files/magma-1.2.1-no-cuda-driver.patch
1978 @@ -0,0 +1,38 @@
1979 +--- magma-1.2.1.orig/testing/lin/Makefile 2012-07-03 11:59:28.875659669 -0700
1980 ++++ magma-1.2.1/testing/lin/Makefile 2012-07-03 12:01:05.546166252 -0700
1981 +@@ -266,7 +266,7 @@
1982 + ../matgen/$(TMGLIB) -o xlintsts \
1983 + ../fortran.o \
1984 + $(LDOPTS) -L../../lib \
1985 +- -lcuda -lmagma -lmagmablas \
1986 ++ -lmagma -lmagmablas \
1987 + $(LIBDIR) $(LIB)
1988 +
1989 + xlintstc : $(ALINTST) $(CLINTST) $(SCLNTST) ../fortran.o
1990 +@@ -274,7 +274,7 @@
1991 + ../matgen/$(TMGLIB) -o xlintstc \
1992 + ../fortran.o \
1993 + $(LDOPTS) -L../../lib \
1994 +- -lcuda -lmagma -lmagmablas \
1995 ++ -lmagma -lmagmablas \
1996 + $(LIBDIR) $(LIB)
1997 +
1998 + xlintstd : $(ALINTST) $(DLINTST) $(DZLNTST) ../fortran.o
1999 +@@ -282,7 +282,7 @@
2000 + ../matgen/$(TMGLIB) -o xlintstd \
2001 + ../fortran.o \
2002 + $(LDOPTS) -L../../lib \
2003 +- -lcuda -lmagma -lmagmablas \
2004 ++ -lmagma -lmagmablas \
2005 + $(LIBDIR) $(LIB)
2006 +
2007 + xlintstz : $(ALINTST) $(ZLINTST) $(DZLNTST) ../fortran.o
2008 +@@ -290,7 +290,7 @@
2009 + ../matgen/$(TMGLIB) -o xlintstz \
2010 + ../fortran.o \
2011 + $(LDOPTS) -L../../lib \
2012 +- -lcuda -lmagma -lmagmablas -lmagma \
2013 ++ -lmagma -lmagmablas -lmagma \
2014 + $(LIBDIR) $(LIB)
2015 +
2016 + $(ALINTST): $(FRC)
2017
2018 diff --git a/sci-libs/magma/magma-1.2.0.ebuild b/sci-libs/magma/magma-1.2.1.ebuild
2019 similarity index 87%
2020 rename from sci-libs/magma/magma-1.2.0.ebuild
2021 rename to sci-libs/magma/magma-1.2.1.ebuild
2022 index 18e16f2..65b822c 100644
2023 --- a/sci-libs/magma/magma-1.2.0.ebuild
2024 +++ b/sci-libs/magma/magma-1.2.1.ebuild
2025 @@ -3,25 +3,22 @@
2026 # $Header: $
2027
2028 EAPI=4
2029 -FORTRAN_STANDARD="77 90"
2030
2031 +FORTRAN_STANDARD="77 90"
2032 inherit eutils fortran-2 toolchain-funcs versionator
2033
2034 -MYP=${PN}_${PV}
2035 -
2036 DESCRIPTION="Matrix Algebra on GPU and Multicore Architectures"
2037 HOMEPAGE="http://icl.cs.utk.edu/magma/"
2038 -SRC_URI="http://icl.cs.utk.edu/projectsfiles/${PN}/${MYP}.tar.gz"
2039 +SRC_URI="http://icl.cs.utk.edu/projectsfiles/${PN}/${P}.tar.gz"
2040
2041 LICENSE="BSD"
2042 SLOT="0"
2043 -KEYWORDS="~amd64 ~x86"
2044 +KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux"
2045 IUSE="fermi static-libs tesla"
2046
2047 RDEPEND="dev-util/nvidia-cuda-toolkit
2048 virtual/cblas
2049 virtual/lapack"
2050 -
2051 DEPEND="${RDEPEND}
2052 virtual/pkgconfig"
2053
2054 @@ -40,8 +37,8 @@ make_shared_lib() {
2055
2056 src_prepare() {
2057 epatch \
2058 - "${FILESDIR}"/${P}-cblas-dotc.patch \
2059 - "${FILESDIR}"/${P}-duplicate-symbols.patch
2060 + "${FILESDIR}"/${P}-duplicate-symbols.patch \
2061 + "${FILESDIR}"/${P}-no-cuda-driver.patch
2062
2063 # distributed pc file not so useful so replace it
2064 cat <<-EOF > ${PN}.pc
2065 @@ -53,7 +50,7 @@ src_prepare() {
2066 Version: ${PV}
2067 URL: ${HOMEPAGE}
2068 Libs: -L\${libdir} -lmagma -lmagmablas
2069 - Libs.private: -lm -lpthread -ldl -lcublas -lcudart -lcuda
2070 + Libs.private: -lm -lpthread -ldl -lcublas -lcudart
2071 Cflags: -I\${includedir}
2072 Requires: cblas lapack
2073 EOF
2074 @@ -70,12 +67,13 @@ src_configure() {
2075 INC = -I${EPREFIX}/opt/cuda/include -DADD_
2076 OPTS = ${CFLAGS} -fPIC
2077 FOPTS = ${FFLAGS} -fPIC -x f95-cpp-input
2078 + F77OPTS = ${FFLAGS} -fPIC
2079 NVOPTS = -DADD_ --compiler-options '-fPIC ${CFLAGS}' -DUNIX
2080 LOADER = $(tc-getFC)
2081 LIBBLAS = $(pkg-config --libs cblas)
2082 LIBLAPACK = $(pkg-config --libs lapack)
2083 CUDADIR = ${EPREFIX}/opt/cuda
2084 - LIBCUDA = -L\$(CUDADIR)/$(get_libdir) -lcublas -lcudart -lcuda
2085 + LIBCUDA = -L\$(CUDADIR)/$(get_libdir) -lcublas -lcudart
2086 LIB = -pthread -lm -ldl \$(LIBCUDA) \$(LIBBLAS) \$(LIBLAPACK) -lstdc++
2087 EOF
2088 if use fermi; then