1 |
commit: cb193b088a585330d86c73498fad309665b929bd |
2 |
Author: Andrea Arteaga <andyspiros <AT> gmail <DOT> com> |
3 |
AuthorDate: Sun Sep 30 00:03:51 2012 +0000 |
4 |
Commit: Andrea Arteaga <andyspiros <AT> gmail <DOT> com> |
5 |
CommitDate: Sun Sep 30 00:03:51 2012 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/auto-numerical-bench.git;a=commit;h=cb193b08 |
7 |
|
8 |
Complete the implementation of the CInterface for BLAS actions. |
9 |
|
10 |
--- |
11 |
btl/NumericInterface/NI_internal/CDeclarations.hpp | 62 +++++++- |
12 |
btl/NumericInterface/NI_internal/CInterface.hpp | 164 +++++++++++++++++++- |
13 |
.../NI_internal/FortranDeclarations.hpp | 1 + |
14 |
.../NI_internal/FortranInterface.hpp | 16 +-- |
15 |
btl/actions/action_TriSolveVector.hpp | 14 +- |
16 |
5 files changed, 227 insertions(+), 30 deletions(-) |
17 |
|
18 |
diff --git a/btl/NumericInterface/NI_internal/CDeclarations.hpp b/btl/NumericInterface/NI_internal/CDeclarations.hpp |
19 |
index 0109559..872cd2b 100644 |
20 |
--- a/btl/NumericInterface/NI_internal/CDeclarations.hpp |
21 |
+++ b/btl/NumericInterface/NI_internal/CDeclarations.hpp |
22 |
@@ -40,11 +40,65 @@ const int CblasRight = 142; |
23 |
|
24 |
// Cblas functions |
25 |
extern "C" { |
26 |
- void cblas_sgemv(int, int, int, int, float, const float*, int, |
27 |
- const float*, int, float, float*, int); |
28 |
|
29 |
- void cblas_dgemv(int, int, int, int, double, const double*, int, |
30 |
- const double*, int, double, double*, int); |
31 |
+ |
32 |
+ |
33 |
+/**************** |
34 |
+ * LEVEL 1 BLAS * |
35 |
+ ****************/ |
36 |
+ |
37 |
+ void cblas_srot(int, float*, int, float*, int, float, float); |
38 |
+ void cblas_drot(int, double*, int, double*, int, double, double); |
39 |
+ |
40 |
+ void cblas_saxpy(int, float, const float*, int, float*, int); |
41 |
+ void cblas_daxpy(int, double, const double*, int, double*, int); |
42 |
+ |
43 |
+ float cblas_sdot(int, const float*, int, const float*, int); |
44 |
+ double cblas_ddot(int, const double*, int, const double*, int); |
45 |
+ |
46 |
+ float cblas_snrm2(int, const float*, int); |
47 |
+ double cblas_dnrm2(int, const double*, int); |
48 |
+ |
49 |
+ |
50 |
+ |
51 |
+ |
52 |
+ /**************** |
53 |
+ * LEVEL 2 BLAS * |
54 |
+ ****************/ |
55 |
+ |
56 |
+ void cblas_sgemv(int, int, int, int, float, const float*, int, const float*, int, float, float*, int); |
57 |
+ void cblas_dgemv(int, int, int, int, double, const double*, int, const double*, int, double, double*, int); |
58 |
+ |
59 |
+ void cblas_ssymv(int, int, int, float, const float*, int, const float*, int, float, float*, int); |
60 |
+ void cblas_dsymv(int, int, int, double, const double*, int, const double*, int, double, double*, int); |
61 |
+ |
62 |
+ void cblas_strmv(int, int, int, int, int, const float*, int, float*, int); |
63 |
+ void cblas_dtrmv(int, int, int, int, int, const double*, int, double*, int); |
64 |
+ |
65 |
+ void cblas_strsv(int, int, int, int, int, const float*, int, float*, int); |
66 |
+ void cblas_dtrsv(int, int, int, int, int, const double*, int, double*, int); |
67 |
+ |
68 |
+ void cblas_sger(int, int, int, float, const float*, int, const float*, int, float*, int); |
69 |
+ void cblas_dger(int, int, int, double, const double*, int, const double*, int, double*, int); |
70 |
+ |
71 |
+ void cblas_ssyr2(int, int, int, float, const float*, int, const float*, int, float*, int); |
72 |
+ void cblas_dsyr2(int, int, int, double, const double*, int, const double*, int, double*, int); |
73 |
+ |
74 |
+ |
75 |
+ |
76 |
+ |
77 |
+ /**************** |
78 |
+ * LEVEL 3 BLAS * |
79 |
+ ****************/ |
80 |
+ |
81 |
+ void cblas_sgemm(int, int, int, int, int, int, float, const float*, int, const float*, int, float, float*, int); |
82 |
+ void cblas_dgemm(int, int, int, int, int, int, double, const double*, int, const double*, int, double, double*, int); |
83 |
+ |
84 |
+ void cblas_strmm(int, int, int, int, int, int, int, float, const float*, int, float*, int); |
85 |
+ void cblas_dtrmm(int, int, int, int, int, int, int, double, const double*, int, double*, int); |
86 |
+ |
87 |
+ void cblas_strsm(int, int, int, int, int, int, int, float, const float*, int, float*, int); |
88 |
+ void cblas_dtrsm(int, int, int, int, int, int, int, double, const double*, int, double*, int); |
89 |
|
90 |
} |
91 |
|
92 |
|
93 |
diff --git a/btl/NumericInterface/NI_internal/CInterface.hpp b/btl/NumericInterface/NI_internal/CInterface.hpp |
94 |
index a067144..c6db258 100644 |
95 |
--- a/btl/NumericInterface/NI_internal/CInterface.hpp |
96 |
+++ b/btl/NumericInterface/NI_internal/CInterface.hpp |
97 |
@@ -23,6 +23,11 @@ |
98 |
|
99 |
#define CBLASFUNC(F) CAT(cblas_, CAT(NI_SCALARPREFIX, F)) |
100 |
|
101 |
+#ifndef NI_NAME |
102 |
+# define NI_NAME CAT(CAT(CInterface,$),NI_SCALAR) |
103 |
+#endif |
104 |
+ |
105 |
+ |
106 |
template<> |
107 |
class NumericInterface<NI_SCALAR> |
108 |
{ |
109 |
@@ -32,19 +37,162 @@ public: |
110 |
public: |
111 |
static std::string name() |
112 |
{ |
113 |
- std::string name = "CInterface<"; |
114 |
- name += MAKE_STRING(NI_SCALAR); |
115 |
- name += ">"; |
116 |
+ std::string name = MAKE_STRING(NI_NAME); |
117 |
return name; |
118 |
} |
119 |
|
120 |
|
121 |
- static void matrixVector( |
122 |
- const int& M, const int& N, const Scalar& alpha, const Scalar* A, |
123 |
- const Scalar* x, const Scalar& beta, Scalar* y |
124 |
- ) |
125 |
+ |
126 |
+ /**************** |
127 |
+ * LEVEL 1 BLAS * |
128 |
+ ****************/ |
129 |
+ |
130 |
+ static void rot(const int& N, Scalar* x, Scalar* y, |
131 |
+ const Scalar& cosine, const Scalar& sine) |
132 |
+ { |
133 |
+ CBLASFUNC(rot)(N, x, 1, y, 1, cosine, sine); |
134 |
+ } |
135 |
+ |
136 |
+ |
137 |
+ static void axpy(const int& N, const Scalar& alpha, |
138 |
+ const Scalar* x, Scalar* y) |
139 |
+ { |
140 |
+ CBLASFUNC(axpy)(N, alpha, x, 1, y, 1); |
141 |
+ } |
142 |
+ |
143 |
+ static Scalar dot(const int& N, const Scalar* x, const Scalar* y) |
144 |
+ { |
145 |
+ return CBLASFUNC(dot)(N, x, 1, y, 1); |
146 |
+ } |
147 |
+ |
148 |
+ static Scalar norm(const int& N, const Scalar* x) |
149 |
+ { |
150 |
+ return CBLASFUNC(nrm2)(N, x, 1); |
151 |
+ } |
152 |
+ |
153 |
+ |
154 |
+ |
155 |
+ /**************** |
156 |
+ * LEVEL 2 BLAS * |
157 |
+ ****************/ |
158 |
+ |
159 |
+ static void MatrixVector(const bool& trans, const int& M, const int& N, |
160 |
+ const Scalar& alpha, const Scalar* A, const Scalar* x, |
161 |
+ const Scalar& beta, Scalar* y) |
162 |
{ |
163 |
- CBLASFUNC(gemv)(CblasColMajor, CblasNoTrans, M, N, alpha, A, M, |
164 |
+ const int LDA = trans ? N : M; |
165 |
+ const int tA = trans ? CblasTrans : CblasNoTrans; |
166 |
+ CBLASFUNC(gemv)(CblasColMajor, tA, M, N, alpha, A, LDA, |
167 |
x, 1, beta, y, 1); |
168 |
} |
169 |
+ |
170 |
+ static void SymMatrixVector(const char& uplo, const int& N, |
171 |
+ const Scalar& alpha, const Scalar* A, const Scalar* x, |
172 |
+ const Scalar& beta, Scalar* y) |
173 |
+ { |
174 |
+ int uplo_ = -1; |
175 |
+ if (uplo == 'u' || uplo == 'U') |
176 |
+ uplo_ = CblasUpper; |
177 |
+ else if (uplo == 'l' || uplo == 'L') |
178 |
+ uplo_ = CblasLower; |
179 |
+ |
180 |
+ CBLASFUNC(symv)(CblasColMajor, uplo_, N, alpha, A, N, x, 1, beta, y, 1); |
181 |
+ } |
182 |
+ |
183 |
+ static void TriMatrixVector(const char& uplo, const int& N, |
184 |
+ const Scalar* A, Scalar* x) |
185 |
+ { |
186 |
+ int uplo_ = -1; |
187 |
+ if (uplo == 'u' || uplo == 'U') |
188 |
+ uplo_ = CblasUpper; |
189 |
+ else if (uplo == 'l' || uplo == 'L') |
190 |
+ uplo_ = CblasLower; |
191 |
+ |
192 |
+ CBLASFUNC(trmv)(CblasColMajor, uplo_, CblasNoTrans, CblasNonUnit, N, |
193 |
+ A, N, x, 1); |
194 |
+ } |
195 |
+ |
196 |
+ static void TriSolveVector(const char& uplo, |
197 |
+ const int& N, const Scalar* A, Scalar* x) |
198 |
+ { |
199 |
+ int uplo_ = -1; |
200 |
+ if (uplo == 'u' || uplo == 'U') |
201 |
+ uplo_ = CblasUpper; |
202 |
+ else if (uplo == 'l' || uplo == 'L') |
203 |
+ uplo_ = CblasLower; |
204 |
+ |
205 |
+ CBLASFUNC(trsv)(CblasColMajor, uplo_, CblasNoTrans, CblasNonUnit, N, |
206 |
+ A, N, x, 1); |
207 |
+ } |
208 |
+ |
209 |
+ static void Rank1Update(const int& M, const int& N, const Scalar& alpha, |
210 |
+ const Scalar* x, const Scalar* y, Scalar* A) |
211 |
+ { |
212 |
+ CBLASFUNC(ger)(CblasColMajor, M, N, alpha, x, 1, y, 1, A, M); |
213 |
+ } |
214 |
+ |
215 |
+ static void Rank2Update(const char& uplo, const int& N, const Scalar& alpha, |
216 |
+ const Scalar* x, const Scalar* y, Scalar* A) |
217 |
+ { |
218 |
+ int uplo_ = -1; |
219 |
+ if (uplo == 'u' || uplo == 'U') |
220 |
+ uplo_ = CblasUpper; |
221 |
+ else if (uplo == 'l' || uplo == 'L') |
222 |
+ uplo_ = CblasLower; |
223 |
+ |
224 |
+ CBLASFUNC(syr2)(CblasColMajor, uplo_, N, alpha, x, 1, y, 1, A, N); |
225 |
+ } |
226 |
+ |
227 |
+ |
228 |
+ |
229 |
+ /**************** |
230 |
+ * LEVEL 3 BLAS * |
231 |
+ ****************/ |
232 |
+ |
233 |
+ static void MatrixMatrix(const bool& transA, const bool& transB, |
234 |
+ const int& M, const int& N, const int& K, |
235 |
+ const Scalar& alpha, const Scalar* A, const Scalar* B, |
236 |
+ const Scalar& beta, Scalar* C) |
237 |
+ { |
238 |
+ int LDA = M, LDB = K; |
239 |
+ int tA = CblasNoTrans, tB = CblasNoTrans; |
240 |
+ |
241 |
+ if (transA) { |
242 |
+ LDA = K; |
243 |
+ tA = CblasTrans; |
244 |
+ } |
245 |
+ if (transB) { |
246 |
+ LDB = N; |
247 |
+ tB = CblasTrans; |
248 |
+ } |
249 |
+ |
250 |
+ CBLASFUNC(gemm)(CblasColMajor, tA, tB, M, N, K, alpha, A, LDA, B, LDB, |
251 |
+ beta, C, M); |
252 |
+ } |
253 |
+ |
254 |
+ static void TriMatrixMatrix(const char& uplo, |
255 |
+ const int& M, const int& N, const Scalar* A, Scalar* B) |
256 |
+ { |
257 |
+ int uplo_ = -1; |
258 |
+ if (uplo == 'u' || uplo == 'U') |
259 |
+ uplo_ = CblasUpper; |
260 |
+ else if (uplo == 'l' || uplo == 'L') |
261 |
+ uplo_ = CblasLower; |
262 |
+ |
263 |
+ CBLASFUNC(trmm)(CblasColMajor, CblasLeft, uplo_, CblasNoTrans, |
264 |
+ CblasNonUnit, M, N, 1., A, M, B, M); |
265 |
+ } |
266 |
+ |
267 |
+ static void TriSolveMatrix(const char& uplo, |
268 |
+ const int& M, const int& N, const Scalar* A, Scalar *B) |
269 |
+ { |
270 |
+ int uplo_ = -1; |
271 |
+ if (uplo == 'u' || uplo == 'U') |
272 |
+ uplo_ = CblasUpper; |
273 |
+ else if (uplo == 'l' || uplo == 'L') |
274 |
+ uplo_ = CblasLower; |
275 |
+ |
276 |
+ CBLASFUNC(trsm)(CblasColMajor, CblasLeft, uplo_, CblasNoTrans, |
277 |
+ CblasNonUnit, M, N, 1., A, M, B, M); |
278 |
+ } |
279 |
}; |
280 |
|
281 |
diff --git a/btl/NumericInterface/NI_internal/FortranDeclarations.hpp b/btl/NumericInterface/NI_internal/FortranDeclarations.hpp |
282 |
index 76dfc01..35bce39 100644 |
283 |
--- a/btl/NumericInterface/NI_internal/FortranDeclarations.hpp |
284 |
+++ b/btl/NumericInterface/NI_internal/FortranDeclarations.hpp |
285 |
@@ -78,6 +78,7 @@ extern "C" { |
286 |
|
287 |
void strsm_(const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); |
288 |
void dtrsm_(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); |
289 |
+ |
290 |
} |
291 |
|
292 |
|
293 |
|
294 |
diff --git a/btl/NumericInterface/NI_internal/FortranInterface.hpp b/btl/NumericInterface/NI_internal/FortranInterface.hpp |
295 |
index 576145d..eea5898 100644 |
296 |
--- a/btl/NumericInterface/NI_internal/FortranInterface.hpp |
297 |
+++ b/btl/NumericInterface/NI_internal/FortranInterface.hpp |
298 |
@@ -129,7 +129,7 @@ public: |
299 |
* LEVEL 3 BLAS * |
300 |
****************/ |
301 |
|
302 |
- static void matrixMatrix(const bool& transA, const bool& transB, |
303 |
+ static void MatrixMatrix(const bool& transA, const bool& transB, |
304 |
const int& M, const int& N, const int& K, |
305 |
const Scalar& alpha, const Scalar* A, const Scalar* B, |
306 |
const Scalar& beta, Scalar* C) |
307 |
@@ -150,13 +150,13 @@ public: |
308 |
&beta, C, &M); |
309 |
} |
310 |
|
311 |
- static void triangularMatrixMatrix(const char& uplo, |
312 |
+ static void TriMatrixMatrix(const char& uplo, |
313 |
const int& M, const int& N, const Scalar* A, Scalar* B) |
314 |
{ |
315 |
FORTFUNC(trmm)("L", &uplo, "N", "N", &M, &N, &fONE, A, &M, B, &M); |
316 |
} |
317 |
|
318 |
- static void triangularSolveMatrix(const char& uplo, |
319 |
+ static void TriSolveMatrix(const char& uplo, |
320 |
const int& M, const int& N, const Scalar* A, Scalar *B) |
321 |
{ |
322 |
FORTFUNC(trsm)("L", &uplo, "N", "N", &M, &N, &fONE, A, &M, B, &M); |
323 |
@@ -168,13 +168,3 @@ const int NumericInterface<NI_SCALAR>::ONE = 1; |
324 |
const NI_SCALAR NumericInterface<NI_SCALAR>::fONE = 1.; |
325 |
const char NumericInterface<NI_SCALAR>::NoTrans = 'N'; |
326 |
const char NumericInterface<NI_SCALAR>::Trans = 'T'; |
327 |
- |
328 |
- |
329 |
- |
330 |
- |
331 |
- |
332 |
- |
333 |
- |
334 |
- |
335 |
- |
336 |
- |
337 |
|
338 |
diff --git a/btl/actions/action_TriSolveVector.hpp b/btl/actions/action_TriSolveVector.hpp |
339 |
index 2bfefb8..6be18b0 100644 |
340 |
--- a/btl/actions/action_TriSolveVector.hpp |
341 |
+++ b/btl/actions/action_TriSolveVector.hpp |
342 |
@@ -37,10 +37,14 @@ public: |
343 |
// Constructor |
344 |
Action_TriSolveVector(int size) |
345 |
: _size(size), lc(10), |
346 |
- A(lc.fillVector<Scalar>(size*size)), x(lc.fillVector<Scalar>(size)), |
347 |
+ A(lc.fillVector<Scalar>(size*size)), b(lc.fillVector<Scalar>(size)), |
348 |
x_work(size) |
349 |
{ |
350 |
MESSAGE("Action_TriSolveVector Constructor"); |
351 |
+ |
352 |
+ // Adding size to the diagonal of A to make it invertible |
353 |
+ for (int i = 0; i < size; ++i) |
354 |
+ A[i+size*i] += size; |
355 |
} |
356 |
|
357 |
// Action name |
358 |
@@ -54,7 +58,7 @@ public: |
359 |
} |
360 |
|
361 |
inline void initialize(){ |
362 |
- std::copy(x.begin(), x.end(), x_work.begin()); |
363 |
+ std::copy(b.begin(), b.end(), x_work.begin()); |
364 |
} |
365 |
|
366 |
inline void calculate() { |
367 |
@@ -65,15 +69,15 @@ public: |
368 |
initialize(); |
369 |
calculate(); |
370 |
Interface::TriMatrixVector('U', _size, &A[0], &x_work[0]); |
371 |
- Interface::axpy(_size, -1., &x[0], &x_work[0]); |
372 |
+ Interface::axpy(_size, -1., &b[0], &x_work[0]); |
373 |
return Interface::norm(_size, &x_work[0]); |
374 |
} |
375 |
|
376 |
-private: |
377 |
+//private: |
378 |
const int _size; |
379 |
LinearCongruential<> lc; |
380 |
|
381 |
- const vector_t A, x; |
382 |
+ vector_t A, b; |
383 |
vector_t x_work; |
384 |
|
385 |
}; |