1 |
commit: 47af1c14eaed0149ab6a3eb2b040d767a9fca82e |
2 |
Author: spiros <andyspiros <AT> gmail <DOT> com> |
3 |
AuthorDate: Fri Jul 22 21:53:02 2011 +0000 |
4 |
Commit: Andrea Arteaga <andyspiros <AT> gmail <DOT> com> |
5 |
CommitDate: Fri Jul 22 21:53:02 2011 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/auto-numerical-bench.git;a=commit;h=47af1c14 |
7 |
|
8 |
Added support for parallel LU decomposition in the PBLAS module. Much |
9 |
work on the distributed memory framework. |
10 |
|
11 |
--- |
12 |
btl/actions/action_parallel_lu_decomp.hh | 116 ++++++++++++++++++++ |
13 |
.../action_parallel_matrix_vector_product.hh | 19 +--- |
14 |
btl/generic_bench/init/init_function.hh | 2 +- |
15 |
btl/libs/BLACS/blacs_interface.hh | 37 ++++++ |
16 |
btl/libs/LAPACK/lapack_interface.hh | 11 ++- |
17 |
btl/libs/PBLAS/main.cpp | 10 +- |
18 |
btl/libs/PBLAS/pblas.h | 10 ++- |
19 |
btl/libs/PBLAS/pblas_interface.hh | 24 +---- |
20 |
btl/libs/PBLAS/pblas_interface_impl.hh | 12 ++ |
21 |
pblas.py | 2 +- |
22 |
10 files changed, 197 insertions(+), 46 deletions(-) |
23 |
|
24 |
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh |
25 |
new file mode 100644 |
26 |
index 0000000..4882a21 |
27 |
--- /dev/null |
28 |
+++ b/btl/actions/action_parallel_lu_decomp.hh |
29 |
@@ -0,0 +1,116 @@ |
30 |
+#ifndef ACTION_PARALLEL_LU_DECOMP_HH_ |
31 |
+#define ACTION_PARALLEL_LU_DECOMP_HH_ |
32 |
+ |
33 |
+#include "utilities.h" |
34 |
+#include "init/init_function.hh" |
35 |
+#include "init/init_vector.hh" |
36 |
+ |
37 |
+#include "lapack_interface.hh" |
38 |
+#include "STL_interface.hh" |
39 |
+ |
40 |
+#include <string> |
41 |
+ |
42 |
+template<class Interface> |
43 |
+class Action_parallel_lu_decomp { |
44 |
+ |
45 |
+public : |
46 |
+ |
47 |
+ // Constructor |
48 |
+ BTL_DONT_INLINE Action_parallel_lu_decomp( int size ) : _size(size) |
49 |
+ { |
50 |
+ MESSAGE("Action_parallel_lu_decomp Ctor"); |
51 |
+ |
52 |
+ int myid, procnum; |
53 |
+ blacs_pinfo_(&myid, &procnum); |
54 |
+ iamroot = (myid == 0); |
55 |
+ |
56 |
+ // STL matrix and vector initialization |
57 |
+ if (iamroot) { |
58 |
+ init_vector<pseudo_random>(Global_A_stl, size*size); |
59 |
+ } |
60 |
+ |
61 |
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64); |
62 |
+ LocalRows = desc[8]; |
63 |
+ LocalCols = Local_A_stl.size()/desc[8]; |
64 |
+ |
65 |
+ // Generic local matrix and vectors initialization |
66 |
+ Interface::matrix_from_stl(Local_A_ref, Local_A_stl); |
67 |
+ Interface::matrix_from_stl(Local_A , Local_A_stl); |
68 |
+ |
69 |
+ _cost = 2.0*size*size*size/3.0 + static_cast<double>(size*size); |
70 |
+ } |
71 |
+ |
72 |
+ |
73 |
+ // Invalidate copy constructor |
74 |
+ Action_parallel_lu_decomp(const Action_parallel_lu_decomp&) |
75 |
+ { |
76 |
+ INFOS("illegal call to Action_parallel_lu_decomp copy constructor"); |
77 |
+ exit(1); |
78 |
+ } |
79 |
+ |
80 |
+ // Destructor |
81 |
+ ~Action_parallel_lu_decomp() |
82 |
+ { |
83 |
+ MESSAGE("Action_parallel_lu_decomp destructor"); |
84 |
+ |
85 |
+ // Deallocation |
86 |
+ Interface::free_matrix(Local_A_ref, Local_A_stl.size()); |
87 |
+ Interface::free_matrix(Local_A , Local_A_stl.size()); |
88 |
+ } |
89 |
+ |
90 |
+ // Action name |
91 |
+ static inline std::string name() |
92 |
+ { |
93 |
+ return "lu_decomp_" + Interface::name(); |
94 |
+ } |
95 |
+ |
96 |
+ double nb_op_base() |
97 |
+ { |
98 |
+ return _cost; |
99 |
+ } |
100 |
+ |
101 |
+ BTL_DONT_INLINE void initialize() |
102 |
+ { |
103 |
+ Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); |
104 |
+ } |
105 |
+ |
106 |
+ BTL_DONT_INLINE void calculate() |
107 |
+ { |
108 |
+ Interface::parallel_lu_decomp(Local_A, desc); |
109 |
+ } |
110 |
+ |
111 |
+ BTL_DONT_INLINE void check_result() |
112 |
+ { |
113 |
+ /* Gather */ |
114 |
+ typename Interface::stl_matrix Global_Y; |
115 |
+ typename Interface::stl_matrix Local_Y(Local_A_stl.size()); |
116 |
+ Interface::matrix_to_stl(Local_A, Local_Y); |
117 |
+ Interface::gather_matrix(Global_Y, Local_Y, desc); |
118 |
+ |
119 |
+ if (iamroot) { |
120 |
+ |
121 |
+ typename Interface::gene_matrix A; |
122 |
+ Interface::matrix_from_stl(A, Global_A_stl); |
123 |
+ lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size); |
124 |
+ typename Interface::stl_vector correct(A, A+_size*_size); |
125 |
+ typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct); |
126 |
+ if (error > 10e-5) |
127 |
+ cerr << " { !! error : " << error << " !! } "; |
128 |
+ |
129 |
+ Interface::free_matrix(A, _size*_size); |
130 |
+ } |
131 |
+ } |
132 |
+ |
133 |
+private: |
134 |
+ int _size, desc[9], LocalRows, LocalCols; |
135 |
+ double _cost; |
136 |
+ bool iamroot; |
137 |
+ |
138 |
+ typename Interface::stl_matrix Global_A_stl; |
139 |
+ typename Interface::stl_matrix Local_A_stl; |
140 |
+ typename Interface::gene_matrix Local_A_ref; |
141 |
+ typename Interface::gene_matrix Local_A; |
142 |
+}; |
143 |
+ |
144 |
+ |
145 |
+#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */ |
146 |
|
147 |
diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh |
148 |
index 5c97b1d..67e64bf 100644 |
149 |
--- a/btl/actions/action_parallel_matrix_vector_product.hh |
150 |
+++ b/btl/actions/action_parallel_matrix_vector_product.hh |
151 |
@@ -10,8 +10,6 @@ |
152 |
|
153 |
#include "blas.h" |
154 |
|
155 |
-using namespace std; |
156 |
- |
157 |
template<class Interface> |
158 |
class Action_parallel_matrix_vector_product { |
159 |
|
160 |
@@ -42,9 +40,9 @@ public : |
161 |
init_vector<null_function>(Global_y_stl, GlobalRows); |
162 |
} |
163 |
|
164 |
- Interface::scatter_matrix(Global_A_stl, Local_A_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); |
165 |
- Interface::scatter_matrix(Global_x_stl, Local_x_stl, GlobalCols, LocalXCols, BlockRows, BlockCols, LocalXRows, LocalXCols); |
166 |
- Interface::scatter_matrix(Global_y_stl, Local_y_stl, GlobalRows, LocalYCols, BlockRows, BlockCols, LocalYRows, LocalYCols); |
167 |
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, GlobalRows, GlobalCols, BlockRows, BlockCols); |
168 |
+ Interface::scatter_matrix(Global_x_stl, Local_x_stl, descX, GlobalCols, 1, BlockRows, BlockCols); |
169 |
+ Interface::scatter_matrix(Global_y_stl, Local_y_stl, descY, GlobalRows, 1, BlockRows, BlockCols); |
170 |
|
171 |
// generic local matrix and vectors initialization |
172 |
|
173 |
@@ -54,17 +52,6 @@ public : |
174 |
Interface::vector_from_stl(Local_x , Local_x_stl); |
175 |
Interface::vector_from_stl(Local_y_ref, Local_y_stl); |
176 |
Interface::vector_from_stl(Local_y , Local_y_stl); |
177 |
- |
178 |
- // Descinit |
179 |
- int context = Interface::context(); |
180 |
- int info; |
181 |
- int LDA, LDX, LDY; |
182 |
- LDA = std::max(1, LocalRows); |
183 |
- LDX = std::max(1, LocalXRows); |
184 |
- LDY = std::max(1, LocalYRows); |
185 |
- descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDA, &info); |
186 |
- descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDX, &info); |
187 |
- descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDY, &info); |
188 |
} |
189 |
|
190 |
// invalidate copy ctor |
191 |
|
192 |
diff --git a/btl/generic_bench/init/init_function.hh b/btl/generic_bench/init/init_function.hh |
193 |
index 7b3bdba..5401673 100644 |
194 |
--- a/btl/generic_bench/init/init_function.hh |
195 |
+++ b/btl/generic_bench/init/init_function.hh |
196 |
@@ -32,7 +32,7 @@ double simple_function(int index_i, int index_j) |
197 |
|
198 |
double pseudo_random(int index) |
199 |
{ |
200 |
- return std::rand()/double(RAND_MAX); |
201 |
+ return static_cast<int>((std::rand()/double(RAND_MAX)-.5)*20); |
202 |
} |
203 |
|
204 |
double pseudo_random(int index_i, int index_j) |
205 |
|
206 |
diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh |
207 |
index 00aafee..6932150 100644 |
208 |
--- a/btl/libs/BLACS/blacs_interface.hh |
209 |
+++ b/btl/libs/BLACS/blacs_interface.hh |
210 |
@@ -2,7 +2,13 @@ |
211 |
#define BTL_BLACS_INTERFACE_H |
212 |
|
213 |
#include <vector> |
214 |
+#include <algorithm> |
215 |
#include "blacs.h" |
216 |
+extern "C" { |
217 |
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); |
218 |
+ int numroc_(const int*, const int*, const int*, const int*, const int*); |
219 |
+} |
220 |
+ |
221 |
#include "scatter.h" |
222 |
#include "gather.h" |
223 |
|
224 |
@@ -80,6 +86,27 @@ public: |
225 |
scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); |
226 |
} |
227 |
|
228 |
+ static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix, |
229 |
+ int *desc, |
230 |
+ const int& GlobalRows=0, const int& GlobalCols=0, |
231 |
+ const int& BlockRows=0, const int& BlockCols=0 |
232 |
+ ) { |
233 |
+ int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols, |
234 |
+ BlockRows_ = BlockRows, BlockCols_ = BlockCols, |
235 |
+ LocalRows_, LocalCols_; |
236 |
+ const int ctxt = context(); |
237 |
+ scatter(ctxt, GlobalMatrix, LocalMatrix, |
238 |
+ GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_ |
239 |
+ ); |
240 |
+ |
241 |
+ const int iZERO = 0; |
242 |
+ int info; |
243 |
+ const int LLD = std::max(1, LocalRows_); |
244 |
+ descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_, |
245 |
+ &iZERO, &iZERO, &ctxt, &LLD, &info |
246 |
+ ); |
247 |
+ } |
248 |
+ |
249 |
static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, |
250 |
int& GlobalRows, int& GlobalCols, |
251 |
int& BlockRows, int& BlockCols, |
252 |
@@ -88,6 +115,16 @@ public: |
253 |
gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); |
254 |
} |
255 |
|
256 |
+ static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, |
257 |
+ int* desc |
258 |
+ ) { |
259 |
+ int GlobalRows = desc[2], GlobalCols = desc[3], |
260 |
+ BlockRows = desc[4], BlockCols = desc[5], |
261 |
+ LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8]; |
262 |
+ const int ctxt = context(); |
263 |
+ gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); |
264 |
+ } |
265 |
+ |
266 |
|
267 |
}; |
268 |
|
269 |
|
270 |
diff --git a/btl/libs/LAPACK/lapack_interface.hh b/btl/libs/LAPACK/lapack_interface.hh |
271 |
index 33de2be..1840741 100644 |
272 |
--- a/btl/libs/LAPACK/lapack_interface.hh |
273 |
+++ b/btl/libs/LAPACK/lapack_interface.hh |
274 |
@@ -1,3 +1,6 @@ |
275 |
+#ifndef LAPACK_INTERFACE_HH |
276 |
+#define LAPACK_INTERFACE_HH |
277 |
+ |
278 |
#include <../BLAS/c_interface_base.h> |
279 |
#include <complex> |
280 |
|
281 |
@@ -24,8 +27,10 @@ void dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*); |
282 |
#define MAKE_STRING2(S) #S |
283 |
#define MAKE_STRING(S) MAKE_STRING2(S) |
284 |
|
285 |
-#define CAT2(A,B) A##B |
286 |
-#define CAT(A,B) CAT2(A,B) |
287 |
+#ifndef CAT |
288 |
+# define CAT2(A,B) A##B |
289 |
+# define CAT(A,B) CAT2(A,B) |
290 |
+#endif |
291 |
|
292 |
template <typename real> class lapack_interface; |
293 |
|
294 |
@@ -51,3 +56,5 @@ static int zeroint = 0; |
295 |
#include "lapack_interface_impl.hh" |
296 |
#undef SCALAR |
297 |
#undef SCALAR_PREFIX |
298 |
+ |
299 |
+#endif /* LAPACK_INTERFACE_HH */ |
300 |
|
301 |
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp |
302 |
index 4b64f12..a2aae2a 100644 |
303 |
--- a/btl/libs/PBLAS/main.cpp |
304 |
+++ b/btl/libs/PBLAS/main.cpp |
305 |
@@ -1,3 +1,5 @@ |
306 |
+#define DISTRIBUTED |
307 |
+ |
308 |
#include "mpi.h" |
309 |
#include "utilities.h" |
310 |
#include "bench.hh" |
311 |
@@ -5,9 +7,10 @@ |
312 |
#include <iostream> |
313 |
//using namespace std; |
314 |
|
315 |
-#include "blacsinit.hh" |
316 |
#include "pblas_interface.hh" |
317 |
+#include "blacsinit.hh" |
318 |
#include "action_parallel_matrix_vector_product.hh" |
319 |
+#include "action_parallel_lu_decomp.hh" |
320 |
#include "action_parallel_axpy.hh" |
321 |
|
322 |
#include <string> |
323 |
@@ -19,8 +22,9 @@ int main(int argc, char **argv) |
324 |
bool iamroot = blacsinit(&argc, &argv); |
325 |
|
326 |
// distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot); |
327 |
- distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); |
328 |
-// Action_parallel_axpy<pblas_interface<double> > action(8); |
329 |
+// distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); |
330 |
+ distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); |
331 |
+// Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > action(8); |
332 |
// action.initialize(); |
333 |
// action.calculate(); |
334 |
// action.check_result(); |
335 |
|
336 |
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h |
337 |
index adc6c91..2b5860e 100644 |
338 |
--- a/btl/libs/PBLAS/pblas.h |
339 |
+++ b/btl/libs/PBLAS/pblas.h |
340 |
@@ -6,7 +6,7 @@ extern "C" { |
341 |
#endif |
342 |
|
343 |
int numroc_(const int*, const int*, const int*, const int*, const int*); |
344 |
- int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); |
345 |
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); |
346 |
|
347 |
|
348 |
/* Level 1 */ |
349 |
@@ -42,6 +42,14 @@ extern "C" { |
350 |
); |
351 |
|
352 |
|
353 |
+ |
354 |
+ /************* |
355 |
+ * Scalapack * |
356 |
+ *************/ |
357 |
+ void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*); |
358 |
+ void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*); |
359 |
+ |
360 |
+ |
361 |
#ifdef __cplusplus |
362 |
} |
363 |
#endif |
364 |
|
365 |
diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh |
366 |
index b969e4e..cdfb70a 100644 |
367 |
--- a/btl/libs/PBLAS/pblas_interface.hh |
368 |
+++ b/btl/libs/PBLAS/pblas_interface.hh |
369 |
@@ -1,23 +1,3 @@ |
370 |
-//===================================================== |
371 |
-// File : blas_interface.hh |
372 |
-// Author : L. Plagne <laurent.plagne@×××.fr)> |
373 |
-// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 |
374 |
-//===================================================== |
375 |
-// |
376 |
-// This program is free software; you can redistribute it and/or |
377 |
-// modify it under the terms of the GNU General Public License |
378 |
-// as published by the Free Software Foundation; either version 2 |
379 |
-// of the License, or (at your option) any later version. |
380 |
-// |
381 |
-// This program is distributed in the hope that it will be useful, |
382 |
-// but WITHOUT ANY WARRANTY; without even the implied warranty of |
383 |
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
384 |
-// GNU General Public License for more details. |
385 |
-// You should have received a copy of the GNU General Public License |
386 |
-// along with this program; if not, write to the Free Software |
387 |
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
388 |
-// |
389 |
- |
390 |
#include "pblas.h" |
391 |
#include "blacs_interface.hh" |
392 |
|
393 |
@@ -32,7 +12,7 @@ |
394 |
|
395 |
template<class real> class pblas_interface; |
396 |
|
397 |
- |
398 |
+/* |
399 |
static char notrans = 'N'; |
400 |
static char trans = 'T'; |
401 |
static char nonunit = 'N'; |
402 |
@@ -40,7 +20,7 @@ static char lower = 'L'; |
403 |
static char right = 'R'; |
404 |
static char left = 'L'; |
405 |
static int intone = 1; |
406 |
- |
407 |
+*/ |
408 |
|
409 |
#define SCALAR float |
410 |
#define SCALAR_PREFIX s |
411 |
|
412 |
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh |
413 |
index b534a4e..c36c249 100644 |
414 |
--- a/btl/libs/PBLAS/pblas_interface_impl.hh |
415 |
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh |
416 |
@@ -33,6 +33,7 @@ public: |
417 |
real_type alpha = 1., beta = 0.; |
418 |
int iONE = 1; |
419 |
int myid, procnum; |
420 |
+ const char notrans = 'N'; |
421 |
blacs_pinfo_(&myid, &procnum); |
422 |
|
423 |
PBLAS_FUNC(gemv)(¬rans, &GlobalRows, &GlobalCols, |
424 |
@@ -42,5 +43,16 @@ public: |
425 |
); |
426 |
|
427 |
} |
428 |
+ |
429 |
+ |
430 |
+ static inline void parallel_lu_decomp(gene_matrix& X, const int* desc) |
431 |
+ { |
432 |
+ const int GlobalRows = desc[2], GlobalCols = desc[3]; |
433 |
+ const int iONE = 1; |
434 |
+ int info; |
435 |
+ std::vector<int> ipiv(desc[8] + desc[4]); |
436 |
+ PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, |
437 |
+ &ipiv[0], &info); |
438 |
+ } |
439 |
}; |
440 |
|
441 |
|
442 |
diff --git a/pblas.py b/pblas.py |
443 |
index e1ec738..6f0f6cd 100644 |
444 |
--- a/pblas.py |
445 |
+++ b/pblas.py |
446 |
@@ -5,7 +5,7 @@ numproc = 4 |
447 |
class Module(btlbase.BTLBase): |
448 |
def _initialize(self): |
449 |
self.libname = "scalapack" |
450 |
- self.avail = ['axpy', 'matrix_vector'] |
451 |
+ self.avail = ['axpy', 'matrix_vector', 'lu_decomp'] |
452 |
|
453 |
def _parse_args(self, args): |
454 |
# Parse arguments |