1 |
commit: 073daf866484570b163359dc3e50a1c9f57acfa7 |
2 |
Author: spiros <andyspiros <AT> gmail <DOT> com> |
3 |
AuthorDate: Fri Jul 22 22:51:54 2011 +0000 |
4 |
Commit: Andrea Arteaga <andyspiros <AT> gmail <DOT> com> |
5 |
CommitDate: Fri Jul 22 22:51:54 2011 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/auto-numerical-bench.git;a=commit;h=073daf86 |
7 |
|
8 |
Added cholesky. Solved some issues. |
9 |
|
10 |
--- |
11 |
...el_lu_decomp.hh => action_parallel_cholesky.hh} | 62 +++++++++---------- |
12 |
btl/actions/action_parallel_lu_decomp.hh | 36 ++++++------ |
13 |
btl/generic_bench/init/init_function.hh | 2 +- |
14 |
btl/libs/PBLAS/main.cpp | 42 +++++++++++--- |
15 |
btl/libs/PBLAS/pblas.h | 5 ++ |
16 |
btl/libs/PBLAS/pblas_interface_impl.hh | 10 +++ |
17 |
pblas.py | 4 +- |
18 |
7 files changed, 99 insertions(+), 62 deletions(-) |
19 |
|
20 |
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_cholesky.hh |
21 |
similarity index 51% |
22 |
copy from btl/actions/action_parallel_lu_decomp.hh |
23 |
copy to btl/actions/action_parallel_cholesky.hh |
24 |
index 4882a21..f89eb98 100644 |
25 |
--- a/btl/actions/action_parallel_lu_decomp.hh |
26 |
+++ b/btl/actions/action_parallel_cholesky.hh |
27 |
@@ -1,5 +1,5 @@ |
28 |
-#ifndef ACTION_PARALLEL_LU_DECOMP_HH_ |
29 |
-#define ACTION_PARALLEL_LU_DECOMP_HH_ |
30 |
+#ifndef ACTION_PARALLEL_CHOLESKY_HH_ |
31 |
+#define ACTION_PARALLEL_CHOLESKY_HH_ |
32 |
|
33 |
#include "utilities.h" |
34 |
#include "init/init_function.hh" |
35 |
@@ -11,14 +11,15 @@ |
36 |
#include <string> |
37 |
|
38 |
template<class Interface> |
39 |
-class Action_parallel_lu_decomp { |
40 |
+class Action_parallel_cholesky { |
41 |
+ typedef lapack_interface<typename Interface::real_type> LapackInterface; |
42 |
|
43 |
public : |
44 |
|
45 |
// Constructor |
46 |
- BTL_DONT_INLINE Action_parallel_lu_decomp( int size ) : _size(size) |
47 |
+ BTL_DONT_INLINE Action_parallel_cholesky( int size ) : _size(size) |
48 |
{ |
49 |
- MESSAGE("Action_parallel_lu_decomp Ctor"); |
50 |
+ MESSAGE("Action_parallel_cholesky Ctor"); |
51 |
|
52 |
int myid, procnum; |
53 |
blacs_pinfo_(&myid, &procnum); |
54 |
@@ -26,7 +27,16 @@ public : |
55 |
|
56 |
// STL matrix and vector initialization |
57 |
if (iamroot) { |
58 |
- init_vector<pseudo_random>(Global_A_stl, size*size); |
59 |
+ typename LapackInterface::stl_matrix temp_stl; |
60 |
+ init_matrix_symm<pseudo_random>(temp_stl, size); |
61 |
+ Global_A_stl.reserve(size*size); |
62 |
+ const double add = 5000./size; |
63 |
+ for (int r = 0; r < size; ++r) |
64 |
+ for (int c = 0; c < size; ++c) |
65 |
+ if (r==c) |
66 |
+ Global_A_stl.push_back((std::abs(temp_stl[r][c])+add)*size); |
67 |
+ else |
68 |
+ Global_A_stl.push_back(temp_stl[r][c]); |
69 |
} |
70 |
|
71 |
Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64); |
72 |
@@ -37,21 +47,25 @@ public : |
73 |
Interface::matrix_from_stl(Local_A_ref, Local_A_stl); |
74 |
Interface::matrix_from_stl(Local_A , Local_A_stl); |
75 |
|
76 |
- _cost = 2.0*size*size*size/3.0 + static_cast<double>(size*size); |
77 |
+ _cost = 0; |
78 |
+ for (int j=0; j<_size; ++j) { |
79 |
+ double r = std::max(_size - j -1,0); |
80 |
+ _cost += 2*(r*j+r+j); |
81 |
+ } |
82 |
} |
83 |
|
84 |
|
85 |
// Invalidate copy constructor |
86 |
- Action_parallel_lu_decomp(const Action_parallel_lu_decomp&) |
87 |
+ Action_parallel_cholesky(const Action_parallel_cholesky&) |
88 |
{ |
89 |
- INFOS("illegal call to Action_parallel_lu_decomp copy constructor"); |
90 |
+ INFOS("illegal call to Action_parallel_cholesky copy constructor"); |
91 |
exit(1); |
92 |
} |
93 |
|
94 |
// Destructor |
95 |
- ~Action_parallel_lu_decomp() |
96 |
+ ~Action_parallel_cholesky() |
97 |
{ |
98 |
- MESSAGE("Action_parallel_lu_decomp destructor"); |
99 |
+ MESSAGE("Action_parallel_cholesky destructor"); |
100 |
|
101 |
// Deallocation |
102 |
Interface::free_matrix(Local_A_ref, Local_A_stl.size()); |
103 |
@@ -61,7 +75,7 @@ public : |
104 |
// Action name |
105 |
static inline std::string name() |
106 |
{ |
107 |
- return "lu_decomp_" + Interface::name(); |
108 |
+ return "cholesky_" + Interface::name(); |
109 |
} |
110 |
|
111 |
double nb_op_base() |
112 |
@@ -76,31 +90,14 @@ public : |
113 |
|
114 |
BTL_DONT_INLINE void calculate() |
115 |
{ |
116 |
- Interface::parallel_lu_decomp(Local_A, desc); |
117 |
+ Interface::parallel_cholesky(Local_A, desc); |
118 |
} |
119 |
|
120 |
BTL_DONT_INLINE void check_result() |
121 |
{ |
122 |
- /* Gather */ |
123 |
- typename Interface::stl_matrix Global_Y; |
124 |
- typename Interface::stl_matrix Local_Y(Local_A_stl.size()); |
125 |
- Interface::matrix_to_stl(Local_A, Local_Y); |
126 |
- Interface::gather_matrix(Global_Y, Local_Y, desc); |
127 |
- |
128 |
- if (iamroot) { |
129 |
- |
130 |
- typename Interface::gene_matrix A; |
131 |
- Interface::matrix_from_stl(A, Global_A_stl); |
132 |
- lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size); |
133 |
- typename Interface::stl_vector correct(A, A+_size*_size); |
134 |
- typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct); |
135 |
- if (error > 10e-5) |
136 |
- cerr << " { !! error : " << error << " !! } "; |
137 |
- |
138 |
- Interface::free_matrix(A, _size*_size); |
139 |
- } |
140 |
} |
141 |
|
142 |
+ |
143 |
private: |
144 |
int _size, desc[9], LocalRows, LocalCols; |
145 |
double _cost; |
146 |
@@ -112,5 +109,4 @@ private: |
147 |
typename Interface::gene_matrix Local_A; |
148 |
}; |
149 |
|
150 |
- |
151 |
-#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */ |
152 |
+#endif /* ACTION_PARALLEL_CHOLESKY_HH_ */ |
153 |
|
154 |
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh |
155 |
index 4882a21..18b4ac7 100644 |
156 |
--- a/btl/actions/action_parallel_lu_decomp.hh |
157 |
+++ b/btl/actions/action_parallel_lu_decomp.hh |
158 |
@@ -81,24 +81,24 @@ public : |
159 |
|
160 |
BTL_DONT_INLINE void check_result() |
161 |
{ |
162 |
- /* Gather */ |
163 |
- typename Interface::stl_matrix Global_Y; |
164 |
- typename Interface::stl_matrix Local_Y(Local_A_stl.size()); |
165 |
- Interface::matrix_to_stl(Local_A, Local_Y); |
166 |
- Interface::gather_matrix(Global_Y, Local_Y, desc); |
167 |
- |
168 |
- if (iamroot) { |
169 |
- |
170 |
- typename Interface::gene_matrix A; |
171 |
- Interface::matrix_from_stl(A, Global_A_stl); |
172 |
- lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size); |
173 |
- typename Interface::stl_vector correct(A, A+_size*_size); |
174 |
- typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct); |
175 |
- if (error > 10e-5) |
176 |
- cerr << " { !! error : " << error << " !! } "; |
177 |
- |
178 |
- Interface::free_matrix(A, _size*_size); |
179 |
- } |
180 |
+// /* Gather */ |
181 |
+// typename Interface::stl_matrix Global_Y; |
182 |
+// typename Interface::stl_matrix Local_Y(Local_A_stl.size()); |
183 |
+// Interface::matrix_to_stl(Local_A, Local_Y); |
184 |
+// Interface::gather_matrix(Global_Y, Local_Y, desc); |
185 |
+// |
186 |
+// if (iamroot) { |
187 |
+// |
188 |
+// typename Interface::gene_matrix A; |
189 |
+// Interface::matrix_from_stl(A, Global_A_stl); |
190 |
+// lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size); |
191 |
+// typename Interface::stl_vector correct(A, A+_size*_size); |
192 |
+// typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct); |
193 |
+// if (error > 10e-5) |
194 |
+// cerr << " { !! error : " << error << " !! } "; |
195 |
+// |
196 |
+// Interface::free_matrix(A, _size*_size); |
197 |
+// } |
198 |
} |
199 |
|
200 |
private: |
201 |
|
202 |
diff --git a/btl/generic_bench/init/init_function.hh b/btl/generic_bench/init/init_function.hh |
203 |
index 5401673..7b3bdba 100644 |
204 |
--- a/btl/generic_bench/init/init_function.hh |
205 |
+++ b/btl/generic_bench/init/init_function.hh |
206 |
@@ -32,7 +32,7 @@ double simple_function(int index_i, int index_j) |
207 |
|
208 |
double pseudo_random(int index) |
209 |
{ |
210 |
- return static_cast<int>((std::rand()/double(RAND_MAX)-.5)*20); |
211 |
+ return std::rand()/double(RAND_MAX); |
212 |
} |
213 |
|
214 |
double pseudo_random(int index_i, int index_j) |
215 |
|
216 |
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp |
217 |
index a2aae2a..e7b636b 100644 |
218 |
--- a/btl/libs/PBLAS/main.cpp |
219 |
+++ b/btl/libs/PBLAS/main.cpp |
220 |
@@ -9,9 +9,11 @@ |
221 |
|
222 |
#include "pblas_interface.hh" |
223 |
#include "blacsinit.hh" |
224 |
+ |
225 |
+#include "action_parallel_axpy.hh" |
226 |
#include "action_parallel_matrix_vector_product.hh" |
227 |
#include "action_parallel_lu_decomp.hh" |
228 |
-#include "action_parallel_axpy.hh" |
229 |
+#include "action_parallel_cholesky.hh" |
230 |
|
231 |
#include <string> |
232 |
|
233 |
@@ -21,13 +23,37 @@ int main(int argc, char **argv) |
234 |
{ |
235 |
bool iamroot = blacsinit(&argc, &argv); |
236 |
|
237 |
-// distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot); |
238 |
-// distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); |
239 |
- distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); |
240 |
-// Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > action(8); |
241 |
-// action.initialize(); |
242 |
-// action.calculate(); |
243 |
-// action.check_result(); |
244 |
+ bool |
245 |
+ general_solve=false, least_squares=false, lu_decomp=false, cholesky=false, |
246 |
+ symm_ev=false |
247 |
+ ; |
248 |
+ |
249 |
+ |
250 |
+ for (int i = 1; i < argc; ++i) { |
251 |
+ std::string arg = argv[i]; |
252 |
+ if (arg == "general_solve") general_solve = true; |
253 |
+ else if (arg == "least_squares") least_squares = true; |
254 |
+ else if (arg == "lu_decomp") lu_decomp = true; |
255 |
+ else if (arg == "cholesky") cholesky = true; |
256 |
+ else if (arg == "symm_ev") symm_ev = true; |
257 |
+ } |
258 |
+ |
259 |
+ |
260 |
+// if (general_solve) |
261 |
+// distr_bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); |
262 |
+ |
263 |
+// if (least_squares) |
264 |
+// distr_bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); |
265 |
+ |
266 |
+ if (lu_decomp) |
267 |
+ distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); |
268 |
+ |
269 |
+ if (cholesky) |
270 |
+ distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); |
271 |
+ |
272 |
+// if (symm_ev) |
273 |
+// distr_bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); |
274 |
+ |
275 |
|
276 |
int iZERO = 0; |
277 |
blacs_exit_(&iZERO); |
278 |
|
279 |
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h |
280 |
index 2b5860e..973b91c 100644 |
281 |
--- a/btl/libs/PBLAS/pblas.h |
282 |
+++ b/btl/libs/PBLAS/pblas.h |
283 |
@@ -46,9 +46,14 @@ extern "C" { |
284 |
/************* |
285 |
* Scalapack * |
286 |
*************/ |
287 |
+ // lu_decomp |
288 |
void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*); |
289 |
void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*); |
290 |
|
291 |
+ // cholesky |
292 |
+ void pspotrf_(const char*, const int*, float*, const int*, const int*, const int*, int*); |
293 |
+ void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*); |
294 |
+ |
295 |
|
296 |
#ifdef __cplusplus |
297 |
} |
298 |
|
299 |
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh |
300 |
index c36c249..1dbf3b9 100644 |
301 |
--- a/btl/libs/PBLAS/pblas_interface_impl.hh |
302 |
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh |
303 |
@@ -54,5 +54,15 @@ public: |
304 |
PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, |
305 |
&ipiv[0], &info); |
306 |
} |
307 |
+ |
308 |
+ static inline void parallel_cholesky(gene_matrix& X, const int* desc) |
309 |
+ { |
310 |
+ const int N = desc[2], iONE = 1; |
311 |
+ const char UPLO = 'U'; |
312 |
+ int info; |
313 |
+ PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info); |
314 |
+// if (info != 0) |
315 |
+// cerr << " { cholesky error : " << info << " } "; |
316 |
+ } |
317 |
}; |
318 |
|
319 |
|
320 |
diff --git a/pblas.py b/pblas.py |
321 |
index 6f0f6cd..9cd087e 100644 |
322 |
--- a/pblas.py |
323 |
+++ b/pblas.py |
324 |
@@ -5,7 +5,7 @@ numproc = 4 |
325 |
class Module(btlbase.BTLBase): |
326 |
def _initialize(self): |
327 |
self.libname = "scalapack" |
328 |
- self.avail = ['axpy', 'matrix_vector', 'lu_decomp'] |
329 |
+ self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky'] |
330 |
|
331 |
def _parse_args(self, args): |
332 |
# Parse arguments |
333 |
@@ -61,7 +61,7 @@ class PBLASTest(btlbase.BTLTest): |
334 |
|
335 |
@staticmethod |
336 |
def _btl_includes(): |
337 |
- return ["libs/BLAS", "libs/BLACS", "libs/PBLAS"] |
338 |
+ return ["libs/BLAS", "libs/BLACS", "libs/PBLAS", "libs/STL"] |
339 |
|
340 |
def _btl_defines(self): |
341 |
return ["PBLASNAME="+self.libname] |
342 |
\ No newline at end of file |