1 #include <hara.h>
2 #include <distributed/distributed_hara_handle.h>
3 #include <distributed/distributed_hmatrix.h>
4 #include <distributed/distributed_geometric_construction.h>
5 #include <distributed/distributed_hgemv.h>
6 #include <petsc/private/matimpl.h>
7 #include <petscsf.h>
8
9 #define MatHaraGetThrustPointer(v) thrust::raw_pointer_cast((v).data())
10
11 // TODO HARA:
12 // MatDuplicate buggy with commsize > 1
13 // kernel needs (global?) id of points (issues with Chebyshev points and coupling matrix computation)
// unsymmetric DistributedHMatrix (transpose for distributed_hgemv?)
15 // Unify interface for sequential and parallel?
16 // Reuse geometric construction (almost possible, only the unsymmetric case is explicitly handled)
17 // Fix includes:
18 // - everything under hara/ dir (except for hara.h)
19 // - fix kblas includes
20 // - namespaces?
21 // Fix naming convention DistributedHMatrix_GPU vs GPU_HMatrix
22 // Diagnostics? FLOPS, MEMORY USAGE IN PARALLEL
23 // Why do we need to template the admissibility condition in the hmatrix construction?
24 //
25 template<typename T, int D>
26 struct PetscPointCloud
27 {
28 static const int Dim = D;
29 typedef T ElementType;
30
31 struct Point
32 {
33 T x[D];
PointPetscPointCloud::Point34 Point() {
35 for (int i = 0; i < D; i++) x[i] = 0;
36 }
PointPetscPointCloud::Point37 Point(const T xx[]) {
38 for (int i = 0; i < D; i++) x[i] = xx[i];
39 }
40 };
41
42 std::vector<Point> pts;
43
44 // Must return the number of data points
kdtree_get_point_countPetscPointCloud45 inline size_t kdtree_get_point_count() const { return pts.size(); }
46
47 // Returns the dim'th component of the idx'th point in the class:
kdtree_get_ptPetscPointCloud48 inline T kdtree_get_pt(const size_t idx, int dim) const
49 {
50 return pts[idx].x[dim];
51 }
52
get_ptPetscPointCloud53 inline T get_pt(const size_t idx, int dim) const
54 {
55 return kdtree_get_pt(idx, dim);
56 }
57
kdtree_ignore_pointPetscPointCloud58 inline bool kdtree_ignore_point(const size_t idx) const { return false; }
59
60 // Optional bounding-box computation: return false to default to a standard bbox computation loop.
61 // Return true if the BBOX was already computed by the class and returned in "bb" so it can be avoided to redo it again.
62 // Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3 for point clouds)
63 template <class BBOX>
kdtree_get_bboxPetscPointCloud64 bool kdtree_get_bbox(BBOX& /* bb */) const { return false; }
65
66 PetscPointCloud(PetscInt,const T[]);
67 PetscPointCloud(PetscPointCloud&);
68 };
69
70 template<typename T, int D>
PetscPointCloud(PetscInt n,const T coords[])71 PetscPointCloud<T,D>::PetscPointCloud(PetscInt n, const T coords[])
72 {
73 this->pts.resize(n);
74 for (PetscInt i = 0; i < n; i++) {
75 Point p(coords);
76 this->pts[i] = p;
77 coords += D;
78 }
79 }
80
81 template<typename T, int D>
PetscPointCloud(PetscPointCloud<T,D> & other)82 PetscPointCloud<T,D>::PetscPointCloud(PetscPointCloud<T,D>& other)
83 {
84 this->pts = other.pts;
85 }
86
87 template <typename T>
88 using PetscPointCloud3D = PetscPointCloud<T,3>;
89 template <typename T>
90 using PetscPointCloud2D = PetscPointCloud<T,2>;
91 template <typename T>
92 using PetscPointCloud1D = PetscPointCloud<T,1>;
93
94 template<typename T, int Dim>
95 struct PetscFunctionGenerator
96 {
97 private:
98 MatHaraKernel k;
99 void *ctx;
100
101 public:
PetscFunctionGeneratorPetscFunctionGenerator102 PetscFunctionGenerator(MatHaraKernel k, void* ctx) { this->k = k; this->ctx = ctx; }
PetscFunctionGeneratorPetscFunctionGenerator103 PetscFunctionGenerator(PetscFunctionGenerator& other) { this->k = other.k; this->ctx = other.ctx; }
operator ()PetscFunctionGenerator104 T operator()(PetscReal pt1[Dim], PetscReal pt2[Dim])
105 {
106 return (T)(this->k ? (*this->k)(Dim,pt1,pt2,this->ctx) : 0);
107 }
108 };
109
110 template <typename T>
111 using PetscFunctionGenerator3D = PetscFunctionGenerator<T,3>;
112 template <typename T>
113 using PetscFunctionGenerator2D = PetscFunctionGenerator<T,2>;
114 template <typename T>
115 using PetscFunctionGenerator1D = PetscFunctionGenerator<T,1>;
116
117 #include <../src/mat/impls/hara/matharasampler.hpp>
118
/* Private data of the MATHARA type: a hierarchical low-rank (H-) matrix
   backed by the HARA library, constructed either geometrically
   (coordinates + kernel) or algebraically (sampling matvecs of another
   matrix) */
typedef struct {
  distributedHaraHandle_t handle; /* HARA library handle (also wraps the sequential handle) */

  /* two different classes at the moment */
  HMatrix *hmatrix;                 /* sequential representation (commsize == 1) */
  DistributedHMatrix *dist_hmatrix; /* parallel representation (commsize > 1) */

  /* May use permutations */
  PetscSF sf;          /* PETSc ordering <-> H-matrix (index_map) ordering; NULL when the map is the identity */
  PetscLayout hara_rmap; /* presumably the row layout in H-matrix ordering -- not referenced in this part of the file */
  thrust::host_vector<PetscScalar> *xx,*yy; /* host work buffers in H-matrix ordering */
  PetscInt xxs,yys;    /* number of columns the work buffers are currently sized for */
  PetscBool multsetup; /* MatSetUpMultiply_HARA() already performed */

  /* GPU */
#if defined(HARA_USE_GPU)
  GPU_HMatrix *hmatrix_gpu;
  DistributedHMatrix_GPU *dist_hmatrix_gpu;
  thrust::device_vector<PetscScalar> *xx_gpu,*yy_gpu; /* device mirrors of xx/yy */
  PetscInt xxs_gpu,yys_gpu;
#endif

  /* construction from matvecs */
  PetscMatrixSampler* sampler;

  /* Admissibility */
  PetscReal eta;     /* admissibility condition tolerance (-mat_hara_eta) */
  PetscInt leafsize; /* cluster tree leaf size (-mat_hara_leafsize) */

  /* for dof reordering; only the cloud matching spacedim is non-NULL */
  PetscInt spacedim;
  PetscPointCloud1D<PetscReal> *ptcloud1;
  PetscPointCloud2D<PetscReal> *ptcloud2;
  PetscPointCloud3D<PetscReal> *ptcloud3;

  /* kernel for generating matrix entries; only the generator matching spacedim is non-NULL */
  PetscFunctionGenerator1D<PetscScalar> *kernel1;
  PetscFunctionGenerator2D<PetscScalar> *kernel2;
  PetscFunctionGenerator3D<PetscScalar> *kernel3;

  /* customization */
  PetscInt basisord;            /* basis order for off-diagonal sampling (kernel construction) */
  PetscInt max_rank;            /* maximum rank when constructed from matvecs */
  PetscInt bs;                  /* number of concurrent samples when constructed from matvecs */
  PetscReal rtol;               /* relative tolerance for construction from sampling */
  PetscInt norm_max_samples;    /* iteration cap for the approximate norms */
  PetscBool check_construction; /* check error after construction from sampling */

  /* keeps track of MatScale values */
  PetscScalar s;
} Mat_HARA;
170
/* Frees all MATHARA private data: HARA handle, host/device H-matrix
   representations, permutation SF and work buffers, point clouds,
   kernel wrappers, and the composed MatProduct implementations */
static PetscErrorCode MatDestroy_HARA(Mat A)
{
  Mat_HARA *a = (Mat_HARA*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  haraDestroyDistributedHandle(a->handle); /* NOTE(review): return value ignored -- confirm this HARA call cannot fail */
  delete a->hmatrix;
  delete a->dist_hmatrix;
  ierr = PetscSFDestroy(&a->sf);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&a->hara_rmap);CHKERRQ(ierr);
  delete a->xx;
  delete a->yy;
#if defined(HARA_USE_GPU)
  delete a->hmatrix_gpu;
  delete a->dist_hmatrix_gpu;
  delete a->xx_gpu;
  delete a->yy_gpu;
#endif
  delete a->sampler;
  delete a->ptcloud1;
  delete a->ptcloud2;
  delete a->ptcloud3;
  delete a->kernel1;
  delete a->kernel2;
  delete a->kernel3;
  /* remove the dense MatProduct implementations composed on this object */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_seqdense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_seqdensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205
/* Given a star forest communicating a single vector, build one that
   communicates nv vectors stored side by side: root (resp. leaf) data is
   viewed as a dense array with leading dimension ldr (resp. ldl), where
   PETSC_DECIDE selects the minimal admissible value. For nv == 1 the
   input sf itself is returned with an extra reference. */
static PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF rankssf;
  const PetscSFNode *iremote;
  PetscSFNode *viremote,*rremotes;
  const PetscInt *ilocal;
  PetscInt *vilocal = NULL,*ldrs;
  const PetscMPIInt *ranks;
  PetscMPIInt *sranks;
  PetscInt nranks,nr,nl,vnr,vnl,i,v,j,maxl;
  MPI_Comm comm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv == 1) { /* nothing to replicate */
    ierr = PetscObjectReference((PetscObject)sf);CHKERRQ(ierr);
    *vsf = sf;
    PetscFunctionReturn(0);
  }
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nr,&nl,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,NULL,&maxl);CHKERRQ(ierr);
  maxl += 1; /* leaf space must hold the largest leaf index */
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  if (ldr < nr) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldr,nr);
  if (ldl < maxl) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldl,maxl);
  vnr = nr*nv;
  vnl = nl*nv;
  ierr = PetscMalloc1(vnl,&viremote);CHKERRQ(ierr);
  if (ilocal) { /* preserve the (possibly non-contiguous) leaf layout */
    ierr = PetscMalloc1(vnl,&vilocal);CHKERRQ(ierr);
  }

  /* TODO: Should this special SF be available, e.g.
     PetscSFGetRanksSF or similar? */
  /* Remote indices must be scaled by the owner's leading dimension, so
     gather every neighbor rank's ldr with a one-root-per-rank SF */
  ierr = PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nranks,&sranks);CHKERRQ(ierr);
  ierr = PetscArraycpy(sranks,ranks,nranks);CHKERRQ(ierr);
  ierr = PetscSortMPIInt(nranks,sranks);CHKERRQ(ierr); /* sorted for PetscFindMPIInt below */
  ierr = PetscMalloc1(nranks,&rremotes);CHKERRQ(ierr);
  for (i=0;i<nranks;i++) {
    rremotes[i].rank = sranks[i];
    rremotes[i].index = 0;
  }
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&rankssf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(rankssf,1,nranks,NULL,PETSC_OWN_POINTER,rremotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscMalloc1(nranks,&ldrs);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rankssf,MPIU_INT,&ldr,ldrs);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rankssf,MPIU_INT,&ldr,ldrs);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rankssf);CHKERRQ(ierr);

  /* replicate the graph nv times: column v of the owner starts at
     v*ldrs[owner] on the root side and v*ldl on the leaf side; reuse the
     previous rank match j since consecutive leaves often share a rank */
  j = -1;
  for (i=0;i<nl;i++) {
    const PetscInt r = iremote[i].rank;
    const PetscInt ii = iremote[i].index;

    if (j < 0 || sranks[j] != r) {
      ierr = PetscFindMPIInt(r,nranks,sranks,&j);CHKERRQ(ierr);
    }
    if (j < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to locate neighbor rank %D",r);
    for (v=0;v<nv;v++) {
      viremote[v*nl + i].rank = r;
      viremote[v*nl + i].index = v*ldrs[j] + ii;
      if (ilocal) vilocal[v*nl + i] = v*ldl + ilocal[i];
    }
  }
  ierr = PetscFree(sranks);CHKERRQ(ierr);
  ierr = PetscFree(ldrs);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,vsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*vsf,vnr,vnl,vilocal,PETSC_OWN_POINTER,viremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
279
VecSign(Vec v,Vec s)280 static PetscErrorCode VecSign(Vec v, Vec s)
281 {
282 const PetscScalar *av;
283 PetscScalar *as;
284 PetscInt i,n;
285 PetscErrorCode ierr;
286
287 PetscFunctionBegin;
288 PetscValidHeaderSpecific(v,VEC_CLASSID,1);
289 PetscValidHeaderSpecific(s,VEC_CLASSID,2);
290 ierr = VecGetArrayRead(v,&av);CHKERRQ(ierr);
291 ierr = VecGetArrayWrite(s,&as);CHKERRQ(ierr);
292 ierr = VecGetLocalSize(s,&n);CHKERRQ(ierr);
293 ierr = VecGetLocalSize(v,&i);CHKERRQ(ierr);
294 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Invalid local sizes %D != %D",i,n);
295 for (i=0;i<n;i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
296 ierr = VecRestoreArrayWrite(s,&as);CHKERRQ(ierr);
297 ierr = VecRestoreArrayRead(v,&av);CHKERRQ(ierr);
298 PetscFunctionReturn(0);
299 }
300
/* these are approximate norms */
/* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
   NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
static PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal* n)
{
  Vec x,y,w,z;
  PetscReal normz,adot;
  PetscScalar dot;
  PetscInt i,j,N,jold = -1;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (normtype) {
  case NORM_INFINITY:
  case NORM_1:
    if (normsamples < 0) normsamples = 10; /* pure guess */
    if (normtype == NORM_INFINITY) {
      /* ||A||_inf == ||A^T||_1: run the 1-norm estimator on the transpose */
      Mat B;
      ierr = MatCreateTranspose(A,&B);CHKERRQ(ierr);
      A = B;
    } else {
      ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); /* balance the MatDestroy below */
    }
    ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
    ierr = MatCreateVecs(A,&z,&w);CHKERRQ(ierr);
    ierr = VecGetSize(x,&N);CHKERRQ(ierr);
    ierr = VecSet(x,1./N);CHKERRQ(ierr); /* initial iterate: uniform vector */
    ierr = VecSetOption(x,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
    *n = 0.0;
    for (i = 0; i < normsamples; i++) {
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecSign(y,w);CHKERRQ(ierr);
      ierr = MatMultTranspose(A,w,z);CHKERRQ(ierr);
      ierr = VecNorm(z,NORM_INFINITY,&normz);CHKERRQ(ierr);
      ierr = VecDot(x,z,&dot);CHKERRQ(ierr);
      adot = PetscAbsScalar(dot);
      ierr = PetscInfo4(A,"%s norm it %D -> (%g %g)\n",NormTypes[normtype],i,(double)normz,(double)adot);CHKERRQ(ierr);
      if (normz <= adot && i > 0) { /* converged */
        ierr = VecNorm(y,NORM_1,n);CHKERRQ(ierr);
        break;
      }
      /* next iterate: e_j with j the index of the largest entry of z */
      ierr = VecSet(x,0.);CHKERRQ(ierr);
      ierr = VecMax(z,&j,&normz);CHKERRQ(ierr);
      if (j == jold) { /* cycling: accept the current estimate */
        ierr = VecNorm(y,NORM_1,n);CHKERRQ(ierr);
        ierr = PetscInfo2(A,"%s norm it %D -> breakdown (j==jold)\n",NormTypes[normtype],i);CHKERRQ(ierr);
        break;
      }
      jold = j;
      ierr = VecSetValue(x,j,1.0,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecAssemblyBegin(x);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(x);CHKERRQ(ierr);
    }
    ierr = MatDestroy(&A);CHKERRQ(ierr); /* drops the transpose or the extra reference */
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&w);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    ierr = VecDestroy(&z);CHKERRQ(ierr);
    break;
  case NORM_2:
    /* power-iteration-like scheme alternating A and A^T applications */
    if (normsamples < 0) normsamples = 20; /* pure guess */
    ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
    ierr = MatCreateVecs(A,&z,NULL);CHKERRQ(ierr);
    ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
    ierr = VecNormalize(x,NULL);CHKERRQ(ierr);
    *n = 0.0;
    for (i = 0; i < normsamples; i++) {
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecNormalize(y,n);CHKERRQ(ierr); /* current 2-norm estimate */
      ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);
      ierr = VecNorm(z,NORM_2,&normz);CHKERRQ(ierr);
      ierr = VecDot(x,z,&dot);CHKERRQ(ierr);
      adot = PetscAbsScalar(dot);
      ierr = PetscInfo5(A,"%s norm it %D -> %g (%g %g)\n",NormTypes[normtype],i,(double)*n,(double)normz,(double)adot);CHKERRQ(ierr);
      if (normz <= adot) break; /* converged */
      if (i < normsamples - 1) { /* swap x and z for the next iteration */
        Vec t;

        ierr = VecNormalize(z,NULL);CHKERRQ(ierr);
        t = x;
        x = z;
        z = t;
      }
    }
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    ierr = VecDestroy(&z);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"%s norm not supported",NormTypes[normtype]);
  }
  ierr = PetscInfo3(A,"%s norm %g computed in %D iterations\n",NormTypes[normtype],(double)*n,i);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
395
MatNorm_HARA(Mat A,NormType normtype,PetscReal * n)396 PETSC_EXTERN PetscErrorCode MatNorm_HARA(Mat A, NormType normtype, PetscReal* n)
397 {
398 PetscErrorCode ierr;
399 PetscBool ishara;
400 PetscInt nmax = PETSC_DECIDE;
401
402 PetscFunctionBegin;
403 ierr = PetscObjectTypeCompare((PetscObject)A,MATHARA,&ishara);CHKERRQ(ierr);
404 if (ishara) {
405 Mat_HARA *a = (Mat_HARA*)A->data;
406
407 nmax = a->norm_max_samples;
408 }
409 ierr = MatApproximateNorm_Private(A,normtype,nmax,n);CHKERRQ(ierr);
410 PetscFunctionReturn(0);
411 }
412
/* Computes C = s * op(A) * B (op = A if !transA, A^T otherwise) for
   dense B,C. When a DOF permutation is active (hara->sf), per-object
   cached "vector SFs" gather all columns of B into contiguous work
   buffers in H-matrix ordering, and scatter the result back into C. */
static PetscErrorCode MatMultNKernel_HARA(Mat A, PetscBool transA, Mat B, Mat C)
{
  Mat_HARA *hara = (Mat_HARA*)A->data;
  haraHandle_t handle = hara->handle->handle;
  PetscBool boundtocpu = PETSC_TRUE;
  PetscScalar *xx,*yy,*uxx,*uyy;
  PetscInt blda,clda;
  PetscSF bsf,csf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(HARA_USE_GPU)
  boundtocpu = A->boundtocpu;
#endif
  ierr = MatDenseGetLDA(B,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(C,&clda);CHKERRQ(ierr);
  if (hara->sf) {
    PetscInt n;

    /* the multi-column SFs are cached on B and C to be reused across calls */
    ierr = PetscSFGetGraph(hara->sf,NULL,&n,NULL,NULL);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)B,"_mathara_vectorsf",(PetscObject*)&bsf);CHKERRQ(ierr);
    if (!bsf) {
      ierr = PetscSFGetVectorSF(hara->sf,B->cmap->N,blda,PETSC_DECIDE,&bsf);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)B,"_mathara_vectorsf",(PetscObject)bsf);CHKERRQ(ierr);
      ierr = PetscObjectDereference((PetscObject)bsf);CHKERRQ(ierr);
    }
    ierr = PetscObjectQuery((PetscObject)C,"_mathara_vectorsf",(PetscObject*)&csf);CHKERRQ(ierr);
    if (!csf) {
      ierr = PetscSFGetVectorSF(hara->sf,B->cmap->N,clda,PETSC_DECIDE,&csf);CHKERRQ(ierr);
      ierr = PetscObjectCompose((PetscObject)C,"_mathara_vectorsf",(PetscObject)csf);CHKERRQ(ierr);
      ierr = PetscObjectDereference((PetscObject)csf);CHKERRQ(ierr);
    }
    blda = n; /* the work buffers are contiguous with leading dimension n */
    clda = n;
  }
  if (!boundtocpu) {
#if defined(HARA_USE_GPU)
    PetscBool ciscuda,biscuda;

    if (hara->sf) {
      PetscInt n;

      ierr = PetscSFGetGraph(hara->sf,NULL,&n,NULL,NULL);CHKERRQ(ierr);
      /* NOTE(review): the resize test compares against B->cmap->n while
         the recorded size is B->cmap->N -- the local/global (n vs N) mix
         looks suspicious; confirm intent */
      if (hara->xxs_gpu < B->cmap->n) { hara->xx_gpu->resize(n*B->cmap->N); hara->xxs_gpu = B->cmap->N; }
      if (hara->yys_gpu < B->cmap->n) { hara->yy_gpu->resize(n*B->cmap->N); hara->yys_gpu = B->cmap->N; }
    }
    /* If not of type seqdensecuda, convert on the fly (i.e. allocate GPU memory) */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&biscuda,MATSEQDENSECUDA,MATMPIDENSECUDA,"");CHKERRQ(ierr);
    if (!biscuda) {
      ierr = MatConvert(B,MATDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompareAny((PetscObject)C,&ciscuda,MATSEQDENSECUDA,MATMPIDENSECUDA,"");CHKERRQ(ierr);
    if (!ciscuda) {
      C->assembled = PETSC_TRUE;
      ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr);
    }
    ierr = MatDenseCUDAGetArrayRead(B,(const PetscScalar**)&xx);CHKERRQ(ierr);
    ierr = MatDenseCUDAGetArrayWrite(C,&yy);CHKERRQ(ierr);
    if (hara->sf) { /* gather B into H-matrix ordering */
      uxx = MatHaraGetThrustPointer(*hara->xx_gpu);
      uyy = MatHaraGetThrustPointer(*hara->yy_gpu);
      ierr = PetscSFBcastBegin(bsf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(bsf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
    } else {
      uxx = xx;
      uyy = yy;
    }
    if (hara->dist_hmatrix_gpu) {
      if (transA && !A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatMultTranspose not yet coded in parallel");
      distributed_hgemv(/* transA ? HARA_Trans : HARA_NoTrans, */hara->s, *hara->dist_hmatrix_gpu, uxx, blda, 0.0, uyy, clda, B->cmap->N, hara->handle);
    } else {
      hgemv(transA ? HARA_Trans : HARA_NoTrans, hara->s, *hara->hmatrix_gpu, uxx, blda, 0.0, uyy, clda, B->cmap->N, handle);
    }
    ierr = MatDenseCUDARestoreArrayRead(B,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (hara->sf) { /* scatter result back into PETSc ordering */
      ierr = PetscSFReduceBegin(csf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(csf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
    }
    ierr = MatDenseCUDARestoreArrayWrite(C,&yy);CHKERRQ(ierr);
    if (!biscuda) { /* restore the original (host) matrix types */
      ierr = MatConvert(B,MATDENSE,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
    }
    if (!ciscuda) {
      ierr = MatConvert(C,MATDENSE,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr);
    }
#endif
  } else {
    if (hara->sf) {
      PetscInt n;

      ierr = PetscSFGetGraph(hara->sf,NULL,&n,NULL,NULL);CHKERRQ(ierr);
      /* NOTE(review): same n vs N mix as in the GPU branch above; confirm */
      if (hara->xxs < B->cmap->n) { hara->xx->resize(n*B->cmap->N); hara->xxs = B->cmap->N; }
      if (hara->yys < B->cmap->n) { hara->yy->resize(n*B->cmap->N); hara->yys = B->cmap->N; }
    }
    ierr = MatDenseGetArrayRead(B,(const PetscScalar**)&xx);CHKERRQ(ierr);
    ierr = MatDenseGetArrayWrite(C,&yy);CHKERRQ(ierr);
    if (hara->sf) { /* gather B into H-matrix ordering */
      uxx = MatHaraGetThrustPointer(*hara->xx);
      uyy = MatHaraGetThrustPointer(*hara->yy);
      ierr = PetscSFBcastBegin(bsf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(bsf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
    } else {
      uxx = xx;
      uyy = yy;
    }
    if (hara->dist_hmatrix) {
      if (transA && !A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatMultTranspose not yet coded in parallel");
      distributed_hgemv(/*transA ? HARA_Trans : HARA_NoTrans, */hara->s, *hara->dist_hmatrix, uxx, blda, 0.0, uyy, clda, B->cmap->N, hara->handle);
    } else {
      hgemv(transA ? HARA_Trans : HARA_NoTrans, hara->s, *hara->hmatrix, uxx, blda, 0.0, uyy, clda, B->cmap->N, handle);
    }
    ierr = MatDenseRestoreArrayRead(B,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (hara->sf) { /* scatter result back into PETSc ordering */
      ierr = PetscSFReduceBegin(csf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(csf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
    }
    ierr = MatDenseRestoreArrayWrite(C,&yy);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
533
MatProductNumeric_HARA(Mat C)534 static PetscErrorCode MatProductNumeric_HARA(Mat C)
535 {
536 Mat_Product *product = C->product;
537 PetscErrorCode ierr;
538
539 PetscFunctionBegin;
540 MatCheckProduct(C,1);
541 switch (product->type) {
542 case MATPRODUCT_AB:
543 ierr = MatMultNKernel_HARA(product->A,PETSC_FALSE,product->B,C);CHKERRQ(ierr);
544 break;
545 case MATPRODUCT_AtB:
546 ierr = MatMultNKernel_HARA(product->A,PETSC_TRUE,product->B,C);CHKERRQ(ierr);
547 break;
548 default:
549 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type %s is not supported",MatProductTypes[product->type]);
550 }
551 PetscFunctionReturn(0);
552 }
553
MatProductSymbolic_HARA(Mat C)554 static PetscErrorCode MatProductSymbolic_HARA(Mat C)
555 {
556 PetscErrorCode ierr;
557 Mat_Product *product = C->product;
558 PetscBool cisdense;
559 Mat A,B;
560
561 PetscFunctionBegin;
562 MatCheckProduct(C,1);
563 A = product->A;
564 B = product->B;
565 switch (product->type) {
566 case MATPRODUCT_AB:
567 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
568 ierr = MatSetBlockSizesFromMats(C,product->A,product->B);CHKERRQ(ierr);
569 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,MATMPIDENSECUDA,"");CHKERRQ(ierr);
570 if (!cisdense) { ierr = MatSetType(C,((PetscObject)product->B)->type_name);CHKERRQ(ierr); }
571 ierr = MatSetUp(C);CHKERRQ(ierr);
572 break;
573 case MATPRODUCT_AtB:
574 ierr = MatSetSizes(C,A->cmap->n,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
575 ierr = MatSetBlockSizesFromMats(C,product->A,product->B);CHKERRQ(ierr);
576 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,MATMPIDENSECUDA,"");CHKERRQ(ierr);
577 if (!cisdense) { ierr = MatSetType(C,((PetscObject)product->B)->type_name);CHKERRQ(ierr); }
578 ierr = MatSetUp(C);CHKERRQ(ierr);
579 break;
580 default:
581 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type %s is not supported",MatProductTypes[product->type]);
582 }
583 C->ops->productsymbolic = NULL;
584 C->ops->productnumeric = MatProductNumeric_HARA;
585 PetscFunctionReturn(0);
586 }
587
MatProductSetFromOptions_HARA(Mat C)588 static PetscErrorCode MatProductSetFromOptions_HARA(Mat C)
589 {
590 PetscFunctionBegin;
591 MatCheckProduct(C,1);
592 if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) {
593 C->ops->productsymbolic = MatProductSymbolic_HARA;
594 }
595 PetscFunctionReturn(0);
596 }
597
/* Computes y = s * op(A) * x + sy * y (op = A if !trans, A^T otherwise).
   When a DOF permutation is active (hara->sf), x -- and y too, if sy is
   nonzero -- are gathered into work buffers in H-matrix ordering before
   the HARA call, and the result is scattered back. */
static PetscErrorCode MatMultKernel_HARA(Mat A, Vec x, PetscScalar sy, Vec y, PetscBool trans)
{
  Mat_HARA *hara = (Mat_HARA*)A->data;
  haraHandle_t handle = hara->handle->handle;
  PetscBool boundtocpu = PETSC_TRUE;
  PetscInt n;
  PetscScalar *xx,*yy,*uxx,*uyy;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(HARA_USE_GPU)
  boundtocpu = A->boundtocpu;
#endif
  if (hara->sf) { /* work-buffer length in H-matrix ordering */
    ierr = PetscSFGetGraph(hara->sf,NULL,&n,NULL,NULL);CHKERRQ(ierr);
  } else n = A->rmap->n;
  if (!boundtocpu) {
#if defined(HARA_USE_GPU)
    ierr = VecCUDAGetArrayRead(x,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (sy == 0.0) { /* y is write-only unless it is accumulated into */
      ierr = VecCUDAGetArrayWrite(y,&yy);CHKERRQ(ierr);
    } else {
      ierr = VecCUDAGetArray(y,&yy);CHKERRQ(ierr);
    }
    if (hara->sf) {
      uxx = MatHaraGetThrustPointer(*hara->xx_gpu);
      uyy = MatHaraGetThrustPointer(*hara->yy_gpu);

      ierr = PetscSFBcastBegin(hara->sf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(hara->sf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      if (sy != 0.0) { /* the current y contributes: permute it as well */
        ierr = PetscSFBcastBegin(hara->sf,MPIU_SCALAR,yy,uyy);CHKERRQ(ierr);
        ierr = PetscSFBcastEnd(hara->sf,MPIU_SCALAR,yy,uyy);CHKERRQ(ierr);
      }
    } else {
      uxx = xx;
      uyy = yy;
    }
    if (hara->dist_hmatrix_gpu) {
      if (trans && !A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatMultTranspose not yet coded in parallel");
      distributed_hgemv(/*trans ? HARA_Trans : HARA_NoTrans, */hara->s, *hara->dist_hmatrix_gpu, uxx, n, sy, uyy, n, 1, hara->handle);
    } else {
      hgemv(trans ? HARA_Trans : HARA_NoTrans, hara->s, *hara->hmatrix_gpu, uxx, n, sy, uyy, n, 1, handle);
    }
    ierr = VecCUDARestoreArrayRead(x,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (hara->sf) { /* scatter result back into PETSc ordering */
      ierr = PetscSFReduceBegin(hara->sf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(hara->sf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
    }
    if (sy == 0.0) {
      ierr = VecCUDARestoreArrayWrite(y,&yy);CHKERRQ(ierr);
    } else {
      ierr = VecCUDARestoreArray(y,&yy);CHKERRQ(ierr);
    }
#endif
  } else {
    ierr = VecGetArrayRead(x,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (sy == 0.0) { /* y is write-only unless it is accumulated into */
      ierr = VecGetArrayWrite(y,&yy);CHKERRQ(ierr);
    } else {
      ierr = VecGetArray(y,&yy);CHKERRQ(ierr);
    }
    if (hara->sf) {
      uxx = MatHaraGetThrustPointer(*hara->xx);
      uyy = MatHaraGetThrustPointer(*hara->yy);

      ierr = PetscSFBcastBegin(hara->sf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(hara->sf,MPIU_SCALAR,xx,uxx);CHKERRQ(ierr);
      if (sy != 0.0) { /* the current y contributes: permute it as well */
        ierr = PetscSFBcastBegin(hara->sf,MPIU_SCALAR,yy,uyy);CHKERRQ(ierr);
        ierr = PetscSFBcastEnd(hara->sf,MPIU_SCALAR,yy,uyy);CHKERRQ(ierr);
      }
    } else {
      uxx = xx;
      uyy = yy;
    }
    if (hara->dist_hmatrix) {
      if (trans && !A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatMultTranspose not yet coded in parallel");
      distributed_hgemv(/*trans ? HARA_Trans : HARA_NoTrans, */hara->s, *hara->dist_hmatrix, uxx, n, sy, uyy, n, 1, hara->handle);
    } else {
      hgemv(trans ? HARA_Trans : HARA_NoTrans, hara->s, *hara->hmatrix, uxx, n, sy, uyy, n, 1, handle);
    }
    ierr = VecRestoreArrayRead(x,(const PetscScalar**)&xx);CHKERRQ(ierr);
    if (hara->sf) { /* scatter result back into PETSc ordering */
      ierr = PetscSFReduceBegin(hara->sf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(hara->sf,MPIU_SCALAR,uyy,yy,MPIU_REPLACE);CHKERRQ(ierr);
    }
    if (sy == 0.0) {
      ierr = VecRestoreArrayWrite(y,&yy);CHKERRQ(ierr);
    } else {
      ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
693
MatMultTranspose_HARA(Mat A,Vec x,Vec y)694 static PetscErrorCode MatMultTranspose_HARA(Mat A, Vec x, Vec y)
695 {
696 PetscErrorCode ierr;
697
698 PetscFunctionBegin;
699 ierr = MatMultKernel_HARA(A,x,0.0,y,PETSC_TRUE);CHKERRQ(ierr);
700 PetscFunctionReturn(0);
701 }
702
MatMult_HARA(Mat A,Vec x,Vec y)703 static PetscErrorCode MatMult_HARA(Mat A, Vec x, Vec y)
704 {
705 PetscErrorCode ierr;
706
707 PetscFunctionBegin;
708 ierr = MatMultKernel_HARA(A,x,0.0,y,PETSC_FALSE);CHKERRQ(ierr);
709 PetscFunctionReturn(0);
710 }
711
MatMultTransposeAdd_HARA(Mat A,Vec x,Vec y,Vec z)712 static PetscErrorCode MatMultTransposeAdd_HARA(Mat A, Vec x, Vec y, Vec z)
713 {
714 PetscErrorCode ierr;
715
716 PetscFunctionBegin;
717 ierr = VecCopy(y,z);CHKERRQ(ierr);
718 ierr = MatMultKernel_HARA(A,x,1.0,z,PETSC_TRUE);CHKERRQ(ierr);
719 PetscFunctionReturn(0);
720 }
721
MatMultAdd_HARA(Mat A,Vec x,Vec y,Vec z)722 static PetscErrorCode MatMultAdd_HARA(Mat A, Vec x, Vec y, Vec z)
723 {
724 PetscErrorCode ierr;
725
726 PetscFunctionBegin;
727 ierr = VecCopy(y,z);CHKERRQ(ierr);
728 ierr = MatMultKernel_HARA(A,x,1.0,z,PETSC_FALSE);CHKERRQ(ierr);
729 PetscFunctionReturn(0);
730 }
731
MatScale_HARA(Mat A,PetscScalar s)732 static PetscErrorCode MatScale_HARA(Mat A, PetscScalar s)
733 {
734 Mat_HARA *a = (Mat_HARA*)A->data;
735
736 PetscFunctionBegin;
737 a->s *= s;
738 PetscFunctionReturn(0);
739 }
740
/* Processes the -mat_hara_* command line options; see the help strings
   below and the corresponding Mat_HARA fields for their meaning */
static PetscErrorCode MatSetFromOptions_HARA(PetscOptionItems *PetscOptionsObject,Mat A)
{
  Mat_HARA *a = (Mat_HARA*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"HARA options");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_hara_leafsize","Leaf size when constructed from kernel",NULL,a->leafsize,&a->leafsize,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_hara_eta","Admissibility condition tolerance",NULL,a->eta,&a->eta,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_hara_order","Basis order for off-diagonal sampling when constructed from kernel",NULL,a->basisord,&a->basisord,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_hara_maxrank","Maximum rank when constructed from matvecs",NULL,a->max_rank,&a->max_rank,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_hara_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,a->bs,&a->bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_hara_rtol","Relative tolerance for construction from sampling",NULL,a->rtol,&a->rtol,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_hara_check","Check error when constructing from sampling during MatAssemblyEnd()",NULL,a->check_construction,&a->check_construction,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
758
759 static PetscErrorCode MatHaraSetCoords_HARA(Mat,PetscInt,const PetscReal[],MatHaraKernel,void*);
760
MatHaraInferCoordinates_Private(Mat A)761 static PetscErrorCode MatHaraInferCoordinates_Private(Mat A)
762 {
763 Mat_HARA *a = (Mat_HARA*)A->data;
764 Vec c;
765 PetscInt spacedim;
766 const PetscScalar *coords;
767 PetscErrorCode ierr;
768
769 PetscFunctionBegin;
770 if (a->spacedim) PetscFunctionReturn(0);
771 ierr = PetscObjectQuery((PetscObject)A,"__mathara_coords",(PetscObject*)&c);CHKERRQ(ierr);
772 if (!c && a->sampler) {
773 Mat S = a->sampler->GetSamplingMat();
774
775 ierr = PetscObjectQuery((PetscObject)S,"__mathara_coords",(PetscObject*)&c);CHKERRQ(ierr);
776 if (!c) {
777 PetscBool ishara;
778
779 ierr = PetscObjectTypeCompare((PetscObject)S,MATHARA,&ishara);CHKERRQ(ierr);
780 if (ishara) {
781 Mat_HARA *s = (Mat_HARA*)S->data;
782
783 a->spacedim = s->spacedim;
784 if (s->ptcloud1) {
785 a->ptcloud1 = new PetscPointCloud1D<PetscReal>(*s->ptcloud1);
786 } else if (s->ptcloud2) {
787 a->ptcloud2 = new PetscPointCloud2D<PetscReal>(*s->ptcloud2);
788 } else if (s->ptcloud3) {
789 a->ptcloud3 = new PetscPointCloud3D<PetscReal>(*s->ptcloud3);
790 }
791 }
792 }
793 }
794 if (!c) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Missing coordinates");
795 ierr = VecGetArrayRead(c,&coords);CHKERRQ(ierr);
796 ierr = VecGetBlockSize(c,&spacedim);CHKERRQ(ierr);
797 ierr = MatHaraSetCoords_HARA(A,spacedim,coords,NULL,NULL);CHKERRQ(ierr);
798 ierr = VecRestoreArrayRead(c,&coords);CHKERRQ(ierr);
799 PetscFunctionReturn(0);
800 }
801
/* Creates, once per matrix, the SF mapping the PETSc DOF ordering to the
   H-matrix (index_map) ordering, plus appropriately sized host/device
   work buffers; nothing is created when the map is the identity */
static PetscErrorCode MatSetUpMultiply_HARA(Mat A)
{
  MPI_Comm comm;
  PetscMPIInt size;
  PetscErrorCode ierr;
  Mat_HARA *a = (Mat_HARA*)A->data;
  IS is;
  PetscInt n,*idx;
  int *iidx;
  PetscCopyMode own;
  PetscBool rid;

  PetscFunctionBegin;
  if (a->multsetup) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* the index map is stored by the (distributed) basis tree */
  if (size > 1) {
    iidx = MatHaraGetThrustPointer(a->dist_hmatrix->basis_tree.basis_branch.index_map);
    n = a->dist_hmatrix->basis_tree.basis_branch.index_map.size();
  } else {
    n = a->hmatrix->u_basis_tree.index_map.size();
    iidx = MatHaraGetThrustPointer(a->hmatrix->u_basis_tree.index_map);
  }
  if (PetscDefined(USE_64BIT_INDICES)) {
    PetscInt i;

    /* HARA stores 32-bit ints; widen into a freshly allocated array */
    own = PETSC_OWN_POINTER;
    ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr);
    for (i=0;i<n;i++) idx[i] = iidx[i];
  } else {
    own = PETSC_USE_POINTER;
    idx = iidx;
  }
  ierr = ISCreateGeneral(comm,n,idx,own,&is);CHKERRQ(ierr);
  ierr = ISIdentity(is,&rid);CHKERRQ(ierr);
  if (!rid) { /* non-identity permutation: set up scatter and work buffers */
    ierr = PetscSFCreate(comm,&a->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(a->sf,A->rmap,n,NULL,PETSC_OWN_POINTER,idx);CHKERRQ(ierr);
#if defined(HARA_USE_GPU)
    a->xx_gpu = new thrust::device_vector<PetscScalar>(n);
    a->yy_gpu = new thrust::device_vector<PetscScalar>(n);
    a->xxs_gpu = 1; /* buffers initially sized for a single column */
    a->yys_gpu = 1;
#endif
    a->xx = new thrust::host_vector<PetscScalar>(n);
    a->yy = new thrust::host_vector<PetscScalar>(n);
    a->xxs = 1;
    a->yys = 1;
  }
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  a->multsetup = PETSC_TRUE;
  PetscFunctionReturn(0);
}
855
MatAssemblyEnd_HARA(Mat A,MatAssemblyType asstype)856 static PetscErrorCode MatAssemblyEnd_HARA(Mat A, MatAssemblyType asstype)
857 {
858 Mat_HARA *a = (Mat_HARA*)A->data;
859 haraHandle_t handle = a->handle->handle;
860 PetscBool kernel = PETSC_FALSE;
861 PetscBool boundtocpu = PETSC_TRUE;
862 MPI_Comm comm;
863 PetscMPIInt size;
864 PetscErrorCode ierr;
865
866 PetscFunctionBegin;
867 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
869 /* TODO REUSABILITY of geometric construction */
870 delete a->hmatrix;
871 delete a->dist_hmatrix;
872 #if defined(HARA_USE_GPU)
873 delete a->hmatrix_gpu;
874 delete a->dist_hmatrix_gpu;
875 #endif
876 /* TODO: other? */
877 BoxCenterAdmissibility<Hara_Real,1> adm1(a->eta,a->leafsize);
878 BoxCenterAdmissibility<Hara_Real,2> adm2(a->eta,a->leafsize);
879 BoxCenterAdmissibility<Hara_Real,3> adm3(a->eta,a->leafsize);
880 if (size > 1) {
881 a->dist_hmatrix = new DistributedHMatrix(A->rmap->n/*,A->symmetric*/);
882 } else {
883 a->hmatrix = new HMatrix(A->rmap->n,A->symmetric);
884 }
885 ierr = MatHaraInferCoordinates_Private(A);CHKERRQ(ierr);
886 switch (a->spacedim) {
887 case 1:
888 if (!a->ptcloud1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing pointcloud");
889 if (a->kernel1) {
890 kernel = PETSC_TRUE;
891 if (size > 1) {
892 buildDistributedHMatrix(*a->dist_hmatrix,*a->ptcloud1,adm1,*a->kernel1,a->leafsize,a->basisord/*,a->basisord*/,a->handle);
893 } else {
894 buildHMatrix(*a->hmatrix,*a->ptcloud1,adm1,*a->kernel1,a->leafsize,a->basisord,a->basisord);
895 }
896 } else {
897 if (size > 1) SETERRQ(comm,PETSC_ERR_SUP,"Construction from sampling not supported in parallel");
898 buildHMatrixStructure(*a->hmatrix,*a->ptcloud1,adm1,a->leafsize,0,0);
899 }
900 break;
901 case 2:
902 if (!a->ptcloud2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing pointcloud");
903 if (a->kernel2) {
904 kernel = PETSC_TRUE;
905 if (size > 1) {
906 buildDistributedHMatrix(*a->dist_hmatrix,*a->ptcloud2,adm2,*a->kernel2,a->leafsize,a->basisord/*,a->basisord*/,a->handle);
907 } else {
908 buildHMatrix(*a->hmatrix,*a->ptcloud2,adm2,*a->kernel2,a->leafsize,a->basisord,a->basisord);
909 }
910 } else {
911 if (size > 1) SETERRQ(comm,PETSC_ERR_SUP,"Construction from sampling not supported in parallel");
912 buildHMatrixStructure(*a->hmatrix,*a->ptcloud2,adm2,a->leafsize,0,0);
913 }
914 break;
915 case 3:
916 if (!a->ptcloud3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing pointcloud");
917 if (a->kernel3) {
918 kernel = PETSC_TRUE;
919 if (size > 1) {
920 buildDistributedHMatrix(*a->dist_hmatrix,*a->ptcloud3,adm3,*a->kernel3,a->leafsize,a->basisord/*,a->basisord*/,a->handle);
921 } else {
922 buildHMatrix(*a->hmatrix,*a->ptcloud3,adm3,*a->kernel3,a->leafsize,a->basisord,a->basisord);
923 }
924 } else {
925 if (size > 1) SETERRQ(comm,PETSC_ERR_SUP,"Construction from sampling not supported in parallel");
926 buildHMatrixStructure(*a->hmatrix,*a->ptcloud3,adm3,a->leafsize,0,0);
927 }
928 break;
929 default:
930 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unhandled dimension %D",a->spacedim);
931 }
932
933 ierr = MatSetUpMultiply_HARA(A);CHKERRQ(ierr);
934
935 #if defined(HARA_USE_GPU)
936 boundtocpu = A->boundtocpu;
937 if (!boundtocpu) {
938 if (size > 1) {
939 a->dist_hmatrix_gpu = new DistributedHMatrix_GPU(*a->dist_hmatrix);
940 } else {
941 a->hmatrix_gpu = new GPU_HMatrix(*a->hmatrix);
942 }
943 }
944 #endif
945 if (size == 1) {
946 if (!kernel && a->sampler) {
947 PetscReal Anorm;
948 bool verbose = false;
949
950 ierr = MatApproximateNorm_Private(a->sampler->GetSamplingMat(),NORM_2,PETSC_DECIDE,&Anorm);CHKERRQ(ierr);
951 if (boundtocpu) {
952 a->sampler->SetGPUSampling(false);
953 hara(a->sampler, *a->hmatrix, a->max_rank, 10 /* TODO */,a->rtol*Anorm,a->bs,handle,verbose);
954 #if defined(HARA_USE_GPU)
955 } else {
956 a->sampler->SetGPUSampling(true);
957 hara(a->sampler, *a->hmatrix_gpu, a->max_rank, 10 /* TODO */,a->rtol*Anorm,a->bs,handle,verbose);
958 #endif
959 }
960 }
961 }
962 #if defined(HARA_USE_GPU)
963 if (kernel) A->offloadmask = PETSC_OFFLOAD_BOTH;
964 else A->offloadmask = boundtocpu ? PETSC_OFFLOAD_CPU : PETSC_OFFLOAD_GPU;
965 #endif
966
967 if (!a->s) a->s = 1.0;
968 A->assembled = PETSC_TRUE;
969
970 if (a->sampler) {
971 PetscBool check = a->check_construction;
972
973 ierr = PetscOptionsGetBool(((PetscObject)A)->options,((PetscObject)A)->prefix,"-mat_hara_check",&check,NULL);CHKERRQ(ierr);
974 if (check) {
975 Mat E,Ae;
976 PetscReal n1,ni,n2;
977 PetscReal n1A,niA,n2A;
978 void (*normfunc)(void);
979
980 Ae = a->sampler->GetSamplingMat();
981 ierr = MatConvert(A,MATSHELL,MAT_INITIAL_MATRIX,&E);CHKERRQ(ierr);
982 ierr = MatShellSetOperation(E,MATOP_NORM,(void (*)(void))MatNorm_HARA);CHKERRQ(ierr);
983 ierr = MatAXPY(E,-1.0,Ae,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
984 ierr = MatNorm(E,NORM_1,&n1);CHKERRQ(ierr);
985 ierr = MatNorm(E,NORM_INFINITY,&ni);CHKERRQ(ierr);
986 ierr = MatNorm(E,NORM_2,&n2);CHKERRQ(ierr);
987
988 ierr = MatGetOperation(Ae,MATOP_NORM,&normfunc);CHKERRQ(ierr);
989 ierr = MatSetOperation(Ae,MATOP_NORM,(void (*)(void))MatNorm_HARA);CHKERRQ(ierr);
990 ierr = MatNorm(Ae,NORM_1,&n1A);CHKERRQ(ierr);
991 ierr = MatNorm(Ae,NORM_INFINITY,&niA);CHKERRQ(ierr);
992 ierr = MatNorm(Ae,NORM_2,&n2A);CHKERRQ(ierr);
993 n1A = PetscMax(n1A,PETSC_SMALL);
994 n2A = PetscMax(n2A,PETSC_SMALL);
995 niA = PetscMax(niA,PETSC_SMALL);
996 ierr = MatSetOperation(Ae,MATOP_NORM,normfunc);CHKERRQ(ierr);
997 ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"MATHARA construction errors: NORM_1 %g, NORM_INFINITY %g, NORM_2 %g (%g %g %g)\n",(double)n1,(double)ni,(double)n2,(double)(n1/n1A),(double)(ni/niA),(double)(n2/n2A));
998 ierr = MatDestroy(&E);CHKERRQ(ierr);
999 }
1000 }
1001 PetscFunctionReturn(0);
1002 }
1003
MatZeroEntries_HARA(Mat A)1004 static PetscErrorCode MatZeroEntries_HARA(Mat A)
1005 {
1006 PetscErrorCode ierr;
1007 PetscMPIInt size;
1008 Mat_HARA *a = (Mat_HARA*)A->data;
1009
1010 PetscFunctionBegin;
1011 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1012 if (size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not yet supported");
1013 else {
1014 a->hmatrix->clearData();
1015 #if defined(HARA_USE_GPU)
1016 if (a->hmatrix_gpu) a->hmatrix_gpu->clearData();
1017 #endif
1018 }
1019 PetscFunctionReturn(0);
1020 }
1021
MatDuplicate_HARA(Mat B,MatDuplicateOption op,Mat * nA)1022 static PetscErrorCode MatDuplicate_HARA(Mat B, MatDuplicateOption op, Mat *nA)
1023 {
1024 Mat A;
1025 Mat_HARA *a, *b = (Mat_HARA*)B->data;
1026 #if defined(PETSC_HAVE_CUDA)
1027 PetscBool iscpu = PETSC_FALSE;
1028 #else
1029 PetscBool iscpu = PETSC_TRUE;
1030 #endif
1031 PetscErrorCode ierr;
1032 MPI_Comm comm;
1033
1034 PetscFunctionBegin;
1035 ierr = PetscObjectGetComm((PetscObject)B,&comm);CHKERRQ(ierr);
1036 ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1037 ierr = MatSetSizes(A,B->rmap->n,B->cmap->n,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
1038 ierr = MatSetType(A,MATHARA);CHKERRQ(ierr);
1039 ierr = MatPropagateSymmetryOptions(B,A);CHKERRQ(ierr);
1040
1041 a = (Mat_HARA*)A->data;
1042 a->s = b->s;
1043 a->spacedim = b->spacedim;
1044 if (b->ptcloud1) {
1045 a->ptcloud1 = new PetscPointCloud1D<PetscReal>(*b->ptcloud1);
1046 } else if (b->ptcloud2) {
1047 a->ptcloud2 = new PetscPointCloud2D<PetscReal>(*b->ptcloud2);
1048 } else if (b->ptcloud3) {
1049 a->ptcloud3 = new PetscPointCloud3D<PetscReal>(*b->ptcloud3);
1050 }
1051 if (op == MAT_COPY_VALUES) {
1052 if (b->kernel1) {
1053 a->kernel1 = new PetscFunctionGenerator1D<PetscScalar>(*b->kernel1);
1054 } else if (b->kernel2) {
1055 a->kernel2 = new PetscFunctionGenerator2D<PetscScalar>(*b->kernel2);
1056 } else if (b->kernel3) {
1057 a->kernel3 = new PetscFunctionGenerator3D<PetscScalar>(*b->kernel3);
1058 }
1059 }
1060
1061 if (b->dist_hmatrix) { a->dist_hmatrix = new DistributedHMatrix(*b->dist_hmatrix); }
1062 #if defined(HARA_USE_GPU)
1063 if (b->dist_hmatrix_gpu) { a->dist_hmatrix_gpu = new DistributedHMatrix_GPU(*b->dist_hmatrix_gpu); }
1064 #endif
1065 if (b->hmatrix) {
1066 a->hmatrix = new HMatrix(*b->hmatrix);
1067 if (op == MAT_DO_NOT_COPY_VALUES) a->hmatrix->clearData();
1068 }
1069 #if defined(HARA_USE_GPU)
1070 if (b->hmatrix_gpu) {
1071 a->hmatrix_gpu = new GPU_HMatrix(*b->hmatrix_gpu);
1072 if (op == MAT_DO_NOT_COPY_VALUES) a->hmatrix_gpu->clearData();
1073 }
1074 #endif
1075
1076 ierr = MatSetUp(A);CHKERRQ(ierr);
1077 ierr = MatSetUpMultiply_HARA(A);CHKERRQ(ierr);
1078 if (op == MAT_COPY_VALUES) {
1079 A->assembled = PETSC_TRUE;
1080 #if defined(PETSC_HAVE_CUDA)
1081 iscpu = B->boundtocpu;
1082 #endif
1083 }
1084 ierr = MatBindToCPU(A,iscpu);CHKERRQ(ierr);
1085
1086 *nA = A;
1087 PetscFunctionReturn(0);
1088 }
1089
MatView_HARA(Mat A,PetscViewer view)1090 static PetscErrorCode MatView_HARA(Mat A, PetscViewer view)
1091 {
1092 Mat_HARA *hara = (Mat_HARA*)A->data;
1093 PetscBool isascii;
1094 PetscErrorCode ierr;
1095 PetscMPIInt size;
1096 PetscViewerFormat format;
1097
1098 PetscFunctionBegin;
1099 ierr = PetscObjectTypeCompare((PetscObject)view,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
1100 ierr = PetscViewerGetFormat(view,&format);CHKERRQ(ierr);
1101 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1102 if (isascii) {
1103 ierr = PetscViewerASCIIPrintf(view," H-Matrix constructed from %s\n",hara->sampler ? "Mat" : (hara->spacedim ? "Kernel" : "None"));CHKERRQ(ierr);
1104 ierr = PetscViewerASCIIPrintf(view," PointCloud dim %D\n",hara->spacedim);CHKERRQ(ierr);
1105 ierr = PetscViewerASCIIPrintf(view," Admissibility parameters: leaf size %D, eta %g\n",hara->leafsize,(double)hara->eta);CHKERRQ(ierr);
1106 if (hara->sampler) {
1107 ierr = PetscViewerASCIIPrintf(view," Sampling parameters: max_rank %D, samples %D, tolerance %g\n",hara->max_rank,hara->bs,(double)hara->rtol);CHKERRQ(ierr);
1108 } else {
1109 ierr = PetscViewerASCIIPrintf(view," Offdiagonal blocks approximation order %D\n",hara->basisord);CHKERRQ(ierr);
1110 }
1111 if (size == 1) {
1112 double dense_mem_cpu = hara->hmatrix ? hara->hmatrix->getDenseMemoryUsage() : 0;
1113 double low_rank_cpu = hara->hmatrix ? hara->hmatrix->getLowRankMemoryUsage() : 0;
1114 #if defined(HARA_USE_GPU)
1115 double dense_mem_gpu = hara->hmatrix_gpu ? hara->hmatrix_gpu->getDenseMemoryUsage() : 0;
1116 double low_rank_gpu = hara->hmatrix_gpu ? hara->hmatrix_gpu->getLowRankMemoryUsage() : 0;
1117 #endif
1118 ierr = PetscViewerASCIIPrintf(view," Memory consumption (CPU): %g (dense) %g (low rank) %g GB (total)\n", dense_mem_cpu, low_rank_cpu, low_rank_cpu + dense_mem_cpu);CHKERRQ(ierr);
1119 #if defined(HARA_USE_GPU)
1120 ierr = PetscViewerASCIIPrintf(view," Memory consumption (GPU): %g (dense) %g (low rank) %g GB (total)\n", dense_mem_gpu, low_rank_gpu, low_rank_gpu + dense_mem_gpu);CHKERRQ(ierr);
1121 #endif
1122 }
1123 }
1124 PetscFunctionReturn(0);
1125 }
1126
MatHaraSetCoords_HARA(Mat A,PetscInt spacedim,const PetscReal coords[],MatHaraKernel kernel,void * kernelctx)1127 static PetscErrorCode MatHaraSetCoords_HARA(Mat A, PetscInt spacedim, const PetscReal coords[], MatHaraKernel kernel, void *kernelctx)
1128 {
1129 Mat_HARA *hara = (Mat_HARA*)A->data;
1130 PetscReal *gcoords;
1131 PetscInt N;
1132 MPI_Comm comm;
1133 PetscMPIInt size;
1134 PetscBool cong;
1135 PetscErrorCode ierr;
1136
1137 PetscFunctionBegin;
1138 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1139 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1140 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1141 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1142 if (!cong) SETERRQ(comm,PETSC_ERR_SUP,"Only for square matrices with congruent layouts");
1143 N = A->rmap->N;
1144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1145 if (size > 1) {
1146 PetscSF sf;
1147 MPI_Datatype dtype;
1148
1149 ierr = MPI_Type_contiguous(spacedim,MPIU_REAL,&dtype);CHKERRQ(ierr);
1150 ierr = MPI_Type_commit(&dtype);CHKERRQ(ierr);
1151
1152 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
1153 ierr = PetscSFSetGraphWithPattern(sf,A->rmap,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
1154 ierr = PetscMalloc1(spacedim*N,&gcoords);CHKERRQ(ierr);
1155 ierr = PetscSFBcastBegin(sf,dtype,coords,gcoords);CHKERRQ(ierr);
1156 ierr = PetscSFBcastEnd(sf,dtype,coords,gcoords);CHKERRQ(ierr);
1157 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1158 ierr = MPI_Type_free(&dtype);CHKERRQ(ierr);
1159 } else gcoords = (PetscReal*)coords;
1160
1161 delete hara->ptcloud1;
1162 delete hara->ptcloud2;
1163 delete hara->ptcloud3;
1164 delete hara->kernel1;
1165 delete hara->kernel2;
1166 delete hara->kernel3;
1167 hara->spacedim = spacedim;
1168 switch (spacedim) {
1169 case 1:
1170 hara->ptcloud1 = new PetscPointCloud1D<PetscReal>(N,gcoords);
1171 if (kernel) hara->kernel1 = new PetscFunctionGenerator1D<PetscScalar>(kernel,kernelctx);
1172 break;
1173 case 2:
1174 hara->ptcloud2 = new PetscPointCloud2D<PetscReal>(N,gcoords);
1175 if (kernel) hara->kernel2 = new PetscFunctionGenerator2D<PetscScalar>(kernel,kernelctx);
1176 break;
1177 case 3:
1178 hara->ptcloud3 = new PetscPointCloud3D<PetscReal>(N,gcoords);
1179 if (kernel) hara->kernel3 = new PetscFunctionGenerator3D<PetscScalar>(kernel,kernelctx);
1180 break;
1181 default:
1182 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unhandled dimension %D",hara->spacedim);
1183 }
1184 if (gcoords != coords) { ierr = PetscFree(gcoords);CHKERRQ(ierr); }
1185 A->preallocated = PETSC_TRUE;
1186 PetscFunctionReturn(0);
1187 }
1188
MatCreate_HARA(Mat A)1189 PETSC_EXTERN PetscErrorCode MatCreate_HARA(Mat A)
1190 {
1191 Mat_HARA *a;
1192 PetscErrorCode ierr;
1193 PetscMPIInt size;
1194
1195 PetscFunctionBegin;
1196 ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1197 A->data = (void*)a;
1198
1199 a->eta = 0.9;
1200 a->leafsize = 32;
1201 a->basisord = 4;
1202 a->max_rank = 64;
1203 a->bs = 32;
1204 a->rtol = 1.e-4;
1205 a->s = 1.0;
1206 a->norm_max_samples = 10;
1207 haraCreateDistributedHandleComm(&a->handle,PetscObjectComm((PetscObject)A));
1208
1209 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1210 ierr = PetscObjectChangeTypeName((PetscObject)A,MATHARA);CHKERRQ(ierr);
1211 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1212
1213 A->ops->destroy = MatDestroy_HARA;
1214 A->ops->view = MatView_HARA;
1215 A->ops->assemblyend = MatAssemblyEnd_HARA;
1216 A->ops->mult = MatMult_HARA;
1217 A->ops->multtranspose = MatMultTranspose_HARA;
1218 A->ops->multadd = MatMultAdd_HARA;
1219 A->ops->multtransposeadd = MatMultTransposeAdd_HARA;
1220 A->ops->scale = MatScale_HARA;
1221 A->ops->duplicate = MatDuplicate_HARA;
1222 A->ops->setfromoptions = MatSetFromOptions_HARA;
1223 A->ops->norm = MatNorm_HARA;
1224 A->ops->zeroentries = MatZeroEntries_HARA;
1225
1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_seqdense_C",MatProductSetFromOptions_HARA);CHKERRQ(ierr);
1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_seqdensecuda_C",MatProductSetFromOptions_HARA);CHKERRQ(ierr);
1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_mpidense_C",MatProductSetFromOptions_HARA);CHKERRQ(ierr);
1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_hara_mpidensecuda_C",MatProductSetFromOptions_HARA);CHKERRQ(ierr);
1230 #if defined(PETSC_HAVE_CUDA)
1231 ierr = PetscFree(A->defaultvectype);CHKERRQ(ierr);
1232 ierr = PetscStrallocpy(VECCUDA,&A->defaultvectype);CHKERRQ(ierr);
1233 #endif
1234 PetscFunctionReturn(0);
1235 }
1236
MatHaraSetSamplingMat(Mat A,Mat B,PetscInt bs,PetscReal tol)1237 PetscErrorCode MatHaraSetSamplingMat(Mat A, Mat B, PetscInt bs, PetscReal tol)
1238 {
1239 PetscBool ishara;
1240 PetscErrorCode ierr;
1241
1242 PetscFunctionBegin;
1243 PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1244 PetscValidType(A,1);
1245 PetscValidHeaderSpecific(B,MAT_CLASSID,2);
1246 ierr = PetscObjectTypeCompare((PetscObject)A,MATHARA,&ishara);CHKERRQ(ierr);
1247 if (ishara) {
1248 Mat_HARA *a = (Mat_HARA*)A->data;
1249
1250
1251 if (!a->sampler) a->sampler = new PetscMatrixSampler();
1252 a->sampler->SetSamplingMat(B);
1253 if (bs > 0) a->bs = bs;
1254 if (tol > 0.) a->rtol = tol;
1255 delete a->kernel1;
1256 delete a->kernel2;
1257 delete a->kernel3;
1258 }
1259 PetscFunctionReturn(0);
1260 }
1261
MatCreateHaraFromKernel(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt spacedim,const PetscReal coords[],MatHaraKernel kernel,void * kernelctx,PetscReal eta,PetscInt leafsize,PetscInt basisord,Mat * nA)1262 PetscErrorCode MatCreateHaraFromKernel(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt spacedim, const PetscReal coords[], MatHaraKernel kernel,void *kernelctx, PetscReal eta, PetscInt leafsize, PetscInt basisord, Mat* nA)
1263 {
1264 Mat A;
1265 Mat_HARA *hara;
1266 #if defined(PETSC_HAVE_CUDA)
1267 PetscBool iscpu = PETSC_FALSE;
1268 #else
1269 PetscBool iscpu = PETSC_TRUE;
1270 #endif
1271 PetscErrorCode ierr;
1272
1273 PetscFunctionBegin;
1274 ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1275 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
1276 ierr = MatSetType(A,MATHARA);CHKERRQ(ierr);
1277 ierr = MatBindToCPU(A,iscpu);CHKERRQ(ierr);
1278 ierr = MatHaraSetCoords_HARA(A,spacedim,coords,kernel,kernelctx);CHKERRQ(ierr);
1279
1280 hara = (Mat_HARA*)A->data;
1281 if (eta > 0.) hara->eta = eta;
1282 if (leafsize > 0) hara->leafsize = leafsize;
1283 if (basisord > 0) hara->basisord = basisord;
1284
1285 *nA = A;
1286 PetscFunctionReturn(0);
1287 }
1288
MatCreateHaraFromMat(Mat B,PetscInt spacedim,const PetscReal coords[],PetscReal eta,PetscInt leafsize,PetscInt maxrank,PetscInt bs,PetscReal rtol,Mat * nA)1289 PetscErrorCode MatCreateHaraFromMat(Mat B, PetscInt spacedim, const PetscReal coords[], PetscReal eta, PetscInt leafsize, PetscInt maxrank, PetscInt bs, PetscReal rtol, Mat *nA)
1290 {
1291 Mat A;
1292 Mat_HARA *hara;
1293 MPI_Comm comm;
1294 #if defined(PETSC_HAVE_CUDA)
1295 PetscBool iscpu = PETSC_FALSE;
1296 #else
1297 PetscBool iscpu = PETSC_TRUE;
1298 #endif
1299 PetscErrorCode ierr;
1300
1301 PetscFunctionBegin;
1302 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1303 PetscValidLogicalCollectiveInt(B,spacedim,2);
1304 PetscValidPointer(nA,4);
1305 ierr = PetscObjectGetComm((PetscObject)B,&comm);CHKERRQ(ierr);
1306 ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1307 ierr = MatSetSizes(A,B->rmap->n,B->cmap->n,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
1308 ierr = MatSetType(A,MATHARA);CHKERRQ(ierr);
1309 ierr = MatBindToCPU(A,iscpu);CHKERRQ(ierr);
1310 if (spacedim) {
1311 ierr = MatHaraSetCoords_HARA(A,spacedim,coords,NULL,NULL);CHKERRQ(ierr);
1312 }
1313 ierr = MatPropagateSymmetryOptions(B,A);CHKERRQ(ierr);
1314 /* if (!A->symmetric) SETERRQ(comm,PETSC_ERR_SUP,"Unsymmetric sampling does not work"); */
1315
1316 hara = (Mat_HARA*)A->data;
1317 hara->sampler = new PetscMatrixSampler(B);
1318 if (eta > 0.) hara->eta = eta;
1319 if (leafsize > 0) hara->leafsize = leafsize;
1320 if (maxrank > 0) hara->max_rank = maxrank;
1321 if (bs > 0) hara->bs = bs;
1322 if (rtol > 0.) hara->rtol = rtol;
1323
1324 *nA = A;
1325 A->preallocated = PETSC_TRUE;
1326 PetscFunctionReturn(0);
1327 }
1328