1 /* -----------------------------------------------------------------
2 * Programmer(s): David J. Gardner @ LLNL
3 * -----------------------------------------------------------------
4 * Acknowledgements: This NVECTOR module is based on the NVECTOR
5 * Serial module by Scott D. Cohen, Alan C.
6 * Hindmarsh, Radu Serban, and Aaron Collier
7 * @ LLNL
8 * -----------------------------------------------------------------
9 * SUNDIALS Copyright Start
10 * Copyright (c) 2002-2021, Lawrence Livermore National Security
11 * and Southern Methodist University.
12 * All rights reserved.
13 *
14 * See the top-level LICENSE and NOTICE files for details.
15 *
16 * SPDX-License-Identifier: BSD-3-Clause
17 * SUNDIALS Copyright End
18 * -----------------------------------------------------------------
19 * This is the implementation file for a POSIX Threads (Pthreads)
20 * implementation of the NVECTOR package using a LOCAL array of
21 * structures to pass data to threads.
22 * -----------------------------------------------------------------*/
23
24 #include <stdio.h>
25 #include <stdlib.h>
26
27 #include <nvector/nvector_pthreads.h>
28 #include <sundials/sundials_math.h>
29 #include <math.h> /* define NAN */
30
31 #define ZERO RCONST(0.0)
32 #define HALF RCONST(0.5)
33 #define ONE RCONST(1.0)
34 #define ONEPT5 RCONST(1.5)
35
36 /* Private functions for special cases of vector operations */
37 static void VCopy_Pthreads(N_Vector x, N_Vector z); /* z=x */
38 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z); /* z=x+y */
39 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z); /* z=x-y */
40 static void VNeg_Pthreads(N_Vector x, N_Vector z); /* z=-x */
41 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z); /* z=c(x+y) */
42 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z); /* z=c(x-y) */
43 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z); /* z=ax+y */
44 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z); /* z=ax-y */
45 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y); /* y <- ax+y */
46 static void VScaleBy_Pthreads(realtype a, N_Vector x); /* x <- ax */
47
48 /* Private functions for special cases of vector array operations */
49 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=X+Y */
50 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=X-Y */
51 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=c(X+Y) */
52 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=c(X-Y) */
53 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=aX+Y */
54 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=aX-Y */
55 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y); /* Y <- aX+Y */
56
57 /* Pthread companion functions for vector operations */
58 static void *N_VLinearSum_PT(void *thread_data);
59 static void *N_VConst_PT(void *thread_data);
60 static void *N_VProd_PT(void *thread_data);
61 static void *N_VDiv_PT(void *thread_data);
62 static void *N_VScale_PT(void *thread_data);
63 static void *N_VAbs_PT(void *thread_data);
64 static void *N_VInv_PT(void *thread_data);
65 static void *N_VAddConst_PT(void *thread_data);
66 static void *N_VCompare_PT(void *thread_data);
67 static void *N_VDotProd_PT(void *thread_data);
68 static void *N_VMaxNorm_PT(void *thread_data);
69 static void *N_VWSqrSum_PT(void *thread_data);
70 static void *N_VMin_PT(void *thread_data);
71 static void *N_VWL2Norm_PT(void *thread_data);
72 static void *N_VL1Norm_PT(void *thread_data);
73 static void *N_VInvTest_PT(void *thread_data);
74 static void *N_VWSqrSumMask_PT(void *thread_data);
75 static void *N_VConstrMask_PT(void *thread_data);
76 static void *N_VMinQuotient_PT(void *thread_data);
77
78 /* Pthread companion functions special cases of vector operations */
79 static void *VCopy_PT(void *thread_data);
80 static void *VSum_PT(void *thread_data);
81 static void *VDiff_PT(void *thread_data);
82 static void *VNeg_PT(void *thread_data);
83 static void *VScaleSum_PT(void *thread_data);
84 static void *VScaleDiff_PT(void *thread_data);
85 static void *VLin1_PT(void *thread_data);
86 static void *VLin2_PT(void *thread_data);
87 static void *VScaleBy_PT(void *thread_data);
88 static void *Vaxpy_PT(void *thread_data);
89
90 /* Pthread companion functions for fused vector operations */
91 static void *N_VLinearCombination_PT(void *thread_data);
92 static void *N_VScaleAddMulti_PT(void *thread_data);
93 static void *N_VDotProdMulti_PT(void *thread_data);
94
95 /* Pthread companion functions for vector array operations */
96 static void *N_VLinearSumVectorArray_PT(void *thread_data);
97 static void *N_VScaleVectorArray_PT(void *thread_data);
98 static void *N_VConstVectorArray_PT(void *thread_data);
99 static void *N_VWrmsNormVectorArray_PT(void *thread_data);
100 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data);
101 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data);
102 static void *N_VLinearCombinationVectorArray_PT(void *thread_data);
103
104 /* Pthread companion functions special cases of vector array operations */
105 static void *VSumVectorArray_PT(void *thread_data);
106 static void *VDiffVectorArray_PT(void *thread_data);
107 static void *VScaleSumVectorArray_PT(void *thread_data);
108 static void *VScaleDiffVectorArray_PT(void *thread_data);
109 static void *VLin1VectorArray_PT(void *thread_data);
110 static void *VLin2VectorArray_PT(void *thread_data);
111 static void *VaxpyVectorArray_PT(void *thread_data);
112
113 /* Pthread companion functions for XBraid interface operations */
114 static void *VBufPack_PT(void *thread_data);
115 static void *VBufUnpack_PT(void *thread_data);
116
117 /* Function to determine loop values for threads */
118 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
119 sunindextype *start, sunindextype *end);
120
121 /* Function to initialize thread data */
122 static void N_VInitThreadData(Pthreads_Data *thread_data);
123
124 /*
125 * -----------------------------------------------------------------
126 * exported functions
127 * -----------------------------------------------------------------
128 */
129
130 /* ----------------------------------------------------------------
131 * Returns vector type ID. Used to identify vector implementation
132 * from abstract N_Vector interface.
133 */
N_VGetVectorID_Pthreads(N_Vector v)134 N_Vector_ID N_VGetVectorID_Pthreads(N_Vector v)
135 {
136 return SUNDIALS_NVEC_PTHREADS;
137 }
138
139 /* ----------------------------------------------------------------------------
140 * Function to create a new empty vector
141 */
142
N_VNewEmpty_Pthreads(sunindextype length,int num_threads)143 N_Vector N_VNewEmpty_Pthreads(sunindextype length, int num_threads)
144 {
145 N_Vector v;
146 N_VectorContent_Pthreads content;
147
148 /* Create an empty vector object */
149 v = NULL;
150 v = N_VNewEmpty();
151 if (v == NULL) return(NULL);
152
153 /* Attach operations */
154
155 /* constructors, destructors, and utility operations */
156 v->ops->nvgetvectorid = N_VGetVectorID_Pthreads;
157 v->ops->nvclone = N_VClone_Pthreads;
158 v->ops->nvcloneempty = N_VCloneEmpty_Pthreads;
159 v->ops->nvdestroy = N_VDestroy_Pthreads;
160 v->ops->nvspace = N_VSpace_Pthreads;
161 v->ops->nvgetarraypointer = N_VGetArrayPointer_Pthreads;
162 v->ops->nvsetarraypointer = N_VSetArrayPointer_Pthreads;
163 v->ops->nvgetlength = N_VGetLength_Pthreads;
164
165 /* standard vector operations */
166 v->ops->nvlinearsum = N_VLinearSum_Pthreads;
167 v->ops->nvconst = N_VConst_Pthreads;
168 v->ops->nvprod = N_VProd_Pthreads;
169 v->ops->nvdiv = N_VDiv_Pthreads;
170 v->ops->nvscale = N_VScale_Pthreads;
171 v->ops->nvabs = N_VAbs_Pthreads;
172 v->ops->nvinv = N_VInv_Pthreads;
173 v->ops->nvaddconst = N_VAddConst_Pthreads;
174 v->ops->nvdotprod = N_VDotProd_Pthreads;
175 v->ops->nvmaxnorm = N_VMaxNorm_Pthreads;
176 v->ops->nvwrmsnormmask = N_VWrmsNormMask_Pthreads;
177 v->ops->nvwrmsnorm = N_VWrmsNorm_Pthreads;
178 v->ops->nvmin = N_VMin_Pthreads;
179 v->ops->nvwl2norm = N_VWL2Norm_Pthreads;
180 v->ops->nvl1norm = N_VL1Norm_Pthreads;
181 v->ops->nvcompare = N_VCompare_Pthreads;
182 v->ops->nvinvtest = N_VInvTest_Pthreads;
183 v->ops->nvconstrmask = N_VConstrMask_Pthreads;
184 v->ops->nvminquotient = N_VMinQuotient_Pthreads;
185
186 /* fused and vector array operations are disabled (NULL) by default */
187
188 /* local reduction operations */
189 v->ops->nvdotprodlocal = N_VDotProd_Pthreads;
190 v->ops->nvmaxnormlocal = N_VMaxNorm_Pthreads;
191 v->ops->nvminlocal = N_VMin_Pthreads;
192 v->ops->nvl1normlocal = N_VL1Norm_Pthreads;
193 v->ops->nvinvtestlocal = N_VInvTest_Pthreads;
194 v->ops->nvconstrmasklocal = N_VConstrMask_Pthreads;
195 v->ops->nvminquotientlocal = N_VMinQuotient_Pthreads;
196 v->ops->nvwsqrsumlocal = N_VWSqrSumLocal_Pthreads;
197 v->ops->nvwsqrsummasklocal = N_VWSqrSumMaskLocal_Pthreads;
198
199 /* XBraid interface operations */
200 v->ops->nvbufsize = N_VBufSize_Pthreads;
201 v->ops->nvbufpack = N_VBufPack_Pthreads;
202 v->ops->nvbufunpack = N_VBufUnpack_Pthreads;
203
204 /* Create content */
205 content = NULL;
206 content = (N_VectorContent_Pthreads) malloc(sizeof *content);
207 if (content == NULL) { N_VDestroy(v); return(NULL); }
208
209 /* Attach content */
210 v->content = content;
211
212 /* Initialize content */
213 content->length = length;
214 content->num_threads = num_threads;
215 content->own_data = SUNFALSE;
216 content->data = NULL;
217
218 return(v);
219 }
220
221 /* ----------------------------------------------------------------------------
222 * Function to create a new vector
223 */
224
N_VNew_Pthreads(sunindextype length,int num_threads)225 N_Vector N_VNew_Pthreads(sunindextype length, int num_threads)
226 {
227 N_Vector v;
228 realtype *data;
229
230 v = NULL;
231 v = N_VNewEmpty_Pthreads(length, num_threads);
232 if (v == NULL) return(NULL);
233
234 /* Create data */
235 if (length > 0) {
236
237 /* Allocate memory */
238 data = NULL;
239 data = (realtype *) malloc(length * sizeof(realtype));
240 if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
241
242 /* Attach data */
243 NV_OWN_DATA_PT(v) = SUNTRUE;
244 NV_DATA_PT(v) = data;
245
246 }
247
248 return(v);
249 }
250
251 /* ----------------------------------------------------------------------------
252 * Function to create a vector with user data component
253 */
254
N_VMake_Pthreads(sunindextype length,int num_threads,realtype * v_data)255 N_Vector N_VMake_Pthreads(sunindextype length, int num_threads, realtype *v_data)
256 {
257 N_Vector v;
258
259 v = NULL;
260 v = N_VNewEmpty_Pthreads(length, num_threads);
261 if (v == NULL) return(NULL);
262
263 if (length > 0) {
264 /* Attach data */
265 NV_OWN_DATA_PT(v) = SUNFALSE;
266 NV_DATA_PT(v) = v_data;
267 }
268
269 return(v);
270 }
271
272 /* ----------------------------------------------------------------------------
273 * Function to create an array of new vectors.
274 */
275
N_VCloneVectorArray_Pthreads(int count,N_Vector w)276 N_Vector* N_VCloneVectorArray_Pthreads(int count, N_Vector w)
277 {
278 N_Vector* vs;
279 int j;
280
281 if (count <= 0) return(NULL);
282
283 vs = NULL;
284 vs = (N_Vector*) malloc(count * sizeof(N_Vector));
285 if(vs == NULL) return(NULL);
286
287 for (j = 0; j < count; j++) {
288 vs[j] = NULL;
289 vs[j] = N_VClone_Pthreads(w);
290 if (vs[j] == NULL) {
291 N_VDestroyVectorArray_Pthreads(vs, j-1);
292 return(NULL);
293 }
294 }
295
296 return(vs);
297 }
298
299 /* ----------------------------------------------------------------------------
300 * Function to create an array of new vectors with NULL data array.
301 */
302
N_VCloneVectorArrayEmpty_Pthreads(int count,N_Vector w)303 N_Vector* N_VCloneVectorArrayEmpty_Pthreads(int count, N_Vector w)
304 {
305 N_Vector* vs;
306 int j;
307
308 if (count <= 0) return(NULL);
309
310 vs = NULL;
311 vs = (N_Vector*) malloc(count * sizeof(N_Vector));
312 if(vs == NULL) return(NULL);
313
314 for (j = 0; j < count; j++) {
315 vs[j] = NULL;
316 vs[j] = N_VCloneEmpty_Pthreads(w);
317 if (vs[j] == NULL) {
318 N_VDestroyVectorArray_Pthreads(vs, j-1);
319 return(NULL);
320 }
321 }
322
323 return(vs);
324 }
325
326 /* ----------------------------------------------------------------------------
327 * Function to free an array created with N_VCloneVectorArray_Pthreads
328 */
329
N_VDestroyVectorArray_Pthreads(N_Vector * vs,int count)330 void N_VDestroyVectorArray_Pthreads(N_Vector* vs, int count)
331 {
332 int j;
333
334 for (j = 0; j < count; j++) N_VDestroy_Pthreads(vs[j]);
335
336 free(vs); vs = NULL;
337
338 return;
339 }
340
341 /* ----------------------------------------------------------------------------
342 * Function to return number of vector elements
343 */
N_VGetLength_Pthreads(N_Vector v)344 sunindextype N_VGetLength_Pthreads(N_Vector v)
345 {
346 return NV_LENGTH_PT(v);
347 }
348
349 /* ----------------------------------------------------------------------------
350 * Function to print a vector to stdout
351 */
352
N_VPrint_Pthreads(N_Vector x)353 void N_VPrint_Pthreads(N_Vector x)
354 {
355 N_VPrintFile_Pthreads(x, stdout);
356 }
357
358 /* ----------------------------------------------------------------------------
359 * Function to print a vector to outfile
360 */
361
N_VPrintFile_Pthreads(N_Vector x,FILE * outfile)362 void N_VPrintFile_Pthreads(N_Vector x, FILE *outfile)
363 {
364 sunindextype i, N;
365 realtype *xd;
366
367 xd = NULL;
368
369 N = NV_LENGTH_PT(x);
370 xd = NV_DATA_PT(x);
371
372 for (i = 0; i < N; i++) {
373 #if defined(SUNDIALS_EXTENDED_PRECISION)
374 fprintf(outfile, "%11.8Lg\n", xd[i]);
375 #elif defined(SUNDIALS_DOUBLE_PRECISION)
376 fprintf(outfile, "%11.8g\n", xd[i]);
377 #else
378 fprintf(outfile, "%11.8g\n", xd[i]);
379 #endif
380 }
381 fprintf(outfile, "\n");
382
383 return;
384 }
385
386 /*
387 * -----------------------------------------------------------------
388 * implementation of vector operations
389 * -----------------------------------------------------------------
390 */
391
392 /* ----------------------------------------------------------------------------
393 * Create new vector from existing vector without attaching data
394 */
395
N_VCloneEmpty_Pthreads(N_Vector w)396 N_Vector N_VCloneEmpty_Pthreads(N_Vector w)
397 {
398 N_Vector v;
399 N_VectorContent_Pthreads content;
400
401 if (w == NULL) return(NULL);
402
403 /* Create vector */
404 v = NULL;
405 v = N_VNewEmpty();
406 if (v == NULL) return(NULL);
407
408 /* Attach operations */
409 if (N_VCopyOps(w, v)) { N_VDestroy(v); return(NULL); }
410
411 /* Create content */
412 content = NULL;
413 content = (N_VectorContent_Pthreads) malloc(sizeof *content);
414 if (content == NULL) { N_VDestroy(v); return(NULL); }
415
416 /* Attach content */
417 v->content = content;
418
419 /* Initialize content */
420 content->length = NV_LENGTH_PT(w);
421 content->num_threads = NV_NUM_THREADS_PT(w);
422 content->own_data = SUNFALSE;
423 content->data = NULL;
424
425 return(v);
426 }
427
428
429 /* ----------------------------------------------------------------------------
430 * Create new vector from existing vector and attach data
431 */
432
N_VClone_Pthreads(N_Vector w)433 N_Vector N_VClone_Pthreads(N_Vector w)
434 {
435 N_Vector v;
436 realtype *data;
437 sunindextype length;
438
439 v = NULL;
440 v = N_VCloneEmpty_Pthreads(w);
441 if (v == NULL) return(NULL);
442
443 length = NV_LENGTH_PT(w);
444
445 /* Create data */
446 if (length > 0) {
447
448 /* Allocate memory */
449 data = NULL;
450 data = (realtype *) malloc(length * sizeof(realtype));
451 if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
452
453 /* Attach data */
454 NV_OWN_DATA_PT(v) = SUNTRUE;
455 NV_DATA_PT(v) = data;
456
457 }
458
459 return(v);
460 }
461
462 /* ----------------------------------------------------------------------------
463 * Destroy vector and free vector memory
464 */
465
N_VDestroy_Pthreads(N_Vector v)466 void N_VDestroy_Pthreads(N_Vector v)
467 {
468 if (v == NULL) return;
469
470 /* free content */
471 if (v->content != NULL) {
472 if (NV_OWN_DATA_PT(v) && NV_DATA_PT(v) != NULL) {
473 free(NV_DATA_PT(v));
474 NV_DATA_PT(v) = NULL;
475 }
476 free(v->content);
477 v->content = NULL;
478 }
479
480 /* free ops and vector */
481 if (v->ops != NULL) { free(v->ops); v->ops = NULL; }
482 free(v); v = NULL;
483
484 return;
485 }
486
487 /* ----------------------------------------------------------------------------
488 * Get storage requirement for vector
489 */
490
N_VSpace_Pthreads(N_Vector v,sunindextype * lrw,sunindextype * liw)491 void N_VSpace_Pthreads(N_Vector v, sunindextype *lrw, sunindextype *liw)
492 {
493 *lrw = NV_LENGTH_PT(v);
494 *liw = 1;
495
496 return;
497 }
498
499
500 /* ----------------------------------------------------------------------------
501 * Get vector data pointer
502 */
503
N_VGetArrayPointer_Pthreads(N_Vector v)504 realtype *N_VGetArrayPointer_Pthreads(N_Vector v)
505 {
506 return((realtype *) NV_DATA_PT(v));
507 }
508
509
510 /* ----------------------------------------------------------------------------
511 * Set vector data pointer
512 */
513
N_VSetArrayPointer_Pthreads(realtype * v_data,N_Vector v)514 void N_VSetArrayPointer_Pthreads(realtype *v_data, N_Vector v)
515 {
516 if (NV_LENGTH_PT(v) > 0) NV_DATA_PT(v) = v_data;
517
518 return;
519 }
520
521
522 /* ----------------------------------------------------------------------------
523 * Compute linear sum z[i] = a*x[i]+b*y[i]
524 */
525
N_VLinearSum_Pthreads(realtype a,N_Vector x,realtype b,N_Vector y,N_Vector z)526 void N_VLinearSum_Pthreads(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z)
527 {
528 sunindextype N;
529 int i, nthreads;
530 pthread_t *threads;
531 Pthreads_Data *thread_data;
532 pthread_attr_t attr;
533
534 realtype c;
535 N_Vector v1, v2;
536 booleantype test;
537
538 if ((b == ONE) && (z == y)) { /* BLAS usage: axpy y <- ax+y */
539 Vaxpy_Pthreads(a,x,y);
540 return;
541 }
542
543 if ((a == ONE) && (z == x)) { /* BLAS usage: axpy x <- by+x */
544 Vaxpy_Pthreads(b,y,x);
545 return;
546 }
547
548 /* Case: a == b == 1.0 */
549
550 if ((a == ONE) && (b == ONE)) {
551 VSum_Pthreads(x, y, z);
552 return;
553 }
554
555 /* Cases: (1) a == 1.0, b = -1.0, (2) a == -1.0, b == 1.0 */
556
557 if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
558 v1 = test ? y : x;
559 v2 = test ? x : y;
560 VDiff_Pthreads(v2, v1, z);
561 return;
562 }
563
564 /* Cases: (1) a == 1.0, b == other or 0.0, (2) a == other or 0.0, b == 1.0 */
565 /* if a or b is 0.0, then user should have called N_VScale */
566
567 if ((test = (a == ONE)) || (b == ONE)) {
568 c = test ? b : a;
569 v1 = test ? y : x;
570 v2 = test ? x : y;
571 VLin1_Pthreads(c, v1, v2, z);
572 return;
573 }
574
575 /* Cases: (1) a == -1.0, b != 1.0, (2) a != 1.0, b == -1.0 */
576
577 if ((test = (a == -ONE)) || (b == -ONE)) {
578 c = test ? b : a;
579 v1 = test ? y : x;
580 v2 = test ? x : y;
581 VLin2_Pthreads(c, v1, v2, z);
582 return;
583 }
584
585 /* Case: a == b */
586 /* catches case both a and b are 0.0 - user should have called N_VConst */
587
588 if (a == b) {
589 VScaleSum_Pthreads(a, x, y, z);
590 return;
591 }
592
593 /* Case: a == -b */
594
595 if (a == -b) {
596 VScaleDiff_Pthreads(a, x, y, z);
597 return;
598 }
599
600 /* Do all cases not handled above:
601 (1) a == other, b == 0.0 - user should have called N_VScale
602 (2) a == 0.0, b == other - user should have called N_VScale
603 (3) a,b == other, a !=b, a != -b */
604
605 /* allocate threads and thread data structs */
606 N = NV_LENGTH_PT(x);
607 nthreads = NV_NUM_THREADS_PT(x);
608 threads = (pthread_t *) malloc(nthreads*sizeof(pthread_t));
609 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
610
611 /* set thread attributes */
612 pthread_attr_init(&attr);
613 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
614
615 for (i=0; i<nthreads; i++) {
616 /* initialize thread data */
617 N_VInitThreadData(&thread_data[i]);
618
619 /* compute start and end loop index for thread */
620 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
621
622 /* pack thread data */
623 thread_data[i].c1 = a;
624 thread_data[i].c2 = b;
625 thread_data[i].v1 = NV_DATA_PT(x);
626 thread_data[i].v2 = NV_DATA_PT(y);
627 thread_data[i].v3 = NV_DATA_PT(z);
628
629 /* create threads and call pthread companion function */
630 pthread_create(&threads[i], &attr, N_VLinearSum_PT, (void *) &thread_data[i]);
631 }
632
633 /* wait for all threads to finish */
634 for (i=0; i<nthreads; i++) {
635 pthread_join(threads[i], NULL);
636 }
637
638 /* clean up and return */
639 pthread_attr_destroy(&attr);
640 free(threads);
641 free(thread_data);
642
643 return;
644 }
645
646 /* ----------------------------------------------------------------------------
647 * Pthread companion function to N_VLinearSum
648 */
649
N_VLinearSum_PT(void * thread_data)650 static void *N_VLinearSum_PT(void *thread_data)
651 {
652 sunindextype i, start, end;
653 realtype a, b;
654 realtype *xd, *yd, *zd;
655 Pthreads_Data *my_data;
656
657 /* extract thread data */
658 my_data = (Pthreads_Data *) thread_data;
659
660 a = my_data->c1;
661 b = my_data->c2;
662 xd = my_data->v1;
663 yd = my_data->v2;
664 zd = my_data->v3;
665
666 start = my_data->start;
667 end = my_data->end;
668
669 /* compute linear sum */
670 for (i = start; i < end; i++){
671 zd[i] = (a*xd[i])+(b*yd[i]);
672 }
673
674 /* exit */
675 pthread_exit(NULL);
676 }
677
678
679 /* ----------------------------------------------------------------------------
680 * Assigns constant value to all vector elements, z[i] = c
681 */
682
N_VConst_Pthreads(realtype c,N_Vector z)683 void N_VConst_Pthreads(realtype c, N_Vector z)
684 {
685 sunindextype N;
686 int i, nthreads;
687 pthread_t *threads;
688 Pthreads_Data *thread_data;
689 pthread_attr_t attr;
690
691 /* allocate threads and thread data structs */
692 N = NV_LENGTH_PT(z);
693 nthreads = NV_NUM_THREADS_PT(z);
694 threads = malloc(nthreads*sizeof(pthread_t));
695 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
696
697 /* set thread attributes */
698 pthread_attr_init(&attr);
699 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
700
701 for (i=0; i<nthreads; i++) {
702 /* initialize thread data */
703 N_VInitThreadData(&thread_data[i]);
704
705 /* compute start and end loop index for thread */
706 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
707
708 /* pack thread data */
709 thread_data[i].c1 = c;
710 thread_data[i].v1 = NV_DATA_PT(z);
711
712 /* create threads and call pthread companion function */
713 pthread_create(&threads[i], &attr, N_VConst_PT, (void *) &thread_data[i]);
714 }
715
716 /* wait for all threads to finish */
717 for (i=0; i<nthreads; i++) {
718 pthread_join(threads[i], NULL);
719 }
720
721 /* clean up and return */
722 pthread_attr_destroy(&attr);
723 free(threads);
724 free(thread_data);
725
726 return;
727 }
728
729
730 /* ----------------------------------------------------------------------------
731 * Pthread companion function to N_VConst
732 */
733
N_VConst_PT(void * thread_data)734 static void *N_VConst_PT(void *thread_data)
735 {
736 sunindextype i, start, end;
737 realtype c;
738 realtype *zd;
739 Pthreads_Data *my_data;
740
741 /* extract thread data */
742 my_data = (Pthreads_Data *) thread_data;
743
744 c = my_data->c1;
745 zd = my_data->v1;
746
747 start = my_data->start;
748 end = my_data->end;
749
750 /* assign constant values */
751 for (i = start; i < end; i++)
752 zd[i] = c;
753
754 /* exit */
755 pthread_exit(NULL);
756 }
757
758
759 /* ----------------------------------------------------------------------------
760 * Compute componentwise product z[i] = x[i]*y[i]
761 */
762
N_VProd_Pthreads(N_Vector x,N_Vector y,N_Vector z)763 void N_VProd_Pthreads(N_Vector x, N_Vector y, N_Vector z)
764 {
765 sunindextype N;
766 int i, nthreads;
767 pthread_t *threads;
768 Pthreads_Data *thread_data;
769 pthread_attr_t attr;
770
771 /* allocate threads and thread data structs */
772 N = NV_LENGTH_PT(x);
773 nthreads = NV_NUM_THREADS_PT(x);
774 threads = malloc(nthreads*sizeof(pthread_t));
775 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
776
777 /* set thread attributes */
778 pthread_attr_init(&attr);
779 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
780
781 for (i=0; i<nthreads; i++) {
782 /* initialize thread data */
783 N_VInitThreadData(&thread_data[i]);
784
785 /* compute start and end loop index for thread */
786 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
787
788 /* pack thread data */
789 thread_data[i].v1 = NV_DATA_PT(x);
790 thread_data[i].v2 = NV_DATA_PT(y);
791 thread_data[i].v3 = NV_DATA_PT(z);
792
793 /* create threads and call pthread companion function */
794 pthread_create(&threads[i], &attr, N_VProd_PT, (void *) &thread_data[i]);
795 }
796
797 /* wait for all threads to finish */
798 for (i=0; i<nthreads; i++) {
799 pthread_join(threads[i], NULL);
800 }
801
802 /* clean up and exit */
803 pthread_attr_destroy(&attr);
804 free(threads);
805 free(thread_data);
806
807 return;
808 }
809
810 /* ----------------------------------------------------------------------------
811 * Pthread companion function to N_VProd
812 */
813
N_VProd_PT(void * thread_data)814 static void *N_VProd_PT(void *thread_data)
815 {
816 sunindextype i, start, end;
817 realtype *xd, *yd, *zd;
818 Pthreads_Data *my_data;
819
820 /* extract thread data */
821 my_data = (Pthreads_Data *) thread_data;
822
823 xd = my_data->v1;
824 yd = my_data->v2;
825 zd = my_data->v3;
826
827 start = my_data->start;
828 end = my_data->end;
829
830 /* compute componentwise product */
831 for (i = start; i < end; i++)
832 zd[i] = xd[i]*yd[i];
833
834 /* exit */
835 pthread_exit(NULL);
836 }
837
838
839 /* ----------------------------------------------------------------------------
840 * Compute componentwise division z[i] = x[i]/y[i]
841 */
842
N_VDiv_Pthreads(N_Vector x,N_Vector y,N_Vector z)843 void N_VDiv_Pthreads(N_Vector x, N_Vector y, N_Vector z)
844 {
845 sunindextype N;
846 int i, nthreads;
847 pthread_t *threads;
848 Pthreads_Data *thread_data;
849 pthread_attr_t attr;
850
851 /* allocate threads and thread data structs */
852 N = NV_LENGTH_PT(x);
853 nthreads = NV_NUM_THREADS_PT(x);
854 threads = malloc(nthreads*sizeof(pthread_t));
855 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
856
857 /* set thread attributes */
858 pthread_attr_init(&attr);
859 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
860
861 for (i=0; i<nthreads; i++) {
862 /* initialize thread data */
863 N_VInitThreadData(&thread_data[i]);
864
865 /* compute start and end loop index for thread */
866 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
867
868 /* pack thread data */
869 thread_data[i].v1 = NV_DATA_PT(x);
870 thread_data[i].v2 = NV_DATA_PT(y);
871 thread_data[i].v3 = NV_DATA_PT(z);
872
873 /* create threads and call pthread companion function */
874 pthread_create(&threads[i], &attr, N_VDiv_PT, (void *) &thread_data[i]);
875 }
876
877 /* wait for all threads to finish */
878 for (i=0; i<nthreads; i++) {
879 pthread_join(threads[i], NULL);
880 }
881
882 /* clean up and return */
883 pthread_attr_destroy(&attr);
884 free(threads);
885 free(thread_data);
886
887 return;
888 }
889
890
891 /* ----------------------------------------------------------------------------
892 * Pthread companion function to N_VDiv
893 */
894
N_VDiv_PT(void * thread_data)895 static void *N_VDiv_PT(void *thread_data)
896 {
897 sunindextype i, start, end;
898 realtype *xd, *yd, *zd;
899 Pthreads_Data *my_data;
900
901 /* extract thread data */
902 my_data = (Pthreads_Data *) thread_data;
903
904 xd = my_data->v1;
905 yd = my_data->v2;
906 zd = my_data->v3;
907
908 start = my_data->start;
909 end = my_data->end;
910
911 /* compute componentwise division */
912 for (i = start; i < end; i++)
913 zd[i] = xd[i]/yd[i];
914
915 /* exit */
916 pthread_exit(NULL);
917 }
918
919
920 /* ----------------------------------------------------------------------------
921 * Compute scaler multiplication z[i] = c*x[i]
922 */
923
N_VScale_Pthreads(realtype c,N_Vector x,N_Vector z)924 void N_VScale_Pthreads(realtype c, N_Vector x, N_Vector z)
925 {
926 sunindextype N;
927 int i, nthreads;
928 pthread_t *threads;
929 Pthreads_Data *thread_data;
930 pthread_attr_t attr;
931
932 if (z == x) { /* BLAS usage: scale x <- cx */
933 VScaleBy_Pthreads(c, x);
934 return;
935 }
936
937 if (c == ONE) {
938 VCopy_Pthreads(x, z);
939 } else if (c == -ONE) {
940 VNeg_Pthreads(x, z);
941 } else {
942 /* allocate threads and thread data structs */
943 N = NV_LENGTH_PT(x);
944 nthreads = NV_NUM_THREADS_PT(x);
945 threads = malloc(nthreads*sizeof(pthread_t));
946 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
947
948 /* set thread attributes */
949 pthread_attr_init(&attr);
950 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
951
952 for (i=0; i<nthreads; i++) {
953 /* initialize thread data */
954 N_VInitThreadData(&thread_data[i]);
955
956 /* compute start and end loop index for thread */
957 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
958
959 /* pack thread data */
960 thread_data[i].c1 = c;
961 thread_data[i].v1 = NV_DATA_PT(x);
962 thread_data[i].v2 = NV_DATA_PT(z);
963
964 /* create threads and call pthread companion function */
965 pthread_create(&threads[i], &attr, N_VScale_PT, (void *) &thread_data[i]);
966 }
967
968 /* wait for all threads to finish */
969 for (i=0; i<nthreads; i++) {
970 pthread_join(threads[i], NULL);
971 }
972
973 /* clean up */
974 pthread_attr_destroy(&attr);
975 free(threads);
976 free(thread_data);
977 }
978
979 return;
980 }
981
982
983 /* ----------------------------------------------------------------------------
984 * Pthread companion function to N_VScale
985 */
986
N_VScale_PT(void * thread_data)987 static void *N_VScale_PT(void *thread_data)
988 {
989 sunindextype i, start, end;
990 realtype c;
991 realtype *xd, *zd;
992 Pthreads_Data *my_data;
993
994 /* extract thread data */
995 my_data = (Pthreads_Data *) thread_data;
996
997 c = my_data->c1;
998 xd = my_data->v1;
999 zd = my_data->v2;
1000
1001 start = my_data->start;
1002 end = my_data->end;
1003
1004 /* compute scaler multiplication */
1005 for (i = start; i < end; i++)
1006 zd[i] = c*xd[i];
1007
1008 /* exit */
1009 pthread_exit(NULL);
1010 }
1011
1012
1013 /* ----------------------------------------------------------------------------
1014 * Compute absolute value of vector components z[i] = SUNRabs(x[i])
1015 */
1016
N_VAbs_Pthreads(N_Vector x,N_Vector z)1017 void N_VAbs_Pthreads(N_Vector x, N_Vector z)
1018 {
1019 sunindextype N;
1020 int i, nthreads;
1021 pthread_t *threads;
1022 Pthreads_Data *thread_data;
1023 pthread_attr_t attr;
1024
1025 /* allocate threads and thread data structs */
1026 N = NV_LENGTH_PT(x);
1027 nthreads = NV_NUM_THREADS_PT(x);
1028 threads = malloc(nthreads*sizeof(pthread_t));
1029 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1030
1031 /* set thread attributes */
1032 pthread_attr_init(&attr);
1033 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1034
1035 for (i=0; i<nthreads; i++) {
1036 /* initialize thread data */
1037 N_VInitThreadData(&thread_data[i]);
1038
1039 /* compute start and end loop index for thread */
1040 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1041
1042 /* pack thread data */
1043 thread_data[i].v1 = NV_DATA_PT(x);
1044 thread_data[i].v2 = NV_DATA_PT(z);
1045
1046 /* create threads and call pthread companion function */
1047 pthread_create(&threads[i], &attr, N_VAbs_PT, (void *) &thread_data[i]);
1048 }
1049
1050 /* wait for all threads to finish */
1051 for (i=0; i<nthreads; i++) {
1052 pthread_join(threads[i], NULL);
1053 }
1054
1055 /* clean up and return */
1056 pthread_attr_destroy(&attr);
1057 free(threads);
1058 free(thread_data);
1059
1060 return;
1061 }
1062
1063
1064 /* ----------------------------------------------------------------------------
1065 * Pthread companion function to N_VAbs
1066 */
1067
N_VAbs_PT(void * thread_data)1068 static void *N_VAbs_PT(void *thread_data)
1069 {
1070 sunindextype i, start, end;
1071 realtype *xd, *zd;
1072 Pthreads_Data *my_data;
1073
1074 /* extract thread data */
1075 my_data = (Pthreads_Data *) thread_data;
1076
1077 xd = my_data->v1;
1078 zd = my_data->v2;
1079
1080 start = my_data->start;
1081 end = my_data->end;
1082
1083 /* compute absolute value of components */
1084 for (i = start; i < end; i++)
1085 zd[i] = SUNRabs(xd[i]);
1086
1087 /* exit */
1088 pthread_exit(NULL);
1089 }
1090
1091
1092 /* ----------------------------------------------------------------------------
1093 * Compute componentwise inverse z[i] = 1 / x[i]
1094 */
1095
N_VInv_Pthreads(N_Vector x,N_Vector z)1096 void N_VInv_Pthreads(N_Vector x, N_Vector z)
1097 {
1098 sunindextype N;
1099 int i, nthreads;
1100 pthread_t *threads;
1101 Pthreads_Data *thread_data;
1102 pthread_attr_t attr;
1103
1104 /* allocate threads and thread data structs */
1105 N = NV_LENGTH_PT(x);
1106 nthreads = NV_NUM_THREADS_PT(x);
1107 threads = malloc(nthreads*sizeof(pthread_t));
1108 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1109
1110 /* set thread attributes */
1111 pthread_attr_init(&attr);
1112 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1113
1114 for (i=0; i<nthreads; i++) {
1115 /* initialize thread data */
1116 N_VInitThreadData(&thread_data[i]);
1117
1118 /* compute start and end loop index for thread */
1119 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1120
1121 /* pack thread data */
1122 thread_data[i].v1 = NV_DATA_PT(x);
1123 thread_data[i].v2 = NV_DATA_PT(z);
1124
1125 /* create threads and call pthread companion function */
1126 pthread_create(&threads[i], &attr, N_VInv_PT, (void *) &thread_data[i]);
1127 }
1128
1129 /* wait for all threads to finish */
1130 for (i=0; i<nthreads; i++) {
1131 pthread_join(threads[i], NULL);
1132 }
1133
1134 /* clean up and return */
1135 pthread_attr_destroy(&attr);
1136 free(threads);
1137 free(thread_data);
1138
1139 return;
1140 }
1141
1142
1143 /* ----------------------------------------------------------------------------
1144 * Pthread companion function to N_VInv
1145 */
1146
N_VInv_PT(void * thread_data)1147 static void *N_VInv_PT(void *thread_data)
1148 {
1149 sunindextype i, start, end;
1150 realtype *xd, *zd;
1151 Pthreads_Data *my_data;
1152
1153 /* extract thread data */
1154 my_data = (Pthreads_Data *) thread_data;
1155
1156 xd = my_data->v1;
1157 zd = my_data->v2;
1158
1159 start = my_data->start;
1160 end = my_data->end;
1161
1162 /* compute componentwise inverse */
1163 for (i = start; i < end; i++)
1164 zd[i] = ONE/xd[i];
1165
1166 /* exit */
1167 pthread_exit(NULL);
1168 }
1169
1170
1171 /* ----------------------------------------------------------------------------
1172 * Compute componentwise addition of a scaler to a vector z[i] = x[i] + b
1173 */
1174
N_VAddConst_Pthreads(N_Vector x,realtype b,N_Vector z)1175 void N_VAddConst_Pthreads(N_Vector x, realtype b, N_Vector z)
1176 {
1177 sunindextype N;
1178 int i, nthreads;
1179 pthread_t *threads;
1180 Pthreads_Data *thread_data;
1181 pthread_attr_t attr;
1182
1183 /* allocate threads and thread data structs */
1184 N = NV_LENGTH_PT(x);
1185 nthreads = NV_NUM_THREADS_PT(x);
1186 threads = malloc(nthreads*sizeof(pthread_t));
1187 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1188
1189 /* set thread attributes */
1190 pthread_attr_init(&attr);
1191 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1192
1193 for (i=0; i<nthreads; i++) {
1194 /* initialize thread data */
1195 N_VInitThreadData(&thread_data[i]);
1196
1197 /* compute start and end loop index for thread */
1198 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1199
1200 /* pack thread data */
1201 thread_data[i].c1 = b;
1202 thread_data[i].v1 = NV_DATA_PT(x);
1203 thread_data[i].v2 = NV_DATA_PT(z);
1204
1205 /* create threads and call pthread companion function */
1206 pthread_create(&threads[i], &attr, N_VAddConst_PT, (void *) &thread_data[i]);
1207 }
1208
1209 /* wait for all threads to finish */
1210 for (i=0; i<nthreads; i++) {
1211 pthread_join(threads[i], NULL);
1212 }
1213
1214 /* clean up and return */
1215 pthread_attr_destroy(&attr);
1216 free(threads);
1217 free(thread_data);
1218
1219 return;
1220 }
1221
1222
1223 /* ----------------------------------------------------------------------------
1224 * Pthread companion function to N_VAddConst
1225 */
1226
N_VAddConst_PT(void * thread_data)1227 static void *N_VAddConst_PT(void *thread_data)
1228 {
1229 sunindextype i, start, end;
1230 realtype b;
1231 realtype *xd, *zd;
1232 Pthreads_Data *my_data;
1233
1234 /* extract thread data */
1235 my_data = (Pthreads_Data *) thread_data;
1236
1237 b = my_data->c1;
1238 xd = my_data->v1;
1239 zd = my_data->v2;
1240
1241 start = my_data->start;
1242 end = my_data->end;
1243
1244 /* compute componentwise constant addition */
1245 for (i = start; i < end; i++)
1246 zd[i] = xd[i] + b;
1247
1248 /* exit */
1249 pthread_exit(NULL);
1250 }
1251
1252
1253 /* ----------------------------------------------------------------------------
1254 * Computes the dot product of two vectors, a = sum(x[i]*y[i])
1255 */
1256
N_VDotProd_Pthreads(N_Vector x,N_Vector y)1257 realtype N_VDotProd_Pthreads(N_Vector x, N_Vector y)
1258 {
1259 sunindextype N;
1260 int i, nthreads;
1261 pthread_t *threads;
1262 Pthreads_Data *thread_data;
1263 pthread_attr_t attr;
1264 pthread_mutex_t global_mutex;
1265 realtype sum = ZERO;
1266
1267 /* allocate threads and thread data structs */
1268 N = NV_LENGTH_PT(x);
1269 nthreads = NV_NUM_THREADS_PT(x);
1270 threads = malloc(nthreads*sizeof(pthread_t));
1271 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1272
1273 /* set thread attributes */
1274 pthread_attr_init(&attr);
1275 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1276
1277 /* lock for reduction */
1278 pthread_mutex_init(&global_mutex, NULL);
1279
1280 for (i=0; i<nthreads; i++) {
1281 /* initialize thread data */
1282 N_VInitThreadData(&thread_data[i]);
1283
1284 /* compute start and end loop index for thread */
1285 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1286
1287 /* pack thread data */
1288 thread_data[i].v1 = NV_DATA_PT(x);
1289 thread_data[i].v2 = NV_DATA_PT(y);
1290 thread_data[i].global_val = ∑
1291 thread_data[i].global_mutex = &global_mutex;
1292
1293 /* create threads and call pthread companion function */
1294 pthread_create(&threads[i], &attr, N_VDotProd_PT, (void *) &thread_data[i]);
1295 }
1296
1297 /* wait for all threads to finish */
1298 for (i=0; i<nthreads; i++) {
1299 pthread_join(threads[i], NULL);
1300 }
1301
1302 /* clean up and return */
1303 pthread_attr_destroy(&attr);
1304 pthread_mutex_destroy(&global_mutex);
1305 free(threads);
1306 free(thread_data);
1307
1308 return(sum);
1309 }
1310
1311
1312 /* ----------------------------------------------------------------------------
1313 * Pthread companion function to N_VDotProd
1314 */
1315
N_VDotProd_PT(void * thread_data)1316 static void *N_VDotProd_PT(void *thread_data)
1317 {
1318 sunindextype i, start, end;
1319 realtype *xd, *yd;
1320 realtype local_sum, *global_sum;
1321 Pthreads_Data *my_data;
1322 pthread_mutex_t *global_mutex;
1323
1324 /* extract thread data */
1325 my_data = (Pthreads_Data *) thread_data;
1326
1327 xd = my_data->v1;
1328 yd = my_data->v2;
1329
1330 global_sum = my_data->global_val;
1331 global_mutex = my_data->global_mutex;
1332
1333 start = my_data->start;
1334 end = my_data->end;
1335
1336 /* compute dot product */
1337 local_sum = ZERO;
1338 for (i = start; i < end; i++)
1339 local_sum += xd[i] * yd[i];
1340
1341 /* update global sum */
1342 pthread_mutex_lock(global_mutex);
1343 *global_sum += local_sum;
1344 pthread_mutex_unlock(global_mutex);
1345
1346 /* exit */
1347 pthread_exit(NULL);
1348 }
1349
1350
1351 /* ----------------------------------------------------------------------------
1352 * Computes max norm of the vector
1353 */
1354
N_VMaxNorm_Pthreads(N_Vector x)1355 realtype N_VMaxNorm_Pthreads(N_Vector x)
1356 {
1357 sunindextype N;
1358 int i, nthreads;
1359 pthread_t *threads;
1360 Pthreads_Data *thread_data;
1361 pthread_attr_t attr;
1362 pthread_mutex_t global_mutex;
1363 realtype max = ZERO;
1364
1365 /* allocate threads and thread data structs */
1366 N = NV_LENGTH_PT(x);
1367 nthreads = NV_NUM_THREADS_PT(x);
1368 threads = malloc(nthreads*sizeof(pthread_t));
1369 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1370
1371 /* set thread attributes */
1372 pthread_attr_init(&attr);
1373 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1374
1375 /* lock for reduction */
1376 pthread_mutex_init(&global_mutex, NULL);
1377
1378 for (i=0; i<nthreads; i++) {
1379 /* initialize thread data */
1380 N_VInitThreadData(&thread_data[i]);
1381
1382 /* compute start and end loop index for thread */
1383 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1384
1385 /* pack thread data */
1386 thread_data[i].v1 = NV_DATA_PT(x);
1387 thread_data[i].global_val = &max;
1388 thread_data[i].global_mutex = &global_mutex;
1389
1390 /* create threads and call pthread companion function */
1391 pthread_create(&threads[i], &attr, N_VMaxNorm_PT, (void *) &thread_data[i]);
1392 }
1393
1394 /* wait for all threads to finish */
1395 for (i=0; i<nthreads; i++) {
1396 pthread_join(threads[i], NULL);
1397 }
1398
1399 /* clean up and return */
1400 pthread_attr_destroy(&attr);
1401 pthread_mutex_destroy(&global_mutex);
1402 free(threads);
1403 free(thread_data);
1404
1405 return(max);
1406 }
1407
1408
1409 /* ----------------------------------------------------------------------------
1410 * Pthread companion function to N_VMaxNorm
1411 */
1412
N_VMaxNorm_PT(void * thread_data)1413 static void *N_VMaxNorm_PT(void *thread_data)
1414 {
1415 sunindextype i, start, end;
1416 realtype *xd;
1417 realtype local_max, *global_max;
1418 Pthreads_Data *my_data;
1419 pthread_mutex_t *global_mutex;
1420
1421 /* extract thread data */
1422 my_data = (Pthreads_Data *) thread_data;
1423
1424 xd = my_data->v1;
1425
1426 global_max = my_data->global_val;
1427 global_mutex = my_data->global_mutex;
1428
1429 start = my_data->start;
1430 end = my_data->end;
1431
1432 /* find local max */
1433 local_max = ZERO;
1434 for (i = start; i < end; i++)
1435 if (SUNRabs(xd[i]) > local_max) local_max = SUNRabs(xd[i]);
1436
1437 /* update global max */
1438 pthread_mutex_lock(global_mutex);
1439 if (local_max > *global_max) {
1440 *global_max = local_max;
1441 }
1442 pthread_mutex_unlock(global_mutex);
1443
1444 /* exit */
1445 pthread_exit(NULL);
1446 }
1447
1448
1449 /* ----------------------------------------------------------------------------
1450 * Computes weighted root mean square norm of a vector
1451 */
1452
N_VWrmsNorm_Pthreads(N_Vector x,N_Vector w)1453 realtype N_VWrmsNorm_Pthreads(N_Vector x, N_Vector w)
1454 {
1455 return(SUNRsqrt(N_VWSqrSumLocal_Pthreads(x, w)/(NV_LENGTH_PT(x))));
1456 }
1457
1458
1459 /* ----------------------------------------------------------------------------
1460 * Computes weighted square sum of a vector
1461 */
1462
N_VWSqrSumLocal_Pthreads(N_Vector x,N_Vector w)1463 realtype N_VWSqrSumLocal_Pthreads(N_Vector x, N_Vector w)
1464 {
1465 sunindextype N;
1466 int i, nthreads;
1467 pthread_t *threads;
1468 Pthreads_Data *thread_data;
1469 pthread_attr_t attr;
1470 pthread_mutex_t global_mutex;
1471 realtype sum = ZERO;
1472
1473 /* allocate threads and thread data structs */
1474 N = NV_LENGTH_PT(x);
1475 nthreads = NV_NUM_THREADS_PT(x);
1476 threads = malloc(nthreads*sizeof(pthread_t));
1477 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1478
1479 /* set thread attributes */
1480 pthread_attr_init(&attr);
1481 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1482
1483 /* lock for reduction */
1484 pthread_mutex_init(&global_mutex, NULL);
1485
1486 for (i=0; i<nthreads; i++) {
1487 /* initialize thread data */
1488 N_VInitThreadData(&thread_data[i]);
1489
1490 /* compute start and end loop index for thread */
1491 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1492
1493 /* pack thread data */
1494 thread_data[i].v1 = NV_DATA_PT(x);
1495 thread_data[i].v2 = NV_DATA_PT(w);
1496 thread_data[i].global_val = ∑
1497 thread_data[i].global_mutex = &global_mutex;
1498
1499 /* create threads and call pthread companion function */
1500 pthread_create(&threads[i], &attr, N_VWSqrSum_PT, (void *) &thread_data[i]);
1501 }
1502
1503 /* wait for all threads to finish */
1504 for (i=0; i<nthreads; i++) {
1505 pthread_join(threads[i], NULL);
1506 }
1507
1508 /* clean up and return */
1509 pthread_attr_destroy(&attr);
1510 pthread_mutex_destroy(&global_mutex);
1511 free(threads);
1512 free(thread_data);
1513
1514 return(sum);
1515 }
1516
1517
1518 /* ----------------------------------------------------------------------------
1519 * Pthread companion function to N_VWrmsNorm
1520 */
1521
N_VWSqrSum_PT(void * thread_data)1522 static void *N_VWSqrSum_PT(void *thread_data)
1523 {
1524 sunindextype i, start, end;
1525 realtype *xd, *wd;
1526 realtype local_sum, *global_sum;
1527 Pthreads_Data *my_data;
1528 pthread_mutex_t *global_mutex;
1529
1530 /* extract thread data */
1531 my_data = (Pthreads_Data *) thread_data;
1532
1533 xd = my_data->v1;
1534 wd = my_data->v2;
1535
1536 global_sum = my_data->global_val;
1537 global_mutex = my_data->global_mutex;
1538
1539 start = my_data->start;
1540 end = my_data->end;
1541
1542 /* compute wrms norm */
1543 local_sum = ZERO;
1544 for (i = start; i < end; i++)
1545 local_sum += SUNSQR(xd[i] * wd[i]);
1546
1547 /* update global sum */
1548 pthread_mutex_lock(global_mutex);
1549 *global_sum += local_sum;
1550 pthread_mutex_unlock(global_mutex);
1551
1552 /* exit */
1553 pthread_exit(NULL);
1554 }
1555
1556
1557 /* ----------------------------------------------------------------------------
1558 * Computes weighted root mean square norm of a masked vector
1559 */
1560
N_VWrmsNormMask_Pthreads(N_Vector x,N_Vector w,N_Vector id)1561 realtype N_VWrmsNormMask_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1562 {
1563 return(SUNRsqrt(N_VWSqrSumMaskLocal_Pthreads(x, w, id)/(NV_LENGTH_PT(x))));
1564 }
1565
1566
1567 /* ----------------------------------------------------------------------------
1568 * Computes weighted square sum of a masked vector
1569 */
1570
N_VWSqrSumMaskLocal_Pthreads(N_Vector x,N_Vector w,N_Vector id)1571 realtype N_VWSqrSumMaskLocal_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1572 {
1573 sunindextype N;
1574 int i, nthreads;
1575 pthread_t *threads;
1576 Pthreads_Data *thread_data;
1577 pthread_attr_t attr;
1578 pthread_mutex_t global_mutex;
1579 realtype sum = ZERO;
1580
1581 /* allocate threads and thread data structs */
1582 N = NV_LENGTH_PT(x);
1583 nthreads = NV_NUM_THREADS_PT(x);
1584 threads = malloc(nthreads*sizeof(pthread_t));
1585 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1586
1587 /* set thread attributes */
1588 pthread_attr_init(&attr);
1589 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1590
1591 /* lock for reduction */
1592 pthread_mutex_init(&global_mutex, NULL);
1593
1594 for (i=0; i<nthreads; i++) {
1595 /* initialize thread data */
1596 N_VInitThreadData(&thread_data[i]);
1597
1598 /* compute start and end loop index for thread */
1599 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1600
1601 /* pack thread data */
1602 thread_data[i].v1 = NV_DATA_PT(x);
1603 thread_data[i].v2 = NV_DATA_PT(w);
1604 thread_data[i].v3 = NV_DATA_PT(id);
1605 thread_data[i].global_val = ∑
1606 thread_data[i].global_mutex = &global_mutex;
1607
1608 /* create threads and call pthread companion function */
1609 pthread_create(&threads[i], &attr, N_VWSqrSumMask_PT, (void *) &thread_data[i]);
1610 }
1611
1612 /* wait for all threads to finish */
1613 for (i=0; i<nthreads; i++) {
1614 pthread_join(threads[i], NULL);
1615 }
1616
1617 /* clean up and return */
1618 pthread_attr_destroy(&attr);
1619 pthread_mutex_destroy(&global_mutex);
1620 free(threads);
1621 free(thread_data);
1622
1623 return(sum);
1624 }
1625
1626
1627 /* ----------------------------------------------------------------------------
1628 * Pthread companion function to N_VWSqrSumMask
1629 */
1630
N_VWSqrSumMask_PT(void * thread_data)1631 static void *N_VWSqrSumMask_PT(void *thread_data)
1632 {
1633 sunindextype i, start, end;
1634 realtype *xd, *wd, *idd;
1635 realtype local_sum, *global_sum;
1636 Pthreads_Data *my_data;
1637 pthread_mutex_t *global_mutex;
1638
1639 /* extract thread data */
1640 my_data = (Pthreads_Data *) thread_data;
1641
1642 xd = my_data->v1;
1643 wd = my_data->v2;
1644 idd = my_data->v3;
1645
1646 global_sum = my_data->global_val;
1647 global_mutex = my_data->global_mutex;
1648
1649 start = my_data->start;
1650 end = my_data->end;
1651
1652 /* compute wrms norm with mask */
1653 local_sum = ZERO;
1654 for (i = start; i < end; i++) {
1655 if (idd[i] > ZERO)
1656 local_sum += SUNSQR(xd[i]*wd[i]);
1657 }
1658
1659 /* update global sum */
1660 pthread_mutex_lock(global_mutex);
1661 *global_sum += local_sum;
1662 pthread_mutex_unlock(global_mutex);
1663
1664 /* exit */
1665 pthread_exit(NULL);
1666 }
1667
1668
1669 /* ----------------------------------------------------------------------------
1670 * Finds the minimun component of a vector
1671 */
1672
N_VMin_Pthreads(N_Vector x)1673 realtype N_VMin_Pthreads(N_Vector x)
1674 {
1675 sunindextype N;
1676 int i, nthreads;
1677 pthread_t *threads;
1678 Pthreads_Data *thread_data;
1679 pthread_attr_t attr;
1680 pthread_mutex_t global_mutex;
1681 realtype min;
1682
1683 /* initialize global min */
1684 min = NV_Ith_PT(x,0);
1685
1686 /* allocate threads and thread data structs */
1687 N = NV_LENGTH_PT(x);
1688 nthreads = NV_NUM_THREADS_PT(x);
1689 threads = malloc(nthreads*sizeof(pthread_t));
1690 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1691
1692 /* set thread attributes */
1693 pthread_attr_init(&attr);
1694 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1695
1696 /* lock for reduction */
1697 pthread_mutex_init(&global_mutex, NULL);
1698
1699 for (i=0; i<nthreads; i++) {
1700 /* initialize thread data */
1701 N_VInitThreadData(&thread_data[i]);
1702
1703 /* compute start and end loop index for thread */
1704 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1705
1706 /* pack thread data */
1707 thread_data[i].v1 = NV_DATA_PT(x);
1708 thread_data[i].global_val = &min;
1709 thread_data[i].global_mutex = &global_mutex;
1710
1711 /* create threads and call pthread companion function */
1712 pthread_create(&threads[i], &attr, N_VMin_PT, (void *) &thread_data[i]);
1713 }
1714
1715 /* wait for all threads to finish */
1716 for (i=0; i<nthreads; i++) {
1717 pthread_join(threads[i], NULL);
1718 }
1719
1720 /* clean up and return */
1721 pthread_attr_destroy(&attr);
1722 pthread_mutex_destroy(&global_mutex);
1723 free(threads);
1724 free(thread_data);
1725
1726 return(min);
1727 }
1728
1729
1730 /* ----------------------------------------------------------------------------
1731 * Pthread companion function to N_VMin
1732 */
1733
N_VMin_PT(void * thread_data)1734 static void *N_VMin_PT(void *thread_data)
1735 {
1736 sunindextype i, start, end;
1737 realtype *xd;
1738 realtype local_min, *global_min;
1739 Pthreads_Data *my_data;
1740 pthread_mutex_t *global_mutex;
1741
1742 /* extract thread data */
1743 my_data = (Pthreads_Data *) thread_data;
1744
1745 xd = my_data->v1;
1746
1747 global_min = my_data->global_val;
1748 global_mutex = my_data->global_mutex;
1749
1750 start = my_data->start;
1751 end = my_data->end;
1752
1753 /* find local min */
1754 local_min = *global_min;
1755 for (i = start; i < end; i++) {
1756 if (xd[i] < local_min)
1757 local_min = xd[i];
1758 }
1759
1760 /* update global min */
1761 pthread_mutex_lock(global_mutex);
1762 if (local_min < *global_min)
1763 *global_min = local_min;
1764 pthread_mutex_unlock(global_mutex);
1765
1766 /* exit */
1767 pthread_exit(NULL);
1768 }
1769
1770
1771 /* ----------------------------------------------------------------------------
1772 * Computes weighted L2 norm of a vector
1773 */
1774
N_VWL2Norm_Pthreads(N_Vector x,N_Vector w)1775 realtype N_VWL2Norm_Pthreads(N_Vector x, N_Vector w)
1776 {
1777 sunindextype N;
1778 int i, nthreads;
1779 pthread_t *threads;
1780 Pthreads_Data *thread_data;
1781 pthread_attr_t attr;
1782 pthread_mutex_t global_mutex;
1783 realtype sum = ZERO;
1784
1785 /* allocate threads and thread data structs */
1786 N = NV_LENGTH_PT(x);
1787 nthreads = NV_NUM_THREADS_PT(x);
1788 threads = malloc(nthreads*sizeof(pthread_t));
1789 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1790
1791 /* set thread attributes */
1792 pthread_attr_init(&attr);
1793 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1794
1795 /* lock for reduction */
1796 pthread_mutex_init(&global_mutex, NULL);
1797
1798 for (i=0; i<nthreads; i++) {
1799 /* initialize thread data */
1800 N_VInitThreadData(&thread_data[i]);
1801
1802 /* compute start and end loop index for thread */
1803 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1804
1805 /* pack thread data */
1806 thread_data[i].v1 = NV_DATA_PT(x);
1807 thread_data[i].v2 = NV_DATA_PT(w);
1808 thread_data[i].global_val = ∑
1809 thread_data[i].global_mutex = &global_mutex;
1810
1811 /* create threads and call pthread companion function */
1812 pthread_create(&threads[i], &attr, N_VWL2Norm_PT, (void *) &thread_data[i]);
1813 }
1814
1815 /* wait for all threads to finish */
1816 for (i=0; i<nthreads; i++) {
1817 pthread_join(threads[i], NULL);
1818 }
1819
1820 /* clean up and return */
1821 pthread_attr_destroy(&attr);
1822 pthread_mutex_destroy(&global_mutex);
1823 free(threads);
1824 free(thread_data);
1825
1826 return(SUNRsqrt(sum));
1827 }
1828
1829
1830 /* ----------------------------------------------------------------------------
1831 * Pthread companion function to N_VWL2Norm
1832 */
1833
N_VWL2Norm_PT(void * thread_data)1834 static void *N_VWL2Norm_PT(void *thread_data)
1835 {
1836 sunindextype i, start, end;
1837 realtype *xd, *wd;
1838 realtype local_sum, *global_sum;
1839 Pthreads_Data *my_data;
1840 pthread_mutex_t *global_mutex;
1841
1842 /* extract thread data */
1843 my_data = (Pthreads_Data *) thread_data;
1844
1845 xd = my_data->v1;
1846 wd = my_data->v2;
1847
1848 global_sum = my_data->global_val;
1849 global_mutex = my_data->global_mutex;
1850
1851 start = my_data->start;
1852 end = my_data->end;
1853
1854 /* compute WL2 norm */
1855 local_sum = ZERO;
1856 for (i = start; i < end; i++)
1857 local_sum += SUNSQR(xd[i]*wd[i]);
1858
1859 /* update global sum */
1860 pthread_mutex_lock(global_mutex);
1861 *global_sum += local_sum;
1862 pthread_mutex_unlock(global_mutex);
1863
1864 /* exit */
1865 pthread_exit(NULL);
1866 }
1867
1868
1869 /* ----------------------------------------------------------------------------
1870 * Computes L1 norm of a vector
1871 */
1872
N_VL1Norm_Pthreads(N_Vector x)1873 realtype N_VL1Norm_Pthreads(N_Vector x)
1874 {
1875 sunindextype N;
1876 int i, nthreads;
1877 pthread_t *threads;
1878 Pthreads_Data *thread_data;
1879 pthread_attr_t attr;
1880 pthread_mutex_t global_mutex;
1881 realtype sum = ZERO;
1882
1883 /* allocate threads and thread data structs */
1884 N = NV_LENGTH_PT(x);
1885 nthreads = NV_NUM_THREADS_PT(x);
1886 threads = malloc(nthreads*sizeof(pthread_t));
1887 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1888
1889 /* set thread attributes */
1890 pthread_attr_init(&attr);
1891 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1892
1893 /* lock for reduction */
1894 pthread_mutex_init(&global_mutex, NULL);
1895
1896 for (i=0; i<nthreads; i++) {
1897 /* initialize thread data */
1898 N_VInitThreadData(&thread_data[i]);
1899
1900 /* compute start and end loop index for thread */
1901 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1902
1903 /* pack thread data */
1904 thread_data[i].v1 = NV_DATA_PT(x);
1905 thread_data[i].global_val = ∑
1906 thread_data[i].global_mutex = &global_mutex;
1907
1908 /* create threads and call pthread companion function */
1909 pthread_create(&threads[i], &attr, N_VL1Norm_PT, (void *) &thread_data[i]);
1910 }
1911
1912 /* wait for all threads to finish */
1913 for (i=0; i<nthreads; i++) {
1914 pthread_join(threads[i], NULL);
1915 }
1916
1917 /* clean up and return */
1918 pthread_attr_destroy(&attr);
1919 pthread_mutex_destroy(&global_mutex);
1920 free(threads);
1921 free(thread_data);
1922
1923 return(sum);
1924 }
1925
1926
1927 /* ----------------------------------------------------------------------------
1928 * Pthread companion function to N_VL1Norm
1929 */
1930
N_VL1Norm_PT(void * thread_data)1931 static void *N_VL1Norm_PT(void *thread_data)
1932 {
1933 sunindextype i, start, end;
1934 realtype *xd;
1935 realtype local_sum, *global_sum;
1936 Pthreads_Data *my_data;
1937 pthread_mutex_t *global_mutex;
1938
1939 /* extract thread data */
1940 my_data = (Pthreads_Data *) thread_data;
1941
1942 xd = my_data->v1;
1943
1944 global_sum = my_data->global_val;
1945 global_mutex = my_data->global_mutex;
1946
1947 start = my_data->start;
1948 end = my_data->end;
1949
1950 /* compute L1 norm */
1951 local_sum = ZERO;
1952 for (i = start; i < end; i++)
1953 local_sum += SUNRabs(xd[i]);
1954
1955 /* update global sum */
1956 pthread_mutex_lock(global_mutex);
1957 *global_sum += local_sum;
1958 pthread_mutex_unlock(global_mutex);
1959
1960 /* exit */
1961 pthread_exit(NULL);
1962 }
1963
1964
1965 /* ----------------------------------------------------------------------------
1966 * Compare vector component values to a scaler
1967 */
1968
N_VCompare_Pthreads(realtype c,N_Vector x,N_Vector z)1969 void N_VCompare_Pthreads(realtype c, N_Vector x, N_Vector z)
1970 {
1971 sunindextype N;
1972 int i, nthreads;
1973 pthread_t *threads;
1974 Pthreads_Data *thread_data;
1975 pthread_attr_t attr;
1976
1977 /* allocate threads and thread data structs */
1978 N = NV_LENGTH_PT(x);
1979 nthreads = NV_NUM_THREADS_PT(x);
1980 threads = malloc(nthreads*sizeof(pthread_t));
1981 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1982
1983 /* set thread attributes */
1984 pthread_attr_init(&attr);
1985 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1986
1987 for (i=0; i<nthreads; i++) {
1988 /* initialize thread data */
1989 N_VInitThreadData(&thread_data[i]);
1990
1991 /* compute start and end loop index for thread */
1992 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1993
1994 /* pack thread data */
1995 thread_data[i].c1 = c;
1996 thread_data[i].v1 = NV_DATA_PT(x);
1997 thread_data[i].v2 = NV_DATA_PT(z);
1998
1999 /* create threads and call pthread companion function */
2000 pthread_create(&threads[i], &attr, N_VCompare_PT, (void *) &thread_data[i]);
2001 }
2002
2003 /* wait for all threads to finish */
2004 for (i=0; i<nthreads; i++) {
2005 pthread_join(threads[i], NULL);
2006 }
2007
2008 /* clean up and return */
2009 pthread_attr_destroy(&attr);
2010 free(threads);
2011 free(thread_data);
2012
2013 return;
2014 }
2015
2016 /* ----------------------------------------------------------------------------
2017 * Pthread companion function to N_VCompare
2018 */
2019
N_VCompare_PT(void * thread_data)2020 static void *N_VCompare_PT(void *thread_data)
2021 {
2022 sunindextype i, start, end;
2023 realtype c;
2024 realtype *xd, *zd;
2025 Pthreads_Data *my_data;
2026
2027 /* extract thread data */
2028 my_data = (Pthreads_Data *) thread_data;
2029
2030 c = my_data->c1;
2031 xd = my_data->v1;
2032 zd = my_data->v2;
2033
2034 start = my_data->start;
2035 end = my_data->end;
2036
2037 /* compare component to scaler */
2038 for (i = start; i < end; i++)
2039 zd[i] = (SUNRabs(xd[i]) >= c) ? ONE : ZERO;
2040
2041 /* exit */
2042 pthread_exit(NULL);
2043 }
2044
2045
2046 /* ----------------------------------------------------------------------------
2047 * Compute componentwise inverse z[i] = ONE/x[i] and check if x[i] == ZERO
2048 */
2049
N_VInvTest_Pthreads(N_Vector x,N_Vector z)2050 booleantype N_VInvTest_Pthreads(N_Vector x, N_Vector z)
2051 {
2052 sunindextype N;
2053 int i, nthreads;
2054 pthread_t *threads;
2055 Pthreads_Data *thread_data;
2056 pthread_attr_t attr;
2057
2058 realtype val = ZERO;
2059
2060 /* allocate threads and thread data structs */
2061 N = NV_LENGTH_PT(x);
2062 nthreads = NV_NUM_THREADS_PT(x);
2063 threads = malloc(nthreads*sizeof(pthread_t));
2064 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2065
2066 /* set thread attributes */
2067 pthread_attr_init(&attr);
2068 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2069
2070 for (i=0; i<nthreads; i++) {
2071 /* initialize thread data */
2072 N_VInitThreadData(&thread_data[i]);
2073
2074 /* compute start and end loop index for thread */
2075 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2076
2077 /* pack thread data */
2078 thread_data[i].v1 = NV_DATA_PT(x);
2079 thread_data[i].v2 = NV_DATA_PT(z);
2080 thread_data[i].global_val = &val;
2081
2082 /* create threads and call pthread companion function */
2083 pthread_create(&threads[i], &attr, N_VInvTest_PT, (void *) &thread_data[i]);
2084 }
2085
2086 /* wait for all threads to finish */
2087 for (i=0; i<nthreads; i++) {
2088 pthread_join(threads[i], NULL);
2089 }
2090
2091 /* clean up and return */
2092 pthread_attr_destroy(&attr);
2093 free(threads);
2094 free(thread_data);
2095
2096 if (val > ZERO)
2097 return (SUNFALSE);
2098 else
2099 return (SUNTRUE);
2100 }
2101
2102
2103 /* ----------------------------------------------------------------------------
2104 * Pthread companion function to N_VInvTest
2105 */
2106
N_VInvTest_PT(void * thread_data)2107 static void *N_VInvTest_PT(void *thread_data)
2108 {
2109 sunindextype i, start, end;
2110 realtype *xd, *zd;
2111 realtype local_val, *global_val;
2112 Pthreads_Data *my_data;
2113
2114 /* extract thread data */
2115 my_data = (Pthreads_Data *) thread_data;
2116
2117 xd = my_data->v1;
2118 zd = my_data->v2;
2119
2120 global_val = my_data->global_val;
2121
2122 start = my_data->start;
2123 end = my_data->end;
2124
2125 /* compute inverse with check for divide by ZERO */
2126 local_val = ZERO;
2127 for (i = start; i < end; i++) {
2128 if (xd[i] == ZERO)
2129 local_val = ONE;
2130 else
2131 zd[i] = ONE/xd[i];
2132 }
2133
2134 /* update global val */
2135 if (local_val > ZERO) {
2136 *global_val = local_val;
2137 }
2138
2139 /* exit */
2140 pthread_exit(NULL);
2141 }
2142
2143
2144 /* ----------------------------------------------------------------------------
2145 * Compute constraint mask of a vector
2146 */
2147
N_VConstrMask_Pthreads(N_Vector c,N_Vector x,N_Vector m)2148 booleantype N_VConstrMask_Pthreads(N_Vector c, N_Vector x, N_Vector m)
2149 {
2150 sunindextype N;
2151 int i, nthreads;
2152 pthread_t *threads;
2153 Pthreads_Data *thread_data;
2154 pthread_attr_t attr;
2155
2156 realtype val = ZERO;
2157
2158 /* allocate threads and thread data structs */
2159 N = NV_LENGTH_PT(x);
2160 nthreads = NV_NUM_THREADS_PT(x);
2161 threads = malloc(nthreads*sizeof(pthread_t));
2162 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2163
2164 /* set thread attributes */
2165 pthread_attr_init(&attr);
2166 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2167
2168 for (i=0; i<nthreads; i++) {
2169 /* initialize thread data */
2170 N_VInitThreadData(&thread_data[i]);
2171
2172 /* compute start and end loop index for thread */
2173 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2174
2175 /* pack thread data */
2176 thread_data[i].v1 = NV_DATA_PT(c);
2177 thread_data[i].v2 = NV_DATA_PT(x);
2178 thread_data[i].v3 = NV_DATA_PT(m);
2179 thread_data[i].global_val = &val;
2180
2181 /* create threads and call pthread companion function */
2182 pthread_create(&threads[i], &attr, N_VConstrMask_PT, (void *) &thread_data[i]);
2183 }
2184
2185 /* wait for all threads to finish */
2186 for (i=0; i<nthreads; i++) {
2187 pthread_join(threads[i], NULL);
2188 }
2189
2190 /* clean up and return */
2191 pthread_attr_destroy(&attr);
2192 free(threads);
2193 free(thread_data);
2194
2195 if (val > ZERO)
2196 return(SUNFALSE);
2197 else
2198 return(SUNTRUE);
2199 }
2200
2201
2202 /* ----------------------------------------------------------------------------
2203 * Pthread companion function to N_VConstrMask
2204 */
2205
N_VConstrMask_PT(void * thread_data)2206 static void *N_VConstrMask_PT(void *thread_data)
2207 {
2208 sunindextype i, start, end;
2209 realtype *cd, *xd, *md;
2210 realtype local_val, *global_val;
2211 Pthreads_Data *my_data;
2212
2213 /* extract thread data */
2214 my_data = (Pthreads_Data *) thread_data;
2215
2216 cd = my_data->v1;
2217 xd = my_data->v2;
2218 md = my_data->v3;
2219
2220 global_val = my_data->global_val;
2221
2222 start = my_data->start;
2223 end = my_data->end;
2224
2225 /* compute constraint mask */
2226 local_val = ZERO;
2227 for (i = start; i < end; i++) {
2228 md[i] = ZERO;
2229
2230 /* Continue if no constraints were set for the variable */
2231 if (cd[i] == ZERO)
2232 continue;
2233
2234 /* Check if a set constraint has been violated */
2235 if ((SUNRabs(cd[i]) > ONEPT5 && xd[i]*cd[i] <= ZERO) ||
2236 (SUNRabs(cd[i]) > HALF && xd[i]*cd[i] < ZERO)) {
2237 local_val = md[i] = ONE;
2238 }
2239 }
2240
2241 /* update global val */
2242 if (local_val > ZERO) {
2243 *global_val = local_val;
2244 }
2245
2246 /* exit */
2247 pthread_exit(NULL);
2248 }
2249
2250
2251 /* ----------------------------------------------------------------------------
2252 * Compute minimum componentwise quotient
2253 */
2254
N_VMinQuotient_Pthreads(N_Vector num,N_Vector denom)2255 realtype N_VMinQuotient_Pthreads(N_Vector num, N_Vector denom)
2256 {
2257 sunindextype N;
2258 int i, nthreads;
2259 pthread_t *threads;
2260 Pthreads_Data *thread_data;
2261 pthread_attr_t attr;
2262 pthread_mutex_t global_mutex;
2263 realtype min = BIG_REAL;
2264
2265 /* allocate threads and thread data structs */
2266 N = NV_LENGTH_PT(num);
2267 nthreads = NV_NUM_THREADS_PT(num);
2268 threads = malloc(nthreads*sizeof(pthread_t));
2269 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2270
2271 /* set thread attributes */
2272 pthread_attr_init(&attr);
2273 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2274
2275 /* lock for reduction */
2276 pthread_mutex_init(&global_mutex, NULL);
2277
2278 for (i=0; i<nthreads; i++) {
2279 /* initialize thread data */
2280 N_VInitThreadData(&thread_data[i]);
2281
2282 /* compute start and end loop index for thread */
2283 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2284
2285 /* pack thread data */
2286 thread_data[i].v1 = NV_DATA_PT(num);
2287 thread_data[i].v2 = NV_DATA_PT(denom);
2288 thread_data[i].global_val = &min;
2289 thread_data[i].global_mutex = &global_mutex;
2290
2291 /* create threads and call pthread companion function */
2292 pthread_create(&threads[i], &attr, N_VMinQuotient_PT, (void *) &thread_data[i]);
2293 }
2294
2295 /* wait for all threads to finish */
2296 for (i=0; i<nthreads; i++) {
2297 pthread_join(threads[i], NULL);
2298 }
2299
2300 /* clean up and return */
2301 pthread_attr_destroy(&attr);
2302 pthread_mutex_destroy(&global_mutex);
2303 free(threads);
2304 free(thread_data);
2305
2306 return(min);
2307 }
2308
2309
2310 /* ----------------------------------------------------------------------------
2311 * Pthread companion function to N_VConstrMask
2312 */
2313
N_VMinQuotient_PT(void * thread_data)2314 static void *N_VMinQuotient_PT(void *thread_data)
2315 {
2316 sunindextype i, start, end;
2317 realtype *nd, *dd;
2318 realtype local_min, *global_min;
2319 Pthreads_Data *my_data;
2320 pthread_mutex_t *global_mutex;
2321
2322 /* extract thread data */
2323 my_data = (Pthreads_Data *) thread_data;
2324
2325 nd = my_data->v1;
2326 dd = my_data->v2;
2327
2328 global_min = my_data->global_val;
2329 global_mutex = my_data->global_mutex;
2330
2331 start = my_data->start;
2332 end = my_data->end;
2333
2334 /* compute minimum quotient */
2335 local_min = BIG_REAL;
2336 for (i = start; i < end; i++) {
2337 if (dd[i] == ZERO)
2338 continue;
2339 local_min = SUNMIN(local_min, nd[i]/dd[i]);
2340 }
2341
2342 /* update global min */
2343 pthread_mutex_lock(global_mutex);
2344 if (local_min < *global_min)
2345 *global_min = local_min;
2346 pthread_mutex_unlock(global_mutex);
2347
2348 /* exit */
2349 pthread_exit(NULL);
2350 }
2351
2352
2353 /*
2354 * -----------------------------------------------------------------------------
2355 * fused vector operations
2356 * -----------------------------------------------------------------------------
2357 */
2358
2359
2360 /* -----------------------------------------------------------------------------
2361 * Compute the linear combination z = c[i]*X[i]
2362 */
2363
N_VLinearCombination_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector z)2364 int N_VLinearCombination_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector z)
2365 {
2366 sunindextype N;
2367 int i, nthreads;
2368 pthread_t *threads;
2369 Pthreads_Data *thread_data;
2370 pthread_attr_t attr;
2371
2372 /* invalid number of vectors */
2373 if (nvec < 1) return(-1);
2374
2375 /* should have called N_VScale */
2376 if (nvec == 1) {
2377 N_VScale_Pthreads(c[0], X[0], z);
2378 return(0);
2379 }
2380
2381 /* should have called N_VLinearSum */
2382 if (nvec == 2) {
2383 N_VLinearSum_Pthreads(c[0], X[0], c[1], X[1], z);
2384 return(0);
2385 }
2386
2387 /* get vector length and data array */
2388 N = NV_LENGTH_PT(z);
2389 nthreads = NV_NUM_THREADS_PT(z);
2390 threads = malloc(nthreads*sizeof(pthread_t));
2391 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2392
2393 /* set thread attributes */
2394 pthread_attr_init(&attr);
2395 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2396
2397 for (i=0; i<nthreads; i++) {
2398 /* initialize thread data */
2399 N_VInitThreadData(&thread_data[i]);
2400
2401 /* compute start and end loop index for thread */
2402 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2403
2404 /* pack thread data */
2405 thread_data[i].nvec = nvec;
2406 thread_data[i].cvals = c;
2407 thread_data[i].Y1 = X;
2408 thread_data[i].x1 = z;
2409
2410 /* create threads and call pthread companion function */
2411 pthread_create(&threads[i], &attr, N_VLinearCombination_PT, (void *) &thread_data[i]);
2412 }
2413
2414 /* wait for all threads to finish */
2415 for (i=0; i<nthreads; i++) {
2416 pthread_join(threads[i], NULL);
2417 }
2418
2419 /* clean up and return */
2420 pthread_attr_destroy(&attr);
2421 free(threads);
2422 free(thread_data);
2423
2424 return(0);
2425 }
2426
2427
2428 /* -----------------------------------------------------------------------------
2429 * Pthread companion function to N_VLinearCombination
2430 */
2431
N_VLinearCombination_PT(void * thread_data)2432 static void *N_VLinearCombination_PT(void *thread_data)
2433 {
2434 Pthreads_Data *my_data;
2435 sunindextype j, start, end;
2436
2437 int i;
2438 realtype* c=NULL;
2439 realtype* xd=NULL;
2440 realtype* zd=NULL;
2441
2442 /* extract thread data */
2443 my_data = (Pthreads_Data *) thread_data;
2444
2445 start = my_data->start;
2446 end = my_data->end;
2447
2448 c = my_data->cvals;
2449 zd = NV_DATA_PT(my_data->x1);
2450
2451 /*
2452 * X[0] += c[i]*X[i], i = 1,...,nvec-1
2453 */
2454 if ((my_data->Y1[0] == my_data->x1) && (c[0] == ONE)) {
2455 for (i=1; i<my_data->nvec; i++) {
2456 xd = NV_DATA_PT(my_data->Y1[i]);
2457 for (j=start; j<end; j++) {
2458 zd[j] += c[i] * xd[j];
2459 }
2460 }
2461 pthread_exit(NULL);
2462 }
2463
2464 /*
2465 * X[0] = c[0] * X[0] + sum{ c[i] * X[i] }, i = 1,...,nvec-1
2466 */
2467 if (my_data->Y1[0] == my_data->x1) {
2468 for (j=start; j<end; j++) {
2469 zd[j] *= c[0];
2470 }
2471 for (i=1; i<my_data->nvec; i++) {
2472 xd = NV_DATA_PT(my_data->Y1[i]);
2473 for (j=start; j<end; j++) {
2474 zd[j] += c[i] * xd[j];
2475 }
2476 }
2477 pthread_exit(NULL);
2478 }
2479
2480 /*
2481 * z = sum{ c[i] * X[i] }, i = 0,...,nvec-1
2482 */
2483 xd = NV_DATA_PT(my_data->Y1[0]);
2484 for (j=start; j<end; j++) {
2485 zd[j] = c[0] * xd[j];
2486 }
2487 for (i=1; i<my_data->nvec; i++) {
2488 xd = NV_DATA_PT(my_data->Y1[i]);
2489 for (j=start; j<end; j++) {
2490 zd[j] += c[i] * xd[j];
2491 }
2492 }
2493 pthread_exit(NULL);
2494 }
2495
2496
2497 /* -----------------------------------------------------------------------------
2498 * Compute multiple linear sums Z[i] = Y[i] + a*x
2499 */
2500
N_VScaleAddMulti_Pthreads(int nvec,realtype * a,N_Vector x,N_Vector * Y,N_Vector * Z)2501 int N_VScaleAddMulti_Pthreads(int nvec, realtype* a, N_Vector x, N_Vector* Y, N_Vector* Z)
2502 {
2503 sunindextype N;
2504 int i, nthreads;
2505 pthread_t *threads;
2506 Pthreads_Data *thread_data;
2507 pthread_attr_t attr;
2508
2509 /* invalid number of vectors */
2510 if (nvec < 1) return(-1);
2511
2512 /* should have called N_VLinearSum */
2513 if (nvec == 1) {
2514 N_VLinearSum_Pthreads(a[0], x, ONE, Y[0], Z[0]);
2515 return(0);
2516 }
2517
2518 /* get vector length and data array */
2519 N = NV_LENGTH_PT(x);
2520 nthreads = NV_NUM_THREADS_PT(x);
2521 threads = malloc(nthreads*sizeof(pthread_t));
2522 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2523
2524 /* set thread attributes */
2525 pthread_attr_init(&attr);
2526 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2527
2528 for (i=0; i<nthreads; i++) {
2529 /* initialize thread data */
2530 N_VInitThreadData(&thread_data[i]);
2531
2532 /* compute start and end loop index for thread */
2533 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2534
2535 /* pack thread data */
2536 thread_data[i].nvec = nvec;
2537 thread_data[i].cvals = a;
2538 thread_data[i].x1 = x;
2539 thread_data[i].Y1 = Y;
2540 thread_data[i].Y2 = Z;
2541
2542 /* create threads and call pthread companion function */
2543 pthread_create(&threads[i], &attr, N_VScaleAddMulti_PT, (void *) &thread_data[i]);
2544 }
2545
2546 /* wait for all threads to finish */
2547 for (i=0; i<nthreads; i++) {
2548 pthread_join(threads[i], NULL);
2549 }
2550
2551 /* clean up and return */
2552 pthread_attr_destroy(&attr);
2553 free(threads);
2554 free(thread_data);
2555
2556 return(0);
2557 }
2558
2559
2560 /* -----------------------------------------------------------------------------
2561 * Pthread companion function to N_VScaleAddMulti
2562 */
2563
N_VScaleAddMulti_PT(void * thread_data)2564 static void *N_VScaleAddMulti_PT(void *thread_data)
2565 {
2566 Pthreads_Data *my_data;
2567 sunindextype j, start, end;
2568
2569 int i;
2570 realtype* a=NULL;
2571 realtype* xd=NULL;
2572 realtype* yd=NULL;
2573 realtype* zd=NULL;
2574
2575 /* extract thread data */
2576 my_data = (Pthreads_Data *) thread_data;
2577
2578 start = my_data->start;
2579 end = my_data->end;
2580
2581 a = my_data->cvals;
2582 xd = NV_DATA_PT(my_data->x1);
2583
2584 /*
2585 * Y[i][j] += a[i] * x[j]
2586 */
2587 if (my_data->Y1 == my_data->Y2) {
2588 for (i=0; i<my_data->nvec; i++) {
2589 yd = NV_DATA_PT(my_data->Y1[i]);
2590 for (j=start; j<end; j++) {
2591 yd[j] += a[i] * xd[j];
2592 }
2593 }
2594 pthread_exit(NULL);
2595 }
2596
2597 /*
2598 * Z[i][j] = Y[i][j] + a[i] * x[j]
2599 */
2600 for (i=0; i<my_data->nvec; i++) {
2601 yd = NV_DATA_PT(my_data->Y1[i]);
2602 zd = NV_DATA_PT(my_data->Y2[i]);
2603 for (j=start; j<end; j++) {
2604 zd[j] = a[i] * xd[j] + yd[j];
2605 }
2606 }
2607 pthread_exit(NULL);
2608 }
2609
2610
2611 /* -----------------------------------------------------------------------------
2612 * Compute the dot product of a vector with multiple vectors, a[i] = sum(x*Y[i])
2613 */
2614
N_VDotProdMulti_Pthreads(int nvec,N_Vector x,N_Vector * Y,realtype * dotprods)2615 int N_VDotProdMulti_Pthreads(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods)
2616 {
2617 sunindextype N;
2618 int i, nthreads;
2619 pthread_t *threads;
2620 Pthreads_Data *thread_data;
2621 pthread_attr_t attr;
2622 pthread_mutex_t global_mutex;
2623
2624 /* invalid number of vectors */
2625 if (nvec < 1) return(-1);
2626
2627 /* should have called N_VDotProd */
2628 if (nvec == 1) {
2629 dotprods[0] = N_VDotProd_Pthreads(x, Y[0]);
2630 return(0);
2631 }
2632
2633 /* initialize output array */
2634 for (i=0; i<nvec; i++)
2635 dotprods[i] = ZERO;
2636
2637 /* allocate threads and thread data structs */
2638 N = NV_LENGTH_PT(x);
2639 nthreads = NV_NUM_THREADS_PT(x);
2640 threads = malloc(nthreads*sizeof(pthread_t));
2641 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2642
2643 /* set thread attributes */
2644 pthread_attr_init(&attr);
2645 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2646
2647 /* lock for reduction */
2648 pthread_mutex_init(&global_mutex, NULL);
2649
2650 for (i=0; i<nthreads; i++) {
2651 /* initialize thread data */
2652 N_VInitThreadData(&thread_data[i]);
2653
2654 /* compute start and end loop index for thread */
2655 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2656
2657 /* pack thread data */
2658 thread_data[i].nvec = nvec;
2659 thread_data[i].x1 = x;
2660 thread_data[i].Y1 = Y;
2661 thread_data[i].cvals = dotprods;
2662
2663 thread_data[i].global_mutex = &global_mutex;
2664
2665 /* create threads and call pthread companion function */
2666 pthread_create(&threads[i], &attr, N_VDotProdMulti_PT, (void *) &thread_data[i]);
2667 }
2668
2669 /* wait for all threads to finish */
2670 for (i=0; i<nthreads; i++) {
2671 pthread_join(threads[i], NULL);
2672 }
2673
2674 /* clean up and return */
2675 pthread_attr_destroy(&attr);
2676 pthread_mutex_destroy(&global_mutex);
2677 free(threads);
2678 free(thread_data);
2679
2680 return(0);
2681 }
2682
2683
2684 /* -----------------------------------------------------------------------------
2685 * Pthread companion function to N_VDotProdMulti
2686 */
2687
N_VDotProdMulti_PT(void * thread_data)2688 static void *N_VDotProdMulti_PT(void *thread_data)
2689 {
2690 Pthreads_Data *my_data;
2691 sunindextype j, start, end;
2692 pthread_mutex_t *lock;
2693
2694 int i;
2695 realtype sum;
2696 realtype* dotprods=NULL;
2697 realtype* xd=NULL;
2698 realtype* yd=NULL;
2699
2700 /* extract thread data */
2701 my_data = (Pthreads_Data *) thread_data;
2702
2703 start = my_data->start;
2704 end = my_data->end;
2705 lock = my_data->global_mutex;
2706
2707 xd = NV_DATA_PT(my_data->x1);
2708 dotprods = my_data->cvals;
2709
2710 /* compute multiple dot products */
2711 for (i=0; i<my_data->nvec; i++) {
2712 yd = NV_DATA_PT(my_data->Y1[i]);
2713 sum = ZERO;
2714 for (j=start; j<end; j++) {
2715 sum += xd[j] * yd[j];
2716 }
2717 /* update global sum */
2718 pthread_mutex_lock(lock);
2719 dotprods[i] += sum;
2720 pthread_mutex_unlock(lock);
2721 }
2722
2723 /* exit */
2724 pthread_exit(NULL);
2725 }
2726
2727
2728 /*
2729 * -----------------------------------------------------------------------------
2730 * vector array operations
2731 * -----------------------------------------------------------------------------
2732 */
2733
2734
2735 /* -----------------------------------------------------------------------------
2736 * Compute multiple linear sums Z[i] = a*X[i] + b*Y[i]
2737 */
2738
N_VLinearSumVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,realtype b,N_Vector * Y,N_Vector * Z)2739 int N_VLinearSumVectorArray_Pthreads(int nvec, realtype a, N_Vector* X,
2740 realtype b, N_Vector* Y, N_Vector* Z)
2741 {
2742 sunindextype N;
2743 int i, nthreads;
2744 pthread_t *threads;
2745 Pthreads_Data *thread_data;
2746 pthread_attr_t attr;
2747
2748 realtype c;
2749 N_Vector* V1;
2750 N_Vector* V2;
2751 booleantype test;
2752
2753 /* invalid number of vectors */
2754 if (nvec < 1) return(-1);
2755
2756 /* should have called N_VLinearSum */
2757 if (nvec == 1) {
2758 N_VLinearSum_Pthreads(a, X[0], b, Y[0], Z[0]);
2759 return(0);
2760 }
2761
2762 /* BLAS usage: axpy y <- ax+y */
2763 if ((b == ONE) && (Z == Y))
2764 return(VaxpyVectorArray_Pthreads(nvec, a, X, Y));
2765
2766 /* BLAS usage: axpy x <- by+x */
2767 if ((a == ONE) && (Z == X))
2768 return(VaxpyVectorArray_Pthreads(nvec, b, Y, X));
2769
2770 /* Case: a == b == 1.0 */
2771 if ((a == ONE) && (b == ONE))
2772 return(VSumVectorArray_Pthreads(nvec, X, Y, Z));
2773
2774 /* Cases: */
2775 /* (1) a == 1.0, b = -1.0, */
2776 /* (2) a == -1.0, b == 1.0 */
2777 if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
2778 V1 = test ? Y : X;
2779 V2 = test ? X : Y;
2780 return(VDiffVectorArray_Pthreads(nvec, V2, V1, Z));
2781 }
2782
2783 /* Cases: */
2784 /* (1) a == 1.0, b == other or 0.0, */
2785 /* (2) a == other or 0.0, b == 1.0 */
2786 /* if a or b is 0.0, then user should have called N_VScale */
2787 if ((test = (a == ONE)) || (b == ONE)) {
2788 c = test ? b : a;
2789 V1 = test ? Y : X;
2790 V2 = test ? X : Y;
2791 return(VLin1VectorArray_Pthreads(nvec, c, V1, V2, Z));
2792 }
2793
2794 /* Cases: */
2795 /* (1) a == -1.0, b != 1.0, */
2796 /* (2) a != 1.0, b == -1.0 */
2797 if ((test = (a == -ONE)) || (b == -ONE)) {
2798 c = test ? b : a;
2799 V1 = test ? Y : X;
2800 V2 = test ? X : Y;
2801 return(VLin2VectorArray_Pthreads(nvec, c, V1, V2, Z));
2802 }
2803
2804 /* Case: a == b */
2805 /* catches case both a and b are 0.0 - user should have called N_VConst */
2806 if (a == b)
2807 return(VScaleSumVectorArray_Pthreads(nvec, a, X, Y, Z));
2808
2809 /* Case: a == -b */
2810 if (a == -b)
2811 return(VScaleDiffVectorArray_Pthreads(nvec, a, X, Y, Z));
2812
2813 /* Do all cases not handled above: */
2814 /* (1) a == other, b == 0.0 - user should have called N_VScale */
2815 /* (2) a == 0.0, b == other - user should have called N_VScale */
2816 /* (3) a,b == other, a !=b, a != -b */
2817
2818 /* get vector length and data array */
2819 N = NV_LENGTH_PT(Z[0]);
2820 nthreads = NV_NUM_THREADS_PT(Z[0]);
2821 threads = malloc(nthreads*sizeof(pthread_t));
2822 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2823
2824 /* set thread attributes */
2825 pthread_attr_init(&attr);
2826 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2827
2828 for (i=0; i<nthreads; i++) {
2829 /* initialize thread data */
2830 N_VInitThreadData(&thread_data[i]);
2831
2832 /* compute start and end loop index for thread */
2833 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2834
2835 /* pack thread data */
2836 thread_data[i].nvec = nvec;
2837 thread_data[i].c1 = a;
2838 thread_data[i].c2 = b;
2839 thread_data[i].Y1 = X;
2840 thread_data[i].Y2 = Y;
2841 thread_data[i].Y3 = Z;
2842
2843 /* create threads and call pthread companion function */
2844 pthread_create(&threads[i], &attr, N_VLinearSumVectorArray_PT,
2845 (void *) &thread_data[i]);
2846 }
2847
2848 /* wait for all threads to finish */
2849 for (i=0; i<nthreads; i++) {
2850 pthread_join(threads[i], NULL);
2851 }
2852
2853 /* clean up and return */
2854 pthread_attr_destroy(&attr);
2855 free(threads);
2856 free(thread_data);
2857
2858 return(0);
2859 }
2860
2861
2862 /* -----------------------------------------------------------------------------
2863 * Pthread companion function to N_VLinearSumVectorArray
2864 */
2865
N_VLinearSumVectorArray_PT(void * thread_data)2866 static void *N_VLinearSumVectorArray_PT(void *thread_data)
2867 {
2868 Pthreads_Data *my_data;
2869 sunindextype j, start, end;
2870
2871 int i;
2872 realtype a, b;
2873 realtype* xd=NULL;
2874 realtype* yd=NULL;
2875 realtype* zd=NULL;
2876
2877 /* extract thread data */
2878 my_data = (Pthreads_Data *) thread_data;
2879
2880 start = my_data->start;
2881 end = my_data->end;
2882
2883 a = my_data->c1;
2884 b = my_data->c2;
2885
2886 /* compute linear sum for each vector pair in vector arrays */
2887 for (i=0; i<my_data->nvec; i++) {
2888 xd = NV_DATA_PT(my_data->Y1[i]);
2889 yd = NV_DATA_PT(my_data->Y2[i]);
2890 zd = NV_DATA_PT(my_data->Y3[i]);
2891 for (j=start; j<end; j++) {
2892 zd[j] = a * xd[j] + b * yd[j];
2893 }
2894 }
2895
2896 /* exit */
2897 pthread_exit(NULL);
2898 }
2899
2900
2901 /* -----------------------------------------------------------------------------
2902 * Scale multiple vectors Z[i] = c[i]*X[i]
2903 */
2904
N_VScaleVectorArray_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector * Z)2905 int N_VScaleVectorArray_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector* Z)
2906 {
2907 sunindextype N;
2908 int i, nthreads;
2909 pthread_t *threads;
2910 Pthreads_Data *thread_data;
2911 pthread_attr_t attr;
2912
2913 /* invalid number of vectors */
2914 if (nvec < 1) return(-1);
2915
2916 /* should have called N_VScale */
2917 if (nvec == 1) {
2918 N_VScale_Pthreads(c[0], X[0], Z[0]);
2919 return(0);
2920 }
2921
2922 /* get vector length and data array */
2923 N = NV_LENGTH_PT(Z[0]);
2924 nthreads = NV_NUM_THREADS_PT(Z[0]);
2925 threads = malloc(nthreads*sizeof(pthread_t));
2926 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2927
2928 /* set thread attributes */
2929 pthread_attr_init(&attr);
2930 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2931
2932 for (i=0; i<nthreads; i++) {
2933 /* initialize thread data */
2934 N_VInitThreadData(&thread_data[i]);
2935
2936 /* compute start and end loop index for thread */
2937 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2938
2939 /* pack thread data */
2940 thread_data[i].nvec = nvec;
2941 thread_data[i].cvals = c;
2942 thread_data[i].Y1 = X;
2943 thread_data[i].Y2 = Z;
2944
2945 /* create threads and call pthread companion function */
2946 pthread_create(&threads[i], &attr, N_VScaleVectorArray_PT,
2947 (void *) &thread_data[i]);
2948 }
2949
2950 /* wait for all threads to finish */
2951 for (i=0; i<nthreads; i++) {
2952 pthread_join(threads[i], NULL);
2953 }
2954
2955 /* clean up and return */
2956 pthread_attr_destroy(&attr);
2957 free(threads);
2958 free(thread_data);
2959
2960 return(0);
2961 }
2962
2963
2964 /* -----------------------------------------------------------------------------
2965 * Pthread companion function to N_VScaleVectorArray
2966 */
2967
N_VScaleVectorArray_PT(void * thread_data)2968 static void *N_VScaleVectorArray_PT(void *thread_data)
2969 {
2970 Pthreads_Data *my_data;
2971 sunindextype j, start, end;
2972
2973 int i;
2974 realtype* c;
2975 realtype* xd=NULL;
2976 realtype* zd=NULL;
2977
2978 /* extract thread data */
2979 my_data = (Pthreads_Data *) thread_data;
2980
2981 start = my_data->start;
2982 end = my_data->end;
2983
2984 c = my_data->cvals;
2985
2986 /*
2987 * X[i] *= c[i]
2988 */
2989 if (my_data->Y1 == my_data->Y2) {
2990 for (i=0; i<my_data->nvec; i++) {
2991 xd = NV_DATA_PT(my_data->Y1[i]);
2992 for (j=start; j<end; j++) {
2993 xd[j] *= c[i];
2994 }
2995 }
2996 pthread_exit(NULL);
2997 }
2998
2999 /*
3000 * Z[i] = c[i] * X[i]
3001 */
3002 for (i=0; i<my_data->nvec; i++) {
3003 xd = NV_DATA_PT(my_data->Y1[i]);
3004 zd = NV_DATA_PT(my_data->Y2[i]);
3005 for (j=start; j<end; j++) {
3006 zd[j] = c[i] * xd[j];
3007 }
3008 }
3009 pthread_exit(NULL);
3010 }
3011
3012
3013 /* -----------------------------------------------------------------------------
3014 * Set multiple vectors to a constant value Z[i] = c
3015 */
3016
N_VConstVectorArray_Pthreads(int nvec,realtype c,N_Vector * Z)3017 int N_VConstVectorArray_Pthreads(int nvec, realtype c, N_Vector* Z)
3018 {
3019 sunindextype N;
3020 int i, nthreads;
3021 pthread_t *threads;
3022 Pthreads_Data *thread_data;
3023 pthread_attr_t attr;
3024
3025 /* invalid number of vectors */
3026 if (nvec < 1) return(-1);
3027
3028 /* should have called N_VConst */
3029 if (nvec == 1) {
3030 N_VConst_Pthreads(c, Z[0]);
3031 return(0);
3032 }
3033
3034 /* get vector length and data array */
3035 N = NV_LENGTH_PT(Z[0]);
3036 nthreads = NV_NUM_THREADS_PT(Z[0]);
3037 threads = malloc(nthreads*sizeof(pthread_t));
3038 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3039
3040 /* set thread attributes */
3041 pthread_attr_init(&attr);
3042 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3043
3044 for (i=0; i<nthreads; i++) {
3045 /* initialize thread data */
3046 N_VInitThreadData(&thread_data[i]);
3047
3048 /* compute start and end loop index for thread */
3049 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3050
3051 /* pack thread data */
3052 thread_data[i].nvec = nvec;
3053 thread_data[i].c1 = c;
3054 thread_data[i].Y1 = Z;
3055
3056 /* create threads and call pthread companion function */
3057 pthread_create(&threads[i], &attr, N_VConstVectorArray_PT,
3058 (void *) &thread_data[i]);
3059 }
3060
3061 /* wait for all threads to finish */
3062 for (i=0; i<nthreads; i++) {
3063 pthread_join(threads[i], NULL);
3064 }
3065
3066 /* clean up and return */
3067 pthread_attr_destroy(&attr);
3068 free(threads);
3069 free(thread_data);
3070
3071 return(0);
3072 }
3073
3074
3075 /* -----------------------------------------------------------------------------
3076 * Pthread companion function to N_VConstVectorArray
3077 */
3078
N_VConstVectorArray_PT(void * thread_data)3079 static void *N_VConstVectorArray_PT(void *thread_data)
3080 {
3081 Pthreads_Data *my_data;
3082 sunindextype j, start, end;
3083
3084 int i;
3085 realtype* zd=NULL;
3086
3087 /* extract thread data */
3088 my_data = (Pthreads_Data *) thread_data;
3089
3090 start = my_data->start;
3091 end = my_data->end;
3092
3093 /* set each vector in the vector array to a constant */
3094 for (i=0; i<my_data->nvec; i++) {
3095 zd = NV_DATA_PT(my_data->Y1[i]);
3096 for (j=start; j<end; j++) {
3097 zd[j] = my_data->c1;
3098 }
3099 }
3100
3101 /* exit */
3102 pthread_exit(NULL);
3103 }
3104
3105
3106 /* ----------------------------------------------------------------------------
3107 * Compute the weighted root mean square norm of multiple vectors
3108 */
3109
N_VWrmsNormVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,realtype * nrm)3110 int N_VWrmsNormVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W, realtype* nrm)
3111 {
3112 sunindextype N;
3113 int i, nthreads;
3114 pthread_t *threads;
3115 Pthreads_Data *thread_data;
3116 pthread_attr_t attr;
3117 pthread_mutex_t global_mutex;
3118
3119 /* invalid number of vectors */
3120 if (nvec < 1) return(-1);
3121
3122 /* should have called N_VWrmsNorm */
3123 if (nvec == 1) {
3124 nrm[0] = N_VWrmsNorm_Pthreads(X[0], W[0]);
3125 return(0);
3126 }
3127
3128 /* initialize output array */
3129 for (i=0; i<nvec; i++)
3130 nrm[i] = ZERO;
3131
3132 /* allocate threads and thread data structs */
3133 N = NV_LENGTH_PT(X[0]);
3134 nthreads = NV_NUM_THREADS_PT(X[0]);
3135 threads = malloc(nthreads*sizeof(pthread_t));
3136 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3137
3138 /* set thread attributes */
3139 pthread_attr_init(&attr);
3140 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3141
3142 /* lock for reduction */
3143 pthread_mutex_init(&global_mutex, NULL);
3144
3145 for (i=0; i<nthreads; i++) {
3146 /* initialize thread data */
3147 N_VInitThreadData(&thread_data[i]);
3148
3149 /* compute start and end loop index for thread */
3150 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3151
3152 /* pack thread data */
3153 thread_data[i].nvec = nvec;
3154 thread_data[i].Y1 = X;
3155 thread_data[i].Y2 = W;
3156 thread_data[i].cvals = nrm;
3157
3158 thread_data[i].global_mutex = &global_mutex;
3159
3160 /* create threads and call pthread companion function */
3161 pthread_create(&threads[i], &attr, N_VWrmsNormVectorArray_PT,
3162 (void *) &thread_data[i]);
3163 }
3164
3165 /* wait for all threads to finish */
3166 for (i=0; i<nthreads; i++) {
3167 pthread_join(threads[i], NULL);
3168 }
3169
3170 /* finalize wrms calculation */
3171 for (i=0; i<nvec; i++)
3172 nrm[i] = SUNRsqrt(nrm[i]/N);
3173
3174 /* clean up and return */
3175 pthread_attr_destroy(&attr);
3176 pthread_mutex_destroy(&global_mutex);
3177 free(threads);
3178 free(thread_data);
3179
3180 return(0);
3181 }
3182
3183
3184 /* ----------------------------------------------------------------------------
3185 * Pthread companion function to N_VWrmsNormVectorArray
3186 */
3187
N_VWrmsNormVectorArray_PT(void * thread_data)3188 static void *N_VWrmsNormVectorArray_PT(void *thread_data)
3189 {
3190 Pthreads_Data *my_data;
3191 sunindextype j, start, end;
3192 pthread_mutex_t *lock;
3193
3194 int i;
3195 realtype sum;
3196 realtype* nrm=NULL;
3197 realtype* xd=NULL;
3198 realtype* wd=NULL;
3199
3200 /* extract thread data */
3201 my_data = (Pthreads_Data *) thread_data;
3202
3203 start = my_data->start;
3204 end = my_data->end;
3205 lock = my_data->global_mutex;
3206
3207 nrm = my_data->cvals;
3208
3209 /* compute the WRMS norm for each vector in the vector array */
3210 for (i=0; i<my_data->nvec; i++) {
3211 xd = NV_DATA_PT(my_data->Y1[i]);
3212 wd = NV_DATA_PT(my_data->Y2[i]);
3213 sum = ZERO;
3214 for (j=start; j<end; j++) {
3215 sum += SUNSQR(xd[j] * wd[j]);
3216 }
3217 /* update global sum */
3218 pthread_mutex_lock(lock);
3219 nrm[i] += sum;
3220 pthread_mutex_unlock(lock);
3221 }
3222
3223 /* exit */
3224 pthread_exit(NULL);
3225 }
3226
3227
3228 /* ----------------------------------------------------------------------------
3229 * Compute the weighted root mean square norm of multiple vectors
3230 */
3231
N_VWrmsNormMaskVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,N_Vector id,realtype * nrm)3232 int N_VWrmsNormMaskVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W,
3233 N_Vector id, realtype* nrm)
3234 {
3235 sunindextype N;
3236 int i, nthreads;
3237 pthread_t *threads;
3238 Pthreads_Data *thread_data;
3239 pthread_attr_t attr;
3240 pthread_mutex_t global_mutex;
3241
3242 /* invalid number of vectors */
3243 if (nvec < 1) return(-1);
3244
3245 /* should have called N_VWrmsNorm */
3246 if (nvec == 1) {
3247 nrm[0] = N_VWrmsNormMask_Pthreads(X[0], W[0], id);
3248 return(0);
3249 }
3250
3251 /* initialize output array */
3252 for (i=0; i<nvec; i++)
3253 nrm[i] = ZERO;
3254
3255 /* allocate threads and thread data structs */
3256 N = NV_LENGTH_PT(X[0]);
3257 nthreads = NV_NUM_THREADS_PT(X[0]);
3258 threads = malloc(nthreads*sizeof(pthread_t));
3259 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3260
3261 /* set thread attributes */
3262 pthread_attr_init(&attr);
3263 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3264
3265 /* lock for reduction */
3266 pthread_mutex_init(&global_mutex, NULL);
3267
3268 for (i=0; i<nthreads; i++) {
3269 /* initialize thread data */
3270 N_VInitThreadData(&thread_data[i]);
3271
3272 /* compute start and end loop index for thread */
3273 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3274
3275 /* pack thread data */
3276 thread_data[i].nvec = nvec;
3277 thread_data[i].Y1 = X;
3278 thread_data[i].Y2 = W;
3279 thread_data[i].x1 = id;
3280 thread_data[i].cvals = nrm;
3281
3282 thread_data[i].global_mutex = &global_mutex;
3283
3284 /* create threads and call pthread companion function */
3285 pthread_create(&threads[i], &attr, N_VWrmsNormMaskVectorArray_PT,
3286 (void *) &thread_data[i]);
3287 }
3288
3289 /* wait for all threads to finish */
3290 for (i=0; i<nthreads; i++) {
3291 pthread_join(threads[i], NULL);
3292 }
3293
3294 /* finalize wrms calculation */
3295 for (i=0; i<nvec; i++)
3296 nrm[i] = SUNRsqrt(nrm[i]/N);
3297
3298 /* clean up and return */
3299 pthread_attr_destroy(&attr);
3300 pthread_mutex_destroy(&global_mutex);
3301 free(threads);
3302 free(thread_data);
3303
3304 return(0);
3305 }
3306
3307
3308 /* ----------------------------------------------------------------------------
3309 * Pthread companion function to N_VWrmsNormVectorArray
3310 */
3311
N_VWrmsNormMaskVectorArray_PT(void * thread_data)3312 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data)
3313 {
3314 Pthreads_Data *my_data;
3315 sunindextype j, start, end;
3316 pthread_mutex_t *lock;
3317
3318 int i;
3319 realtype sum;
3320 realtype* nrm=NULL;
3321 realtype* xd=NULL;
3322 realtype* wd=NULL;
3323 realtype* idd=NULL;
3324
3325 /* extract thread data */
3326 my_data = (Pthreads_Data *) thread_data;
3327
3328 start = my_data->start;
3329 end = my_data->end;
3330 lock = my_data->global_mutex;
3331
3332 nrm = my_data->cvals;
3333 idd = NV_DATA_PT(my_data->x1);
3334
3335 /* compute the WRMS norm for each vector in the vector array */
3336 for (i=0; i<my_data->nvec; i++) {
3337 xd = NV_DATA_PT(my_data->Y1[i]);
3338 wd = NV_DATA_PT(my_data->Y2[i]);
3339 sum = ZERO;
3340 for (j=start; j<end; j++) {
3341 if (idd[j] > ZERO)
3342 sum += SUNSQR(xd[j] * wd[j]);
3343 }
3344 /* update global sum */
3345 pthread_mutex_lock(lock);
3346 nrm[i] += sum;
3347 pthread_mutex_unlock(lock);
3348 }
3349
3350 /* exit */
3351 pthread_exit(NULL);
3352 }
3353
3354
3355 /* -----------------------------------------------------------------------------
3356 * Scale and add a vector to multiple vectors Z = Y + a*X
3357 */
3358
N_VScaleAddMultiVectorArray_Pthreads(int nvec,int nsum,realtype * a,N_Vector * X,N_Vector ** Y,N_Vector ** Z)3359 int N_VScaleAddMultiVectorArray_Pthreads(int nvec, int nsum, realtype* a,
3360 N_Vector* X, N_Vector** Y, N_Vector** Z)
3361 {
3362 sunindextype N;
3363 int i, j, nthreads;
3364 pthread_t *threads;
3365 Pthreads_Data *thread_data;
3366 pthread_attr_t attr;
3367
3368 int retval;
3369 N_Vector* YY;
3370 N_Vector* ZZ;
3371
3372 /* invalid number of vectors */
3373 if (nvec < 1) return(-1);
3374 if (nsum < 1) return(-1);
3375
3376 /* ---------------------------
3377 * Special cases for nvec == 1
3378 * --------------------------- */
3379
3380 if (nvec == 1) {
3381
3382 /* should have called N_VLinearSum */
3383 if (nsum == 1) {
3384 N_VLinearSum_Pthreads(a[0], X[0], ONE, Y[0][0], Z[0][0]);
3385 return(0);
3386 }
3387
3388 /* should have called N_VScaleAddMulti */
3389 YY = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3390 ZZ = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3391
3392 for (j=0; j<nsum; j++) {
3393 YY[j] = Y[j][0];
3394 ZZ[j] = Z[j][0];
3395 }
3396
3397 retval = N_VScaleAddMulti_Pthreads(nsum, a, X[0], YY, ZZ);
3398
3399 free(YY);
3400 free(ZZ);
3401 return(retval);
3402 }
3403
3404 /* --------------------------
3405 * Special cases for nvec > 1
3406 * -------------------------- */
3407
3408 /* should have called N_VLinearSumVectorArray */
3409 if (nsum == 1) {
3410 retval = N_VLinearSumVectorArray_Pthreads(nvec, a[0], X, ONE, Y[0], Z[0]);
3411 return(retval);
3412 }
3413
3414 /* ----------------------------
3415 * Compute multiple linear sums
3416 * ---------------------------- */
3417
3418 /* get vector length and data array */
3419 N = NV_LENGTH_PT(X[0]);
3420 nthreads = NV_NUM_THREADS_PT(X[0]);
3421 threads = malloc(nthreads*sizeof(pthread_t));
3422 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3423
3424 /* set thread attributes */
3425 pthread_attr_init(&attr);
3426 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3427
3428 for (i=0; i<nthreads; i++) {
3429 /* initialize thread data */
3430 N_VInitThreadData(&thread_data[i]);
3431
3432 /* compute start and end loop index for thread */
3433 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3434
3435 /* pack thread data */
3436 thread_data[i].nvec = nvec;
3437 thread_data[i].nsum = nsum;
3438 thread_data[i].cvals = a;
3439 thread_data[i].Y1 = X;
3440 thread_data[i].ZZ1 = Y;
3441 thread_data[i].ZZ2 = Z;
3442
3443 /* create threads and call pthread companion function */
3444 pthread_create(&threads[i], &attr, N_VScaleAddMultiVectorArray_PT,
3445 (void *) &thread_data[i]);
3446 }
3447
3448 /* wait for all threads to finish */
3449 for (i=0; i<nthreads; i++) {
3450 pthread_join(threads[i], NULL);
3451 }
3452
3453 /* clean up and return */
3454 pthread_attr_destroy(&attr);
3455 free(threads);
3456 free(thread_data);
3457
3458 return(0);
3459 }
3460
3461
3462 /* -----------------------------------------------------------------------------
3463 * Pthread companion function to N_VScaleAddMultiVectorArray
3464 */
3465
N_VScaleAddMultiVectorArray_PT(void * thread_data)3466 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data)
3467 {
3468 Pthreads_Data *my_data;
3469 sunindextype k, start, end;
3470
3471 int i, j;
3472 realtype* a=NULL;
3473 realtype* xd=NULL;
3474 realtype* yd=NULL;
3475 realtype* zd=NULL;
3476
3477 /* extract thread data */
3478 my_data = (Pthreads_Data *) thread_data;
3479
3480 start = my_data->start;
3481 end = my_data->end;
3482
3483 a = my_data->cvals;
3484
3485 /*
3486 * Y[i][j] += a[i] * x[j]
3487 */
3488 if (my_data->ZZ1 == my_data->ZZ2) {
3489 for (i=0; i<my_data->nvec; i++) {
3490 xd = NV_DATA_PT(my_data->Y1[i]);
3491 for (j=0; j<my_data->nsum; j++){
3492 yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3493 for (k=start; k<end; k++) {
3494 yd[k] += a[j] * xd[k];
3495 }
3496 }
3497 }
3498 pthread_exit(NULL);
3499 }
3500
3501 /*
3502 * Z[i][j] = Y[i][j] + a[i] * x[j]
3503 */
3504 for (i=0; i<my_data->nvec; i++) {
3505 xd = NV_DATA_PT(my_data->Y1[i]);
3506 for (j=0; j<my_data->nsum; j++){
3507 yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3508 zd = NV_DATA_PT(my_data->ZZ2[j][i]);
3509 for (k=start; k<end; k++) {
3510 zd[k] = a[j] * xd[k] + yd[k];
3511 }
3512 }
3513 }
3514 pthread_exit(NULL);
3515 }
3516
3517
3518 /* -----------------------------------------------------------------------------
3519 * Compute a linear combination for multiple vectors
3520 */
3521
N_VLinearCombinationVectorArray_Pthreads(int nvec,int nsum,realtype * c,N_Vector ** X,N_Vector * Z)3522 int N_VLinearCombinationVectorArray_Pthreads(int nvec, int nsum, realtype* c,
3523 N_Vector** X, N_Vector* Z)
3524 {
3525 sunindextype N;
3526 int i, j, nthreads;
3527 pthread_t *threads;
3528 Pthreads_Data *thread_data;
3529 pthread_attr_t attr;
3530
3531 int retval;
3532 realtype* ctmp;
3533 N_Vector* Y;
3534
3535 /* invalid number of vectors */
3536 if (nvec < 1) return(-1);
3537 if (nsum < 1) return(-1);
3538
3539 /* ---------------------------
3540 * Special cases for nvec == 1
3541 * --------------------------- */
3542
3543 if (nvec == 1) {
3544
3545 /* should have called N_VScale */
3546 if (nsum == 1) {
3547 N_VScale_Pthreads(c[0], X[0][0], Z[0]);
3548 return(0);
3549 }
3550
3551 /* should have called N_VLinearSum */
3552 if (nsum == 2) {
3553 N_VLinearSum_Pthreads(c[0], X[0][0], c[1], X[1][0], Z[0]);
3554 return(0);
3555 }
3556
3557 /* should have called N_VLinearCombination */
3558 Y = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3559
3560 for (i=0; i<nsum; i++) {
3561 Y[i] = X[i][0];
3562 }
3563
3564 retval = N_VLinearCombination_Pthreads(nsum, c, Y, Z[0]);
3565
3566 free(Y);
3567 return(retval);
3568 }
3569
3570 /* --------------------------
3571 * Special cases for nvec > 1
3572 * -------------------------- */
3573
3574 /* should have called N_VScaleVectorArray */
3575 if (nsum == 1) {
3576
3577 ctmp = (realtype*) malloc(nvec * sizeof(realtype));
3578
3579 for (j=0; j<nvec; j++) {
3580 ctmp[j] = c[0];
3581 }
3582
3583 retval = N_VScaleVectorArray_Pthreads(nvec, ctmp, X[0], Z);
3584
3585 free(ctmp);
3586 return(retval);
3587 }
3588
3589 /* should have called N_VLinearSumVectorArray */
3590 if (nsum == 2) {
3591 retval = N_VLinearSumVectorArray_Pthreads(nvec, c[0], X[0], c[1], X[1], Z);
3592 return(retval);
3593 }
3594
3595 /* --------------------------
3596 * Compute linear combination
3597 * -------------------------- */
3598
3599 /* get vector length and data array */
3600 N = NV_LENGTH_PT(Z[0]);
3601 nthreads = NV_NUM_THREADS_PT(Z[0]);
3602 threads = malloc(nthreads*sizeof(pthread_t));
3603 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3604
3605 /* set thread attributes */
3606 pthread_attr_init(&attr);
3607 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3608
3609 for (i=0; i<nthreads; i++) {
3610 /* initialize thread data */
3611 N_VInitThreadData(&thread_data[i]);
3612
3613 /* compute start and end loop index for thread */
3614 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3615
3616 /* pack thread data */
3617 thread_data[i].nvec = nvec;
3618 thread_data[i].nsum = nsum;
3619 thread_data[i].cvals = c;
3620 thread_data[i].ZZ1 = X;
3621 thread_data[i].Y1 = Z;
3622
3623 /* create threads and call pthread companion function */
3624 pthread_create(&threads[i], &attr, N_VLinearCombinationVectorArray_PT,
3625 (void *) &thread_data[i]);
3626 }
3627
3628 /* wait for all threads to finish */
3629 for (i=0; i<nthreads; i++) {
3630 pthread_join(threads[i], NULL);
3631 }
3632
3633 /* clean up and return */
3634 pthread_attr_destroy(&attr);
3635 free(threads);
3636 free(thread_data);
3637
3638 return(0);
3639 }
3640
3641
3642 /* -----------------------------------------------------------------------------
3643 * Pthread companion function to N_VLinearCombinationVectorArray
3644 */
3645
N_VLinearCombinationVectorArray_PT(void * thread_data)3646 static void *N_VLinearCombinationVectorArray_PT(void *thread_data)
3647 {
3648 Pthreads_Data *my_data;
3649 sunindextype k, start, end;
3650
3651 int i; /* vector arrays index in summation [0,nsum) */
3652 int j; /* vector index in vector array [0,nvec) */
3653 realtype* c=NULL;
3654 realtype* zd=NULL;
3655 realtype* xd=NULL;
3656
3657 /* extract thread data */
3658 my_data = (Pthreads_Data *) thread_data;
3659
3660 start = my_data->start;
3661 end = my_data->end;
3662
3663 c = my_data->cvals;
3664
3665 /*
3666 * X[0][j] += c[i]*X[i][j], i = 1,...,nvec-1
3667 */
3668 if ((my_data->ZZ1[0] == my_data->Y1) && (c[0] == ONE)) {
3669 for (j=0; j<my_data->nvec; j++) {
3670 zd = NV_DATA_PT(my_data->Y1[j]);
3671 for (i=1; i<my_data->nsum; i++) {
3672 xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3673 for (k=start; k<end; k++) {
3674 zd[k] += c[i] * xd[k];
3675 }
3676 }
3677 }
3678 pthread_exit(NULL);
3679 }
3680
3681 /*
3682 * X[0][j] = c[0] * X[0][j] + sum{ c[i] * X[i][j] }, i = 1,...,nvec-1
3683 */
3684 if (my_data->ZZ1[0] == my_data->Y1) {
3685 for (j=0; j<my_data->nvec; j++) {
3686 zd = NV_DATA_PT(my_data->Y1[j]);
3687 for (k=start; k<end; k++) {
3688 zd[k] *= c[0];
3689 }
3690 for (i=1; i<my_data->nsum; i++) {
3691 xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3692 for (k=start; k<end; k++) {
3693 zd[k] += c[i] * xd[k];
3694 }
3695 }
3696 }
3697 pthread_exit(NULL);
3698 }
3699
3700 /*
3701 * Z[j] = sum{ c[i] * X[i][j] }, i = 0,...,nvec-1
3702 */
3703 for (j=0; j<my_data->nvec; j++) {
3704 xd = NV_DATA_PT(my_data->ZZ1[0][j]);
3705 zd = NV_DATA_PT(my_data->Y1[j]);
3706 for (k=start; k<end; k++) {
3707 zd[k] = c[0] * xd[k];
3708 }
3709 for (i=1; i<my_data->nsum; i++) {
3710 xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3711 for (k=start; k<end; k++) {
3712 zd[k] += c[i] * xd[k];
3713 }
3714 }
3715 }
3716 pthread_exit(NULL);
3717 }
3718
3719
3720 /*
3721 * -----------------------------------------------------------------
3722 * OPTIONAL XBraid interface operations
3723 * -----------------------------------------------------------------
3724 */
3725
3726
3727
3728 /* -----------------------------------------------------------------------------
3729 * Set buffer size
3730 */
3731
N_VBufSize_Pthreads(N_Vector x,sunindextype * size)3732 int N_VBufSize_Pthreads(N_Vector x, sunindextype *size)
3733 {
3734 if (x == NULL) return(-1);
3735 *size = NV_LENGTH_PT(x) * ((sunindextype)sizeof(realtype));
3736 return(0);
3737 }
3738
3739
3740 /* -----------------------------------------------------------------------------
3741 * Pack butter
3742 */
3743
N_VBufPack_Pthreads(N_Vector x,void * buf)3744 int N_VBufPack_Pthreads(N_Vector x, void *buf)
3745 {
3746 sunindextype N;
3747 int i, nthreads;
3748 pthread_t *threads;
3749 Pthreads_Data *thread_data;
3750 pthread_attr_t attr;
3751
3752 if (x == NULL || buf == NULL) return(-1);
3753
3754 /* allocate threads and thread data structs */
3755 N = NV_LENGTH_PT(x);
3756 nthreads = NV_NUM_THREADS_PT(x);
3757 threads = malloc(nthreads*sizeof(pthread_t));
3758 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3759
3760 /* set thread attributes */
3761 pthread_attr_init(&attr);
3762 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3763
3764 for (i=0; i<nthreads; i++) {
3765 /* initialize thread data */
3766 N_VInitThreadData(&thread_data[i]);
3767
3768 /* compute start and end loop index for thread */
3769 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3770
3771 /* pack thread data */
3772 thread_data[i].v1 = NV_DATA_PT(x);
3773 thread_data[i].v2 = (realtype*)buf;
3774
3775 /* create threads and call pthread companion function */
3776 pthread_create(&threads[i], &attr, VBufPack_PT, (void *) &thread_data[i]);
3777 }
3778
3779 /* wait for all threads to finish */
3780 for (i=0; i<nthreads; i++) {
3781 pthread_join(threads[i], NULL);
3782 }
3783
3784 /* clean up */
3785 pthread_attr_destroy(&attr);
3786 free(threads);
3787 free(thread_data);
3788
3789 return(0);
3790 }
3791
3792
3793 /* -----------------------------------------------------------------------------
3794 * Pthread companion function to N_VBufPack
3795 */
3796
3797
VBufPack_PT(void * thread_data)3798 static void *VBufPack_PT(void *thread_data)
3799 {
3800 sunindextype i, start, end;
3801 realtype *xd, *bd;
3802 Pthreads_Data *my_data;
3803
3804 /* extract thread data */
3805 my_data = (Pthreads_Data *) thread_data;
3806
3807 xd = my_data->v1;
3808 bd = my_data->v2;
3809
3810 start = my_data->start;
3811 end = my_data->end;
3812
3813 /* pack the buffer */
3814 for (i = start; i < end; i++)
3815 bd[i] = xd[i];
3816
3817 /* exit */
3818 pthread_exit(NULL);
3819 }
3820
3821
3822 /* -----------------------------------------------------------------------------
3823 * Unpack butter
3824 */
3825
N_VBufUnpack_Pthreads(N_Vector x,void * buf)3826 int N_VBufUnpack_Pthreads(N_Vector x, void *buf)
3827 {
3828 sunindextype N;
3829 int i, nthreads;
3830 pthread_t *threads;
3831 Pthreads_Data *thread_data;
3832 pthread_attr_t attr;
3833
3834 if (x == NULL || buf == NULL) return(-1);
3835
3836 /* allocate threads and thread data structs */
3837 N = NV_LENGTH_PT(x);
3838 nthreads = NV_NUM_THREADS_PT(x);
3839 threads = malloc(nthreads*sizeof(pthread_t));
3840 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3841
3842 /* set thread attributes */
3843 pthread_attr_init(&attr);
3844 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3845
3846 for (i=0; i<nthreads; i++) {
3847 /* initialize thread data */
3848 N_VInitThreadData(&thread_data[i]);
3849
3850 /* compute start and end loop index for thread */
3851 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3852
3853 /* pack thread data */
3854 thread_data[i].v1 = NV_DATA_PT(x);
3855 thread_data[i].v2 = (realtype*)buf;
3856
3857 /* create threads and call pthread companion function */
3858 pthread_create(&threads[i], &attr, VBufUnpack_PT, (void *) &thread_data[i]);
3859 }
3860
3861 /* wait for all threads to finish */
3862 for (i=0; i<nthreads; i++) {
3863 pthread_join(threads[i], NULL);
3864 }
3865
3866 /* clean up */
3867 pthread_attr_destroy(&attr);
3868 free(threads);
3869 free(thread_data);
3870
3871 return(0);
3872 }
3873
3874
3875 /* -----------------------------------------------------------------------------
3876 * Pthread companion function to N_VBufUnpack
3877 */
3878
3879
VBufUnpack_PT(void * thread_data)3880 static void *VBufUnpack_PT(void *thread_data)
3881 {
3882 sunindextype i, start, end;
3883 realtype *xd, *bd;
3884 Pthreads_Data *my_data;
3885
3886 /* extract thread data */
3887 my_data = (Pthreads_Data *) thread_data;
3888
3889 xd = my_data->v1;
3890 bd = my_data->v2;
3891
3892 start = my_data->start;
3893 end = my_data->end;
3894
3895 /* unpack the buffer */
3896 for (i = start; i < end; i++)
3897 xd[i] = bd[i];
3898
3899 /* exit */
3900 pthread_exit(NULL);
3901 }
3902
3903
3904 /*
3905 * -----------------------------------------------------------------
3906 * private functions for special cases of vector operations
3907 * -----------------------------------------------------------------
3908 */
3909
3910
3911 /* ----------------------------------------------------------------------------
3912 * Copy vector components into second vector
3913 */
3914
VCopy_Pthreads(N_Vector x,N_Vector z)3915 static void VCopy_Pthreads(N_Vector x, N_Vector z)
3916 {
3917 sunindextype N;
3918 int i, nthreads;
3919 pthread_t *threads;
3920 Pthreads_Data *thread_data;
3921 pthread_attr_t attr;
3922
3923 /* allocate threads and thread data structs */
3924 N = NV_LENGTH_PT(x);
3925 nthreads = NV_NUM_THREADS_PT(x);
3926 threads = malloc(nthreads*sizeof(pthread_t));
3927 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3928
3929 /* set thread attributes */
3930 pthread_attr_init(&attr);
3931 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3932
3933 for (i=0; i<nthreads; i++) {
3934 /* initialize thread data */
3935 N_VInitThreadData(&thread_data[i]);
3936
3937 /* compute start and end loop index for thread */
3938 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3939
3940 /* pack thread data */
3941 thread_data[i].v1 = NV_DATA_PT(x);
3942 thread_data[i].v2 = NV_DATA_PT(z);
3943
3944 /* create threads and call pthread companion function */
3945 pthread_create(&threads[i], &attr, VCopy_PT, (void *) &thread_data[i]);
3946 }
3947
3948 /* wait for all threads to finish */
3949 for (i=0; i<nthreads; i++) {
3950 pthread_join(threads[i], NULL);
3951 }
3952
3953 /* clean up and return */
3954 pthread_attr_destroy(&attr);
3955 free(threads);
3956 free(thread_data);
3957
3958 return;
3959 }
3960
3961
3962 /* ----------------------------------------------------------------------------
3963 * Pthread companion function to VCopy
3964 */
3965
VCopy_PT(void * thread_data)3966 static void *VCopy_PT(void *thread_data)
3967 {
3968 sunindextype i, start, end;
3969 realtype *xd, *zd;
3970 Pthreads_Data *my_data;
3971
3972 /* extract thread data */
3973 my_data = (Pthreads_Data *) thread_data;
3974
3975 xd = my_data->v1;
3976 zd = my_data->v2;
3977
3978 start = my_data->start;
3979 end = my_data->end;
3980
3981 /* copy vector components */
3982 for (i = start; i < end; i++)
3983 zd[i] = xd[i];
3984
3985 /* exit */
3986 pthread_exit(NULL);
3987 }
3988
3989
3990 /* ----------------------------------------------------------------------------
3991 * Compute vector sum
3992 */
3993
VSum_Pthreads(N_Vector x,N_Vector y,N_Vector z)3994 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z)
3995 {
3996 sunindextype N;
3997 int i, nthreads;
3998 pthread_t *threads;
3999 Pthreads_Data *thread_data;
4000 pthread_attr_t attr;
4001
4002 /* allocate threads and thread data structs */
4003 N = NV_LENGTH_PT(x);
4004 nthreads = NV_NUM_THREADS_PT(x);
4005 threads = malloc(nthreads*sizeof(pthread_t));
4006 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4007
4008 /* set thread attributes */
4009 pthread_attr_init(&attr);
4010 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4011
4012 for (i=0; i<nthreads; i++) {
4013 /* initialize thread data */
4014 N_VInitThreadData(&thread_data[i]);
4015
4016 /* compute start and end loop index for thread */
4017 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4018
4019 /* pack thread data */
4020 thread_data[i].v1 = NV_DATA_PT(x);
4021 thread_data[i].v2 = NV_DATA_PT(y);
4022 thread_data[i].v3 = NV_DATA_PT(z);
4023
4024 /* create threads and call pthread companion function */
4025 pthread_create(&threads[i], &attr, VSum_PT, (void *) &thread_data[i]);
4026 }
4027
4028 /* wait for all threads to finish */
4029 for (i=0; i<nthreads; i++) {
4030 pthread_join(threads[i], NULL);
4031 }
4032
4033 /* clean up and return */
4034 pthread_attr_destroy(&attr);
4035 free(threads);
4036 free(thread_data);
4037
4038 return;
4039 }
4040
4041
4042 /* ----------------------------------------------------------------------------
4043 * Pthread companion function to VSum
4044 */
4045
VSum_PT(void * thread_data)4046 static void *VSum_PT(void *thread_data)
4047 {
4048 sunindextype i, start, end;
4049 realtype *xd, *yd, *zd;
4050 Pthreads_Data *my_data;
4051
4052 /* extract thread data */
4053 my_data = (Pthreads_Data *) thread_data;
4054
4055 xd = my_data->v1;
4056 yd = my_data->v2;
4057 zd = my_data->v3;
4058
4059 start = my_data->start;
4060 end = my_data->end;
4061
4062 /* compute vector sum */
4063 for (i = start; i < end; i++)
4064 zd[i] = xd[i] + yd[i];
4065
4066 /* exit */
4067 pthread_exit(NULL);
4068 }
4069
4070
4071 /* ----------------------------------------------------------------------------
4072 * Compute vector difference
4073 */
4074
VDiff_Pthreads(N_Vector x,N_Vector y,N_Vector z)4075 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z)
4076 {
4077 sunindextype N;
4078 int i, nthreads;
4079 pthread_t *threads;
4080 Pthreads_Data *thread_data;
4081 pthread_attr_t attr;
4082
4083 /* allocate threads and thread data structs */
4084 N = NV_LENGTH_PT(x);
4085 nthreads = NV_NUM_THREADS_PT(x);
4086 threads = malloc(nthreads*sizeof(pthread_t));
4087 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4088
4089 /* set thread attributes */
4090 pthread_attr_init(&attr);
4091 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4092
4093 for (i=0; i<nthreads; i++) {
4094 /* initialize thread data */
4095 N_VInitThreadData(&thread_data[i]);
4096
4097 /* compute start and end loop index for thread */
4098 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4099
4100 /* pack thread data */
4101 thread_data[i].v1 = NV_DATA_PT(x);
4102 thread_data[i].v2 = NV_DATA_PT(y);
4103 thread_data[i].v3 = NV_DATA_PT(z);
4104
4105 /* create threads and call pthread companion function */
4106 pthread_create(&threads[i], &attr, VDiff_PT, (void *) &thread_data[i]);
4107 }
4108
4109 /* wait for all threads to finish */
4110 for (i=0; i<nthreads; i++) {
4111 pthread_join(threads[i], NULL);
4112 }
4113
4114 /* clean up and return */
4115 pthread_attr_destroy(&attr);
4116 free(threads);
4117 free(thread_data);
4118
4119 return;
4120 }
4121
4122
4123 /* ----------------------------------------------------------------------------
4124 * Pthread companion function to VDiff
4125 */
4126
VDiff_PT(void * thread_data)4127 static void *VDiff_PT(void *thread_data)
4128 {
4129 sunindextype i, start, end;
4130 realtype *xd, *yd, *zd;
4131 Pthreads_Data *my_data;
4132
4133 /* extract thread data */
4134 my_data = (Pthreads_Data *) thread_data;
4135
4136 xd = my_data->v1;
4137 yd = my_data->v2;
4138 zd = my_data->v3;
4139
4140 start = my_data->start;
4141 end = my_data->end;
4142
4143 /* compute vector difference */
4144 for (i = start; i < end; i++)
4145 zd[i] = xd[i] - yd[i];
4146
4147 /* exit */
4148 pthread_exit(NULL);
4149 }
4150
4151
4152 /* ----------------------------------------------------------------------------
4153 * Compute the negative of a vector
4154 */
4155
VNeg_Pthreads(N_Vector x,N_Vector z)4156 static void VNeg_Pthreads(N_Vector x, N_Vector z)
4157 {
4158 sunindextype N;
4159 int i, nthreads;
4160 pthread_t *threads;
4161 Pthreads_Data *thread_data;
4162 pthread_attr_t attr;
4163
4164 /* allocate threads and thread data structs */
4165 N = NV_LENGTH_PT(x);
4166 nthreads = NV_NUM_THREADS_PT(x);
4167 threads = malloc(nthreads*sizeof(pthread_t));
4168 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4169
4170 /* set thread attributes */
4171 pthread_attr_init(&attr);
4172 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4173
4174 for (i=0; i<nthreads; i++) {
4175 /* initialize thread data */
4176 N_VInitThreadData(&thread_data[i]);
4177
4178 /* compute start and end loop index for thread */
4179 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4180
4181 /* pack thread data */
4182 thread_data[i].v1 = NV_DATA_PT(x);
4183 thread_data[i].v2 = NV_DATA_PT(z);
4184
4185 /* create threads and call pthread companion function */
4186 pthread_create(&threads[i], &attr, VNeg_PT, (void *) &thread_data[i]);
4187 }
4188
4189 /* wait for all threads to finish */
4190 for (i=0; i<nthreads; i++) {
4191 pthread_join(threads[i], NULL);
4192 }
4193
4194 /* clean up and return */
4195 pthread_attr_destroy(&attr);
4196 free(threads);
4197 free(thread_data);
4198
4199 return;
4200 }
4201
4202
4203 /* ----------------------------------------------------------------------------
4204 * Pthread companion function to VNeg
4205 */
4206
VNeg_PT(void * thread_data)4207 static void *VNeg_PT(void *thread_data)
4208 {
4209 sunindextype i, start, end;
4210 realtype *xd, *zd;
4211 Pthreads_Data *my_data;
4212
4213 /* extract thread data */
4214 my_data = (Pthreads_Data *) thread_data;
4215
4216 xd = my_data->v1;
4217 zd = my_data->v2;
4218
4219 start = my_data->start;
4220 end = my_data->end;
4221
4222 /* compute negative of vector */
4223 for (i = start; i < end; i++)
4224 zd[i] = -xd[i];
4225
4226 /* exit */
4227 pthread_exit(NULL);
4228 }
4229
4230
4231 /* ----------------------------------------------------------------------------
4232 * Compute scaled vector sum
4233 */
4234
VScaleSum_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4235 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4236 {
4237 sunindextype N;
4238 int i, nthreads;
4239 pthread_t *threads;
4240 Pthreads_Data *thread_data;
4241 pthread_attr_t attr;
4242
4243 /* allocate threads and thread data structs */
4244 N = NV_LENGTH_PT(x);
4245 nthreads = NV_NUM_THREADS_PT(x);
4246 threads = malloc(nthreads*sizeof(pthread_t));
4247 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4248
4249 /* set thread attributes */
4250 pthread_attr_init(&attr);
4251 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4252
4253 for (i=0; i<nthreads; i++) {
4254 /* initialize thread data */
4255 N_VInitThreadData(&thread_data[i]);
4256
4257 /* compute start and end loop index for thread */
4258 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4259
4260 /* pack thread data */
4261 thread_data[i].c1 = c;
4262 thread_data[i].v1 = NV_DATA_PT(x);
4263 thread_data[i].v2 = NV_DATA_PT(y);
4264 thread_data[i].v3 = NV_DATA_PT(z);
4265
4266 /* create threads and call pthread companion function */
4267 pthread_create(&threads[i], &attr, VScaleSum_PT, (void *) &thread_data[i]);
4268 }
4269
4270 /* wait for all threads to finish */
4271 for (i=0; i<nthreads; i++) {
4272 pthread_join(threads[i], NULL);
4273 }
4274
4275 /* clean up and return */
4276 pthread_attr_destroy(&attr);
4277 free(threads);
4278 free(thread_data);
4279
4280 return;
4281 }
4282
4283
4284 /* ----------------------------------------------------------------------------
4285 * Pthread companion function to VScaleSum
4286 */
4287
VScaleSum_PT(void * thread_data)4288 static void *VScaleSum_PT(void *thread_data)
4289 {
4290 sunindextype i, start, end;
4291 realtype c;
4292 realtype *xd, *yd, *zd;
4293 Pthreads_Data *my_data;
4294
4295 /* extract thread data */
4296 my_data = (Pthreads_Data *) thread_data;
4297
4298 c = my_data->c1;
4299 xd = my_data->v1;
4300 yd = my_data->v2;
4301 zd = my_data->v3;
4302
4303 start = my_data->start;
4304 end = my_data->end;
4305
4306 /* compute scaled vector sum */
4307 for (i = start; i < end; i++)
4308 zd[i] = c*(xd[i] + yd[i]);
4309
4310 /* exit */
4311 pthread_exit(NULL);
4312 }
4313
4314
4315 /* ----------------------------------------------------------------------------
4316 * Compute scaled vector difference
4317 */
4318
VScaleDiff_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4319 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4320 {
4321 sunindextype N;
4322 int i, nthreads;
4323 pthread_t *threads;
4324 Pthreads_Data *thread_data;
4325 pthread_attr_t attr;
4326
4327 /* allocate threads and thread data structs */
4328 N = NV_LENGTH_PT(x);
4329 nthreads = NV_NUM_THREADS_PT(x);
4330 threads = malloc(nthreads*sizeof(pthread_t));
4331 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4332
4333 /* set thread attributes */
4334 pthread_attr_init(&attr);
4335 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4336
4337 for (i=0; i<nthreads; i++) {
4338 /* initialize thread data */
4339 N_VInitThreadData(&thread_data[i]);
4340
4341 /* compute start and end loop index for thread */
4342 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4343
4344 /* pack thread data */
4345 thread_data[i].c1 = c;
4346 thread_data[i].v1 = NV_DATA_PT(x);
4347 thread_data[i].v2 = NV_DATA_PT(y);
4348 thread_data[i].v3 = NV_DATA_PT(z);
4349
4350 /* create threads and call pthread companion function */
4351 pthread_create(&threads[i], &attr, VScaleDiff_PT, (void *) &thread_data[i]);
4352 }
4353
4354 /* wait for all threads to finish */
4355 for (i=0; i<nthreads; i++) {
4356 pthread_join(threads[i], NULL);
4357 }
4358
4359 /* clean up and return */
4360 pthread_attr_destroy(&attr);
4361 free(threads);
4362 free(thread_data);
4363
4364 return;
4365 }
4366
4367
4368 /* ----------------------------------------------------------------------------
4369 * Pthread companion function to VScaleDiff
4370 */
4371
VScaleDiff_PT(void * thread_data)4372 static void *VScaleDiff_PT(void *thread_data)
4373 {
4374 sunindextype i, start, end;
4375 realtype c;
4376 realtype *xd, *yd, *zd;
4377 Pthreads_Data *my_data;
4378
4379 /* extract thread data */
4380 my_data = (Pthreads_Data *) thread_data;
4381
4382 c = my_data->c1;
4383 xd = my_data->v1;
4384 yd = my_data->v2;
4385 zd = my_data->v3;
4386
4387 start = my_data->start;
4388 end = my_data->end;
4389
4390 /* compute scaled vector difference */
4391 for (i = start; i < end; i++)
4392 zd[i] = c*(xd[i] - yd[i]);
4393
4394 /* exit */
4395 pthread_exit(NULL);
4396 }
4397
4398
4399 /* ----------------------------------------------------------------------------
4400 * Compute vector sum z[i] = a*x[i]+y[i]
4401 */
4402
VLin1_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4403 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4404 {
4405 sunindextype N;
4406 int i, nthreads;
4407 pthread_t *threads;
4408 Pthreads_Data *thread_data;
4409 pthread_attr_t attr;
4410
4411 /* allocate threads and thread data structs */
4412 N = NV_LENGTH_PT(x);
4413 nthreads = NV_NUM_THREADS_PT(x);
4414 threads = malloc(nthreads*sizeof(pthread_t));
4415 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4416
4417 /* set thread attributes */
4418 pthread_attr_init(&attr);
4419 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4420
4421 for (i=0; i<nthreads; i++) {
4422 /* initialize thread data */
4423 N_VInitThreadData(&thread_data[i]);
4424
4425 /* compute start and end loop index for thread */
4426 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4427
4428 /* pack thread data */
4429 thread_data[i].c1 = a;
4430 thread_data[i].v1 = NV_DATA_PT(x);
4431 thread_data[i].v2 = NV_DATA_PT(y);
4432 thread_data[i].v3 = NV_DATA_PT(z);
4433
4434 /* create threads and call pthread companion function */
4435 pthread_create(&threads[i], &attr, VLin1_PT, (void *) &thread_data[i]);
4436 }
4437
4438 /* wait for all threads to finish */
4439 for (i=0; i<nthreads; i++) {
4440 pthread_join(threads[i], NULL);
4441 }
4442
4443 /* clean up and return */
4444 pthread_attr_destroy(&attr);
4445 free(threads);
4446 free(thread_data);
4447
4448 return;
4449 }
4450
4451
4452 /* ----------------------------------------------------------------------------
4453 * Pthread companion function to VLin1
4454 */
4455
VLin1_PT(void * thread_data)4456 static void *VLin1_PT(void *thread_data)
4457 {
4458 sunindextype i, start, end;
4459 realtype a;
4460 realtype *xd, *yd, *zd;
4461 Pthreads_Data *my_data;
4462
4463 /* extract thread data */
4464 my_data = (Pthreads_Data *) thread_data;
4465
4466 a = my_data->c1;
4467 xd = my_data->v1;
4468 yd = my_data->v2;
4469 zd = my_data->v3;
4470
4471 start = my_data->start;
4472 end = my_data->end;
4473
4474 /* compute vector sum */
4475 for (i = start; i < end; i++)
4476 zd[i] = (a*xd[i]) + yd[i];
4477
4478 /* exit */
4479 pthread_exit(NULL);
4480 }
4481
4482
4483 /* ----------------------------------------------------------------------------
4484 * Compute vector difference z[i] = a*x[i]-y[i]
4485 */
4486
VLin2_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4487 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4488 {
4489 sunindextype N;
4490 int i, nthreads;
4491 pthread_t *threads;
4492 Pthreads_Data *thread_data;
4493 pthread_attr_t attr;
4494
4495 /* allocate threads and thread data structs */
4496 N = NV_LENGTH_PT(x);
4497 nthreads = NV_NUM_THREADS_PT(x);
4498 threads = malloc(nthreads*sizeof(pthread_t));
4499 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4500
4501 /* set thread attributes */
4502 pthread_attr_init(&attr);
4503 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4504
4505 for (i=0; i<nthreads; i++) {
4506 /* initialize thread data */
4507 N_VInitThreadData(&thread_data[i]);
4508
4509 /* compute start and end loop index for thread */
4510 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4511
4512 /* pack thread data */
4513 thread_data[i].c1 = a;
4514 thread_data[i].v1 = NV_DATA_PT(x);
4515 thread_data[i].v2 = NV_DATA_PT(y);
4516 thread_data[i].v3 = NV_DATA_PT(z);
4517
4518 /* create threads and call pthread companion function */
4519 pthread_create(&threads[i], &attr, VLin2_PT, (void *) &thread_data[i]);
4520 }
4521
4522 /* wait for all threads to finish */
4523 for (i=0; i<nthreads; i++) {
4524 pthread_join(threads[i], NULL);
4525 }
4526
4527 /* clean up and return */
4528 pthread_attr_destroy(&attr);
4529 free(threads);
4530 free(thread_data);
4531
4532 return;
4533 }
4534
4535 /* ----------------------------------------------------------------------------
4536 * Pthread companion function to VLin2
4537 */
4538
VLin2_PT(void * thread_data)4539 static void *VLin2_PT(void *thread_data)
4540 {
4541 sunindextype i, start, end;
4542 realtype a;
4543 realtype *xd, *yd, *zd;
4544 Pthreads_Data *my_data;
4545
4546 /* extract thread data */
4547 my_data = (Pthreads_Data *) thread_data;
4548
4549 a = my_data->c1;
4550 xd = my_data->v1;
4551 yd = my_data->v2;
4552 zd = my_data->v3;
4553
4554 start = my_data->start;
4555 end = my_data->end;
4556
4557 /* compute vector difference */
4558 for (i = start; i < end; i++)
4559 zd[i] = (a*xd[i]) - yd[i];
4560
4561 /* exit */
4562 pthread_exit(NULL);
4563 }
4564
4565
4566 /* ----------------------------------------------------------------------------
4567 * Compute special cases of linear sum
4568 */
4569
Vaxpy_Pthreads(realtype a,N_Vector x,N_Vector y)4570 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y)
4571 {
4572 sunindextype N;
4573 int i, nthreads;
4574 pthread_t *threads;
4575 Pthreads_Data *thread_data;
4576 pthread_attr_t attr;
4577
4578 /* allocate threads and thread data structs */
4579 N = NV_LENGTH_PT(x);
4580 nthreads = NV_NUM_THREADS_PT(x);
4581 threads = malloc(nthreads*sizeof(pthread_t));
4582 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4583
4584 /* set thread attributes */
4585 pthread_attr_init(&attr);
4586 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4587
4588 for (i=0; i<nthreads; i++) {
4589 /* initialize thread data */
4590 N_VInitThreadData(&thread_data[i]);
4591
4592 /* compute start and end loop index for thread */
4593 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4594
4595 /* pack thread data */
4596 thread_data[i].c1 = a;
4597 thread_data[i].v1 = NV_DATA_PT(x);
4598 thread_data[i].v2 = NV_DATA_PT(y);
4599
4600 /* create threads and call pthread companion function */
4601 pthread_create(&threads[i], &attr, Vaxpy_PT, (void *) &thread_data[i]);
4602 }
4603
4604 /* wait for all threads to finish */
4605 for (i=0; i<nthreads; i++) {
4606 pthread_join(threads[i], NULL);
4607 }
4608
4609 /* clean up and return */
4610 pthread_attr_destroy(&attr);
4611 free(threads);
4612 free(thread_data);
4613
4614 return;
4615 }
4616
4617 /* ----------------------------------------------------------------------------
4618 * Pthread companion function to Vaxpy
4619 */
4620
Vaxpy_PT(void * thread_data)4621 static void *Vaxpy_PT(void *thread_data)
4622 {
4623 sunindextype i, start, end;
4624 realtype a;
4625 realtype *xd, *yd;
4626 Pthreads_Data *my_data;
4627
4628 /* extract thread data */
4629 my_data = (Pthreads_Data *) thread_data;
4630
4631 a = my_data->c1;
4632 xd = my_data->v1;
4633 yd = my_data->v2;
4634
4635 start = my_data->start;
4636 end = my_data->end;
4637
4638 /* compute axpy */
4639 if (a == ONE) {
4640 for (i = start; i < end; i++)
4641 yd[i] += xd[i];
4642
4643 /* exit */
4644 pthread_exit(NULL);
4645 }
4646
4647 if (a == -ONE) {
4648 for (i = start; i < end; i++)
4649 yd[i] -= xd[i];
4650
4651 /* exit */
4652 pthread_exit(NULL);
4653 }
4654
4655 for (i = start; i < end; i++)
4656 yd[i] += a*xd[i];
4657
4658 /* return */
4659 pthread_exit(NULL);
4660 }
4661
4662
4663 /* ----------------------------------------------------------------------------
4664 * Compute scaled vector
4665 */
4666
VScaleBy_Pthreads(realtype a,N_Vector x)4667 static void VScaleBy_Pthreads(realtype a, N_Vector x)
4668 {
4669 sunindextype N;
4670 int i, nthreads;
4671 pthread_t *threads;
4672 Pthreads_Data *thread_data;
4673 pthread_attr_t attr;
4674
4675 /* allocate threads and thread data structs */
4676 N = NV_LENGTH_PT(x);
4677 nthreads = NV_NUM_THREADS_PT(x);
4678 threads = malloc(nthreads*sizeof(pthread_t));
4679 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4680
4681 /* set thread attributes */
4682 pthread_attr_init(&attr);
4683 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4684
4685 for (i=0; i<nthreads; i++) {
4686 /* initialize thread data */
4687 N_VInitThreadData(&thread_data[i]);
4688
4689 /* compute start and end loop index for thread */
4690 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4691
4692 /* pack thread data */
4693 thread_data[i].c1 = a;
4694 thread_data[i].v1 = NV_DATA_PT(x);
4695
4696 /* create threads and call pthread companion function */
4697 pthread_create(&threads[i], &attr, VScaleBy_PT, (void *) &thread_data[i]);
4698 }
4699
4700 /* wait for all threads to finish */
4701 for (i=0; i<nthreads; i++) {
4702 pthread_join(threads[i], NULL);
4703 }
4704
4705 /* clean up and return */
4706 pthread_attr_destroy(&attr);
4707 free(threads);
4708 free(thread_data);
4709
4710 return;
4711 }
4712
4713
4714 /* ----------------------------------------------------------------------------
4715 * Pthread companion function to VScaleBy
4716 */
4717
VScaleBy_PT(void * thread_data)4718 static void *VScaleBy_PT(void *thread_data)
4719 {
4720 sunindextype i, start, end;
4721 realtype a;
4722 realtype *xd;
4723 Pthreads_Data *my_data;
4724
4725 /* extract thread data */
4726 my_data = (Pthreads_Data *) thread_data;
4727
4728 a = my_data->c1;
4729 xd = my_data->v1;
4730
4731 start = my_data->start;
4732 end = my_data->end;
4733
4734 /* compute scaled vector */
4735 for (i = start; i < end; i++)
4736 xd[i] *= a;
4737
4738 /* exit */
4739 pthread_exit(NULL);
4740 }
4741
4742
4743 /*
4744 * -----------------------------------------------------------------------------
4745 * private functions for special cases of vector array operations
4746 * -----------------------------------------------------------------------------
4747 */
4748
VSumVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4749 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4750 {
4751 sunindextype N;
4752 int i, nthreads;
4753 pthread_t *threads;
4754 Pthreads_Data *thread_data;
4755 pthread_attr_t attr;
4756
4757 /* allocate threads and thread data structs */
4758 N = NV_LENGTH_PT(X[0]);
4759 nthreads = NV_NUM_THREADS_PT(X[0]);
4760 threads = malloc(nthreads*sizeof(pthread_t));
4761 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4762
4763 /* set thread attributes */
4764 pthread_attr_init(&attr);
4765 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4766
4767 /* pack thread data, distribute loop indices, and create threads/call kernel */
4768 for (i=0; i<nthreads; i++) {
4769 N_VInitThreadData(&thread_data[i]);
4770
4771 thread_data[i].nvec = nvec;
4772 thread_data[i].Y1 = X;
4773 thread_data[i].Y2 = Y;
4774 thread_data[i].Y3 = Z;
4775
4776 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4777
4778 pthread_create(&threads[i], &attr, VSumVectorArray_PT, (void *) &thread_data[i]);
4779 }
4780
4781 /* wait for all threads to finish */
4782 for (i=0; i<nthreads; i++)
4783 pthread_join(threads[i], NULL);
4784
4785 /* clean up and return */
4786 pthread_attr_destroy(&attr);
4787 free(threads);
4788 free(thread_data);
4789
4790 return(0);
4791 }
4792
VSumVectorArray_PT(void * thread_data)4793 static void *VSumVectorArray_PT(void *thread_data)
4794 {
4795 Pthreads_Data *my_data;
4796 sunindextype j, start, end;
4797
4798 int i;
4799 realtype* xd=NULL;
4800 realtype* yd=NULL;
4801 realtype* zd=NULL;
4802
4803 my_data = (Pthreads_Data *) thread_data;
4804 start = my_data->start;
4805 end = my_data->end;
4806
4807 for (i=0; i<my_data->nvec; i++) {
4808 xd = NV_DATA_PT(my_data->Y1[i]);
4809 yd = NV_DATA_PT(my_data->Y2[i]);
4810 zd = NV_DATA_PT(my_data->Y3[i]);
4811 for (j=start; j<end; j++)
4812 zd[j] = xd[j] + yd[j];
4813 }
4814
4815 pthread_exit(NULL);
4816 }
4817
4818
VDiffVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4819 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4820 {
4821 sunindextype N;
4822 int i, nthreads;
4823 pthread_t *threads;
4824 Pthreads_Data *thread_data;
4825 pthread_attr_t attr;
4826
4827 /* allocate threads and thread data structs */
4828 N = NV_LENGTH_PT(X[0]);
4829 nthreads = NV_NUM_THREADS_PT(X[0]);
4830 threads = malloc(nthreads*sizeof(pthread_t));
4831 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4832
4833 /* set thread attributes */
4834 pthread_attr_init(&attr);
4835 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4836
4837 /* pack thread data, distribute loop indices, and create threads/call kernel */
4838 for (i=0; i<nthreads; i++) {
4839 N_VInitThreadData(&thread_data[i]);
4840
4841 thread_data[i].nvec = nvec;
4842 thread_data[i].Y1 = X;
4843 thread_data[i].Y2 = Y;
4844 thread_data[i].Y3 = Z;
4845
4846 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4847
4848 pthread_create(&threads[i], &attr, VDiffVectorArray_PT, (void *) &thread_data[i]);
4849 }
4850
4851 /* wait for all threads to finish */
4852 for (i=0; i<nthreads; i++)
4853 pthread_join(threads[i], NULL);
4854
4855 /* clean up and return */
4856 pthread_attr_destroy(&attr);
4857 free(threads);
4858 free(thread_data);
4859
4860 return(0);
4861 }
4862
VDiffVectorArray_PT(void * thread_data)4863 static void *VDiffVectorArray_PT(void *thread_data)
4864 {
4865 Pthreads_Data *my_data;
4866 sunindextype j, start, end;
4867
4868 int i;
4869 realtype* xd=NULL;
4870 realtype* yd=NULL;
4871 realtype* zd=NULL;
4872
4873 my_data = (Pthreads_Data *) thread_data;
4874 start = my_data->start;
4875 end = my_data->end;
4876
4877 for (i=0; i<my_data->nvec; i++) {
4878 xd = NV_DATA_PT(my_data->Y1[i]);
4879 yd = NV_DATA_PT(my_data->Y2[i]);
4880 zd = NV_DATA_PT(my_data->Y3[i]);
4881 for (j=start; j<end; j++)
4882 zd[j] = xd[j] - yd[j];
4883 }
4884
4885 pthread_exit(NULL);
4886 }
4887
4888
VScaleSumVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4889 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4890 {
4891 sunindextype N;
4892 int i, nthreads;
4893 pthread_t *threads;
4894 Pthreads_Data *thread_data;
4895 pthread_attr_t attr;
4896
4897 /* allocate threads and thread data structs */
4898 N = NV_LENGTH_PT(X[0]);
4899 nthreads = NV_NUM_THREADS_PT(X[0]);
4900 threads = malloc(nthreads*sizeof(pthread_t));
4901 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4902
4903 /* set thread attributes */
4904 pthread_attr_init(&attr);
4905 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4906
4907 /* pack thread data, distribute loop indices, and create threads/call kernel */
4908 for (i=0; i<nthreads; i++) {
4909 N_VInitThreadData(&thread_data[i]);
4910
4911 thread_data[i].nvec = nvec;
4912 thread_data[i].c1 = c;
4913 thread_data[i].Y1 = X;
4914 thread_data[i].Y2 = Y;
4915 thread_data[i].Y3 = Z;
4916
4917 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4918
4919 pthread_create(&threads[i], &attr, VScaleSumVectorArray_PT, (void *) &thread_data[i]);
4920 }
4921
4922 /* wait for all threads to finish */
4923 for (i=0; i<nthreads; i++)
4924 pthread_join(threads[i], NULL);
4925
4926 /* clean up and return */
4927 pthread_attr_destroy(&attr);
4928 free(threads);
4929 free(thread_data);
4930
4931 return(0);
4932 }
4933
VScaleSumVectorArray_PT(void * thread_data)4934 static void *VScaleSumVectorArray_PT(void *thread_data)
4935 {
4936 Pthreads_Data *my_data;
4937 sunindextype j, start, end;
4938
4939 int i;
4940 realtype c;
4941 realtype* xd=NULL;
4942 realtype* yd=NULL;
4943 realtype* zd=NULL;
4944
4945 my_data = (Pthreads_Data *) thread_data;
4946 start = my_data->start;
4947 end = my_data->end;
4948 c = my_data->c1;
4949
4950 for (i=0; i<my_data->nvec; i++) {
4951 xd = NV_DATA_PT(my_data->Y1[i]);
4952 yd = NV_DATA_PT(my_data->Y2[i]);
4953 zd = NV_DATA_PT(my_data->Y3[i]);
4954 for (j=start; j<end; j++)
4955 zd[j] = c * (xd[j] + yd[j]);
4956 }
4957
4958 pthread_exit(NULL);
4959 }
4960
4961
VScaleDiffVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4962 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4963 {
4964 sunindextype N;
4965 int i, nthreads;
4966 pthread_t *threads;
4967 Pthreads_Data *thread_data;
4968 pthread_attr_t attr;
4969
4970 /* allocate threads and thread data structs */
4971 N = NV_LENGTH_PT(X[0]);
4972 nthreads = NV_NUM_THREADS_PT(X[0]);
4973 threads = malloc(nthreads*sizeof(pthread_t));
4974 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4975
4976 /* set thread attributes */
4977 pthread_attr_init(&attr);
4978 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4979
4980 /* pack thread data, distribute loop indices, and create threads/call kernel */
4981 for (i=0; i<nthreads; i++) {
4982 N_VInitThreadData(&thread_data[i]);
4983
4984 thread_data[i].nvec = nvec;
4985 thread_data[i].c1 = c;
4986 thread_data[i].Y1 = X;
4987 thread_data[i].Y2 = Y;
4988 thread_data[i].Y3 = Z;
4989
4990 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4991
4992 pthread_create(&threads[i], &attr, VScaleDiffVectorArray_PT, (void *) &thread_data[i]);
4993 }
4994
4995 /* wait for all threads to finish */
4996 for (i=0; i<nthreads; i++)
4997 pthread_join(threads[i], NULL);
4998
4999 /* clean up and return */
5000 pthread_attr_destroy(&attr);
5001 free(threads);
5002 free(thread_data);
5003
5004 return(0);
5005 }
5006
VScaleDiffVectorArray_PT(void * thread_data)5007 static void *VScaleDiffVectorArray_PT(void *thread_data)
5008 {
5009 Pthreads_Data *my_data;
5010 sunindextype j, start, end;
5011
5012 int i;
5013 realtype c;
5014 realtype* xd=NULL;
5015 realtype* yd=NULL;
5016 realtype* zd=NULL;
5017
5018 my_data = (Pthreads_Data *) thread_data;
5019 start = my_data->start;
5020 end = my_data->end;
5021 c = my_data->c1;
5022
5023 for (i=0; i<my_data->nvec; i++) {
5024 xd = NV_DATA_PT(my_data->Y1[i]);
5025 yd = NV_DATA_PT(my_data->Y2[i]);
5026 zd = NV_DATA_PT(my_data->Y3[i]);
5027 for (j=start; j<end; j++)
5028 zd[j] = c * (xd[j] - yd[j]);
5029 }
5030
5031 pthread_exit(NULL);
5032 }
5033
5034
VLin1VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)5035 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
5036 {
5037 sunindextype N;
5038 int i, nthreads;
5039 pthread_t *threads;
5040 Pthreads_Data *thread_data;
5041 pthread_attr_t attr;
5042
5043 /* allocate threads and thread data structs */
5044 N = NV_LENGTH_PT(X[0]);
5045 nthreads = NV_NUM_THREADS_PT(X[0]);
5046 threads = malloc(nthreads*sizeof(pthread_t));
5047 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5048
5049 /* set thread attributes */
5050 pthread_attr_init(&attr);
5051 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5052
5053 /* pack thread data, distribute loop indices, and create threads/call kernel */
5054 for (i=0; i<nthreads; i++) {
5055 N_VInitThreadData(&thread_data[i]);
5056
5057 thread_data[i].nvec = nvec;
5058 thread_data[i].c1 = a;
5059 thread_data[i].Y1 = X;
5060 thread_data[i].Y2 = Y;
5061 thread_data[i].Y3 = Z;
5062
5063 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5064
5065 pthread_create(&threads[i], &attr, VLin1VectorArray_PT, (void *) &thread_data[i]);
5066 }
5067
5068 /* wait for all threads to finish */
5069 for (i=0; i<nthreads; i++)
5070 pthread_join(threads[i], NULL);
5071
5072 /* clean up and return */
5073 pthread_attr_destroy(&attr);
5074 free(threads);
5075 free(thread_data);
5076
5077 return(0);
5078 }
5079
VLin1VectorArray_PT(void * thread_data)5080 static void *VLin1VectorArray_PT(void *thread_data)
5081 {
5082 Pthreads_Data *my_data;
5083 sunindextype j, start, end;
5084
5085 int i;
5086 realtype a;
5087 realtype* xd=NULL;
5088 realtype* yd=NULL;
5089 realtype* zd=NULL;
5090
5091 my_data = (Pthreads_Data *) thread_data;
5092 start = my_data->start;
5093 end = my_data->end;
5094 a = my_data->c1;
5095
5096 for (i=0; i<my_data->nvec; i++) {
5097 xd = NV_DATA_PT(my_data->Y1[i]);
5098 yd = NV_DATA_PT(my_data->Y2[i]);
5099 zd = NV_DATA_PT(my_data->Y3[i]);
5100 for (j=start; j<end; j++)
5101 zd[j] = (a * xd[j]) + yd[j];
5102 }
5103
5104 pthread_exit(NULL);
5105 }
5106
5107
VLin2VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)5108 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
5109 {
5110 sunindextype N;
5111 int i, nthreads;
5112 pthread_t *threads;
5113 Pthreads_Data *thread_data;
5114 pthread_attr_t attr;
5115
5116 /* allocate threads and thread data structs */
5117 N = NV_LENGTH_PT(X[0]);
5118 nthreads = NV_NUM_THREADS_PT(X[0]);
5119 threads = malloc(nthreads*sizeof(pthread_t));
5120 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5121
5122 /* set thread attributes */
5123 pthread_attr_init(&attr);
5124 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5125
5126 /* pack thread data, distribute loop indices, and create threads/call kernel */
5127 for (i=0; i<nthreads; i++) {
5128 N_VInitThreadData(&thread_data[i]);
5129
5130 thread_data[i].nvec = nvec;
5131 thread_data[i].c1 = a;
5132 thread_data[i].Y1 = X;
5133 thread_data[i].Y2 = Y;
5134 thread_data[i].Y3 = Z;
5135
5136 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5137
5138 pthread_create(&threads[i], &attr, VLin2VectorArray_PT, (void *) &thread_data[i]);
5139 }
5140
5141 /* wait for all threads to finish */
5142 for (i=0; i<nthreads; i++)
5143 pthread_join(threads[i], NULL);
5144
5145 /* clean up and return */
5146 pthread_attr_destroy(&attr);
5147 free(threads);
5148 free(thread_data);
5149
5150 return(0);
5151 }
5152
VLin2VectorArray_PT(void * thread_data)5153 static void *VLin2VectorArray_PT(void *thread_data)
5154 {
5155 Pthreads_Data *my_data;
5156 sunindextype j, start, end;
5157
5158 int i;
5159 realtype a;
5160 realtype* xd=NULL;
5161 realtype* yd=NULL;
5162 realtype* zd=NULL;
5163
5164 my_data = (Pthreads_Data *) thread_data;
5165 start = my_data->start;
5166 end = my_data->end;
5167 a = my_data->c1;
5168
5169 for (i=0; i<my_data->nvec; i++) {
5170 xd = NV_DATA_PT(my_data->Y1[i]);
5171 yd = NV_DATA_PT(my_data->Y2[i]);
5172 zd = NV_DATA_PT(my_data->Y3[i]);
5173 for (j=start; j<end; j++)
5174 zd[j] = (a * xd[j]) - yd[j];
5175 }
5176
5177 pthread_exit(NULL);
5178 }
5179
5180
VaxpyVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y)5181 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y)
5182 {
5183 sunindextype N;
5184 int i, nthreads;
5185 pthread_t *threads;
5186 Pthreads_Data *thread_data;
5187 pthread_attr_t attr;
5188
5189 /* allocate threads and thread data structs */
5190 N = NV_LENGTH_PT(X[0]);
5191 nthreads = NV_NUM_THREADS_PT(X[0]);
5192 threads = malloc(nthreads*sizeof(pthread_t));
5193 thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5194
5195 /* set thread attributes */
5196 pthread_attr_init(&attr);
5197 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5198
5199 /* pack thread data, distribute loop indices, and create threads/call kernel */
5200 for (i=0; i<nthreads; i++) {
5201 N_VInitThreadData(&thread_data[i]);
5202
5203 thread_data[i].nvec = nvec;
5204 thread_data[i].c1 = a;
5205 thread_data[i].Y1 = X;
5206 thread_data[i].Y2 = Y;
5207
5208 N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5209
5210 pthread_create(&threads[i], &attr, VaxpyVectorArray_PT, (void *) &thread_data[i]);
5211 }
5212
5213 /* wait for all threads to finish */
5214 for (i=0; i<nthreads; i++)
5215 pthread_join(threads[i], NULL);
5216
5217 /* clean up and return */
5218 pthread_attr_destroy(&attr);
5219 free(threads);
5220 free(thread_data);
5221
5222 return(0);
5223 }
5224
VaxpyVectorArray_PT(void * thread_data)5225 static void *VaxpyVectorArray_PT(void *thread_data)
5226 {
5227 Pthreads_Data *my_data;
5228 sunindextype j, start, end;
5229
5230 int i;
5231 realtype a;
5232 realtype* xd=NULL;
5233 realtype* yd=NULL;
5234
5235 my_data = (Pthreads_Data *) thread_data;
5236 start = my_data->start;
5237 end = my_data->end;
5238 a = my_data->c1;
5239
5240 if (a == ONE) {
5241 for (i=0; i<my_data->nvec; i++) {
5242 xd = NV_DATA_PT(my_data->Y1[i]);
5243 yd = NV_DATA_PT(my_data->Y2[i]);
5244 for (j=start; j<end; j++)
5245 yd[j] += xd[j];
5246 }
5247 pthread_exit(NULL);
5248 }
5249
5250 if (a == -ONE) {
5251 for (i=0; i<my_data->nvec; i++) {
5252 xd = NV_DATA_PT(my_data->Y1[i]);
5253 yd = NV_DATA_PT(my_data->Y2[i]);
5254 for (j=start; j<end; j++)
5255 yd[j] -= xd[j];
5256 }
5257 pthread_exit(NULL);
5258 }
5259
5260 for (i=0; i<my_data->nvec; i++) {
5261 xd = NV_DATA_PT(my_data->Y1[i]);
5262 yd = NV_DATA_PT(my_data->Y2[i]);
5263 for (j=start; j<end; j++)
5264 yd[j] += a * xd[j];
5265 }
5266 pthread_exit(NULL);
5267 }
5268
5269
5270 /*
5271 * -----------------------------------------------------------------
5272 * private utility functions
5273 * -----------------------------------------------------------------
5274 */
5275
5276 /* ----------------------------------------------------------------------------
5277 * Determine loop indices for a thread
5278 */
5279
N_VSplitLoop(int myid,int * nthreads,sunindextype * N,sunindextype * start,sunindextype * end)5280 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
5281 sunindextype *start, sunindextype *end)
5282 {
5283 sunindextype q, r; /* quotient and remainder */
5284
5285 /* work per thread and leftover work */
5286 q = *N / *nthreads;
5287 r = *N % *nthreads;
5288
5289 /* assign work */
5290 if (myid < r) {
5291 *start = myid * q + myid;
5292 *end = *start + q + 1;
5293 } else {
5294 *start = myid * q + r;
5295 *end = *start + q;
5296 }
5297 }
5298
5299
5300 /* ----------------------------------------------------------------------------
5301 * Initialize values of local thread data struct
5302 */
5303
N_VInitThreadData(Pthreads_Data * thread_data)5304 static void N_VInitThreadData(Pthreads_Data *thread_data)
5305 {
5306 thread_data->start = -1;
5307 thread_data->end = -1;
5308
5309 #if __STDC_VERSION__ >= 199901L
5310 thread_data->c1 = NAN;
5311 thread_data->c2 = NAN;
5312 #else
5313 thread_data->c1 = ZERO;
5314 thread_data->c2 = ZERO;
5315 #endif
5316
5317 thread_data->v1 = NULL;
5318 thread_data->v2 = NULL;
5319 thread_data->v3 = NULL;
5320 thread_data->global_val = NULL;
5321 thread_data->global_mutex = NULL;
5322
5323 thread_data->nvec = ZERO;
5324 thread_data->nsum = ZERO;
5325 thread_data->cvals = NULL;
5326 thread_data->Y1 = NULL;
5327 thread_data->Y2 = NULL;
5328 thread_data->Y3 = NULL;
5329 }
5330
5331
5332 /*
5333 * -----------------------------------------------------------------
5334 * Enable / Disable fused and vector array operations
5335 * -----------------------------------------------------------------
5336 */
5337
N_VEnableFusedOps_Pthreads(N_Vector v,booleantype tf)5338 int N_VEnableFusedOps_Pthreads(N_Vector v, booleantype tf)
5339 {
5340 /* check that vector is non-NULL */
5341 if (v == NULL) return(-1);
5342
5343 /* check that ops structure is non-NULL */
5344 if (v->ops == NULL) return(-1);
5345
5346 if (tf) {
5347 /* enable all fused vector operations */
5348 v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5349 v->ops->nvscaleaddmulti = N_VScaleAddMulti_Pthreads;
5350 v->ops->nvdotprodmulti = N_VDotProdMulti_Pthreads;
5351 /* enable all vector array operations */
5352 v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Pthreads;
5353 v->ops->nvscalevectorarray = N_VScaleVectorArray_Pthreads;
5354 v->ops->nvconstvectorarray = N_VConstVectorArray_Pthreads;
5355 v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Pthreads;
5356 v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Pthreads;
5357 v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Pthreads;
5358 v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5359 } else {
5360 /* disable all fused vector operations */
5361 v->ops->nvlinearcombination = NULL;
5362 v->ops->nvscaleaddmulti = NULL;
5363 v->ops->nvdotprodmulti = NULL;
5364 /* disable all vector array operations */
5365 v->ops->nvlinearsumvectorarray = NULL;
5366 v->ops->nvscalevectorarray = NULL;
5367 v->ops->nvconstvectorarray = NULL;
5368 v->ops->nvwrmsnormvectorarray = NULL;
5369 v->ops->nvwrmsnormmaskvectorarray = NULL;
5370 v->ops->nvscaleaddmultivectorarray = NULL;
5371 v->ops->nvlinearcombinationvectorarray = NULL;
5372 }
5373
5374 /* return success */
5375 return(0);
5376 }
5377
5378
N_VEnableLinearCombination_Pthreads(N_Vector v,booleantype tf)5379 int N_VEnableLinearCombination_Pthreads(N_Vector v, booleantype tf)
5380 {
5381 /* check that vector is non-NULL */
5382 if (v == NULL) return(-1);
5383
5384 /* check that ops structure is non-NULL */
5385 if (v->ops == NULL) return(-1);
5386
5387 /* enable/disable operation */
5388 if (tf)
5389 v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5390 else
5391 v->ops->nvlinearcombination = NULL;
5392
5393 /* return success */
5394 return(0);
5395 }
5396
N_VEnableScaleAddMulti_Pthreads(N_Vector v,booleantype tf)5397 int N_VEnableScaleAddMulti_Pthreads(N_Vector v, booleantype tf)
5398 {
5399 /* check that vector is non-NULL */
5400 if (v == NULL) return(-1);
5401
5402 /* check that ops structure is non-NULL */
5403 if (v->ops == NULL) return(-1);
5404
5405 /* enable/disable operation */
5406 if (tf)
5407 v->ops->nvscaleaddmulti = N_VScaleAddMulti_Pthreads;
5408 else
5409 v->ops->nvscaleaddmulti = NULL;
5410
5411 /* return success */
5412 return(0);
5413 }
5414
N_VEnableDotProdMulti_Pthreads(N_Vector v,booleantype tf)5415 int N_VEnableDotProdMulti_Pthreads(N_Vector v, booleantype tf)
5416 {
5417 /* check that vector is non-NULL */
5418 if (v == NULL) return(-1);
5419
5420 /* check that ops structure is non-NULL */
5421 if (v->ops == NULL) return(-1);
5422
5423 /* enable/disable operation */
5424 if (tf)
5425 v->ops->nvdotprodmulti = N_VDotProdMulti_Pthreads;
5426 else
5427 v->ops->nvdotprodmulti = NULL;
5428
5429 /* return success */
5430 return(0);
5431 }
5432
N_VEnableLinearSumVectorArray_Pthreads(N_Vector v,booleantype tf)5433 int N_VEnableLinearSumVectorArray_Pthreads(N_Vector v, booleantype tf)
5434 {
5435 /* check that vector is non-NULL */
5436 if (v == NULL) return(-1);
5437
5438 /* check that ops structure is non-NULL */
5439 if (v->ops == NULL) return(-1);
5440
5441 /* enable/disable operation */
5442 if (tf)
5443 v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Pthreads;
5444 else
5445 v->ops->nvlinearsumvectorarray = NULL;
5446
5447 /* return success */
5448 return(0);
5449 }
5450
N_VEnableScaleVectorArray_Pthreads(N_Vector v,booleantype tf)5451 int N_VEnableScaleVectorArray_Pthreads(N_Vector v, booleantype tf)
5452 {
5453 /* check that vector is non-NULL */
5454 if (v == NULL) return(-1);
5455
5456 /* check that ops structure is non-NULL */
5457 if (v->ops == NULL) return(-1);
5458
5459 /* enable/disable operation */
5460 if (tf)
5461 v->ops->nvscalevectorarray = N_VScaleVectorArray_Pthreads;
5462 else
5463 v->ops->nvscalevectorarray = NULL;
5464
5465 /* return success */
5466 return(0);
5467 }
5468
N_VEnableConstVectorArray_Pthreads(N_Vector v,booleantype tf)5469 int N_VEnableConstVectorArray_Pthreads(N_Vector v, booleantype tf)
5470 {
5471 /* check that vector is non-NULL */
5472 if (v == NULL) return(-1);
5473
5474 /* check that ops structure is non-NULL */
5475 if (v->ops == NULL) return(-1);
5476
5477 /* enable/disable operation */
5478 if (tf)
5479 v->ops->nvconstvectorarray = N_VConstVectorArray_Pthreads;
5480 else
5481 v->ops->nvconstvectorarray = NULL;
5482
5483 /* return success */
5484 return(0);
5485 }
5486
N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v,booleantype tf)5487 int N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v, booleantype tf)
5488 {
5489 /* check that vector is non-NULL */
5490 if (v == NULL) return(-1);
5491
5492 /* check that ops structure is non-NULL */
5493 if (v->ops == NULL) return(-1);
5494
5495 /* enable/disable operation */
5496 if (tf)
5497 v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Pthreads;
5498 else
5499 v->ops->nvwrmsnormvectorarray = NULL;
5500
5501 /* return success */
5502 return(0);
5503 }
5504
N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v,booleantype tf)5505 int N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v, booleantype tf)
5506 {
5507 /* check that vector is non-NULL */
5508 if (v == NULL) return(-1);
5509
5510 /* check that ops structure is non-NULL */
5511 if (v->ops == NULL) return(-1);
5512
5513 /* enable/disable operation */
5514 if (tf)
5515 v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Pthreads;
5516 else
5517 v->ops->nvwrmsnormmaskvectorarray = NULL;
5518
5519 /* return success */
5520 return(0);
5521 }
5522
N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v,booleantype tf)5523 int N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v, booleantype tf)
5524 {
5525 /* check that vector is non-NULL */
5526 if (v == NULL) return(-1);
5527
5528 /* check that ops structure is non-NULL */
5529 if (v->ops == NULL) return(-1);
5530
5531 /* enable/disable operation */
5532 if (tf)
5533 v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Pthreads;
5534 else
5535 v->ops->nvscaleaddmultivectorarray = NULL;
5536
5537 /* return success */
5538 return(0);
5539 }
5540
N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v,booleantype tf)5541 int N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v, booleantype tf)
5542 {
5543 /* check that vector is non-NULL */
5544 if (v == NULL) return(-1);
5545
5546 /* check that ops structure is non-NULL */
5547 if (v->ops == NULL) return(-1);
5548
5549 /* enable/disable operation */
5550 if (tf)
5551 v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5552 else
5553 v->ops->nvlinearcombinationvectorarray = NULL;
5554
5555 /* return success */
5556 return(0);
5557 }
5558