1 /* -----------------------------------------------------------------
2  * Programmer(s): David J. Gardner @ LLNL
3  * -----------------------------------------------------------------
4  * Acknowledgements: This NVECTOR module is based on the NVECTOR
5  *                   Serial module by Scott D. Cohen, Alan C.
6  *                   Hindmarsh, Radu Serban, and Aaron Collier
7  *                   @ LLNL
8  * -----------------------------------------------------------------
9  * SUNDIALS Copyright Start
10  * Copyright (c) 2002-2020, Lawrence Livermore National Security
11  * and Southern Methodist University.
12  * All rights reserved.
13  *
14  * See the top-level LICENSE and NOTICE files for details.
15  *
16  * SPDX-License-Identifier: BSD-3-Clause
17  * SUNDIALS Copyright End
18  * -----------------------------------------------------------------
19  * This is the implementation file for a POSIX Threads (Pthreads)
20  * implementation of the NVECTOR package using a LOCAL array of
21  * structures to pass data to threads.
22  * -----------------------------------------------------------------*/
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include <nvector/nvector_pthreads.h>
28 #include <sundials/sundials_math.h>
29 #include <math.h> /* define NAN */
30 
31 #define ZERO   RCONST(0.0)
32 #define HALF   RCONST(0.5)
33 #define ONE    RCONST(1.0)
34 #define ONEPT5 RCONST(1.5)
35 
36 /* Private functions for special cases of vector operations */
37 static void VCopy_Pthreads(N_Vector x, N_Vector z);                              /* z=x       */
38 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z);                   /* z=x+y     */
39 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z);                  /* z=x-y     */
40 static void VNeg_Pthreads(N_Vector x, N_Vector z);                               /* z=-x      */
41 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z);  /* z=c(x+y)  */
42 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z); /* z=c(x-y)  */
43 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z);      /* z=ax+y    */
44 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z);      /* z=ax-y    */
45 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y);                  /* y <- ax+y */
46 static void VScaleBy_Pthreads(realtype a, N_Vector x);                           /* x <- ax   */
47 
48 /* Private functions for special cases of vector array operations */
49 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z);                   /* Z=X+Y     */
50 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z);                  /* Z=X-Y     */
51 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z);  /* Z=c(X+Y)  */
52 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=c(X-Y)  */
53 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z);      /* Z=aX+Y    */
54 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z);      /* Z=aX-Y    */
55 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y);                    /* Y <- aX+Y */
56 
57 /* Pthread companion functions for vector operations */
58 static void *N_VLinearSum_PT(void *thread_data);
59 static void *N_VConst_PT(void *thread_data);
60 static void *N_VProd_PT(void *thread_data);
61 static void *N_VDiv_PT(void *thread_data);
62 static void *N_VScale_PT(void *thread_data);
63 static void *N_VAbs_PT(void *thread_data);
64 static void *N_VInv_PT(void *thread_data);
65 static void *N_VAddConst_PT(void *thread_data);
66 static void *N_VCompare_PT(void *thread_data);
67 static void *N_VDotProd_PT(void *thread_data);
68 static void *N_VMaxNorm_PT(void *thread_data);
69 static void *N_VWSqrSum_PT(void *thread_data);
70 static void *N_VMin_PT(void *thread_data);
71 static void *N_VWL2Norm_PT(void *thread_data);
72 static void *N_VL1Norm_PT(void *thread_data);
73 static void *N_VInvTest_PT(void *thread_data);
74 static void *N_VWSqrSumMask_PT(void *thread_data);
75 static void *N_VConstrMask_PT(void *thread_data);
76 static void *N_VMinQuotient_PT(void *thread_data);
77 
78 /* Pthread companion functions special cases of vector operations */
79 static void *VCopy_PT(void *thread_data);
80 static void *VSum_PT(void *thread_data);
81 static void *VDiff_PT(void *thread_data);
82 static void *VNeg_PT(void *thread_data);
83 static void *VScaleSum_PT(void *thread_data);
84 static void *VScaleDiff_PT(void *thread_data);
85 static void *VLin1_PT(void *thread_data);
86 static void *VLin2_PT(void *thread_data);
87 static void *VScaleBy_PT(void *thread_data);
88 static void *Vaxpy_PT(void *thread_data);
89 
90 /* Pthread companion functions for fused vector operations */
91 static void *N_VLinearCombination_PT(void *thread_data);
92 static void *N_VScaleAddMulti_PT(void *thread_data);
93 static void *N_VDotProdMulti_PT(void *thread_data);
94 
95 /* Pthread companion functions for vector array operations */
96 static void *N_VLinearSumVectorArray_PT(void *thread_data);
97 static void *N_VScaleVectorArray_PT(void *thread_data);
98 static void *N_VConstVectorArray_PT(void *thread_data);
99 static void *N_VWrmsNormVectorArray_PT(void *thread_data);
100 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data);
101 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data);
102 static void *N_VLinearCombinationVectorArray_PT(void *thread_data);
103 
104 /* Pthread companion functions special cases of vector array operations */
105 static void *VSumVectorArray_PT(void *thread_data);
106 static void *VDiffVectorArray_PT(void *thread_data);
107 static void *VScaleSumVectorArray_PT(void *thread_data);
108 static void *VScaleDiffVectorArray_PT(void *thread_data);
109 static void *VLin1VectorArray_PT(void *thread_data);
110 static void *VLin2VectorArray_PT(void *thread_data);
111 static void *VaxpyVectorArray_PT(void *thread_data);
112 
113 /* Function to determine loop values for threads */
114 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
115                          sunindextype *start, sunindextype *end);
116 
117 /* Function to initialize thread data */
118 static void N_VInitThreadData(Pthreads_Data *thread_data);
119 
120 /*
121  * -----------------------------------------------------------------
122  * exported functions
123  * -----------------------------------------------------------------
124  */
125 
126 /* ----------------------------------------------------------------
127  * Returns vector type ID. Used to identify vector implementation
128  * from abstract N_Vector interface.
129  */
N_VGetVectorID_Pthreads(N_Vector v)130 N_Vector_ID N_VGetVectorID_Pthreads(N_Vector v)
131 {
132   return SUNDIALS_NVEC_PTHREADS;
133 }
134 
135 /* ----------------------------------------------------------------------------
136  * Function to create a new empty vector
137  */
138 
N_VNewEmpty_Pthreads(sunindextype length,int num_threads)139 N_Vector N_VNewEmpty_Pthreads(sunindextype length, int num_threads)
140 {
141   N_Vector v;
142   N_VectorContent_Pthreads content;
143 
144   /* Create an empty vector object */
145   v = NULL;
146   v = N_VNewEmpty();
147   if (v == NULL) return(NULL);
148 
149   /* Attach operations */
150 
151   /* constructors, destructors, and utility operations */
152   v->ops->nvgetvectorid     = N_VGetVectorID_Pthreads;
153   v->ops->nvclone           = N_VClone_Pthreads;
154   v->ops->nvcloneempty      = N_VCloneEmpty_Pthreads;
155   v->ops->nvdestroy         = N_VDestroy_Pthreads;
156   v->ops->nvspace           = N_VSpace_Pthreads;
157   v->ops->nvgetarraypointer = N_VGetArrayPointer_Pthreads;
158   v->ops->nvsetarraypointer = N_VSetArrayPointer_Pthreads;
159   v->ops->nvgetlength       = N_VGetLength_Pthreads;
160 
161   /* standard vector operations */
162   v->ops->nvlinearsum    = N_VLinearSum_Pthreads;
163   v->ops->nvconst        = N_VConst_Pthreads;
164   v->ops->nvprod         = N_VProd_Pthreads;
165   v->ops->nvdiv          = N_VDiv_Pthreads;
166   v->ops->nvscale        = N_VScale_Pthreads;
167   v->ops->nvabs          = N_VAbs_Pthreads;
168   v->ops->nvinv          = N_VInv_Pthreads;
169   v->ops->nvaddconst     = N_VAddConst_Pthreads;
170   v->ops->nvdotprod      = N_VDotProd_Pthreads;
171   v->ops->nvmaxnorm      = N_VMaxNorm_Pthreads;
172   v->ops->nvwrmsnormmask = N_VWrmsNormMask_Pthreads;
173   v->ops->nvwrmsnorm     = N_VWrmsNorm_Pthreads;
174   v->ops->nvmin          = N_VMin_Pthreads;
175   v->ops->nvwl2norm      = N_VWL2Norm_Pthreads;
176   v->ops->nvl1norm       = N_VL1Norm_Pthreads;
177   v->ops->nvcompare      = N_VCompare_Pthreads;
178   v->ops->nvinvtest      = N_VInvTest_Pthreads;
179   v->ops->nvconstrmask   = N_VConstrMask_Pthreads;
180   v->ops->nvminquotient  = N_VMinQuotient_Pthreads;
181 
182   /* fused and vector array operations are disabled (NULL) by default */
183 
184   /* local reduction operations */
185   v->ops->nvdotprodlocal     = N_VDotProd_Pthreads;
186   v->ops->nvmaxnormlocal     = N_VMaxNorm_Pthreads;
187   v->ops->nvminlocal         = N_VMin_Pthreads;
188   v->ops->nvl1normlocal      = N_VL1Norm_Pthreads;
189   v->ops->nvinvtestlocal     = N_VInvTest_Pthreads;
190   v->ops->nvconstrmasklocal  = N_VConstrMask_Pthreads;
191   v->ops->nvminquotientlocal = N_VMinQuotient_Pthreads;
192   v->ops->nvwsqrsumlocal     = N_VWSqrSumLocal_Pthreads;
193   v->ops->nvwsqrsummasklocal = N_VWSqrSumMaskLocal_Pthreads;
194 
195   /* Create content */
196   content = NULL;
197   content = (N_VectorContent_Pthreads) malloc(sizeof *content);
198   if (content == NULL) { N_VDestroy(v); return(NULL); }
199 
200   /* Attach content */
201   v->content = content;
202 
203   /* Initialize content */
204   content->length      = length;
205   content->num_threads = num_threads;
206   content->own_data    = SUNFALSE;
207   content->data        = NULL;
208 
209   return(v);
210 }
211 
212 /* ----------------------------------------------------------------------------
213  * Function to create a new vector
214  */
215 
N_VNew_Pthreads(sunindextype length,int num_threads)216 N_Vector N_VNew_Pthreads(sunindextype length, int num_threads)
217 {
218   N_Vector v;
219   realtype *data;
220 
221   v = NULL;
222   v = N_VNewEmpty_Pthreads(length, num_threads);
223   if (v == NULL) return(NULL);
224 
225   /* Create data */
226   if (length > 0) {
227 
228     /* Allocate memory */
229     data = NULL;
230     data = (realtype *) malloc(length * sizeof(realtype));
231     if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
232 
233     /* Attach data */
234     NV_OWN_DATA_PT(v) = SUNTRUE;
235     NV_DATA_PT(v)     = data;
236 
237   }
238 
239   return(v);
240 }
241 
242 /* ----------------------------------------------------------------------------
243  * Function to create a vector with user data component
244  */
245 
N_VMake_Pthreads(sunindextype length,int num_threads,realtype * v_data)246 N_Vector N_VMake_Pthreads(sunindextype length, int num_threads, realtype *v_data)
247 {
248   N_Vector v;
249 
250   v = NULL;
251   v = N_VNewEmpty_Pthreads(length, num_threads);
252   if (v == NULL) return(NULL);
253 
254   if (length > 0) {
255     /* Attach data */
256     NV_OWN_DATA_PT(v) = SUNFALSE;
257     NV_DATA_PT(v)     = v_data;
258   }
259 
260   return(v);
261 }
262 
263 /* ----------------------------------------------------------------------------
264  * Function to create an array of new vectors.
265  */
266 
N_VCloneVectorArray_Pthreads(int count,N_Vector w)267 N_Vector* N_VCloneVectorArray_Pthreads(int count, N_Vector w)
268 {
269   N_Vector* vs;
270   int j;
271 
272   if (count <= 0) return(NULL);
273 
274   vs = NULL;
275   vs = (N_Vector*) malloc(count * sizeof(N_Vector));
276   if(vs == NULL) return(NULL);
277 
278   for (j = 0; j < count; j++) {
279     vs[j] = NULL;
280     vs[j] = N_VClone_Pthreads(w);
281     if (vs[j] == NULL) {
282       N_VDestroyVectorArray_Pthreads(vs, j-1);
283       return(NULL);
284     }
285   }
286 
287   return(vs);
288 }
289 
290 /* ----------------------------------------------------------------------------
291  * Function to create an array of new vectors with NULL data array.
292  */
293 
N_VCloneVectorArrayEmpty_Pthreads(int count,N_Vector w)294 N_Vector* N_VCloneVectorArrayEmpty_Pthreads(int count, N_Vector w)
295 {
296   N_Vector* vs;
297   int j;
298 
299   if (count <= 0) return(NULL);
300 
301   vs = NULL;
302   vs = (N_Vector*) malloc(count * sizeof(N_Vector));
303   if(vs == NULL) return(NULL);
304 
305   for (j = 0; j < count; j++) {
306     vs[j] = NULL;
307     vs[j] = N_VCloneEmpty_Pthreads(w);
308     if (vs[j] == NULL) {
309       N_VDestroyVectorArray_Pthreads(vs, j-1);
310       return(NULL);
311     }
312   }
313 
314   return(vs);
315 }
316 
317 /* ----------------------------------------------------------------------------
318  * Function to free an array created with N_VCloneVectorArray_Pthreads
319  */
320 
N_VDestroyVectorArray_Pthreads(N_Vector * vs,int count)321 void N_VDestroyVectorArray_Pthreads(N_Vector* vs, int count)
322 {
323   int j;
324 
325   for (j = 0; j < count; j++) N_VDestroy_Pthreads(vs[j]);
326 
327   free(vs); vs = NULL;
328 
329   return;
330 }
331 
332 /* ----------------------------------------------------------------------------
333  * Function to return number of vector elements
334  */
N_VGetLength_Pthreads(N_Vector v)335 sunindextype N_VGetLength_Pthreads(N_Vector v)
336 {
337   return NV_LENGTH_PT(v);
338 }
339 
340 /* ----------------------------------------------------------------------------
341  * Function to print a vector to stdout
342  */
343 
N_VPrint_Pthreads(N_Vector x)344 void N_VPrint_Pthreads(N_Vector x)
345 {
346   N_VPrintFile_Pthreads(x, stdout);
347 }
348 
349 /* ----------------------------------------------------------------------------
350  * Function to print a vector to outfile
351  */
352 
N_VPrintFile_Pthreads(N_Vector x,FILE * outfile)353 void N_VPrintFile_Pthreads(N_Vector x, FILE *outfile)
354 {
355   sunindextype i, N;
356   realtype *xd;
357 
358   xd = NULL;
359 
360   N  = NV_LENGTH_PT(x);
361   xd = NV_DATA_PT(x);
362 
363   for (i = 0; i < N; i++) {
364 #if defined(SUNDIALS_EXTENDED_PRECISION)
365     fprintf(outfile, "%11.8Lg\n", xd[i]);
366 #elif defined(SUNDIALS_DOUBLE_PRECISION)
367     fprintf(outfile, "%11.8g\n", xd[i]);
368 #else
369     fprintf(outfile, "%11.8g\n", xd[i]);
370 #endif
371   }
372   fprintf(outfile, "\n");
373 
374   return;
375 }
376 
377 /*
378  * -----------------------------------------------------------------
379  * implementation of vector operations
380  * -----------------------------------------------------------------
381  */
382 
383 /* ----------------------------------------------------------------------------
384  * Create new vector from existing vector without attaching data
385  */
386 
N_VCloneEmpty_Pthreads(N_Vector w)387 N_Vector N_VCloneEmpty_Pthreads(N_Vector w)
388 {
389   N_Vector v;
390   N_VectorContent_Pthreads content;
391 
392   if (w == NULL) return(NULL);
393 
394   /* Create vector */
395   v = NULL;
396   v = N_VNewEmpty();
397   if (v == NULL) return(NULL);
398 
399   /* Attach operations */
400   if (N_VCopyOps(w, v)) { N_VDestroy(v); return(NULL); }
401 
402   /* Create content */
403   content = NULL;
404   content = (N_VectorContent_Pthreads) malloc(sizeof *content);
405   if (content == NULL) { N_VDestroy(v); return(NULL); }
406 
407   /* Attach content */
408   v->content = content;
409 
410   /* Initialize content */
411   content->length      = NV_LENGTH_PT(w);
412   content->num_threads = NV_NUM_THREADS_PT(w);
413   content->own_data    = SUNFALSE;
414   content->data        = NULL;
415 
416   return(v);
417 }
418 
419 
420 /* ----------------------------------------------------------------------------
421  * Create new vector from existing vector and attach data
422  */
423 
N_VClone_Pthreads(N_Vector w)424 N_Vector N_VClone_Pthreads(N_Vector w)
425 {
426   N_Vector v;
427   realtype *data;
428   sunindextype length;
429 
430   v = NULL;
431   v = N_VCloneEmpty_Pthreads(w);
432   if (v == NULL) return(NULL);
433 
434   length = NV_LENGTH_PT(w);
435 
436   /* Create data */
437   if (length > 0) {
438 
439     /* Allocate memory */
440     data = NULL;
441     data = (realtype *) malloc(length * sizeof(realtype));
442     if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
443 
444     /* Attach data */
445     NV_OWN_DATA_PT(v) = SUNTRUE;
446     NV_DATA_PT(v)     = data;
447 
448   }
449 
450   return(v);
451 }
452 
453 /* ----------------------------------------------------------------------------
454  * Destroy vector and free vector memory
455  */
456 
N_VDestroy_Pthreads(N_Vector v)457 void N_VDestroy_Pthreads(N_Vector v)
458 {
459   if (v == NULL) return;
460 
461   /* free content */
462   if (v->content != NULL) {
463     if (NV_OWN_DATA_PT(v) && NV_DATA_PT(v) != NULL) {
464       free(NV_DATA_PT(v));
465       NV_DATA_PT(v) = NULL;
466     }
467     free(v->content);
468     v->content = NULL;
469   }
470 
471   /* free ops and vector */
472   if (v->ops != NULL) { free(v->ops); v->ops = NULL; }
473   free(v); v = NULL;
474 
475   return;
476 }
477 
478 /* ----------------------------------------------------------------------------
479  * Get storage requirement for vector
480  */
481 
N_VSpace_Pthreads(N_Vector v,sunindextype * lrw,sunindextype * liw)482 void N_VSpace_Pthreads(N_Vector v, sunindextype *lrw, sunindextype *liw)
483 {
484   *lrw = NV_LENGTH_PT(v);
485   *liw = 1;
486 
487   return;
488 }
489 
490 
491 /* ----------------------------------------------------------------------------
492  * Get vector data pointer
493  */
494 
N_VGetArrayPointer_Pthreads(N_Vector v)495 realtype *N_VGetArrayPointer_Pthreads(N_Vector v)
496 {
497   return((realtype *) NV_DATA_PT(v));
498 }
499 
500 
501 /* ----------------------------------------------------------------------------
502  * Set vector data pointer
503  */
504 
N_VSetArrayPointer_Pthreads(realtype * v_data,N_Vector v)505 void N_VSetArrayPointer_Pthreads(realtype *v_data, N_Vector v)
506 {
507   if (NV_LENGTH_PT(v) > 0) NV_DATA_PT(v) = v_data;
508 
509   return;
510 }
511 
512 
513 /* ----------------------------------------------------------------------------
514  * Compute linear sum z[i] = a*x[i]+b*y[i]
515  */
516 
N_VLinearSum_Pthreads(realtype a,N_Vector x,realtype b,N_Vector y,N_Vector z)517 void N_VLinearSum_Pthreads(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z)
518 {
519   sunindextype   N;
520   int            i, nthreads;
521   pthread_t      *threads;
522   Pthreads_Data  *thread_data;
523   pthread_attr_t attr;
524 
525   realtype c;
526   N_Vector v1, v2;
527   booleantype test;
528 
529   if ((b == ONE) && (z == y)) {    /* BLAS usage: axpy y <- ax+y */
530     Vaxpy_Pthreads(a,x,y);
531     return;
532   }
533 
534   if ((a == ONE) && (z == x)) {    /* BLAS usage: axpy x <- by+x */
535     Vaxpy_Pthreads(b,y,x);
536     return;
537   }
538 
539   /* Case: a == b == 1.0 */
540 
541   if ((a == ONE) && (b == ONE)) {
542     VSum_Pthreads(x, y, z);
543     return;
544   }
545 
546   /* Cases: (1) a == 1.0, b = -1.0, (2) a == -1.0, b == 1.0 */
547 
548   if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
549     v1 = test ? y : x;
550     v2 = test ? x : y;
551     VDiff_Pthreads(v2, v1, z);
552     return;
553   }
554 
555   /* Cases: (1) a == 1.0, b == other or 0.0, (2) a == other or 0.0, b == 1.0 */
556   /* if a or b is 0.0, then user should have called N_VScale */
557 
558   if ((test = (a == ONE)) || (b == ONE)) {
559     c  = test ? b : a;
560     v1 = test ? y : x;
561     v2 = test ? x : y;
562     VLin1_Pthreads(c, v1, v2, z);
563     return;
564   }
565 
566   /* Cases: (1) a == -1.0, b != 1.0, (2) a != 1.0, b == -1.0 */
567 
568   if ((test = (a == -ONE)) || (b == -ONE)) {
569     c = test ? b : a;
570     v1 = test ? y : x;
571     v2 = test ? x : y;
572     VLin2_Pthreads(c, v1, v2, z);
573     return;
574   }
575 
576   /* Case: a == b */
577   /* catches case both a and b are 0.0 - user should have called N_VConst */
578 
579   if (a == b) {
580     VScaleSum_Pthreads(a, x, y, z);
581     return;
582   }
583 
584   /* Case: a == -b */
585 
586   if (a == -b) {
587     VScaleDiff_Pthreads(a, x, y, z);
588     return;
589   }
590 
591   /* Do all cases not handled above:
592      (1) a == other, b == 0.0 - user should have called N_VScale
593      (2) a == 0.0, b == other - user should have called N_VScale
594      (3) a,b == other, a !=b, a != -b */
595 
596   /* allocate threads and thread data structs */
597   N            = NV_LENGTH_PT(x);
598   nthreads     = NV_NUM_THREADS_PT(x);
599   threads      = (pthread_t *) malloc(nthreads*sizeof(pthread_t));
600   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
601 
602   /* set thread attributes */
603   pthread_attr_init(&attr);
604   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
605 
606   for (i=0; i<nthreads; i++) {
607     /* initialize thread data */
608     N_VInitThreadData(&thread_data[i]);
609 
610     /* compute start and end loop index for thread */
611     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
612 
613     /* pack thread data */
614     thread_data[i].c1 = a;
615     thread_data[i].c2 = b;
616     thread_data[i].v1 = NV_DATA_PT(x);
617     thread_data[i].v2 = NV_DATA_PT(y);
618     thread_data[i].v3 = NV_DATA_PT(z);
619 
620     /* create threads and call pthread companion function */
621     pthread_create(&threads[i], &attr, N_VLinearSum_PT, (void *) &thread_data[i]);
622   }
623 
624   /* wait for all threads to finish */
625   for (i=0; i<nthreads; i++) {
626     pthread_join(threads[i], NULL);
627   }
628 
629   /* clean up and return */
630   pthread_attr_destroy(&attr);
631   free(threads);
632   free(thread_data);
633 
634   return;
635 }
636 
637 /* ----------------------------------------------------------------------------
638  * Pthread companion function to N_VLinearSum
639  */
640 
N_VLinearSum_PT(void * thread_data)641 static void *N_VLinearSum_PT(void *thread_data)
642 {
643   sunindextype i, start, end;
644   realtype a, b;
645   realtype *xd, *yd, *zd;
646   Pthreads_Data *my_data;
647 
648   /* extract thread data */
649   my_data = (Pthreads_Data *) thread_data;
650 
651   a  = my_data->c1;
652   b  = my_data->c2;
653   xd = my_data->v1;
654   yd = my_data->v2;
655   zd = my_data->v3;
656 
657   start = my_data->start;
658   end   = my_data->end;
659 
660   /* compute linear sum */
661   for (i = start; i < end; i++){
662     zd[i] = (a*xd[i])+(b*yd[i]);
663   }
664 
665   /* exit */
666   pthread_exit(NULL);
667 }
668 
669 
670 /* ----------------------------------------------------------------------------
671  * Assigns constant value to all vector elements, z[i] = c
672  */
673 
N_VConst_Pthreads(realtype c,N_Vector z)674 void N_VConst_Pthreads(realtype c, N_Vector z)
675 {
676   sunindextype   N;
677   int            i, nthreads;
678   pthread_t      *threads;
679   Pthreads_Data  *thread_data;
680   pthread_attr_t attr;
681 
682   /* allocate threads and thread data structs */
683   N            = NV_LENGTH_PT(z);
684   nthreads     = NV_NUM_THREADS_PT(z);
685   threads      = malloc(nthreads*sizeof(pthread_t));
686   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
687 
688   /* set thread attributes */
689   pthread_attr_init(&attr);
690   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
691 
692   for (i=0; i<nthreads; i++) {
693     /* initialize thread data */
694     N_VInitThreadData(&thread_data[i]);
695 
696     /* compute start and end loop index for thread */
697     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
698 
699     /* pack thread data */
700     thread_data[i].c1 = c;
701     thread_data[i].v1 = NV_DATA_PT(z);
702 
703     /* create threads and call pthread companion function */
704     pthread_create(&threads[i], &attr, N_VConst_PT, (void *) &thread_data[i]);
705   }
706 
707   /* wait for all threads to finish */
708   for (i=0; i<nthreads; i++) {
709     pthread_join(threads[i], NULL);
710   }
711 
712   /* clean up and return */
713   pthread_attr_destroy(&attr);
714   free(threads);
715   free(thread_data);
716 
717   return;
718 }
719 
720 
721 /* ----------------------------------------------------------------------------
722  * Pthread companion function to N_VConst
723  */
724 
N_VConst_PT(void * thread_data)725 static void *N_VConst_PT(void *thread_data)
726 {
727   sunindextype i, start, end;
728   realtype c;
729   realtype *zd;
730   Pthreads_Data *my_data;
731 
732   /* extract thread data */
733   my_data = (Pthreads_Data *) thread_data;
734 
735   c  = my_data->c1;
736   zd = my_data->v1;
737 
738   start = my_data->start;
739   end   = my_data->end;
740 
741   /* assign constant values */
742   for (i = start; i < end; i++)
743     zd[i] = c;
744 
745   /* exit */
746   pthread_exit(NULL);
747 }
748 
749 
750 /* ----------------------------------------------------------------------------
751  * Compute componentwise product z[i] = x[i]*y[i]
752  */
753 
N_VProd_Pthreads(N_Vector x,N_Vector y,N_Vector z)754 void N_VProd_Pthreads(N_Vector x, N_Vector y, N_Vector z)
755 {
756   sunindextype   N;
757   int            i, nthreads;
758   pthread_t      *threads;
759   Pthreads_Data  *thread_data;
760   pthread_attr_t attr;
761 
762   /* allocate threads and thread data structs */
763   N            = NV_LENGTH_PT(x);
764   nthreads     = NV_NUM_THREADS_PT(x);
765   threads      = malloc(nthreads*sizeof(pthread_t));
766   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
767 
768   /* set thread attributes */
769   pthread_attr_init(&attr);
770   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
771 
772   for (i=0; i<nthreads; i++) {
773     /* initialize thread data */
774     N_VInitThreadData(&thread_data[i]);
775 
776     /* compute start and end loop index for thread */
777     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
778 
779     /* pack thread data */
780     thread_data[i].v1 = NV_DATA_PT(x);
781     thread_data[i].v2 = NV_DATA_PT(y);
782     thread_data[i].v3 = NV_DATA_PT(z);
783 
784     /* create threads and call pthread companion function */
785     pthread_create(&threads[i], &attr, N_VProd_PT, (void *) &thread_data[i]);
786   }
787 
788   /* wait for all threads to finish */
789   for (i=0; i<nthreads; i++) {
790     pthread_join(threads[i], NULL);
791   }
792 
793   /* clean up and exit */
794   pthread_attr_destroy(&attr);
795   free(threads);
796   free(thread_data);
797 
798   return;
799 }
800 
801 /* ----------------------------------------------------------------------------
802  * Pthread companion function to N_VProd
803  */
804 
N_VProd_PT(void * thread_data)805 static void *N_VProd_PT(void *thread_data)
806 {
807   sunindextype i, start, end;
808   realtype *xd, *yd, *zd;
809   Pthreads_Data *my_data;
810 
811   /* extract thread data */
812   my_data = (Pthreads_Data *) thread_data;
813 
814   xd = my_data->v1;
815   yd = my_data->v2;
816   zd = my_data->v3;
817 
818   start = my_data->start;
819   end   = my_data->end;
820 
821   /* compute componentwise product */
822   for (i = start; i < end; i++)
823     zd[i] = xd[i]*yd[i];
824 
825   /* exit */
826   pthread_exit(NULL);
827 }
828 
829 
830 /* ----------------------------------------------------------------------------
831  * Compute componentwise division z[i] = x[i]/y[i]
832  */
833 
N_VDiv_Pthreads(N_Vector x,N_Vector y,N_Vector z)834 void N_VDiv_Pthreads(N_Vector x, N_Vector y, N_Vector z)
835 {
836   sunindextype   N;
837   int            i, nthreads;
838   pthread_t      *threads;
839   Pthreads_Data  *thread_data;
840   pthread_attr_t attr;
841 
842   /* allocate threads and thread data structs */
843   N            = NV_LENGTH_PT(x);
844   nthreads     = NV_NUM_THREADS_PT(x);
845   threads      = malloc(nthreads*sizeof(pthread_t));
846   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
847 
848   /* set thread attributes */
849   pthread_attr_init(&attr);
850   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
851 
852   for (i=0; i<nthreads; i++) {
853     /* initialize thread data */
854     N_VInitThreadData(&thread_data[i]);
855 
856     /* compute start and end loop index for thread */
857     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
858 
859     /* pack thread data */
860     thread_data[i].v1 = NV_DATA_PT(x);
861     thread_data[i].v2 = NV_DATA_PT(y);
862     thread_data[i].v3 = NV_DATA_PT(z);
863 
864     /* create threads and call pthread companion function */
865     pthread_create(&threads[i], &attr, N_VDiv_PT, (void *) &thread_data[i]);
866   }
867 
868   /* wait for all threads to finish */
869   for (i=0; i<nthreads; i++) {
870     pthread_join(threads[i], NULL);
871   }
872 
873   /* clean up and return */
874   pthread_attr_destroy(&attr);
875   free(threads);
876   free(thread_data);
877 
878   return;
879 }
880 
881 
882 /* ----------------------------------------------------------------------------
883  * Pthread companion function to N_VDiv
884  */
885 
N_VDiv_PT(void * thread_data)886 static void *N_VDiv_PT(void *thread_data)
887 {
888   sunindextype i, start, end;
889   realtype *xd, *yd, *zd;
890   Pthreads_Data *my_data;
891 
892   /* extract thread data */
893   my_data = (Pthreads_Data *) thread_data;
894 
895   xd = my_data->v1;
896   yd = my_data->v2;
897   zd = my_data->v3;
898 
899   start = my_data->start;
900   end   = my_data->end;
901 
902   /* compute componentwise division */
903   for (i = start; i < end; i++)
904     zd[i] = xd[i]/yd[i];
905 
906   /* exit */
907   pthread_exit(NULL);
908 }
909 
910 
911 /* ----------------------------------------------------------------------------
912  * Compute scaler multiplication z[i] = c*x[i]
913  */
914 
N_VScale_Pthreads(realtype c,N_Vector x,N_Vector z)915 void N_VScale_Pthreads(realtype c, N_Vector x, N_Vector z)
916 {
917   sunindextype   N;
918   int            i, nthreads;
919   pthread_t      *threads;
920   Pthreads_Data  *thread_data;
921   pthread_attr_t attr;
922 
923   if (z == x) {  /* BLAS usage: scale x <- cx */
924     VScaleBy_Pthreads(c, x);
925     return;
926   }
927 
928   if (c == ONE) {
929     VCopy_Pthreads(x, z);
930   } else if (c == -ONE) {
931     VNeg_Pthreads(x, z);
932   } else {
933     /* allocate threads and thread data structs */
934     N            = NV_LENGTH_PT(x);
935     nthreads     = NV_NUM_THREADS_PT(x);
936     threads      = malloc(nthreads*sizeof(pthread_t));
937     thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
938 
939     /* set thread attributes */
940     pthread_attr_init(&attr);
941     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
942 
943     for (i=0; i<nthreads; i++) {
944       /* initialize thread data */
945       N_VInitThreadData(&thread_data[i]);
946 
947       /* compute start and end loop index for thread */
948       N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
949 
950       /* pack thread data */
951       thread_data[i].c1 = c;
952       thread_data[i].v1 = NV_DATA_PT(x);
953       thread_data[i].v2 = NV_DATA_PT(z);
954 
955       /* create threads and call pthread companion function */
956       pthread_create(&threads[i], &attr, N_VScale_PT, (void *) &thread_data[i]);
957     }
958 
959     /* wait for all threads to finish */
960     for (i=0; i<nthreads; i++) {
961       pthread_join(threads[i], NULL);
962     }
963 
964     /* clean up */
965     pthread_attr_destroy(&attr);
966     free(threads);
967     free(thread_data);
968   }
969 
970   return;
971 }
972 
973 
974 /* ----------------------------------------------------------------------------
975  * Pthread companion function to N_VScale
976  */
977 
N_VScale_PT(void * thread_data)978 static void *N_VScale_PT(void *thread_data)
979 {
980   sunindextype i, start, end;
981   realtype c;
982   realtype *xd, *zd;
983   Pthreads_Data *my_data;
984 
985   /* extract thread data */
986   my_data = (Pthreads_Data *) thread_data;
987 
988   c  = my_data->c1;
989   xd = my_data->v1;
990   zd = my_data->v2;
991 
992   start = my_data->start;
993   end   = my_data->end;
994 
995   /* compute scaler multiplication */
996   for (i = start; i < end; i++)
997     zd[i] = c*xd[i];
998 
999   /* exit */
1000   pthread_exit(NULL);
1001 }
1002 
1003 
1004 /* ----------------------------------------------------------------------------
1005  * Compute absolute value of vector components z[i] = SUNRabs(x[i])
1006  */
1007 
N_VAbs_Pthreads(N_Vector x,N_Vector z)1008 void N_VAbs_Pthreads(N_Vector x, N_Vector z)
1009 {
1010   sunindextype   N;
1011   int            i, nthreads;
1012   pthread_t      *threads;
1013   Pthreads_Data  *thread_data;
1014   pthread_attr_t attr;
1015 
1016   /* allocate threads and thread data structs */
1017   N            = NV_LENGTH_PT(x);
1018   nthreads     = NV_NUM_THREADS_PT(x);
1019   threads      = malloc(nthreads*sizeof(pthread_t));
1020   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1021 
1022   /* set thread attributes */
1023   pthread_attr_init(&attr);
1024   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1025 
1026   for (i=0; i<nthreads; i++) {
1027     /* initialize thread data */
1028     N_VInitThreadData(&thread_data[i]);
1029 
1030     /* compute start and end loop index for thread */
1031     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1032 
1033     /* pack thread data */
1034     thread_data[i].v1 = NV_DATA_PT(x);
1035     thread_data[i].v2 = NV_DATA_PT(z);
1036 
1037     /* create threads and call pthread companion function */
1038     pthread_create(&threads[i], &attr, N_VAbs_PT, (void *) &thread_data[i]);
1039   }
1040 
1041   /* wait for all threads to finish */
1042   for (i=0; i<nthreads; i++) {
1043     pthread_join(threads[i], NULL);
1044   }
1045 
1046   /* clean up and return */
1047   pthread_attr_destroy(&attr);
1048   free(threads);
1049   free(thread_data);
1050 
1051   return;
1052 }
1053 
1054 
1055 /* ----------------------------------------------------------------------------
1056  * Pthread companion function to N_VAbs
1057  */
1058 
N_VAbs_PT(void * thread_data)1059 static void *N_VAbs_PT(void *thread_data)
1060 {
1061   sunindextype i, start, end;
1062   realtype *xd, *zd;
1063   Pthreads_Data *my_data;
1064 
1065   /* extract thread data */
1066   my_data = (Pthreads_Data *) thread_data;
1067 
1068   xd = my_data->v1;
1069   zd = my_data->v2;
1070 
1071   start = my_data->start;
1072   end   = my_data->end;
1073 
1074   /* compute absolute value of components */
1075   for (i = start; i < end; i++)
1076     zd[i] = SUNRabs(xd[i]);
1077 
1078   /* exit */
1079   pthread_exit(NULL);
1080 }
1081 
1082 
1083 /* ----------------------------------------------------------------------------
1084  * Compute componentwise inverse z[i] = 1 / x[i]
1085  */
1086 
N_VInv_Pthreads(N_Vector x,N_Vector z)1087 void N_VInv_Pthreads(N_Vector x, N_Vector z)
1088 {
1089   sunindextype   N;
1090   int            i, nthreads;
1091   pthread_t      *threads;
1092   Pthreads_Data  *thread_data;
1093   pthread_attr_t attr;
1094 
1095   /* allocate threads and thread data structs */
1096   N            = NV_LENGTH_PT(x);
1097   nthreads     = NV_NUM_THREADS_PT(x);
1098   threads      = malloc(nthreads*sizeof(pthread_t));
1099   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1100 
1101   /* set thread attributes */
1102   pthread_attr_init(&attr);
1103   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1104 
1105   for (i=0; i<nthreads; i++) {
1106     /* initialize thread data */
1107     N_VInitThreadData(&thread_data[i]);
1108 
1109     /* compute start and end loop index for thread */
1110     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1111 
1112     /* pack thread data */
1113     thread_data[i].v1 = NV_DATA_PT(x);
1114     thread_data[i].v2 = NV_DATA_PT(z);
1115 
1116     /* create threads and call pthread companion function */
1117     pthread_create(&threads[i], &attr, N_VInv_PT, (void *) &thread_data[i]);
1118   }
1119 
1120   /* wait for all threads to finish */
1121   for (i=0; i<nthreads; i++) {
1122     pthread_join(threads[i], NULL);
1123   }
1124 
1125   /* clean up and return */
1126   pthread_attr_destroy(&attr);
1127   free(threads);
1128   free(thread_data);
1129 
1130   return;
1131 }
1132 
1133 
1134 /* ----------------------------------------------------------------------------
1135  * Pthread companion function to N_VInv
1136  */
1137 
N_VInv_PT(void * thread_data)1138 static void *N_VInv_PT(void *thread_data)
1139 {
1140   sunindextype i, start, end;
1141   realtype *xd, *zd;
1142   Pthreads_Data *my_data;
1143 
1144   /* extract thread data */
1145   my_data = (Pthreads_Data *) thread_data;
1146 
1147   xd = my_data->v1;
1148   zd = my_data->v2;
1149 
1150   start = my_data->start;
1151   end   = my_data->end;
1152 
1153   /* compute componentwise inverse */
1154   for (i = start; i < end; i++)
1155     zd[i] = ONE/xd[i];
1156 
1157   /* exit */
1158   pthread_exit(NULL);
1159 }
1160 
1161 
1162 /* ----------------------------------------------------------------------------
1163  * Compute componentwise addition of a scaler to a vector z[i] = x[i] + b
1164  */
1165 
N_VAddConst_Pthreads(N_Vector x,realtype b,N_Vector z)1166 void N_VAddConst_Pthreads(N_Vector x, realtype b, N_Vector z)
1167 {
1168   sunindextype   N;
1169   int            i, nthreads;
1170   pthread_t      *threads;
1171   Pthreads_Data  *thread_data;
1172   pthread_attr_t attr;
1173 
1174   /* allocate threads and thread data structs */
1175   N            = NV_LENGTH_PT(x);
1176   nthreads     = NV_NUM_THREADS_PT(x);
1177   threads      = malloc(nthreads*sizeof(pthread_t));
1178   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1179 
1180   /* set thread attributes */
1181   pthread_attr_init(&attr);
1182   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1183 
1184   for (i=0; i<nthreads; i++) {
1185     /* initialize thread data */
1186     N_VInitThreadData(&thread_data[i]);
1187 
1188     /* compute start and end loop index for thread */
1189     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1190 
1191     /* pack thread data */
1192     thread_data[i].c1 = b;
1193     thread_data[i].v1 = NV_DATA_PT(x);
1194     thread_data[i].v2 = NV_DATA_PT(z);
1195 
1196     /* create threads and call pthread companion function */
1197     pthread_create(&threads[i], &attr, N_VAddConst_PT, (void *) &thread_data[i]);
1198   }
1199 
1200   /* wait for all threads to finish */
1201   for (i=0; i<nthreads; i++) {
1202     pthread_join(threads[i], NULL);
1203   }
1204 
1205   /* clean up and return */
1206   pthread_attr_destroy(&attr);
1207   free(threads);
1208   free(thread_data);
1209 
1210   return;
1211 }
1212 
1213 
1214 /* ----------------------------------------------------------------------------
1215  * Pthread companion function to N_VAddConst
1216  */
1217 
N_VAddConst_PT(void * thread_data)1218 static void *N_VAddConst_PT(void *thread_data)
1219 {
1220   sunindextype i, start, end;
1221   realtype b;
1222   realtype *xd, *zd;
1223   Pthreads_Data *my_data;
1224 
1225   /* extract thread data */
1226   my_data = (Pthreads_Data *) thread_data;
1227 
1228   b  = my_data->c1;
1229   xd = my_data->v1;
1230   zd = my_data->v2;
1231 
1232   start = my_data->start;
1233   end   = my_data->end;
1234 
1235   /* compute componentwise constant addition */
1236   for (i = start; i < end; i++)
1237     zd[i] = xd[i] + b;
1238 
1239   /* exit */
1240   pthread_exit(NULL);
1241 }
1242 
1243 
1244 /* ----------------------------------------------------------------------------
1245  * Computes the dot product of two vectors, a = sum(x[i]*y[i])
1246  */
1247 
N_VDotProd_Pthreads(N_Vector x,N_Vector y)1248 realtype N_VDotProd_Pthreads(N_Vector x, N_Vector y)
1249 {
1250   sunindextype    N;
1251   int             i, nthreads;
1252   pthread_t       *threads;
1253   Pthreads_Data   *thread_data;
1254   pthread_attr_t  attr;
1255   pthread_mutex_t global_mutex;
1256   realtype        sum = ZERO;
1257 
1258   /* allocate threads and thread data structs */
1259   N           = NV_LENGTH_PT(x);
1260   nthreads    = NV_NUM_THREADS_PT(x);
1261   threads     = malloc(nthreads*sizeof(pthread_t));
1262   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1263 
1264   /* set thread attributes */
1265   pthread_attr_init(&attr);
1266   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1267 
1268   /* lock for reduction */
1269   pthread_mutex_init(&global_mutex, NULL);
1270 
1271   for (i=0; i<nthreads; i++) {
1272     /* initialize thread data */
1273     N_VInitThreadData(&thread_data[i]);
1274 
1275     /* compute start and end loop index for thread */
1276     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1277 
1278     /* pack thread data */
1279     thread_data[i].v1 = NV_DATA_PT(x);
1280     thread_data[i].v2 = NV_DATA_PT(y);
1281     thread_data[i].global_val   = &sum;
1282     thread_data[i].global_mutex = &global_mutex;
1283 
1284     /* create threads and call pthread companion function */
1285     pthread_create(&threads[i], &attr, N_VDotProd_PT, (void *) &thread_data[i]);
1286   }
1287 
1288   /* wait for all threads to finish */
1289   for (i=0; i<nthreads; i++) {
1290     pthread_join(threads[i], NULL);
1291   }
1292 
1293   /* clean up and return */
1294   pthread_attr_destroy(&attr);
1295   pthread_mutex_destroy(&global_mutex);
1296   free(threads);
1297   free(thread_data);
1298 
1299   return(sum);
1300 }
1301 
1302 
1303 /* ----------------------------------------------------------------------------
1304  * Pthread companion function to N_VDotProd
1305  */
1306 
N_VDotProd_PT(void * thread_data)1307 static void *N_VDotProd_PT(void *thread_data)
1308 {
1309   sunindextype i, start, end;
1310   realtype *xd, *yd;
1311   realtype local_sum, *global_sum;
1312   Pthreads_Data *my_data;
1313   pthread_mutex_t *global_mutex;
1314 
1315   /* extract thread data */
1316   my_data = (Pthreads_Data *) thread_data;
1317 
1318   xd = my_data->v1;
1319   yd = my_data->v2;
1320 
1321   global_sum   = my_data->global_val;
1322   global_mutex = my_data->global_mutex;
1323 
1324   start = my_data->start;
1325   end   = my_data->end;
1326 
1327   /* compute dot product */
1328   local_sum = ZERO;
1329   for (i = start; i < end; i++)
1330     local_sum += xd[i] * yd[i];
1331 
1332   /* update global sum */
1333   pthread_mutex_lock(global_mutex);
1334   *global_sum += local_sum;
1335   pthread_mutex_unlock(global_mutex);
1336 
1337   /* exit */
1338   pthread_exit(NULL);
1339 }
1340 
1341 
1342 /* ----------------------------------------------------------------------------
1343  * Computes max norm of the vector
1344  */
1345 
N_VMaxNorm_Pthreads(N_Vector x)1346 realtype N_VMaxNorm_Pthreads(N_Vector x)
1347 {
1348   sunindextype    N;
1349   int             i, nthreads;
1350   pthread_t       *threads;
1351   Pthreads_Data   *thread_data;
1352   pthread_attr_t  attr;
1353   pthread_mutex_t global_mutex;
1354   realtype        max = ZERO;
1355 
1356   /* allocate threads and thread data structs */
1357   N            = NV_LENGTH_PT(x);
1358   nthreads     = NV_NUM_THREADS_PT(x);
1359   threads      = malloc(nthreads*sizeof(pthread_t));
1360   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1361 
1362   /* set thread attributes */
1363   pthread_attr_init(&attr);
1364   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1365 
1366   /* lock for reduction */
1367   pthread_mutex_init(&global_mutex, NULL);
1368 
1369   for (i=0; i<nthreads; i++) {
1370     /* initialize thread data */
1371     N_VInitThreadData(&thread_data[i]);
1372 
1373     /* compute start and end loop index for thread */
1374     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1375 
1376     /* pack thread data */
1377     thread_data[i].v1 = NV_DATA_PT(x);
1378     thread_data[i].global_val   = &max;
1379     thread_data[i].global_mutex = &global_mutex;
1380 
1381     /* create threads and call pthread companion function */
1382     pthread_create(&threads[i], &attr, N_VMaxNorm_PT, (void *) &thread_data[i]);
1383   }
1384 
1385   /* wait for all threads to finish */
1386   for (i=0; i<nthreads; i++) {
1387     pthread_join(threads[i], NULL);
1388   }
1389 
1390   /* clean up and return */
1391   pthread_attr_destroy(&attr);
1392   pthread_mutex_destroy(&global_mutex);
1393   free(threads);
1394   free(thread_data);
1395 
1396   return(max);
1397 }
1398 
1399 
1400 /* ----------------------------------------------------------------------------
1401  * Pthread companion function to N_VMaxNorm
1402  */
1403 
N_VMaxNorm_PT(void * thread_data)1404 static void *N_VMaxNorm_PT(void *thread_data)
1405 {
1406   sunindextype i, start, end;
1407   realtype *xd;
1408   realtype local_max, *global_max;
1409   Pthreads_Data *my_data;
1410   pthread_mutex_t *global_mutex;
1411 
1412   /* extract thread data */
1413   my_data = (Pthreads_Data *) thread_data;
1414 
1415   xd = my_data->v1;
1416 
1417   global_max   = my_data->global_val;
1418   global_mutex = my_data->global_mutex;
1419 
1420   start = my_data->start;
1421   end   = my_data->end;
1422 
1423   /* find local max */
1424   local_max = ZERO;
1425   for (i = start; i < end; i++)
1426     if (SUNRabs(xd[i]) > local_max) local_max = SUNRabs(xd[i]);
1427 
1428   /* update global max */
1429   pthread_mutex_lock(global_mutex);
1430   if (local_max > *global_max) {
1431     *global_max = local_max;
1432   }
1433   pthread_mutex_unlock(global_mutex);
1434 
1435   /* exit */
1436   pthread_exit(NULL);
1437 }
1438 
1439 
1440 /* ----------------------------------------------------------------------------
1441  * Computes weighted root mean square norm of a vector
1442  */
1443 
N_VWrmsNorm_Pthreads(N_Vector x,N_Vector w)1444 realtype N_VWrmsNorm_Pthreads(N_Vector x, N_Vector w)
1445 {
1446   return(SUNRsqrt(N_VWSqrSumLocal_Pthreads(x, w)/(NV_LENGTH_PT(x))));
1447 }
1448 
1449 
1450 /* ----------------------------------------------------------------------------
1451  * Computes weighted square sum of a vector
1452  */
1453 
N_VWSqrSumLocal_Pthreads(N_Vector x,N_Vector w)1454 realtype N_VWSqrSumLocal_Pthreads(N_Vector x, N_Vector w)
1455 {
1456   sunindextype    N;
1457   int             i, nthreads;
1458   pthread_t       *threads;
1459   Pthreads_Data   *thread_data;
1460   pthread_attr_t  attr;
1461   pthread_mutex_t global_mutex;
1462   realtype        sum = ZERO;
1463 
1464   /* allocate threads and thread data structs */
1465   N           = NV_LENGTH_PT(x);
1466   nthreads    = NV_NUM_THREADS_PT(x);
1467   threads     = malloc(nthreads*sizeof(pthread_t));
1468   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1469 
1470   /* set thread attributes */
1471   pthread_attr_init(&attr);
1472   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1473 
1474   /* lock for reduction */
1475   pthread_mutex_init(&global_mutex, NULL);
1476 
1477   for (i=0; i<nthreads; i++) {
1478     /* initialize thread data */
1479     N_VInitThreadData(&thread_data[i]);
1480 
1481     /* compute start and end loop index for thread */
1482     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1483 
1484     /* pack thread data */
1485     thread_data[i].v1 = NV_DATA_PT(x);
1486     thread_data[i].v2 = NV_DATA_PT(w);
1487     thread_data[i].global_val   = &sum;
1488     thread_data[i].global_mutex = &global_mutex;
1489 
1490     /* create threads and call pthread companion function */
1491     pthread_create(&threads[i], &attr, N_VWSqrSum_PT, (void *) &thread_data[i]);
1492   }
1493 
1494   /* wait for all threads to finish */
1495   for (i=0; i<nthreads; i++) {
1496     pthread_join(threads[i], NULL);
1497   }
1498 
1499   /* clean up and return */
1500   pthread_attr_destroy(&attr);
1501   pthread_mutex_destroy(&global_mutex);
1502   free(threads);
1503   free(thread_data);
1504 
1505   return(sum);
1506 }
1507 
1508 
1509 /* ----------------------------------------------------------------------------
1510  * Pthread companion function to N_VWrmsNorm
1511  */
1512 
N_VWSqrSum_PT(void * thread_data)1513 static void *N_VWSqrSum_PT(void *thread_data)
1514 {
1515   sunindextype i, start, end;
1516   realtype *xd, *wd;
1517   realtype local_sum, *global_sum;
1518   Pthreads_Data *my_data;
1519   pthread_mutex_t *global_mutex;
1520 
1521   /* extract thread data */
1522   my_data = (Pthreads_Data *) thread_data;
1523 
1524   xd = my_data->v1;
1525   wd = my_data->v2;
1526 
1527   global_sum   = my_data->global_val;
1528   global_mutex = my_data->global_mutex;
1529 
1530   start = my_data->start;
1531   end   = my_data->end;
1532 
1533   /* compute wrms norm */
1534   local_sum = ZERO;
1535   for (i = start; i < end; i++)
1536     local_sum += SUNSQR(xd[i] * wd[i]);
1537 
1538   /* update global sum */
1539   pthread_mutex_lock(global_mutex);
1540   *global_sum += local_sum;
1541   pthread_mutex_unlock(global_mutex);
1542 
1543   /* exit */
1544   pthread_exit(NULL);
1545 }
1546 
1547 
1548 /* ----------------------------------------------------------------------------
1549  * Computes weighted root mean square norm of a masked vector
1550  */
1551 
N_VWrmsNormMask_Pthreads(N_Vector x,N_Vector w,N_Vector id)1552 realtype N_VWrmsNormMask_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1553 {
1554   return(SUNRsqrt(N_VWSqrSumMaskLocal_Pthreads(x, w, id)/(NV_LENGTH_PT(x))));
1555 }
1556 
1557 
1558 /* ----------------------------------------------------------------------------
1559  * Computes weighted square sum of a masked vector
1560  */
1561 
N_VWSqrSumMaskLocal_Pthreads(N_Vector x,N_Vector w,N_Vector id)1562 realtype N_VWSqrSumMaskLocal_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1563 {
1564   sunindextype    N;
1565   int             i, nthreads;
1566   pthread_t       *threads;
1567   Pthreads_Data   *thread_data;
1568   pthread_attr_t  attr;
1569   pthread_mutex_t global_mutex;
1570   realtype        sum = ZERO;
1571 
1572   /* allocate threads and thread data structs */
1573   N            = NV_LENGTH_PT(x);
1574   nthreads     = NV_NUM_THREADS_PT(x);
1575   threads      = malloc(nthreads*sizeof(pthread_t));
1576   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1577 
1578   /* set thread attributes */
1579   pthread_attr_init(&attr);
1580   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1581 
1582   /* lock for reduction */
1583   pthread_mutex_init(&global_mutex, NULL);
1584 
1585   for (i=0; i<nthreads; i++) {
1586     /* initialize thread data */
1587     N_VInitThreadData(&thread_data[i]);
1588 
1589     /* compute start and end loop index for thread */
1590     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1591 
1592     /* pack thread data */
1593     thread_data[i].v1 = NV_DATA_PT(x);
1594     thread_data[i].v2 = NV_DATA_PT(w);
1595     thread_data[i].v3 = NV_DATA_PT(id);
1596     thread_data[i].global_val   = &sum;
1597     thread_data[i].global_mutex = &global_mutex;
1598 
1599     /* create threads and call pthread companion function */
1600     pthread_create(&threads[i], &attr, N_VWSqrSumMask_PT, (void *) &thread_data[i]);
1601   }
1602 
1603   /* wait for all threads to finish */
1604   for (i=0; i<nthreads; i++) {
1605     pthread_join(threads[i], NULL);
1606   }
1607 
1608   /* clean up and return */
1609   pthread_attr_destroy(&attr);
1610   pthread_mutex_destroy(&global_mutex);
1611   free(threads);
1612   free(thread_data);
1613 
1614   return(sum);
1615 }
1616 
1617 
1618 /* ----------------------------------------------------------------------------
1619  * Pthread companion function to N_VWSqrSumMask
1620  */
1621 
N_VWSqrSumMask_PT(void * thread_data)1622 static void *N_VWSqrSumMask_PT(void *thread_data)
1623 {
1624   sunindextype i, start, end;
1625   realtype *xd, *wd, *idd;
1626   realtype local_sum, *global_sum;
1627   Pthreads_Data *my_data;
1628   pthread_mutex_t *global_mutex;
1629 
1630   /* extract thread data */
1631   my_data = (Pthreads_Data *) thread_data;
1632 
1633   xd  = my_data->v1;
1634   wd  = my_data->v2;
1635   idd = my_data->v3;
1636 
1637   global_sum   = my_data->global_val;
1638   global_mutex = my_data->global_mutex;
1639 
1640   start = my_data->start;
1641   end   = my_data->end;
1642 
1643   /* compute wrms norm with mask */
1644   local_sum = ZERO;
1645   for (i = start; i < end; i++) {
1646     if (idd[i] > ZERO)
1647       local_sum += SUNSQR(xd[i]*wd[i]);
1648   }
1649 
1650   /* update global sum */
1651   pthread_mutex_lock(global_mutex);
1652   *global_sum += local_sum;
1653   pthread_mutex_unlock(global_mutex);
1654 
1655   /* exit */
1656   pthread_exit(NULL);
1657 }
1658 
1659 
1660 /* ----------------------------------------------------------------------------
1661  * Finds the minimun component of a vector
1662  */
1663 
N_VMin_Pthreads(N_Vector x)1664 realtype N_VMin_Pthreads(N_Vector x)
1665 {
1666   sunindextype    N;
1667   int             i, nthreads;
1668   pthread_t       *threads;
1669   Pthreads_Data   *thread_data;
1670   pthread_attr_t  attr;
1671   pthread_mutex_t global_mutex;
1672   realtype        min;
1673 
1674   /* initialize global min */
1675   min = NV_Ith_PT(x,0);
1676 
1677   /* allocate threads and thread data structs */
1678   N           = NV_LENGTH_PT(x);
1679   nthreads    = NV_NUM_THREADS_PT(x);
1680   threads     = malloc(nthreads*sizeof(pthread_t));
1681   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1682 
1683   /* set thread attributes */
1684   pthread_attr_init(&attr);
1685   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1686 
1687   /* lock for reduction */
1688   pthread_mutex_init(&global_mutex, NULL);
1689 
1690   for (i=0; i<nthreads; i++) {
1691     /* initialize thread data */
1692     N_VInitThreadData(&thread_data[i]);
1693 
1694     /* compute start and end loop index for thread */
1695     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1696 
1697     /* pack thread data */
1698     thread_data[i].v1 = NV_DATA_PT(x);
1699     thread_data[i].global_val   = &min;
1700     thread_data[i].global_mutex = &global_mutex;
1701 
1702     /* create threads and call pthread companion function */
1703     pthread_create(&threads[i], &attr, N_VMin_PT, (void *) &thread_data[i]);
1704   }
1705 
1706   /* wait for all threads to finish */
1707   for (i=0; i<nthreads; i++) {
1708     pthread_join(threads[i], NULL);
1709   }
1710 
1711   /* clean up and return */
1712   pthread_attr_destroy(&attr);
1713   pthread_mutex_destroy(&global_mutex);
1714   free(threads);
1715   free(thread_data);
1716 
1717   return(min);
1718 }
1719 
1720 
1721 /* ----------------------------------------------------------------------------
1722  * Pthread companion function to N_VMin
1723  */
1724 
N_VMin_PT(void * thread_data)1725 static void *N_VMin_PT(void *thread_data)
1726 {
1727   sunindextype i, start, end;
1728   realtype *xd;
1729   realtype local_min, *global_min;
1730   Pthreads_Data *my_data;
1731   pthread_mutex_t *global_mutex;
1732 
1733   /* extract thread data */
1734   my_data = (Pthreads_Data *) thread_data;
1735 
1736   xd = my_data->v1;
1737 
1738   global_min   = my_data->global_val;
1739   global_mutex = my_data->global_mutex;
1740 
1741   start = my_data->start;
1742   end   = my_data->end;
1743 
1744   /* find local min */
1745   local_min = *global_min;
1746   for (i = start; i < end; i++) {
1747     if (xd[i] < local_min)
1748       local_min = xd[i];
1749   }
1750 
1751   /* update global min */
1752   pthread_mutex_lock(global_mutex);
1753   if (local_min < *global_min)
1754     *global_min = local_min;
1755   pthread_mutex_unlock(global_mutex);
1756 
1757   /* exit */
1758   pthread_exit(NULL);
1759 }
1760 
1761 
1762 /* ----------------------------------------------------------------------------
1763  * Computes weighted L2 norm of a vector
1764  */
1765 
N_VWL2Norm_Pthreads(N_Vector x,N_Vector w)1766 realtype N_VWL2Norm_Pthreads(N_Vector x, N_Vector w)
1767 {
1768   sunindextype    N;
1769   int             i, nthreads;
1770   pthread_t       *threads;
1771   Pthreads_Data   *thread_data;
1772   pthread_attr_t  attr;
1773   pthread_mutex_t global_mutex;
1774   realtype        sum = ZERO;
1775 
1776   /* allocate threads and thread data structs */
1777   N           = NV_LENGTH_PT(x);
1778   nthreads    = NV_NUM_THREADS_PT(x);
1779   threads     = malloc(nthreads*sizeof(pthread_t));
1780   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1781 
1782   /* set thread attributes */
1783   pthread_attr_init(&attr);
1784   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1785 
1786   /* lock for reduction */
1787   pthread_mutex_init(&global_mutex, NULL);
1788 
1789   for (i=0; i<nthreads; i++) {
1790     /* initialize thread data */
1791     N_VInitThreadData(&thread_data[i]);
1792 
1793     /* compute start and end loop index for thread */
1794     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1795 
1796     /* pack thread data */
1797     thread_data[i].v1 = NV_DATA_PT(x);
1798     thread_data[i].v2 = NV_DATA_PT(w);
1799     thread_data[i].global_val   = &sum;
1800     thread_data[i].global_mutex = &global_mutex;
1801 
1802     /* create threads and call pthread companion function */
1803     pthread_create(&threads[i], &attr, N_VWL2Norm_PT, (void *) &thread_data[i]);
1804   }
1805 
1806   /* wait for all threads to finish */
1807   for (i=0; i<nthreads; i++) {
1808     pthread_join(threads[i], NULL);
1809   }
1810 
1811   /* clean up and return */
1812   pthread_attr_destroy(&attr);
1813   pthread_mutex_destroy(&global_mutex);
1814   free(threads);
1815   free(thread_data);
1816 
1817   return(SUNRsqrt(sum));
1818 }
1819 
1820 
1821 /* ----------------------------------------------------------------------------
1822  * Pthread companion function to N_VWL2Norm
1823  */
1824 
N_VWL2Norm_PT(void * thread_data)1825 static void *N_VWL2Norm_PT(void *thread_data)
1826 {
1827   sunindextype i, start, end;
1828   realtype *xd, *wd;
1829   realtype local_sum, *global_sum;
1830   Pthreads_Data *my_data;
1831   pthread_mutex_t *global_mutex;
1832 
1833   /* extract thread data */
1834   my_data = (Pthreads_Data *) thread_data;
1835 
1836   xd = my_data->v1;
1837   wd = my_data->v2;
1838 
1839   global_sum   = my_data->global_val;
1840   global_mutex = my_data->global_mutex;
1841 
1842   start = my_data->start;
1843   end   = my_data->end;
1844 
1845   /* compute WL2 norm */
1846   local_sum = ZERO;
1847   for (i = start; i < end; i++)
1848     local_sum += SUNSQR(xd[i]*wd[i]);
1849 
1850   /* update global sum */
1851   pthread_mutex_lock(global_mutex);
1852   *global_sum += local_sum;
1853   pthread_mutex_unlock(global_mutex);
1854 
1855   /* exit */
1856   pthread_exit(NULL);
1857 }
1858 
1859 
1860 /* ----------------------------------------------------------------------------
1861  * Computes L1 norm of a vector
1862  */
1863 
N_VL1Norm_Pthreads(N_Vector x)1864 realtype N_VL1Norm_Pthreads(N_Vector x)
1865 {
1866   sunindextype    N;
1867   int             i, nthreads;
1868   pthread_t       *threads;
1869   Pthreads_Data   *thread_data;
1870   pthread_attr_t  attr;
1871   pthread_mutex_t global_mutex;
1872   realtype        sum = ZERO;
1873 
1874   /* allocate threads and thread data structs */
1875   N            = NV_LENGTH_PT(x);
1876   nthreads     = NV_NUM_THREADS_PT(x);
1877   threads      = malloc(nthreads*sizeof(pthread_t));
1878   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1879 
1880   /* set thread attributes */
1881   pthread_attr_init(&attr);
1882   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1883 
1884   /* lock for reduction */
1885   pthread_mutex_init(&global_mutex, NULL);
1886 
1887   for (i=0; i<nthreads; i++) {
1888     /* initialize thread data */
1889     N_VInitThreadData(&thread_data[i]);
1890 
1891     /* compute start and end loop index for thread */
1892     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1893 
1894     /* pack thread data */
1895     thread_data[i].v1 = NV_DATA_PT(x);
1896     thread_data[i].global_val   = &sum;
1897     thread_data[i].global_mutex = &global_mutex;
1898 
1899     /* create threads and call pthread companion function */
1900     pthread_create(&threads[i], &attr, N_VL1Norm_PT, (void *) &thread_data[i]);
1901   }
1902 
1903   /* wait for all threads to finish */
1904   for (i=0; i<nthreads; i++) {
1905     pthread_join(threads[i], NULL);
1906   }
1907 
1908   /* clean up and return */
1909   pthread_attr_destroy(&attr);
1910   pthread_mutex_destroy(&global_mutex);
1911   free(threads);
1912   free(thread_data);
1913 
1914   return(sum);
1915 }
1916 
1917 
1918 /* ----------------------------------------------------------------------------
1919  * Pthread companion function to N_VL1Norm
1920  */
1921 
N_VL1Norm_PT(void * thread_data)1922 static void *N_VL1Norm_PT(void *thread_data)
1923 {
1924   sunindextype i, start, end;
1925   realtype *xd;
1926   realtype local_sum, *global_sum;
1927   Pthreads_Data *my_data;
1928   pthread_mutex_t *global_mutex;
1929 
1930   /* extract thread data */
1931   my_data = (Pthreads_Data *) thread_data;
1932 
1933   xd = my_data->v1;
1934 
1935   global_sum   = my_data->global_val;
1936   global_mutex = my_data->global_mutex;
1937 
1938   start = my_data->start;
1939   end   = my_data->end;
1940 
1941   /* compute L1 norm */
1942   local_sum = ZERO;
1943   for (i = start; i < end; i++)
1944     local_sum += SUNRabs(xd[i]);
1945 
1946   /* update global sum */
1947   pthread_mutex_lock(global_mutex);
1948   *global_sum += local_sum;
1949   pthread_mutex_unlock(global_mutex);
1950 
1951   /* exit */
1952   pthread_exit(NULL);
1953 }
1954 
1955 
1956 /* ----------------------------------------------------------------------------
1957  * Compare vector component values to a scaler
1958  */
1959 
N_VCompare_Pthreads(realtype c,N_Vector x,N_Vector z)1960 void N_VCompare_Pthreads(realtype c, N_Vector x, N_Vector z)
1961 {
1962   sunindextype   N;
1963   int            i, nthreads;
1964   pthread_t      *threads;
1965   Pthreads_Data  *thread_data;
1966   pthread_attr_t attr;
1967 
1968   /* allocate threads and thread data structs */
1969   N            = NV_LENGTH_PT(x);
1970   nthreads     = NV_NUM_THREADS_PT(x);
1971   threads      = malloc(nthreads*sizeof(pthread_t));
1972   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1973 
1974   /* set thread attributes */
1975   pthread_attr_init(&attr);
1976   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1977 
1978   for (i=0; i<nthreads; i++) {
1979     /* initialize thread data */
1980     N_VInitThreadData(&thread_data[i]);
1981 
1982     /* compute start and end loop index for thread */
1983     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1984 
1985     /* pack thread data */
1986     thread_data[i].c1  = c;
1987     thread_data[i].v1 = NV_DATA_PT(x);
1988     thread_data[i].v2 = NV_DATA_PT(z);
1989 
1990     /* create threads and call pthread companion function */
1991     pthread_create(&threads[i], &attr, N_VCompare_PT, (void *) &thread_data[i]);
1992   }
1993 
1994   /* wait for all threads to finish */
1995   for (i=0; i<nthreads; i++) {
1996     pthread_join(threads[i], NULL);
1997   }
1998 
1999   /* clean up and return */
2000   pthread_attr_destroy(&attr);
2001   free(threads);
2002   free(thread_data);
2003 
2004   return;
2005 }
2006 
2007 /* ----------------------------------------------------------------------------
2008  * Pthread companion function to N_VCompare
2009  */
2010 
N_VCompare_PT(void * thread_data)2011 static void *N_VCompare_PT(void *thread_data)
2012 {
2013   sunindextype i, start, end;
2014   realtype c;
2015   realtype *xd, *zd;
2016   Pthreads_Data *my_data;
2017 
2018   /* extract thread data */
2019   my_data = (Pthreads_Data *) thread_data;
2020 
2021   c  = my_data->c1;
2022   xd = my_data->v1;
2023   zd = my_data->v2;
2024 
2025   start = my_data->start;
2026   end   = my_data->end;
2027 
2028   /* compare component to scaler */
2029   for (i = start; i < end; i++)
2030     zd[i] = (SUNRabs(xd[i]) >= c) ? ONE : ZERO;
2031 
2032   /* exit */
2033   pthread_exit(NULL);
2034 }
2035 
2036 
2037 /* ----------------------------------------------------------------------------
2038  * Compute componentwise inverse z[i] = ONE/x[i] and check if x[i] == ZERO
2039  */
2040 
N_VInvTest_Pthreads(N_Vector x,N_Vector z)2041 booleantype N_VInvTest_Pthreads(N_Vector x, N_Vector z)
2042 {
2043   sunindextype   N;
2044   int            i, nthreads;
2045   pthread_t      *threads;
2046   Pthreads_Data  *thread_data;
2047   pthread_attr_t attr;
2048 
2049   realtype val = ZERO;
2050 
2051   /* allocate threads and thread data structs */
2052   N           = NV_LENGTH_PT(x);
2053   nthreads    = NV_NUM_THREADS_PT(x);
2054   threads     = malloc(nthreads*sizeof(pthread_t));
2055   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2056 
2057   /* set thread attributes */
2058   pthread_attr_init(&attr);
2059   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2060 
2061   for (i=0; i<nthreads; i++) {
2062     /* initialize thread data */
2063     N_VInitThreadData(&thread_data[i]);
2064 
2065     /* compute start and end loop index for thread */
2066     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2067 
2068     /* pack thread data */
2069     thread_data[i].v1 = NV_DATA_PT(x);
2070     thread_data[i].v2 = NV_DATA_PT(z);
2071     thread_data[i].global_val = &val;
2072 
2073     /* create threads and call pthread companion function */
2074     pthread_create(&threads[i], &attr, N_VInvTest_PT, (void *) &thread_data[i]);
2075   }
2076 
2077   /* wait for all threads to finish */
2078   for (i=0; i<nthreads; i++) {
2079     pthread_join(threads[i], NULL);
2080   }
2081 
2082   /* clean up and return */
2083   pthread_attr_destroy(&attr);
2084   free(threads);
2085   free(thread_data);
2086 
2087   if (val > ZERO)
2088     return (SUNFALSE);
2089   else
2090     return (SUNTRUE);
2091 }
2092 
2093 
2094 /* ----------------------------------------------------------------------------
2095  * Pthread companion function to N_VInvTest
2096  */
2097 
N_VInvTest_PT(void * thread_data)2098 static void *N_VInvTest_PT(void *thread_data)
2099 {
2100   sunindextype i, start, end;
2101   realtype *xd, *zd;
2102   realtype local_val, *global_val;
2103   Pthreads_Data *my_data;
2104 
2105   /* extract thread data */
2106   my_data = (Pthreads_Data *) thread_data;
2107 
2108   xd = my_data->v1;
2109   zd = my_data->v2;
2110 
2111   global_val = my_data->global_val;
2112 
2113   start = my_data->start;
2114   end   = my_data->end;
2115 
2116   /* compute inverse with check for divide by ZERO */
2117   local_val = ZERO;
2118   for (i = start; i < end; i++) {
2119     if (xd[i] == ZERO)
2120       local_val = ONE;
2121     else
2122       zd[i] = ONE/xd[i];
2123   }
2124 
2125   /* update global val */
2126   if (local_val > ZERO) {
2127     *global_val = local_val;
2128   }
2129 
2130   /* exit */
2131   pthread_exit(NULL);
2132 }
2133 
2134 
2135 /* ----------------------------------------------------------------------------
2136  * Compute constraint mask of a vector
2137  */
2138 
N_VConstrMask_Pthreads(N_Vector c,N_Vector x,N_Vector m)2139 booleantype N_VConstrMask_Pthreads(N_Vector c, N_Vector x, N_Vector m)
2140 {
2141   sunindextype   N;
2142   int            i, nthreads;
2143   pthread_t      *threads;
2144   Pthreads_Data  *thread_data;
2145   pthread_attr_t attr;
2146 
2147   realtype val = ZERO;
2148 
2149   /* allocate threads and thread data structs */
2150   N           = NV_LENGTH_PT(x);
2151   nthreads    = NV_NUM_THREADS_PT(x);
2152   threads     = malloc(nthreads*sizeof(pthread_t));
2153   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2154 
2155   /* set thread attributes */
2156   pthread_attr_init(&attr);
2157   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2158 
2159   for (i=0; i<nthreads; i++) {
2160     /* initialize thread data */
2161     N_VInitThreadData(&thread_data[i]);
2162 
2163     /* compute start and end loop index for thread */
2164     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2165 
2166     /* pack thread data */
2167     thread_data[i].v1 = NV_DATA_PT(c);
2168     thread_data[i].v2 = NV_DATA_PT(x);
2169     thread_data[i].v3 = NV_DATA_PT(m);
2170     thread_data[i].global_val = &val;
2171 
2172     /* create threads and call pthread companion function */
2173     pthread_create(&threads[i], &attr, N_VConstrMask_PT, (void *) &thread_data[i]);
2174   }
2175 
2176   /* wait for all threads to finish */
2177   for (i=0; i<nthreads; i++) {
2178     pthread_join(threads[i], NULL);
2179   }
2180 
2181   /* clean up and return */
2182   pthread_attr_destroy(&attr);
2183   free(threads);
2184   free(thread_data);
2185 
2186   if (val > ZERO)
2187     return(SUNFALSE);
2188   else
2189     return(SUNTRUE);
2190 }
2191 
2192 
2193 /* ----------------------------------------------------------------------------
2194  * Pthread companion function to N_VConstrMask
2195  */
2196 
N_VConstrMask_PT(void * thread_data)2197 static void *N_VConstrMask_PT(void *thread_data)
2198 {
2199   sunindextype i, start, end;
2200   realtype *cd, *xd, *md;
2201   realtype local_val, *global_val;
2202   Pthreads_Data *my_data;
2203 
2204   /* extract thread data */
2205   my_data = (Pthreads_Data *) thread_data;
2206 
2207   cd = my_data->v1;
2208   xd = my_data->v2;
2209   md = my_data->v3;
2210 
2211   global_val = my_data->global_val;
2212 
2213   start = my_data->start;
2214   end   = my_data->end;
2215 
2216   /* compute constraint mask */
2217   local_val = ZERO;
2218   for (i = start; i < end; i++) {
2219     md[i] = ZERO;
2220 
2221     /* Continue if no constraints were set for the variable */
2222     if (cd[i] == ZERO)
2223       continue;
2224 
2225     /* Check if a set constraint has been violated */
2226     if ((SUNRabs(cd[i]) > ONEPT5 && xd[i]*cd[i] <= ZERO) ||
2227         (SUNRabs(cd[i]) > HALF   && xd[i]*cd[i] <  ZERO)) {
2228       local_val = md[i] = ONE;
2229     }
2230   }
2231 
2232   /* update global val */
2233   if (local_val > ZERO) {
2234     *global_val = local_val;
2235   }
2236 
2237   /* exit */
2238   pthread_exit(NULL);
2239 }
2240 
2241 
2242 /* ----------------------------------------------------------------------------
2243  * Compute minimum componentwise quotient
2244  */
2245 
N_VMinQuotient_Pthreads(N_Vector num,N_Vector denom)2246 realtype N_VMinQuotient_Pthreads(N_Vector num, N_Vector denom)
2247 {
2248   sunindextype    N;
2249   int             i, nthreads;
2250   pthread_t       *threads;
2251   Pthreads_Data   *thread_data;
2252   pthread_attr_t  attr;
2253   pthread_mutex_t global_mutex;
2254   realtype        min = BIG_REAL;
2255 
2256   /* allocate threads and thread data structs */
2257   N           = NV_LENGTH_PT(num);
2258   nthreads    = NV_NUM_THREADS_PT(num);
2259   threads     = malloc(nthreads*sizeof(pthread_t));
2260   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2261 
2262   /* set thread attributes */
2263   pthread_attr_init(&attr);
2264   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2265 
2266   /* lock for reduction */
2267   pthread_mutex_init(&global_mutex, NULL);
2268 
2269   for (i=0; i<nthreads; i++) {
2270     /* initialize thread data */
2271     N_VInitThreadData(&thread_data[i]);
2272 
2273     /* compute start and end loop index for thread */
2274     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2275 
2276     /* pack thread data */
2277     thread_data[i].v1 = NV_DATA_PT(num);
2278     thread_data[i].v2 = NV_DATA_PT(denom);
2279     thread_data[i].global_val   = &min;
2280     thread_data[i].global_mutex = &global_mutex;
2281 
2282     /* create threads and call pthread companion function */
2283     pthread_create(&threads[i], &attr, N_VMinQuotient_PT, (void *) &thread_data[i]);
2284   }
2285 
2286   /* wait for all threads to finish */
2287   for (i=0; i<nthreads; i++) {
2288     pthread_join(threads[i], NULL);
2289   }
2290 
2291   /* clean up and return */
2292   pthread_attr_destroy(&attr);
2293   pthread_mutex_destroy(&global_mutex);
2294   free(threads);
2295   free(thread_data);
2296 
2297   return(min);
2298 }
2299 
2300 
2301 /* ----------------------------------------------------------------------------
2302  * Pthread companion function to N_VConstrMask
2303  */
2304 
N_VMinQuotient_PT(void * thread_data)2305 static void *N_VMinQuotient_PT(void *thread_data)
2306 {
2307   sunindextype i, start, end;
2308   realtype *nd, *dd;
2309   realtype local_min, *global_min;
2310   Pthreads_Data *my_data;
2311   pthread_mutex_t *global_mutex;
2312 
2313   /* extract thread data */
2314   my_data = (Pthreads_Data *) thread_data;
2315 
2316   nd = my_data->v1;
2317   dd = my_data->v2;
2318 
2319   global_min   = my_data->global_val;
2320   global_mutex = my_data->global_mutex;
2321 
2322   start = my_data->start;
2323   end   = my_data->end;
2324 
2325   /* compute minimum quotient */
2326   local_min = BIG_REAL;
2327   for (i = start; i < end; i++) {
2328     if (dd[i] == ZERO)
2329       continue;
2330     local_min = SUNMIN(local_min, nd[i]/dd[i]);
2331   }
2332 
2333   /* update global min */
2334   pthread_mutex_lock(global_mutex);
2335   if (local_min < *global_min)
2336     *global_min = local_min;
2337   pthread_mutex_unlock(global_mutex);
2338 
2339   /* exit */
2340   pthread_exit(NULL);
2341 }
2342 
2343 
2344 /*
2345  * -----------------------------------------------------------------------------
2346  * fused vector operations
2347  * -----------------------------------------------------------------------------
2348  */
2349 
2350 
2351 /* -----------------------------------------------------------------------------
2352  * Compute the linear combination z = c[i]*X[i]
2353  */
2354 
N_VLinearCombination_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector z)2355 int N_VLinearCombination_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector z)
2356 {
2357   sunindextype   N;
2358   int            i, nthreads;
2359   pthread_t      *threads;
2360   Pthreads_Data  *thread_data;
2361   pthread_attr_t attr;
2362 
2363   /* invalid number of vectors */
2364   if (nvec < 1) return(-1);
2365 
2366   /* should have called N_VScale */
2367   if (nvec == 1) {
2368     N_VScale_Pthreads(c[0], X[0], z);
2369     return(0);
2370   }
2371 
2372   /* should have called N_VLinearSum */
2373   if (nvec == 2) {
2374     N_VLinearSum_Pthreads(c[0], X[0], c[1], X[1], z);
2375     return(0);
2376   }
2377 
2378   /* get vector length and data array */
2379   N           = NV_LENGTH_PT(z);
2380   nthreads    = NV_NUM_THREADS_PT(z);
2381   threads     = malloc(nthreads*sizeof(pthread_t));
2382   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2383 
2384   /* set thread attributes */
2385   pthread_attr_init(&attr);
2386   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2387 
2388   for (i=0; i<nthreads; i++) {
2389     /* initialize thread data */
2390     N_VInitThreadData(&thread_data[i]);
2391 
2392     /* compute start and end loop index for thread */
2393     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2394 
2395     /* pack thread data */
2396     thread_data[i].nvec  = nvec;
2397     thread_data[i].cvals = c;
2398     thread_data[i].Y1    = X;
2399     thread_data[i].x1    = z;
2400 
2401     /* create threads and call pthread companion function */
2402     pthread_create(&threads[i], &attr, N_VLinearCombination_PT, (void *) &thread_data[i]);
2403   }
2404 
2405   /* wait for all threads to finish */
2406   for (i=0; i<nthreads; i++) {
2407     pthread_join(threads[i], NULL);
2408   }
2409 
2410   /* clean up and return */
2411   pthread_attr_destroy(&attr);
2412   free(threads);
2413   free(thread_data);
2414 
2415   return(0);
2416 }
2417 
2418 
2419 /* -----------------------------------------------------------------------------
2420  * Pthread companion function to N_VLinearCombination
2421  */
2422 
N_VLinearCombination_PT(void * thread_data)2423 static void *N_VLinearCombination_PT(void *thread_data)
2424 {
2425   Pthreads_Data *my_data;
2426   sunindextype  j, start, end;
2427 
2428   int       i;
2429   realtype* c=NULL;
2430   realtype* xd=NULL;
2431   realtype* zd=NULL;
2432 
2433   /* extract thread data */
2434   my_data = (Pthreads_Data *) thread_data;
2435 
2436   start = my_data->start;
2437   end   = my_data->end;
2438 
2439   c  = my_data->cvals;
2440   zd = NV_DATA_PT(my_data->x1);
2441 
2442   /*
2443    * X[0] += c[i]*X[i], i = 1,...,nvec-1
2444    */
2445   if ((my_data->Y1[0] == my_data->x1) && (c[0] == ONE)) {
2446     for (i=1; i<my_data->nvec; i++) {
2447       xd = NV_DATA_PT(my_data->Y1[i]);
2448       for (j=start; j<end; j++) {
2449         zd[j] += c[i] * xd[j];
2450       }
2451     }
2452     pthread_exit(NULL);
2453   }
2454 
2455   /*
2456    * X[0] = c[0] * X[0] + sum{ c[i] * X[i] }, i = 1,...,nvec-1
2457    */
2458   if (my_data->Y1[0] == my_data->x1) {
2459     for (j=start; j<end; j++) {
2460       zd[j] *= c[0];
2461     }
2462     for (i=1; i<my_data->nvec; i++) {
2463       xd = NV_DATA_PT(my_data->Y1[i]);
2464       for (j=start; j<end; j++) {
2465         zd[j] += c[i] * xd[j];
2466       }
2467     }
2468     pthread_exit(NULL);
2469   }
2470 
2471   /*
2472    * z = sum{ c[i] * X[i] }, i = 0,...,nvec-1
2473    */
2474   xd = NV_DATA_PT(my_data->Y1[0]);
2475   for (j=start; j<end; j++) {
2476     zd[j] = c[0] * xd[j];
2477   }
2478   for (i=1; i<my_data->nvec; i++) {
2479     xd = NV_DATA_PT(my_data->Y1[i]);
2480     for (j=start; j<end; j++) {
2481       zd[j] += c[i] * xd[j];
2482     }
2483   }
2484   pthread_exit(NULL);
2485 }
2486 
2487 
2488 /* -----------------------------------------------------------------------------
2489  * Compute multiple linear sums Z[i] = Y[i] + a*x
2490  */
2491 
N_VScaleAddMulti_Pthreads(int nvec,realtype * a,N_Vector x,N_Vector * Y,N_Vector * Z)2492 int N_VScaleAddMulti_Pthreads(int nvec, realtype* a, N_Vector x, N_Vector* Y, N_Vector* Z)
2493 {
2494   sunindextype   N;
2495   int            i, nthreads;
2496   pthread_t      *threads;
2497   Pthreads_Data  *thread_data;
2498   pthread_attr_t attr;
2499 
2500   /* invalid number of vectors */
2501   if (nvec < 1) return(-1);
2502 
2503   /* should have called N_VLinearSum */
2504   if (nvec == 1) {
2505     N_VLinearSum_Pthreads(a[0], x, ONE, Y[0], Z[0]);
2506     return(0);
2507   }
2508 
2509   /* get vector length and data array */
2510   N           = NV_LENGTH_PT(x);
2511   nthreads    = NV_NUM_THREADS_PT(x);
2512   threads     = malloc(nthreads*sizeof(pthread_t));
2513   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2514 
2515   /* set thread attributes */
2516   pthread_attr_init(&attr);
2517   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2518 
2519   for (i=0; i<nthreads; i++) {
2520     /* initialize thread data */
2521     N_VInitThreadData(&thread_data[i]);
2522 
2523     /* compute start and end loop index for thread */
2524     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2525 
2526     /* pack thread data */
2527     thread_data[i].nvec  = nvec;
2528     thread_data[i].cvals = a;
2529     thread_data[i].x1    = x;
2530     thread_data[i].Y1    = Y;
2531     thread_data[i].Y2    = Z;
2532 
2533     /* create threads and call pthread companion function */
2534     pthread_create(&threads[i], &attr, N_VScaleAddMulti_PT, (void *) &thread_data[i]);
2535   }
2536 
2537   /* wait for all threads to finish */
2538   for (i=0; i<nthreads; i++) {
2539     pthread_join(threads[i], NULL);
2540   }
2541 
2542   /* clean up and return */
2543   pthread_attr_destroy(&attr);
2544   free(threads);
2545   free(thread_data);
2546 
2547   return(0);
2548 }
2549 
2550 
2551 /* -----------------------------------------------------------------------------
2552  * Pthread companion function to N_VScaleAddMulti
2553  */
2554 
N_VScaleAddMulti_PT(void * thread_data)2555 static void *N_VScaleAddMulti_PT(void *thread_data)
2556 {
2557   Pthreads_Data *my_data;
2558   sunindextype  j, start, end;
2559 
2560   int       i;
2561   realtype* a=NULL;
2562   realtype* xd=NULL;
2563   realtype* yd=NULL;
2564   realtype* zd=NULL;
2565 
2566   /* extract thread data */
2567   my_data = (Pthreads_Data *) thread_data;
2568 
2569   start = my_data->start;
2570   end   = my_data->end;
2571 
2572   a  = my_data->cvals;
2573   xd = NV_DATA_PT(my_data->x1);
2574 
2575   /*
2576    * Y[i][j] += a[i] * x[j]
2577    */
2578   if (my_data->Y1 == my_data->Y2) {
2579     for (i=0; i<my_data->nvec; i++) {
2580       yd = NV_DATA_PT(my_data->Y1[i]);
2581       for (j=start; j<end; j++) {
2582         yd[j] += a[i] * xd[j];
2583       }
2584     }
2585     pthread_exit(NULL);
2586   }
2587 
2588   /*
2589    * Z[i][j] = Y[i][j] + a[i] * x[j]
2590    */
2591   for (i=0; i<my_data->nvec; i++) {
2592     yd = NV_DATA_PT(my_data->Y1[i]);
2593     zd = NV_DATA_PT(my_data->Y2[i]);
2594     for (j=start; j<end; j++) {
2595       zd[j] = a[i] * xd[j] + yd[j];
2596     }
2597   }
2598   pthread_exit(NULL);
2599 }
2600 
2601 
2602 /* -----------------------------------------------------------------------------
2603  * Compute the dot product of a vector with multiple vectors, a[i] = sum(x*Y[i])
2604  */
2605 
N_VDotProdMulti_Pthreads(int nvec,N_Vector x,N_Vector * Y,realtype * dotprods)2606 int N_VDotProdMulti_Pthreads(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods)
2607 {
2608   sunindextype    N;
2609   int             i, nthreads;
2610   pthread_t       *threads;
2611   Pthreads_Data   *thread_data;
2612   pthread_attr_t  attr;
2613   pthread_mutex_t global_mutex;
2614 
2615   /* invalid number of vectors */
2616   if (nvec < 1) return(-1);
2617 
2618   /* should have called N_VDotProd */
2619   if (nvec == 1) {
2620     dotprods[0] = N_VDotProd_Pthreads(x, Y[0]);
2621     return(0);
2622   }
2623 
2624   /* initialize output array */
2625   for (i=0; i<nvec; i++)
2626     dotprods[i] = ZERO;
2627 
2628   /* allocate threads and thread data structs */
2629   N           = NV_LENGTH_PT(x);
2630   nthreads    = NV_NUM_THREADS_PT(x);
2631   threads     = malloc(nthreads*sizeof(pthread_t));
2632   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2633 
2634   /* set thread attributes */
2635   pthread_attr_init(&attr);
2636   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2637 
2638   /* lock for reduction */
2639   pthread_mutex_init(&global_mutex, NULL);
2640 
2641   for (i=0; i<nthreads; i++) {
2642     /* initialize thread data */
2643     N_VInitThreadData(&thread_data[i]);
2644 
2645     /* compute start and end loop index for thread */
2646     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2647 
2648     /* pack thread data */
2649     thread_data[i].nvec  = nvec;
2650     thread_data[i].x1    = x;
2651     thread_data[i].Y1    = Y;
2652     thread_data[i].cvals = dotprods;
2653 
2654     thread_data[i].global_mutex = &global_mutex;
2655 
2656     /* create threads and call pthread companion function */
2657     pthread_create(&threads[i], &attr, N_VDotProdMulti_PT, (void *) &thread_data[i]);
2658   }
2659 
2660   /* wait for all threads to finish */
2661   for (i=0; i<nthreads; i++) {
2662     pthread_join(threads[i], NULL);
2663   }
2664 
2665   /* clean up and return */
2666   pthread_attr_destroy(&attr);
2667   pthread_mutex_destroy(&global_mutex);
2668   free(threads);
2669   free(thread_data);
2670 
2671   return(0);
2672 }
2673 
2674 
2675 /* -----------------------------------------------------------------------------
2676  * Pthread companion function to N_VDotProdMulti
2677  */
2678 
N_VDotProdMulti_PT(void * thread_data)2679 static void *N_VDotProdMulti_PT(void *thread_data)
2680 {
2681   Pthreads_Data   *my_data;
2682   sunindextype    j, start, end;
2683   pthread_mutex_t *lock;
2684 
2685   int       i;
2686   realtype  sum;
2687   realtype* dotprods=NULL;
2688   realtype* xd=NULL;
2689   realtype* yd=NULL;
2690 
2691   /* extract thread data */
2692   my_data = (Pthreads_Data *) thread_data;
2693 
2694   start = my_data->start;
2695   end   = my_data->end;
2696   lock  = my_data->global_mutex;
2697 
2698   xd       = NV_DATA_PT(my_data->x1);
2699   dotprods = my_data->cvals;
2700 
2701   /* compute multiple dot products */
2702   for (i=0; i<my_data->nvec; i++) {
2703     yd  = NV_DATA_PT(my_data->Y1[i]);
2704     sum = ZERO;
2705     for (j=start; j<end; j++) {
2706       sum += xd[j] * yd[j];
2707     }
2708     /* update global sum */
2709     pthread_mutex_lock(lock);
2710     dotprods[i] += sum;
2711     pthread_mutex_unlock(lock);
2712   }
2713 
2714   /* exit */
2715   pthread_exit(NULL);
2716 }
2717 
2718 
2719 /*
2720  * -----------------------------------------------------------------------------
2721  * vector array operations
2722  * -----------------------------------------------------------------------------
2723  */
2724 
2725 
2726 /* -----------------------------------------------------------------------------
2727  * Compute multiple linear sums Z[i] = a*X[i] + b*Y[i]
2728  */
2729 
N_VLinearSumVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,realtype b,N_Vector * Y,N_Vector * Z)2730 int N_VLinearSumVectorArray_Pthreads(int nvec, realtype a, N_Vector* X,
2731                                      realtype b, N_Vector* Y,  N_Vector* Z)
2732 {
2733   sunindextype   N;
2734   int            i, nthreads;
2735   pthread_t      *threads;
2736   Pthreads_Data  *thread_data;
2737   pthread_attr_t attr;
2738 
2739   realtype    c;
2740   N_Vector*  V1;
2741   N_Vector*  V2;
2742   booleantype test;
2743 
2744   /* invalid number of vectors */
2745   if (nvec < 1) return(-1);
2746 
2747   /* should have called N_VLinearSum */
2748   if (nvec == 1) {
2749     N_VLinearSum_Pthreads(a, X[0], b, Y[0], Z[0]);
2750     return(0);
2751   }
2752 
2753   /* BLAS usage: axpy y <- ax+y */
2754   if ((b == ONE) && (Z == Y))
2755     return(VaxpyVectorArray_Pthreads(nvec, a, X, Y));
2756 
2757   /* BLAS usage: axpy x <- by+x */
2758   if ((a == ONE) && (Z == X))
2759     return(VaxpyVectorArray_Pthreads(nvec, b, Y, X));
2760 
2761   /* Case: a == b == 1.0 */
2762   if ((a == ONE) && (b == ONE))
2763     return(VSumVectorArray_Pthreads(nvec, X, Y, Z));
2764 
2765   /* Cases:                    */
2766   /*   (1) a == 1.0, b = -1.0, */
2767   /*   (2) a == -1.0, b == 1.0 */
2768   if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
2769     V1 = test ? Y : X;
2770     V2 = test ? X : Y;
2771     return(VDiffVectorArray_Pthreads(nvec, V2, V1, Z));
2772   }
2773 
2774   /* Cases:                                                  */
2775   /*   (1) a == 1.0, b == other or 0.0,                      */
2776   /*   (2) a == other or 0.0, b == 1.0                       */
2777   /* if a or b is 0.0, then user should have called N_VScale */
2778   if ((test = (a == ONE)) || (b == ONE)) {
2779     c  = test ? b : a;
2780     V1 = test ? Y : X;
2781     V2 = test ? X : Y;
2782     return(VLin1VectorArray_Pthreads(nvec, c, V1, V2, Z));
2783   }
2784 
2785   /* Cases:                     */
2786   /*   (1) a == -1.0, b != 1.0, */
2787   /*   (2) a != 1.0, b == -1.0  */
2788   if ((test = (a == -ONE)) || (b == -ONE)) {
2789     c = test ? b : a;
2790     V1 = test ? Y : X;
2791     V2 = test ? X : Y;
2792     return(VLin2VectorArray_Pthreads(nvec, c, V1, V2, Z));
2793   }
2794 
2795   /* Case: a == b                                                         */
2796   /* catches case both a and b are 0.0 - user should have called N_VConst */
2797   if (a == b)
2798     return(VScaleSumVectorArray_Pthreads(nvec, a, X, Y, Z));
2799 
2800   /* Case: a == -b */
2801   if (a == -b)
2802     return(VScaleDiffVectorArray_Pthreads(nvec, a, X, Y, Z));
2803 
2804   /* Do all cases not handled above:                               */
2805   /*   (1) a == other, b == 0.0 - user should have called N_VScale */
2806   /*   (2) a == 0.0, b == other - user should have called N_VScale */
2807   /*   (3) a,b == other, a !=b, a != -b                            */
2808 
2809   /* get vector length and data array */
2810   N           = NV_LENGTH_PT(Z[0]);
2811   nthreads    = NV_NUM_THREADS_PT(Z[0]);
2812   threads     = malloc(nthreads*sizeof(pthread_t));
2813   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2814 
2815   /* set thread attributes */
2816   pthread_attr_init(&attr);
2817   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2818 
2819   for (i=0; i<nthreads; i++) {
2820     /* initialize thread data */
2821     N_VInitThreadData(&thread_data[i]);
2822 
2823     /* compute start and end loop index for thread */
2824     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2825 
2826     /* pack thread data */
2827     thread_data[i].nvec = nvec;
2828     thread_data[i].c1   = a;
2829     thread_data[i].c2   = b;
2830     thread_data[i].Y1   = X;
2831     thread_data[i].Y2   = Y;
2832     thread_data[i].Y3   = Z;
2833 
2834     /* create threads and call pthread companion function */
2835     pthread_create(&threads[i], &attr, N_VLinearSumVectorArray_PT,
2836                    (void *) &thread_data[i]);
2837   }
2838 
2839   /* wait for all threads to finish */
2840   for (i=0; i<nthreads; i++) {
2841     pthread_join(threads[i], NULL);
2842   }
2843 
2844   /* clean up and return */
2845   pthread_attr_destroy(&attr);
2846   free(threads);
2847   free(thread_data);
2848 
2849   return(0);
2850 }
2851 
2852 
2853 /* -----------------------------------------------------------------------------
2854  * Pthread companion function to N_VLinearSumVectorArray
2855  */
2856 
N_VLinearSumVectorArray_PT(void * thread_data)2857 static void *N_VLinearSumVectorArray_PT(void *thread_data)
2858 {
2859   Pthreads_Data *my_data;
2860   sunindextype  j, start, end;
2861 
2862   int       i;
2863   realtype  a, b;
2864   realtype* xd=NULL;
2865   realtype* yd=NULL;
2866   realtype* zd=NULL;
2867 
2868   /* extract thread data */
2869   my_data = (Pthreads_Data *) thread_data;
2870 
2871   start = my_data->start;
2872   end   = my_data->end;
2873 
2874   a = my_data->c1;
2875   b = my_data->c2;
2876 
2877   /* compute linear sum for each vector pair in vector arrays */
2878   for (i=0; i<my_data->nvec; i++) {
2879     xd = NV_DATA_PT(my_data->Y1[i]);
2880     yd = NV_DATA_PT(my_data->Y2[i]);
2881     zd = NV_DATA_PT(my_data->Y3[i]);
2882     for (j=start; j<end; j++) {
2883       zd[j] = a * xd[j] + b * yd[j];
2884     }
2885   }
2886 
2887   /* exit */
2888   pthread_exit(NULL);
2889 }
2890 
2891 
2892 /* -----------------------------------------------------------------------------
2893  * Scale multiple vectors Z[i] = c[i]*X[i]
2894  */
2895 
N_VScaleVectorArray_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector * Z)2896 int N_VScaleVectorArray_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector* Z)
2897 {
2898   sunindextype   N;
2899   int            i, nthreads;
2900   pthread_t      *threads;
2901   Pthreads_Data  *thread_data;
2902   pthread_attr_t attr;
2903 
2904   /* invalid number of vectors */
2905   if (nvec < 1) return(-1);
2906 
2907   /* should have called N_VScale */
2908   if (nvec == 1) {
2909     N_VScale_Pthreads(c[0], X[0], Z[0]);
2910     return(0);
2911   }
2912 
2913   /* get vector length and data array */
2914   N           = NV_LENGTH_PT(Z[0]);
2915   nthreads    = NV_NUM_THREADS_PT(Z[0]);
2916   threads     = malloc(nthreads*sizeof(pthread_t));
2917   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2918 
2919   /* set thread attributes */
2920   pthread_attr_init(&attr);
2921   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2922 
2923   for (i=0; i<nthreads; i++) {
2924     /* initialize thread data */
2925     N_VInitThreadData(&thread_data[i]);
2926 
2927     /* compute start and end loop index for thread */
2928     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2929 
2930     /* pack thread data */
2931     thread_data[i].nvec  = nvec;
2932     thread_data[i].cvals = c;
2933     thread_data[i].Y1    = X;
2934     thread_data[i].Y2    = Z;
2935 
2936     /* create threads and call pthread companion function */
2937     pthread_create(&threads[i], &attr, N_VScaleVectorArray_PT,
2938                    (void *) &thread_data[i]);
2939   }
2940 
2941   /* wait for all threads to finish */
2942   for (i=0; i<nthreads; i++) {
2943     pthread_join(threads[i], NULL);
2944   }
2945 
2946   /* clean up and return */
2947   pthread_attr_destroy(&attr);
2948   free(threads);
2949   free(thread_data);
2950 
2951   return(0);
2952 }
2953 
2954 
2955 /* -----------------------------------------------------------------------------
2956  * Pthread companion function to N_VScaleVectorArray
2957  */
2958 
N_VScaleVectorArray_PT(void * thread_data)2959 static void *N_VScaleVectorArray_PT(void *thread_data)
2960 {
2961   Pthreads_Data *my_data;
2962   sunindextype  j, start, end;
2963 
2964   int       i;
2965   realtype* c;
2966   realtype* xd=NULL;
2967   realtype* zd=NULL;
2968 
2969   /* extract thread data */
2970   my_data = (Pthreads_Data *) thread_data;
2971 
2972   start = my_data->start;
2973   end   = my_data->end;
2974 
2975   c = my_data->cvals;
2976 
2977   /*
2978    * X[i] *= c[i]
2979    */
2980   if (my_data->Y1 == my_data->Y2) {
2981     for (i=0; i<my_data->nvec; i++) {
2982       xd = NV_DATA_PT(my_data->Y1[i]);
2983       for (j=start; j<end; j++) {
2984         xd[j] *= c[i];
2985       }
2986     }
2987     pthread_exit(NULL);
2988   }
2989 
2990   /*
2991    * Z[i] = c[i] * X[i]
2992    */
2993   for (i=0; i<my_data->nvec; i++) {
2994     xd = NV_DATA_PT(my_data->Y1[i]);
2995     zd = NV_DATA_PT(my_data->Y2[i]);
2996     for (j=start; j<end; j++) {
2997       zd[j] = c[i] * xd[j];
2998     }
2999   }
3000   pthread_exit(NULL);
3001 }
3002 
3003 
3004 /* -----------------------------------------------------------------------------
3005  * Set multiple vectors to a constant value Z[i] = c
3006  */
3007 
N_VConstVectorArray_Pthreads(int nvec,realtype c,N_Vector * Z)3008 int N_VConstVectorArray_Pthreads(int nvec, realtype c, N_Vector* Z)
3009 {
3010   sunindextype   N;
3011   int            i, nthreads;
3012   pthread_t      *threads;
3013   Pthreads_Data  *thread_data;
3014   pthread_attr_t attr;
3015 
3016   /* invalid number of vectors */
3017   if (nvec < 1) return(-1);
3018 
3019   /* should have called N_VConst */
3020   if (nvec == 1) {
3021     N_VConst_Pthreads(c, Z[0]);
3022     return(0);
3023   }
3024 
3025   /* get vector length and data array */
3026   N           = NV_LENGTH_PT(Z[0]);
3027   nthreads    = NV_NUM_THREADS_PT(Z[0]);
3028   threads     = malloc(nthreads*sizeof(pthread_t));
3029   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3030 
3031   /* set thread attributes */
3032   pthread_attr_init(&attr);
3033   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3034 
3035   for (i=0; i<nthreads; i++) {
3036     /* initialize thread data */
3037     N_VInitThreadData(&thread_data[i]);
3038 
3039     /* compute start and end loop index for thread */
3040     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3041 
3042     /* pack thread data */
3043     thread_data[i].nvec = nvec;
3044     thread_data[i].c1   = c;
3045     thread_data[i].Y1   = Z;
3046 
3047     /* create threads and call pthread companion function */
3048     pthread_create(&threads[i], &attr, N_VConstVectorArray_PT,
3049                    (void *) &thread_data[i]);
3050   }
3051 
3052   /* wait for all threads to finish */
3053   for (i=0; i<nthreads; i++) {
3054     pthread_join(threads[i], NULL);
3055   }
3056 
3057   /* clean up and return */
3058   pthread_attr_destroy(&attr);
3059   free(threads);
3060   free(thread_data);
3061 
3062   return(0);
3063 }
3064 
3065 
3066 /* -----------------------------------------------------------------------------
3067  * Pthread companion function to N_VConstVectorArray
3068  */
3069 
N_VConstVectorArray_PT(void * thread_data)3070 static void *N_VConstVectorArray_PT(void *thread_data)
3071 {
3072   Pthreads_Data *my_data;
3073   sunindextype  j, start, end;
3074 
3075   int       i;
3076   realtype* zd=NULL;
3077 
3078   /* extract thread data */
3079   my_data = (Pthreads_Data *) thread_data;
3080 
3081   start = my_data->start;
3082   end   = my_data->end;
3083 
3084   /* set each vector in the vector array to a constant */
3085   for (i=0; i<my_data->nvec; i++) {
3086     zd = NV_DATA_PT(my_data->Y1[i]);
3087     for (j=start; j<end; j++) {
3088       zd[j] = my_data->c1;
3089     }
3090   }
3091 
3092   /* exit */
3093   pthread_exit(NULL);
3094 }
3095 
3096 
3097 /* ----------------------------------------------------------------------------
3098  * Compute the weighted root mean square norm of multiple vectors
3099  */
3100 
N_VWrmsNormVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,realtype * nrm)3101 int N_VWrmsNormVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W, realtype* nrm)
3102 {
3103   sunindextype    N;
3104   int             i, nthreads;
3105   pthread_t       *threads;
3106   Pthreads_Data   *thread_data;
3107   pthread_attr_t  attr;
3108   pthread_mutex_t global_mutex;
3109 
3110   /* invalid number of vectors */
3111   if (nvec < 1) return(-1);
3112 
3113   /* should have called N_VWrmsNorm */
3114   if (nvec == 1) {
3115     nrm[0] = N_VWrmsNorm_Pthreads(X[0], W[0]);
3116     return(0);
3117   }
3118 
3119   /* initialize output array */
3120   for (i=0; i<nvec; i++)
3121     nrm[i] = ZERO;
3122 
3123   /* allocate threads and thread data structs */
3124   N           = NV_LENGTH_PT(X[0]);
3125   nthreads    = NV_NUM_THREADS_PT(X[0]);
3126   threads     = malloc(nthreads*sizeof(pthread_t));
3127   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3128 
3129   /* set thread attributes */
3130   pthread_attr_init(&attr);
3131   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3132 
3133   /* lock for reduction */
3134   pthread_mutex_init(&global_mutex, NULL);
3135 
3136   for (i=0; i<nthreads; i++) {
3137     /* initialize thread data */
3138     N_VInitThreadData(&thread_data[i]);
3139 
3140     /* compute start and end loop index for thread */
3141     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3142 
3143     /* pack thread data */
3144     thread_data[i].nvec  = nvec;
3145     thread_data[i].Y1    = X;
3146     thread_data[i].Y2    = W;
3147     thread_data[i].cvals = nrm;
3148 
3149     thread_data[i].global_mutex = &global_mutex;
3150 
3151     /* create threads and call pthread companion function */
3152     pthread_create(&threads[i], &attr, N_VWrmsNormVectorArray_PT,
3153                    (void *) &thread_data[i]);
3154   }
3155 
3156   /* wait for all threads to finish */
3157   for (i=0; i<nthreads; i++) {
3158     pthread_join(threads[i], NULL);
3159   }
3160 
3161   /* finalize wrms calculation */
3162   for (i=0; i<nvec; i++)
3163     nrm[i] = SUNRsqrt(nrm[i]/N);
3164 
3165   /* clean up and return */
3166   pthread_attr_destroy(&attr);
3167   pthread_mutex_destroy(&global_mutex);
3168   free(threads);
3169   free(thread_data);
3170 
3171   return(0);
3172 }
3173 
3174 
3175 /* ----------------------------------------------------------------------------
3176  * Pthread companion function to N_VWrmsNormVectorArray
3177  */
3178 
N_VWrmsNormVectorArray_PT(void * thread_data)3179 static void *N_VWrmsNormVectorArray_PT(void *thread_data)
3180 {
3181   Pthreads_Data   *my_data;
3182   sunindextype    j, start, end;
3183   pthread_mutex_t *lock;
3184 
3185   int       i;
3186   realtype  sum;
3187   realtype* nrm=NULL;
3188   realtype* xd=NULL;
3189   realtype* wd=NULL;
3190 
3191   /* extract thread data */
3192   my_data = (Pthreads_Data *) thread_data;
3193 
3194   start = my_data->start;
3195   end   = my_data->end;
3196   lock  = my_data->global_mutex;
3197 
3198   nrm = my_data->cvals;
3199 
3200   /* compute the WRMS norm for each vector in the vector array */
3201   for (i=0; i<my_data->nvec; i++) {
3202     xd = NV_DATA_PT(my_data->Y1[i]);
3203     wd = NV_DATA_PT(my_data->Y2[i]);
3204     sum = ZERO;
3205     for (j=start; j<end; j++) {
3206       sum += SUNSQR(xd[j] * wd[j]);
3207     }
3208     /* update global sum */
3209     pthread_mutex_lock(lock);
3210     nrm[i] += sum;
3211     pthread_mutex_unlock(lock);
3212   }
3213 
3214   /* exit */
3215   pthread_exit(NULL);
3216 }
3217 
3218 
3219 /* ----------------------------------------------------------------------------
3220  * Compute the weighted root mean square norm of multiple vectors
3221  */
3222 
N_VWrmsNormMaskVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,N_Vector id,realtype * nrm)3223 int N_VWrmsNormMaskVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W,
3224                                         N_Vector id, realtype* nrm)
3225 {
3226   sunindextype    N;
3227   int             i, nthreads;
3228   pthread_t       *threads;
3229   Pthreads_Data   *thread_data;
3230   pthread_attr_t  attr;
3231   pthread_mutex_t global_mutex;
3232 
3233   /* invalid number of vectors */
3234   if (nvec < 1) return(-1);
3235 
3236   /* should have called N_VWrmsNorm */
3237   if (nvec == 1) {
3238     nrm[0] = N_VWrmsNormMask_Pthreads(X[0], W[0], id);
3239     return(0);
3240   }
3241 
3242   /* initialize output array */
3243   for (i=0; i<nvec; i++)
3244     nrm[i] = ZERO;
3245 
3246   /* allocate threads and thread data structs */
3247   N           = NV_LENGTH_PT(X[0]);
3248   nthreads    = NV_NUM_THREADS_PT(X[0]);
3249   threads     = malloc(nthreads*sizeof(pthread_t));
3250   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3251 
3252   /* set thread attributes */
3253   pthread_attr_init(&attr);
3254   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3255 
3256   /* lock for reduction */
3257   pthread_mutex_init(&global_mutex, NULL);
3258 
3259   for (i=0; i<nthreads; i++) {
3260     /* initialize thread data */
3261     N_VInitThreadData(&thread_data[i]);
3262 
3263     /* compute start and end loop index for thread */
3264     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3265 
3266     /* pack thread data */
3267     thread_data[i].nvec  = nvec;
3268     thread_data[i].Y1    = X;
3269     thread_data[i].Y2    = W;
3270     thread_data[i].x1    = id;
3271     thread_data[i].cvals = nrm;
3272 
3273     thread_data[i].global_mutex = &global_mutex;
3274 
3275     /* create threads and call pthread companion function */
3276     pthread_create(&threads[i], &attr, N_VWrmsNormMaskVectorArray_PT,
3277                    (void *) &thread_data[i]);
3278   }
3279 
3280   /* wait for all threads to finish */
3281   for (i=0; i<nthreads; i++) {
3282     pthread_join(threads[i], NULL);
3283   }
3284 
3285   /* finalize wrms calculation */
3286   for (i=0; i<nvec; i++)
3287     nrm[i] = SUNRsqrt(nrm[i]/N);
3288 
3289   /* clean up and return */
3290   pthread_attr_destroy(&attr);
3291   pthread_mutex_destroy(&global_mutex);
3292   free(threads);
3293   free(thread_data);
3294 
3295   return(0);
3296 }
3297 
3298 
3299 /* ----------------------------------------------------------------------------
3300  * Pthread companion function to N_VWrmsNormVectorArray
3301  */
3302 
N_VWrmsNormMaskVectorArray_PT(void * thread_data)3303 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data)
3304 {
3305   Pthreads_Data   *my_data;
3306   sunindextype    j, start, end;
3307   pthread_mutex_t *lock;
3308 
3309   int       i;
3310   realtype  sum;
3311   realtype* nrm=NULL;
3312   realtype* xd=NULL;
3313   realtype* wd=NULL;
3314   realtype* idd=NULL;
3315 
3316   /* extract thread data */
3317   my_data = (Pthreads_Data *) thread_data;
3318 
3319   start = my_data->start;
3320   end   = my_data->end;
3321   lock  = my_data->global_mutex;
3322 
3323   nrm = my_data->cvals;
3324   idd = NV_DATA_PT(my_data->x1);
3325 
3326   /* compute the WRMS norm for each vector in the vector array */
3327   for (i=0; i<my_data->nvec; i++) {
3328     xd = NV_DATA_PT(my_data->Y1[i]);
3329     wd = NV_DATA_PT(my_data->Y2[i]);
3330     sum = ZERO;
3331     for (j=start; j<end; j++) {
3332       if (idd[j] > ZERO)
3333         sum += SUNSQR(xd[j] * wd[j]);
3334     }
3335     /* update global sum */
3336     pthread_mutex_lock(lock);
3337     nrm[i] += sum;
3338     pthread_mutex_unlock(lock);
3339   }
3340 
3341   /* exit */
3342   pthread_exit(NULL);
3343 }
3344 
3345 
3346 /* -----------------------------------------------------------------------------
3347  * Scale and add a vector to multiple vectors Z = Y + a*X
3348  */
3349 
N_VScaleAddMultiVectorArray_Pthreads(int nvec,int nsum,realtype * a,N_Vector * X,N_Vector ** Y,N_Vector ** Z)3350 int N_VScaleAddMultiVectorArray_Pthreads(int nvec, int nsum, realtype* a,
3351                                           N_Vector* X, N_Vector** Y, N_Vector** Z)
3352 {
3353   sunindextype   N;
3354   int            i, j, nthreads;
3355   pthread_t      *threads;
3356   Pthreads_Data  *thread_data;
3357   pthread_attr_t attr;
3358 
3359   int          retval;
3360   N_Vector*   YY;
3361   N_Vector*   ZZ;
3362 
3363   /* invalid number of vectors */
3364   if (nvec < 1) return(-1);
3365   if (nsum < 1) return(-1);
3366 
3367   /* ---------------------------
3368    * Special cases for nvec == 1
3369    * --------------------------- */
3370 
3371   if (nvec == 1) {
3372 
3373     /* should have called N_VLinearSum */
3374     if (nsum == 1) {
3375       N_VLinearSum_Pthreads(a[0], X[0], ONE, Y[0][0], Z[0][0]);
3376       return(0);
3377     }
3378 
3379     /* should have called N_VScaleAddMulti */
3380     YY = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3381     ZZ = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3382 
3383     for (j=0; j<nsum; j++) {
3384       YY[j] = Y[j][0];
3385       ZZ[j] = Z[j][0];
3386     }
3387 
3388     retval = N_VScaleAddMulti_Pthreads(nsum, a, X[0], YY, ZZ);
3389 
3390     free(YY);
3391     free(ZZ);
3392     return(retval);
3393   }
3394 
3395   /* --------------------------
3396    * Special cases for nvec > 1
3397    * -------------------------- */
3398 
3399   /* should have called N_VLinearSumVectorArray */
3400   if (nsum == 1) {
3401     retval = N_VLinearSumVectorArray_Pthreads(nvec, a[0], X, ONE, Y[0], Z[0]);
3402     return(retval);
3403   }
3404 
3405   /* ----------------------------
3406    * Compute multiple linear sums
3407    * ---------------------------- */
3408 
3409   /* get vector length and data array */
3410   N           = NV_LENGTH_PT(X[0]);
3411   nthreads    = NV_NUM_THREADS_PT(X[0]);
3412   threads     = malloc(nthreads*sizeof(pthread_t));
3413   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3414 
3415   /* set thread attributes */
3416   pthread_attr_init(&attr);
3417   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3418 
3419   for (i=0; i<nthreads; i++) {
3420     /* initialize thread data */
3421     N_VInitThreadData(&thread_data[i]);
3422 
3423     /* compute start and end loop index for thread */
3424     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3425 
3426     /* pack thread data */
3427     thread_data[i].nvec  = nvec;
3428     thread_data[i].nsum  = nsum;
3429     thread_data[i].cvals = a;
3430     thread_data[i].Y1    = X;
3431     thread_data[i].ZZ1   = Y;
3432     thread_data[i].ZZ2   = Z;
3433 
3434     /* create threads and call pthread companion function */
3435     pthread_create(&threads[i], &attr, N_VScaleAddMultiVectorArray_PT,
3436                    (void *) &thread_data[i]);
3437   }
3438 
3439   /* wait for all threads to finish */
3440   for (i=0; i<nthreads; i++) {
3441     pthread_join(threads[i], NULL);
3442   }
3443 
3444   /* clean up and return */
3445   pthread_attr_destroy(&attr);
3446   free(threads);
3447   free(thread_data);
3448 
3449   return(0);
3450 }
3451 
3452 
3453 /* -----------------------------------------------------------------------------
3454  * Pthread companion function to N_VScaleAddMultiVectorArray
3455  */
3456 
N_VScaleAddMultiVectorArray_PT(void * thread_data)3457 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data)
3458 {
3459   Pthreads_Data *my_data;
3460   sunindextype  k, start, end;
3461 
3462   int       i, j;
3463   realtype* a=NULL;
3464   realtype* xd=NULL;
3465   realtype* yd=NULL;
3466   realtype* zd=NULL;
3467 
3468   /* extract thread data */
3469   my_data = (Pthreads_Data *) thread_data;
3470 
3471   start = my_data->start;
3472   end   = my_data->end;
3473 
3474   a = my_data->cvals;
3475 
3476   /*
3477    * Y[i][j] += a[i] * x[j]
3478    */
3479   if (my_data->ZZ1 == my_data->ZZ2) {
3480     for (i=0; i<my_data->nvec; i++) {
3481       xd = NV_DATA_PT(my_data->Y1[i]);
3482       for (j=0; j<my_data->nsum; j++){
3483         yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3484         for (k=start; k<end; k++) {
3485           yd[k] += a[j] * xd[k];
3486         }
3487       }
3488     }
3489     pthread_exit(NULL);
3490   }
3491 
3492   /*
3493    * Z[i][j] = Y[i][j] + a[i] * x[j]
3494    */
3495   for (i=0; i<my_data->nvec; i++) {
3496     xd = NV_DATA_PT(my_data->Y1[i]);
3497     for (j=0; j<my_data->nsum; j++){
3498       yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3499       zd = NV_DATA_PT(my_data->ZZ2[j][i]);
3500       for (k=start; k<end; k++) {
3501         zd[k] = a[j] * xd[k] + yd[k];
3502       }
3503     }
3504   }
3505   pthread_exit(NULL);
3506 }
3507 
3508 
3509 /* -----------------------------------------------------------------------------
3510  * Compute a linear combination for multiple vectors
3511  */
3512 
N_VLinearCombinationVectorArray_Pthreads(int nvec,int nsum,realtype * c,N_Vector ** X,N_Vector * Z)3513 int N_VLinearCombinationVectorArray_Pthreads(int nvec, int nsum, realtype* c,
3514                                            N_Vector** X, N_Vector* Z)
3515 {
3516   sunindextype   N;
3517   int            i, j, nthreads;
3518   pthread_t      *threads;
3519   Pthreads_Data  *thread_data;
3520   pthread_attr_t attr;
3521 
3522   int          retval;
3523   realtype*    ctmp;
3524   N_Vector*   Y;
3525 
3526   /* invalid number of vectors */
3527   if (nvec < 1) return(-1);
3528   if (nsum < 1) return(-1);
3529 
3530   /* ---------------------------
3531    * Special cases for nvec == 1
3532    * --------------------------- */
3533 
3534   if (nvec == 1) {
3535 
3536     /* should have called N_VScale */
3537     if (nsum == 1) {
3538       N_VScale_Pthreads(c[0], X[0][0], Z[0]);
3539       return(0);
3540     }
3541 
3542     /* should have called N_VLinearSum */
3543     if (nsum == 2) {
3544       N_VLinearSum_Pthreads(c[0], X[0][0], c[1], X[1][0], Z[0]);
3545       return(0);
3546     }
3547 
3548     /* should have called N_VLinearCombination */
3549     Y = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3550 
3551     for (i=0; i<nsum; i++) {
3552       Y[i] = X[i][0];
3553     }
3554 
3555     retval = N_VLinearCombination_Pthreads(nsum, c, Y, Z[0]);
3556 
3557     free(Y);
3558     return(retval);
3559   }
3560 
3561   /* --------------------------
3562    * Special cases for nvec > 1
3563    * -------------------------- */
3564 
3565   /* should have called N_VScaleVectorArray */
3566   if (nsum == 1) {
3567 
3568     ctmp = (realtype*) malloc(nvec * sizeof(realtype));
3569 
3570     for (j=0; j<nvec; j++) {
3571       ctmp[j] = c[0];
3572     }
3573 
3574     retval = N_VScaleVectorArray_Pthreads(nvec, ctmp, X[0], Z);
3575 
3576     free(ctmp);
3577     return(retval);
3578   }
3579 
3580   /* should have called N_VLinearSumVectorArray */
3581   if (nsum == 2) {
3582     retval = N_VLinearSumVectorArray_Pthreads(nvec, c[0], X[0], c[1], X[1], Z);
3583     return(retval);
3584   }
3585 
3586   /* --------------------------
3587    * Compute linear combination
3588    * -------------------------- */
3589 
3590   /* get vector length and data array */
3591   N           = NV_LENGTH_PT(Z[0]);
3592   nthreads    = NV_NUM_THREADS_PT(Z[0]);
3593   threads     = malloc(nthreads*sizeof(pthread_t));
3594   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3595 
3596   /* set thread attributes */
3597   pthread_attr_init(&attr);
3598   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3599 
3600   for (i=0; i<nthreads; i++) {
3601     /* initialize thread data */
3602     N_VInitThreadData(&thread_data[i]);
3603 
3604     /* compute start and end loop index for thread */
3605     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3606 
3607     /* pack thread data */
3608     thread_data[i].nvec  = nvec;
3609     thread_data[i].nsum  = nsum;
3610     thread_data[i].cvals = c;
3611     thread_data[i].ZZ1   = X;
3612     thread_data[i].Y1    = Z;
3613 
3614     /* create threads and call pthread companion function */
3615     pthread_create(&threads[i], &attr, N_VLinearCombinationVectorArray_PT,
3616                    (void *) &thread_data[i]);
3617   }
3618 
3619   /* wait for all threads to finish */
3620   for (i=0; i<nthreads; i++) {
3621     pthread_join(threads[i], NULL);
3622   }
3623 
3624   /* clean up and return */
3625   pthread_attr_destroy(&attr);
3626   free(threads);
3627   free(thread_data);
3628 
3629   return(0);
3630 }
3631 
3632 
3633 /* -----------------------------------------------------------------------------
3634  * Pthread companion function to N_VLinearCombinationVectorArray
3635  */
3636 
N_VLinearCombinationVectorArray_PT(void * thread_data)3637 static void *N_VLinearCombinationVectorArray_PT(void *thread_data)
3638 {
3639   Pthreads_Data *my_data;
3640   sunindextype  k, start, end;
3641 
3642   int       i; /* vector arrays index in summation [0,nsum) */
3643   int       j; /* vector index in vector array     [0,nvec) */
3644   realtype* c=NULL;
3645   realtype* zd=NULL;
3646   realtype* xd=NULL;
3647 
3648   /* extract thread data */
3649   my_data = (Pthreads_Data *) thread_data;
3650 
3651   start = my_data->start;
3652   end   = my_data->end;
3653 
3654   c = my_data->cvals;
3655 
3656   /*
3657    * X[0][j] += c[i]*X[i][j], i = 1,...,nvec-1
3658    */
3659   if ((my_data->ZZ1[0] == my_data->Y1) && (c[0] == ONE)) {
3660     for (j=0; j<my_data->nvec; j++) {
3661       zd = NV_DATA_PT(my_data->Y1[j]);
3662       for (i=1; i<my_data->nsum; i++) {
3663         xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3664         for (k=start; k<end; k++) {
3665           zd[k] += c[i] * xd[k];
3666         }
3667       }
3668     }
3669     pthread_exit(NULL);
3670   }
3671 
3672   /*
3673    * X[0][j] = c[0] * X[0][j] + sum{ c[i] * X[i][j] }, i = 1,...,nvec-1
3674    */
3675   if (my_data->ZZ1[0] == my_data->Y1) {
3676     for (j=0; j<my_data->nvec; j++) {
3677       zd = NV_DATA_PT(my_data->Y1[j]);
3678       for (k=start; k<end; k++) {
3679         zd[k] *= c[0];
3680       }
3681       for (i=1; i<my_data->nsum; i++) {
3682         xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3683         for (k=start; k<end; k++) {
3684           zd[k] += c[i] * xd[k];
3685         }
3686       }
3687     }
3688     pthread_exit(NULL);
3689   }
3690 
3691   /*
3692    * Z[j] = sum{ c[i] * X[i][j] }, i = 0,...,nvec-1
3693    */
3694   for (j=0; j<my_data->nvec; j++) {
3695     xd = NV_DATA_PT(my_data->ZZ1[0][j]);
3696     zd = NV_DATA_PT(my_data->Y1[j]);
3697     for (k=start; k<end; k++) {
3698       zd[k] = c[0] * xd[k];
3699     }
3700     for (i=1; i<my_data->nsum; i++) {
3701       xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3702       for (k=start; k<end; k++) {
3703         zd[k] += c[i] * xd[k];
3704       }
3705     }
3706   }
3707   pthread_exit(NULL);
3708 }
3709 
3710 
3711 /*
3712  * -----------------------------------------------------------------
3713  * private functions for special cases of vector operations
3714  * -----------------------------------------------------------------
3715  */
3716 
3717 
3718 /* ----------------------------------------------------------------------------
3719  * Copy vector components into second vector
3720  */
3721 
VCopy_Pthreads(N_Vector x,N_Vector z)3722 static void VCopy_Pthreads(N_Vector x, N_Vector z)
3723 {
3724   sunindextype      N;
3725   int           i, nthreads;
3726   pthread_t     *threads;
3727   Pthreads_Data *thread_data;
3728   pthread_attr_t attr;
3729 
3730   /* allocate threads and thread data structs */
3731   N            = NV_LENGTH_PT(x);
3732   nthreads     = NV_NUM_THREADS_PT(x);
3733   threads      = malloc(nthreads*sizeof(pthread_t));
3734   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3735 
3736   /* set thread attributes */
3737   pthread_attr_init(&attr);
3738   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3739 
3740   for (i=0; i<nthreads; i++) {
3741     /* initialize thread data */
3742     N_VInitThreadData(&thread_data[i]);
3743 
3744     /* compute start and end loop index for thread */
3745     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3746 
3747     /* pack thread data */
3748     thread_data[i].v1 = NV_DATA_PT(x);
3749     thread_data[i].v2 = NV_DATA_PT(z);
3750 
3751     /* create threads and call pthread companion function */
3752     pthread_create(&threads[i], &attr, VCopy_PT, (void *) &thread_data[i]);
3753   }
3754 
3755   /* wait for all threads to finish */
3756   for (i=0; i<nthreads; i++) {
3757     pthread_join(threads[i], NULL);
3758   }
3759 
3760   /* clean up and return */
3761   pthread_attr_destroy(&attr);
3762   free(threads);
3763   free(thread_data);
3764 
3765   return;
3766 }
3767 
3768 
3769 /* ----------------------------------------------------------------------------
3770  * Pthread companion function to VCopy
3771  */
3772 
VCopy_PT(void * thread_data)3773 static void *VCopy_PT(void *thread_data)
3774 {
3775   sunindextype i, start, end;
3776   realtype *xd, *zd;
3777   Pthreads_Data *my_data;
3778 
3779   /* extract thread data */
3780   my_data = (Pthreads_Data *) thread_data;
3781 
3782   xd = my_data->v1;
3783   zd = my_data->v2;
3784 
3785   start = my_data->start;
3786   end   = my_data->end;
3787 
3788   /* copy vector components */
3789   for (i = start; i < end; i++)
3790     zd[i] = xd[i];
3791 
3792   /* exit */
3793   pthread_exit(NULL);
3794 }
3795 
3796 
3797 /* ----------------------------------------------------------------------------
3798  * Compute vector sum
3799  */
3800 
VSum_Pthreads(N_Vector x,N_Vector y,N_Vector z)3801 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z)
3802 {
3803   sunindextype      N;
3804   int           i, nthreads;
3805   pthread_t     *threads;
3806   Pthreads_Data *thread_data;
3807   pthread_attr_t attr;
3808 
3809   /* allocate threads and thread data structs */
3810   N            = NV_LENGTH_PT(x);
3811   nthreads     = NV_NUM_THREADS_PT(x);
3812   threads      = malloc(nthreads*sizeof(pthread_t));
3813   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3814 
3815   /* set thread attributes */
3816   pthread_attr_init(&attr);
3817   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3818 
3819   for (i=0; i<nthreads; i++) {
3820     /* initialize thread data */
3821     N_VInitThreadData(&thread_data[i]);
3822 
3823     /* compute start and end loop index for thread */
3824     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3825 
3826     /* pack thread data */
3827     thread_data[i].v1 = NV_DATA_PT(x);
3828     thread_data[i].v2 = NV_DATA_PT(y);
3829     thread_data[i].v3 = NV_DATA_PT(z);
3830 
3831     /* create threads and call pthread companion function */
3832     pthread_create(&threads[i], &attr, VSum_PT, (void *) &thread_data[i]);
3833   }
3834 
3835   /* wait for all threads to finish */
3836   for (i=0; i<nthreads; i++) {
3837     pthread_join(threads[i], NULL);
3838   }
3839 
3840   /* clean up and return */
3841   pthread_attr_destroy(&attr);
3842   free(threads);
3843   free(thread_data);
3844 
3845   return;
3846 }
3847 
3848 
3849 /* ----------------------------------------------------------------------------
3850  * Pthread companion function to VSum
3851  */
3852 
VSum_PT(void * thread_data)3853 static void *VSum_PT(void *thread_data)
3854 {
3855   sunindextype i, start, end;
3856   realtype *xd, *yd, *zd;
3857   Pthreads_Data *my_data;
3858 
3859   /* extract thread data */
3860   my_data = (Pthreads_Data *) thread_data;
3861 
3862   xd = my_data->v1;
3863   yd = my_data->v2;
3864   zd = my_data->v3;
3865 
3866   start = my_data->start;
3867   end   = my_data->end;
3868 
3869   /* compute vector sum */
3870   for (i = start; i < end; i++)
3871     zd[i] = xd[i] + yd[i];
3872 
3873   /* exit */
3874   pthread_exit(NULL);
3875 }
3876 
3877 
3878 /* ----------------------------------------------------------------------------
3879  * Compute vector difference
3880  */
3881 
VDiff_Pthreads(N_Vector x,N_Vector y,N_Vector z)3882 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z)
3883 {
3884   sunindextype  N;
3885   int           i, nthreads;
3886   pthread_t     *threads;
3887   Pthreads_Data *thread_data;
3888   pthread_attr_t attr;
3889 
3890   /* allocate threads and thread data structs */
3891   N            = NV_LENGTH_PT(x);
3892   nthreads     = NV_NUM_THREADS_PT(x);
3893   threads      = malloc(nthreads*sizeof(pthread_t));
3894   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3895 
3896   /* set thread attributes */
3897   pthread_attr_init(&attr);
3898   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3899 
3900   for (i=0; i<nthreads; i++) {
3901     /* initialize thread data */
3902     N_VInitThreadData(&thread_data[i]);
3903 
3904     /* compute start and end loop index for thread */
3905     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3906 
3907     /* pack thread data */
3908     thread_data[i].v1 = NV_DATA_PT(x);
3909     thread_data[i].v2 = NV_DATA_PT(y);
3910     thread_data[i].v3 = NV_DATA_PT(z);
3911 
3912     /* create threads and call pthread companion function */
3913     pthread_create(&threads[i], &attr, VDiff_PT, (void *) &thread_data[i]);
3914   }
3915 
3916   /* wait for all threads to finish */
3917   for (i=0; i<nthreads; i++) {
3918     pthread_join(threads[i], NULL);
3919   }
3920 
3921   /* clean up and return */
3922   pthread_attr_destroy(&attr);
3923   free(threads);
3924   free(thread_data);
3925 
3926   return;
3927 }
3928 
3929 
3930 /* ----------------------------------------------------------------------------
3931  * Pthread companion function to VDiff
3932  */
3933 
VDiff_PT(void * thread_data)3934 static void *VDiff_PT(void *thread_data)
3935 {
3936   sunindextype i, start, end;
3937   realtype *xd, *yd, *zd;
3938   Pthreads_Data *my_data;
3939 
3940   /* extract thread data */
3941   my_data = (Pthreads_Data *) thread_data;
3942 
3943   xd = my_data->v1;
3944   yd = my_data->v2;
3945   zd = my_data->v3;
3946 
3947   start = my_data->start;
3948   end   = my_data->end;
3949 
3950   /* compute vector difference */
3951   for (i = start; i < end; i++)
3952     zd[i] = xd[i] - yd[i];
3953 
3954   /* exit */
3955   pthread_exit(NULL);
3956 }
3957 
3958 
3959 /* ----------------------------------------------------------------------------
3960  * Compute the negative of a vector
3961  */
3962 
VNeg_Pthreads(N_Vector x,N_Vector z)3963 static void VNeg_Pthreads(N_Vector x, N_Vector z)
3964 {
3965   sunindextype  N;
3966   int           i, nthreads;
3967   pthread_t     *threads;
3968   Pthreads_Data *thread_data;
3969   pthread_attr_t attr;
3970 
3971   /* allocate threads and thread data structs */
3972   N            = NV_LENGTH_PT(x);
3973   nthreads     = NV_NUM_THREADS_PT(x);
3974   threads      = malloc(nthreads*sizeof(pthread_t));
3975   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3976 
3977   /* set thread attributes */
3978   pthread_attr_init(&attr);
3979   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3980 
3981   for (i=0; i<nthreads; i++) {
3982     /* initialize thread data */
3983     N_VInitThreadData(&thread_data[i]);
3984 
3985     /* compute start and end loop index for thread */
3986     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3987 
3988     /* pack thread data */
3989     thread_data[i].v1 = NV_DATA_PT(x);
3990     thread_data[i].v2 = NV_DATA_PT(z);
3991 
3992     /* create threads and call pthread companion function */
3993     pthread_create(&threads[i], &attr, VNeg_PT, (void *) &thread_data[i]);
3994   }
3995 
3996   /* wait for all threads to finish */
3997   for (i=0; i<nthreads; i++) {
3998     pthread_join(threads[i], NULL);
3999   }
4000 
4001   /* clean up and return */
4002   pthread_attr_destroy(&attr);
4003   free(threads);
4004   free(thread_data);
4005 
4006   return;
4007 }
4008 
4009 
4010 /* ----------------------------------------------------------------------------
4011  * Pthread companion function to VNeg
4012  */
4013 
VNeg_PT(void * thread_data)4014 static void *VNeg_PT(void *thread_data)
4015 {
4016   sunindextype i, start, end;
4017   realtype *xd, *zd;
4018   Pthreads_Data *my_data;
4019 
4020   /* extract thread data */
4021   my_data = (Pthreads_Data *) thread_data;
4022 
4023   xd = my_data->v1;
4024   zd = my_data->v2;
4025 
4026   start = my_data->start;
4027   end   = my_data->end;
4028 
4029   /* compute negative of vector */
4030   for (i = start; i < end; i++)
4031     zd[i] = -xd[i];
4032 
4033   /* exit */
4034   pthread_exit(NULL);
4035 }
4036 
4037 
4038 /* ----------------------------------------------------------------------------
4039  * Compute scaled vector sum
4040  */
4041 
VScaleSum_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4042 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4043 {
4044   sunindextype  N;
4045   int           i, nthreads;
4046   pthread_t     *threads;
4047   Pthreads_Data *thread_data;
4048   pthread_attr_t attr;
4049 
4050   /* allocate threads and thread data structs */
4051   N            = NV_LENGTH_PT(x);
4052   nthreads     = NV_NUM_THREADS_PT(x);
4053   threads      = malloc(nthreads*sizeof(pthread_t));
4054   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4055 
4056   /* set thread attributes */
4057   pthread_attr_init(&attr);
4058   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4059 
4060   for (i=0; i<nthreads; i++) {
4061     /* initialize thread data */
4062     N_VInitThreadData(&thread_data[i]);
4063 
4064     /* compute start and end loop index for thread */
4065     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4066 
4067     /* pack thread data */
4068     thread_data[i].c1 = c;
4069     thread_data[i].v1 = NV_DATA_PT(x);
4070     thread_data[i].v2 = NV_DATA_PT(y);
4071     thread_data[i].v3 = NV_DATA_PT(z);
4072 
4073     /* create threads and call pthread companion function */
4074     pthread_create(&threads[i], &attr, VScaleSum_PT, (void *) &thread_data[i]);
4075   }
4076 
4077   /* wait for all threads to finish */
4078   for (i=0; i<nthreads; i++) {
4079     pthread_join(threads[i], NULL);
4080   }
4081 
4082   /* clean up and return */
4083   pthread_attr_destroy(&attr);
4084   free(threads);
4085   free(thread_data);
4086 
4087   return;
4088 }
4089 
4090 
4091 /* ----------------------------------------------------------------------------
4092  * Pthread companion function to VScaleSum
4093  */
4094 
VScaleSum_PT(void * thread_data)4095 static void *VScaleSum_PT(void *thread_data)
4096 {
4097   sunindextype i, start, end;
4098   realtype c;
4099   realtype *xd, *yd, *zd;
4100   Pthreads_Data *my_data;
4101 
4102   /* extract thread data */
4103   my_data = (Pthreads_Data *) thread_data;
4104 
4105   c  = my_data->c1;
4106   xd = my_data->v1;
4107   yd = my_data->v2;
4108   zd = my_data->v3;
4109 
4110   start = my_data->start;
4111   end   = my_data->end;
4112 
4113   /* compute scaled vector sum */
4114   for (i = start; i < end; i++)
4115     zd[i] = c*(xd[i] + yd[i]);
4116 
4117   /* exit */
4118   pthread_exit(NULL);
4119 }
4120 
4121 
4122 /* ----------------------------------------------------------------------------
4123  * Compute scaled vector difference
4124  */
4125 
VScaleDiff_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4126 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4127 {
4128   sunindextype  N;
4129   int           i, nthreads;
4130   pthread_t     *threads;
4131   Pthreads_Data *thread_data;
4132   pthread_attr_t attr;
4133 
4134   /* allocate threads and thread data structs */
4135   N            = NV_LENGTH_PT(x);
4136   nthreads     = NV_NUM_THREADS_PT(x);
4137   threads      = malloc(nthreads*sizeof(pthread_t));
4138   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4139 
4140   /* set thread attributes */
4141   pthread_attr_init(&attr);
4142   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4143 
4144   for (i=0; i<nthreads; i++) {
4145     /* initialize thread data */
4146     N_VInitThreadData(&thread_data[i]);
4147 
4148     /* compute start and end loop index for thread */
4149     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4150 
4151     /* pack thread data */
4152     thread_data[i].c1 = c;
4153     thread_data[i].v1 = NV_DATA_PT(x);
4154     thread_data[i].v2 = NV_DATA_PT(y);
4155     thread_data[i].v3 = NV_DATA_PT(z);
4156 
4157     /* create threads and call pthread companion function */
4158     pthread_create(&threads[i], &attr, VScaleDiff_PT, (void *) &thread_data[i]);
4159   }
4160 
4161   /* wait for all threads to finish */
4162   for (i=0; i<nthreads; i++) {
4163     pthread_join(threads[i], NULL);
4164   }
4165 
4166   /* clean up and return */
4167   pthread_attr_destroy(&attr);
4168   free(threads);
4169   free(thread_data);
4170 
4171   return;
4172 }
4173 
4174 
4175 /* ----------------------------------------------------------------------------
4176  * Pthread companion function to VScaleDiff
4177  */
4178 
VScaleDiff_PT(void * thread_data)4179 static void *VScaleDiff_PT(void *thread_data)
4180 {
4181   sunindextype i, start, end;
4182   realtype c;
4183   realtype *xd, *yd, *zd;
4184   Pthreads_Data *my_data;
4185 
4186   /* extract thread data */
4187   my_data = (Pthreads_Data *) thread_data;
4188 
4189   c  = my_data->c1;
4190   xd = my_data->v1;
4191   yd = my_data->v2;
4192   zd = my_data->v3;
4193 
4194   start = my_data->start;
4195   end   = my_data->end;
4196 
4197   /* compute scaled vector difference */
4198   for (i = start; i < end; i++)
4199     zd[i] = c*(xd[i] - yd[i]);
4200 
4201   /* exit */
4202   pthread_exit(NULL);
4203 }
4204 
4205 
4206 /* ----------------------------------------------------------------------------
4207  * Compute vector sum z[i] = a*x[i]+y[i]
4208  */
4209 
VLin1_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4210 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4211 {
4212   sunindextype  N;
4213   int           i, nthreads;
4214   pthread_t     *threads;
4215   Pthreads_Data *thread_data;
4216   pthread_attr_t attr;
4217 
4218   /* allocate threads and thread data structs */
4219   N            = NV_LENGTH_PT(x);
4220   nthreads     = NV_NUM_THREADS_PT(x);
4221   threads      = malloc(nthreads*sizeof(pthread_t));
4222   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4223 
4224   /* set thread attributes */
4225   pthread_attr_init(&attr);
4226   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4227 
4228   for (i=0; i<nthreads; i++) {
4229     /* initialize thread data */
4230     N_VInitThreadData(&thread_data[i]);
4231 
4232     /* compute start and end loop index for thread */
4233     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4234 
4235     /* pack thread data */
4236     thread_data[i].c1 = a;
4237     thread_data[i].v1 = NV_DATA_PT(x);
4238     thread_data[i].v2 = NV_DATA_PT(y);
4239     thread_data[i].v3 = NV_DATA_PT(z);
4240 
4241     /* create threads and call pthread companion function */
4242     pthread_create(&threads[i], &attr, VLin1_PT, (void *) &thread_data[i]);
4243   }
4244 
4245   /* wait for all threads to finish */
4246   for (i=0; i<nthreads; i++) {
4247     pthread_join(threads[i], NULL);
4248   }
4249 
4250   /* clean up and return */
4251   pthread_attr_destroy(&attr);
4252   free(threads);
4253   free(thread_data);
4254 
4255   return;
4256 }
4257 
4258 
4259 /* ----------------------------------------------------------------------------
4260  * Pthread companion function to VLin1
4261  */
4262 
VLin1_PT(void * thread_data)4263 static void *VLin1_PT(void *thread_data)
4264 {
4265   sunindextype i, start, end;
4266   realtype a;
4267   realtype *xd, *yd, *zd;
4268   Pthreads_Data *my_data;
4269 
4270   /* extract thread data */
4271   my_data = (Pthreads_Data *) thread_data;
4272 
4273   a  = my_data->c1;
4274   xd = my_data->v1;
4275   yd = my_data->v2;
4276   zd = my_data->v3;
4277 
4278   start = my_data->start;
4279   end   = my_data->end;
4280 
4281   /* compute vector sum */
4282   for (i = start; i < end; i++)
4283     zd[i] = (a*xd[i]) + yd[i];
4284 
4285   /* exit */
4286   pthread_exit(NULL);
4287 }
4288 
4289 
4290 /* ----------------------------------------------------------------------------
4291  * Compute vector difference z[i] = a*x[i]-y[i]
4292  */
4293 
VLin2_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4294 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4295 {
4296   sunindextype  N;
4297   int           i, nthreads;
4298   pthread_t     *threads;
4299   Pthreads_Data *thread_data;
4300   pthread_attr_t attr;
4301 
4302   /* allocate threads and thread data structs */
4303   N            = NV_LENGTH_PT(x);
4304   nthreads     = NV_NUM_THREADS_PT(x);
4305   threads      = malloc(nthreads*sizeof(pthread_t));
4306   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4307 
4308   /* set thread attributes */
4309   pthread_attr_init(&attr);
4310   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4311 
4312   for (i=0; i<nthreads; i++) {
4313     /* initialize thread data */
4314     N_VInitThreadData(&thread_data[i]);
4315 
4316     /* compute start and end loop index for thread */
4317     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4318 
4319     /* pack thread data */
4320     thread_data[i].c1 = a;
4321     thread_data[i].v1 = NV_DATA_PT(x);
4322     thread_data[i].v2 = NV_DATA_PT(y);
4323     thread_data[i].v3 = NV_DATA_PT(z);
4324 
4325     /* create threads and call pthread companion function */
4326     pthread_create(&threads[i], &attr, VLin2_PT, (void *) &thread_data[i]);
4327   }
4328 
4329   /* wait for all threads to finish */
4330   for (i=0; i<nthreads; i++) {
4331     pthread_join(threads[i], NULL);
4332   }
4333 
4334   /* clean up and return */
4335   pthread_attr_destroy(&attr);
4336   free(threads);
4337   free(thread_data);
4338 
4339   return;
4340 }
4341 
4342 /* ----------------------------------------------------------------------------
4343  * Pthread companion function to VLin2
4344  */
4345 
VLin2_PT(void * thread_data)4346 static void *VLin2_PT(void *thread_data)
4347 {
4348   sunindextype i, start, end;
4349   realtype a;
4350   realtype *xd, *yd, *zd;
4351   Pthreads_Data *my_data;
4352 
4353   /* extract thread data */
4354   my_data = (Pthreads_Data *) thread_data;
4355 
4356   a  = my_data->c1;
4357   xd = my_data->v1;
4358   yd = my_data->v2;
4359   zd = my_data->v3;
4360 
4361   start = my_data->start;
4362   end   = my_data->end;
4363 
4364   /* compute vector difference */
4365   for (i = start; i < end; i++)
4366     zd[i] = (a*xd[i]) - yd[i];
4367 
4368   /* exit */
4369   pthread_exit(NULL);
4370 }
4371 
4372 
4373 /* ----------------------------------------------------------------------------
4374  * Compute special cases of linear sum
4375  */
4376 
Vaxpy_Pthreads(realtype a,N_Vector x,N_Vector y)4377 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y)
4378 {
4379   sunindextype  N;
4380   int           i, nthreads;
4381   pthread_t     *threads;
4382   Pthreads_Data *thread_data;
4383   pthread_attr_t attr;
4384 
4385   /* allocate threads and thread data structs */
4386   N            = NV_LENGTH_PT(x);
4387   nthreads     = NV_NUM_THREADS_PT(x);
4388   threads      = malloc(nthreads*sizeof(pthread_t));
4389   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4390 
4391   /* set thread attributes */
4392   pthread_attr_init(&attr);
4393   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4394 
4395   for (i=0; i<nthreads; i++) {
4396     /* initialize thread data */
4397     N_VInitThreadData(&thread_data[i]);
4398 
4399     /* compute start and end loop index for thread */
4400     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4401 
4402     /* pack thread data */
4403     thread_data[i].c1 = a;
4404     thread_data[i].v1 = NV_DATA_PT(x);
4405     thread_data[i].v2 = NV_DATA_PT(y);
4406 
4407     /* create threads and call pthread companion function */
4408     pthread_create(&threads[i], &attr, Vaxpy_PT, (void *) &thread_data[i]);
4409   }
4410 
4411   /* wait for all threads to finish */
4412   for (i=0; i<nthreads; i++) {
4413     pthread_join(threads[i], NULL);
4414   }
4415 
4416   /* clean up and return */
4417   pthread_attr_destroy(&attr);
4418   free(threads);
4419   free(thread_data);
4420 
4421   return;
4422 }
4423 
4424 /* ----------------------------------------------------------------------------
4425  * Pthread companion function to Vaxpy
4426  */
4427 
Vaxpy_PT(void * thread_data)4428 static void *Vaxpy_PT(void *thread_data)
4429 {
4430   sunindextype i, start, end;
4431   realtype a;
4432   realtype *xd, *yd;
4433   Pthreads_Data *my_data;
4434 
4435   /* extract thread data */
4436   my_data = (Pthreads_Data *) thread_data;
4437 
4438   a  = my_data->c1;
4439   xd = my_data->v1;
4440   yd = my_data->v2;
4441 
4442   start = my_data->start;
4443   end   = my_data->end;
4444 
4445   /* compute axpy */
4446   if (a == ONE) {
4447     for (i = start; i < end; i++)
4448       yd[i] += xd[i];
4449 
4450     /* exit */
4451     pthread_exit(NULL);
4452   }
4453 
4454   if (a == -ONE) {
4455     for (i = start; i < end; i++)
4456       yd[i] -= xd[i];
4457 
4458     /* exit */
4459     pthread_exit(NULL);
4460   }
4461 
4462   for (i = start; i < end; i++)
4463     yd[i] += a*xd[i];
4464 
4465   /* return */
4466   pthread_exit(NULL);
4467 }
4468 
4469 
4470 /* ----------------------------------------------------------------------------
4471  * Compute scaled vector
4472  */
4473 
VScaleBy_Pthreads(realtype a,N_Vector x)4474 static void VScaleBy_Pthreads(realtype a, N_Vector x)
4475 {
4476   sunindextype  N;
4477   int           i, nthreads;
4478   pthread_t     *threads;
4479   Pthreads_Data *thread_data;
4480   pthread_attr_t attr;
4481 
4482   /* allocate threads and thread data structs */
4483   N            = NV_LENGTH_PT(x);
4484   nthreads     = NV_NUM_THREADS_PT(x);
4485   threads      = malloc(nthreads*sizeof(pthread_t));
4486   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4487 
4488   /* set thread attributes */
4489   pthread_attr_init(&attr);
4490   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4491 
4492   for (i=0; i<nthreads; i++) {
4493     /* initialize thread data */
4494     N_VInitThreadData(&thread_data[i]);
4495 
4496     /* compute start and end loop index for thread */
4497     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4498 
4499     /* pack thread data */
4500     thread_data[i].c1 = a;
4501     thread_data[i].v1 = NV_DATA_PT(x);
4502 
4503     /* create threads and call pthread companion function */
4504     pthread_create(&threads[i], &attr, VScaleBy_PT, (void *) &thread_data[i]);
4505   }
4506 
4507   /* wait for all threads to finish */
4508   for (i=0; i<nthreads; i++) {
4509     pthread_join(threads[i], NULL);
4510   }
4511 
4512   /* clean up and return */
4513   pthread_attr_destroy(&attr);
4514   free(threads);
4515   free(thread_data);
4516 
4517   return;
4518 }
4519 
4520 
4521 /* ----------------------------------------------------------------------------
4522  * Pthread companion function to VScaleBy
4523  */
4524 
VScaleBy_PT(void * thread_data)4525 static void *VScaleBy_PT(void *thread_data)
4526 {
4527   sunindextype i, start, end;
4528   realtype a;
4529   realtype *xd;
4530   Pthreads_Data *my_data;
4531 
4532   /* extract thread data */
4533   my_data = (Pthreads_Data *) thread_data;
4534 
4535   a  = my_data->c1;
4536   xd = my_data->v1;
4537 
4538   start = my_data->start;
4539   end   = my_data->end;
4540 
4541   /* compute scaled vector */
4542   for (i = start; i < end; i++)
4543     xd[i] *= a;
4544 
4545   /* exit */
4546   pthread_exit(NULL);
4547 }
4548 
4549 
4550 /*
4551  * -----------------------------------------------------------------------------
4552  * private functions for special cases of vector array operations
4553  * -----------------------------------------------------------------------------
4554  */
4555 
VSumVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4556 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4557 {
4558   sunindextype   N;
4559   int            i, nthreads;
4560   pthread_t      *threads;
4561   Pthreads_Data  *thread_data;
4562   pthread_attr_t attr;
4563 
4564   /* allocate threads and thread data structs */
4565   N           = NV_LENGTH_PT(X[0]);
4566   nthreads    = NV_NUM_THREADS_PT(X[0]);
4567   threads     = malloc(nthreads*sizeof(pthread_t));
4568   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4569 
4570   /* set thread attributes */
4571   pthread_attr_init(&attr);
4572   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4573 
4574   /* pack thread data, distribute loop indices, and create threads/call kernel */
4575   for (i=0; i<nthreads; i++) {
4576     N_VInitThreadData(&thread_data[i]);
4577 
4578     thread_data[i].nvec = nvec;
4579     thread_data[i].Y1   = X;
4580     thread_data[i].Y2   = Y;
4581     thread_data[i].Y3   = Z;
4582 
4583     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4584 
4585     pthread_create(&threads[i], &attr, VSumVectorArray_PT, (void *) &thread_data[i]);
4586   }
4587 
4588   /* wait for all threads to finish */
4589   for (i=0; i<nthreads; i++)
4590     pthread_join(threads[i], NULL);
4591 
4592   /* clean up and return */
4593   pthread_attr_destroy(&attr);
4594   free(threads);
4595   free(thread_data);
4596 
4597   return(0);
4598 }
4599 
VSumVectorArray_PT(void * thread_data)4600 static void *VSumVectorArray_PT(void *thread_data)
4601 {
4602   Pthreads_Data *my_data;
4603   sunindextype  j, start, end;
4604 
4605   int       i;
4606   realtype* xd=NULL;
4607   realtype* yd=NULL;
4608   realtype* zd=NULL;
4609 
4610   my_data = (Pthreads_Data *) thread_data;
4611   start   = my_data->start;
4612   end     = my_data->end;
4613 
4614   for (i=0; i<my_data->nvec; i++) {
4615     xd = NV_DATA_PT(my_data->Y1[i]);
4616     yd = NV_DATA_PT(my_data->Y2[i]);
4617     zd = NV_DATA_PT(my_data->Y3[i]);
4618     for (j=start; j<end; j++)
4619       zd[j] = xd[j] + yd[j];
4620   }
4621 
4622   pthread_exit(NULL);
4623 }
4624 
4625 
VDiffVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4626 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4627 {
4628   sunindextype   N;
4629   int            i, nthreads;
4630   pthread_t      *threads;
4631   Pthreads_Data  *thread_data;
4632   pthread_attr_t attr;
4633 
4634   /* allocate threads and thread data structs */
4635   N           = NV_LENGTH_PT(X[0]);
4636   nthreads    = NV_NUM_THREADS_PT(X[0]);
4637   threads     = malloc(nthreads*sizeof(pthread_t));
4638   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4639 
4640   /* set thread attributes */
4641   pthread_attr_init(&attr);
4642   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4643 
4644   /* pack thread data, distribute loop indices, and create threads/call kernel */
4645   for (i=0; i<nthreads; i++) {
4646     N_VInitThreadData(&thread_data[i]);
4647 
4648     thread_data[i].nvec = nvec;
4649     thread_data[i].Y1   = X;
4650     thread_data[i].Y2   = Y;
4651     thread_data[i].Y3   = Z;
4652 
4653     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4654 
4655     pthread_create(&threads[i], &attr, VDiffVectorArray_PT, (void *) &thread_data[i]);
4656   }
4657 
4658   /* wait for all threads to finish */
4659   for (i=0; i<nthreads; i++)
4660     pthread_join(threads[i], NULL);
4661 
4662   /* clean up and return */
4663   pthread_attr_destroy(&attr);
4664   free(threads);
4665   free(thread_data);
4666 
4667   return(0);
4668 }
4669 
VDiffVectorArray_PT(void * thread_data)4670 static void *VDiffVectorArray_PT(void *thread_data)
4671 {
4672   Pthreads_Data *my_data;
4673   sunindextype  j, start, end;
4674 
4675   int       i;
4676   realtype* xd=NULL;
4677   realtype* yd=NULL;
4678   realtype* zd=NULL;
4679 
4680   my_data = (Pthreads_Data *) thread_data;
4681   start   = my_data->start;
4682   end     = my_data->end;
4683 
4684   for (i=0; i<my_data->nvec; i++) {
4685     xd = NV_DATA_PT(my_data->Y1[i]);
4686     yd = NV_DATA_PT(my_data->Y2[i]);
4687     zd = NV_DATA_PT(my_data->Y3[i]);
4688     for (j=start; j<end; j++)
4689       zd[j] = xd[j] - yd[j];
4690   }
4691 
4692   pthread_exit(NULL);
4693 }
4694 
4695 
VScaleSumVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4696 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4697 {
4698   sunindextype   N;
4699   int            i, nthreads;
4700   pthread_t      *threads;
4701   Pthreads_Data  *thread_data;
4702   pthread_attr_t attr;
4703 
4704   /* allocate threads and thread data structs */
4705   N            = NV_LENGTH_PT(X[0]);
4706   nthreads     = NV_NUM_THREADS_PT(X[0]);
4707   threads      = malloc(nthreads*sizeof(pthread_t));
4708   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4709 
4710   /* set thread attributes */
4711   pthread_attr_init(&attr);
4712   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4713 
4714   /* pack thread data, distribute loop indices, and create threads/call kernel */
4715   for (i=0; i<nthreads; i++) {
4716     N_VInitThreadData(&thread_data[i]);
4717 
4718     thread_data[i].nvec = nvec;
4719     thread_data[i].c1   = c;
4720     thread_data[i].Y1   = X;
4721     thread_data[i].Y2   = Y;
4722     thread_data[i].Y3   = Z;
4723 
4724     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4725 
4726     pthread_create(&threads[i], &attr, VScaleSumVectorArray_PT, (void *) &thread_data[i]);
4727   }
4728 
4729   /* wait for all threads to finish */
4730   for (i=0; i<nthreads; i++)
4731     pthread_join(threads[i], NULL);
4732 
4733   /* clean up and return */
4734   pthread_attr_destroy(&attr);
4735   free(threads);
4736   free(thread_data);
4737 
4738   return(0);
4739 }
4740 
VScaleSumVectorArray_PT(void * thread_data)4741 static void *VScaleSumVectorArray_PT(void *thread_data)
4742 {
4743   Pthreads_Data *my_data;
4744   sunindextype  j, start, end;
4745 
4746   int       i;
4747   realtype  c;
4748   realtype* xd=NULL;
4749   realtype* yd=NULL;
4750   realtype* zd=NULL;
4751 
4752   my_data = (Pthreads_Data *) thread_data;
4753   start   = my_data->start;
4754   end     = my_data->end;
4755   c       = my_data->c1;
4756 
4757   for (i=0; i<my_data->nvec; i++) {
4758     xd = NV_DATA_PT(my_data->Y1[i]);
4759     yd = NV_DATA_PT(my_data->Y2[i]);
4760     zd = NV_DATA_PT(my_data->Y3[i]);
4761     for (j=start; j<end; j++)
4762       zd[j] = c * (xd[j] + yd[j]);
4763   }
4764 
4765   pthread_exit(NULL);
4766 }
4767 
4768 
VScaleDiffVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4769 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4770 {
4771   sunindextype   N;
4772   int            i, nthreads;
4773   pthread_t      *threads;
4774   Pthreads_Data  *thread_data;
4775   pthread_attr_t attr;
4776 
4777   /* allocate threads and thread data structs */
4778   N           = NV_LENGTH_PT(X[0]);
4779   nthreads    = NV_NUM_THREADS_PT(X[0]);
4780   threads     = malloc(nthreads*sizeof(pthread_t));
4781   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4782 
4783   /* set thread attributes */
4784   pthread_attr_init(&attr);
4785   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4786 
4787   /* pack thread data, distribute loop indices, and create threads/call kernel */
4788   for (i=0; i<nthreads; i++) {
4789     N_VInitThreadData(&thread_data[i]);
4790 
4791     thread_data[i].nvec = nvec;
4792     thread_data[i].c1   = c;
4793     thread_data[i].Y1   = X;
4794     thread_data[i].Y2   = Y;
4795     thread_data[i].Y3   = Z;
4796 
4797     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4798 
4799     pthread_create(&threads[i], &attr, VScaleDiffVectorArray_PT, (void *) &thread_data[i]);
4800   }
4801 
4802   /* wait for all threads to finish */
4803   for (i=0; i<nthreads; i++)
4804     pthread_join(threads[i], NULL);
4805 
4806   /* clean up and return */
4807   pthread_attr_destroy(&attr);
4808   free(threads);
4809   free(thread_data);
4810 
4811   return(0);
4812 }
4813 
VScaleDiffVectorArray_PT(void * thread_data)4814 static void *VScaleDiffVectorArray_PT(void *thread_data)
4815 {
4816   Pthreads_Data *my_data;
4817   sunindextype  j, start, end;
4818 
4819   int       i;
4820   realtype  c;
4821   realtype* xd=NULL;
4822   realtype* yd=NULL;
4823   realtype* zd=NULL;
4824 
4825   my_data = (Pthreads_Data *) thread_data;
4826   start   = my_data->start;
4827   end     = my_data->end;
4828   c       = my_data->c1;
4829 
4830   for (i=0; i<my_data->nvec; i++) {
4831     xd = NV_DATA_PT(my_data->Y1[i]);
4832     yd = NV_DATA_PT(my_data->Y2[i]);
4833     zd = NV_DATA_PT(my_data->Y3[i]);
4834     for (j=start; j<end; j++)
4835       zd[j] = c * (xd[j] - yd[j]);
4836   }
4837 
4838   pthread_exit(NULL);
4839 }
4840 
4841 
VLin1VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)4842 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
4843 {
4844   sunindextype   N;
4845   int            i, nthreads;
4846   pthread_t      *threads;
4847   Pthreads_Data  *thread_data;
4848   pthread_attr_t attr;
4849 
4850   /* allocate threads and thread data structs */
4851   N           = NV_LENGTH_PT(X[0]);
4852   nthreads    = NV_NUM_THREADS_PT(X[0]);
4853   threads     = malloc(nthreads*sizeof(pthread_t));
4854   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4855 
4856   /* set thread attributes */
4857   pthread_attr_init(&attr);
4858   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4859 
4860   /* pack thread data, distribute loop indices, and create threads/call kernel */
4861   for (i=0; i<nthreads; i++) {
4862     N_VInitThreadData(&thread_data[i]);
4863 
4864     thread_data[i].nvec = nvec;
4865     thread_data[i].c1   = a;
4866     thread_data[i].Y1   = X;
4867     thread_data[i].Y2   = Y;
4868     thread_data[i].Y3   = Z;
4869 
4870     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4871 
4872     pthread_create(&threads[i], &attr, VLin1VectorArray_PT, (void *) &thread_data[i]);
4873   }
4874 
4875   /* wait for all threads to finish */
4876   for (i=0; i<nthreads; i++)
4877     pthread_join(threads[i], NULL);
4878 
4879   /* clean up and return */
4880   pthread_attr_destroy(&attr);
4881   free(threads);
4882   free(thread_data);
4883 
4884   return(0);
4885 }
4886 
VLin1VectorArray_PT(void * thread_data)4887 static void *VLin1VectorArray_PT(void *thread_data)
4888 {
4889   Pthreads_Data *my_data;
4890   sunindextype  j, start, end;
4891 
4892   int       i;
4893   realtype  a;
4894   realtype* xd=NULL;
4895   realtype* yd=NULL;
4896   realtype* zd=NULL;
4897 
4898   my_data = (Pthreads_Data *) thread_data;
4899   start   = my_data->start;
4900   end     = my_data->end;
4901   a       = my_data->c1;
4902 
4903   for (i=0; i<my_data->nvec; i++) {
4904     xd = NV_DATA_PT(my_data->Y1[i]);
4905     yd = NV_DATA_PT(my_data->Y2[i]);
4906     zd = NV_DATA_PT(my_data->Y3[i]);
4907     for (j=start; j<end; j++)
4908       zd[j] = (a * xd[j]) + yd[j];
4909   }
4910 
4911   pthread_exit(NULL);
4912 }
4913 
4914 
VLin2VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)4915 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
4916 {
4917   sunindextype   N;
4918   int            i, nthreads;
4919   pthread_t      *threads;
4920   Pthreads_Data  *thread_data;
4921   pthread_attr_t attr;
4922 
4923   /* allocate threads and thread data structs */
4924   N           = NV_LENGTH_PT(X[0]);
4925   nthreads    = NV_NUM_THREADS_PT(X[0]);
4926   threads     = malloc(nthreads*sizeof(pthread_t));
4927   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4928 
4929   /* set thread attributes */
4930   pthread_attr_init(&attr);
4931   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4932 
4933   /* pack thread data, distribute loop indices, and create threads/call kernel */
4934   for (i=0; i<nthreads; i++) {
4935     N_VInitThreadData(&thread_data[i]);
4936 
4937     thread_data[i].nvec = nvec;
4938     thread_data[i].c1   = a;
4939     thread_data[i].Y1   = X;
4940     thread_data[i].Y2   = Y;
4941     thread_data[i].Y3   = Z;
4942 
4943     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4944 
4945     pthread_create(&threads[i], &attr, VLin2VectorArray_PT, (void *) &thread_data[i]);
4946   }
4947 
4948   /* wait for all threads to finish */
4949   for (i=0; i<nthreads; i++)
4950     pthread_join(threads[i], NULL);
4951 
4952   /* clean up and return */
4953   pthread_attr_destroy(&attr);
4954   free(threads);
4955   free(thread_data);
4956 
4957   return(0);
4958 }
4959 
VLin2VectorArray_PT(void * thread_data)4960 static void *VLin2VectorArray_PT(void *thread_data)
4961 {
4962   Pthreads_Data *my_data;
4963   sunindextype  j, start, end;
4964 
4965   int       i;
4966   realtype  a;
4967   realtype* xd=NULL;
4968   realtype* yd=NULL;
4969   realtype* zd=NULL;
4970 
4971   my_data = (Pthreads_Data *) thread_data;
4972   start   = my_data->start;
4973   end     = my_data->end;
4974   a       = my_data->c1;
4975 
4976   for (i=0; i<my_data->nvec; i++) {
4977     xd = NV_DATA_PT(my_data->Y1[i]);
4978     yd = NV_DATA_PT(my_data->Y2[i]);
4979     zd = NV_DATA_PT(my_data->Y3[i]);
4980     for (j=start; j<end; j++)
4981       zd[j] = (a * xd[j]) - yd[j];
4982   }
4983 
4984   pthread_exit(NULL);
4985 }
4986 
4987 
VaxpyVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y)4988 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y)
4989 {
4990   sunindextype   N;
4991   int            i, nthreads;
4992   pthread_t      *threads;
4993   Pthreads_Data  *thread_data;
4994   pthread_attr_t attr;
4995 
4996   /* allocate threads and thread data structs */
4997   N           = NV_LENGTH_PT(X[0]);
4998   nthreads    = NV_NUM_THREADS_PT(X[0]);
4999   threads     = malloc(nthreads*sizeof(pthread_t));
5000   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5001 
5002   /* set thread attributes */
5003   pthread_attr_init(&attr);
5004   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5005 
5006   /* pack thread data, distribute loop indices, and create threads/call kernel */
5007   for (i=0; i<nthreads; i++) {
5008     N_VInitThreadData(&thread_data[i]);
5009 
5010     thread_data[i].nvec = nvec;
5011     thread_data[i].c1   = a;
5012     thread_data[i].Y1   = X;
5013     thread_data[i].Y2   = Y;
5014 
5015     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5016 
5017     pthread_create(&threads[i], &attr, VaxpyVectorArray_PT, (void *) &thread_data[i]);
5018   }
5019 
5020   /* wait for all threads to finish */
5021   for (i=0; i<nthreads; i++)
5022     pthread_join(threads[i], NULL);
5023 
5024   /* clean up and return */
5025   pthread_attr_destroy(&attr);
5026   free(threads);
5027   free(thread_data);
5028 
5029   return(0);
5030 }
5031 
VaxpyVectorArray_PT(void * thread_data)5032 static void *VaxpyVectorArray_PT(void *thread_data)
5033 {
5034   Pthreads_Data *my_data;
5035   sunindextype  j, start, end;
5036 
5037   int       i;
5038   realtype  a;
5039   realtype* xd=NULL;
5040   realtype* yd=NULL;
5041 
5042   my_data = (Pthreads_Data *) thread_data;
5043   start   = my_data->start;
5044   end     = my_data->end;
5045   a       = my_data->c1;
5046 
5047   if (a == ONE) {
5048     for (i=0; i<my_data->nvec; i++) {
5049       xd = NV_DATA_PT(my_data->Y1[i]);
5050       yd = NV_DATA_PT(my_data->Y2[i]);
5051       for (j=start; j<end; j++)
5052         yd[j] += xd[j];
5053     }
5054     pthread_exit(NULL);
5055   }
5056 
5057   if (a == -ONE) {
5058     for (i=0; i<my_data->nvec; i++) {
5059       xd = NV_DATA_PT(my_data->Y1[i]);
5060       yd = NV_DATA_PT(my_data->Y2[i]);
5061       for (j=start; j<end; j++)
5062         yd[j] -= xd[j];
5063     }
5064     pthread_exit(NULL);
5065   }
5066 
5067   for (i=0; i<my_data->nvec; i++) {
5068     xd = NV_DATA_PT(my_data->Y1[i]);
5069     yd = NV_DATA_PT(my_data->Y2[i]);
5070     for (j=start; j<end; j++)
5071       yd[j] += a * xd[j];
5072   }
5073   pthread_exit(NULL);
5074 }
5075 
5076 
5077 /*
5078  * -----------------------------------------------------------------
5079  * private utility functions
5080  * -----------------------------------------------------------------
5081  */
5082 
5083 /* ----------------------------------------------------------------------------
5084  * Determine loop indices for a thread
5085  */
5086 
N_VSplitLoop(int myid,int * nthreads,sunindextype * N,sunindextype * start,sunindextype * end)5087 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
5088 			 sunindextype *start, sunindextype *end)
5089 {
5090   sunindextype q, r; /* quotient and remainder */
5091 
5092   /* work per thread and leftover work */
5093   q = *N / *nthreads;
5094   r = *N % *nthreads;
5095 
5096   /* assign work */
5097   if (myid < r) {
5098     *start = myid * q + myid;
5099     *end   = *start + q + 1;
5100   } else {
5101     *start = myid * q + r;
5102     *end   = *start + q;
5103   }
5104 }
5105 
5106 
5107 /* ----------------------------------------------------------------------------
5108  * Initialize values of local thread data struct
5109  */
5110 
N_VInitThreadData(Pthreads_Data * thread_data)5111 static void N_VInitThreadData(Pthreads_Data *thread_data)
5112 {
5113   thread_data->start = -1;
5114   thread_data->end   = -1;
5115 
5116 #if __STDC_VERSION__ >= 199901L
5117   thread_data->c1 = NAN;
5118   thread_data->c2 = NAN;
5119 #else
5120   thread_data->c1 = ZERO;
5121   thread_data->c2 = ZERO;
5122 #endif
5123 
5124   thread_data->v1 = NULL;
5125   thread_data->v2 = NULL;
5126   thread_data->v3 = NULL;
5127   thread_data->global_val = NULL;
5128   thread_data->global_mutex = NULL;
5129 
5130   thread_data->nvec = ZERO;
5131   thread_data->nsum = ZERO;
5132   thread_data->cvals = NULL;
5133   thread_data->Y1 = NULL;
5134   thread_data->Y2 = NULL;
5135   thread_data->Y3 = NULL;
5136 }
5137 
5138 
5139 /*
5140  * -----------------------------------------------------------------
5141  * Enable / Disable fused and vector array operations
5142  * -----------------------------------------------------------------
5143  */
5144 
N_VEnableFusedOps_Pthreads(N_Vector v,booleantype tf)5145 int N_VEnableFusedOps_Pthreads(N_Vector v, booleantype tf)
5146 {
5147   /* check that vector is non-NULL */
5148   if (v == NULL) return(-1);
5149 
5150   /* check that ops structure is non-NULL */
5151   if (v->ops == NULL) return(-1);
5152 
5153   if (tf) {
5154     /* enable all fused vector operations */
5155     v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5156     v->ops->nvscaleaddmulti     = N_VScaleAddMulti_Pthreads;
5157     v->ops->nvdotprodmulti      = N_VDotProdMulti_Pthreads;
5158     /* enable all vector array operations */
5159     v->ops->nvlinearsumvectorarray         = N_VLinearSumVectorArray_Pthreads;
5160     v->ops->nvscalevectorarray             = N_VScaleVectorArray_Pthreads;
5161     v->ops->nvconstvectorarray             = N_VConstVectorArray_Pthreads;
5162     v->ops->nvwrmsnormvectorarray          = N_VWrmsNormVectorArray_Pthreads;
5163     v->ops->nvwrmsnormmaskvectorarray      = N_VWrmsNormMaskVectorArray_Pthreads;
5164     v->ops->nvscaleaddmultivectorarray     = N_VScaleAddMultiVectorArray_Pthreads;
5165     v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5166   } else {
5167     /* disable all fused vector operations */
5168     v->ops->nvlinearcombination = NULL;
5169     v->ops->nvscaleaddmulti     = NULL;
5170     v->ops->nvdotprodmulti      = NULL;
5171     /* disable all vector array operations */
5172     v->ops->nvlinearsumvectorarray         = NULL;
5173     v->ops->nvscalevectorarray             = NULL;
5174     v->ops->nvconstvectorarray             = NULL;
5175     v->ops->nvwrmsnormvectorarray          = NULL;
5176     v->ops->nvwrmsnormmaskvectorarray      = NULL;
5177     v->ops->nvscaleaddmultivectorarray     = NULL;
5178     v->ops->nvlinearcombinationvectorarray = NULL;
5179   }
5180 
5181   /* return success */
5182   return(0);
5183 }
5184 
5185 
N_VEnableLinearCombination_Pthreads(N_Vector v,booleantype tf)5186 int N_VEnableLinearCombination_Pthreads(N_Vector v, booleantype tf)
5187 {
5188   /* check that vector is non-NULL */
5189   if (v == NULL) return(-1);
5190 
5191   /* check that ops structure is non-NULL */
5192   if (v->ops == NULL) return(-1);
5193 
5194   /* enable/disable operation */
5195   if (tf)
5196     v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5197   else
5198     v->ops->nvlinearcombination = NULL;
5199 
5200   /* return success */
5201   return(0);
5202 }
5203 
N_VEnableScaleAddMulti_Pthreads(N_Vector v,booleantype tf)5204 int N_VEnableScaleAddMulti_Pthreads(N_Vector v, booleantype tf)
5205 {
5206   /* check that vector is non-NULL */
5207   if (v == NULL) return(-1);
5208 
5209   /* check that ops structure is non-NULL */
5210   if (v->ops == NULL) return(-1);
5211 
5212   /* enable/disable operation */
5213   if (tf)
5214     v->ops->nvscaleaddmulti = N_VScaleAddMulti_Pthreads;
5215   else
5216     v->ops->nvscaleaddmulti = NULL;
5217 
5218   /* return success */
5219   return(0);
5220 }
5221 
N_VEnableDotProdMulti_Pthreads(N_Vector v,booleantype tf)5222 int N_VEnableDotProdMulti_Pthreads(N_Vector v, booleantype tf)
5223 {
5224   /* check that vector is non-NULL */
5225   if (v == NULL) return(-1);
5226 
5227   /* check that ops structure is non-NULL */
5228   if (v->ops == NULL) return(-1);
5229 
5230   /* enable/disable operation */
5231   if (tf)
5232     v->ops->nvdotprodmulti = N_VDotProdMulti_Pthreads;
5233   else
5234     v->ops->nvdotprodmulti = NULL;
5235 
5236   /* return success */
5237   return(0);
5238 }
5239 
N_VEnableLinearSumVectorArray_Pthreads(N_Vector v,booleantype tf)5240 int N_VEnableLinearSumVectorArray_Pthreads(N_Vector v, booleantype tf)
5241 {
5242   /* check that vector is non-NULL */
5243   if (v == NULL) return(-1);
5244 
5245   /* check that ops structure is non-NULL */
5246   if (v->ops == NULL) return(-1);
5247 
5248   /* enable/disable operation */
5249   if (tf)
5250     v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Pthreads;
5251   else
5252     v->ops->nvlinearsumvectorarray = NULL;
5253 
5254   /* return success */
5255   return(0);
5256 }
5257 
N_VEnableScaleVectorArray_Pthreads(N_Vector v,booleantype tf)5258 int N_VEnableScaleVectorArray_Pthreads(N_Vector v, booleantype tf)
5259 {
5260   /* check that vector is non-NULL */
5261   if (v == NULL) return(-1);
5262 
5263   /* check that ops structure is non-NULL */
5264   if (v->ops == NULL) return(-1);
5265 
5266   /* enable/disable operation */
5267   if (tf)
5268     v->ops->nvscalevectorarray = N_VScaleVectorArray_Pthreads;
5269   else
5270     v->ops->nvscalevectorarray = NULL;
5271 
5272   /* return success */
5273   return(0);
5274 }
5275 
N_VEnableConstVectorArray_Pthreads(N_Vector v,booleantype tf)5276 int N_VEnableConstVectorArray_Pthreads(N_Vector v, booleantype tf)
5277 {
5278   /* check that vector is non-NULL */
5279   if (v == NULL) return(-1);
5280 
5281   /* check that ops structure is non-NULL */
5282   if (v->ops == NULL) return(-1);
5283 
5284   /* enable/disable operation */
5285   if (tf)
5286     v->ops->nvconstvectorarray = N_VConstVectorArray_Pthreads;
5287   else
5288     v->ops->nvconstvectorarray = NULL;
5289 
5290   /* return success */
5291   return(0);
5292 }
5293 
N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v,booleantype tf)5294 int N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v, booleantype tf)
5295 {
5296   /* check that vector is non-NULL */
5297   if (v == NULL) return(-1);
5298 
5299   /* check that ops structure is non-NULL */
5300   if (v->ops == NULL) return(-1);
5301 
5302   /* enable/disable operation */
5303   if (tf)
5304     v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Pthreads;
5305   else
5306     v->ops->nvwrmsnormvectorarray = NULL;
5307 
5308   /* return success */
5309   return(0);
5310 }
5311 
N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v,booleantype tf)5312 int N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v, booleantype tf)
5313 {
5314   /* check that vector is non-NULL */
5315   if (v == NULL) return(-1);
5316 
5317   /* check that ops structure is non-NULL */
5318   if (v->ops == NULL) return(-1);
5319 
5320   /* enable/disable operation */
5321   if (tf)
5322     v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Pthreads;
5323   else
5324     v->ops->nvwrmsnormmaskvectorarray = NULL;
5325 
5326   /* return success */
5327   return(0);
5328 }
5329 
N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v,booleantype tf)5330 int N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v, booleantype tf)
5331 {
5332   /* check that vector is non-NULL */
5333   if (v == NULL) return(-1);
5334 
5335   /* check that ops structure is non-NULL */
5336   if (v->ops == NULL) return(-1);
5337 
5338   /* enable/disable operation */
5339   if (tf)
5340     v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Pthreads;
5341   else
5342     v->ops->nvscaleaddmultivectorarray = NULL;
5343 
5344   /* return success */
5345   return(0);
5346 }
5347 
N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v,booleantype tf)5348 int N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v, booleantype tf)
5349 {
5350   /* check that vector is non-NULL */
5351   if (v == NULL) return(-1);
5352 
5353   /* check that ops structure is non-NULL */
5354   if (v->ops == NULL) return(-1);
5355 
5356   /* enable/disable operation */
5357   if (tf)
5358     v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5359   else
5360     v->ops->nvlinearcombinationvectorarray = NULL;
5361 
5362   /* return success */
5363   return(0);
5364 }
5365