1 /* -----------------------------------------------------------------
2  * Programmer(s): David J. Gardner @ LLNL
3  * -----------------------------------------------------------------
4  * Acknowledgements: This NVECTOR module is based on the NVECTOR
5  *                   Serial module by Scott D. Cohen, Alan C.
6  *                   Hindmarsh, Radu Serban, and Aaron Collier
7  *                   @ LLNL
8  * -----------------------------------------------------------------
9  * SUNDIALS Copyright Start
10  * Copyright (c) 2002-2021, Lawrence Livermore National Security
11  * and Southern Methodist University.
12  * All rights reserved.
13  *
14  * See the top-level LICENSE and NOTICE files for details.
15  *
16  * SPDX-License-Identifier: BSD-3-Clause
17  * SUNDIALS Copyright End
18  * -----------------------------------------------------------------
19  * This is the implementation file for a POSIX Threads (Pthreads)
20  * implementation of the NVECTOR package using a LOCAL array of
21  * structures to pass data to threads.
22  * -----------------------------------------------------------------*/
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include <nvector/nvector_pthreads.h>
28 #include <sundials/sundials_math.h>
29 #include <math.h> /* define NAN */
30 
31 #define ZERO   RCONST(0.0)
32 #define HALF   RCONST(0.5)
33 #define ONE    RCONST(1.0)
34 #define ONEPT5 RCONST(1.5)
35 
36 /* Private functions for special cases of vector operations */
37 static void VCopy_Pthreads(N_Vector x, N_Vector z);                              /* z=x       */
38 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z);                   /* z=x+y     */
39 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z);                  /* z=x-y     */
40 static void VNeg_Pthreads(N_Vector x, N_Vector z);                               /* z=-x      */
41 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z);  /* z=c(x+y)  */
42 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z); /* z=c(x-y)  */
43 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z);      /* z=ax+y    */
44 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z);      /* z=ax-y    */
45 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y);                  /* y <- ax+y */
46 static void VScaleBy_Pthreads(realtype a, N_Vector x);                           /* x <- ax   */
47 
48 /* Private functions for special cases of vector array operations */
49 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z);                   /* Z=X+Y     */
50 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z);                  /* Z=X-Y     */
51 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z);  /* Z=c(X+Y)  */
52 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z); /* Z=c(X-Y)  */
53 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z);      /* Z=aX+Y    */
54 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z);      /* Z=aX-Y    */
55 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y);                    /* Y <- aX+Y */
56 
57 /* Pthread companion functions for vector operations */
58 static void *N_VLinearSum_PT(void *thread_data);
59 static void *N_VConst_PT(void *thread_data);
60 static void *N_VProd_PT(void *thread_data);
61 static void *N_VDiv_PT(void *thread_data);
62 static void *N_VScale_PT(void *thread_data);
63 static void *N_VAbs_PT(void *thread_data);
64 static void *N_VInv_PT(void *thread_data);
65 static void *N_VAddConst_PT(void *thread_data);
66 static void *N_VCompare_PT(void *thread_data);
67 static void *N_VDotProd_PT(void *thread_data);
68 static void *N_VMaxNorm_PT(void *thread_data);
69 static void *N_VWSqrSum_PT(void *thread_data);
70 static void *N_VMin_PT(void *thread_data);
71 static void *N_VWL2Norm_PT(void *thread_data);
72 static void *N_VL1Norm_PT(void *thread_data);
73 static void *N_VInvTest_PT(void *thread_data);
74 static void *N_VWSqrSumMask_PT(void *thread_data);
75 static void *N_VConstrMask_PT(void *thread_data);
76 static void *N_VMinQuotient_PT(void *thread_data);
77 
78 /* Pthread companion functions special cases of vector operations */
79 static void *VCopy_PT(void *thread_data);
80 static void *VSum_PT(void *thread_data);
81 static void *VDiff_PT(void *thread_data);
82 static void *VNeg_PT(void *thread_data);
83 static void *VScaleSum_PT(void *thread_data);
84 static void *VScaleDiff_PT(void *thread_data);
85 static void *VLin1_PT(void *thread_data);
86 static void *VLin2_PT(void *thread_data);
87 static void *VScaleBy_PT(void *thread_data);
88 static void *Vaxpy_PT(void *thread_data);
89 
90 /* Pthread companion functions for fused vector operations */
91 static void *N_VLinearCombination_PT(void *thread_data);
92 static void *N_VScaleAddMulti_PT(void *thread_data);
93 static void *N_VDotProdMulti_PT(void *thread_data);
94 
95 /* Pthread companion functions for vector array operations */
96 static void *N_VLinearSumVectorArray_PT(void *thread_data);
97 static void *N_VScaleVectorArray_PT(void *thread_data);
98 static void *N_VConstVectorArray_PT(void *thread_data);
99 static void *N_VWrmsNormVectorArray_PT(void *thread_data);
100 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data);
101 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data);
102 static void *N_VLinearCombinationVectorArray_PT(void *thread_data);
103 
104 /* Pthread companion functions special cases of vector array operations */
105 static void *VSumVectorArray_PT(void *thread_data);
106 static void *VDiffVectorArray_PT(void *thread_data);
107 static void *VScaleSumVectorArray_PT(void *thread_data);
108 static void *VScaleDiffVectorArray_PT(void *thread_data);
109 static void *VLin1VectorArray_PT(void *thread_data);
110 static void *VLin2VectorArray_PT(void *thread_data);
111 static void *VaxpyVectorArray_PT(void *thread_data);
112 
113 /* Pthread companion functions for XBraid interface operations */
114 static void *VBufPack_PT(void *thread_data);
115 static void *VBufUnpack_PT(void *thread_data);
116 
117 /* Function to determine loop values for threads */
118 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
119                          sunindextype *start, sunindextype *end);
120 
121 /* Function to initialize thread data */
122 static void N_VInitThreadData(Pthreads_Data *thread_data);
123 
124 /*
125  * -----------------------------------------------------------------
126  * exported functions
127  * -----------------------------------------------------------------
128  */
129 
130 /* ----------------------------------------------------------------
131  * Returns vector type ID. Used to identify vector implementation
132  * from abstract N_Vector interface.
133  */
N_VGetVectorID_Pthreads(N_Vector v)134 N_Vector_ID N_VGetVectorID_Pthreads(N_Vector v)
135 {
136   return SUNDIALS_NVEC_PTHREADS;
137 }
138 
139 /* ----------------------------------------------------------------------------
140  * Function to create a new empty vector
141  */
142 
N_VNewEmpty_Pthreads(sunindextype length,int num_threads)143 N_Vector N_VNewEmpty_Pthreads(sunindextype length, int num_threads)
144 {
145   N_Vector v;
146   N_VectorContent_Pthreads content;
147 
148   /* Create an empty vector object */
149   v = NULL;
150   v = N_VNewEmpty();
151   if (v == NULL) return(NULL);
152 
153   /* Attach operations */
154 
155   /* constructors, destructors, and utility operations */
156   v->ops->nvgetvectorid     = N_VGetVectorID_Pthreads;
157   v->ops->nvclone           = N_VClone_Pthreads;
158   v->ops->nvcloneempty      = N_VCloneEmpty_Pthreads;
159   v->ops->nvdestroy         = N_VDestroy_Pthreads;
160   v->ops->nvspace           = N_VSpace_Pthreads;
161   v->ops->nvgetarraypointer = N_VGetArrayPointer_Pthreads;
162   v->ops->nvsetarraypointer = N_VSetArrayPointer_Pthreads;
163   v->ops->nvgetlength       = N_VGetLength_Pthreads;
164 
165   /* standard vector operations */
166   v->ops->nvlinearsum    = N_VLinearSum_Pthreads;
167   v->ops->nvconst        = N_VConst_Pthreads;
168   v->ops->nvprod         = N_VProd_Pthreads;
169   v->ops->nvdiv          = N_VDiv_Pthreads;
170   v->ops->nvscale        = N_VScale_Pthreads;
171   v->ops->nvabs          = N_VAbs_Pthreads;
172   v->ops->nvinv          = N_VInv_Pthreads;
173   v->ops->nvaddconst     = N_VAddConst_Pthreads;
174   v->ops->nvdotprod      = N_VDotProd_Pthreads;
175   v->ops->nvmaxnorm      = N_VMaxNorm_Pthreads;
176   v->ops->nvwrmsnormmask = N_VWrmsNormMask_Pthreads;
177   v->ops->nvwrmsnorm     = N_VWrmsNorm_Pthreads;
178   v->ops->nvmin          = N_VMin_Pthreads;
179   v->ops->nvwl2norm      = N_VWL2Norm_Pthreads;
180   v->ops->nvl1norm       = N_VL1Norm_Pthreads;
181   v->ops->nvcompare      = N_VCompare_Pthreads;
182   v->ops->nvinvtest      = N_VInvTest_Pthreads;
183   v->ops->nvconstrmask   = N_VConstrMask_Pthreads;
184   v->ops->nvminquotient  = N_VMinQuotient_Pthreads;
185 
186   /* fused and vector array operations are disabled (NULL) by default */
187 
188   /* local reduction operations */
189   v->ops->nvdotprodlocal     = N_VDotProd_Pthreads;
190   v->ops->nvmaxnormlocal     = N_VMaxNorm_Pthreads;
191   v->ops->nvminlocal         = N_VMin_Pthreads;
192   v->ops->nvl1normlocal      = N_VL1Norm_Pthreads;
193   v->ops->nvinvtestlocal     = N_VInvTest_Pthreads;
194   v->ops->nvconstrmasklocal  = N_VConstrMask_Pthreads;
195   v->ops->nvminquotientlocal = N_VMinQuotient_Pthreads;
196   v->ops->nvwsqrsumlocal     = N_VWSqrSumLocal_Pthreads;
197   v->ops->nvwsqrsummasklocal = N_VWSqrSumMaskLocal_Pthreads;
198 
199   /* XBraid interface operations */
200   v->ops->nvbufsize   = N_VBufSize_Pthreads;
201   v->ops->nvbufpack   = N_VBufPack_Pthreads;
202   v->ops->nvbufunpack = N_VBufUnpack_Pthreads;
203 
204   /* Create content */
205   content = NULL;
206   content = (N_VectorContent_Pthreads) malloc(sizeof *content);
207   if (content == NULL) { N_VDestroy(v); return(NULL); }
208 
209   /* Attach content */
210   v->content = content;
211 
212   /* Initialize content */
213   content->length      = length;
214   content->num_threads = num_threads;
215   content->own_data    = SUNFALSE;
216   content->data        = NULL;
217 
218   return(v);
219 }
220 
221 /* ----------------------------------------------------------------------------
222  * Function to create a new vector
223  */
224 
N_VNew_Pthreads(sunindextype length,int num_threads)225 N_Vector N_VNew_Pthreads(sunindextype length, int num_threads)
226 {
227   N_Vector v;
228   realtype *data;
229 
230   v = NULL;
231   v = N_VNewEmpty_Pthreads(length, num_threads);
232   if (v == NULL) return(NULL);
233 
234   /* Create data */
235   if (length > 0) {
236 
237     /* Allocate memory */
238     data = NULL;
239     data = (realtype *) malloc(length * sizeof(realtype));
240     if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
241 
242     /* Attach data */
243     NV_OWN_DATA_PT(v) = SUNTRUE;
244     NV_DATA_PT(v)     = data;
245 
246   }
247 
248   return(v);
249 }
250 
251 /* ----------------------------------------------------------------------------
252  * Function to create a vector with user data component
253  */
254 
N_VMake_Pthreads(sunindextype length,int num_threads,realtype * v_data)255 N_Vector N_VMake_Pthreads(sunindextype length, int num_threads, realtype *v_data)
256 {
257   N_Vector v;
258 
259   v = NULL;
260   v = N_VNewEmpty_Pthreads(length, num_threads);
261   if (v == NULL) return(NULL);
262 
263   if (length > 0) {
264     /* Attach data */
265     NV_OWN_DATA_PT(v) = SUNFALSE;
266     NV_DATA_PT(v)     = v_data;
267   }
268 
269   return(v);
270 }
271 
272 /* ----------------------------------------------------------------------------
273  * Function to create an array of new vectors.
274  */
275 
N_VCloneVectorArray_Pthreads(int count,N_Vector w)276 N_Vector* N_VCloneVectorArray_Pthreads(int count, N_Vector w)
277 {
278   N_Vector* vs;
279   int j;
280 
281   if (count <= 0) return(NULL);
282 
283   vs = NULL;
284   vs = (N_Vector*) malloc(count * sizeof(N_Vector));
285   if(vs == NULL) return(NULL);
286 
287   for (j = 0; j < count; j++) {
288     vs[j] = NULL;
289     vs[j] = N_VClone_Pthreads(w);
290     if (vs[j] == NULL) {
291       N_VDestroyVectorArray_Pthreads(vs, j-1);
292       return(NULL);
293     }
294   }
295 
296   return(vs);
297 }
298 
299 /* ----------------------------------------------------------------------------
300  * Function to create an array of new vectors with NULL data array.
301  */
302 
N_VCloneVectorArrayEmpty_Pthreads(int count,N_Vector w)303 N_Vector* N_VCloneVectorArrayEmpty_Pthreads(int count, N_Vector w)
304 {
305   N_Vector* vs;
306   int j;
307 
308   if (count <= 0) return(NULL);
309 
310   vs = NULL;
311   vs = (N_Vector*) malloc(count * sizeof(N_Vector));
312   if(vs == NULL) return(NULL);
313 
314   for (j = 0; j < count; j++) {
315     vs[j] = NULL;
316     vs[j] = N_VCloneEmpty_Pthreads(w);
317     if (vs[j] == NULL) {
318       N_VDestroyVectorArray_Pthreads(vs, j-1);
319       return(NULL);
320     }
321   }
322 
323   return(vs);
324 }
325 
326 /* ----------------------------------------------------------------------------
327  * Function to free an array created with N_VCloneVectorArray_Pthreads
328  */
329 
N_VDestroyVectorArray_Pthreads(N_Vector * vs,int count)330 void N_VDestroyVectorArray_Pthreads(N_Vector* vs, int count)
331 {
332   int j;
333 
334   for (j = 0; j < count; j++) N_VDestroy_Pthreads(vs[j]);
335 
336   free(vs); vs = NULL;
337 
338   return;
339 }
340 
341 /* ----------------------------------------------------------------------------
342  * Function to return number of vector elements
343  */
N_VGetLength_Pthreads(N_Vector v)344 sunindextype N_VGetLength_Pthreads(N_Vector v)
345 {
346   return NV_LENGTH_PT(v);
347 }
348 
349 /* ----------------------------------------------------------------------------
350  * Function to print a vector to stdout
351  */
352 
N_VPrint_Pthreads(N_Vector x)353 void N_VPrint_Pthreads(N_Vector x)
354 {
355   N_VPrintFile_Pthreads(x, stdout);
356 }
357 
358 /* ----------------------------------------------------------------------------
359  * Function to print a vector to outfile
360  */
361 
N_VPrintFile_Pthreads(N_Vector x,FILE * outfile)362 void N_VPrintFile_Pthreads(N_Vector x, FILE *outfile)
363 {
364   sunindextype i, N;
365   realtype *xd;
366 
367   xd = NULL;
368 
369   N  = NV_LENGTH_PT(x);
370   xd = NV_DATA_PT(x);
371 
372   for (i = 0; i < N; i++) {
373 #if defined(SUNDIALS_EXTENDED_PRECISION)
374     fprintf(outfile, "%11.8Lg\n", xd[i]);
375 #elif defined(SUNDIALS_DOUBLE_PRECISION)
376     fprintf(outfile, "%11.8g\n", xd[i]);
377 #else
378     fprintf(outfile, "%11.8g\n", xd[i]);
379 #endif
380   }
381   fprintf(outfile, "\n");
382 
383   return;
384 }
385 
386 /*
387  * -----------------------------------------------------------------
388  * implementation of vector operations
389  * -----------------------------------------------------------------
390  */
391 
392 /* ----------------------------------------------------------------------------
393  * Create new vector from existing vector without attaching data
394  */
395 
N_VCloneEmpty_Pthreads(N_Vector w)396 N_Vector N_VCloneEmpty_Pthreads(N_Vector w)
397 {
398   N_Vector v;
399   N_VectorContent_Pthreads content;
400 
401   if (w == NULL) return(NULL);
402 
403   /* Create vector */
404   v = NULL;
405   v = N_VNewEmpty();
406   if (v == NULL) return(NULL);
407 
408   /* Attach operations */
409   if (N_VCopyOps(w, v)) { N_VDestroy(v); return(NULL); }
410 
411   /* Create content */
412   content = NULL;
413   content = (N_VectorContent_Pthreads) malloc(sizeof *content);
414   if (content == NULL) { N_VDestroy(v); return(NULL); }
415 
416   /* Attach content */
417   v->content = content;
418 
419   /* Initialize content */
420   content->length      = NV_LENGTH_PT(w);
421   content->num_threads = NV_NUM_THREADS_PT(w);
422   content->own_data    = SUNFALSE;
423   content->data        = NULL;
424 
425   return(v);
426 }
427 
428 
429 /* ----------------------------------------------------------------------------
430  * Create new vector from existing vector and attach data
431  */
432 
N_VClone_Pthreads(N_Vector w)433 N_Vector N_VClone_Pthreads(N_Vector w)
434 {
435   N_Vector v;
436   realtype *data;
437   sunindextype length;
438 
439   v = NULL;
440   v = N_VCloneEmpty_Pthreads(w);
441   if (v == NULL) return(NULL);
442 
443   length = NV_LENGTH_PT(w);
444 
445   /* Create data */
446   if (length > 0) {
447 
448     /* Allocate memory */
449     data = NULL;
450     data = (realtype *) malloc(length * sizeof(realtype));
451     if(data == NULL) { N_VDestroy_Pthreads(v); return(NULL); }
452 
453     /* Attach data */
454     NV_OWN_DATA_PT(v) = SUNTRUE;
455     NV_DATA_PT(v)     = data;
456 
457   }
458 
459   return(v);
460 }
461 
462 /* ----------------------------------------------------------------------------
463  * Destroy vector and free vector memory
464  */
465 
N_VDestroy_Pthreads(N_Vector v)466 void N_VDestroy_Pthreads(N_Vector v)
467 {
468   if (v == NULL) return;
469 
470   /* free content */
471   if (v->content != NULL) {
472     if (NV_OWN_DATA_PT(v) && NV_DATA_PT(v) != NULL) {
473       free(NV_DATA_PT(v));
474       NV_DATA_PT(v) = NULL;
475     }
476     free(v->content);
477     v->content = NULL;
478   }
479 
480   /* free ops and vector */
481   if (v->ops != NULL) { free(v->ops); v->ops = NULL; }
482   free(v); v = NULL;
483 
484   return;
485 }
486 
487 /* ----------------------------------------------------------------------------
488  * Get storage requirement for vector
489  */
490 
N_VSpace_Pthreads(N_Vector v,sunindextype * lrw,sunindextype * liw)491 void N_VSpace_Pthreads(N_Vector v, sunindextype *lrw, sunindextype *liw)
492 {
493   *lrw = NV_LENGTH_PT(v);
494   *liw = 1;
495 
496   return;
497 }
498 
499 
500 /* ----------------------------------------------------------------------------
501  * Get vector data pointer
502  */
503 
N_VGetArrayPointer_Pthreads(N_Vector v)504 realtype *N_VGetArrayPointer_Pthreads(N_Vector v)
505 {
506   return((realtype *) NV_DATA_PT(v));
507 }
508 
509 
510 /* ----------------------------------------------------------------------------
511  * Set vector data pointer
512  */
513 
N_VSetArrayPointer_Pthreads(realtype * v_data,N_Vector v)514 void N_VSetArrayPointer_Pthreads(realtype *v_data, N_Vector v)
515 {
516   if (NV_LENGTH_PT(v) > 0) NV_DATA_PT(v) = v_data;
517 
518   return;
519 }
520 
521 
522 /* ----------------------------------------------------------------------------
523  * Compute linear sum z[i] = a*x[i]+b*y[i]
524  */
525 
N_VLinearSum_Pthreads(realtype a,N_Vector x,realtype b,N_Vector y,N_Vector z)526 void N_VLinearSum_Pthreads(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z)
527 {
528   sunindextype   N;
529   int            i, nthreads;
530   pthread_t      *threads;
531   Pthreads_Data  *thread_data;
532   pthread_attr_t attr;
533 
534   realtype c;
535   N_Vector v1, v2;
536   booleantype test;
537 
538   if ((b == ONE) && (z == y)) {    /* BLAS usage: axpy y <- ax+y */
539     Vaxpy_Pthreads(a,x,y);
540     return;
541   }
542 
543   if ((a == ONE) && (z == x)) {    /* BLAS usage: axpy x <- by+x */
544     Vaxpy_Pthreads(b,y,x);
545     return;
546   }
547 
548   /* Case: a == b == 1.0 */
549 
550   if ((a == ONE) && (b == ONE)) {
551     VSum_Pthreads(x, y, z);
552     return;
553   }
554 
555   /* Cases: (1) a == 1.0, b = -1.0, (2) a == -1.0, b == 1.0 */
556 
557   if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
558     v1 = test ? y : x;
559     v2 = test ? x : y;
560     VDiff_Pthreads(v2, v1, z);
561     return;
562   }
563 
564   /* Cases: (1) a == 1.0, b == other or 0.0, (2) a == other or 0.0, b == 1.0 */
565   /* if a or b is 0.0, then user should have called N_VScale */
566 
567   if ((test = (a == ONE)) || (b == ONE)) {
568     c  = test ? b : a;
569     v1 = test ? y : x;
570     v2 = test ? x : y;
571     VLin1_Pthreads(c, v1, v2, z);
572     return;
573   }
574 
575   /* Cases: (1) a == -1.0, b != 1.0, (2) a != 1.0, b == -1.0 */
576 
577   if ((test = (a == -ONE)) || (b == -ONE)) {
578     c = test ? b : a;
579     v1 = test ? y : x;
580     v2 = test ? x : y;
581     VLin2_Pthreads(c, v1, v2, z);
582     return;
583   }
584 
585   /* Case: a == b */
586   /* catches case both a and b are 0.0 - user should have called N_VConst */
587 
588   if (a == b) {
589     VScaleSum_Pthreads(a, x, y, z);
590     return;
591   }
592 
593   /* Case: a == -b */
594 
595   if (a == -b) {
596     VScaleDiff_Pthreads(a, x, y, z);
597     return;
598   }
599 
600   /* Do all cases not handled above:
601      (1) a == other, b == 0.0 - user should have called N_VScale
602      (2) a == 0.0, b == other - user should have called N_VScale
603      (3) a,b == other, a !=b, a != -b */
604 
605   /* allocate threads and thread data structs */
606   N            = NV_LENGTH_PT(x);
607   nthreads     = NV_NUM_THREADS_PT(x);
608   threads      = (pthread_t *) malloc(nthreads*sizeof(pthread_t));
609   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
610 
611   /* set thread attributes */
612   pthread_attr_init(&attr);
613   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
614 
615   for (i=0; i<nthreads; i++) {
616     /* initialize thread data */
617     N_VInitThreadData(&thread_data[i]);
618 
619     /* compute start and end loop index for thread */
620     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
621 
622     /* pack thread data */
623     thread_data[i].c1 = a;
624     thread_data[i].c2 = b;
625     thread_data[i].v1 = NV_DATA_PT(x);
626     thread_data[i].v2 = NV_DATA_PT(y);
627     thread_data[i].v3 = NV_DATA_PT(z);
628 
629     /* create threads and call pthread companion function */
630     pthread_create(&threads[i], &attr, N_VLinearSum_PT, (void *) &thread_data[i]);
631   }
632 
633   /* wait for all threads to finish */
634   for (i=0; i<nthreads; i++) {
635     pthread_join(threads[i], NULL);
636   }
637 
638   /* clean up and return */
639   pthread_attr_destroy(&attr);
640   free(threads);
641   free(thread_data);
642 
643   return;
644 }
645 
646 /* ----------------------------------------------------------------------------
647  * Pthread companion function to N_VLinearSum
648  */
649 
N_VLinearSum_PT(void * thread_data)650 static void *N_VLinearSum_PT(void *thread_data)
651 {
652   sunindextype i, start, end;
653   realtype a, b;
654   realtype *xd, *yd, *zd;
655   Pthreads_Data *my_data;
656 
657   /* extract thread data */
658   my_data = (Pthreads_Data *) thread_data;
659 
660   a  = my_data->c1;
661   b  = my_data->c2;
662   xd = my_data->v1;
663   yd = my_data->v2;
664   zd = my_data->v3;
665 
666   start = my_data->start;
667   end   = my_data->end;
668 
669   /* compute linear sum */
670   for (i = start; i < end; i++){
671     zd[i] = (a*xd[i])+(b*yd[i]);
672   }
673 
674   /* exit */
675   pthread_exit(NULL);
676 }
677 
678 
679 /* ----------------------------------------------------------------------------
680  * Assigns constant value to all vector elements, z[i] = c
681  */
682 
N_VConst_Pthreads(realtype c,N_Vector z)683 void N_VConst_Pthreads(realtype c, N_Vector z)
684 {
685   sunindextype   N;
686   int            i, nthreads;
687   pthread_t      *threads;
688   Pthreads_Data  *thread_data;
689   pthread_attr_t attr;
690 
691   /* allocate threads and thread data structs */
692   N            = NV_LENGTH_PT(z);
693   nthreads     = NV_NUM_THREADS_PT(z);
694   threads      = malloc(nthreads*sizeof(pthread_t));
695   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
696 
697   /* set thread attributes */
698   pthread_attr_init(&attr);
699   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
700 
701   for (i=0; i<nthreads; i++) {
702     /* initialize thread data */
703     N_VInitThreadData(&thread_data[i]);
704 
705     /* compute start and end loop index for thread */
706     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
707 
708     /* pack thread data */
709     thread_data[i].c1 = c;
710     thread_data[i].v1 = NV_DATA_PT(z);
711 
712     /* create threads and call pthread companion function */
713     pthread_create(&threads[i], &attr, N_VConst_PT, (void *) &thread_data[i]);
714   }
715 
716   /* wait for all threads to finish */
717   for (i=0; i<nthreads; i++) {
718     pthread_join(threads[i], NULL);
719   }
720 
721   /* clean up and return */
722   pthread_attr_destroy(&attr);
723   free(threads);
724   free(thread_data);
725 
726   return;
727 }
728 
729 
730 /* ----------------------------------------------------------------------------
731  * Pthread companion function to N_VConst
732  */
733 
N_VConst_PT(void * thread_data)734 static void *N_VConst_PT(void *thread_data)
735 {
736   sunindextype i, start, end;
737   realtype c;
738   realtype *zd;
739   Pthreads_Data *my_data;
740 
741   /* extract thread data */
742   my_data = (Pthreads_Data *) thread_data;
743 
744   c  = my_data->c1;
745   zd = my_data->v1;
746 
747   start = my_data->start;
748   end   = my_data->end;
749 
750   /* assign constant values */
751   for (i = start; i < end; i++)
752     zd[i] = c;
753 
754   /* exit */
755   pthread_exit(NULL);
756 }
757 
758 
759 /* ----------------------------------------------------------------------------
760  * Compute componentwise product z[i] = x[i]*y[i]
761  */
762 
N_VProd_Pthreads(N_Vector x,N_Vector y,N_Vector z)763 void N_VProd_Pthreads(N_Vector x, N_Vector y, N_Vector z)
764 {
765   sunindextype   N;
766   int            i, nthreads;
767   pthread_t      *threads;
768   Pthreads_Data  *thread_data;
769   pthread_attr_t attr;
770 
771   /* allocate threads and thread data structs */
772   N            = NV_LENGTH_PT(x);
773   nthreads     = NV_NUM_THREADS_PT(x);
774   threads      = malloc(nthreads*sizeof(pthread_t));
775   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
776 
777   /* set thread attributes */
778   pthread_attr_init(&attr);
779   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
780 
781   for (i=0; i<nthreads; i++) {
782     /* initialize thread data */
783     N_VInitThreadData(&thread_data[i]);
784 
785     /* compute start and end loop index for thread */
786     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
787 
788     /* pack thread data */
789     thread_data[i].v1 = NV_DATA_PT(x);
790     thread_data[i].v2 = NV_DATA_PT(y);
791     thread_data[i].v3 = NV_DATA_PT(z);
792 
793     /* create threads and call pthread companion function */
794     pthread_create(&threads[i], &attr, N_VProd_PT, (void *) &thread_data[i]);
795   }
796 
797   /* wait for all threads to finish */
798   for (i=0; i<nthreads; i++) {
799     pthread_join(threads[i], NULL);
800   }
801 
802   /* clean up and exit */
803   pthread_attr_destroy(&attr);
804   free(threads);
805   free(thread_data);
806 
807   return;
808 }
809 
810 /* ----------------------------------------------------------------------------
811  * Pthread companion function to N_VProd
812  */
813 
N_VProd_PT(void * thread_data)814 static void *N_VProd_PT(void *thread_data)
815 {
816   sunindextype i, start, end;
817   realtype *xd, *yd, *zd;
818   Pthreads_Data *my_data;
819 
820   /* extract thread data */
821   my_data = (Pthreads_Data *) thread_data;
822 
823   xd = my_data->v1;
824   yd = my_data->v2;
825   zd = my_data->v3;
826 
827   start = my_data->start;
828   end   = my_data->end;
829 
830   /* compute componentwise product */
831   for (i = start; i < end; i++)
832     zd[i] = xd[i]*yd[i];
833 
834   /* exit */
835   pthread_exit(NULL);
836 }
837 
838 
839 /* ----------------------------------------------------------------------------
840  * Compute componentwise division z[i] = x[i]/y[i]
841  */
842 
N_VDiv_Pthreads(N_Vector x,N_Vector y,N_Vector z)843 void N_VDiv_Pthreads(N_Vector x, N_Vector y, N_Vector z)
844 {
845   sunindextype   N;
846   int            i, nthreads;
847   pthread_t      *threads;
848   Pthreads_Data  *thread_data;
849   pthread_attr_t attr;
850 
851   /* allocate threads and thread data structs */
852   N            = NV_LENGTH_PT(x);
853   nthreads     = NV_NUM_THREADS_PT(x);
854   threads      = malloc(nthreads*sizeof(pthread_t));
855   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
856 
857   /* set thread attributes */
858   pthread_attr_init(&attr);
859   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
860 
861   for (i=0; i<nthreads; i++) {
862     /* initialize thread data */
863     N_VInitThreadData(&thread_data[i]);
864 
865     /* compute start and end loop index for thread */
866     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
867 
868     /* pack thread data */
869     thread_data[i].v1 = NV_DATA_PT(x);
870     thread_data[i].v2 = NV_DATA_PT(y);
871     thread_data[i].v3 = NV_DATA_PT(z);
872 
873     /* create threads and call pthread companion function */
874     pthread_create(&threads[i], &attr, N_VDiv_PT, (void *) &thread_data[i]);
875   }
876 
877   /* wait for all threads to finish */
878   for (i=0; i<nthreads; i++) {
879     pthread_join(threads[i], NULL);
880   }
881 
882   /* clean up and return */
883   pthread_attr_destroy(&attr);
884   free(threads);
885   free(thread_data);
886 
887   return;
888 }
889 
890 
891 /* ----------------------------------------------------------------------------
892  * Pthread companion function to N_VDiv
893  */
894 
N_VDiv_PT(void * thread_data)895 static void *N_VDiv_PT(void *thread_data)
896 {
897   sunindextype i, start, end;
898   realtype *xd, *yd, *zd;
899   Pthreads_Data *my_data;
900 
901   /* extract thread data */
902   my_data = (Pthreads_Data *) thread_data;
903 
904   xd = my_data->v1;
905   yd = my_data->v2;
906   zd = my_data->v3;
907 
908   start = my_data->start;
909   end   = my_data->end;
910 
911   /* compute componentwise division */
912   for (i = start; i < end; i++)
913     zd[i] = xd[i]/yd[i];
914 
915   /* exit */
916   pthread_exit(NULL);
917 }
918 
919 
920 /* ----------------------------------------------------------------------------
921  * Compute scaler multiplication z[i] = c*x[i]
922  */
923 
N_VScale_Pthreads(realtype c,N_Vector x,N_Vector z)924 void N_VScale_Pthreads(realtype c, N_Vector x, N_Vector z)
925 {
926   sunindextype   N;
927   int            i, nthreads;
928   pthread_t      *threads;
929   Pthreads_Data  *thread_data;
930   pthread_attr_t attr;
931 
932   if (z == x) {  /* BLAS usage: scale x <- cx */
933     VScaleBy_Pthreads(c, x);
934     return;
935   }
936 
937   if (c == ONE) {
938     VCopy_Pthreads(x, z);
939   } else if (c == -ONE) {
940     VNeg_Pthreads(x, z);
941   } else {
942     /* allocate threads and thread data structs */
943     N            = NV_LENGTH_PT(x);
944     nthreads     = NV_NUM_THREADS_PT(x);
945     threads      = malloc(nthreads*sizeof(pthread_t));
946     thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
947 
948     /* set thread attributes */
949     pthread_attr_init(&attr);
950     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
951 
952     for (i=0; i<nthreads; i++) {
953       /* initialize thread data */
954       N_VInitThreadData(&thread_data[i]);
955 
956       /* compute start and end loop index for thread */
957       N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
958 
959       /* pack thread data */
960       thread_data[i].c1 = c;
961       thread_data[i].v1 = NV_DATA_PT(x);
962       thread_data[i].v2 = NV_DATA_PT(z);
963 
964       /* create threads and call pthread companion function */
965       pthread_create(&threads[i], &attr, N_VScale_PT, (void *) &thread_data[i]);
966     }
967 
968     /* wait for all threads to finish */
969     for (i=0; i<nthreads; i++) {
970       pthread_join(threads[i], NULL);
971     }
972 
973     /* clean up */
974     pthread_attr_destroy(&attr);
975     free(threads);
976     free(thread_data);
977   }
978 
979   return;
980 }
981 
982 
983 /* ----------------------------------------------------------------------------
984  * Pthread companion function to N_VScale
985  */
986 
N_VScale_PT(void * thread_data)987 static void *N_VScale_PT(void *thread_data)
988 {
989   sunindextype i, start, end;
990   realtype c;
991   realtype *xd, *zd;
992   Pthreads_Data *my_data;
993 
994   /* extract thread data */
995   my_data = (Pthreads_Data *) thread_data;
996 
997   c  = my_data->c1;
998   xd = my_data->v1;
999   zd = my_data->v2;
1000 
1001   start = my_data->start;
1002   end   = my_data->end;
1003 
1004   /* compute scaler multiplication */
1005   for (i = start; i < end; i++)
1006     zd[i] = c*xd[i];
1007 
1008   /* exit */
1009   pthread_exit(NULL);
1010 }
1011 
1012 
1013 /* ----------------------------------------------------------------------------
1014  * Compute absolute value of vector components z[i] = SUNRabs(x[i])
1015  */
1016 
N_VAbs_Pthreads(N_Vector x,N_Vector z)1017 void N_VAbs_Pthreads(N_Vector x, N_Vector z)
1018 {
1019   sunindextype   N;
1020   int            i, nthreads;
1021   pthread_t      *threads;
1022   Pthreads_Data  *thread_data;
1023   pthread_attr_t attr;
1024 
1025   /* allocate threads and thread data structs */
1026   N            = NV_LENGTH_PT(x);
1027   nthreads     = NV_NUM_THREADS_PT(x);
1028   threads      = malloc(nthreads*sizeof(pthread_t));
1029   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1030 
1031   /* set thread attributes */
1032   pthread_attr_init(&attr);
1033   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1034 
1035   for (i=0; i<nthreads; i++) {
1036     /* initialize thread data */
1037     N_VInitThreadData(&thread_data[i]);
1038 
1039     /* compute start and end loop index for thread */
1040     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1041 
1042     /* pack thread data */
1043     thread_data[i].v1 = NV_DATA_PT(x);
1044     thread_data[i].v2 = NV_DATA_PT(z);
1045 
1046     /* create threads and call pthread companion function */
1047     pthread_create(&threads[i], &attr, N_VAbs_PT, (void *) &thread_data[i]);
1048   }
1049 
1050   /* wait for all threads to finish */
1051   for (i=0; i<nthreads; i++) {
1052     pthread_join(threads[i], NULL);
1053   }
1054 
1055   /* clean up and return */
1056   pthread_attr_destroy(&attr);
1057   free(threads);
1058   free(thread_data);
1059 
1060   return;
1061 }
1062 
1063 
1064 /* ----------------------------------------------------------------------------
1065  * Pthread companion function to N_VAbs
1066  */
1067 
N_VAbs_PT(void * thread_data)1068 static void *N_VAbs_PT(void *thread_data)
1069 {
1070   sunindextype i, start, end;
1071   realtype *xd, *zd;
1072   Pthreads_Data *my_data;
1073 
1074   /* extract thread data */
1075   my_data = (Pthreads_Data *) thread_data;
1076 
1077   xd = my_data->v1;
1078   zd = my_data->v2;
1079 
1080   start = my_data->start;
1081   end   = my_data->end;
1082 
1083   /* compute absolute value of components */
1084   for (i = start; i < end; i++)
1085     zd[i] = SUNRabs(xd[i]);
1086 
1087   /* exit */
1088   pthread_exit(NULL);
1089 }
1090 
1091 
1092 /* ----------------------------------------------------------------------------
1093  * Compute componentwise inverse z[i] = 1 / x[i]
1094  */
1095 
N_VInv_Pthreads(N_Vector x,N_Vector z)1096 void N_VInv_Pthreads(N_Vector x, N_Vector z)
1097 {
1098   sunindextype   N;
1099   int            i, nthreads;
1100   pthread_t      *threads;
1101   Pthreads_Data  *thread_data;
1102   pthread_attr_t attr;
1103 
1104   /* allocate threads and thread data structs */
1105   N            = NV_LENGTH_PT(x);
1106   nthreads     = NV_NUM_THREADS_PT(x);
1107   threads      = malloc(nthreads*sizeof(pthread_t));
1108   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1109 
1110   /* set thread attributes */
1111   pthread_attr_init(&attr);
1112   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1113 
1114   for (i=0; i<nthreads; i++) {
1115     /* initialize thread data */
1116     N_VInitThreadData(&thread_data[i]);
1117 
1118     /* compute start and end loop index for thread */
1119     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1120 
1121     /* pack thread data */
1122     thread_data[i].v1 = NV_DATA_PT(x);
1123     thread_data[i].v2 = NV_DATA_PT(z);
1124 
1125     /* create threads and call pthread companion function */
1126     pthread_create(&threads[i], &attr, N_VInv_PT, (void *) &thread_data[i]);
1127   }
1128 
1129   /* wait for all threads to finish */
1130   for (i=0; i<nthreads; i++) {
1131     pthread_join(threads[i], NULL);
1132   }
1133 
1134   /* clean up and return */
1135   pthread_attr_destroy(&attr);
1136   free(threads);
1137   free(thread_data);
1138 
1139   return;
1140 }
1141 
1142 
1143 /* ----------------------------------------------------------------------------
1144  * Pthread companion function to N_VInv
1145  */
1146 
N_VInv_PT(void * thread_data)1147 static void *N_VInv_PT(void *thread_data)
1148 {
1149   sunindextype i, start, end;
1150   realtype *xd, *zd;
1151   Pthreads_Data *my_data;
1152 
1153   /* extract thread data */
1154   my_data = (Pthreads_Data *) thread_data;
1155 
1156   xd = my_data->v1;
1157   zd = my_data->v2;
1158 
1159   start = my_data->start;
1160   end   = my_data->end;
1161 
1162   /* compute componentwise inverse */
1163   for (i = start; i < end; i++)
1164     zd[i] = ONE/xd[i];
1165 
1166   /* exit */
1167   pthread_exit(NULL);
1168 }
1169 
1170 
1171 /* ----------------------------------------------------------------------------
1172  * Compute componentwise addition of a scaler to a vector z[i] = x[i] + b
1173  */
1174 
N_VAddConst_Pthreads(N_Vector x,realtype b,N_Vector z)1175 void N_VAddConst_Pthreads(N_Vector x, realtype b, N_Vector z)
1176 {
1177   sunindextype   N;
1178   int            i, nthreads;
1179   pthread_t      *threads;
1180   Pthreads_Data  *thread_data;
1181   pthread_attr_t attr;
1182 
1183   /* allocate threads and thread data structs */
1184   N            = NV_LENGTH_PT(x);
1185   nthreads     = NV_NUM_THREADS_PT(x);
1186   threads      = malloc(nthreads*sizeof(pthread_t));
1187   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1188 
1189   /* set thread attributes */
1190   pthread_attr_init(&attr);
1191   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1192 
1193   for (i=0; i<nthreads; i++) {
1194     /* initialize thread data */
1195     N_VInitThreadData(&thread_data[i]);
1196 
1197     /* compute start and end loop index for thread */
1198     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1199 
1200     /* pack thread data */
1201     thread_data[i].c1 = b;
1202     thread_data[i].v1 = NV_DATA_PT(x);
1203     thread_data[i].v2 = NV_DATA_PT(z);
1204 
1205     /* create threads and call pthread companion function */
1206     pthread_create(&threads[i], &attr, N_VAddConst_PT, (void *) &thread_data[i]);
1207   }
1208 
1209   /* wait for all threads to finish */
1210   for (i=0; i<nthreads; i++) {
1211     pthread_join(threads[i], NULL);
1212   }
1213 
1214   /* clean up and return */
1215   pthread_attr_destroy(&attr);
1216   free(threads);
1217   free(thread_data);
1218 
1219   return;
1220 }
1221 
1222 
1223 /* ----------------------------------------------------------------------------
1224  * Pthread companion function to N_VAddConst
1225  */
1226 
N_VAddConst_PT(void * thread_data)1227 static void *N_VAddConst_PT(void *thread_data)
1228 {
1229   sunindextype i, start, end;
1230   realtype b;
1231   realtype *xd, *zd;
1232   Pthreads_Data *my_data;
1233 
1234   /* extract thread data */
1235   my_data = (Pthreads_Data *) thread_data;
1236 
1237   b  = my_data->c1;
1238   xd = my_data->v1;
1239   zd = my_data->v2;
1240 
1241   start = my_data->start;
1242   end   = my_data->end;
1243 
1244   /* compute componentwise constant addition */
1245   for (i = start; i < end; i++)
1246     zd[i] = xd[i] + b;
1247 
1248   /* exit */
1249   pthread_exit(NULL);
1250 }
1251 
1252 
1253 /* ----------------------------------------------------------------------------
1254  * Computes the dot product of two vectors, a = sum(x[i]*y[i])
1255  */
1256 
N_VDotProd_Pthreads(N_Vector x,N_Vector y)1257 realtype N_VDotProd_Pthreads(N_Vector x, N_Vector y)
1258 {
1259   sunindextype    N;
1260   int             i, nthreads;
1261   pthread_t       *threads;
1262   Pthreads_Data   *thread_data;
1263   pthread_attr_t  attr;
1264   pthread_mutex_t global_mutex;
1265   realtype        sum = ZERO;
1266 
1267   /* allocate threads and thread data structs */
1268   N           = NV_LENGTH_PT(x);
1269   nthreads    = NV_NUM_THREADS_PT(x);
1270   threads     = malloc(nthreads*sizeof(pthread_t));
1271   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1272 
1273   /* set thread attributes */
1274   pthread_attr_init(&attr);
1275   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1276 
1277   /* lock for reduction */
1278   pthread_mutex_init(&global_mutex, NULL);
1279 
1280   for (i=0; i<nthreads; i++) {
1281     /* initialize thread data */
1282     N_VInitThreadData(&thread_data[i]);
1283 
1284     /* compute start and end loop index for thread */
1285     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1286 
1287     /* pack thread data */
1288     thread_data[i].v1 = NV_DATA_PT(x);
1289     thread_data[i].v2 = NV_DATA_PT(y);
1290     thread_data[i].global_val   = &sum;
1291     thread_data[i].global_mutex = &global_mutex;
1292 
1293     /* create threads and call pthread companion function */
1294     pthread_create(&threads[i], &attr, N_VDotProd_PT, (void *) &thread_data[i]);
1295   }
1296 
1297   /* wait for all threads to finish */
1298   for (i=0; i<nthreads; i++) {
1299     pthread_join(threads[i], NULL);
1300   }
1301 
1302   /* clean up and return */
1303   pthread_attr_destroy(&attr);
1304   pthread_mutex_destroy(&global_mutex);
1305   free(threads);
1306   free(thread_data);
1307 
1308   return(sum);
1309 }
1310 
1311 
1312 /* ----------------------------------------------------------------------------
1313  * Pthread companion function to N_VDotProd
1314  */
1315 
N_VDotProd_PT(void * thread_data)1316 static void *N_VDotProd_PT(void *thread_data)
1317 {
1318   sunindextype i, start, end;
1319   realtype *xd, *yd;
1320   realtype local_sum, *global_sum;
1321   Pthreads_Data *my_data;
1322   pthread_mutex_t *global_mutex;
1323 
1324   /* extract thread data */
1325   my_data = (Pthreads_Data *) thread_data;
1326 
1327   xd = my_data->v1;
1328   yd = my_data->v2;
1329 
1330   global_sum   = my_data->global_val;
1331   global_mutex = my_data->global_mutex;
1332 
1333   start = my_data->start;
1334   end   = my_data->end;
1335 
1336   /* compute dot product */
1337   local_sum = ZERO;
1338   for (i = start; i < end; i++)
1339     local_sum += xd[i] * yd[i];
1340 
1341   /* update global sum */
1342   pthread_mutex_lock(global_mutex);
1343   *global_sum += local_sum;
1344   pthread_mutex_unlock(global_mutex);
1345 
1346   /* exit */
1347   pthread_exit(NULL);
1348 }
1349 
1350 
1351 /* ----------------------------------------------------------------------------
1352  * Computes max norm of the vector
1353  */
1354 
N_VMaxNorm_Pthreads(N_Vector x)1355 realtype N_VMaxNorm_Pthreads(N_Vector x)
1356 {
1357   sunindextype    N;
1358   int             i, nthreads;
1359   pthread_t       *threads;
1360   Pthreads_Data   *thread_data;
1361   pthread_attr_t  attr;
1362   pthread_mutex_t global_mutex;
1363   realtype        max = ZERO;
1364 
1365   /* allocate threads and thread data structs */
1366   N            = NV_LENGTH_PT(x);
1367   nthreads     = NV_NUM_THREADS_PT(x);
1368   threads      = malloc(nthreads*sizeof(pthread_t));
1369   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1370 
1371   /* set thread attributes */
1372   pthread_attr_init(&attr);
1373   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1374 
1375   /* lock for reduction */
1376   pthread_mutex_init(&global_mutex, NULL);
1377 
1378   for (i=0; i<nthreads; i++) {
1379     /* initialize thread data */
1380     N_VInitThreadData(&thread_data[i]);
1381 
1382     /* compute start and end loop index for thread */
1383     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1384 
1385     /* pack thread data */
1386     thread_data[i].v1 = NV_DATA_PT(x);
1387     thread_data[i].global_val   = &max;
1388     thread_data[i].global_mutex = &global_mutex;
1389 
1390     /* create threads and call pthread companion function */
1391     pthread_create(&threads[i], &attr, N_VMaxNorm_PT, (void *) &thread_data[i]);
1392   }
1393 
1394   /* wait for all threads to finish */
1395   for (i=0; i<nthreads; i++) {
1396     pthread_join(threads[i], NULL);
1397   }
1398 
1399   /* clean up and return */
1400   pthread_attr_destroy(&attr);
1401   pthread_mutex_destroy(&global_mutex);
1402   free(threads);
1403   free(thread_data);
1404 
1405   return(max);
1406 }
1407 
1408 
1409 /* ----------------------------------------------------------------------------
1410  * Pthread companion function to N_VMaxNorm
1411  */
1412 
N_VMaxNorm_PT(void * thread_data)1413 static void *N_VMaxNorm_PT(void *thread_data)
1414 {
1415   sunindextype i, start, end;
1416   realtype *xd;
1417   realtype local_max, *global_max;
1418   Pthreads_Data *my_data;
1419   pthread_mutex_t *global_mutex;
1420 
1421   /* extract thread data */
1422   my_data = (Pthreads_Data *) thread_data;
1423 
1424   xd = my_data->v1;
1425 
1426   global_max   = my_data->global_val;
1427   global_mutex = my_data->global_mutex;
1428 
1429   start = my_data->start;
1430   end   = my_data->end;
1431 
1432   /* find local max */
1433   local_max = ZERO;
1434   for (i = start; i < end; i++)
1435     if (SUNRabs(xd[i]) > local_max) local_max = SUNRabs(xd[i]);
1436 
1437   /* update global max */
1438   pthread_mutex_lock(global_mutex);
1439   if (local_max > *global_max) {
1440     *global_max = local_max;
1441   }
1442   pthread_mutex_unlock(global_mutex);
1443 
1444   /* exit */
1445   pthread_exit(NULL);
1446 }
1447 
1448 
1449 /* ----------------------------------------------------------------------------
1450  * Computes weighted root mean square norm of a vector
1451  */
1452 
N_VWrmsNorm_Pthreads(N_Vector x,N_Vector w)1453 realtype N_VWrmsNorm_Pthreads(N_Vector x, N_Vector w)
1454 {
1455   return(SUNRsqrt(N_VWSqrSumLocal_Pthreads(x, w)/(NV_LENGTH_PT(x))));
1456 }
1457 
1458 
1459 /* ----------------------------------------------------------------------------
1460  * Computes weighted square sum of a vector
1461  */
1462 
N_VWSqrSumLocal_Pthreads(N_Vector x,N_Vector w)1463 realtype N_VWSqrSumLocal_Pthreads(N_Vector x, N_Vector w)
1464 {
1465   sunindextype    N;
1466   int             i, nthreads;
1467   pthread_t       *threads;
1468   Pthreads_Data   *thread_data;
1469   pthread_attr_t  attr;
1470   pthread_mutex_t global_mutex;
1471   realtype        sum = ZERO;
1472 
1473   /* allocate threads and thread data structs */
1474   N           = NV_LENGTH_PT(x);
1475   nthreads    = NV_NUM_THREADS_PT(x);
1476   threads     = malloc(nthreads*sizeof(pthread_t));
1477   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1478 
1479   /* set thread attributes */
1480   pthread_attr_init(&attr);
1481   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1482 
1483   /* lock for reduction */
1484   pthread_mutex_init(&global_mutex, NULL);
1485 
1486   for (i=0; i<nthreads; i++) {
1487     /* initialize thread data */
1488     N_VInitThreadData(&thread_data[i]);
1489 
1490     /* compute start and end loop index for thread */
1491     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1492 
1493     /* pack thread data */
1494     thread_data[i].v1 = NV_DATA_PT(x);
1495     thread_data[i].v2 = NV_DATA_PT(w);
1496     thread_data[i].global_val   = &sum;
1497     thread_data[i].global_mutex = &global_mutex;
1498 
1499     /* create threads and call pthread companion function */
1500     pthread_create(&threads[i], &attr, N_VWSqrSum_PT, (void *) &thread_data[i]);
1501   }
1502 
1503   /* wait for all threads to finish */
1504   for (i=0; i<nthreads; i++) {
1505     pthread_join(threads[i], NULL);
1506   }
1507 
1508   /* clean up and return */
1509   pthread_attr_destroy(&attr);
1510   pthread_mutex_destroy(&global_mutex);
1511   free(threads);
1512   free(thread_data);
1513 
1514   return(sum);
1515 }
1516 
1517 
1518 /* ----------------------------------------------------------------------------
1519  * Pthread companion function to N_VWrmsNorm
1520  */
1521 
N_VWSqrSum_PT(void * thread_data)1522 static void *N_VWSqrSum_PT(void *thread_data)
1523 {
1524   sunindextype i, start, end;
1525   realtype *xd, *wd;
1526   realtype local_sum, *global_sum;
1527   Pthreads_Data *my_data;
1528   pthread_mutex_t *global_mutex;
1529 
1530   /* extract thread data */
1531   my_data = (Pthreads_Data *) thread_data;
1532 
1533   xd = my_data->v1;
1534   wd = my_data->v2;
1535 
1536   global_sum   = my_data->global_val;
1537   global_mutex = my_data->global_mutex;
1538 
1539   start = my_data->start;
1540   end   = my_data->end;
1541 
1542   /* compute wrms norm */
1543   local_sum = ZERO;
1544   for (i = start; i < end; i++)
1545     local_sum += SUNSQR(xd[i] * wd[i]);
1546 
1547   /* update global sum */
1548   pthread_mutex_lock(global_mutex);
1549   *global_sum += local_sum;
1550   pthread_mutex_unlock(global_mutex);
1551 
1552   /* exit */
1553   pthread_exit(NULL);
1554 }
1555 
1556 
1557 /* ----------------------------------------------------------------------------
1558  * Computes weighted root mean square norm of a masked vector
1559  */
1560 
N_VWrmsNormMask_Pthreads(N_Vector x,N_Vector w,N_Vector id)1561 realtype N_VWrmsNormMask_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1562 {
1563   return(SUNRsqrt(N_VWSqrSumMaskLocal_Pthreads(x, w, id)/(NV_LENGTH_PT(x))));
1564 }
1565 
1566 
1567 /* ----------------------------------------------------------------------------
1568  * Computes weighted square sum of a masked vector
1569  */
1570 
N_VWSqrSumMaskLocal_Pthreads(N_Vector x,N_Vector w,N_Vector id)1571 realtype N_VWSqrSumMaskLocal_Pthreads(N_Vector x, N_Vector w, N_Vector id)
1572 {
1573   sunindextype    N;
1574   int             i, nthreads;
1575   pthread_t       *threads;
1576   Pthreads_Data   *thread_data;
1577   pthread_attr_t  attr;
1578   pthread_mutex_t global_mutex;
1579   realtype        sum = ZERO;
1580 
1581   /* allocate threads and thread data structs */
1582   N            = NV_LENGTH_PT(x);
1583   nthreads     = NV_NUM_THREADS_PT(x);
1584   threads      = malloc(nthreads*sizeof(pthread_t));
1585   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1586 
1587   /* set thread attributes */
1588   pthread_attr_init(&attr);
1589   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1590 
1591   /* lock for reduction */
1592   pthread_mutex_init(&global_mutex, NULL);
1593 
1594   for (i=0; i<nthreads; i++) {
1595     /* initialize thread data */
1596     N_VInitThreadData(&thread_data[i]);
1597 
1598     /* compute start and end loop index for thread */
1599     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1600 
1601     /* pack thread data */
1602     thread_data[i].v1 = NV_DATA_PT(x);
1603     thread_data[i].v2 = NV_DATA_PT(w);
1604     thread_data[i].v3 = NV_DATA_PT(id);
1605     thread_data[i].global_val   = &sum;
1606     thread_data[i].global_mutex = &global_mutex;
1607 
1608     /* create threads and call pthread companion function */
1609     pthread_create(&threads[i], &attr, N_VWSqrSumMask_PT, (void *) &thread_data[i]);
1610   }
1611 
1612   /* wait for all threads to finish */
1613   for (i=0; i<nthreads; i++) {
1614     pthread_join(threads[i], NULL);
1615   }
1616 
1617   /* clean up and return */
1618   pthread_attr_destroy(&attr);
1619   pthread_mutex_destroy(&global_mutex);
1620   free(threads);
1621   free(thread_data);
1622 
1623   return(sum);
1624 }
1625 
1626 
1627 /* ----------------------------------------------------------------------------
1628  * Pthread companion function to N_VWSqrSumMask
1629  */
1630 
N_VWSqrSumMask_PT(void * thread_data)1631 static void *N_VWSqrSumMask_PT(void *thread_data)
1632 {
1633   sunindextype i, start, end;
1634   realtype *xd, *wd, *idd;
1635   realtype local_sum, *global_sum;
1636   Pthreads_Data *my_data;
1637   pthread_mutex_t *global_mutex;
1638 
1639   /* extract thread data */
1640   my_data = (Pthreads_Data *) thread_data;
1641 
1642   xd  = my_data->v1;
1643   wd  = my_data->v2;
1644   idd = my_data->v3;
1645 
1646   global_sum   = my_data->global_val;
1647   global_mutex = my_data->global_mutex;
1648 
1649   start = my_data->start;
1650   end   = my_data->end;
1651 
1652   /* compute wrms norm with mask */
1653   local_sum = ZERO;
1654   for (i = start; i < end; i++) {
1655     if (idd[i] > ZERO)
1656       local_sum += SUNSQR(xd[i]*wd[i]);
1657   }
1658 
1659   /* update global sum */
1660   pthread_mutex_lock(global_mutex);
1661   *global_sum += local_sum;
1662   pthread_mutex_unlock(global_mutex);
1663 
1664   /* exit */
1665   pthread_exit(NULL);
1666 }
1667 
1668 
1669 /* ----------------------------------------------------------------------------
1670  * Finds the minimun component of a vector
1671  */
1672 
N_VMin_Pthreads(N_Vector x)1673 realtype N_VMin_Pthreads(N_Vector x)
1674 {
1675   sunindextype    N;
1676   int             i, nthreads;
1677   pthread_t       *threads;
1678   Pthreads_Data   *thread_data;
1679   pthread_attr_t  attr;
1680   pthread_mutex_t global_mutex;
1681   realtype        min;
1682 
1683   /* initialize global min */
1684   min = NV_Ith_PT(x,0);
1685 
1686   /* allocate threads and thread data structs */
1687   N           = NV_LENGTH_PT(x);
1688   nthreads    = NV_NUM_THREADS_PT(x);
1689   threads     = malloc(nthreads*sizeof(pthread_t));
1690   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1691 
1692   /* set thread attributes */
1693   pthread_attr_init(&attr);
1694   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1695 
1696   /* lock for reduction */
1697   pthread_mutex_init(&global_mutex, NULL);
1698 
1699   for (i=0; i<nthreads; i++) {
1700     /* initialize thread data */
1701     N_VInitThreadData(&thread_data[i]);
1702 
1703     /* compute start and end loop index for thread */
1704     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1705 
1706     /* pack thread data */
1707     thread_data[i].v1 = NV_DATA_PT(x);
1708     thread_data[i].global_val   = &min;
1709     thread_data[i].global_mutex = &global_mutex;
1710 
1711     /* create threads and call pthread companion function */
1712     pthread_create(&threads[i], &attr, N_VMin_PT, (void *) &thread_data[i]);
1713   }
1714 
1715   /* wait for all threads to finish */
1716   for (i=0; i<nthreads; i++) {
1717     pthread_join(threads[i], NULL);
1718   }
1719 
1720   /* clean up and return */
1721   pthread_attr_destroy(&attr);
1722   pthread_mutex_destroy(&global_mutex);
1723   free(threads);
1724   free(thread_data);
1725 
1726   return(min);
1727 }
1728 
1729 
1730 /* ----------------------------------------------------------------------------
1731  * Pthread companion function to N_VMin
1732  */
1733 
N_VMin_PT(void * thread_data)1734 static void *N_VMin_PT(void *thread_data)
1735 {
1736   sunindextype i, start, end;
1737   realtype *xd;
1738   realtype local_min, *global_min;
1739   Pthreads_Data *my_data;
1740   pthread_mutex_t *global_mutex;
1741 
1742   /* extract thread data */
1743   my_data = (Pthreads_Data *) thread_data;
1744 
1745   xd = my_data->v1;
1746 
1747   global_min   = my_data->global_val;
1748   global_mutex = my_data->global_mutex;
1749 
1750   start = my_data->start;
1751   end   = my_data->end;
1752 
1753   /* find local min */
1754   local_min = *global_min;
1755   for (i = start; i < end; i++) {
1756     if (xd[i] < local_min)
1757       local_min = xd[i];
1758   }
1759 
1760   /* update global min */
1761   pthread_mutex_lock(global_mutex);
1762   if (local_min < *global_min)
1763     *global_min = local_min;
1764   pthread_mutex_unlock(global_mutex);
1765 
1766   /* exit */
1767   pthread_exit(NULL);
1768 }
1769 
1770 
1771 /* ----------------------------------------------------------------------------
1772  * Computes weighted L2 norm of a vector
1773  */
1774 
N_VWL2Norm_Pthreads(N_Vector x,N_Vector w)1775 realtype N_VWL2Norm_Pthreads(N_Vector x, N_Vector w)
1776 {
1777   sunindextype    N;
1778   int             i, nthreads;
1779   pthread_t       *threads;
1780   Pthreads_Data   *thread_data;
1781   pthread_attr_t  attr;
1782   pthread_mutex_t global_mutex;
1783   realtype        sum = ZERO;
1784 
1785   /* allocate threads and thread data structs */
1786   N           = NV_LENGTH_PT(x);
1787   nthreads    = NV_NUM_THREADS_PT(x);
1788   threads     = malloc(nthreads*sizeof(pthread_t));
1789   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1790 
1791   /* set thread attributes */
1792   pthread_attr_init(&attr);
1793   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1794 
1795   /* lock for reduction */
1796   pthread_mutex_init(&global_mutex, NULL);
1797 
1798   for (i=0; i<nthreads; i++) {
1799     /* initialize thread data */
1800     N_VInitThreadData(&thread_data[i]);
1801 
1802     /* compute start and end loop index for thread */
1803     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1804 
1805     /* pack thread data */
1806     thread_data[i].v1 = NV_DATA_PT(x);
1807     thread_data[i].v2 = NV_DATA_PT(w);
1808     thread_data[i].global_val   = &sum;
1809     thread_data[i].global_mutex = &global_mutex;
1810 
1811     /* create threads and call pthread companion function */
1812     pthread_create(&threads[i], &attr, N_VWL2Norm_PT, (void *) &thread_data[i]);
1813   }
1814 
1815   /* wait for all threads to finish */
1816   for (i=0; i<nthreads; i++) {
1817     pthread_join(threads[i], NULL);
1818   }
1819 
1820   /* clean up and return */
1821   pthread_attr_destroy(&attr);
1822   pthread_mutex_destroy(&global_mutex);
1823   free(threads);
1824   free(thread_data);
1825 
1826   return(SUNRsqrt(sum));
1827 }
1828 
1829 
1830 /* ----------------------------------------------------------------------------
1831  * Pthread companion function to N_VWL2Norm
1832  */
1833 
N_VWL2Norm_PT(void * thread_data)1834 static void *N_VWL2Norm_PT(void *thread_data)
1835 {
1836   sunindextype i, start, end;
1837   realtype *xd, *wd;
1838   realtype local_sum, *global_sum;
1839   Pthreads_Data *my_data;
1840   pthread_mutex_t *global_mutex;
1841 
1842   /* extract thread data */
1843   my_data = (Pthreads_Data *) thread_data;
1844 
1845   xd = my_data->v1;
1846   wd = my_data->v2;
1847 
1848   global_sum   = my_data->global_val;
1849   global_mutex = my_data->global_mutex;
1850 
1851   start = my_data->start;
1852   end   = my_data->end;
1853 
1854   /* compute WL2 norm */
1855   local_sum = ZERO;
1856   for (i = start; i < end; i++)
1857     local_sum += SUNSQR(xd[i]*wd[i]);
1858 
1859   /* update global sum */
1860   pthread_mutex_lock(global_mutex);
1861   *global_sum += local_sum;
1862   pthread_mutex_unlock(global_mutex);
1863 
1864   /* exit */
1865   pthread_exit(NULL);
1866 }
1867 
1868 
1869 /* ----------------------------------------------------------------------------
1870  * Computes L1 norm of a vector
1871  */
1872 
N_VL1Norm_Pthreads(N_Vector x)1873 realtype N_VL1Norm_Pthreads(N_Vector x)
1874 {
1875   sunindextype    N;
1876   int             i, nthreads;
1877   pthread_t       *threads;
1878   Pthreads_Data   *thread_data;
1879   pthread_attr_t  attr;
1880   pthread_mutex_t global_mutex;
1881   realtype        sum = ZERO;
1882 
1883   /* allocate threads and thread data structs */
1884   N            = NV_LENGTH_PT(x);
1885   nthreads     = NV_NUM_THREADS_PT(x);
1886   threads      = malloc(nthreads*sizeof(pthread_t));
1887   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1888 
1889   /* set thread attributes */
1890   pthread_attr_init(&attr);
1891   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1892 
1893   /* lock for reduction */
1894   pthread_mutex_init(&global_mutex, NULL);
1895 
1896   for (i=0; i<nthreads; i++) {
1897     /* initialize thread data */
1898     N_VInitThreadData(&thread_data[i]);
1899 
1900     /* compute start and end loop index for thread */
1901     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1902 
1903     /* pack thread data */
1904     thread_data[i].v1 = NV_DATA_PT(x);
1905     thread_data[i].global_val   = &sum;
1906     thread_data[i].global_mutex = &global_mutex;
1907 
1908     /* create threads and call pthread companion function */
1909     pthread_create(&threads[i], &attr, N_VL1Norm_PT, (void *) &thread_data[i]);
1910   }
1911 
1912   /* wait for all threads to finish */
1913   for (i=0; i<nthreads; i++) {
1914     pthread_join(threads[i], NULL);
1915   }
1916 
1917   /* clean up and return */
1918   pthread_attr_destroy(&attr);
1919   pthread_mutex_destroy(&global_mutex);
1920   free(threads);
1921   free(thread_data);
1922 
1923   return(sum);
1924 }
1925 
1926 
1927 /* ----------------------------------------------------------------------------
1928  * Pthread companion function to N_VL1Norm
1929  */
1930 
N_VL1Norm_PT(void * thread_data)1931 static void *N_VL1Norm_PT(void *thread_data)
1932 {
1933   sunindextype i, start, end;
1934   realtype *xd;
1935   realtype local_sum, *global_sum;
1936   Pthreads_Data *my_data;
1937   pthread_mutex_t *global_mutex;
1938 
1939   /* extract thread data */
1940   my_data = (Pthreads_Data *) thread_data;
1941 
1942   xd = my_data->v1;
1943 
1944   global_sum   = my_data->global_val;
1945   global_mutex = my_data->global_mutex;
1946 
1947   start = my_data->start;
1948   end   = my_data->end;
1949 
1950   /* compute L1 norm */
1951   local_sum = ZERO;
1952   for (i = start; i < end; i++)
1953     local_sum += SUNRabs(xd[i]);
1954 
1955   /* update global sum */
1956   pthread_mutex_lock(global_mutex);
1957   *global_sum += local_sum;
1958   pthread_mutex_unlock(global_mutex);
1959 
1960   /* exit */
1961   pthread_exit(NULL);
1962 }
1963 
1964 
1965 /* ----------------------------------------------------------------------------
1966  * Compare vector component values to a scaler
1967  */
1968 
N_VCompare_Pthreads(realtype c,N_Vector x,N_Vector z)1969 void N_VCompare_Pthreads(realtype c, N_Vector x, N_Vector z)
1970 {
1971   sunindextype   N;
1972   int            i, nthreads;
1973   pthread_t      *threads;
1974   Pthreads_Data  *thread_data;
1975   pthread_attr_t attr;
1976 
1977   /* allocate threads and thread data structs */
1978   N            = NV_LENGTH_PT(x);
1979   nthreads     = NV_NUM_THREADS_PT(x);
1980   threads      = malloc(nthreads*sizeof(pthread_t));
1981   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
1982 
1983   /* set thread attributes */
1984   pthread_attr_init(&attr);
1985   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
1986 
1987   for (i=0; i<nthreads; i++) {
1988     /* initialize thread data */
1989     N_VInitThreadData(&thread_data[i]);
1990 
1991     /* compute start and end loop index for thread */
1992     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
1993 
1994     /* pack thread data */
1995     thread_data[i].c1  = c;
1996     thread_data[i].v1 = NV_DATA_PT(x);
1997     thread_data[i].v2 = NV_DATA_PT(z);
1998 
1999     /* create threads and call pthread companion function */
2000     pthread_create(&threads[i], &attr, N_VCompare_PT, (void *) &thread_data[i]);
2001   }
2002 
2003   /* wait for all threads to finish */
2004   for (i=0; i<nthreads; i++) {
2005     pthread_join(threads[i], NULL);
2006   }
2007 
2008   /* clean up and return */
2009   pthread_attr_destroy(&attr);
2010   free(threads);
2011   free(thread_data);
2012 
2013   return;
2014 }
2015 
2016 /* ----------------------------------------------------------------------------
2017  * Pthread companion function to N_VCompare
2018  */
2019 
N_VCompare_PT(void * thread_data)2020 static void *N_VCompare_PT(void *thread_data)
2021 {
2022   sunindextype i, start, end;
2023   realtype c;
2024   realtype *xd, *zd;
2025   Pthreads_Data *my_data;
2026 
2027   /* extract thread data */
2028   my_data = (Pthreads_Data *) thread_data;
2029 
2030   c  = my_data->c1;
2031   xd = my_data->v1;
2032   zd = my_data->v2;
2033 
2034   start = my_data->start;
2035   end   = my_data->end;
2036 
2037   /* compare component to scaler */
2038   for (i = start; i < end; i++)
2039     zd[i] = (SUNRabs(xd[i]) >= c) ? ONE : ZERO;
2040 
2041   /* exit */
2042   pthread_exit(NULL);
2043 }
2044 
2045 
2046 /* ----------------------------------------------------------------------------
2047  * Compute componentwise inverse z[i] = ONE/x[i] and check if x[i] == ZERO
2048  */
2049 
N_VInvTest_Pthreads(N_Vector x,N_Vector z)2050 booleantype N_VInvTest_Pthreads(N_Vector x, N_Vector z)
2051 {
2052   sunindextype   N;
2053   int            i, nthreads;
2054   pthread_t      *threads;
2055   Pthreads_Data  *thread_data;
2056   pthread_attr_t attr;
2057 
2058   realtype val = ZERO;
2059 
2060   /* allocate threads and thread data structs */
2061   N           = NV_LENGTH_PT(x);
2062   nthreads    = NV_NUM_THREADS_PT(x);
2063   threads     = malloc(nthreads*sizeof(pthread_t));
2064   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2065 
2066   /* set thread attributes */
2067   pthread_attr_init(&attr);
2068   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2069 
2070   for (i=0; i<nthreads; i++) {
2071     /* initialize thread data */
2072     N_VInitThreadData(&thread_data[i]);
2073 
2074     /* compute start and end loop index for thread */
2075     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2076 
2077     /* pack thread data */
2078     thread_data[i].v1 = NV_DATA_PT(x);
2079     thread_data[i].v2 = NV_DATA_PT(z);
2080     thread_data[i].global_val = &val;
2081 
2082     /* create threads and call pthread companion function */
2083     pthread_create(&threads[i], &attr, N_VInvTest_PT, (void *) &thread_data[i]);
2084   }
2085 
2086   /* wait for all threads to finish */
2087   for (i=0; i<nthreads; i++) {
2088     pthread_join(threads[i], NULL);
2089   }
2090 
2091   /* clean up and return */
2092   pthread_attr_destroy(&attr);
2093   free(threads);
2094   free(thread_data);
2095 
2096   if (val > ZERO)
2097     return (SUNFALSE);
2098   else
2099     return (SUNTRUE);
2100 }
2101 
2102 
2103 /* ----------------------------------------------------------------------------
2104  * Pthread companion function to N_VInvTest
2105  */
2106 
N_VInvTest_PT(void * thread_data)2107 static void *N_VInvTest_PT(void *thread_data)
2108 {
2109   sunindextype i, start, end;
2110   realtype *xd, *zd;
2111   realtype local_val, *global_val;
2112   Pthreads_Data *my_data;
2113 
2114   /* extract thread data */
2115   my_data = (Pthreads_Data *) thread_data;
2116 
2117   xd = my_data->v1;
2118   zd = my_data->v2;
2119 
2120   global_val = my_data->global_val;
2121 
2122   start = my_data->start;
2123   end   = my_data->end;
2124 
2125   /* compute inverse with check for divide by ZERO */
2126   local_val = ZERO;
2127   for (i = start; i < end; i++) {
2128     if (xd[i] == ZERO)
2129       local_val = ONE;
2130     else
2131       zd[i] = ONE/xd[i];
2132   }
2133 
2134   /* update global val */
2135   if (local_val > ZERO) {
2136     *global_val = local_val;
2137   }
2138 
2139   /* exit */
2140   pthread_exit(NULL);
2141 }
2142 
2143 
2144 /* ----------------------------------------------------------------------------
2145  * Compute constraint mask of a vector
2146  */
2147 
N_VConstrMask_Pthreads(N_Vector c,N_Vector x,N_Vector m)2148 booleantype N_VConstrMask_Pthreads(N_Vector c, N_Vector x, N_Vector m)
2149 {
2150   sunindextype   N;
2151   int            i, nthreads;
2152   pthread_t      *threads;
2153   Pthreads_Data  *thread_data;
2154   pthread_attr_t attr;
2155 
2156   realtype val = ZERO;
2157 
2158   /* allocate threads and thread data structs */
2159   N           = NV_LENGTH_PT(x);
2160   nthreads    = NV_NUM_THREADS_PT(x);
2161   threads     = malloc(nthreads*sizeof(pthread_t));
2162   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2163 
2164   /* set thread attributes */
2165   pthread_attr_init(&attr);
2166   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2167 
2168   for (i=0; i<nthreads; i++) {
2169     /* initialize thread data */
2170     N_VInitThreadData(&thread_data[i]);
2171 
2172     /* compute start and end loop index for thread */
2173     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2174 
2175     /* pack thread data */
2176     thread_data[i].v1 = NV_DATA_PT(c);
2177     thread_data[i].v2 = NV_DATA_PT(x);
2178     thread_data[i].v3 = NV_DATA_PT(m);
2179     thread_data[i].global_val = &val;
2180 
2181     /* create threads and call pthread companion function */
2182     pthread_create(&threads[i], &attr, N_VConstrMask_PT, (void *) &thread_data[i]);
2183   }
2184 
2185   /* wait for all threads to finish */
2186   for (i=0; i<nthreads; i++) {
2187     pthread_join(threads[i], NULL);
2188   }
2189 
2190   /* clean up and return */
2191   pthread_attr_destroy(&attr);
2192   free(threads);
2193   free(thread_data);
2194 
2195   if (val > ZERO)
2196     return(SUNFALSE);
2197   else
2198     return(SUNTRUE);
2199 }
2200 
2201 
2202 /* ----------------------------------------------------------------------------
2203  * Pthread companion function to N_VConstrMask
2204  */
2205 
N_VConstrMask_PT(void * thread_data)2206 static void *N_VConstrMask_PT(void *thread_data)
2207 {
2208   sunindextype i, start, end;
2209   realtype *cd, *xd, *md;
2210   realtype local_val, *global_val;
2211   Pthreads_Data *my_data;
2212 
2213   /* extract thread data */
2214   my_data = (Pthreads_Data *) thread_data;
2215 
2216   cd = my_data->v1;
2217   xd = my_data->v2;
2218   md = my_data->v3;
2219 
2220   global_val = my_data->global_val;
2221 
2222   start = my_data->start;
2223   end   = my_data->end;
2224 
2225   /* compute constraint mask */
2226   local_val = ZERO;
2227   for (i = start; i < end; i++) {
2228     md[i] = ZERO;
2229 
2230     /* Continue if no constraints were set for the variable */
2231     if (cd[i] == ZERO)
2232       continue;
2233 
2234     /* Check if a set constraint has been violated */
2235     if ((SUNRabs(cd[i]) > ONEPT5 && xd[i]*cd[i] <= ZERO) ||
2236         (SUNRabs(cd[i]) > HALF   && xd[i]*cd[i] <  ZERO)) {
2237       local_val = md[i] = ONE;
2238     }
2239   }
2240 
2241   /* update global val */
2242   if (local_val > ZERO) {
2243     *global_val = local_val;
2244   }
2245 
2246   /* exit */
2247   pthread_exit(NULL);
2248 }
2249 
2250 
2251 /* ----------------------------------------------------------------------------
2252  * Compute minimum componentwise quotient
2253  */
2254 
N_VMinQuotient_Pthreads(N_Vector num,N_Vector denom)2255 realtype N_VMinQuotient_Pthreads(N_Vector num, N_Vector denom)
2256 {
2257   sunindextype    N;
2258   int             i, nthreads;
2259   pthread_t       *threads;
2260   Pthreads_Data   *thread_data;
2261   pthread_attr_t  attr;
2262   pthread_mutex_t global_mutex;
2263   realtype        min = BIG_REAL;
2264 
2265   /* allocate threads and thread data structs */
2266   N           = NV_LENGTH_PT(num);
2267   nthreads    = NV_NUM_THREADS_PT(num);
2268   threads     = malloc(nthreads*sizeof(pthread_t));
2269   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2270 
2271   /* set thread attributes */
2272   pthread_attr_init(&attr);
2273   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2274 
2275   /* lock for reduction */
2276   pthread_mutex_init(&global_mutex, NULL);
2277 
2278   for (i=0; i<nthreads; i++) {
2279     /* initialize thread data */
2280     N_VInitThreadData(&thread_data[i]);
2281 
2282     /* compute start and end loop index for thread */
2283     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2284 
2285     /* pack thread data */
2286     thread_data[i].v1 = NV_DATA_PT(num);
2287     thread_data[i].v2 = NV_DATA_PT(denom);
2288     thread_data[i].global_val   = &min;
2289     thread_data[i].global_mutex = &global_mutex;
2290 
2291     /* create threads and call pthread companion function */
2292     pthread_create(&threads[i], &attr, N_VMinQuotient_PT, (void *) &thread_data[i]);
2293   }
2294 
2295   /* wait for all threads to finish */
2296   for (i=0; i<nthreads; i++) {
2297     pthread_join(threads[i], NULL);
2298   }
2299 
2300   /* clean up and return */
2301   pthread_attr_destroy(&attr);
2302   pthread_mutex_destroy(&global_mutex);
2303   free(threads);
2304   free(thread_data);
2305 
2306   return(min);
2307 }
2308 
2309 
2310 /* ----------------------------------------------------------------------------
2311  * Pthread companion function to N_VConstrMask
2312  */
2313 
N_VMinQuotient_PT(void * thread_data)2314 static void *N_VMinQuotient_PT(void *thread_data)
2315 {
2316   sunindextype i, start, end;
2317   realtype *nd, *dd;
2318   realtype local_min, *global_min;
2319   Pthreads_Data *my_data;
2320   pthread_mutex_t *global_mutex;
2321 
2322   /* extract thread data */
2323   my_data = (Pthreads_Data *) thread_data;
2324 
2325   nd = my_data->v1;
2326   dd = my_data->v2;
2327 
2328   global_min   = my_data->global_val;
2329   global_mutex = my_data->global_mutex;
2330 
2331   start = my_data->start;
2332   end   = my_data->end;
2333 
2334   /* compute minimum quotient */
2335   local_min = BIG_REAL;
2336   for (i = start; i < end; i++) {
2337     if (dd[i] == ZERO)
2338       continue;
2339     local_min = SUNMIN(local_min, nd[i]/dd[i]);
2340   }
2341 
2342   /* update global min */
2343   pthread_mutex_lock(global_mutex);
2344   if (local_min < *global_min)
2345     *global_min = local_min;
2346   pthread_mutex_unlock(global_mutex);
2347 
2348   /* exit */
2349   pthread_exit(NULL);
2350 }
2351 
2352 
2353 /*
2354  * -----------------------------------------------------------------------------
2355  * fused vector operations
2356  * -----------------------------------------------------------------------------
2357  */
2358 
2359 
2360 /* -----------------------------------------------------------------------------
2361  * Compute the linear combination z = c[i]*X[i]
2362  */
2363 
N_VLinearCombination_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector z)2364 int N_VLinearCombination_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector z)
2365 {
2366   sunindextype   N;
2367   int            i, nthreads;
2368   pthread_t      *threads;
2369   Pthreads_Data  *thread_data;
2370   pthread_attr_t attr;
2371 
2372   /* invalid number of vectors */
2373   if (nvec < 1) return(-1);
2374 
2375   /* should have called N_VScale */
2376   if (nvec == 1) {
2377     N_VScale_Pthreads(c[0], X[0], z);
2378     return(0);
2379   }
2380 
2381   /* should have called N_VLinearSum */
2382   if (nvec == 2) {
2383     N_VLinearSum_Pthreads(c[0], X[0], c[1], X[1], z);
2384     return(0);
2385   }
2386 
2387   /* get vector length and data array */
2388   N           = NV_LENGTH_PT(z);
2389   nthreads    = NV_NUM_THREADS_PT(z);
2390   threads     = malloc(nthreads*sizeof(pthread_t));
2391   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2392 
2393   /* set thread attributes */
2394   pthread_attr_init(&attr);
2395   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2396 
2397   for (i=0; i<nthreads; i++) {
2398     /* initialize thread data */
2399     N_VInitThreadData(&thread_data[i]);
2400 
2401     /* compute start and end loop index for thread */
2402     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2403 
2404     /* pack thread data */
2405     thread_data[i].nvec  = nvec;
2406     thread_data[i].cvals = c;
2407     thread_data[i].Y1    = X;
2408     thread_data[i].x1    = z;
2409 
2410     /* create threads and call pthread companion function */
2411     pthread_create(&threads[i], &attr, N_VLinearCombination_PT, (void *) &thread_data[i]);
2412   }
2413 
2414   /* wait for all threads to finish */
2415   for (i=0; i<nthreads; i++) {
2416     pthread_join(threads[i], NULL);
2417   }
2418 
2419   /* clean up and return */
2420   pthread_attr_destroy(&attr);
2421   free(threads);
2422   free(thread_data);
2423 
2424   return(0);
2425 }
2426 
2427 
2428 /* -----------------------------------------------------------------------------
2429  * Pthread companion function to N_VLinearCombination
2430  */
2431 
N_VLinearCombination_PT(void * thread_data)2432 static void *N_VLinearCombination_PT(void *thread_data)
2433 {
2434   Pthreads_Data *my_data;
2435   sunindextype  j, start, end;
2436 
2437   int       i;
2438   realtype* c=NULL;
2439   realtype* xd=NULL;
2440   realtype* zd=NULL;
2441 
2442   /* extract thread data */
2443   my_data = (Pthreads_Data *) thread_data;
2444 
2445   start = my_data->start;
2446   end   = my_data->end;
2447 
2448   c  = my_data->cvals;
2449   zd = NV_DATA_PT(my_data->x1);
2450 
2451   /*
2452    * X[0] += c[i]*X[i], i = 1,...,nvec-1
2453    */
2454   if ((my_data->Y1[0] == my_data->x1) && (c[0] == ONE)) {
2455     for (i=1; i<my_data->nvec; i++) {
2456       xd = NV_DATA_PT(my_data->Y1[i]);
2457       for (j=start; j<end; j++) {
2458         zd[j] += c[i] * xd[j];
2459       }
2460     }
2461     pthread_exit(NULL);
2462   }
2463 
2464   /*
2465    * X[0] = c[0] * X[0] + sum{ c[i] * X[i] }, i = 1,...,nvec-1
2466    */
2467   if (my_data->Y1[0] == my_data->x1) {
2468     for (j=start; j<end; j++) {
2469       zd[j] *= c[0];
2470     }
2471     for (i=1; i<my_data->nvec; i++) {
2472       xd = NV_DATA_PT(my_data->Y1[i]);
2473       for (j=start; j<end; j++) {
2474         zd[j] += c[i] * xd[j];
2475       }
2476     }
2477     pthread_exit(NULL);
2478   }
2479 
2480   /*
2481    * z = sum{ c[i] * X[i] }, i = 0,...,nvec-1
2482    */
2483   xd = NV_DATA_PT(my_data->Y1[0]);
2484   for (j=start; j<end; j++) {
2485     zd[j] = c[0] * xd[j];
2486   }
2487   for (i=1; i<my_data->nvec; i++) {
2488     xd = NV_DATA_PT(my_data->Y1[i]);
2489     for (j=start; j<end; j++) {
2490       zd[j] += c[i] * xd[j];
2491     }
2492   }
2493   pthread_exit(NULL);
2494 }
2495 
2496 
2497 /* -----------------------------------------------------------------------------
2498  * Compute multiple linear sums Z[i] = Y[i] + a*x
2499  */
2500 
N_VScaleAddMulti_Pthreads(int nvec,realtype * a,N_Vector x,N_Vector * Y,N_Vector * Z)2501 int N_VScaleAddMulti_Pthreads(int nvec, realtype* a, N_Vector x, N_Vector* Y, N_Vector* Z)
2502 {
2503   sunindextype   N;
2504   int            i, nthreads;
2505   pthread_t      *threads;
2506   Pthreads_Data  *thread_data;
2507   pthread_attr_t attr;
2508 
2509   /* invalid number of vectors */
2510   if (nvec < 1) return(-1);
2511 
2512   /* should have called N_VLinearSum */
2513   if (nvec == 1) {
2514     N_VLinearSum_Pthreads(a[0], x, ONE, Y[0], Z[0]);
2515     return(0);
2516   }
2517 
2518   /* get vector length and data array */
2519   N           = NV_LENGTH_PT(x);
2520   nthreads    = NV_NUM_THREADS_PT(x);
2521   threads     = malloc(nthreads*sizeof(pthread_t));
2522   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2523 
2524   /* set thread attributes */
2525   pthread_attr_init(&attr);
2526   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2527 
2528   for (i=0; i<nthreads; i++) {
2529     /* initialize thread data */
2530     N_VInitThreadData(&thread_data[i]);
2531 
2532     /* compute start and end loop index for thread */
2533     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2534 
2535     /* pack thread data */
2536     thread_data[i].nvec  = nvec;
2537     thread_data[i].cvals = a;
2538     thread_data[i].x1    = x;
2539     thread_data[i].Y1    = Y;
2540     thread_data[i].Y2    = Z;
2541 
2542     /* create threads and call pthread companion function */
2543     pthread_create(&threads[i], &attr, N_VScaleAddMulti_PT, (void *) &thread_data[i]);
2544   }
2545 
2546   /* wait for all threads to finish */
2547   for (i=0; i<nthreads; i++) {
2548     pthread_join(threads[i], NULL);
2549   }
2550 
2551   /* clean up and return */
2552   pthread_attr_destroy(&attr);
2553   free(threads);
2554   free(thread_data);
2555 
2556   return(0);
2557 }
2558 
2559 
2560 /* -----------------------------------------------------------------------------
2561  * Pthread companion function to N_VScaleAddMulti
2562  */
2563 
N_VScaleAddMulti_PT(void * thread_data)2564 static void *N_VScaleAddMulti_PT(void *thread_data)
2565 {
2566   Pthreads_Data *my_data;
2567   sunindextype  j, start, end;
2568 
2569   int       i;
2570   realtype* a=NULL;
2571   realtype* xd=NULL;
2572   realtype* yd=NULL;
2573   realtype* zd=NULL;
2574 
2575   /* extract thread data */
2576   my_data = (Pthreads_Data *) thread_data;
2577 
2578   start = my_data->start;
2579   end   = my_data->end;
2580 
2581   a  = my_data->cvals;
2582   xd = NV_DATA_PT(my_data->x1);
2583 
2584   /*
2585    * Y[i][j] += a[i] * x[j]
2586    */
2587   if (my_data->Y1 == my_data->Y2) {
2588     for (i=0; i<my_data->nvec; i++) {
2589       yd = NV_DATA_PT(my_data->Y1[i]);
2590       for (j=start; j<end; j++) {
2591         yd[j] += a[i] * xd[j];
2592       }
2593     }
2594     pthread_exit(NULL);
2595   }
2596 
2597   /*
2598    * Z[i][j] = Y[i][j] + a[i] * x[j]
2599    */
2600   for (i=0; i<my_data->nvec; i++) {
2601     yd = NV_DATA_PT(my_data->Y1[i]);
2602     zd = NV_DATA_PT(my_data->Y2[i]);
2603     for (j=start; j<end; j++) {
2604       zd[j] = a[i] * xd[j] + yd[j];
2605     }
2606   }
2607   pthread_exit(NULL);
2608 }
2609 
2610 
2611 /* -----------------------------------------------------------------------------
2612  * Compute the dot product of a vector with multiple vectors, a[i] = sum(x*Y[i])
2613  */
2614 
N_VDotProdMulti_Pthreads(int nvec,N_Vector x,N_Vector * Y,realtype * dotprods)2615 int N_VDotProdMulti_Pthreads(int nvec, N_Vector x, N_Vector* Y, realtype* dotprods)
2616 {
2617   sunindextype    N;
2618   int             i, nthreads;
2619   pthread_t       *threads;
2620   Pthreads_Data   *thread_data;
2621   pthread_attr_t  attr;
2622   pthread_mutex_t global_mutex;
2623 
2624   /* invalid number of vectors */
2625   if (nvec < 1) return(-1);
2626 
2627   /* should have called N_VDotProd */
2628   if (nvec == 1) {
2629     dotprods[0] = N_VDotProd_Pthreads(x, Y[0]);
2630     return(0);
2631   }
2632 
2633   /* initialize output array */
2634   for (i=0; i<nvec; i++)
2635     dotprods[i] = ZERO;
2636 
2637   /* allocate threads and thread data structs */
2638   N           = NV_LENGTH_PT(x);
2639   nthreads    = NV_NUM_THREADS_PT(x);
2640   threads     = malloc(nthreads*sizeof(pthread_t));
2641   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2642 
2643   /* set thread attributes */
2644   pthread_attr_init(&attr);
2645   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2646 
2647   /* lock for reduction */
2648   pthread_mutex_init(&global_mutex, NULL);
2649 
2650   for (i=0; i<nthreads; i++) {
2651     /* initialize thread data */
2652     N_VInitThreadData(&thread_data[i]);
2653 
2654     /* compute start and end loop index for thread */
2655     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2656 
2657     /* pack thread data */
2658     thread_data[i].nvec  = nvec;
2659     thread_data[i].x1    = x;
2660     thread_data[i].Y1    = Y;
2661     thread_data[i].cvals = dotprods;
2662 
2663     thread_data[i].global_mutex = &global_mutex;
2664 
2665     /* create threads and call pthread companion function */
2666     pthread_create(&threads[i], &attr, N_VDotProdMulti_PT, (void *) &thread_data[i]);
2667   }
2668 
2669   /* wait for all threads to finish */
2670   for (i=0; i<nthreads; i++) {
2671     pthread_join(threads[i], NULL);
2672   }
2673 
2674   /* clean up and return */
2675   pthread_attr_destroy(&attr);
2676   pthread_mutex_destroy(&global_mutex);
2677   free(threads);
2678   free(thread_data);
2679 
2680   return(0);
2681 }
2682 
2683 
2684 /* -----------------------------------------------------------------------------
2685  * Pthread companion function to N_VDotProdMulti
2686  */
2687 
N_VDotProdMulti_PT(void * thread_data)2688 static void *N_VDotProdMulti_PT(void *thread_data)
2689 {
2690   Pthreads_Data   *my_data;
2691   sunindextype    j, start, end;
2692   pthread_mutex_t *lock;
2693 
2694   int       i;
2695   realtype  sum;
2696   realtype* dotprods=NULL;
2697   realtype* xd=NULL;
2698   realtype* yd=NULL;
2699 
2700   /* extract thread data */
2701   my_data = (Pthreads_Data *) thread_data;
2702 
2703   start = my_data->start;
2704   end   = my_data->end;
2705   lock  = my_data->global_mutex;
2706 
2707   xd       = NV_DATA_PT(my_data->x1);
2708   dotprods = my_data->cvals;
2709 
2710   /* compute multiple dot products */
2711   for (i=0; i<my_data->nvec; i++) {
2712     yd  = NV_DATA_PT(my_data->Y1[i]);
2713     sum = ZERO;
2714     for (j=start; j<end; j++) {
2715       sum += xd[j] * yd[j];
2716     }
2717     /* update global sum */
2718     pthread_mutex_lock(lock);
2719     dotprods[i] += sum;
2720     pthread_mutex_unlock(lock);
2721   }
2722 
2723   /* exit */
2724   pthread_exit(NULL);
2725 }
2726 
2727 
2728 /*
2729  * -----------------------------------------------------------------------------
2730  * vector array operations
2731  * -----------------------------------------------------------------------------
2732  */
2733 
2734 
2735 /* -----------------------------------------------------------------------------
2736  * Compute multiple linear sums Z[i] = a*X[i] + b*Y[i]
2737  */
2738 
N_VLinearSumVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,realtype b,N_Vector * Y,N_Vector * Z)2739 int N_VLinearSumVectorArray_Pthreads(int nvec, realtype a, N_Vector* X,
2740                                      realtype b, N_Vector* Y,  N_Vector* Z)
2741 {
2742   sunindextype   N;
2743   int            i, nthreads;
2744   pthread_t      *threads;
2745   Pthreads_Data  *thread_data;
2746   pthread_attr_t attr;
2747 
2748   realtype    c;
2749   N_Vector*  V1;
2750   N_Vector*  V2;
2751   booleantype test;
2752 
2753   /* invalid number of vectors */
2754   if (nvec < 1) return(-1);
2755 
2756   /* should have called N_VLinearSum */
2757   if (nvec == 1) {
2758     N_VLinearSum_Pthreads(a, X[0], b, Y[0], Z[0]);
2759     return(0);
2760   }
2761 
2762   /* BLAS usage: axpy y <- ax+y */
2763   if ((b == ONE) && (Z == Y))
2764     return(VaxpyVectorArray_Pthreads(nvec, a, X, Y));
2765 
2766   /* BLAS usage: axpy x <- by+x */
2767   if ((a == ONE) && (Z == X))
2768     return(VaxpyVectorArray_Pthreads(nvec, b, Y, X));
2769 
2770   /* Case: a == b == 1.0 */
2771   if ((a == ONE) && (b == ONE))
2772     return(VSumVectorArray_Pthreads(nvec, X, Y, Z));
2773 
2774   /* Cases:                    */
2775   /*   (1) a == 1.0, b = -1.0, */
2776   /*   (2) a == -1.0, b == 1.0 */
2777   if ((test = ((a == ONE) && (b == -ONE))) || ((a == -ONE) && (b == ONE))) {
2778     V1 = test ? Y : X;
2779     V2 = test ? X : Y;
2780     return(VDiffVectorArray_Pthreads(nvec, V2, V1, Z));
2781   }
2782 
2783   /* Cases:                                                  */
2784   /*   (1) a == 1.0, b == other or 0.0,                      */
2785   /*   (2) a == other or 0.0, b == 1.0                       */
2786   /* if a or b is 0.0, then user should have called N_VScale */
2787   if ((test = (a == ONE)) || (b == ONE)) {
2788     c  = test ? b : a;
2789     V1 = test ? Y : X;
2790     V2 = test ? X : Y;
2791     return(VLin1VectorArray_Pthreads(nvec, c, V1, V2, Z));
2792   }
2793 
2794   /* Cases:                     */
2795   /*   (1) a == -1.0, b != 1.0, */
2796   /*   (2) a != 1.0, b == -1.0  */
2797   if ((test = (a == -ONE)) || (b == -ONE)) {
2798     c = test ? b : a;
2799     V1 = test ? Y : X;
2800     V2 = test ? X : Y;
2801     return(VLin2VectorArray_Pthreads(nvec, c, V1, V2, Z));
2802   }
2803 
2804   /* Case: a == b                                                         */
2805   /* catches case both a and b are 0.0 - user should have called N_VConst */
2806   if (a == b)
2807     return(VScaleSumVectorArray_Pthreads(nvec, a, X, Y, Z));
2808 
2809   /* Case: a == -b */
2810   if (a == -b)
2811     return(VScaleDiffVectorArray_Pthreads(nvec, a, X, Y, Z));
2812 
2813   /* Do all cases not handled above:                               */
2814   /*   (1) a == other, b == 0.0 - user should have called N_VScale */
2815   /*   (2) a == 0.0, b == other - user should have called N_VScale */
2816   /*   (3) a,b == other, a !=b, a != -b                            */
2817 
2818   /* get vector length and data array */
2819   N           = NV_LENGTH_PT(Z[0]);
2820   nthreads    = NV_NUM_THREADS_PT(Z[0]);
2821   threads     = malloc(nthreads*sizeof(pthread_t));
2822   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2823 
2824   /* set thread attributes */
2825   pthread_attr_init(&attr);
2826   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2827 
2828   for (i=0; i<nthreads; i++) {
2829     /* initialize thread data */
2830     N_VInitThreadData(&thread_data[i]);
2831 
2832     /* compute start and end loop index for thread */
2833     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2834 
2835     /* pack thread data */
2836     thread_data[i].nvec = nvec;
2837     thread_data[i].c1   = a;
2838     thread_data[i].c2   = b;
2839     thread_data[i].Y1   = X;
2840     thread_data[i].Y2   = Y;
2841     thread_data[i].Y3   = Z;
2842 
2843     /* create threads and call pthread companion function */
2844     pthread_create(&threads[i], &attr, N_VLinearSumVectorArray_PT,
2845                    (void *) &thread_data[i]);
2846   }
2847 
2848   /* wait for all threads to finish */
2849   for (i=0; i<nthreads; i++) {
2850     pthread_join(threads[i], NULL);
2851   }
2852 
2853   /* clean up and return */
2854   pthread_attr_destroy(&attr);
2855   free(threads);
2856   free(thread_data);
2857 
2858   return(0);
2859 }
2860 
2861 
2862 /* -----------------------------------------------------------------------------
2863  * Pthread companion function to N_VLinearSumVectorArray
2864  */
2865 
N_VLinearSumVectorArray_PT(void * thread_data)2866 static void *N_VLinearSumVectorArray_PT(void *thread_data)
2867 {
2868   Pthreads_Data *my_data;
2869   sunindextype  j, start, end;
2870 
2871   int       i;
2872   realtype  a, b;
2873   realtype* xd=NULL;
2874   realtype* yd=NULL;
2875   realtype* zd=NULL;
2876 
2877   /* extract thread data */
2878   my_data = (Pthreads_Data *) thread_data;
2879 
2880   start = my_data->start;
2881   end   = my_data->end;
2882 
2883   a = my_data->c1;
2884   b = my_data->c2;
2885 
2886   /* compute linear sum for each vector pair in vector arrays */
2887   for (i=0; i<my_data->nvec; i++) {
2888     xd = NV_DATA_PT(my_data->Y1[i]);
2889     yd = NV_DATA_PT(my_data->Y2[i]);
2890     zd = NV_DATA_PT(my_data->Y3[i]);
2891     for (j=start; j<end; j++) {
2892       zd[j] = a * xd[j] + b * yd[j];
2893     }
2894   }
2895 
2896   /* exit */
2897   pthread_exit(NULL);
2898 }
2899 
2900 
2901 /* -----------------------------------------------------------------------------
2902  * Scale multiple vectors Z[i] = c[i]*X[i]
2903  */
2904 
N_VScaleVectorArray_Pthreads(int nvec,realtype * c,N_Vector * X,N_Vector * Z)2905 int N_VScaleVectorArray_Pthreads(int nvec, realtype* c, N_Vector* X, N_Vector* Z)
2906 {
2907   sunindextype   N;
2908   int            i, nthreads;
2909   pthread_t      *threads;
2910   Pthreads_Data  *thread_data;
2911   pthread_attr_t attr;
2912 
2913   /* invalid number of vectors */
2914   if (nvec < 1) return(-1);
2915 
2916   /* should have called N_VScale */
2917   if (nvec == 1) {
2918     N_VScale_Pthreads(c[0], X[0], Z[0]);
2919     return(0);
2920   }
2921 
2922   /* get vector length and data array */
2923   N           = NV_LENGTH_PT(Z[0]);
2924   nthreads    = NV_NUM_THREADS_PT(Z[0]);
2925   threads     = malloc(nthreads*sizeof(pthread_t));
2926   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
2927 
2928   /* set thread attributes */
2929   pthread_attr_init(&attr);
2930   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2931 
2932   for (i=0; i<nthreads; i++) {
2933     /* initialize thread data */
2934     N_VInitThreadData(&thread_data[i]);
2935 
2936     /* compute start and end loop index for thread */
2937     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
2938 
2939     /* pack thread data */
2940     thread_data[i].nvec  = nvec;
2941     thread_data[i].cvals = c;
2942     thread_data[i].Y1    = X;
2943     thread_data[i].Y2    = Z;
2944 
2945     /* create threads and call pthread companion function */
2946     pthread_create(&threads[i], &attr, N_VScaleVectorArray_PT,
2947                    (void *) &thread_data[i]);
2948   }
2949 
2950   /* wait for all threads to finish */
2951   for (i=0; i<nthreads; i++) {
2952     pthread_join(threads[i], NULL);
2953   }
2954 
2955   /* clean up and return */
2956   pthread_attr_destroy(&attr);
2957   free(threads);
2958   free(thread_data);
2959 
2960   return(0);
2961 }
2962 
2963 
2964 /* -----------------------------------------------------------------------------
2965  * Pthread companion function to N_VScaleVectorArray
2966  */
2967 
N_VScaleVectorArray_PT(void * thread_data)2968 static void *N_VScaleVectorArray_PT(void *thread_data)
2969 {
2970   Pthreads_Data *my_data;
2971   sunindextype  j, start, end;
2972 
2973   int       i;
2974   realtype* c;
2975   realtype* xd=NULL;
2976   realtype* zd=NULL;
2977 
2978   /* extract thread data */
2979   my_data = (Pthreads_Data *) thread_data;
2980 
2981   start = my_data->start;
2982   end   = my_data->end;
2983 
2984   c = my_data->cvals;
2985 
2986   /*
2987    * X[i] *= c[i]
2988    */
2989   if (my_data->Y1 == my_data->Y2) {
2990     for (i=0; i<my_data->nvec; i++) {
2991       xd = NV_DATA_PT(my_data->Y1[i]);
2992       for (j=start; j<end; j++) {
2993         xd[j] *= c[i];
2994       }
2995     }
2996     pthread_exit(NULL);
2997   }
2998 
2999   /*
3000    * Z[i] = c[i] * X[i]
3001    */
3002   for (i=0; i<my_data->nvec; i++) {
3003     xd = NV_DATA_PT(my_data->Y1[i]);
3004     zd = NV_DATA_PT(my_data->Y2[i]);
3005     for (j=start; j<end; j++) {
3006       zd[j] = c[i] * xd[j];
3007     }
3008   }
3009   pthread_exit(NULL);
3010 }
3011 
3012 
3013 /* -----------------------------------------------------------------------------
3014  * Set multiple vectors to a constant value Z[i] = c
3015  */
3016 
N_VConstVectorArray_Pthreads(int nvec,realtype c,N_Vector * Z)3017 int N_VConstVectorArray_Pthreads(int nvec, realtype c, N_Vector* Z)
3018 {
3019   sunindextype   N;
3020   int            i, nthreads;
3021   pthread_t      *threads;
3022   Pthreads_Data  *thread_data;
3023   pthread_attr_t attr;
3024 
3025   /* invalid number of vectors */
3026   if (nvec < 1) return(-1);
3027 
3028   /* should have called N_VConst */
3029   if (nvec == 1) {
3030     N_VConst_Pthreads(c, Z[0]);
3031     return(0);
3032   }
3033 
3034   /* get vector length and data array */
3035   N           = NV_LENGTH_PT(Z[0]);
3036   nthreads    = NV_NUM_THREADS_PT(Z[0]);
3037   threads     = malloc(nthreads*sizeof(pthread_t));
3038   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3039 
3040   /* set thread attributes */
3041   pthread_attr_init(&attr);
3042   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3043 
3044   for (i=0; i<nthreads; i++) {
3045     /* initialize thread data */
3046     N_VInitThreadData(&thread_data[i]);
3047 
3048     /* compute start and end loop index for thread */
3049     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3050 
3051     /* pack thread data */
3052     thread_data[i].nvec = nvec;
3053     thread_data[i].c1   = c;
3054     thread_data[i].Y1   = Z;
3055 
3056     /* create threads and call pthread companion function */
3057     pthread_create(&threads[i], &attr, N_VConstVectorArray_PT,
3058                    (void *) &thread_data[i]);
3059   }
3060 
3061   /* wait for all threads to finish */
3062   for (i=0; i<nthreads; i++) {
3063     pthread_join(threads[i], NULL);
3064   }
3065 
3066   /* clean up and return */
3067   pthread_attr_destroy(&attr);
3068   free(threads);
3069   free(thread_data);
3070 
3071   return(0);
3072 }
3073 
3074 
3075 /* -----------------------------------------------------------------------------
3076  * Pthread companion function to N_VConstVectorArray
3077  */
3078 
N_VConstVectorArray_PT(void * thread_data)3079 static void *N_VConstVectorArray_PT(void *thread_data)
3080 {
3081   Pthreads_Data *my_data;
3082   sunindextype  j, start, end;
3083 
3084   int       i;
3085   realtype* zd=NULL;
3086 
3087   /* extract thread data */
3088   my_data = (Pthreads_Data *) thread_data;
3089 
3090   start = my_data->start;
3091   end   = my_data->end;
3092 
3093   /* set each vector in the vector array to a constant */
3094   for (i=0; i<my_data->nvec; i++) {
3095     zd = NV_DATA_PT(my_data->Y1[i]);
3096     for (j=start; j<end; j++) {
3097       zd[j] = my_data->c1;
3098     }
3099   }
3100 
3101   /* exit */
3102   pthread_exit(NULL);
3103 }
3104 
3105 
3106 /* ----------------------------------------------------------------------------
3107  * Compute the weighted root mean square norm of multiple vectors
3108  */
3109 
N_VWrmsNormVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,realtype * nrm)3110 int N_VWrmsNormVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W, realtype* nrm)
3111 {
3112   sunindextype    N;
3113   int             i, nthreads;
3114   pthread_t       *threads;
3115   Pthreads_Data   *thread_data;
3116   pthread_attr_t  attr;
3117   pthread_mutex_t global_mutex;
3118 
3119   /* invalid number of vectors */
3120   if (nvec < 1) return(-1);
3121 
3122   /* should have called N_VWrmsNorm */
3123   if (nvec == 1) {
3124     nrm[0] = N_VWrmsNorm_Pthreads(X[0], W[0]);
3125     return(0);
3126   }
3127 
3128   /* initialize output array */
3129   for (i=0; i<nvec; i++)
3130     nrm[i] = ZERO;
3131 
3132   /* allocate threads and thread data structs */
3133   N           = NV_LENGTH_PT(X[0]);
3134   nthreads    = NV_NUM_THREADS_PT(X[0]);
3135   threads     = malloc(nthreads*sizeof(pthread_t));
3136   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3137 
3138   /* set thread attributes */
3139   pthread_attr_init(&attr);
3140   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3141 
3142   /* lock for reduction */
3143   pthread_mutex_init(&global_mutex, NULL);
3144 
3145   for (i=0; i<nthreads; i++) {
3146     /* initialize thread data */
3147     N_VInitThreadData(&thread_data[i]);
3148 
3149     /* compute start and end loop index for thread */
3150     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3151 
3152     /* pack thread data */
3153     thread_data[i].nvec  = nvec;
3154     thread_data[i].Y1    = X;
3155     thread_data[i].Y2    = W;
3156     thread_data[i].cvals = nrm;
3157 
3158     thread_data[i].global_mutex = &global_mutex;
3159 
3160     /* create threads and call pthread companion function */
3161     pthread_create(&threads[i], &attr, N_VWrmsNormVectorArray_PT,
3162                    (void *) &thread_data[i]);
3163   }
3164 
3165   /* wait for all threads to finish */
3166   for (i=0; i<nthreads; i++) {
3167     pthread_join(threads[i], NULL);
3168   }
3169 
3170   /* finalize wrms calculation */
3171   for (i=0; i<nvec; i++)
3172     nrm[i] = SUNRsqrt(nrm[i]/N);
3173 
3174   /* clean up and return */
3175   pthread_attr_destroy(&attr);
3176   pthread_mutex_destroy(&global_mutex);
3177   free(threads);
3178   free(thread_data);
3179 
3180   return(0);
3181 }
3182 
3183 
3184 /* ----------------------------------------------------------------------------
3185  * Pthread companion function to N_VWrmsNormVectorArray
3186  */
3187 
N_VWrmsNormVectorArray_PT(void * thread_data)3188 static void *N_VWrmsNormVectorArray_PT(void *thread_data)
3189 {
3190   Pthreads_Data   *my_data;
3191   sunindextype    j, start, end;
3192   pthread_mutex_t *lock;
3193 
3194   int       i;
3195   realtype  sum;
3196   realtype* nrm=NULL;
3197   realtype* xd=NULL;
3198   realtype* wd=NULL;
3199 
3200   /* extract thread data */
3201   my_data = (Pthreads_Data *) thread_data;
3202 
3203   start = my_data->start;
3204   end   = my_data->end;
3205   lock  = my_data->global_mutex;
3206 
3207   nrm = my_data->cvals;
3208 
3209   /* compute the WRMS norm for each vector in the vector array */
3210   for (i=0; i<my_data->nvec; i++) {
3211     xd = NV_DATA_PT(my_data->Y1[i]);
3212     wd = NV_DATA_PT(my_data->Y2[i]);
3213     sum = ZERO;
3214     for (j=start; j<end; j++) {
3215       sum += SUNSQR(xd[j] * wd[j]);
3216     }
3217     /* update global sum */
3218     pthread_mutex_lock(lock);
3219     nrm[i] += sum;
3220     pthread_mutex_unlock(lock);
3221   }
3222 
3223   /* exit */
3224   pthread_exit(NULL);
3225 }
3226 
3227 
3228 /* ----------------------------------------------------------------------------
3229  * Compute the weighted root mean square norm of multiple vectors
3230  */
3231 
N_VWrmsNormMaskVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * W,N_Vector id,realtype * nrm)3232 int N_VWrmsNormMaskVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* W,
3233                                         N_Vector id, realtype* nrm)
3234 {
3235   sunindextype    N;
3236   int             i, nthreads;
3237   pthread_t       *threads;
3238   Pthreads_Data   *thread_data;
3239   pthread_attr_t  attr;
3240   pthread_mutex_t global_mutex;
3241 
3242   /* invalid number of vectors */
3243   if (nvec < 1) return(-1);
3244 
3245   /* should have called N_VWrmsNorm */
3246   if (nvec == 1) {
3247     nrm[0] = N_VWrmsNormMask_Pthreads(X[0], W[0], id);
3248     return(0);
3249   }
3250 
3251   /* initialize output array */
3252   for (i=0; i<nvec; i++)
3253     nrm[i] = ZERO;
3254 
3255   /* allocate threads and thread data structs */
3256   N           = NV_LENGTH_PT(X[0]);
3257   nthreads    = NV_NUM_THREADS_PT(X[0]);
3258   threads     = malloc(nthreads*sizeof(pthread_t));
3259   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3260 
3261   /* set thread attributes */
3262   pthread_attr_init(&attr);
3263   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3264 
3265   /* lock for reduction */
3266   pthread_mutex_init(&global_mutex, NULL);
3267 
3268   for (i=0; i<nthreads; i++) {
3269     /* initialize thread data */
3270     N_VInitThreadData(&thread_data[i]);
3271 
3272     /* compute start and end loop index for thread */
3273     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3274 
3275     /* pack thread data */
3276     thread_data[i].nvec  = nvec;
3277     thread_data[i].Y1    = X;
3278     thread_data[i].Y2    = W;
3279     thread_data[i].x1    = id;
3280     thread_data[i].cvals = nrm;
3281 
3282     thread_data[i].global_mutex = &global_mutex;
3283 
3284     /* create threads and call pthread companion function */
3285     pthread_create(&threads[i], &attr, N_VWrmsNormMaskVectorArray_PT,
3286                    (void *) &thread_data[i]);
3287   }
3288 
3289   /* wait for all threads to finish */
3290   for (i=0; i<nthreads; i++) {
3291     pthread_join(threads[i], NULL);
3292   }
3293 
3294   /* finalize wrms calculation */
3295   for (i=0; i<nvec; i++)
3296     nrm[i] = SUNRsqrt(nrm[i]/N);
3297 
3298   /* clean up and return */
3299   pthread_attr_destroy(&attr);
3300   pthread_mutex_destroy(&global_mutex);
3301   free(threads);
3302   free(thread_data);
3303 
3304   return(0);
3305 }
3306 
3307 
3308 /* ----------------------------------------------------------------------------
3309  * Pthread companion function to N_VWrmsNormVectorArray
3310  */
3311 
N_VWrmsNormMaskVectorArray_PT(void * thread_data)3312 static void *N_VWrmsNormMaskVectorArray_PT(void *thread_data)
3313 {
3314   Pthreads_Data   *my_data;
3315   sunindextype    j, start, end;
3316   pthread_mutex_t *lock;
3317 
3318   int       i;
3319   realtype  sum;
3320   realtype* nrm=NULL;
3321   realtype* xd=NULL;
3322   realtype* wd=NULL;
3323   realtype* idd=NULL;
3324 
3325   /* extract thread data */
3326   my_data = (Pthreads_Data *) thread_data;
3327 
3328   start = my_data->start;
3329   end   = my_data->end;
3330   lock  = my_data->global_mutex;
3331 
3332   nrm = my_data->cvals;
3333   idd = NV_DATA_PT(my_data->x1);
3334 
3335   /* compute the WRMS norm for each vector in the vector array */
3336   for (i=0; i<my_data->nvec; i++) {
3337     xd = NV_DATA_PT(my_data->Y1[i]);
3338     wd = NV_DATA_PT(my_data->Y2[i]);
3339     sum = ZERO;
3340     for (j=start; j<end; j++) {
3341       if (idd[j] > ZERO)
3342         sum += SUNSQR(xd[j] * wd[j]);
3343     }
3344     /* update global sum */
3345     pthread_mutex_lock(lock);
3346     nrm[i] += sum;
3347     pthread_mutex_unlock(lock);
3348   }
3349 
3350   /* exit */
3351   pthread_exit(NULL);
3352 }
3353 
3354 
3355 /* -----------------------------------------------------------------------------
3356  * Scale and add a vector to multiple vectors Z = Y + a*X
3357  */
3358 
N_VScaleAddMultiVectorArray_Pthreads(int nvec,int nsum,realtype * a,N_Vector * X,N_Vector ** Y,N_Vector ** Z)3359 int N_VScaleAddMultiVectorArray_Pthreads(int nvec, int nsum, realtype* a,
3360                                           N_Vector* X, N_Vector** Y, N_Vector** Z)
3361 {
3362   sunindextype   N;
3363   int            i, j, nthreads;
3364   pthread_t      *threads;
3365   Pthreads_Data  *thread_data;
3366   pthread_attr_t attr;
3367 
3368   int          retval;
3369   N_Vector*   YY;
3370   N_Vector*   ZZ;
3371 
3372   /* invalid number of vectors */
3373   if (nvec < 1) return(-1);
3374   if (nsum < 1) return(-1);
3375 
3376   /* ---------------------------
3377    * Special cases for nvec == 1
3378    * --------------------------- */
3379 
3380   if (nvec == 1) {
3381 
3382     /* should have called N_VLinearSum */
3383     if (nsum == 1) {
3384       N_VLinearSum_Pthreads(a[0], X[0], ONE, Y[0][0], Z[0][0]);
3385       return(0);
3386     }
3387 
3388     /* should have called N_VScaleAddMulti */
3389     YY = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3390     ZZ = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3391 
3392     for (j=0; j<nsum; j++) {
3393       YY[j] = Y[j][0];
3394       ZZ[j] = Z[j][0];
3395     }
3396 
3397     retval = N_VScaleAddMulti_Pthreads(nsum, a, X[0], YY, ZZ);
3398 
3399     free(YY);
3400     free(ZZ);
3401     return(retval);
3402   }
3403 
3404   /* --------------------------
3405    * Special cases for nvec > 1
3406    * -------------------------- */
3407 
3408   /* should have called N_VLinearSumVectorArray */
3409   if (nsum == 1) {
3410     retval = N_VLinearSumVectorArray_Pthreads(nvec, a[0], X, ONE, Y[0], Z[0]);
3411     return(retval);
3412   }
3413 
3414   /* ----------------------------
3415    * Compute multiple linear sums
3416    * ---------------------------- */
3417 
3418   /* get vector length and data array */
3419   N           = NV_LENGTH_PT(X[0]);
3420   nthreads    = NV_NUM_THREADS_PT(X[0]);
3421   threads     = malloc(nthreads*sizeof(pthread_t));
3422   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3423 
3424   /* set thread attributes */
3425   pthread_attr_init(&attr);
3426   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3427 
3428   for (i=0; i<nthreads; i++) {
3429     /* initialize thread data */
3430     N_VInitThreadData(&thread_data[i]);
3431 
3432     /* compute start and end loop index for thread */
3433     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3434 
3435     /* pack thread data */
3436     thread_data[i].nvec  = nvec;
3437     thread_data[i].nsum  = nsum;
3438     thread_data[i].cvals = a;
3439     thread_data[i].Y1    = X;
3440     thread_data[i].ZZ1   = Y;
3441     thread_data[i].ZZ2   = Z;
3442 
3443     /* create threads and call pthread companion function */
3444     pthread_create(&threads[i], &attr, N_VScaleAddMultiVectorArray_PT,
3445                    (void *) &thread_data[i]);
3446   }
3447 
3448   /* wait for all threads to finish */
3449   for (i=0; i<nthreads; i++) {
3450     pthread_join(threads[i], NULL);
3451   }
3452 
3453   /* clean up and return */
3454   pthread_attr_destroy(&attr);
3455   free(threads);
3456   free(thread_data);
3457 
3458   return(0);
3459 }
3460 
3461 
3462 /* -----------------------------------------------------------------------------
3463  * Pthread companion function to N_VScaleAddMultiVectorArray
3464  */
3465 
N_VScaleAddMultiVectorArray_PT(void * thread_data)3466 static void *N_VScaleAddMultiVectorArray_PT(void *thread_data)
3467 {
3468   Pthreads_Data *my_data;
3469   sunindextype  k, start, end;
3470 
3471   int       i, j;
3472   realtype* a=NULL;
3473   realtype* xd=NULL;
3474   realtype* yd=NULL;
3475   realtype* zd=NULL;
3476 
3477   /* extract thread data */
3478   my_data = (Pthreads_Data *) thread_data;
3479 
3480   start = my_data->start;
3481   end   = my_data->end;
3482 
3483   a = my_data->cvals;
3484 
3485   /*
3486    * Y[i][j] += a[i] * x[j]
3487    */
3488   if (my_data->ZZ1 == my_data->ZZ2) {
3489     for (i=0; i<my_data->nvec; i++) {
3490       xd = NV_DATA_PT(my_data->Y1[i]);
3491       for (j=0; j<my_data->nsum; j++){
3492         yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3493         for (k=start; k<end; k++) {
3494           yd[k] += a[j] * xd[k];
3495         }
3496       }
3497     }
3498     pthread_exit(NULL);
3499   }
3500 
3501   /*
3502    * Z[i][j] = Y[i][j] + a[i] * x[j]
3503    */
3504   for (i=0; i<my_data->nvec; i++) {
3505     xd = NV_DATA_PT(my_data->Y1[i]);
3506     for (j=0; j<my_data->nsum; j++){
3507       yd = NV_DATA_PT(my_data->ZZ1[j][i]);
3508       zd = NV_DATA_PT(my_data->ZZ2[j][i]);
3509       for (k=start; k<end; k++) {
3510         zd[k] = a[j] * xd[k] + yd[k];
3511       }
3512     }
3513   }
3514   pthread_exit(NULL);
3515 }
3516 
3517 
3518 /* -----------------------------------------------------------------------------
3519  * Compute a linear combination for multiple vectors
3520  */
3521 
N_VLinearCombinationVectorArray_Pthreads(int nvec,int nsum,realtype * c,N_Vector ** X,N_Vector * Z)3522 int N_VLinearCombinationVectorArray_Pthreads(int nvec, int nsum, realtype* c,
3523                                            N_Vector** X, N_Vector* Z)
3524 {
3525   sunindextype   N;
3526   int            i, j, nthreads;
3527   pthread_t      *threads;
3528   Pthreads_Data  *thread_data;
3529   pthread_attr_t attr;
3530 
3531   int          retval;
3532   realtype*    ctmp;
3533   N_Vector*   Y;
3534 
3535   /* invalid number of vectors */
3536   if (nvec < 1) return(-1);
3537   if (nsum < 1) return(-1);
3538 
3539   /* ---------------------------
3540    * Special cases for nvec == 1
3541    * --------------------------- */
3542 
3543   if (nvec == 1) {
3544 
3545     /* should have called N_VScale */
3546     if (nsum == 1) {
3547       N_VScale_Pthreads(c[0], X[0][0], Z[0]);
3548       return(0);
3549     }
3550 
3551     /* should have called N_VLinearSum */
3552     if (nsum == 2) {
3553       N_VLinearSum_Pthreads(c[0], X[0][0], c[1], X[1][0], Z[0]);
3554       return(0);
3555     }
3556 
3557     /* should have called N_VLinearCombination */
3558     Y = (N_Vector*) malloc(nsum * sizeof(N_Vector));
3559 
3560     for (i=0; i<nsum; i++) {
3561       Y[i] = X[i][0];
3562     }
3563 
3564     retval = N_VLinearCombination_Pthreads(nsum, c, Y, Z[0]);
3565 
3566     free(Y);
3567     return(retval);
3568   }
3569 
3570   /* --------------------------
3571    * Special cases for nvec > 1
3572    * -------------------------- */
3573 
3574   /* should have called N_VScaleVectorArray */
3575   if (nsum == 1) {
3576 
3577     ctmp = (realtype*) malloc(nvec * sizeof(realtype));
3578 
3579     for (j=0; j<nvec; j++) {
3580       ctmp[j] = c[0];
3581     }
3582 
3583     retval = N_VScaleVectorArray_Pthreads(nvec, ctmp, X[0], Z);
3584 
3585     free(ctmp);
3586     return(retval);
3587   }
3588 
3589   /* should have called N_VLinearSumVectorArray */
3590   if (nsum == 2) {
3591     retval = N_VLinearSumVectorArray_Pthreads(nvec, c[0], X[0], c[1], X[1], Z);
3592     return(retval);
3593   }
3594 
3595   /* --------------------------
3596    * Compute linear combination
3597    * -------------------------- */
3598 
3599   /* get vector length and data array */
3600   N           = NV_LENGTH_PT(Z[0]);
3601   nthreads    = NV_NUM_THREADS_PT(Z[0]);
3602   threads     = malloc(nthreads*sizeof(pthread_t));
3603   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3604 
3605   /* set thread attributes */
3606   pthread_attr_init(&attr);
3607   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3608 
3609   for (i=0; i<nthreads; i++) {
3610     /* initialize thread data */
3611     N_VInitThreadData(&thread_data[i]);
3612 
3613     /* compute start and end loop index for thread */
3614     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3615 
3616     /* pack thread data */
3617     thread_data[i].nvec  = nvec;
3618     thread_data[i].nsum  = nsum;
3619     thread_data[i].cvals = c;
3620     thread_data[i].ZZ1   = X;
3621     thread_data[i].Y1    = Z;
3622 
3623     /* create threads and call pthread companion function */
3624     pthread_create(&threads[i], &attr, N_VLinearCombinationVectorArray_PT,
3625                    (void *) &thread_data[i]);
3626   }
3627 
3628   /* wait for all threads to finish */
3629   for (i=0; i<nthreads; i++) {
3630     pthread_join(threads[i], NULL);
3631   }
3632 
3633   /* clean up and return */
3634   pthread_attr_destroy(&attr);
3635   free(threads);
3636   free(thread_data);
3637 
3638   return(0);
3639 }
3640 
3641 
3642 /* -----------------------------------------------------------------------------
3643  * Pthread companion function to N_VLinearCombinationVectorArray
3644  */
3645 
N_VLinearCombinationVectorArray_PT(void * thread_data)3646 static void *N_VLinearCombinationVectorArray_PT(void *thread_data)
3647 {
3648   Pthreads_Data *my_data;
3649   sunindextype  k, start, end;
3650 
3651   int       i; /* vector arrays index in summation [0,nsum) */
3652   int       j; /* vector index in vector array     [0,nvec) */
3653   realtype* c=NULL;
3654   realtype* zd=NULL;
3655   realtype* xd=NULL;
3656 
3657   /* extract thread data */
3658   my_data = (Pthreads_Data *) thread_data;
3659 
3660   start = my_data->start;
3661   end   = my_data->end;
3662 
3663   c = my_data->cvals;
3664 
3665   /*
3666    * X[0][j] += c[i]*X[i][j], i = 1,...,nvec-1
3667    */
3668   if ((my_data->ZZ1[0] == my_data->Y1) && (c[0] == ONE)) {
3669     for (j=0; j<my_data->nvec; j++) {
3670       zd = NV_DATA_PT(my_data->Y1[j]);
3671       for (i=1; i<my_data->nsum; i++) {
3672         xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3673         for (k=start; k<end; k++) {
3674           zd[k] += c[i] * xd[k];
3675         }
3676       }
3677     }
3678     pthread_exit(NULL);
3679   }
3680 
3681   /*
3682    * X[0][j] = c[0] * X[0][j] + sum{ c[i] * X[i][j] }, i = 1,...,nvec-1
3683    */
3684   if (my_data->ZZ1[0] == my_data->Y1) {
3685     for (j=0; j<my_data->nvec; j++) {
3686       zd = NV_DATA_PT(my_data->Y1[j]);
3687       for (k=start; k<end; k++) {
3688         zd[k] *= c[0];
3689       }
3690       for (i=1; i<my_data->nsum; i++) {
3691         xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3692         for (k=start; k<end; k++) {
3693           zd[k] += c[i] * xd[k];
3694         }
3695       }
3696     }
3697     pthread_exit(NULL);
3698   }
3699 
3700   /*
3701    * Z[j] = sum{ c[i] * X[i][j] }, i = 0,...,nvec-1
3702    */
3703   for (j=0; j<my_data->nvec; j++) {
3704     xd = NV_DATA_PT(my_data->ZZ1[0][j]);
3705     zd = NV_DATA_PT(my_data->Y1[j]);
3706     for (k=start; k<end; k++) {
3707       zd[k] = c[0] * xd[k];
3708     }
3709     for (i=1; i<my_data->nsum; i++) {
3710       xd = NV_DATA_PT(my_data->ZZ1[i][j]);
3711       for (k=start; k<end; k++) {
3712         zd[k] += c[i] * xd[k];
3713       }
3714     }
3715   }
3716   pthread_exit(NULL);
3717 }
3718 
3719 
3720 /*
3721  * -----------------------------------------------------------------
3722  * OPTIONAL XBraid interface operations
3723  * -----------------------------------------------------------------
3724  */
3725 
3726 
3727 
3728 /* -----------------------------------------------------------------------------
3729  * Set buffer size
3730  */
3731 
N_VBufSize_Pthreads(N_Vector x,sunindextype * size)3732 int N_VBufSize_Pthreads(N_Vector x, sunindextype *size)
3733 {
3734   if (x == NULL) return(-1);
3735   *size = NV_LENGTH_PT(x) * ((sunindextype)sizeof(realtype));
3736   return(0);
3737 }
3738 
3739 
3740 /* -----------------------------------------------------------------------------
3741  * Pack butter
3742  */
3743 
N_VBufPack_Pthreads(N_Vector x,void * buf)3744 int N_VBufPack_Pthreads(N_Vector x, void *buf)
3745 {
3746   sunindextype   N;
3747   int            i, nthreads;
3748   pthread_t      *threads;
3749   Pthreads_Data  *thread_data;
3750   pthread_attr_t attr;
3751 
3752   if (x == NULL || buf == NULL) return(-1);
3753 
3754   /* allocate threads and thread data structs */
3755   N           = NV_LENGTH_PT(x);
3756   nthreads    = NV_NUM_THREADS_PT(x);
3757   threads     = malloc(nthreads*sizeof(pthread_t));
3758   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3759 
3760   /* set thread attributes */
3761   pthread_attr_init(&attr);
3762   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3763 
3764   for (i=0; i<nthreads; i++) {
3765     /* initialize thread data */
3766     N_VInitThreadData(&thread_data[i]);
3767 
3768     /* compute start and end loop index for thread */
3769     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3770 
3771     /* pack thread data */
3772     thread_data[i].v1 = NV_DATA_PT(x);
3773     thread_data[i].v2 = (realtype*)buf;
3774 
3775     /* create threads and call pthread companion function */
3776     pthread_create(&threads[i], &attr, VBufPack_PT, (void *) &thread_data[i]);
3777   }
3778 
3779   /* wait for all threads to finish */
3780   for (i=0; i<nthreads; i++) {
3781     pthread_join(threads[i], NULL);
3782   }
3783 
3784   /* clean up */
3785   pthread_attr_destroy(&attr);
3786   free(threads);
3787   free(thread_data);
3788 
3789   return(0);
3790 }
3791 
3792 
3793 /* -----------------------------------------------------------------------------
3794  * Pthread companion function to N_VBufPack
3795  */
3796 
3797 
VBufPack_PT(void * thread_data)3798 static void *VBufPack_PT(void *thread_data)
3799 {
3800   sunindextype  i, start, end;
3801   realtype      *xd, *bd;
3802   Pthreads_Data *my_data;
3803 
3804   /* extract thread data */
3805   my_data = (Pthreads_Data *) thread_data;
3806 
3807   xd = my_data->v1;
3808   bd = my_data->v2;
3809 
3810   start = my_data->start;
3811   end   = my_data->end;
3812 
3813   /* pack the buffer */
3814   for (i = start; i < end; i++)
3815     bd[i] = xd[i];
3816 
3817   /* exit */
3818   pthread_exit(NULL);
3819 }
3820 
3821 
3822 /* -----------------------------------------------------------------------------
3823  * Unpack butter
3824  */
3825 
N_VBufUnpack_Pthreads(N_Vector x,void * buf)3826 int N_VBufUnpack_Pthreads(N_Vector x, void *buf)
3827 {
3828   sunindextype   N;
3829   int            i, nthreads;
3830   pthread_t      *threads;
3831   Pthreads_Data  *thread_data;
3832   pthread_attr_t attr;
3833 
3834   if (x == NULL || buf == NULL) return(-1);
3835 
3836   /* allocate threads and thread data structs */
3837   N           = NV_LENGTH_PT(x);
3838   nthreads    = NV_NUM_THREADS_PT(x);
3839   threads     = malloc(nthreads*sizeof(pthread_t));
3840   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3841 
3842   /* set thread attributes */
3843   pthread_attr_init(&attr);
3844   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3845 
3846   for (i=0; i<nthreads; i++) {
3847     /* initialize thread data */
3848     N_VInitThreadData(&thread_data[i]);
3849 
3850     /* compute start and end loop index for thread */
3851     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3852 
3853     /* pack thread data */
3854     thread_data[i].v1 = NV_DATA_PT(x);
3855     thread_data[i].v2 = (realtype*)buf;
3856 
3857     /* create threads and call pthread companion function */
3858     pthread_create(&threads[i], &attr, VBufUnpack_PT, (void *) &thread_data[i]);
3859   }
3860 
3861   /* wait for all threads to finish */
3862   for (i=0; i<nthreads; i++) {
3863     pthread_join(threads[i], NULL);
3864   }
3865 
3866   /* clean up */
3867   pthread_attr_destroy(&attr);
3868   free(threads);
3869   free(thread_data);
3870 
3871   return(0);
3872 }
3873 
3874 
3875 /* -----------------------------------------------------------------------------
3876  * Pthread companion function to N_VBufUnpack
3877  */
3878 
3879 
VBufUnpack_PT(void * thread_data)3880 static void *VBufUnpack_PT(void *thread_data)
3881 {
3882   sunindextype  i, start, end;
3883   realtype      *xd, *bd;
3884   Pthreads_Data *my_data;
3885 
3886   /* extract thread data */
3887   my_data = (Pthreads_Data *) thread_data;
3888 
3889   xd = my_data->v1;
3890   bd = my_data->v2;
3891 
3892   start = my_data->start;
3893   end   = my_data->end;
3894 
3895   /* unpack the buffer */
3896   for (i = start; i < end; i++)
3897     xd[i] = bd[i];
3898 
3899   /* exit */
3900   pthread_exit(NULL);
3901 }
3902 
3903 
3904 /*
3905  * -----------------------------------------------------------------
3906  * private functions for special cases of vector operations
3907  * -----------------------------------------------------------------
3908  */
3909 
3910 
3911 /* ----------------------------------------------------------------------------
3912  * Copy vector components into second vector
3913  */
3914 
VCopy_Pthreads(N_Vector x,N_Vector z)3915 static void VCopy_Pthreads(N_Vector x, N_Vector z)
3916 {
3917   sunindextype      N;
3918   int           i, nthreads;
3919   pthread_t     *threads;
3920   Pthreads_Data *thread_data;
3921   pthread_attr_t attr;
3922 
3923   /* allocate threads and thread data structs */
3924   N            = NV_LENGTH_PT(x);
3925   nthreads     = NV_NUM_THREADS_PT(x);
3926   threads      = malloc(nthreads*sizeof(pthread_t));
3927   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
3928 
3929   /* set thread attributes */
3930   pthread_attr_init(&attr);
3931   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
3932 
3933   for (i=0; i<nthreads; i++) {
3934     /* initialize thread data */
3935     N_VInitThreadData(&thread_data[i]);
3936 
3937     /* compute start and end loop index for thread */
3938     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
3939 
3940     /* pack thread data */
3941     thread_data[i].v1 = NV_DATA_PT(x);
3942     thread_data[i].v2 = NV_DATA_PT(z);
3943 
3944     /* create threads and call pthread companion function */
3945     pthread_create(&threads[i], &attr, VCopy_PT, (void *) &thread_data[i]);
3946   }
3947 
3948   /* wait for all threads to finish */
3949   for (i=0; i<nthreads; i++) {
3950     pthread_join(threads[i], NULL);
3951   }
3952 
3953   /* clean up and return */
3954   pthread_attr_destroy(&attr);
3955   free(threads);
3956   free(thread_data);
3957 
3958   return;
3959 }
3960 
3961 
3962 /* ----------------------------------------------------------------------------
3963  * Pthread companion function to VCopy
3964  */
3965 
VCopy_PT(void * thread_data)3966 static void *VCopy_PT(void *thread_data)
3967 {
3968   sunindextype i, start, end;
3969   realtype *xd, *zd;
3970   Pthreads_Data *my_data;
3971 
3972   /* extract thread data */
3973   my_data = (Pthreads_Data *) thread_data;
3974 
3975   xd = my_data->v1;
3976   zd = my_data->v2;
3977 
3978   start = my_data->start;
3979   end   = my_data->end;
3980 
3981   /* copy vector components */
3982   for (i = start; i < end; i++)
3983     zd[i] = xd[i];
3984 
3985   /* exit */
3986   pthread_exit(NULL);
3987 }
3988 
3989 
3990 /* ----------------------------------------------------------------------------
3991  * Compute vector sum
3992  */
3993 
VSum_Pthreads(N_Vector x,N_Vector y,N_Vector z)3994 static void VSum_Pthreads(N_Vector x, N_Vector y, N_Vector z)
3995 {
3996   sunindextype      N;
3997   int           i, nthreads;
3998   pthread_t     *threads;
3999   Pthreads_Data *thread_data;
4000   pthread_attr_t attr;
4001 
4002   /* allocate threads and thread data structs */
4003   N            = NV_LENGTH_PT(x);
4004   nthreads     = NV_NUM_THREADS_PT(x);
4005   threads      = malloc(nthreads*sizeof(pthread_t));
4006   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4007 
4008   /* set thread attributes */
4009   pthread_attr_init(&attr);
4010   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4011 
4012   for (i=0; i<nthreads; i++) {
4013     /* initialize thread data */
4014     N_VInitThreadData(&thread_data[i]);
4015 
4016     /* compute start and end loop index for thread */
4017     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4018 
4019     /* pack thread data */
4020     thread_data[i].v1 = NV_DATA_PT(x);
4021     thread_data[i].v2 = NV_DATA_PT(y);
4022     thread_data[i].v3 = NV_DATA_PT(z);
4023 
4024     /* create threads and call pthread companion function */
4025     pthread_create(&threads[i], &attr, VSum_PT, (void *) &thread_data[i]);
4026   }
4027 
4028   /* wait for all threads to finish */
4029   for (i=0; i<nthreads; i++) {
4030     pthread_join(threads[i], NULL);
4031   }
4032 
4033   /* clean up and return */
4034   pthread_attr_destroy(&attr);
4035   free(threads);
4036   free(thread_data);
4037 
4038   return;
4039 }
4040 
4041 
4042 /* ----------------------------------------------------------------------------
4043  * Pthread companion function to VSum
4044  */
4045 
VSum_PT(void * thread_data)4046 static void *VSum_PT(void *thread_data)
4047 {
4048   sunindextype i, start, end;
4049   realtype *xd, *yd, *zd;
4050   Pthreads_Data *my_data;
4051 
4052   /* extract thread data */
4053   my_data = (Pthreads_Data *) thread_data;
4054 
4055   xd = my_data->v1;
4056   yd = my_data->v2;
4057   zd = my_data->v3;
4058 
4059   start = my_data->start;
4060   end   = my_data->end;
4061 
4062   /* compute vector sum */
4063   for (i = start; i < end; i++)
4064     zd[i] = xd[i] + yd[i];
4065 
4066   /* exit */
4067   pthread_exit(NULL);
4068 }
4069 
4070 
4071 /* ----------------------------------------------------------------------------
4072  * Compute vector difference
4073  */
4074 
VDiff_Pthreads(N_Vector x,N_Vector y,N_Vector z)4075 static void VDiff_Pthreads(N_Vector x, N_Vector y, N_Vector z)
4076 {
4077   sunindextype  N;
4078   int           i, nthreads;
4079   pthread_t     *threads;
4080   Pthreads_Data *thread_data;
4081   pthread_attr_t attr;
4082 
4083   /* allocate threads and thread data structs */
4084   N            = NV_LENGTH_PT(x);
4085   nthreads     = NV_NUM_THREADS_PT(x);
4086   threads      = malloc(nthreads*sizeof(pthread_t));
4087   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4088 
4089   /* set thread attributes */
4090   pthread_attr_init(&attr);
4091   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4092 
4093   for (i=0; i<nthreads; i++) {
4094     /* initialize thread data */
4095     N_VInitThreadData(&thread_data[i]);
4096 
4097     /* compute start and end loop index for thread */
4098     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4099 
4100     /* pack thread data */
4101     thread_data[i].v1 = NV_DATA_PT(x);
4102     thread_data[i].v2 = NV_DATA_PT(y);
4103     thread_data[i].v3 = NV_DATA_PT(z);
4104 
4105     /* create threads and call pthread companion function */
4106     pthread_create(&threads[i], &attr, VDiff_PT, (void *) &thread_data[i]);
4107   }
4108 
4109   /* wait for all threads to finish */
4110   for (i=0; i<nthreads; i++) {
4111     pthread_join(threads[i], NULL);
4112   }
4113 
4114   /* clean up and return */
4115   pthread_attr_destroy(&attr);
4116   free(threads);
4117   free(thread_data);
4118 
4119   return;
4120 }
4121 
4122 
4123 /* ----------------------------------------------------------------------------
4124  * Pthread companion function to VDiff
4125  */
4126 
VDiff_PT(void * thread_data)4127 static void *VDiff_PT(void *thread_data)
4128 {
4129   sunindextype i, start, end;
4130   realtype *xd, *yd, *zd;
4131   Pthreads_Data *my_data;
4132 
4133   /* extract thread data */
4134   my_data = (Pthreads_Data *) thread_data;
4135 
4136   xd = my_data->v1;
4137   yd = my_data->v2;
4138   zd = my_data->v3;
4139 
4140   start = my_data->start;
4141   end   = my_data->end;
4142 
4143   /* compute vector difference */
4144   for (i = start; i < end; i++)
4145     zd[i] = xd[i] - yd[i];
4146 
4147   /* exit */
4148   pthread_exit(NULL);
4149 }
4150 
4151 
4152 /* ----------------------------------------------------------------------------
4153  * Compute the negative of a vector
4154  */
4155 
VNeg_Pthreads(N_Vector x,N_Vector z)4156 static void VNeg_Pthreads(N_Vector x, N_Vector z)
4157 {
4158   sunindextype  N;
4159   int           i, nthreads;
4160   pthread_t     *threads;
4161   Pthreads_Data *thread_data;
4162   pthread_attr_t attr;
4163 
4164   /* allocate threads and thread data structs */
4165   N            = NV_LENGTH_PT(x);
4166   nthreads     = NV_NUM_THREADS_PT(x);
4167   threads      = malloc(nthreads*sizeof(pthread_t));
4168   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4169 
4170   /* set thread attributes */
4171   pthread_attr_init(&attr);
4172   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4173 
4174   for (i=0; i<nthreads; i++) {
4175     /* initialize thread data */
4176     N_VInitThreadData(&thread_data[i]);
4177 
4178     /* compute start and end loop index for thread */
4179     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4180 
4181     /* pack thread data */
4182     thread_data[i].v1 = NV_DATA_PT(x);
4183     thread_data[i].v2 = NV_DATA_PT(z);
4184 
4185     /* create threads and call pthread companion function */
4186     pthread_create(&threads[i], &attr, VNeg_PT, (void *) &thread_data[i]);
4187   }
4188 
4189   /* wait for all threads to finish */
4190   for (i=0; i<nthreads; i++) {
4191     pthread_join(threads[i], NULL);
4192   }
4193 
4194   /* clean up and return */
4195   pthread_attr_destroy(&attr);
4196   free(threads);
4197   free(thread_data);
4198 
4199   return;
4200 }
4201 
4202 
4203 /* ----------------------------------------------------------------------------
4204  * Pthread companion function to VNeg
4205  */
4206 
VNeg_PT(void * thread_data)4207 static void *VNeg_PT(void *thread_data)
4208 {
4209   sunindextype i, start, end;
4210   realtype *xd, *zd;
4211   Pthreads_Data *my_data;
4212 
4213   /* extract thread data */
4214   my_data = (Pthreads_Data *) thread_data;
4215 
4216   xd = my_data->v1;
4217   zd = my_data->v2;
4218 
4219   start = my_data->start;
4220   end   = my_data->end;
4221 
4222   /* compute negative of vector */
4223   for (i = start; i < end; i++)
4224     zd[i] = -xd[i];
4225 
4226   /* exit */
4227   pthread_exit(NULL);
4228 }
4229 
4230 
4231 /* ----------------------------------------------------------------------------
4232  * Compute scaled vector sum
4233  */
4234 
VScaleSum_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4235 static void VScaleSum_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4236 {
4237   sunindextype  N;
4238   int           i, nthreads;
4239   pthread_t     *threads;
4240   Pthreads_Data *thread_data;
4241   pthread_attr_t attr;
4242 
4243   /* allocate threads and thread data structs */
4244   N            = NV_LENGTH_PT(x);
4245   nthreads     = NV_NUM_THREADS_PT(x);
4246   threads      = malloc(nthreads*sizeof(pthread_t));
4247   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4248 
4249   /* set thread attributes */
4250   pthread_attr_init(&attr);
4251   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4252 
4253   for (i=0; i<nthreads; i++) {
4254     /* initialize thread data */
4255     N_VInitThreadData(&thread_data[i]);
4256 
4257     /* compute start and end loop index for thread */
4258     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4259 
4260     /* pack thread data */
4261     thread_data[i].c1 = c;
4262     thread_data[i].v1 = NV_DATA_PT(x);
4263     thread_data[i].v2 = NV_DATA_PT(y);
4264     thread_data[i].v3 = NV_DATA_PT(z);
4265 
4266     /* create threads and call pthread companion function */
4267     pthread_create(&threads[i], &attr, VScaleSum_PT, (void *) &thread_data[i]);
4268   }
4269 
4270   /* wait for all threads to finish */
4271   for (i=0; i<nthreads; i++) {
4272     pthread_join(threads[i], NULL);
4273   }
4274 
4275   /* clean up and return */
4276   pthread_attr_destroy(&attr);
4277   free(threads);
4278   free(thread_data);
4279 
4280   return;
4281 }
4282 
4283 
4284 /* ----------------------------------------------------------------------------
4285  * Pthread companion function to VScaleSum
4286  */
4287 
VScaleSum_PT(void * thread_data)4288 static void *VScaleSum_PT(void *thread_data)
4289 {
4290   sunindextype i, start, end;
4291   realtype c;
4292   realtype *xd, *yd, *zd;
4293   Pthreads_Data *my_data;
4294 
4295   /* extract thread data */
4296   my_data = (Pthreads_Data *) thread_data;
4297 
4298   c  = my_data->c1;
4299   xd = my_data->v1;
4300   yd = my_data->v2;
4301   zd = my_data->v3;
4302 
4303   start = my_data->start;
4304   end   = my_data->end;
4305 
4306   /* compute scaled vector sum */
4307   for (i = start; i < end; i++)
4308     zd[i] = c*(xd[i] + yd[i]);
4309 
4310   /* exit */
4311   pthread_exit(NULL);
4312 }
4313 
4314 
4315 /* ----------------------------------------------------------------------------
4316  * Compute scaled vector difference
4317  */
4318 
VScaleDiff_Pthreads(realtype c,N_Vector x,N_Vector y,N_Vector z)4319 static void VScaleDiff_Pthreads(realtype c, N_Vector x, N_Vector y, N_Vector z)
4320 {
4321   sunindextype  N;
4322   int           i, nthreads;
4323   pthread_t     *threads;
4324   Pthreads_Data *thread_data;
4325   pthread_attr_t attr;
4326 
4327   /* allocate threads and thread data structs */
4328   N            = NV_LENGTH_PT(x);
4329   nthreads     = NV_NUM_THREADS_PT(x);
4330   threads      = malloc(nthreads*sizeof(pthread_t));
4331   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4332 
4333   /* set thread attributes */
4334   pthread_attr_init(&attr);
4335   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4336 
4337   for (i=0; i<nthreads; i++) {
4338     /* initialize thread data */
4339     N_VInitThreadData(&thread_data[i]);
4340 
4341     /* compute start and end loop index for thread */
4342     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4343 
4344     /* pack thread data */
4345     thread_data[i].c1 = c;
4346     thread_data[i].v1 = NV_DATA_PT(x);
4347     thread_data[i].v2 = NV_DATA_PT(y);
4348     thread_data[i].v3 = NV_DATA_PT(z);
4349 
4350     /* create threads and call pthread companion function */
4351     pthread_create(&threads[i], &attr, VScaleDiff_PT, (void *) &thread_data[i]);
4352   }
4353 
4354   /* wait for all threads to finish */
4355   for (i=0; i<nthreads; i++) {
4356     pthread_join(threads[i], NULL);
4357   }
4358 
4359   /* clean up and return */
4360   pthread_attr_destroy(&attr);
4361   free(threads);
4362   free(thread_data);
4363 
4364   return;
4365 }
4366 
4367 
4368 /* ----------------------------------------------------------------------------
4369  * Pthread companion function to VScaleDiff
4370  */
4371 
VScaleDiff_PT(void * thread_data)4372 static void *VScaleDiff_PT(void *thread_data)
4373 {
4374   sunindextype i, start, end;
4375   realtype c;
4376   realtype *xd, *yd, *zd;
4377   Pthreads_Data *my_data;
4378 
4379   /* extract thread data */
4380   my_data = (Pthreads_Data *) thread_data;
4381 
4382   c  = my_data->c1;
4383   xd = my_data->v1;
4384   yd = my_data->v2;
4385   zd = my_data->v3;
4386 
4387   start = my_data->start;
4388   end   = my_data->end;
4389 
4390   /* compute scaled vector difference */
4391   for (i = start; i < end; i++)
4392     zd[i] = c*(xd[i] - yd[i]);
4393 
4394   /* exit */
4395   pthread_exit(NULL);
4396 }
4397 
4398 
4399 /* ----------------------------------------------------------------------------
4400  * Compute vector sum z[i] = a*x[i]+y[i]
4401  */
4402 
VLin1_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4403 static void VLin1_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4404 {
4405   sunindextype  N;
4406   int           i, nthreads;
4407   pthread_t     *threads;
4408   Pthreads_Data *thread_data;
4409   pthread_attr_t attr;
4410 
4411   /* allocate threads and thread data structs */
4412   N            = NV_LENGTH_PT(x);
4413   nthreads     = NV_NUM_THREADS_PT(x);
4414   threads      = malloc(nthreads*sizeof(pthread_t));
4415   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4416 
4417   /* set thread attributes */
4418   pthread_attr_init(&attr);
4419   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4420 
4421   for (i=0; i<nthreads; i++) {
4422     /* initialize thread data */
4423     N_VInitThreadData(&thread_data[i]);
4424 
4425     /* compute start and end loop index for thread */
4426     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4427 
4428     /* pack thread data */
4429     thread_data[i].c1 = a;
4430     thread_data[i].v1 = NV_DATA_PT(x);
4431     thread_data[i].v2 = NV_DATA_PT(y);
4432     thread_data[i].v3 = NV_DATA_PT(z);
4433 
4434     /* create threads and call pthread companion function */
4435     pthread_create(&threads[i], &attr, VLin1_PT, (void *) &thread_data[i]);
4436   }
4437 
4438   /* wait for all threads to finish */
4439   for (i=0; i<nthreads; i++) {
4440     pthread_join(threads[i], NULL);
4441   }
4442 
4443   /* clean up and return */
4444   pthread_attr_destroy(&attr);
4445   free(threads);
4446   free(thread_data);
4447 
4448   return;
4449 }
4450 
4451 
4452 /* ----------------------------------------------------------------------------
4453  * Pthread companion function to VLin1
4454  */
4455 
VLin1_PT(void * thread_data)4456 static void *VLin1_PT(void *thread_data)
4457 {
4458   sunindextype i, start, end;
4459   realtype a;
4460   realtype *xd, *yd, *zd;
4461   Pthreads_Data *my_data;
4462 
4463   /* extract thread data */
4464   my_data = (Pthreads_Data *) thread_data;
4465 
4466   a  = my_data->c1;
4467   xd = my_data->v1;
4468   yd = my_data->v2;
4469   zd = my_data->v3;
4470 
4471   start = my_data->start;
4472   end   = my_data->end;
4473 
4474   /* compute vector sum */
4475   for (i = start; i < end; i++)
4476     zd[i] = (a*xd[i]) + yd[i];
4477 
4478   /* exit */
4479   pthread_exit(NULL);
4480 }
4481 
4482 
4483 /* ----------------------------------------------------------------------------
4484  * Compute vector difference z[i] = a*x[i]-y[i]
4485  */
4486 
VLin2_Pthreads(realtype a,N_Vector x,N_Vector y,N_Vector z)4487 static void VLin2_Pthreads(realtype a, N_Vector x, N_Vector y, N_Vector z)
4488 {
4489   sunindextype  N;
4490   int           i, nthreads;
4491   pthread_t     *threads;
4492   Pthreads_Data *thread_data;
4493   pthread_attr_t attr;
4494 
4495   /* allocate threads and thread data structs */
4496   N            = NV_LENGTH_PT(x);
4497   nthreads     = NV_NUM_THREADS_PT(x);
4498   threads      = malloc(nthreads*sizeof(pthread_t));
4499   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4500 
4501   /* set thread attributes */
4502   pthread_attr_init(&attr);
4503   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4504 
4505   for (i=0; i<nthreads; i++) {
4506     /* initialize thread data */
4507     N_VInitThreadData(&thread_data[i]);
4508 
4509     /* compute start and end loop index for thread */
4510     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4511 
4512     /* pack thread data */
4513     thread_data[i].c1 = a;
4514     thread_data[i].v1 = NV_DATA_PT(x);
4515     thread_data[i].v2 = NV_DATA_PT(y);
4516     thread_data[i].v3 = NV_DATA_PT(z);
4517 
4518     /* create threads and call pthread companion function */
4519     pthread_create(&threads[i], &attr, VLin2_PT, (void *) &thread_data[i]);
4520   }
4521 
4522   /* wait for all threads to finish */
4523   for (i=0; i<nthreads; i++) {
4524     pthread_join(threads[i], NULL);
4525   }
4526 
4527   /* clean up and return */
4528   pthread_attr_destroy(&attr);
4529   free(threads);
4530   free(thread_data);
4531 
4532   return;
4533 }
4534 
4535 /* ----------------------------------------------------------------------------
4536  * Pthread companion function to VLin2
4537  */
4538 
VLin2_PT(void * thread_data)4539 static void *VLin2_PT(void *thread_data)
4540 {
4541   sunindextype i, start, end;
4542   realtype a;
4543   realtype *xd, *yd, *zd;
4544   Pthreads_Data *my_data;
4545 
4546   /* extract thread data */
4547   my_data = (Pthreads_Data *) thread_data;
4548 
4549   a  = my_data->c1;
4550   xd = my_data->v1;
4551   yd = my_data->v2;
4552   zd = my_data->v3;
4553 
4554   start = my_data->start;
4555   end   = my_data->end;
4556 
4557   /* compute vector difference */
4558   for (i = start; i < end; i++)
4559     zd[i] = (a*xd[i]) - yd[i];
4560 
4561   /* exit */
4562   pthread_exit(NULL);
4563 }
4564 
4565 
4566 /* ----------------------------------------------------------------------------
4567  * Compute special cases of linear sum
4568  */
4569 
Vaxpy_Pthreads(realtype a,N_Vector x,N_Vector y)4570 static void Vaxpy_Pthreads(realtype a, N_Vector x, N_Vector y)
4571 {
4572   sunindextype  N;
4573   int           i, nthreads;
4574   pthread_t     *threads;
4575   Pthreads_Data *thread_data;
4576   pthread_attr_t attr;
4577 
4578   /* allocate threads and thread data structs */
4579   N            = NV_LENGTH_PT(x);
4580   nthreads     = NV_NUM_THREADS_PT(x);
4581   threads      = malloc(nthreads*sizeof(pthread_t));
4582   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4583 
4584   /* set thread attributes */
4585   pthread_attr_init(&attr);
4586   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4587 
4588   for (i=0; i<nthreads; i++) {
4589     /* initialize thread data */
4590     N_VInitThreadData(&thread_data[i]);
4591 
4592     /* compute start and end loop index for thread */
4593     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4594 
4595     /* pack thread data */
4596     thread_data[i].c1 = a;
4597     thread_data[i].v1 = NV_DATA_PT(x);
4598     thread_data[i].v2 = NV_DATA_PT(y);
4599 
4600     /* create threads and call pthread companion function */
4601     pthread_create(&threads[i], &attr, Vaxpy_PT, (void *) &thread_data[i]);
4602   }
4603 
4604   /* wait for all threads to finish */
4605   for (i=0; i<nthreads; i++) {
4606     pthread_join(threads[i], NULL);
4607   }
4608 
4609   /* clean up and return */
4610   pthread_attr_destroy(&attr);
4611   free(threads);
4612   free(thread_data);
4613 
4614   return;
4615 }
4616 
4617 /* ----------------------------------------------------------------------------
4618  * Pthread companion function to Vaxpy
4619  */
4620 
Vaxpy_PT(void * thread_data)4621 static void *Vaxpy_PT(void *thread_data)
4622 {
4623   sunindextype i, start, end;
4624   realtype a;
4625   realtype *xd, *yd;
4626   Pthreads_Data *my_data;
4627 
4628   /* extract thread data */
4629   my_data = (Pthreads_Data *) thread_data;
4630 
4631   a  = my_data->c1;
4632   xd = my_data->v1;
4633   yd = my_data->v2;
4634 
4635   start = my_data->start;
4636   end   = my_data->end;
4637 
4638   /* compute axpy */
4639   if (a == ONE) {
4640     for (i = start; i < end; i++)
4641       yd[i] += xd[i];
4642 
4643     /* exit */
4644     pthread_exit(NULL);
4645   }
4646 
4647   if (a == -ONE) {
4648     for (i = start; i < end; i++)
4649       yd[i] -= xd[i];
4650 
4651     /* exit */
4652     pthread_exit(NULL);
4653   }
4654 
4655   for (i = start; i < end; i++)
4656     yd[i] += a*xd[i];
4657 
4658   /* return */
4659   pthread_exit(NULL);
4660 }
4661 
4662 
4663 /* ----------------------------------------------------------------------------
4664  * Compute scaled vector
4665  */
4666 
VScaleBy_Pthreads(realtype a,N_Vector x)4667 static void VScaleBy_Pthreads(realtype a, N_Vector x)
4668 {
4669   sunindextype  N;
4670   int           i, nthreads;
4671   pthread_t     *threads;
4672   Pthreads_Data *thread_data;
4673   pthread_attr_t attr;
4674 
4675   /* allocate threads and thread data structs */
4676   N            = NV_LENGTH_PT(x);
4677   nthreads     = NV_NUM_THREADS_PT(x);
4678   threads      = malloc(nthreads*sizeof(pthread_t));
4679   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4680 
4681   /* set thread attributes */
4682   pthread_attr_init(&attr);
4683   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4684 
4685   for (i=0; i<nthreads; i++) {
4686     /* initialize thread data */
4687     N_VInitThreadData(&thread_data[i]);
4688 
4689     /* compute start and end loop index for thread */
4690     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4691 
4692     /* pack thread data */
4693     thread_data[i].c1 = a;
4694     thread_data[i].v1 = NV_DATA_PT(x);
4695 
4696     /* create threads and call pthread companion function */
4697     pthread_create(&threads[i], &attr, VScaleBy_PT, (void *) &thread_data[i]);
4698   }
4699 
4700   /* wait for all threads to finish */
4701   for (i=0; i<nthreads; i++) {
4702     pthread_join(threads[i], NULL);
4703   }
4704 
4705   /* clean up and return */
4706   pthread_attr_destroy(&attr);
4707   free(threads);
4708   free(thread_data);
4709 
4710   return;
4711 }
4712 
4713 
4714 /* ----------------------------------------------------------------------------
4715  * Pthread companion function to VScaleBy
4716  */
4717 
VScaleBy_PT(void * thread_data)4718 static void *VScaleBy_PT(void *thread_data)
4719 {
4720   sunindextype i, start, end;
4721   realtype a;
4722   realtype *xd;
4723   Pthreads_Data *my_data;
4724 
4725   /* extract thread data */
4726   my_data = (Pthreads_Data *) thread_data;
4727 
4728   a  = my_data->c1;
4729   xd = my_data->v1;
4730 
4731   start = my_data->start;
4732   end   = my_data->end;
4733 
4734   /* compute scaled vector */
4735   for (i = start; i < end; i++)
4736     xd[i] *= a;
4737 
4738   /* exit */
4739   pthread_exit(NULL);
4740 }
4741 
4742 
4743 /*
4744  * -----------------------------------------------------------------------------
4745  * private functions for special cases of vector array operations
4746  * -----------------------------------------------------------------------------
4747  */
4748 
VSumVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4749 static int VSumVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4750 {
4751   sunindextype   N;
4752   int            i, nthreads;
4753   pthread_t      *threads;
4754   Pthreads_Data  *thread_data;
4755   pthread_attr_t attr;
4756 
4757   /* allocate threads and thread data structs */
4758   N           = NV_LENGTH_PT(X[0]);
4759   nthreads    = NV_NUM_THREADS_PT(X[0]);
4760   threads     = malloc(nthreads*sizeof(pthread_t));
4761   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4762 
4763   /* set thread attributes */
4764   pthread_attr_init(&attr);
4765   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4766 
4767   /* pack thread data, distribute loop indices, and create threads/call kernel */
4768   for (i=0; i<nthreads; i++) {
4769     N_VInitThreadData(&thread_data[i]);
4770 
4771     thread_data[i].nvec = nvec;
4772     thread_data[i].Y1   = X;
4773     thread_data[i].Y2   = Y;
4774     thread_data[i].Y3   = Z;
4775 
4776     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4777 
4778     pthread_create(&threads[i], &attr, VSumVectorArray_PT, (void *) &thread_data[i]);
4779   }
4780 
4781   /* wait for all threads to finish */
4782   for (i=0; i<nthreads; i++)
4783     pthread_join(threads[i], NULL);
4784 
4785   /* clean up and return */
4786   pthread_attr_destroy(&attr);
4787   free(threads);
4788   free(thread_data);
4789 
4790   return(0);
4791 }
4792 
VSumVectorArray_PT(void * thread_data)4793 static void *VSumVectorArray_PT(void *thread_data)
4794 {
4795   Pthreads_Data *my_data;
4796   sunindextype  j, start, end;
4797 
4798   int       i;
4799   realtype* xd=NULL;
4800   realtype* yd=NULL;
4801   realtype* zd=NULL;
4802 
4803   my_data = (Pthreads_Data *) thread_data;
4804   start   = my_data->start;
4805   end     = my_data->end;
4806 
4807   for (i=0; i<my_data->nvec; i++) {
4808     xd = NV_DATA_PT(my_data->Y1[i]);
4809     yd = NV_DATA_PT(my_data->Y2[i]);
4810     zd = NV_DATA_PT(my_data->Y3[i]);
4811     for (j=start; j<end; j++)
4812       zd[j] = xd[j] + yd[j];
4813   }
4814 
4815   pthread_exit(NULL);
4816 }
4817 
4818 
VDiffVectorArray_Pthreads(int nvec,N_Vector * X,N_Vector * Y,N_Vector * Z)4819 static int VDiffVectorArray_Pthreads(int nvec, N_Vector* X, N_Vector* Y, N_Vector* Z)
4820 {
4821   sunindextype   N;
4822   int            i, nthreads;
4823   pthread_t      *threads;
4824   Pthreads_Data  *thread_data;
4825   pthread_attr_t attr;
4826 
4827   /* allocate threads and thread data structs */
4828   N           = NV_LENGTH_PT(X[0]);
4829   nthreads    = NV_NUM_THREADS_PT(X[0]);
4830   threads     = malloc(nthreads*sizeof(pthread_t));
4831   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4832 
4833   /* set thread attributes */
4834   pthread_attr_init(&attr);
4835   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4836 
4837   /* pack thread data, distribute loop indices, and create threads/call kernel */
4838   for (i=0; i<nthreads; i++) {
4839     N_VInitThreadData(&thread_data[i]);
4840 
4841     thread_data[i].nvec = nvec;
4842     thread_data[i].Y1   = X;
4843     thread_data[i].Y2   = Y;
4844     thread_data[i].Y3   = Z;
4845 
4846     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4847 
4848     pthread_create(&threads[i], &attr, VDiffVectorArray_PT, (void *) &thread_data[i]);
4849   }
4850 
4851   /* wait for all threads to finish */
4852   for (i=0; i<nthreads; i++)
4853     pthread_join(threads[i], NULL);
4854 
4855   /* clean up and return */
4856   pthread_attr_destroy(&attr);
4857   free(threads);
4858   free(thread_data);
4859 
4860   return(0);
4861 }
4862 
VDiffVectorArray_PT(void * thread_data)4863 static void *VDiffVectorArray_PT(void *thread_data)
4864 {
4865   Pthreads_Data *my_data;
4866   sunindextype  j, start, end;
4867 
4868   int       i;
4869   realtype* xd=NULL;
4870   realtype* yd=NULL;
4871   realtype* zd=NULL;
4872 
4873   my_data = (Pthreads_Data *) thread_data;
4874   start   = my_data->start;
4875   end     = my_data->end;
4876 
4877   for (i=0; i<my_data->nvec; i++) {
4878     xd = NV_DATA_PT(my_data->Y1[i]);
4879     yd = NV_DATA_PT(my_data->Y2[i]);
4880     zd = NV_DATA_PT(my_data->Y3[i]);
4881     for (j=start; j<end; j++)
4882       zd[j] = xd[j] - yd[j];
4883   }
4884 
4885   pthread_exit(NULL);
4886 }
4887 
4888 
VScaleSumVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4889 static int VScaleSumVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4890 {
4891   sunindextype   N;
4892   int            i, nthreads;
4893   pthread_t      *threads;
4894   Pthreads_Data  *thread_data;
4895   pthread_attr_t attr;
4896 
4897   /* allocate threads and thread data structs */
4898   N            = NV_LENGTH_PT(X[0]);
4899   nthreads     = NV_NUM_THREADS_PT(X[0]);
4900   threads      = malloc(nthreads*sizeof(pthread_t));
4901   thread_data  = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4902 
4903   /* set thread attributes */
4904   pthread_attr_init(&attr);
4905   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4906 
4907   /* pack thread data, distribute loop indices, and create threads/call kernel */
4908   for (i=0; i<nthreads; i++) {
4909     N_VInitThreadData(&thread_data[i]);
4910 
4911     thread_data[i].nvec = nvec;
4912     thread_data[i].c1   = c;
4913     thread_data[i].Y1   = X;
4914     thread_data[i].Y2   = Y;
4915     thread_data[i].Y3   = Z;
4916 
4917     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4918 
4919     pthread_create(&threads[i], &attr, VScaleSumVectorArray_PT, (void *) &thread_data[i]);
4920   }
4921 
4922   /* wait for all threads to finish */
4923   for (i=0; i<nthreads; i++)
4924     pthread_join(threads[i], NULL);
4925 
4926   /* clean up and return */
4927   pthread_attr_destroy(&attr);
4928   free(threads);
4929   free(thread_data);
4930 
4931   return(0);
4932 }
4933 
VScaleSumVectorArray_PT(void * thread_data)4934 static void *VScaleSumVectorArray_PT(void *thread_data)
4935 {
4936   Pthreads_Data *my_data;
4937   sunindextype  j, start, end;
4938 
4939   int       i;
4940   realtype  c;
4941   realtype* xd=NULL;
4942   realtype* yd=NULL;
4943   realtype* zd=NULL;
4944 
4945   my_data = (Pthreads_Data *) thread_data;
4946   start   = my_data->start;
4947   end     = my_data->end;
4948   c       = my_data->c1;
4949 
4950   for (i=0; i<my_data->nvec; i++) {
4951     xd = NV_DATA_PT(my_data->Y1[i]);
4952     yd = NV_DATA_PT(my_data->Y2[i]);
4953     zd = NV_DATA_PT(my_data->Y3[i]);
4954     for (j=start; j<end; j++)
4955       zd[j] = c * (xd[j] + yd[j]);
4956   }
4957 
4958   pthread_exit(NULL);
4959 }
4960 
4961 
VScaleDiffVectorArray_Pthreads(int nvec,realtype c,N_Vector * X,N_Vector * Y,N_Vector * Z)4962 static int VScaleDiffVectorArray_Pthreads(int nvec, realtype c, N_Vector* X, N_Vector* Y, N_Vector* Z)
4963 {
4964   sunindextype   N;
4965   int            i, nthreads;
4966   pthread_t      *threads;
4967   Pthreads_Data  *thread_data;
4968   pthread_attr_t attr;
4969 
4970   /* allocate threads and thread data structs */
4971   N           = NV_LENGTH_PT(X[0]);
4972   nthreads    = NV_NUM_THREADS_PT(X[0]);
4973   threads     = malloc(nthreads*sizeof(pthread_t));
4974   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
4975 
4976   /* set thread attributes */
4977   pthread_attr_init(&attr);
4978   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
4979 
4980   /* pack thread data, distribute loop indices, and create threads/call kernel */
4981   for (i=0; i<nthreads; i++) {
4982     N_VInitThreadData(&thread_data[i]);
4983 
4984     thread_data[i].nvec = nvec;
4985     thread_data[i].c1   = c;
4986     thread_data[i].Y1   = X;
4987     thread_data[i].Y2   = Y;
4988     thread_data[i].Y3   = Z;
4989 
4990     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
4991 
4992     pthread_create(&threads[i], &attr, VScaleDiffVectorArray_PT, (void *) &thread_data[i]);
4993   }
4994 
4995   /* wait for all threads to finish */
4996   for (i=0; i<nthreads; i++)
4997     pthread_join(threads[i], NULL);
4998 
4999   /* clean up and return */
5000   pthread_attr_destroy(&attr);
5001   free(threads);
5002   free(thread_data);
5003 
5004   return(0);
5005 }
5006 
VScaleDiffVectorArray_PT(void * thread_data)5007 static void *VScaleDiffVectorArray_PT(void *thread_data)
5008 {
5009   Pthreads_Data *my_data;
5010   sunindextype  j, start, end;
5011 
5012   int       i;
5013   realtype  c;
5014   realtype* xd=NULL;
5015   realtype* yd=NULL;
5016   realtype* zd=NULL;
5017 
5018   my_data = (Pthreads_Data *) thread_data;
5019   start   = my_data->start;
5020   end     = my_data->end;
5021   c       = my_data->c1;
5022 
5023   for (i=0; i<my_data->nvec; i++) {
5024     xd = NV_DATA_PT(my_data->Y1[i]);
5025     yd = NV_DATA_PT(my_data->Y2[i]);
5026     zd = NV_DATA_PT(my_data->Y3[i]);
5027     for (j=start; j<end; j++)
5028       zd[j] = c * (xd[j] - yd[j]);
5029   }
5030 
5031   pthread_exit(NULL);
5032 }
5033 
5034 
VLin1VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)5035 static int VLin1VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
5036 {
5037   sunindextype   N;
5038   int            i, nthreads;
5039   pthread_t      *threads;
5040   Pthreads_Data  *thread_data;
5041   pthread_attr_t attr;
5042 
5043   /* allocate threads and thread data structs */
5044   N           = NV_LENGTH_PT(X[0]);
5045   nthreads    = NV_NUM_THREADS_PT(X[0]);
5046   threads     = malloc(nthreads*sizeof(pthread_t));
5047   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5048 
5049   /* set thread attributes */
5050   pthread_attr_init(&attr);
5051   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5052 
5053   /* pack thread data, distribute loop indices, and create threads/call kernel */
5054   for (i=0; i<nthreads; i++) {
5055     N_VInitThreadData(&thread_data[i]);
5056 
5057     thread_data[i].nvec = nvec;
5058     thread_data[i].c1   = a;
5059     thread_data[i].Y1   = X;
5060     thread_data[i].Y2   = Y;
5061     thread_data[i].Y3   = Z;
5062 
5063     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5064 
5065     pthread_create(&threads[i], &attr, VLin1VectorArray_PT, (void *) &thread_data[i]);
5066   }
5067 
5068   /* wait for all threads to finish */
5069   for (i=0; i<nthreads; i++)
5070     pthread_join(threads[i], NULL);
5071 
5072   /* clean up and return */
5073   pthread_attr_destroy(&attr);
5074   free(threads);
5075   free(thread_data);
5076 
5077   return(0);
5078 }
5079 
VLin1VectorArray_PT(void * thread_data)5080 static void *VLin1VectorArray_PT(void *thread_data)
5081 {
5082   Pthreads_Data *my_data;
5083   sunindextype  j, start, end;
5084 
5085   int       i;
5086   realtype  a;
5087   realtype* xd=NULL;
5088   realtype* yd=NULL;
5089   realtype* zd=NULL;
5090 
5091   my_data = (Pthreads_Data *) thread_data;
5092   start   = my_data->start;
5093   end     = my_data->end;
5094   a       = my_data->c1;
5095 
5096   for (i=0; i<my_data->nvec; i++) {
5097     xd = NV_DATA_PT(my_data->Y1[i]);
5098     yd = NV_DATA_PT(my_data->Y2[i]);
5099     zd = NV_DATA_PT(my_data->Y3[i]);
5100     for (j=start; j<end; j++)
5101       zd[j] = (a * xd[j]) + yd[j];
5102   }
5103 
5104   pthread_exit(NULL);
5105 }
5106 
5107 
VLin2VectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y,N_Vector * Z)5108 static int VLin2VectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y, N_Vector* Z)
5109 {
5110   sunindextype   N;
5111   int            i, nthreads;
5112   pthread_t      *threads;
5113   Pthreads_Data  *thread_data;
5114   pthread_attr_t attr;
5115 
5116   /* allocate threads and thread data structs */
5117   N           = NV_LENGTH_PT(X[0]);
5118   nthreads    = NV_NUM_THREADS_PT(X[0]);
5119   threads     = malloc(nthreads*sizeof(pthread_t));
5120   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5121 
5122   /* set thread attributes */
5123   pthread_attr_init(&attr);
5124   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5125 
5126   /* pack thread data, distribute loop indices, and create threads/call kernel */
5127   for (i=0; i<nthreads; i++) {
5128     N_VInitThreadData(&thread_data[i]);
5129 
5130     thread_data[i].nvec = nvec;
5131     thread_data[i].c1   = a;
5132     thread_data[i].Y1   = X;
5133     thread_data[i].Y2   = Y;
5134     thread_data[i].Y3   = Z;
5135 
5136     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5137 
5138     pthread_create(&threads[i], &attr, VLin2VectorArray_PT, (void *) &thread_data[i]);
5139   }
5140 
5141   /* wait for all threads to finish */
5142   for (i=0; i<nthreads; i++)
5143     pthread_join(threads[i], NULL);
5144 
5145   /* clean up and return */
5146   pthread_attr_destroy(&attr);
5147   free(threads);
5148   free(thread_data);
5149 
5150   return(0);
5151 }
5152 
VLin2VectorArray_PT(void * thread_data)5153 static void *VLin2VectorArray_PT(void *thread_data)
5154 {
5155   Pthreads_Data *my_data;
5156   sunindextype  j, start, end;
5157 
5158   int       i;
5159   realtype  a;
5160   realtype* xd=NULL;
5161   realtype* yd=NULL;
5162   realtype* zd=NULL;
5163 
5164   my_data = (Pthreads_Data *) thread_data;
5165   start   = my_data->start;
5166   end     = my_data->end;
5167   a       = my_data->c1;
5168 
5169   for (i=0; i<my_data->nvec; i++) {
5170     xd = NV_DATA_PT(my_data->Y1[i]);
5171     yd = NV_DATA_PT(my_data->Y2[i]);
5172     zd = NV_DATA_PT(my_data->Y3[i]);
5173     for (j=start; j<end; j++)
5174       zd[j] = (a * xd[j]) - yd[j];
5175   }
5176 
5177   pthread_exit(NULL);
5178 }
5179 
5180 
VaxpyVectorArray_Pthreads(int nvec,realtype a,N_Vector * X,N_Vector * Y)5181 static int VaxpyVectorArray_Pthreads(int nvec, realtype a, N_Vector* X, N_Vector* Y)
5182 {
5183   sunindextype   N;
5184   int            i, nthreads;
5185   pthread_t      *threads;
5186   Pthreads_Data  *thread_data;
5187   pthread_attr_t attr;
5188 
5189   /* allocate threads and thread data structs */
5190   N           = NV_LENGTH_PT(X[0]);
5191   nthreads    = NV_NUM_THREADS_PT(X[0]);
5192   threads     = malloc(nthreads*sizeof(pthread_t));
5193   thread_data = (Pthreads_Data *) malloc(nthreads*sizeof(struct _Pthreads_Data));
5194 
5195   /* set thread attributes */
5196   pthread_attr_init(&attr);
5197   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
5198 
5199   /* pack thread data, distribute loop indices, and create threads/call kernel */
5200   for (i=0; i<nthreads; i++) {
5201     N_VInitThreadData(&thread_data[i]);
5202 
5203     thread_data[i].nvec = nvec;
5204     thread_data[i].c1   = a;
5205     thread_data[i].Y1   = X;
5206     thread_data[i].Y2   = Y;
5207 
5208     N_VSplitLoop(i, &nthreads, &N, &thread_data[i].start, &thread_data[i].end);
5209 
5210     pthread_create(&threads[i], &attr, VaxpyVectorArray_PT, (void *) &thread_data[i]);
5211   }
5212 
5213   /* wait for all threads to finish */
5214   for (i=0; i<nthreads; i++)
5215     pthread_join(threads[i], NULL);
5216 
5217   /* clean up and return */
5218   pthread_attr_destroy(&attr);
5219   free(threads);
5220   free(thread_data);
5221 
5222   return(0);
5223 }
5224 
VaxpyVectorArray_PT(void * thread_data)5225 static void *VaxpyVectorArray_PT(void *thread_data)
5226 {
5227   Pthreads_Data *my_data;
5228   sunindextype  j, start, end;
5229 
5230   int       i;
5231   realtype  a;
5232   realtype* xd=NULL;
5233   realtype* yd=NULL;
5234 
5235   my_data = (Pthreads_Data *) thread_data;
5236   start   = my_data->start;
5237   end     = my_data->end;
5238   a       = my_data->c1;
5239 
5240   if (a == ONE) {
5241     for (i=0; i<my_data->nvec; i++) {
5242       xd = NV_DATA_PT(my_data->Y1[i]);
5243       yd = NV_DATA_PT(my_data->Y2[i]);
5244       for (j=start; j<end; j++)
5245         yd[j] += xd[j];
5246     }
5247     pthread_exit(NULL);
5248   }
5249 
5250   if (a == -ONE) {
5251     for (i=0; i<my_data->nvec; i++) {
5252       xd = NV_DATA_PT(my_data->Y1[i]);
5253       yd = NV_DATA_PT(my_data->Y2[i]);
5254       for (j=start; j<end; j++)
5255         yd[j] -= xd[j];
5256     }
5257     pthread_exit(NULL);
5258   }
5259 
5260   for (i=0; i<my_data->nvec; i++) {
5261     xd = NV_DATA_PT(my_data->Y1[i]);
5262     yd = NV_DATA_PT(my_data->Y2[i]);
5263     for (j=start; j<end; j++)
5264       yd[j] += a * xd[j];
5265   }
5266   pthread_exit(NULL);
5267 }
5268 
5269 
5270 /*
5271  * -----------------------------------------------------------------
5272  * private utility functions
5273  * -----------------------------------------------------------------
5274  */
5275 
5276 /* ----------------------------------------------------------------------------
5277  * Determine loop indices for a thread
5278  */
5279 
N_VSplitLoop(int myid,int * nthreads,sunindextype * N,sunindextype * start,sunindextype * end)5280 static void N_VSplitLoop(int myid, int *nthreads, sunindextype *N,
5281 			 sunindextype *start, sunindextype *end)
5282 {
5283   sunindextype q, r; /* quotient and remainder */
5284 
5285   /* work per thread and leftover work */
5286   q = *N / *nthreads;
5287   r = *N % *nthreads;
5288 
5289   /* assign work */
5290   if (myid < r) {
5291     *start = myid * q + myid;
5292     *end   = *start + q + 1;
5293   } else {
5294     *start = myid * q + r;
5295     *end   = *start + q;
5296   }
5297 }
5298 
5299 
5300 /* ----------------------------------------------------------------------------
5301  * Initialize values of local thread data struct
5302  */
5303 
N_VInitThreadData(Pthreads_Data * thread_data)5304 static void N_VInitThreadData(Pthreads_Data *thread_data)
5305 {
5306   thread_data->start = -1;
5307   thread_data->end   = -1;
5308 
5309 #if __STDC_VERSION__ >= 199901L
5310   thread_data->c1 = NAN;
5311   thread_data->c2 = NAN;
5312 #else
5313   thread_data->c1 = ZERO;
5314   thread_data->c2 = ZERO;
5315 #endif
5316 
5317   thread_data->v1 = NULL;
5318   thread_data->v2 = NULL;
5319   thread_data->v3 = NULL;
5320   thread_data->global_val = NULL;
5321   thread_data->global_mutex = NULL;
5322 
5323   thread_data->nvec = ZERO;
5324   thread_data->nsum = ZERO;
5325   thread_data->cvals = NULL;
5326   thread_data->Y1 = NULL;
5327   thread_data->Y2 = NULL;
5328   thread_data->Y3 = NULL;
5329 }
5330 
5331 
5332 /*
5333  * -----------------------------------------------------------------
5334  * Enable / Disable fused and vector array operations
5335  * -----------------------------------------------------------------
5336  */
5337 
N_VEnableFusedOps_Pthreads(N_Vector v,booleantype tf)5338 int N_VEnableFusedOps_Pthreads(N_Vector v, booleantype tf)
5339 {
5340   /* check that vector is non-NULL */
5341   if (v == NULL) return(-1);
5342 
5343   /* check that ops structure is non-NULL */
5344   if (v->ops == NULL) return(-1);
5345 
5346   if (tf) {
5347     /* enable all fused vector operations */
5348     v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5349     v->ops->nvscaleaddmulti     = N_VScaleAddMulti_Pthreads;
5350     v->ops->nvdotprodmulti      = N_VDotProdMulti_Pthreads;
5351     /* enable all vector array operations */
5352     v->ops->nvlinearsumvectorarray         = N_VLinearSumVectorArray_Pthreads;
5353     v->ops->nvscalevectorarray             = N_VScaleVectorArray_Pthreads;
5354     v->ops->nvconstvectorarray             = N_VConstVectorArray_Pthreads;
5355     v->ops->nvwrmsnormvectorarray          = N_VWrmsNormVectorArray_Pthreads;
5356     v->ops->nvwrmsnormmaskvectorarray      = N_VWrmsNormMaskVectorArray_Pthreads;
5357     v->ops->nvscaleaddmultivectorarray     = N_VScaleAddMultiVectorArray_Pthreads;
5358     v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5359   } else {
5360     /* disable all fused vector operations */
5361     v->ops->nvlinearcombination = NULL;
5362     v->ops->nvscaleaddmulti     = NULL;
5363     v->ops->nvdotprodmulti      = NULL;
5364     /* disable all vector array operations */
5365     v->ops->nvlinearsumvectorarray         = NULL;
5366     v->ops->nvscalevectorarray             = NULL;
5367     v->ops->nvconstvectorarray             = NULL;
5368     v->ops->nvwrmsnormvectorarray          = NULL;
5369     v->ops->nvwrmsnormmaskvectorarray      = NULL;
5370     v->ops->nvscaleaddmultivectorarray     = NULL;
5371     v->ops->nvlinearcombinationvectorarray = NULL;
5372   }
5373 
5374   /* return success */
5375   return(0);
5376 }
5377 
5378 
N_VEnableLinearCombination_Pthreads(N_Vector v,booleantype tf)5379 int N_VEnableLinearCombination_Pthreads(N_Vector v, booleantype tf)
5380 {
5381   /* check that vector is non-NULL */
5382   if (v == NULL) return(-1);
5383 
5384   /* check that ops structure is non-NULL */
5385   if (v->ops == NULL) return(-1);
5386 
5387   /* enable/disable operation */
5388   if (tf)
5389     v->ops->nvlinearcombination = N_VLinearCombination_Pthreads;
5390   else
5391     v->ops->nvlinearcombination = NULL;
5392 
5393   /* return success */
5394   return(0);
5395 }
5396 
N_VEnableScaleAddMulti_Pthreads(N_Vector v,booleantype tf)5397 int N_VEnableScaleAddMulti_Pthreads(N_Vector v, booleantype tf)
5398 {
5399   /* check that vector is non-NULL */
5400   if (v == NULL) return(-1);
5401 
5402   /* check that ops structure is non-NULL */
5403   if (v->ops == NULL) return(-1);
5404 
5405   /* enable/disable operation */
5406   if (tf)
5407     v->ops->nvscaleaddmulti = N_VScaleAddMulti_Pthreads;
5408   else
5409     v->ops->nvscaleaddmulti = NULL;
5410 
5411   /* return success */
5412   return(0);
5413 }
5414 
N_VEnableDotProdMulti_Pthreads(N_Vector v,booleantype tf)5415 int N_VEnableDotProdMulti_Pthreads(N_Vector v, booleantype tf)
5416 {
5417   /* check that vector is non-NULL */
5418   if (v == NULL) return(-1);
5419 
5420   /* check that ops structure is non-NULL */
5421   if (v->ops == NULL) return(-1);
5422 
5423   /* enable/disable operation */
5424   if (tf)
5425     v->ops->nvdotprodmulti = N_VDotProdMulti_Pthreads;
5426   else
5427     v->ops->nvdotprodmulti = NULL;
5428 
5429   /* return success */
5430   return(0);
5431 }
5432 
N_VEnableLinearSumVectorArray_Pthreads(N_Vector v,booleantype tf)5433 int N_VEnableLinearSumVectorArray_Pthreads(N_Vector v, booleantype tf)
5434 {
5435   /* check that vector is non-NULL */
5436   if (v == NULL) return(-1);
5437 
5438   /* check that ops structure is non-NULL */
5439   if (v->ops == NULL) return(-1);
5440 
5441   /* enable/disable operation */
5442   if (tf)
5443     v->ops->nvlinearsumvectorarray = N_VLinearSumVectorArray_Pthreads;
5444   else
5445     v->ops->nvlinearsumvectorarray = NULL;
5446 
5447   /* return success */
5448   return(0);
5449 }
5450 
N_VEnableScaleVectorArray_Pthreads(N_Vector v,booleantype tf)5451 int N_VEnableScaleVectorArray_Pthreads(N_Vector v, booleantype tf)
5452 {
5453   /* check that vector is non-NULL */
5454   if (v == NULL) return(-1);
5455 
5456   /* check that ops structure is non-NULL */
5457   if (v->ops == NULL) return(-1);
5458 
5459   /* enable/disable operation */
5460   if (tf)
5461     v->ops->nvscalevectorarray = N_VScaleVectorArray_Pthreads;
5462   else
5463     v->ops->nvscalevectorarray = NULL;
5464 
5465   /* return success */
5466   return(0);
5467 }
5468 
N_VEnableConstVectorArray_Pthreads(N_Vector v,booleantype tf)5469 int N_VEnableConstVectorArray_Pthreads(N_Vector v, booleantype tf)
5470 {
5471   /* check that vector is non-NULL */
5472   if (v == NULL) return(-1);
5473 
5474   /* check that ops structure is non-NULL */
5475   if (v->ops == NULL) return(-1);
5476 
5477   /* enable/disable operation */
5478   if (tf)
5479     v->ops->nvconstvectorarray = N_VConstVectorArray_Pthreads;
5480   else
5481     v->ops->nvconstvectorarray = NULL;
5482 
5483   /* return success */
5484   return(0);
5485 }
5486 
N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v,booleantype tf)5487 int N_VEnableWrmsNormVectorArray_Pthreads(N_Vector v, booleantype tf)
5488 {
5489   /* check that vector is non-NULL */
5490   if (v == NULL) return(-1);
5491 
5492   /* check that ops structure is non-NULL */
5493   if (v->ops == NULL) return(-1);
5494 
5495   /* enable/disable operation */
5496   if (tf)
5497     v->ops->nvwrmsnormvectorarray = N_VWrmsNormVectorArray_Pthreads;
5498   else
5499     v->ops->nvwrmsnormvectorarray = NULL;
5500 
5501   /* return success */
5502   return(0);
5503 }
5504 
N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v,booleantype tf)5505 int N_VEnableWrmsNormMaskVectorArray_Pthreads(N_Vector v, booleantype tf)
5506 {
5507   /* check that vector is non-NULL */
5508   if (v == NULL) return(-1);
5509 
5510   /* check that ops structure is non-NULL */
5511   if (v->ops == NULL) return(-1);
5512 
5513   /* enable/disable operation */
5514   if (tf)
5515     v->ops->nvwrmsnormmaskvectorarray = N_VWrmsNormMaskVectorArray_Pthreads;
5516   else
5517     v->ops->nvwrmsnormmaskvectorarray = NULL;
5518 
5519   /* return success */
5520   return(0);
5521 }
5522 
N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v,booleantype tf)5523 int N_VEnableScaleAddMultiVectorArray_Pthreads(N_Vector v, booleantype tf)
5524 {
5525   /* check that vector is non-NULL */
5526   if (v == NULL) return(-1);
5527 
5528   /* check that ops structure is non-NULL */
5529   if (v->ops == NULL) return(-1);
5530 
5531   /* enable/disable operation */
5532   if (tf)
5533     v->ops->nvscaleaddmultivectorarray = N_VScaleAddMultiVectorArray_Pthreads;
5534   else
5535     v->ops->nvscaleaddmultivectorarray = NULL;
5536 
5537   /* return success */
5538   return(0);
5539 }
5540 
N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v,booleantype tf)5541 int N_VEnableLinearCombinationVectorArray_Pthreads(N_Vector v, booleantype tf)
5542 {
5543   /* check that vector is non-NULL */
5544   if (v == NULL) return(-1);
5545 
5546   /* check that ops structure is non-NULL */
5547   if (v->ops == NULL) return(-1);
5548 
5549   /* enable/disable operation */
5550   if (tf)
5551     v->ops->nvlinearcombinationvectorarray = N_VLinearCombinationVectorArray_Pthreads;
5552   else
5553     v->ops->nvlinearcombinationvectorarray = NULL;
5554 
5555   /* return success */
5556   return(0);
5557 }
5558