1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /******************************************************************************
9  *
10  * Member functions for hypre_Vector class.
11  *
12  *****************************************************************************/
13 
14 #include "seq_mv.h"
15 #include "_hypre_utilities.hpp" //RL: TODO vector_device.c, include cuda there
16 
17 /*--------------------------------------------------------------------------
18  * hypre_SeqVectorCreate
19  *--------------------------------------------------------------------------*/
20 
21 hypre_Vector *
hypre_SeqVectorCreate(HYPRE_Int size)22 hypre_SeqVectorCreate( HYPRE_Int size )
23 {
24    hypre_Vector  *vector;
25 
26    vector = hypre_CTAlloc(hypre_Vector, 1, HYPRE_MEMORY_HOST);
27 
28    hypre_VectorData(vector) = NULL;
29    hypre_VectorSize(vector) = size;
30 
31    hypre_VectorNumVectors(vector) = 1;
32    hypre_VectorMultiVecStorageMethod(vector) = 0;
33 
34    /* set defaults */
35    hypre_VectorOwnsData(vector) = 1;
36 
37    hypre_VectorMemoryLocation(vector) = hypre_HandleMemoryLocation(hypre_handle());
38 
39    return vector;
40 }
41 
42 /*--------------------------------------------------------------------------
43  * hypre_SeqMultiVectorCreate
44  *--------------------------------------------------------------------------*/
45 
46 hypre_Vector *
hypre_SeqMultiVectorCreate(HYPRE_Int size,HYPRE_Int num_vectors)47 hypre_SeqMultiVectorCreate( HYPRE_Int size, HYPRE_Int num_vectors )
48 {
49    hypre_Vector *vector = hypre_SeqVectorCreate(size);
50    hypre_VectorNumVectors(vector) = num_vectors;
51 
52    return vector;
53 }
54 
55 /*--------------------------------------------------------------------------
56  * hypre_SeqVectorDestroy
57  *--------------------------------------------------------------------------*/
58 
59 HYPRE_Int
hypre_SeqVectorDestroy(hypre_Vector * vector)60 hypre_SeqVectorDestroy( hypre_Vector *vector )
61 {
62    HYPRE_Int ierr=0;
63 
64    if (vector)
65    {
66       HYPRE_MemoryLocation memory_location = hypre_VectorMemoryLocation(vector);
67 
68       if ( hypre_VectorOwnsData(vector) )
69       {
70          hypre_TFree(hypre_VectorData(vector), memory_location);
71       }
72 
73       hypre_TFree(vector, HYPRE_MEMORY_HOST);
74    }
75 
76    return ierr;
77 }
78 
79 /*--------------------------------------------------------------------------
80  * hypre_SeqVectorInitialize
81  *--------------------------------------------------------------------------*/
82 
83 HYPRE_Int
hypre_SeqVectorInitialize_v2(hypre_Vector * vector,HYPRE_MemoryLocation memory_location)84 hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_location )
85 {
86    HYPRE_Int  size = hypre_VectorSize(vector);
87    HYPRE_Int  ierr = 0;
88    HYPRE_Int  num_vectors = hypre_VectorNumVectors(vector);
89    HYPRE_Int  multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector);
90 
91    hypre_VectorMemoryLocation(vector) = memory_location;
92 
93    /* Caveat: for pre-existing data, the memory location must be guaranteed
94     * to be consistent with `memory_location'
95     * Otherwise, mismatches will exist and problems will be encountered
96     * when being used, and freed */
97    if ( !hypre_VectorData(vector) )
98    {
99       hypre_VectorData(vector) = hypre_CTAlloc(HYPRE_Complex, num_vectors*size, memory_location);
100    }
101 
102    if ( multivec_storage_method == 0 )
103    {
104       hypre_VectorVectorStride(vector) = size;
105       hypre_VectorIndexStride(vector) = 1;
106    }
107    else if ( multivec_storage_method == 1 )
108    {
109       hypre_VectorVectorStride(vector) = 1;
110       hypre_VectorIndexStride(vector) = num_vectors;
111    }
112    else
113    {
114       ++ierr;
115    }
116 
117    return ierr;
118 }
119 
120 HYPRE_Int
hypre_SeqVectorInitialize(hypre_Vector * vector)121 hypre_SeqVectorInitialize( hypre_Vector *vector )
122 {
123    HYPRE_Int ierr;
124 
125    ierr = hypre_SeqVectorInitialize_v2( vector, hypre_VectorMemoryLocation(vector) );
126 
127    return ierr;
128 }
129 
130 /*--------------------------------------------------------------------------
131  * hypre_SeqVectorSetDataOwner
132  *--------------------------------------------------------------------------*/
133 
134 HYPRE_Int
hypre_SeqVectorSetDataOwner(hypre_Vector * vector,HYPRE_Int owns_data)135 hypre_SeqVectorSetDataOwner( hypre_Vector *vector,
136                              HYPRE_Int     owns_data   )
137 {
138    HYPRE_Int    ierr=0;
139 
140    hypre_VectorOwnsData(vector) = owns_data;
141 
142    return ierr;
143 }
144 
145 /*--------------------------------------------------------------------------
146  * ReadVector
147  *--------------------------------------------------------------------------*/
148 
149 hypre_Vector *
hypre_SeqVectorRead(char * file_name)150 hypre_SeqVectorRead( char *file_name )
151 {
152    hypre_Vector  *vector;
153 
154    FILE    *fp;
155 
156    HYPRE_Complex *data;
157    HYPRE_Int      size;
158 
159    HYPRE_Int      j;
160 
161    /*----------------------------------------------------------
162     * Read in the data
163     *----------------------------------------------------------*/
164 
165    fp = fopen(file_name, "r");
166 
167    hypre_fscanf(fp, "%d", &size);
168 
169    vector = hypre_SeqVectorCreate(size);
170 
171    hypre_VectorMemoryLocation(vector) = HYPRE_MEMORY_HOST;
172 
173    hypre_SeqVectorInitialize(vector);
174 
175    data = hypre_VectorData(vector);
176    for (j = 0; j < size; j++)
177    {
178       hypre_fscanf(fp, "%le", &data[j]);
179    }
180 
181    fclose(fp);
182 
183    /* multivector code not written yet */
184    hypre_assert( hypre_VectorNumVectors(vector) == 1 );
185 
186    return vector;
187 }
188 
189 /*--------------------------------------------------------------------------
190  * hypre_SeqVectorPrint
191  *--------------------------------------------------------------------------*/
192 
193 HYPRE_Int
hypre_SeqVectorPrint(hypre_Vector * vector,char * file_name)194 hypre_SeqVectorPrint( hypre_Vector *vector,
195                       char         *file_name )
196 {
197    FILE    *fp;
198 
199    HYPRE_Complex *data;
200    HYPRE_Int      size, num_vectors, vecstride, idxstride;
201 
202    HYPRE_Int      i, j;
203    HYPRE_Complex  value;
204 
205    HYPRE_Int      ierr = 0;
206 
207    num_vectors = hypre_VectorNumVectors(vector);
208    vecstride = hypre_VectorVectorStride(vector);
209    idxstride = hypre_VectorIndexStride(vector);
210 
211    /*----------------------------------------------------------
212     * Print in the data
213     *----------------------------------------------------------*/
214 
215    data = hypre_VectorData(vector);
216    size = hypre_VectorSize(vector);
217 
218    fp = fopen(file_name, "w");
219 
220    if ( hypre_VectorNumVectors(vector) == 1 )
221    {
222       hypre_fprintf(fp, "%d\n", size);
223    }
224    else
225    {
226       hypre_fprintf(fp, "%d vectors of size %d\n", num_vectors, size );
227    }
228 
229    if ( num_vectors>1 )
230    {
231       for ( j=0; j<num_vectors; ++j )
232       {
233          hypre_fprintf(fp, "vector %d\n", j );
234          for (i = 0; i < size; i++)
235          {
236             value = data[ j*vecstride + i*idxstride ];
237 #ifdef HYPRE_COMPLEX
238             hypre_fprintf(fp, "%.14e , %.14e\n",
239                           hypre_creal(value), hypre_cimag(value));
240 #else
241             hypre_fprintf(fp, "%.14e\n", value);
242 #endif
243          }
244       }
245    }
246    else
247    {
248       for (i = 0; i < size; i++)
249       {
250 #ifdef HYPRE_COMPLEX
251          hypre_fprintf(fp, "%.14e , %.14e\n",
252                        hypre_creal(data[i]), hypre_cimag(data[i]));
253 #else
254          hypre_fprintf(fp, "%.14e\n", data[i]);
255 #endif
256       }
257    }
258 
259    fclose(fp);
260 
261    return ierr;
262 }
263 
264 /*--------------------------------------------------------------------------
265  * hypre_SeqVectorSetConstantValues
266  *--------------------------------------------------------------------------*/
267 
268 HYPRE_Int
hypre_SeqVectorSetConstantValues(hypre_Vector * v,HYPRE_Complex value)269 hypre_SeqVectorSetConstantValues( hypre_Vector *v,
270                                   HYPRE_Complex value )
271 {
272 #ifdef HYPRE_PROFILE
273    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
274 #endif
275 
276    HYPRE_Complex *vector_data = hypre_VectorData(v);
277    HYPRE_Int      size        = hypre_VectorSize(v);
278    HYPRE_Int      ierr  = 0;
279 
280    size *= hypre_VectorNumVectors(v);
281 
282    //hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
283 
284 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
285    if (size > 0)
286    {
287       HYPRE_THRUST_CALL( fill_n, vector_data, size, value );
288    }
289 #else
290    HYPRE_Int i;
291 #if defined(HYPRE_USING_DEVICE_OPENMP)
292 #pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data)
293 #elif defined(HYPRE_USING_OPENMP)
294 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
295 #endif
296    for (i = 0; i < size; i++)
297    {
298       vector_data[i] = value;
299    }
300 #endif /* defined(HYPRE_USING_CUDA)  || defined(HYPRE_USING_HIP) */
301 
302 #if defined(HYPRE_USING_GPU)
303    hypre_SyncCudaComputeStream(hypre_handle());
304 #endif
305 
306 #ifdef HYPRE_PROFILE
307    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
308 #endif
309 
310    return ierr;
311 }
312 
313 /*--------------------------------------------------------------------------
314  * hypre_SeqVectorSetRandomValues
315  *
316  *     returns vector of values randomly distributed between -1.0 and +1.0
317  *--------------------------------------------------------------------------*/
318 
319 HYPRE_Int
hypre_SeqVectorSetRandomValues(hypre_Vector * v,HYPRE_Int seed)320 hypre_SeqVectorSetRandomValues( hypre_Vector *v,
321                                 HYPRE_Int     seed )
322 {
323    HYPRE_Complex *vector_data = hypre_VectorData(v);
324    HYPRE_Int      size        = hypre_VectorSize(v);
325    HYPRE_Int      i;
326    HYPRE_Int      ierr  = 0;
327    hypre_SeedRand(seed);
328 
329    size *= hypre_VectorNumVectors(v);
330 
331    if (hypre_GetActualMemLocation(hypre_VectorMemoryLocation(v)) == hypre_MEMORY_HOST)
332    {
333       /* RDF: threading this loop may cause problems because of hypre_Rand() */
334       for (i = 0; i < size; i++)
335       {
336          vector_data[i] = 2.0 * hypre_Rand() - 1.0;
337       }
338    }
339    else
340    {
341       HYPRE_Complex *h_data = hypre_TAlloc(HYPRE_Complex, size, HYPRE_MEMORY_HOST);
342       for (i = 0; i < size; i++)
343       {
344          h_data[i] = 2.0 * hypre_Rand() - 1.0;
345       }
346       hypre_TMemcpy(vector_data, h_data, HYPRE_Complex, size, hypre_VectorMemoryLocation(v), HYPRE_MEMORY_HOST);
347       hypre_TFree(h_data, HYPRE_MEMORY_HOST);
348    }
349 
350    return ierr;
351 }
352 
353 /*--------------------------------------------------------------------------
354  * hypre_SeqVectorCopy
355  * copies data from x to y
356  * if size of x is larger than y only the first size_y elements of x are
357  * copied to y
358  *--------------------------------------------------------------------------*/
359 HYPRE_Int
hypre_SeqVectorCopy(hypre_Vector * x,hypre_Vector * y)360 hypre_SeqVectorCopy( hypre_Vector *x,
361                      hypre_Vector *y )
362 {
363 #ifdef HYPRE_PROFILE
364    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
365 #endif
366 
367    HYPRE_Int ierr = 0;
368 
369    size_t size = hypre_min( hypre_VectorSize(x), hypre_VectorSize(y) ) * hypre_VectorNumVectors(x);
370 
371    hypre_TMemcpy( hypre_VectorData(y),
372                   hypre_VectorData(x),
373                   HYPRE_Complex,
374                   size,
375                   hypre_VectorMemoryLocation(y),
376                   hypre_VectorMemoryLocation(x) );
377 
378 #ifdef HYPRE_PROFILE
379    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
380 #endif
381 
382    return ierr;
383 }
384 
385 /*--------------------------------------------------------------------------
386  * hypre_SeqVectorCloneDeep
387  * Returns a complete copy of x - a deep copy, with its own copy of the data.
388  *--------------------------------------------------------------------------*/
389 
390 hypre_Vector*
hypre_SeqVectorCloneDeep_v2(hypre_Vector * x,HYPRE_MemoryLocation memory_location)391 hypre_SeqVectorCloneDeep_v2( hypre_Vector *x, HYPRE_MemoryLocation memory_location )
392 {
393    HYPRE_Int      size          = hypre_VectorSize(x);
394    HYPRE_Int      num_vectors   = hypre_VectorNumVectors(x);
395 
396    hypre_Vector *y = hypre_SeqMultiVectorCreate( size, num_vectors );
397 
398    hypre_VectorMultiVecStorageMethod(y) = hypre_VectorMultiVecStorageMethod(x);
399    hypre_VectorVectorStride(y) = hypre_VectorVectorStride(x);
400    hypre_VectorIndexStride(y) = hypre_VectorIndexStride(x);
401 
402    hypre_SeqVectorInitialize_v2(y, memory_location);
403    hypre_SeqVectorCopy( x, y );
404 
405    return y;
406 }
407 
408 hypre_Vector*
hypre_SeqVectorCloneDeep(hypre_Vector * x)409 hypre_SeqVectorCloneDeep( hypre_Vector *x )
410 {
411    return hypre_SeqVectorCloneDeep_v2(x, hypre_VectorMemoryLocation(x));
412 }
413 
414 /*--------------------------------------------------------------------------
415  * hypre_SeqVectorCloneShallow
416  * Returns a complete copy of x - a shallow copy, pointing the data of x
417  *--------------------------------------------------------------------------*/
418 
419 hypre_Vector *
hypre_SeqVectorCloneShallow(hypre_Vector * x)420 hypre_SeqVectorCloneShallow( hypre_Vector *x )
421 {
422    HYPRE_Int      size   = hypre_VectorSize(x);
423    HYPRE_Int      num_vectors   = hypre_VectorNumVectors(x);
424    hypre_Vector * y = hypre_SeqMultiVectorCreate( size, num_vectors );
425 
426    hypre_VectorMultiVecStorageMethod(y) = hypre_VectorMultiVecStorageMethod(x);
427    hypre_VectorVectorStride(y) = hypre_VectorVectorStride(x);
428    hypre_VectorIndexStride(y) = hypre_VectorIndexStride(x);
429 
430    hypre_VectorMemoryLocation(y) = hypre_VectorMemoryLocation(x);
431 
432    hypre_VectorData(y) = hypre_VectorData(x);
433    hypre_SeqVectorSetDataOwner( y, 0 );
434    hypre_SeqVectorInitialize(y);
435 
436    return y;
437 }
438 
439 /*--------------------------------------------------------------------------
440  * hypre_SeqVectorScale
441  *--------------------------------------------------------------------------*/
442 HYPRE_Int
hypre_SeqVectorScale(HYPRE_Complex alpha,hypre_Vector * y)443 hypre_SeqVectorScale( HYPRE_Complex alpha,
444                       hypre_Vector *y )
445 {
446    /* special cases */
447    if (alpha == 1.0)
448    {
449       return 0;
450    }
451 
452    if (alpha == 0.0)
453    {
454       return hypre_SeqVectorSetConstantValues(y, 0.0);
455    }
456 
457 #ifdef HYPRE_PROFILE
458    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
459 #endif
460 
461    HYPRE_Complex *y_data = hypre_VectorData(y);
462    HYPRE_Int      size   = hypre_VectorSize(y);
463    HYPRE_Int      ierr = 0;
464 
465    size *= hypre_VectorNumVectors(y);
466 
467    //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
468 
469 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
470 #if defined(HYPRE_USING_CUBLAS)
471    HYPRE_CUBLAS_CALL( cublasDscal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data, 1) );
472 #else
473    HYPRE_THRUST_CALL( transform, y_data, y_data + size, y_data, alpha * _1 );
474 #endif
475 #else
476    HYPRE_Int i;
477 #if defined(HYPRE_USING_DEVICE_OPENMP)
478 #pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data)
479 #elif defined(HYPRE_USING_OPENMP)
480 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
481 #endif
482    for (i = 0; i < size; i++)
483    {
484       y_data[i] *= alpha;
485    }
486 
487 #endif /* defined(HYPRE_USING_CUDA)  || defined(HYPRE_USING_HIP) */
488 
489 #if defined(HYPRE_USING_GPU)
490    hypre_SyncCudaComputeStream(hypre_handle());
491 #endif
492 
493 #ifdef HYPRE_PROFILE
494    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
495 #endif
496 
497    return ierr;
498 }
499 
500 /*--------------------------------------------------------------------------
501  * hypre_SeqVectorAxpy
502  *--------------------------------------------------------------------------*/
503 HYPRE_Int
hypre_SeqVectorAxpy(HYPRE_Complex alpha,hypre_Vector * x,hypre_Vector * y)504 hypre_SeqVectorAxpy( HYPRE_Complex alpha,
505                      hypre_Vector *x,
506                      hypre_Vector *y )
507 {
508 #ifdef HYPRE_PROFILE
509    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
510 #endif
511 
512    HYPRE_Complex *x_data = hypre_VectorData(x);
513    HYPRE_Complex *y_data = hypre_VectorData(y);
514    HYPRE_Int      size   = hypre_VectorSize(x);
515    HYPRE_Int      ierr = 0;
516 
517    size *= hypre_VectorNumVectors(x);
518 
519    //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
520    //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
521 
522 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
523 #if defined(HYPRE_USING_CUBLAS)
524    HYPRE_CUBLAS_CALL( cublasDaxpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data, 1, y_data, 1) );
525 #else
526    HYPRE_THRUST_CALL( transform, x_data, x_data + size, y_data, y_data, alpha * _1 + _2 );
527 #endif
528 #else
529    HYPRE_Int i;
530 #if defined(HYPRE_USING_DEVICE_OPENMP)
531 #pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data, x_data)
532 #elif defined(HYPRE_USING_OPENMP)
533 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
534 #endif
535    for (i = 0; i < size; i++)
536    {
537       y_data[i] += alpha * x_data[i];
538    }
539 
540 #endif /* defined(HYPRE_USING_CUDA)  || defined(HYPRE_USING_HIP) */
541 
542 #if defined(HYPRE_USING_GPU)
543    hypre_SyncCudaComputeStream(hypre_handle());
544 #endif
545 
546 #ifdef HYPRE_PROFILE
547    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
548 #endif
549 
550    return ierr;
551 }
552 
553 /* y = y + x ./ b */
554 HYPRE_Int
hypre_SeqVectorElmdivpy(hypre_Vector * x,hypre_Vector * b,hypre_Vector * y)555 hypre_SeqVectorElmdivpy( hypre_Vector *x,
556                          hypre_Vector *b,
557                          hypre_Vector *y )
558 {
559 #ifdef HYPRE_PROFILE
560    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
561 #endif
562 
563    HYPRE_Complex *x_data = hypre_VectorData(x);
564    HYPRE_Complex *b_data = hypre_VectorData(b);
565    HYPRE_Complex *y_data = hypre_VectorData(y);
566    HYPRE_Int      size   = hypre_VectorSize(b);
567 
568 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
569    //HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
570    //RL: TODO back to hypre_GetExecPolicy2 later
571    HYPRE_ExecutionPolicy exec = HYPRE_EXEC_DEVICE;
572    if (exec == HYPRE_EXEC_DEVICE)
573    {
574       //TODO
575       //hypre_SeqVectorElmdivpyDevice(x, b, y);
576       /*
577 #if defined(HYPRE_USING_DEVICE_OPENMP)
578 #pragma omp target teams distribute parallel for private(i) is_device_ptr(u_data,v_data,l1_norms)
579 #endif
580       */
581       hypreDevice_IVAXPY(size, b_data, x_data, y_data);
582    }
583    else
584 #endif
585    {
586       HYPRE_Int i;
587 #ifdef HYPRE_USING_OPENMP
588 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
589 #endif
590       for (i = 0; i < size; i++)
591       {
592          y_data[i] += x_data[i] / b_data[i];
593       }
594    }
595 
596 #if defined(HYPRE_USING_GPU)
597    hypre_SyncCudaComputeStream(hypre_handle());
598 #endif
599 
600 #ifdef HYPRE_PROFILE
601    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
602 #endif
603 
604    return hypre_error_flag;
605 }
606 
607 /* y[i] += x[i] / b[i] where marker[i] == marker_val */
608 HYPRE_Int
hypre_SeqVectorElmdivpyMarked(hypre_Vector * x,hypre_Vector * b,hypre_Vector * y,HYPRE_Int * marker,HYPRE_Int marker_val)609 hypre_SeqVectorElmdivpyMarked( hypre_Vector *x,
610                                hypre_Vector *b,
611                                hypre_Vector *y,
612                                HYPRE_Int    *marker,
613                                HYPRE_Int     marker_val)
614 {
615 #ifdef HYPRE_PROFILE
616    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
617 #endif
618 
619    HYPRE_Complex *x_data = hypre_VectorData(x);
620    HYPRE_Complex *b_data = hypre_VectorData(b);
621    HYPRE_Complex *y_data = hypre_VectorData(y);
622    HYPRE_Int      size   = hypre_VectorSize(b);
623 
624 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
625    HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
626    if (exec == HYPRE_EXEC_DEVICE)
627    {
628       hypreDevice_IVAXPYMarked(size, b_data, x_data, y_data, marker, marker_val);
629    }
630    else
631 #endif
632    {
633       HYPRE_Int i;
634 #ifdef HYPRE_USING_OPENMP
635 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
636 #endif
637       for (i = 0; i < size; i++)
638       {
639          if (marker[i] == marker_val)
640          {
641             y_data[i] += x_data[i] / b_data[i];
642          }
643       }
644    }
645 
646 #if defined(HYPRE_USING_GPU)
647    hypre_SyncCudaComputeStream(hypre_handle());
648 #endif
649 
650 #ifdef HYPRE_PROFILE
651    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
652 #endif
653 
654    return hypre_error_flag;
655 }
656 
657 /*--------------------------------------------------------------------------
658  * hypre_SeqVectorInnerProd
659  *--------------------------------------------------------------------------*/
660 HYPRE_Real
hypre_SeqVectorInnerProd(hypre_Vector * x,hypre_Vector * y)661 hypre_SeqVectorInnerProd( hypre_Vector *x,
662                           hypre_Vector *y )
663 {
664 #ifdef HYPRE_PROFILE
665    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
666 #endif
667 
668    HYPRE_Complex *x_data = hypre_VectorData(x);
669    HYPRE_Complex *y_data = hypre_VectorData(y);
670    HYPRE_Int      size   = hypre_VectorSize(x);
671    HYPRE_Real     result = 0.0;
672 
673    size *= hypre_VectorNumVectors(x);
674 
675    //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
676    //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
677 
678 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
679 #ifndef HYPRE_COMPLEX
680 #if defined(HYPRE_USING_CUBLAS)
681    HYPRE_CUBLAS_CALL( cublasDdot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1, y_data, 1, &result) );
682 #else
683    result = HYPRE_THRUST_CALL( inner_product, x_data, x_data + size, y_data, 0.0 );
684 #endif
685 #else
686    /* TODO */
687 #error "Complex inner product"
688 #endif
689 #else /* #if defined(HYPRE_USING_CUDA)  || defined(HYPRE_USING_HIP) */
690    HYPRE_Int i;
691 #if defined(HYPRE_USING_DEVICE_OPENMP)
692 #pragma omp target teams  distribute  parallel for private(i) reduction(+:result) is_device_ptr(y_data,x_data) map(result)
693 #elif defined(HYPRE_USING_OPENMP)
694 #pragma omp parallel for private(i) reduction(+:result) HYPRE_SMP_SCHEDULE
695 #endif
696    for (i = 0; i < size; i++)
697    {
698      result += hypre_conj(y_data[i]) * x_data[i];
699    }
700 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
701 
702 #if defined(HYPRE_USING_GPU)
703    hypre_SyncCudaComputeStream(hypre_handle());
704 #endif
705 
706 #ifdef HYPRE_PROFILE
707    hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
708 #endif
709 
710    return result;
711 }
712 
713 //TODO
714 
/*--------------------------------------------------------------------------
 * hypre_SeqVectorSumElts:
 * Returns the sum of all vector elements.
 *--------------------------------------------------------------------------*/
719 
hypre_SeqVectorSumElts(hypre_Vector * vector)720 HYPRE_Complex hypre_SeqVectorSumElts( hypre_Vector *vector )
721 {
722    HYPRE_Complex  sum = 0;
723    HYPRE_Complex *data = hypre_VectorData( vector );
724    HYPRE_Int      size = hypre_VectorSize( vector );
725    HYPRE_Int      i;
726 
727 #ifdef HYPRE_USING_OPENMP
728 #pragma omp parallel for private(i) reduction(+:sum) HYPRE_SMP_SCHEDULE
729 #endif
730    for ( i=0; i<size; ++i ) sum += data[i];
731 
732    return sum;
733 }
734 
735 HYPRE_Int
hypre_SeqVectorPrefetch(hypre_Vector * x,HYPRE_MemoryLocation memory_location)736 hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location)
737 {
738    HYPRE_Int      ierr = 0;
739 #ifdef HYPRE_USING_UNIFIED_MEMORY
740    if (hypre_VectorMemoryLocation(x) != HYPRE_MEMORY_DEVICE)
741    {
742       /* hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! CUDA Prefetch with non-unified momory\n");*/
743       return 1;
744    }
745 
746    HYPRE_Complex *x_data = hypre_VectorData(x);
747    HYPRE_Int      size   = hypre_VectorSize(x) * hypre_VectorNumVectors(x);
748 
749    if (size == 0)
750    {
751       return ierr;
752    }
753 
754    hypre_MemPrefetch(x_data, sizeof(HYPRE_Complex)*size, memory_location);
755 #endif
756 
757    return ierr;
758 }
759 
#if 0
/* y[i] = max(alpha*x[i], beta*y[i]) */
HYPRE_Int
hypre_SeqVectorMax( HYPRE_Complex alpha,
                    hypre_Vector *x,
                    HYPRE_Complex beta,
                    hypre_Vector *y     )
{
#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
#endif

   HYPRE_Complex *x_data = hypre_VectorData(x);
   HYPRE_Complex *y_data = hypre_VectorData(y);
   HYPRE_Int      size   = hypre_VectorSize(x);
   HYPRE_Int      ierr = 0;

   size *= hypre_VectorNumVectors(x);

   //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
   //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
   /* thrust functor must only be declared when compiling for the device;
    * previously it sat outside this guard and broke host-only builds */
   thrust::maximum<HYPRE_Complex> mx;

   HYPRE_THRUST_CALL( transform,
                      thrust::make_transform_iterator(x_data,        alpha * _1),
                      thrust::make_transform_iterator(x_data + size, alpha * _1),
                      thrust::make_transform_iterator(y_data,        beta  * _1),
                      y_data,
                      mx );
#else
   HYPRE_Int i;
#if defined(HYPRE_USING_DEVICE_OPENMP)
#pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data, x_data)
#elif defined(HYPRE_USING_OPENMP)
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
   for (i = 0; i < size; i++)
   {
      /* assignment, not "+=": the contract above is y[i] = max(...), and the
       * device path overwrites y as well */
      y_data[i] = hypre_max(alpha * x_data[i], beta * y_data[i]);
   }

#endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */

#if defined(HYPRE_USING_GPU)
   hypre_SyncCudaComputeStream(hypre_handle());
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
#endif

   return ierr;
}
#endif
814