1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /******************************************************************************
9 *
10 * Member functions for hypre_Vector class.
11 *
12 *****************************************************************************/
13
14 #include "seq_mv.h"
15 #include "_hypre_utilities.hpp" //RL: TODO vector_device.c, include cuda there
16
17 /*--------------------------------------------------------------------------
18 * hypre_SeqVectorCreate
19 *--------------------------------------------------------------------------*/
20
21 hypre_Vector *
hypre_SeqVectorCreate(HYPRE_Int size)22 hypre_SeqVectorCreate( HYPRE_Int size )
23 {
24 hypre_Vector *vector;
25
26 vector = hypre_CTAlloc(hypre_Vector, 1, HYPRE_MEMORY_HOST);
27
28 hypre_VectorData(vector) = NULL;
29 hypre_VectorSize(vector) = size;
30
31 hypre_VectorNumVectors(vector) = 1;
32 hypre_VectorMultiVecStorageMethod(vector) = 0;
33
34 /* set defaults */
35 hypre_VectorOwnsData(vector) = 1;
36
37 hypre_VectorMemoryLocation(vector) = hypre_HandleMemoryLocation(hypre_handle());
38
39 return vector;
40 }
41
42 /*--------------------------------------------------------------------------
43 * hypre_SeqMultiVectorCreate
44 *--------------------------------------------------------------------------*/
45
46 hypre_Vector *
hypre_SeqMultiVectorCreate(HYPRE_Int size,HYPRE_Int num_vectors)47 hypre_SeqMultiVectorCreate( HYPRE_Int size, HYPRE_Int num_vectors )
48 {
49 hypre_Vector *vector = hypre_SeqVectorCreate(size);
50 hypre_VectorNumVectors(vector) = num_vectors;
51
52 return vector;
53 }
54
55 /*--------------------------------------------------------------------------
56 * hypre_SeqVectorDestroy
57 *--------------------------------------------------------------------------*/
58
59 HYPRE_Int
hypre_SeqVectorDestroy(hypre_Vector * vector)60 hypre_SeqVectorDestroy( hypre_Vector *vector )
61 {
62 HYPRE_Int ierr=0;
63
64 if (vector)
65 {
66 HYPRE_MemoryLocation memory_location = hypre_VectorMemoryLocation(vector);
67
68 if ( hypre_VectorOwnsData(vector) )
69 {
70 hypre_TFree(hypre_VectorData(vector), memory_location);
71 }
72
73 hypre_TFree(vector, HYPRE_MEMORY_HOST);
74 }
75
76 return ierr;
77 }
78
79 /*--------------------------------------------------------------------------
80 * hypre_SeqVectorInitialize
81 *--------------------------------------------------------------------------*/
82
83 HYPRE_Int
hypre_SeqVectorInitialize_v2(hypre_Vector * vector,HYPRE_MemoryLocation memory_location)84 hypre_SeqVectorInitialize_v2( hypre_Vector *vector, HYPRE_MemoryLocation memory_location )
85 {
86 HYPRE_Int size = hypre_VectorSize(vector);
87 HYPRE_Int ierr = 0;
88 HYPRE_Int num_vectors = hypre_VectorNumVectors(vector);
89 HYPRE_Int multivec_storage_method = hypre_VectorMultiVecStorageMethod(vector);
90
91 hypre_VectorMemoryLocation(vector) = memory_location;
92
93 /* Caveat: for pre-existing data, the memory location must be guaranteed
94 * to be consistent with `memory_location'
95 * Otherwise, mismatches will exist and problems will be encountered
96 * when being used, and freed */
97 if ( !hypre_VectorData(vector) )
98 {
99 hypre_VectorData(vector) = hypre_CTAlloc(HYPRE_Complex, num_vectors*size, memory_location);
100 }
101
102 if ( multivec_storage_method == 0 )
103 {
104 hypre_VectorVectorStride(vector) = size;
105 hypre_VectorIndexStride(vector) = 1;
106 }
107 else if ( multivec_storage_method == 1 )
108 {
109 hypre_VectorVectorStride(vector) = 1;
110 hypre_VectorIndexStride(vector) = num_vectors;
111 }
112 else
113 {
114 ++ierr;
115 }
116
117 return ierr;
118 }
119
120 HYPRE_Int
hypre_SeqVectorInitialize(hypre_Vector * vector)121 hypre_SeqVectorInitialize( hypre_Vector *vector )
122 {
123 HYPRE_Int ierr;
124
125 ierr = hypre_SeqVectorInitialize_v2( vector, hypre_VectorMemoryLocation(vector) );
126
127 return ierr;
128 }
129
130 /*--------------------------------------------------------------------------
131 * hypre_SeqVectorSetDataOwner
132 *--------------------------------------------------------------------------*/
133
134 HYPRE_Int
hypre_SeqVectorSetDataOwner(hypre_Vector * vector,HYPRE_Int owns_data)135 hypre_SeqVectorSetDataOwner( hypre_Vector *vector,
136 HYPRE_Int owns_data )
137 {
138 HYPRE_Int ierr=0;
139
140 hypre_VectorOwnsData(vector) = owns_data;
141
142 return ierr;
143 }
144
145 /*--------------------------------------------------------------------------
146 * ReadVector
147 *--------------------------------------------------------------------------*/
148
149 hypre_Vector *
hypre_SeqVectorRead(char * file_name)150 hypre_SeqVectorRead( char *file_name )
151 {
152 hypre_Vector *vector;
153
154 FILE *fp;
155
156 HYPRE_Complex *data;
157 HYPRE_Int size;
158
159 HYPRE_Int j;
160
161 /*----------------------------------------------------------
162 * Read in the data
163 *----------------------------------------------------------*/
164
165 fp = fopen(file_name, "r");
166
167 hypre_fscanf(fp, "%d", &size);
168
169 vector = hypre_SeqVectorCreate(size);
170
171 hypre_VectorMemoryLocation(vector) = HYPRE_MEMORY_HOST;
172
173 hypre_SeqVectorInitialize(vector);
174
175 data = hypre_VectorData(vector);
176 for (j = 0; j < size; j++)
177 {
178 hypre_fscanf(fp, "%le", &data[j]);
179 }
180
181 fclose(fp);
182
183 /* multivector code not written yet */
184 hypre_assert( hypre_VectorNumVectors(vector) == 1 );
185
186 return vector;
187 }
188
189 /*--------------------------------------------------------------------------
190 * hypre_SeqVectorPrint
191 *--------------------------------------------------------------------------*/
192
193 HYPRE_Int
hypre_SeqVectorPrint(hypre_Vector * vector,char * file_name)194 hypre_SeqVectorPrint( hypre_Vector *vector,
195 char *file_name )
196 {
197 FILE *fp;
198
199 HYPRE_Complex *data;
200 HYPRE_Int size, num_vectors, vecstride, idxstride;
201
202 HYPRE_Int i, j;
203 HYPRE_Complex value;
204
205 HYPRE_Int ierr = 0;
206
207 num_vectors = hypre_VectorNumVectors(vector);
208 vecstride = hypre_VectorVectorStride(vector);
209 idxstride = hypre_VectorIndexStride(vector);
210
211 /*----------------------------------------------------------
212 * Print in the data
213 *----------------------------------------------------------*/
214
215 data = hypre_VectorData(vector);
216 size = hypre_VectorSize(vector);
217
218 fp = fopen(file_name, "w");
219
220 if ( hypre_VectorNumVectors(vector) == 1 )
221 {
222 hypre_fprintf(fp, "%d\n", size);
223 }
224 else
225 {
226 hypre_fprintf(fp, "%d vectors of size %d\n", num_vectors, size );
227 }
228
229 if ( num_vectors>1 )
230 {
231 for ( j=0; j<num_vectors; ++j )
232 {
233 hypre_fprintf(fp, "vector %d\n", j );
234 for (i = 0; i < size; i++)
235 {
236 value = data[ j*vecstride + i*idxstride ];
237 #ifdef HYPRE_COMPLEX
238 hypre_fprintf(fp, "%.14e , %.14e\n",
239 hypre_creal(value), hypre_cimag(value));
240 #else
241 hypre_fprintf(fp, "%.14e\n", value);
242 #endif
243 }
244 }
245 }
246 else
247 {
248 for (i = 0; i < size; i++)
249 {
250 #ifdef HYPRE_COMPLEX
251 hypre_fprintf(fp, "%.14e , %.14e\n",
252 hypre_creal(data[i]), hypre_cimag(data[i]));
253 #else
254 hypre_fprintf(fp, "%.14e\n", data[i]);
255 #endif
256 }
257 }
258
259 fclose(fp);
260
261 return ierr;
262 }
263
264 /*--------------------------------------------------------------------------
265 * hypre_SeqVectorSetConstantValues
266 *--------------------------------------------------------------------------*/
267
268 HYPRE_Int
hypre_SeqVectorSetConstantValues(hypre_Vector * v,HYPRE_Complex value)269 hypre_SeqVectorSetConstantValues( hypre_Vector *v,
270 HYPRE_Complex value )
271 {
272 #ifdef HYPRE_PROFILE
273 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
274 #endif
275
276 HYPRE_Complex *vector_data = hypre_VectorData(v);
277 HYPRE_Int size = hypre_VectorSize(v);
278 HYPRE_Int ierr = 0;
279
280 size *= hypre_VectorNumVectors(v);
281
282 //hypre_SeqVectorPrefetch(v, HYPRE_MEMORY_DEVICE);
283
284 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
285 if (size > 0)
286 {
287 HYPRE_THRUST_CALL( fill_n, vector_data, size, value );
288 }
289 #else
290 HYPRE_Int i;
291 #if defined(HYPRE_USING_DEVICE_OPENMP)
292 #pragma omp target teams distribute parallel for private(i) is_device_ptr(vector_data)
293 #elif defined(HYPRE_USING_OPENMP)
294 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
295 #endif
296 for (i = 0; i < size; i++)
297 {
298 vector_data[i] = value;
299 }
300 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
301
302 #if defined(HYPRE_USING_GPU)
303 hypre_SyncCudaComputeStream(hypre_handle());
304 #endif
305
306 #ifdef HYPRE_PROFILE
307 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
308 #endif
309
310 return ierr;
311 }
312
313 /*--------------------------------------------------------------------------
314 * hypre_SeqVectorSetRandomValues
315 *
316 * returns vector of values randomly distributed between -1.0 and +1.0
317 *--------------------------------------------------------------------------*/
318
319 HYPRE_Int
hypre_SeqVectorSetRandomValues(hypre_Vector * v,HYPRE_Int seed)320 hypre_SeqVectorSetRandomValues( hypre_Vector *v,
321 HYPRE_Int seed )
322 {
323 HYPRE_Complex *vector_data = hypre_VectorData(v);
324 HYPRE_Int size = hypre_VectorSize(v);
325 HYPRE_Int i;
326 HYPRE_Int ierr = 0;
327 hypre_SeedRand(seed);
328
329 size *= hypre_VectorNumVectors(v);
330
331 if (hypre_GetActualMemLocation(hypre_VectorMemoryLocation(v)) == hypre_MEMORY_HOST)
332 {
333 /* RDF: threading this loop may cause problems because of hypre_Rand() */
334 for (i = 0; i < size; i++)
335 {
336 vector_data[i] = 2.0 * hypre_Rand() - 1.0;
337 }
338 }
339 else
340 {
341 HYPRE_Complex *h_data = hypre_TAlloc(HYPRE_Complex, size, HYPRE_MEMORY_HOST);
342 for (i = 0; i < size; i++)
343 {
344 h_data[i] = 2.0 * hypre_Rand() - 1.0;
345 }
346 hypre_TMemcpy(vector_data, h_data, HYPRE_Complex, size, hypre_VectorMemoryLocation(v), HYPRE_MEMORY_HOST);
347 hypre_TFree(h_data, HYPRE_MEMORY_HOST);
348 }
349
350 return ierr;
351 }
352
353 /*--------------------------------------------------------------------------
354 * hypre_SeqVectorCopy
355 * copies data from x to y
356 * if size of x is larger than y only the first size_y elements of x are
357 * copied to y
358 *--------------------------------------------------------------------------*/
359 HYPRE_Int
hypre_SeqVectorCopy(hypre_Vector * x,hypre_Vector * y)360 hypre_SeqVectorCopy( hypre_Vector *x,
361 hypre_Vector *y )
362 {
363 #ifdef HYPRE_PROFILE
364 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
365 #endif
366
367 HYPRE_Int ierr = 0;
368
369 size_t size = hypre_min( hypre_VectorSize(x), hypre_VectorSize(y) ) * hypre_VectorNumVectors(x);
370
371 hypre_TMemcpy( hypre_VectorData(y),
372 hypre_VectorData(x),
373 HYPRE_Complex,
374 size,
375 hypre_VectorMemoryLocation(y),
376 hypre_VectorMemoryLocation(x) );
377
378 #ifdef HYPRE_PROFILE
379 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
380 #endif
381
382 return ierr;
383 }
384
385 /*--------------------------------------------------------------------------
386 * hypre_SeqVectorCloneDeep
387 * Returns a complete copy of x - a deep copy, with its own copy of the data.
388 *--------------------------------------------------------------------------*/
389
390 hypre_Vector*
hypre_SeqVectorCloneDeep_v2(hypre_Vector * x,HYPRE_MemoryLocation memory_location)391 hypre_SeqVectorCloneDeep_v2( hypre_Vector *x, HYPRE_MemoryLocation memory_location )
392 {
393 HYPRE_Int size = hypre_VectorSize(x);
394 HYPRE_Int num_vectors = hypre_VectorNumVectors(x);
395
396 hypre_Vector *y = hypre_SeqMultiVectorCreate( size, num_vectors );
397
398 hypre_VectorMultiVecStorageMethod(y) = hypre_VectorMultiVecStorageMethod(x);
399 hypre_VectorVectorStride(y) = hypre_VectorVectorStride(x);
400 hypre_VectorIndexStride(y) = hypre_VectorIndexStride(x);
401
402 hypre_SeqVectorInitialize_v2(y, memory_location);
403 hypre_SeqVectorCopy( x, y );
404
405 return y;
406 }
407
408 hypre_Vector*
hypre_SeqVectorCloneDeep(hypre_Vector * x)409 hypre_SeqVectorCloneDeep( hypre_Vector *x )
410 {
411 return hypre_SeqVectorCloneDeep_v2(x, hypre_VectorMemoryLocation(x));
412 }
413
414 /*--------------------------------------------------------------------------
415 * hypre_SeqVectorCloneShallow
416 * Returns a complete copy of x - a shallow copy, pointing the data of x
417 *--------------------------------------------------------------------------*/
418
419 hypre_Vector *
hypre_SeqVectorCloneShallow(hypre_Vector * x)420 hypre_SeqVectorCloneShallow( hypre_Vector *x )
421 {
422 HYPRE_Int size = hypre_VectorSize(x);
423 HYPRE_Int num_vectors = hypre_VectorNumVectors(x);
424 hypre_Vector * y = hypre_SeqMultiVectorCreate( size, num_vectors );
425
426 hypre_VectorMultiVecStorageMethod(y) = hypre_VectorMultiVecStorageMethod(x);
427 hypre_VectorVectorStride(y) = hypre_VectorVectorStride(x);
428 hypre_VectorIndexStride(y) = hypre_VectorIndexStride(x);
429
430 hypre_VectorMemoryLocation(y) = hypre_VectorMemoryLocation(x);
431
432 hypre_VectorData(y) = hypre_VectorData(x);
433 hypre_SeqVectorSetDataOwner( y, 0 );
434 hypre_SeqVectorInitialize(y);
435
436 return y;
437 }
438
439 /*--------------------------------------------------------------------------
440 * hypre_SeqVectorScale
441 *--------------------------------------------------------------------------*/
442 HYPRE_Int
hypre_SeqVectorScale(HYPRE_Complex alpha,hypre_Vector * y)443 hypre_SeqVectorScale( HYPRE_Complex alpha,
444 hypre_Vector *y )
445 {
446 /* special cases */
447 if (alpha == 1.0)
448 {
449 return 0;
450 }
451
452 if (alpha == 0.0)
453 {
454 return hypre_SeqVectorSetConstantValues(y, 0.0);
455 }
456
457 #ifdef HYPRE_PROFILE
458 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
459 #endif
460
461 HYPRE_Complex *y_data = hypre_VectorData(y);
462 HYPRE_Int size = hypre_VectorSize(y);
463 HYPRE_Int ierr = 0;
464
465 size *= hypre_VectorNumVectors(y);
466
467 //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
468
469 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
470 #if defined(HYPRE_USING_CUBLAS)
471 HYPRE_CUBLAS_CALL( cublasDscal(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, y_data, 1) );
472 #else
473 HYPRE_THRUST_CALL( transform, y_data, y_data + size, y_data, alpha * _1 );
474 #endif
475 #else
476 HYPRE_Int i;
477 #if defined(HYPRE_USING_DEVICE_OPENMP)
478 #pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data)
479 #elif defined(HYPRE_USING_OPENMP)
480 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
481 #endif
482 for (i = 0; i < size; i++)
483 {
484 y_data[i] *= alpha;
485 }
486
487 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
488
489 #if defined(HYPRE_USING_GPU)
490 hypre_SyncCudaComputeStream(hypre_handle());
491 #endif
492
493 #ifdef HYPRE_PROFILE
494 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
495 #endif
496
497 return ierr;
498 }
499
500 /*--------------------------------------------------------------------------
501 * hypre_SeqVectorAxpy
502 *--------------------------------------------------------------------------*/
503 HYPRE_Int
hypre_SeqVectorAxpy(HYPRE_Complex alpha,hypre_Vector * x,hypre_Vector * y)504 hypre_SeqVectorAxpy( HYPRE_Complex alpha,
505 hypre_Vector *x,
506 hypre_Vector *y )
507 {
508 #ifdef HYPRE_PROFILE
509 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
510 #endif
511
512 HYPRE_Complex *x_data = hypre_VectorData(x);
513 HYPRE_Complex *y_data = hypre_VectorData(y);
514 HYPRE_Int size = hypre_VectorSize(x);
515 HYPRE_Int ierr = 0;
516
517 size *= hypre_VectorNumVectors(x);
518
519 //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
520 //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
521
522 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
523 #if defined(HYPRE_USING_CUBLAS)
524 HYPRE_CUBLAS_CALL( cublasDaxpy(hypre_HandleCublasHandle(hypre_handle()), size, &alpha, x_data, 1, y_data, 1) );
525 #else
526 HYPRE_THRUST_CALL( transform, x_data, x_data + size, y_data, y_data, alpha * _1 + _2 );
527 #endif
528 #else
529 HYPRE_Int i;
530 #if defined(HYPRE_USING_DEVICE_OPENMP)
531 #pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data, x_data)
532 #elif defined(HYPRE_USING_OPENMP)
533 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
534 #endif
535 for (i = 0; i < size; i++)
536 {
537 y_data[i] += alpha * x_data[i];
538 }
539
540 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
541
542 #if defined(HYPRE_USING_GPU)
543 hypre_SyncCudaComputeStream(hypre_handle());
544 #endif
545
546 #ifdef HYPRE_PROFILE
547 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
548 #endif
549
550 return ierr;
551 }
552
553 /* y = y + x ./ b */
554 HYPRE_Int
hypre_SeqVectorElmdivpy(hypre_Vector * x,hypre_Vector * b,hypre_Vector * y)555 hypre_SeqVectorElmdivpy( hypre_Vector *x,
556 hypre_Vector *b,
557 hypre_Vector *y )
558 {
559 #ifdef HYPRE_PROFILE
560 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
561 #endif
562
563 HYPRE_Complex *x_data = hypre_VectorData(x);
564 HYPRE_Complex *b_data = hypre_VectorData(b);
565 HYPRE_Complex *y_data = hypre_VectorData(y);
566 HYPRE_Int size = hypre_VectorSize(b);
567
568 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
569 //HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
570 //RL: TODO back to hypre_GetExecPolicy2 later
571 HYPRE_ExecutionPolicy exec = HYPRE_EXEC_DEVICE;
572 if (exec == HYPRE_EXEC_DEVICE)
573 {
574 //TODO
575 //hypre_SeqVectorElmdivpyDevice(x, b, y);
576 /*
577 #if defined(HYPRE_USING_DEVICE_OPENMP)
578 #pragma omp target teams distribute parallel for private(i) is_device_ptr(u_data,v_data,l1_norms)
579 #endif
580 */
581 hypreDevice_IVAXPY(size, b_data, x_data, y_data);
582 }
583 else
584 #endif
585 {
586 HYPRE_Int i;
587 #ifdef HYPRE_USING_OPENMP
588 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
589 #endif
590 for (i = 0; i < size; i++)
591 {
592 y_data[i] += x_data[i] / b_data[i];
593 }
594 }
595
596 #if defined(HYPRE_USING_GPU)
597 hypre_SyncCudaComputeStream(hypre_handle());
598 #endif
599
600 #ifdef HYPRE_PROFILE
601 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
602 #endif
603
604 return hypre_error_flag;
605 }
606
607 /* y[i] += x[i] / b[i] where marker[i] == marker_val */
608 HYPRE_Int
hypre_SeqVectorElmdivpyMarked(hypre_Vector * x,hypre_Vector * b,hypre_Vector * y,HYPRE_Int * marker,HYPRE_Int marker_val)609 hypre_SeqVectorElmdivpyMarked( hypre_Vector *x,
610 hypre_Vector *b,
611 hypre_Vector *y,
612 HYPRE_Int *marker,
613 HYPRE_Int marker_val)
614 {
615 #ifdef HYPRE_PROFILE
616 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
617 #endif
618
619 HYPRE_Complex *x_data = hypre_VectorData(x);
620 HYPRE_Complex *b_data = hypre_VectorData(b);
621 HYPRE_Complex *y_data = hypre_VectorData(y);
622 HYPRE_Int size = hypre_VectorSize(b);
623
624 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
625 HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy2( hypre_VectorMemoryLocation(x), hypre_VectorMemoryLocation(b) );
626 if (exec == HYPRE_EXEC_DEVICE)
627 {
628 hypreDevice_IVAXPYMarked(size, b_data, x_data, y_data, marker, marker_val);
629 }
630 else
631 #endif
632 {
633 HYPRE_Int i;
634 #ifdef HYPRE_USING_OPENMP
635 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
636 #endif
637 for (i = 0; i < size; i++)
638 {
639 if (marker[i] == marker_val)
640 {
641 y_data[i] += x_data[i] / b_data[i];
642 }
643 }
644 }
645
646 #if defined(HYPRE_USING_GPU)
647 hypre_SyncCudaComputeStream(hypre_handle());
648 #endif
649
650 #ifdef HYPRE_PROFILE
651 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
652 #endif
653
654 return hypre_error_flag;
655 }
656
657 /*--------------------------------------------------------------------------
658 * hypre_SeqVectorInnerProd
659 *--------------------------------------------------------------------------*/
660 HYPRE_Real
hypre_SeqVectorInnerProd(hypre_Vector * x,hypre_Vector * y)661 hypre_SeqVectorInnerProd( hypre_Vector *x,
662 hypre_Vector *y )
663 {
664 #ifdef HYPRE_PROFILE
665 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
666 #endif
667
668 HYPRE_Complex *x_data = hypre_VectorData(x);
669 HYPRE_Complex *y_data = hypre_VectorData(y);
670 HYPRE_Int size = hypre_VectorSize(x);
671 HYPRE_Real result = 0.0;
672
673 size *= hypre_VectorNumVectors(x);
674
675 //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
676 //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
677
678 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
679 #ifndef HYPRE_COMPLEX
680 #if defined(HYPRE_USING_CUBLAS)
681 HYPRE_CUBLAS_CALL( cublasDdot(hypre_HandleCublasHandle(hypre_handle()), size, x_data, 1, y_data, 1, &result) );
682 #else
683 result = HYPRE_THRUST_CALL( inner_product, x_data, x_data + size, y_data, 0.0 );
684 #endif
685 #else
686 /* TODO */
687 #error "Complex inner product"
688 #endif
689 #else /* #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
690 HYPRE_Int i;
691 #if defined(HYPRE_USING_DEVICE_OPENMP)
692 #pragma omp target teams distribute parallel for private(i) reduction(+:result) is_device_ptr(y_data,x_data) map(result)
693 #elif defined(HYPRE_USING_OPENMP)
694 #pragma omp parallel for private(i) reduction(+:result) HYPRE_SMP_SCHEDULE
695 #endif
696 for (i = 0; i < size; i++)
697 {
698 result += hypre_conj(y_data[i]) * x_data[i];
699 }
700 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
701
702 #if defined(HYPRE_USING_GPU)
703 hypre_SyncCudaComputeStream(hypre_handle());
704 #endif
705
706 #ifdef HYPRE_PROFILE
707 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
708 #endif
709
710 return result;
711 }
712
713 //TODO
714
715 /*--------------------------------------------------------------------------
716 * hypre_VectorSumElts:
717 * Returns the sum of all vector elements.
718 *--------------------------------------------------------------------------*/
719
hypre_SeqVectorSumElts(hypre_Vector * vector)720 HYPRE_Complex hypre_SeqVectorSumElts( hypre_Vector *vector )
721 {
722 HYPRE_Complex sum = 0;
723 HYPRE_Complex *data = hypre_VectorData( vector );
724 HYPRE_Int size = hypre_VectorSize( vector );
725 HYPRE_Int i;
726
727 #ifdef HYPRE_USING_OPENMP
728 #pragma omp parallel for private(i) reduction(+:sum) HYPRE_SMP_SCHEDULE
729 #endif
730 for ( i=0; i<size; ++i ) sum += data[i];
731
732 return sum;
733 }
734
735 HYPRE_Int
hypre_SeqVectorPrefetch(hypre_Vector * x,HYPRE_MemoryLocation memory_location)736 hypre_SeqVectorPrefetch( hypre_Vector *x, HYPRE_MemoryLocation memory_location)
737 {
738 HYPRE_Int ierr = 0;
739 #ifdef HYPRE_USING_UNIFIED_MEMORY
740 if (hypre_VectorMemoryLocation(x) != HYPRE_MEMORY_DEVICE)
741 {
742 /* hypre_error_w_msg(HYPRE_ERROR_GENERIC," Error! CUDA Prefetch with non-unified momory\n");*/
743 return 1;
744 }
745
746 HYPRE_Complex *x_data = hypre_VectorData(x);
747 HYPRE_Int size = hypre_VectorSize(x) * hypre_VectorNumVectors(x);
748
749 if (size == 0)
750 {
751 return ierr;
752 }
753
754 hypre_MemPrefetch(x_data, sizeof(HYPRE_Complex)*size, memory_location);
755 #endif
756
757 return ierr;
758 }
759
760 #if 0
761 /* y[i] = max(alpha*x[i], beta*y[i]) */
762 HYPRE_Int
763 hypre_SeqVectorMax( HYPRE_Complex alpha,
764 hypre_Vector *x,
765 HYPRE_Complex beta,
766 hypre_Vector *y )
767 {
768 #ifdef HYPRE_PROFILE
769 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] -= hypre_MPI_Wtime();
770 #endif
771
772 HYPRE_Complex *x_data = hypre_VectorData(x);
773 HYPRE_Complex *y_data = hypre_VectorData(y);
774 HYPRE_Int size = hypre_VectorSize(x);
775 HYPRE_Int ierr = 0;
776
777 size *= hypre_VectorNumVectors(x);
778
779 //hypre_SeqVectorPrefetch(x, HYPRE_MEMORY_DEVICE);
780 //hypre_SeqVectorPrefetch(y, HYPRE_MEMORY_DEVICE);
781
782 thrust::maximum<HYPRE_Complex> mx;
783
784 #if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
785 HYPRE_THRUST_CALL( transform,
786 thrust::make_transform_iterator(x_data, alpha * _1),
787 thrust::make_transform_iterator(x_data + size, alpha * _1),
788 thrust::make_transform_iterator(y_data, beta * _1),
789 y_data,
790 mx );
791 #else
792 HYPRE_Int i;
793 #if defined(HYPRE_USING_DEVICE_OPENMP)
794 #pragma omp target teams distribute parallel for private(i) is_device_ptr(y_data, x_data)
795 #elif defined(HYPRE_USING_OPENMP)
796 #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
797 #endif
798 for (i = 0; i < size; i++)
799 {
800 y_data[i] += hypre_max(alpha * x_data[i], beta * y_data[i]);
801 }
802
803 #endif /* defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) */
804
805 hypre_SyncCudaComputeStream(hypre_handle());
806
807 #ifdef HYPRE_PROFILE
808 hypre_profile_times[HYPRE_TIMER_ID_BLAS1] += hypre_MPI_Wtime();
809 #endif
810
811 return ierr;
812 }
813 #endif
814