1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  *
5  * DO NOT EDIT: AUTOMATICALLY GENERATED FILE !!
6  */
7 
8 #include <string.h>
9 #include <stdint.h>
10 #include <wchar.h>
11 #include <assert.h>
12 #include <cuda.h>
13 #include <cuda_runtime.h>
14 #include "yaksuri_cudai_base.h"
15 #include "yaksuri_cudai_pup.h"
16 
/*
 * Pack kernel for a resized int8_t type: each thread copies a single int8_t
 * from the strided source layout into the densely packed destination.
 *
 *   inbuf  - source buffer; element i is read at byte offset i * md->extent
 *   outbuf - destination buffer; element i is written at byte offset
 *            i * sizeof(int8_t)
 *   count  - multiplied by md->num_elements to obtain the total number of
 *            elements to copy; threads whose flat index is at or beyond that
 *            product return without touching memory
 *   md     - metadata supplying extent and num_elements
 *
 * Launch layout: only threadIdx.x / blockDim.x are consulted, so thread
 * blocks are treated as one-dimensional.  The grid may be 1-D, 2-D or 3-D;
 * its block coordinates are flattened into a single block number.
 */
__global__ void yaksuri_cudai_kernel_pack_resized_int8_t(const void *inbuf, void *outbuf, uintptr_t count, const yaksuri_cudai_md_s *__restrict__ md)
{
    const char *__restrict__ sbuf = (const char *) inbuf;
    char *__restrict__ dbuf = (char *) outbuf;
    uintptr_t extent = md->extent;
    uintptr_t inner_elements = md->num_elements;

    /* Flatten the block coordinates in uintptr_t arithmetic.  The built-in
     * index variables are 32-bit, so the cast must come first to keep the
     * products from wrapping.  With gridDim.y == gridDim.z == 1 this reduces
     * to blockIdx.x, so a plain 1-D launch behaves exactly as before. */
    uintptr_t block_id = ((uintptr_t) blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    uintptr_t idx = block_id * blockDim.x + threadIdx.x;

    /* Tail guard: the grid is generally larger than the element count. */
    if (idx >= (count * inner_elements))
        return;

    *((int8_t *) (void *) (dbuf + idx * sizeof(int8_t))) = *((const int8_t *) (const void *) (sbuf + idx * extent));
}
33 
/*
 * Host-side launcher for yaksuri_cudai_kernel_pack_resized_int8_t.
 *
 * Enqueues the kernel on yaksuri_cudai_global.stream[device] using a grid of
 * (n_blocks_x, n_blocks_y, n_blocks_z) blocks with n_threads threads per
 * block and no dynamic shared memory.  inbuf, outbuf, count and md are
 * forwarded to the kernel unchanged.  The status returned by
 * cudaLaunchKernel is handed to YAKSURI_CUDAI_CUDA_ERR_CHECK.
 */
void yaksuri_cudai_pack_resized_int8_t(const void *inbuf, void *outbuf, uintptr_t count, yaksuri_cudai_md_s *md, int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device)
{
    /* cudaLaunchKernel expects one pointer per kernel parameter, in
     * declaration order. */
    void *kernel_args[] = { &inbuf, &outbuf, &count, &md };
    const void *kernel_fn = (const void *) yaksuri_cudai_kernel_pack_resized_int8_t;
    const dim3 grid_dims(n_blocks_x, n_blocks_y, n_blocks_z);
    const dim3 block_dims(n_threads);

    cudaError_t cerr = cudaLaunchKernel(kernel_fn, grid_dims, block_dims, kernel_args, 0,
                                        yaksuri_cudai_global.stream[device]);
    YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
}
41 
/*
 * Unpack kernel for a resized int8_t type: each thread copies a single
 * int8_t from the densely packed source into the strided destination layout.
 *
 *   inbuf  - packed source buffer; element i is read at byte offset
 *            i * sizeof(int8_t)
 *   outbuf - destination buffer; element i is written at byte offset
 *            i * md->extent
 *   count  - multiplied by md->num_elements to obtain the total number of
 *            elements to copy; threads whose flat index is at or beyond that
 *            product return without touching memory
 *   md     - metadata supplying extent and num_elements
 *
 * Launch layout: only threadIdx.x / blockDim.x are consulted, so thread
 * blocks are treated as one-dimensional.  The grid may be 1-D, 2-D or 3-D;
 * its block coordinates are flattened into a single block number.
 */
__global__ void yaksuri_cudai_kernel_unpack_resized_int8_t(const void *inbuf, void *outbuf, uintptr_t count, const yaksuri_cudai_md_s *__restrict__ md)
{
    const char *__restrict__ sbuf = (const char *) inbuf;
    char *__restrict__ dbuf = (char *) outbuf;
    uintptr_t extent = md->extent;
    uintptr_t inner_elements = md->num_elements;

    /* Flatten the block coordinates in uintptr_t arithmetic.  The built-in
     * index variables are 32-bit, so the cast must come first to keep the
     * products from wrapping.  With gridDim.y == gridDim.z == 1 this reduces
     * to blockIdx.x, so a plain 1-D launch behaves exactly as before. */
    uintptr_t block_id = ((uintptr_t) blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    uintptr_t idx = block_id * blockDim.x + threadIdx.x;

    /* Tail guard: the grid is generally larger than the element count. */
    if (idx >= (count * inner_elements))
        return;

    *((int8_t *) (void *) (dbuf + idx * extent)) = *((const int8_t *) (const void *) (sbuf + idx * sizeof(int8_t)));
}
58 
/*
 * Host-side launcher for yaksuri_cudai_kernel_unpack_resized_int8_t.
 *
 * Enqueues the kernel on yaksuri_cudai_global.stream[device] using a grid of
 * (n_blocks_x, n_blocks_y, n_blocks_z) blocks with n_threads threads per
 * block and no dynamic shared memory.  inbuf, outbuf, count and md are
 * forwarded to the kernel unchanged.  The status returned by
 * cudaLaunchKernel is handed to YAKSURI_CUDAI_CUDA_ERR_CHECK.
 */
void yaksuri_cudai_unpack_resized_int8_t(const void *inbuf, void *outbuf, uintptr_t count, yaksuri_cudai_md_s *md, int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device)
{
    /* cudaLaunchKernel expects one pointer per kernel parameter, in
     * declaration order. */
    void *kernel_args[] = { &inbuf, &outbuf, &count, &md };
    const void *kernel_fn = (const void *) yaksuri_cudai_kernel_unpack_resized_int8_t;
    const dim3 grid_dims(n_blocks_x, n_blocks_y, n_blocks_z);
    const dim3 block_dims(n_threads);

    cudaError_t cerr = cudaLaunchKernel(kernel_fn, grid_dims, block_dims, kernel_args, 0,
                                        yaksuri_cudai_global.stream[device]);
    YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);
}
66 
67