1/*
2 * Copyright (C) 2019-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8R"===(
9__kernel void CopyBufferToBufferBytes(
10    const __global uchar* pSrc,
11    __global uchar* pDst,
12    ulong srcOffsetInBytes,
13    ulong dstOffsetInBytes,
14    ulong bytesToRead )
15{
16    pSrc += ( srcOffsetInBytes + get_global_id(0) );
17    pDst += ( dstOffsetInBytes + get_global_id(0) );
18    pDst[ 0 ] = pSrc[ 0 ];
19}
20
21__kernel void CopyBufferToBufferLeftLeftover(
22    const __global uchar* pSrc,
23    __global uchar* pDst,
24    ulong srcOffsetInBytes,
25    ulong dstOffsetInBytes)
26{
27    size_t gid = get_global_id(0);
28    pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
29}
30
31__kernel void CopyBufferToBufferMiddle(
32    const __global uint* pSrc,
33    __global uint* pDst,
34    ulong srcOffsetInBytes,
35    ulong dstOffsetInBytes)
36{
37    size_t gid = get_global_id(0);
38    pDst += dstOffsetInBytes >> 2;
39    pSrc += srcOffsetInBytes >> 2;
40    uint4 loaded = vload4(gid, pSrc);
41    vstore4(loaded, gid, pDst);
42}
43
44__kernel void CopyBufferToBufferMiddleMisaligned(
45    __global const uint* pSrc,
46     __global uint* pDst,
47     ulong srcOffsetInBytes,
48     ulong dstOffsetInBytes,
49     uint misalignmentInBits)
50{
51    const size_t gid = get_global_id(0);
52    pDst += dstOffsetInBytes >> 2;
53    pSrc += srcOffsetInBytes >> 2;
54    const uint4 src0 = vload4(gid, pSrc);
55    const uint4 src1 = vload4(gid + 1, pSrc);
56
57    uint4 result;
58    result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits));
59    result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits));
60    result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits));
61    result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits));
62    vstore4(result, gid, pDst);
63}
64
65__kernel void CopyBufferToBufferRightLeftover(
66    const __global uchar* pSrc,
67    __global uchar* pDst,
68    ulong srcOffsetInBytes,
69    ulong dstOffsetInBytes)
70{
71    size_t gid = get_global_id(0);
72    pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
73}
74
75__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) {
76    size_t gid = get_global_id(0);
77    dst[gid] = (uchar)(src[gid]);
78}
79
80__kernel void CopyBufferToBufferSideRegion(
81    __global uchar* pDst,
82    const __global uchar* pSrc,
83    ulong len,
84    ulong dstSshOffset,
85    ulong srcSshOffset
86    )
87{
88    size_t gid = get_global_id(0);
89    __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset);
90    __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset);
91    if (gid < len) {
92        pDstWithOffset[ gid ] = pSrcWithOffset[ gid ];
93    }
94}
95
96__kernel void CopyBufferToBufferMiddleRegion(
97    __global uint* pDst,
98    const __global uint* pSrc,
99    ulong elems,
100    ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
101    ulong srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment
102    )
103{
104    size_t gid = get_global_id(0);
105    __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset);
106    __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset);
107    if (gid < elems) {
108        uint4 loaded = vload4(gid, pSrcWithOffset);
109        vstore4(loaded, gid, pDstWithOffset);
110    }
111}
112
113)==="