1/* 2 * Copyright (C) 2019-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8R"===( 9__kernel void CopyBufferToBufferBytes( 10 const __global uchar* pSrc, 11 __global uchar* pDst, 12 ulong srcOffsetInBytes, 13 ulong dstOffsetInBytes, 14 ulong bytesToRead ) 15{ 16 pSrc += ( srcOffsetInBytes + get_global_id(0) ); 17 pDst += ( dstOffsetInBytes + get_global_id(0) ); 18 pDst[ 0 ] = pSrc[ 0 ]; 19} 20 21__kernel void CopyBufferToBufferLeftLeftover( 22 const __global uchar* pSrc, 23 __global uchar* pDst, 24 ulong srcOffsetInBytes, 25 ulong dstOffsetInBytes) 26{ 27 size_t gid = get_global_id(0); 28 pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; 29} 30 31__kernel void CopyBufferToBufferMiddle( 32 const __global uint* pSrc, 33 __global uint* pDst, 34 ulong srcOffsetInBytes, 35 ulong dstOffsetInBytes) 36{ 37 size_t gid = get_global_id(0); 38 pDst += dstOffsetInBytes >> 2; 39 pSrc += srcOffsetInBytes >> 2; 40 uint4 loaded = vload4(gid, pSrc); 41 vstore4(loaded, gid, pDst); 42} 43 44__kernel void CopyBufferToBufferMiddleMisaligned( 45 __global const uint* pSrc, 46 __global uint* pDst, 47 ulong srcOffsetInBytes, 48 ulong dstOffsetInBytes, 49 uint misalignmentInBits) 50{ 51 const size_t gid = get_global_id(0); 52 pDst += dstOffsetInBytes >> 2; 53 pSrc += srcOffsetInBytes >> 2; 54 const uint4 src0 = vload4(gid, pSrc); 55 const uint4 src1 = vload4(gid + 1, pSrc); 56 57 uint4 result; 58 result.x = (src0.x >> misalignmentInBits) | (src0.y << (32 - misalignmentInBits)); 59 result.y = (src0.y >> misalignmentInBits) | (src0.z << (32 - misalignmentInBits)); 60 result.z = (src0.z >> misalignmentInBits) | (src0.w << (32 - misalignmentInBits)); 61 result.w = (src0.w >> misalignmentInBits) | (src1.x << (32 - misalignmentInBits)); 62 vstore4(result, gid, pDst); 63} 64 65__kernel void CopyBufferToBufferRightLeftover( 66 const __global uchar* pSrc, 67 __global uchar* pDst, 68 ulong srcOffsetInBytes, 69 ulong dstOffsetInBytes) 70{ 71 size_t gid = get_global_id(0); 72 pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; 73} 74 75__kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { 76 size_t gid = get_global_id(0); 77 dst[gid] = (uchar)(src[gid]); 78} 79 80__kernel void CopyBufferToBufferSideRegion( 81 __global uchar* pDst, 82 const __global uchar* pSrc, 83 ulong len, 84 ulong dstSshOffset, 85 ulong srcSshOffset 86 ) 87{ 88 size_t gid = get_global_id(0); 89 __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset); 90 __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset); 91 if (gid < len) { 92 pDstWithOffset[ gid ] = pSrcWithOffset[ gid ]; 93 } 94} 95 96__kernel void CopyBufferToBufferMiddleRegion( 97 __global uint* pDst, 98 const __global uint* pSrc, 99 ulong elems, 100 ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment 101 ulong srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment 102 ) 103{ 104 size_t gid = get_global_id(0); 105 __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset); 106 __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset); 107 if (gid < elems) { 108 uint4 loaded = vload4(gid, pSrcWithOffset); 109 vstore4(loaded, gid, pDstWithOffset); 110 } 111} 112 113)==="