1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 #include "shared/source/built_ins/built_ins.h"
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/helpers/basic_math.h"
12 #include "shared/source/memory_manager/graphics_allocation.h"
13 
14 #include "opencl/source/command_queue/command_queue_hw.h"
15 #include "opencl/source/helpers/hardware_commands_helper.h"
16 #include "opencl/source/helpers/mipmap.h"
17 #include "opencl/source/mem_obj/image.h"
18 
19 #include <algorithm>
20 #include <new>
21 
22 namespace NEO {
23 
24 template <typename GfxFamily>
enqueueWriteImage(Image * dstImage,cl_bool blockingWrite,const size_t * origin,const size_t * region,size_t inputRowPitch,size_t inputSlicePitch,const void * ptr,GraphicsAllocation * mapAllocation,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)25 cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
26     Image *dstImage,
27     cl_bool blockingWrite,
28     const size_t *origin,
29     const size_t *region,
30     size_t inputRowPitch,
31     size_t inputSlicePitch,
32     const void *ptr,
33     GraphicsAllocation *mapAllocation,
34     cl_uint numEventsInWaitList,
35     const cl_event *eventWaitList,
36     cl_event *event) {
37     constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
38 
39     CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, device->getRootDeviceIndex(), region, nullptr, origin};
40     CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
41 
42     auto isMemTransferNeeded = true;
43 
44     if (dstImage->isMemObjZeroCopy()) {
45         size_t hostOffset;
46         Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
47         isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, cmdType);
48     }
49     if (!isMemTransferNeeded) {
50         return enqueueMarkerForReadWriteOperation(dstImage, const_cast<void *>(ptr), cmdType, blockingWrite,
51                                                   numEventsInWaitList, eventWaitList, event);
52     }
53 
54     size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, dstImage);
55     void *srcPtr = const_cast<void *>(ptr);
56 
57     MemObjSurface dstImgSurf(dstImage);
58     HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
59     GeneralSurface mapSurface;
60     Surface *surfaces[] = {&dstImgSurf, nullptr};
61     if (mapAllocation) {
62         surfaces[1] = &mapSurface;
63         mapSurface.setGraphicsAllocation(mapAllocation);
64         //get offset between base cpu ptr of map allocation and dst ptr
65         size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer());
66         srcPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + srcOffset);
67     } else {
68         surfaces[1] = &hostPtrSurf;
69         if (region[0] != 0 &&
70             region[1] != 0 &&
71             region[2] != 0) {
72             bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
73             if (!status) {
74                 return CL_OUT_OF_RESOURCES;
75             }
76             srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
77         }
78     }
79 
80     void *alignedSrcPtr = alignDown(srcPtr, 4);
81     size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
82 
83     BuiltinOpParams dc;
84     dc.srcPtr = alignedSrcPtr;
85     dc.srcOffset.x = srcPtrOffset;
86     dc.dstMemObj = dstImage;
87     dc.dstOffset = origin;
88     dc.size = region;
89     dc.srcRowPitch = ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (inputSlicePitch > inputRowPitch)) ? inputSlicePitch : inputRowPitch;
90     dc.srcSlicePitch = inputSlicePitch;
91     if (isMipMapped(dstImage->getImageDesc())) {
92         dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, origin);
93     }
94     dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
95 
96     auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
97     MultiDispatchInfo dispatchInfo(dc);
98 
99     dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
100 
101     if (context->isProvidingPerformanceHints()) {
102         context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
103     }
104 
105     return CL_SUCCESS;
106 }
107 } // namespace NEO
108