1 /*
2 * Copyright (C) 2018-2021 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 */
7
8 #pragma once
9 #include "shared/source/built_ins/built_ins.h"
10 #include "shared/source/command_stream/command_stream_receiver.h"
11 #include "shared/source/helpers/basic_math.h"
12 #include "shared/source/memory_manager/graphics_allocation.h"
13
14 #include "opencl/source/command_queue/command_queue_hw.h"
15 #include "opencl/source/helpers/hardware_commands_helper.h"
16 #include "opencl/source/helpers/mipmap.h"
17 #include "opencl/source/mem_obj/image.h"
18
19 #include <algorithm>
20 #include <new>
21
22 namespace NEO {
23
24 template <typename GfxFamily>
enqueueWriteImage(Image * dstImage,cl_bool blockingWrite,const size_t * origin,const size_t * region,size_t inputRowPitch,size_t inputSlicePitch,const void * ptr,GraphicsAllocation * mapAllocation,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)25 cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
26 Image *dstImage,
27 cl_bool blockingWrite,
28 const size_t *origin,
29 const size_t *region,
30 size_t inputRowPitch,
31 size_t inputSlicePitch,
32 const void *ptr,
33 GraphicsAllocation *mapAllocation,
34 cl_uint numEventsInWaitList,
35 const cl_event *eventWaitList,
36 cl_event *event) {
37 constexpr cl_command_type cmdType = CL_COMMAND_WRITE_IMAGE;
38
39 CsrSelectionArgs csrSelectionArgs{cmdType, nullptr, dstImage, device->getRootDeviceIndex(), region, nullptr, origin};
40 CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
41
42 auto isMemTransferNeeded = true;
43
44 if (dstImage->isMemObjZeroCopy()) {
45 size_t hostOffset;
46 Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
47 isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, cmdType);
48 }
49 if (!isMemTransferNeeded) {
50 return enqueueMarkerForReadWriteOperation(dstImage, const_cast<void *>(ptr), cmdType, blockingWrite,
51 numEventsInWaitList, eventWaitList, event);
52 }
53
54 size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, dstImage);
55 void *srcPtr = const_cast<void *>(ptr);
56
57 MemObjSurface dstImgSurf(dstImage);
58 HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
59 GeneralSurface mapSurface;
60 Surface *surfaces[] = {&dstImgSurf, nullptr};
61 if (mapAllocation) {
62 surfaces[1] = &mapSurface;
63 mapSurface.setGraphicsAllocation(mapAllocation);
64 //get offset between base cpu ptr of map allocation and dst ptr
65 size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer());
66 srcPtr = reinterpret_cast<void *>(mapAllocation->getGpuAddress() + srcOffset);
67 } else {
68 surfaces[1] = &hostPtrSurf;
69 if (region[0] != 0 &&
70 region[1] != 0 &&
71 region[2] != 0) {
72 bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
73 if (!status) {
74 return CL_OUT_OF_RESOURCES;
75 }
76 srcPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
77 }
78 }
79
80 void *alignedSrcPtr = alignDown(srcPtr, 4);
81 size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
82
83 BuiltinOpParams dc;
84 dc.srcPtr = alignedSrcPtr;
85 dc.srcOffset.x = srcPtrOffset;
86 dc.dstMemObj = dstImage;
87 dc.dstOffset = origin;
88 dc.size = region;
89 dc.srcRowPitch = ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (inputSlicePitch > inputRowPitch)) ? inputSlicePitch : inputRowPitch;
90 dc.srcSlicePitch = inputSlicePitch;
91 if (isMipMapped(dstImage->getImageDesc())) {
92 dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, origin);
93 }
94 dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
95
96 auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
97 MultiDispatchInfo dispatchInfo(dc);
98
99 dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
100
101 if (context->isProvidingPerformanceHints()) {
102 context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
103 }
104
105 return CL_SUCCESS;
106 }
107 } // namespace NEO
108