1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #include "shared/source/gen8/hw_info.h"
9 
10 #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
11 #include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
12 #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
13 
14 namespace NEO {
15 
16 template <>
applyWADisableLSQCROPERFforOCL(NEO::LinearStream * pCommandStream,const Kernel & kernel,bool disablePerfMode)17 void GpgpuWalkerHelper<BDWFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
18     if (disablePerfMode) {
19         if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
20             // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
21             GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
22         }
23     } else {
24         if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
25             // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work
26             typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL;
27             auto pipeControlSpace = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
28             auto pipeControl = BDWFamily::cmdInitPipeControl;
29             pipeControl.setCommandStreamerStallEnable(true);
30             *pipeControlSpace = pipeControl;
31             // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
32             GpgpuWalkerHelper<BDWFamily>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS);
33         }
34     }
35 }
36 
37 template <>
getSizeForWADisableLSQCROPERFforOCL(const Kernel * pKernel)38 size_t GpgpuWalkerHelper<BDWFamily>::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) {
39     typedef typename BDWFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG;
40     typedef typename BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
41     typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL;
42     typedef typename BDWFamily::MI_MATH MI_MATH;
43     typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
44     size_t n = 0;
45     if (pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
46         n += sizeof(PIPE_CONTROL) +
47              (2 * sizeof(MI_LOAD_REGISTER_REG) +
48               sizeof(MI_LOAD_REGISTER_IMM) +
49               sizeof(PIPE_CONTROL) +
50               sizeof(MI_MATH) +
51               NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) *
52                  2; // For 2 WADisableLSQCROPERFforOCL WAs
53     }
54     return n;
55 }
56 
// Explicit instantiation definitions of HardwareInterface, GpgpuWalkerHelper and
// EnqueueOperation for BDWFamily. They follow the GpgpuWalkerHelper<BDWFamily>
// member specializations defined earlier in this file.
// NOTE(review): the primary templates are presumably defined in the .inl files
// included at the top of this file - confirm.
57 template class HardwareInterface<BDWFamily>;
58
59 template class GpgpuWalkerHelper<BDWFamily>;
60
61 template struct EnqueueOperation<BDWFamily>;
62 
63 } // namespace NEO
64