1 /****************************************************************************
2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file backend.cpp
24  *
25  * @brief Backend handles rasterization, pixel shading and output merger
26  *        operations.
27  *
28  ******************************************************************************/
29 
30 #include <smmintrin.h>
31 
32 #include "backend.h"
33 #include "backend_impl.h"
34 #include "tilemgr.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
37 
38 #include <algorithm>
39 
40 template <SWR_FORMAT format>
ClearRasterTile(uint8_t * pTileBuffer,simd16vector & value)41 void ClearRasterTile(uint8_t* pTileBuffer, simd16vector& value)
42 {
43     auto lambda = [&](int32_t comp)
44     {
45         FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
46 
47         pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
48     };
49 
50     const uint32_t numIter =
51         (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
52 
53     for (uint32_t i = 0; i < numIter; ++i)
54     {
55         UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
56     }
57 }
58 
59 template <SWR_FORMAT format>
ClearMacroTile(DRAW_CONTEXT * pDC,HANDLE hWorkerPrivateData,SWR_RENDERTARGET_ATTACHMENT rt,uint32_t macroTile,uint32_t renderTargetArrayIndex,uint32_t clear[4],const SWR_RECT & rect)60 INLINE void ClearMacroTile(DRAW_CONTEXT*               pDC,
61                            HANDLE                      hWorkerPrivateData,
62                            SWR_RENDERTARGET_ATTACHMENT rt,
63                            uint32_t                    macroTile,
64                            uint32_t                    renderTargetArrayIndex,
65                            uint32_t                    clear[4],
66                            const SWR_RECT&             rect)
67 {
68     // convert clear color to hottile format
69     // clear color is in RGBA float/uint32
70 
71     simd16vector vClear;
72     for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
73     {
74         simd16scalar vComp = _simd16_load1_ps((const float*)&clear[comp]);
75 
76         if (FormatTraits<format>::isNormalized(comp))
77         {
78             vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
79             vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
80         }
81         vComp = FormatTraits<format>::pack(comp, vComp);
82 
83         vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
84     }
85 
86     uint32_t tileX, tileY;
87     MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
88 
89     // Init to full macrotile
90     SWR_RECT clearTile = {
91         KNOB_MACROTILE_X_DIM * int32_t(tileX),
92         KNOB_MACROTILE_Y_DIM * int32_t(tileY),
93         KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
94         KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
95     };
96 
97     // intersect with clear rect
98     clearTile &= rect;
99 
100     // translate to local hottile origin
101     clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM,
102                         -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
103 
104     // Make maximums inclusive (needed for convert to raster tiles)
105     clearTile.xmax -= 1;
106     clearTile.ymax -= 1;
107 
108     // convert to raster tiles
109     clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
110     clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
111     clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
112     clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
113 
114     const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
115     // compute steps between raster tile samples / raster tiles / macro tile rows
116     const uint32_t rasterTileSampleStep =
117         KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
118     const uint32_t rasterTileStep =
119         (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
120     const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
121     const uint32_t pitch            = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
122 
123     HOTTILE* pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext,
124                                                                pDC,
125                                                                hWorkerPrivateData,
126                                                                macroTile,
127                                                                rt,
128                                                                true,
129                                                                numSamples,
130                                                                renderTargetArrayIndex);
131     uint32_t rasterTileStartOffset =
132         (ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp>>(
133             pitch, clearTile.xmin, clearTile.ymin)) *
134         numSamples;
135     uint8_t* pRasterTileRow =
136         pHotTile->pBuffer +
137         rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ,
138                                // FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
139 
140     // loop over all raster tiles in the current hot tile
141     for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
142     {
143         uint8_t* pRasterTile = pRasterTileRow;
144         for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
145         {
146             for (int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
147             {
148                 ClearRasterTile<format>(pRasterTile, vClear);
149                 pRasterTile += rasterTileSampleStep;
150             }
151         }
152         pRasterTileRow += macroTileRowStep;
153     }
154 
155     pHotTile->state = HOTTILE_DIRTY;
156 }
157 
ProcessClearBE(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t macroTile,void * pUserData)158 void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
159 {
160     SWR_CONTEXT* pContext           = pDC->pContext;
161     HANDLE       hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
162 
163     if (KNOB_FAST_CLEAR)
164     {
165         CLEAR_DESC*           pClear      = (CLEAR_DESC*)pUserData;
166         SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
167         uint32_t              numSamples  = GetNumSamples(sampleCount);
168 
169         SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
170 
171         RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
172 
173         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
174         {
175             unsigned long rt   = 0;
176             uint32_t      mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
177             while (_BitScanForward(&rt, mask))
178             {
179                 mask &= ~(1 << rt);
180 
181                 HOTTILE* pHotTile =
182                     pContext->pHotTileMgr->GetHotTile(pContext,
183                                                       pDC,
184                                                       hWorkerPrivateData,
185                                                       macroTile,
186                                                       (SWR_RENDERTARGET_ATTACHMENT)rt,
187                                                       true,
188                                                       numSamples,
189                                                       pClear->renderTargetArrayIndex);
190 
191                 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
192                 pHotTile->clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
193                 pHotTile->clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
194                 pHotTile->clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
195                 pHotTile->clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
196                 pHotTile->state        = HOTTILE_CLEAR;
197             }
198         }
199 
200         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
201         {
202             HOTTILE* pHotTile      = pContext->pHotTileMgr->GetHotTile(pContext,
203                                                                   pDC,
204                                                                   hWorkerPrivateData,
205                                                                   macroTile,
206                                                                   SWR_ATTACHMENT_DEPTH,
207                                                                   true,
208                                                                   numSamples,
209                                                                   pClear->renderTargetArrayIndex);
210             pHotTile->clearData[0] = *(uint32_t*)&pClear->clearDepth;
211             pHotTile->state        = HOTTILE_CLEAR;
212         }
213 
214         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
215         {
216             HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
217                                                                   pDC,
218                                                                   hWorkerPrivateData,
219                                                                   macroTile,
220                                                                   SWR_ATTACHMENT_STENCIL,
221                                                                   true,
222                                                                   numSamples,
223                                                                   pClear->renderTargetArrayIndex);
224 
225             pHotTile->clearData[0] = pClear->clearStencil;
226             pHotTile->state        = HOTTILE_CLEAR;
227         }
228 
229         RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
230     }
231     else
232     {
233         // Legacy clear
234         CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
235         RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
236 
237         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
238         {
239             uint32_t clearData[4];
240             clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
241             clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
242             clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
243             clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
244 
245             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
246             SWR_ASSERT(pfnClearTiles != nullptr);
247 
248             unsigned long rt   = 0;
249             uint32_t      mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
250             while (_BitScanForward(&rt, mask))
251             {
252                 mask &= ~(1 << rt);
253 
254                 pfnClearTiles(pDC,
255                               hWorkerPrivateData,
256                               (SWR_RENDERTARGET_ATTACHMENT)rt,
257                               macroTile,
258                               pClear->renderTargetArrayIndex,
259                               clearData,
260                               pClear->rect);
261             }
262         }
263 
264         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
265         {
266             uint32_t clearData[4];
267             clearData[0]                  = *(uint32_t*)&pClear->clearDepth;
268             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
269             SWR_ASSERT(pfnClearTiles != nullptr);
270 
271             pfnClearTiles(pDC,
272                           hWorkerPrivateData,
273                           SWR_ATTACHMENT_DEPTH,
274                           macroTile,
275                           pClear->renderTargetArrayIndex,
276                           clearData,
277                           pClear->rect);
278         }
279 
280         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
281         {
282             uint32_t clearData[4];
283             clearData[0]                  = pClear->clearStencil;
284             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
285 
286             pfnClearTiles(pDC,
287                           hWorkerPrivateData,
288                           SWR_ATTACHMENT_STENCIL,
289                           macroTile,
290                           pClear->renderTargetArrayIndex,
291                           clearData,
292                           pClear->rect);
293         }
294 
295         RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
296     }
297 }
298 
InitClearTilesTable()299 void InitClearTilesTable()
300 {
301     memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
302 
303     gClearTilesTable[R8G8B8A8_UNORM]     = ClearMacroTile<R8G8B8A8_UNORM>;
304     gClearTilesTable[B8G8R8A8_UNORM]     = ClearMacroTile<B8G8R8A8_UNORM>;
305     gClearTilesTable[R32_FLOAT]          = ClearMacroTile<R32_FLOAT>;
306     gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
307     gClearTilesTable[R8_UINT]            = ClearMacroTile<R8_UINT>;
308 }
309