1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file depthstencil.h
24  *
25  * @brief Implements depth/stencil functionality
26  *
27  ******************************************************************************/
28 #pragma once
29 #include "common/os.h"
30 #include "format_conversion.h"
31 
32 INLINE
StencilOp(SWR_STENCILOP op,simdscalar const & mask,simdscalar const & stencilRefps,simdscalar & stencilps)33 void StencilOp(SWR_STENCILOP     op,
34                simdscalar const& mask,
35                simdscalar const& stencilRefps,
36                simdscalar&       stencilps)
37 {
38     simdscalari stencil = _simd_castps_si(stencilps);
39 
40     switch (op)
41     {
42     case STENCILOP_KEEP:
43         break;
44     case STENCILOP_ZERO:
45         stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
46         break;
47     case STENCILOP_REPLACE:
48         stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
49         break;
50     case STENCILOP_INCRSAT:
51     {
52         simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
53         stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
54         break;
55     }
56     case STENCILOP_DECRSAT:
57     {
58         simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
59         stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
60         break;
61     }
62     case STENCILOP_INCR:
63     {
64         simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
65         stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
66         break;
67     }
68     case STENCILOP_DECR:
69     {
70         simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
71         stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
72         break;
73     }
74     case STENCILOP_INVERT:
75     {
76         simdscalar stencilinvert =
77             _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
78         stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
79         break;
80     }
81     default:
82         break;
83     }
84 }
85 
86 template <SWR_FORMAT depthFormatT>
QuantizeDepth(simdscalar const & depth)87 simdscalar QuantizeDepth(simdscalar const& depth)
88 {
89     SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
90     uint32_t depthBpc  = FormatTraits<depthFormatT>::GetBPC(0);
91 
92     if (depthType == SWR_TYPE_FLOAT)
93     {
94         // assume only 32bit float depth supported
95         SWR_ASSERT(depthBpc == 32);
96 
97         // matches shader precision, no quantizing needed
98         return depth;
99     }
100 
101     // should be unorm depth if not float
102     SWR_ASSERT(depthType == SWR_TYPE_UNORM);
103 
104     float      quantize = (float)((1 << depthBpc) - 1);
105     simdscalar result   = _simd_mul_ps(depth, _simd_set1_ps(quantize));
106     result              = _simd_add_ps(result, _simd_set1_ps(0.5f));
107     result              = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
108 
109     if (depthBpc > 16)
110     {
111         result = _simd_div_ps(result, _simd_set1_ps(quantize));
112     }
113     else
114     {
115         result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
116     }
117 
118     return result;
119 }
120 
121 INLINE
DepthStencilTest(const API_STATE * pState,bool frontFacing,uint32_t viewportIndex,simdscalar const & iZ,uint8_t * pDepthBase,simdscalar const & coverageMask,uint8_t * pStencilBase,simdscalar * pStencilMask)122 simdscalar DepthStencilTest(const API_STATE*  pState,
123                             bool              frontFacing,
124                             uint32_t          viewportIndex,
125                             simdscalar const& iZ,
126                             uint8_t*          pDepthBase,
127                             simdscalar const& coverageMask,
128                             uint8_t*          pStencilBase,
129                             simdscalar*       pStencilMask)
130 {
131     static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
132     static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
133 
134     const SWR_DEPTH_STENCIL_STATE* pDSState  = &pState->depthStencilState;
135     const SWR_VIEWPORT*            pViewport = &pState->vp[viewportIndex];
136 
137     simdscalar depthResult = _simd_set1_ps(-1.0f);
138     simdscalar zbuf;
139 
140     // clamp Z to viewport [minZ..maxZ]
141     simdscalar vMinZ   = _simd_broadcast_ss(&pViewport->minZ);
142     simdscalar vMaxZ   = _simd_broadcast_ss(&pViewport->maxZ);
143     simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
144 
145     if (pDSState->depthTestEnable)
146     {
147         switch (pDSState->depthTestFunc)
148         {
149         case ZFUNC_NEVER:
150             depthResult = _simd_setzero_ps();
151             break;
152         case ZFUNC_ALWAYS:
153             break;
154         default:
155             zbuf = _simd_load_ps((const float*)pDepthBase);
156         }
157 
158         switch (pDSState->depthTestFunc)
159         {
160         case ZFUNC_LE:
161             depthResult = _simd_cmple_ps(interpZ, zbuf);
162             break;
163         case ZFUNC_LT:
164             depthResult = _simd_cmplt_ps(interpZ, zbuf);
165             break;
166         case ZFUNC_GT:
167             depthResult = _simd_cmpgt_ps(interpZ, zbuf);
168             break;
169         case ZFUNC_GE:
170             depthResult = _simd_cmpge_ps(interpZ, zbuf);
171             break;
172         case ZFUNC_EQ:
173             depthResult = _simd_cmpeq_ps(interpZ, zbuf);
174             break;
175         case ZFUNC_NE:
176             depthResult = _simd_cmpneq_ps(interpZ, zbuf);
177             break;
178         }
179     }
180 
181     simdscalar stencilMask = _simd_set1_ps(-1.0f);
182 
183     if (pDSState->stencilTestEnable)
184     {
185         uint8_t  stencilRefValue;
186         uint32_t stencilTestFunc;
187         uint8_t  stencilTestMask;
188         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
189         {
190             stencilRefValue = pDSState->stencilRefValue;
191             stencilTestFunc = pDSState->stencilTestFunc;
192             stencilTestMask = pDSState->stencilTestMask;
193         }
194         else
195         {
196             stencilRefValue = pDSState->backfaceStencilRefValue;
197             stencilTestFunc = pDSState->backfaceStencilTestFunc;
198             stencilTestMask = pDSState->backfaceStencilTestMask;
199         }
200 
201         simdvector sbuf;
202         simdscalar stencilWithMask;
203         simdscalar stencilRef;
204         switch (stencilTestFunc)
205         {
206         case ZFUNC_NEVER:
207             stencilMask = _simd_setzero_ps();
208             break;
209         case ZFUNC_ALWAYS:
210             break;
211         default:
212             LoadSOA<R8_UINT>(pStencilBase, sbuf);
213 
214             // apply stencil read mask
215             stencilWithMask = _simd_castsi_ps(
216                 _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
217 
218             // do stencil compare in float to avoid simd integer emulation in AVX1
219             stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
220 
221             stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
222             break;
223         }
224 
225         switch (stencilTestFunc)
226         {
227         case ZFUNC_LE:
228             stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask);
229             break;
230         case ZFUNC_LT:
231             stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask);
232             break;
233         case ZFUNC_GT:
234             stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask);
235             break;
236         case ZFUNC_GE:
237             stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask);
238             break;
239         case ZFUNC_EQ:
240             stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask);
241             break;
242         case ZFUNC_NE:
243             stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask);
244             break;
245         }
246     }
247 
248     simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
249     depthWriteMask            = _simd_and_ps(depthWriteMask, coverageMask);
250 
251     *pStencilMask = stencilMask;
252     return depthWriteMask;
253 }
254 
255 INLINE
DepthStencilWrite(const SWR_VIEWPORT * pViewport,const SWR_DEPTH_STENCIL_STATE * pDSState,bool frontFacing,simdscalar const & iZ,uint8_t * pDepthBase,const simdscalar & depthMask,const simdscalar & coverageMask,uint8_t * pStencilBase,const simdscalar & stencilMask)256 void DepthStencilWrite(const SWR_VIEWPORT*            pViewport,
257                        const SWR_DEPTH_STENCIL_STATE* pDSState,
258                        bool                           frontFacing,
259                        simdscalar const&              iZ,
260                        uint8_t*                       pDepthBase,
261                        const simdscalar&              depthMask,
262                        const simdscalar&              coverageMask,
263                        uint8_t*                       pStencilBase,
264                        const simdscalar&              stencilMask)
265 {
266     if (pDSState->depthWriteEnable)
267     {
268         // clamp Z to viewport [minZ..maxZ]
269         simdscalar vMinZ   = _simd_broadcast_ss(&pViewport->minZ);
270         simdscalar vMaxZ   = _simd_broadcast_ss(&pViewport->maxZ);
271         simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
272 
273         simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
274         _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
275     }
276 
277     if (pDSState->stencilWriteEnable)
278     {
279         simdvector sbuf;
280         LoadSOA<R8_UINT>(pStencilBase, sbuf);
281         simdscalar stencilbuf = sbuf.v[0];
282 
283         uint8_t  stencilRefValue;
284         uint32_t stencilFailOp;
285         uint32_t stencilPassDepthPassOp;
286         uint32_t stencilPassDepthFailOp;
287         uint8_t  stencilWriteMask;
288         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
289         {
290             stencilRefValue        = pDSState->stencilRefValue;
291             stencilFailOp          = pDSState->stencilFailOp;
292             stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
293             stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
294             stencilWriteMask       = pDSState->stencilWriteMask;
295         }
296         else
297         {
298             stencilRefValue        = pDSState->backfaceStencilRefValue;
299             stencilFailOp          = pDSState->backfaceStencilFailOp;
300             stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
301             stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
302             stencilWriteMask       = pDSState->backfaceStencilWriteMask;
303         }
304 
305         simdscalar stencilps    = stencilbuf;
306         simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
307 
308         simdscalar stencilFailMask          = _simd_andnot_ps(stencilMask, coverageMask);
309         simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
310         simdscalar stencilPassDepthFailMask =
311             _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
312 
313         simdscalar origStencil = stencilps;
314 
315         StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
316         StencilOp((SWR_STENCILOP)stencilPassDepthFailOp,
317                   stencilPassDepthFailMask,
318                   stencilRefps,
319                   stencilps);
320         StencilOp((SWR_STENCILOP)stencilPassDepthPassOp,
321                   stencilPassDepthPassMask,
322                   stencilRefps,
323                   stencilps);
324 
325         // apply stencil write mask
326         simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
327         stencilps              = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
328         stencilps =
329             _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
330 
331         simdvector stencilResult;
332         stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
333         StoreSOA<R8_UINT>(stencilResult, pStencilBase);
334     }
335 }
336