1 /****************************************************************************
2 * Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file LoadTile.h
24 *
25 * @brief Functionality for Load
26 *
27 ******************************************************************************/
28 #include "common/os.h"
29 #include "common/formats.h"
30 #include "core/context.h"
31 #include "core/rdtsc_core.h"
32 #include "memory/TilingFunctions.h"
33 #include "memory/tilingtraits.h"
34 #include "memory/Convert.h"
35 
36 typedef void(*PFN_LOAD_TILES)(const SWR_SURFACE_STATE*, uint8_t*, uint32_t, uint32_t, uint32_t);
37 typedef void(*PFN_LOAD_RASTER_TILES)(const SWR_SURFACE_STATE*, uint8_t*, uint32_t, uint32_t, uint32_t, uint32_t);
38 
39 //////////////////////////////////////////////////////////////////////////
40 /// Load Raster Tile Function Tables.
41 //////////////////////////////////////////////////////////////////////////
42 extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
43 extern PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
44 
45 extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
46 extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[NUM_SWR_FORMATS];
47 
48 extern PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
49 
50 void InitLoadTilesTable_Linear();
51 void InitLoadTilesTable_XMajor();
52 void InitLoadTilesTable_YMajor();
53 
54 //////////////////////////////////////////////////////////////////////////
55 /// LoadRasterTile
56 //////////////////////////////////////////////////////////////////////////
57 template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
58 struct LoadRasterTile
59 {
60     //////////////////////////////////////////////////////////////////////////
61     /// @brief Retrieve color from hot tile source which is always float.
62     /// @param pSrc - Pointer to raster tile.
63     /// @param x, y - Coordinates to raster tile.
64     /// @param output - output color
SetSwizzledDstColorLoadRasterTile65     INLINE static void SetSwizzledDstColor(
66         const float srcColor[4],
67         uint32_t x, uint32_t y,
68         uint8_t* pDst)
69     {
70         typedef SimdTile_16<DstFormat, SrcFormat> SimdT;
71 
72         SimdT* pDstSimdTiles = (SimdT*)pDst;
73 
74         // Compute which simd tile we're accessing within 8x8 tile.
75         //   i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates.
76         uint32_t simdIndex = (y / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM) + (x / SIMD16_TILE_X_DIM);
77 
78         SimdT* pSimdTile = &pDstSimdTiles[simdIndex];
79 
80         uint32_t simdOffset = (y % SIMD16_TILE_Y_DIM) * SIMD16_TILE_X_DIM + (x % SIMD16_TILE_X_DIM);
81 
82         pSimdTile->SetSwizzledColor(simdOffset, srcColor);
83     }
84 
85     //////////////////////////////////////////////////////////////////////////
86     /// @brief Loads an 8x8 raster tile from the src surface.
87     /// @param pSrcSurface - Src surface state
88     /// @param pDst - Destination hot tile pointer
89     /// @param x, y - Coordinates to raster tile.
LoadLoadRasterTile90     INLINE static void Load(
91         const SWR_SURFACE_STATE* pSrcSurface,
92         uint8_t* pDst,
93         uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
94     {
95         uint32_t lodWidth = (pSrcSurface->width == 1) ? 1 : pSrcSurface->width >> pSrcSurface->lod;
96         uint32_t lodHeight = (pSrcSurface->height == 1) ? 1 : pSrcSurface->height >> pSrcSurface->lod;
97 
98         // For each raster tile pixel (rx, ry)
99         for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
100         {
101             for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
102             {
103                 if (((x + rx) < lodWidth) &&
104                     ((y + ry) < lodHeight))
105                 {
106                     uint8_t* pSrc = (uint8_t*)ComputeSurfaceAddress<false, true>(x + rx, y + ry, pSrcSurface->arrayIndex + renderTargetArrayIndex,
107                             pSrcSurface->arrayIndex + renderTargetArrayIndex, sampleNum,
108                             pSrcSurface->lod, pSrcSurface);
109 
110                     float srcColor[4];
111                     ConvertPixelToFloat<SrcFormat>(srcColor, pSrc);
112 
113                     // store pixel to hottile
114                     SetSwizzledDstColor(srcColor, rx, ry, pDst);
115                 }
116             }
117         }
118     }
119 };
120 
121 //////////////////////////////////////////////////////////////////////////
122 /// LoadMacroTile - Loads a macro tile which consists of raster tiles.
123 //////////////////////////////////////////////////////////////////////////
124 template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
125 struct LoadMacroTile
126 {
127     //////////////////////////////////////////////////////////////////////////
128     /// @brief Load a macrotile to the destination surface.
129     /// @param pSrc - Pointer to macro tile.
130     /// @param pDstSurface - Destination surface state
131     /// @param x, y - Coordinates to macro tile
LoadLoadMacroTile132     static void Load(
133         const SWR_SURFACE_STATE* pSrcSurface,
134         uint8_t *pDstHotTile,
135         uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)
136     {
137         PFN_LOAD_RASTER_TILES loadRasterTileFn;
138         loadRasterTileFn = LoadRasterTile<TTraits, SrcFormat, DstFormat>::Load;
139 
140         // Load each raster tile from the hot tile to the destination surface.
141         for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
142         {
143             for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
144             {
145                 for (uint32_t sampleNum = 0; sampleNum < pSrcSurface->numSamples; sampleNum++)
146                 {
147                     loadRasterTileFn(pSrcSurface, pDstHotTile, (x + col), (y + row), sampleNum, renderTargetArrayIndex);
148                     pDstHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<DstFormat>::bpp / 8);
149                 }
150             }
151         }
152     }
153 };
154 
155 //////////////////////////////////////////////////////////////////////////
156 /// InitLoadTileColorTable - Helper function for setting up the tables.
157 template<SWR_TILE_MODE TTileMode>
InitLoadTileColorTable(PFN_LOAD_TILES (& table)[NUM_SWR_FORMATS])158 static INLINE void InitLoadTileColorTable(PFN_LOAD_TILES (&table)[NUM_SWR_FORMATS])
159 {
160     memset(table, 0, sizeof(table));
161 
162     table[R32G32B32A32_FLOAT]              = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_FLOAT>::Load;
163     table[R32G32B32A32_SINT]               = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_SINT, R32G32B32A32_FLOAT>::Load;
164     table[R32G32B32A32_UINT]               = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_UINT, R32G32B32A32_FLOAT>::Load;
165     table[R32G32B32X32_FLOAT]              = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
166     table[R32G32B32A32_SSCALED]            = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_SSCALED, R32G32B32A32_FLOAT>::Load;
167     table[R32G32B32A32_USCALED]            = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_USCALED, R32G32B32A32_FLOAT>::Load;
168     table[R32G32B32_FLOAT]                 = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_FLOAT, R32G32B32A32_FLOAT>::Load;
169     table[R32G32B32_SINT]                  = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_SINT, R32G32B32A32_FLOAT>::Load;
170     table[R32G32B32_UINT]                  = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_UINT, R32G32B32A32_FLOAT>::Load;
171     table[R32G32B32_SSCALED]               = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_SSCALED, R32G32B32A32_FLOAT>::Load;
172     table[R32G32B32_USCALED]               = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_USCALED, R32G32B32A32_FLOAT>::Load;
173     table[R16G16B16A16_UNORM]              = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_UNORM, R32G32B32A32_FLOAT>::Load;
174     table[R16G16B16A16_SNORM]              = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SNORM, R32G32B32A32_FLOAT>::Load;
175     table[R16G16B16A16_SINT]               = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SINT, R32G32B32A32_FLOAT>::Load;
176     table[R16G16B16A16_UINT]               = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_UINT, R32G32B32A32_FLOAT>::Load;
177     table[R16G16B16A16_FLOAT]              = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_FLOAT, R32G32B32A32_FLOAT>::Load;
178     table[R32G32_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_FLOAT, R32G32B32A32_FLOAT>::Load;
179     table[R32G32_SINT]                     = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_SINT, R32G32B32A32_FLOAT>::Load;
180     table[R32G32_UINT]                     = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_UINT, R32G32B32A32_FLOAT>::Load;
181     table[R32_FLOAT_X8X24_TYPELESS]        = LoadMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT_X8X24_TYPELESS, R32G32B32A32_FLOAT>::Load;
182     table[X32_TYPELESS_G8X24_UINT]         = LoadMacroTile<TilingTraits<TTileMode, 64>, X32_TYPELESS_G8X24_UINT, R32G32B32A32_FLOAT>::Load;
183     table[L32A32_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 64>, L32A32_FLOAT, R32G32B32A32_FLOAT>::Load;
184     table[R16G16B16X16_UNORM]              = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16X16_UNORM, R32G32B32A32_FLOAT>::Load;
185     table[R16G16B16X16_FLOAT]              = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16X16_FLOAT, R32G32B32A32_FLOAT>::Load;
186     table[L32X32_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 64>, L32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
187     table[I32X32_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 64>, I32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
188     table[R16G16B16A16_SSCALED]            = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SSCALED, R32G32B32A32_FLOAT>::Load;
189     table[R16G16B16A16_USCALED]            = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_USCALED, R32G32B32A32_FLOAT>::Load;
190     table[R32G32_SSCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_SSCALED, R32G32B32A32_FLOAT>::Load;
191     table[R32G32_USCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_USCALED, R32G32B32A32_FLOAT>::Load;
192     table[B8G8R8A8_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8A8_UNORM, R32G32B32A32_FLOAT>::Load;
193     table[B8G8R8A8_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
194     table[R10G10B10A2_UNORM]               = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UNORM, R32G32B32A32_FLOAT>::Load;
195     table[R10G10B10A2_UNORM_SRGB]          = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
196     table[R10G10B10A2_UINT]                = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UINT, R32G32B32A32_FLOAT>::Load;
197     table[R8G8B8A8_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UNORM, R32G32B32A32_FLOAT>::Load;
198     table[R8G8B8A8_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
199     table[R8G8B8A8_SNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SNORM, R32G32B32A32_FLOAT>::Load;
200     table[R8G8B8A8_SINT]                   = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SINT, R32G32B32A32_FLOAT>::Load;
201     table[R8G8B8A8_UINT]                   = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UINT, R32G32B32A32_FLOAT>::Load;
202     table[R16G16_UNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_UNORM, R32G32B32A32_FLOAT>::Load;
203     table[R16G16_SNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SNORM, R32G32B32A32_FLOAT>::Load;
204     table[R16G16_SINT]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SINT, R32G32B32A32_FLOAT>::Load;
205     table[R16G16_UINT]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_UINT, R32G32B32A32_FLOAT>::Load;
206     table[R16G16_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_FLOAT, R32G32B32A32_FLOAT>::Load;
207     table[B10G10R10A2_UNORM]               = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UNORM, R32G32B32A32_FLOAT>::Load;
208     table[B10G10R10A2_UNORM_SRGB]          = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
209     table[R11G11B10_FLOAT]                 = LoadMacroTile<TilingTraits<TTileMode, 32>, R11G11B10_FLOAT, R32G32B32A32_FLOAT>::Load;
210     table[R10G10B10_FLOAT_A2_UNORM]        = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10_FLOAT_A2_UNORM, R32G32B32A32_FLOAT>::Load;
211     table[R32_SINT]                        = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_SINT, R32G32B32A32_FLOAT>::Load;
212     table[R32_UINT]                        = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_UINT, R32G32B32A32_FLOAT>::Load;
213     table[R32_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32G32B32A32_FLOAT>::Load;
214     table[R24_UNORM_X8_TYPELESS]           = LoadMacroTile<TilingTraits<TTileMode, 32>, R24_UNORM_X8_TYPELESS, R32G32B32A32_FLOAT>::Load;
215     table[X24_TYPELESS_G8_UINT]            = LoadMacroTile<TilingTraits<TTileMode, 32>, X24_TYPELESS_G8_UINT, R32G32B32A32_FLOAT>::Load;
216     table[L32_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, L32_UNORM, R32G32B32A32_FLOAT>::Load;
217     table[L16A16_UNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, L16A16_UNORM, R32G32B32A32_FLOAT>::Load;
218     table[I24X8_UNORM]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, I24X8_UNORM, R32G32B32A32_FLOAT>::Load;
219     table[L24X8_UNORM]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, L24X8_UNORM, R32G32B32A32_FLOAT>::Load;
220     table[I32_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, I32_FLOAT, R32G32B32A32_FLOAT>::Load;
221     table[L32_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, L32_FLOAT, R32G32B32A32_FLOAT>::Load;
222     table[A32_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, A32_FLOAT, R32G32B32A32_FLOAT>::Load;
223     table[B8G8R8X8_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8X8_UNORM, R32G32B32A32_FLOAT>::Load;
224     table[B8G8R8X8_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
225     table[R8G8B8X8_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8X8_UNORM, R32G32B32A32_FLOAT>::Load;
226     table[R8G8B8X8_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
227     table[R9G9B9E5_SHAREDEXP]              = LoadMacroTile<TilingTraits<TTileMode, 32>, R9G9B9E5_SHAREDEXP, R32G32B32A32_FLOAT>::Load;
228     table[B10G10R10X2_UNORM]               = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10X2_UNORM, R32G32B32A32_FLOAT>::Load;
229     table[L16A16_FLOAT]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, L16A16_FLOAT, R32G32B32A32_FLOAT>::Load;
230     table[R10G10B10X2_USCALED]             = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10X2_USCALED, R32G32B32A32_FLOAT>::Load;
231     table[R8G8B8A8_SSCALED]                = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SSCALED, R32G32B32A32_FLOAT>::Load;
232     table[R8G8B8A8_USCALED]                = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_USCALED, R32G32B32A32_FLOAT>::Load;
233     table[R16G16_SSCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SSCALED, R32G32B32A32_FLOAT>::Load;
234     table[R16G16_USCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_USCALED, R32G32B32A32_FLOAT>::Load;
235     table[R32_SSCALED]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_SSCALED, R32G32B32A32_FLOAT>::Load;
236     table[R32_USCALED]                     = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_USCALED, R32G32B32A32_FLOAT>::Load;
237     table[B5G6R5_UNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G6R5_UNORM, R32G32B32A32_FLOAT>::Load;
238     table[B5G6R5_UNORM_SRGB]               = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G6R5_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
239     table[B5G5R5A1_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5A1_UNORM, R32G32B32A32_FLOAT>::Load;
240     table[B5G5R5A1_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5A1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
241     table[B4G4R4A4_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 16>, B4G4R4A4_UNORM, R32G32B32A32_FLOAT>::Load;
242     table[B4G4R4A4_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 16>, B4G4R4A4_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
243     table[R8G8_UNORM]                      = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_UNORM, R32G32B32A32_FLOAT>::Load;
244     table[R8G8_SNORM]                      = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SNORM, R32G32B32A32_FLOAT>::Load;
245     table[R8G8_SINT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SINT, R32G32B32A32_FLOAT>::Load;
246     table[R8G8_UINT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_UINT, R32G32B32A32_FLOAT>::Load;
247     table[R16_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UNORM, R32G32B32A32_FLOAT>::Load;
248     table[R16_SNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SNORM, R32G32B32A32_FLOAT>::Load;
249     table[R16_SINT]                        = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SINT, R32G32B32A32_FLOAT>::Load;
250     table[R16_UINT]                        = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UINT, R32G32B32A32_FLOAT>::Load;
251     table[R16_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_FLOAT, R32G32B32A32_FLOAT>::Load;
252     table[I16_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, I16_UNORM, R32G32B32A32_FLOAT>::Load;
253     table[L16_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, L16_UNORM, R32G32B32A32_FLOAT>::Load;
254     table[A16_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, A16_UNORM, R32G32B32A32_FLOAT>::Load;
255     table[L8A8_UNORM]                      = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UNORM, R32G32B32A32_FLOAT>::Load;
256     table[I16_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, I16_FLOAT, R32G32B32A32_FLOAT>::Load;
257     table[L16_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, L16_FLOAT, R32G32B32A32_FLOAT>::Load;
258     table[A16_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, A16_FLOAT, R32G32B32A32_FLOAT>::Load;
259     table[L8A8_UNORM_SRGB]                 = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
260     table[B5G5R5X1_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5X1_UNORM, R32G32B32A32_FLOAT>::Load;
261     table[B5G5R5X1_UNORM_SRGB]             = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5X1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
262     table[R8G8_SSCALED]                    = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SSCALED, R32G32B32A32_FLOAT>::Load;
263     table[R8G8_USCALED]                    = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_USCALED, R32G32B32A32_FLOAT>::Load;
264     table[R16_SSCALED]                     = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SSCALED, R32G32B32A32_FLOAT>::Load;
265     table[R16_USCALED]                     = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_USCALED, R32G32B32A32_FLOAT>::Load;
266     table[A1B5G5R5_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 16>, A1B5G5R5_UNORM, R32G32B32A32_FLOAT>::Load;
267     table[A4B4G4R4_UNORM]                  = LoadMacroTile<TilingTraits<TTileMode, 16>, A4B4G4R4_UNORM, R32G32B32A32_FLOAT>::Load;
268     table[L8A8_UINT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UINT, R32G32B32A32_FLOAT>::Load;
269     table[L8A8_SINT]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_SINT, R32G32B32A32_FLOAT>::Load;
270     table[R8_UNORM]                        = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_UNORM, R32G32B32A32_FLOAT>::Load;
271     table[R8_SNORM]                        = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SNORM, R32G32B32A32_FLOAT>::Load;
272     table[R8_SINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SINT, R32G32B32A32_FLOAT>::Load;
273     table[R8_UINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_UINT, R32G32B32A32_FLOAT>::Load;
274     table[A8_UNORM]                        = LoadMacroTile<TilingTraits<TTileMode, 8>, A8_UNORM, R32G32B32A32_FLOAT>::Load;
275     table[I8_UNORM]                        = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_UNORM, R32G32B32A32_FLOAT>::Load;
276     table[L8_UNORM]                        = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UNORM, R32G32B32A32_FLOAT>::Load;
277     table[R8_SSCALED]                      = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SSCALED, R32G32B32A32_FLOAT>::Load;
278     table[R8_USCALED]                      = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_USCALED, R32G32B32A32_FLOAT>::Load;
279     table[L8_UNORM_SRGB]                   = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
280     table[L8_UINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UINT, R32G32B32A32_FLOAT>::Load;
281     table[L8_SINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_SINT, R32G32B32A32_FLOAT>::Load;
282     table[I8_UINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_UINT, R32G32B32A32_FLOAT>::Load;
283     table[I8_SINT]                         = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_SINT, R32G32B32A32_FLOAT>::Load;
284     table[YCRCB_SWAPUVY]                   = LoadMacroTile<TilingTraits<TTileMode, 32>, YCRCB_SWAPUVY, R32G32B32A32_FLOAT>::Load;
285     table[BC1_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 64>, BC1_UNORM, R32G32B32A32_FLOAT>::Load;
286     table[BC2_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC2_UNORM, R32G32B32A32_FLOAT>::Load;
287     table[BC3_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC3_UNORM, R32G32B32A32_FLOAT>::Load;
288     table[BC4_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 64>, BC4_UNORM, R32G32B32A32_FLOAT>::Load;
289     table[BC5_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC5_UNORM, R32G32B32A32_FLOAT>::Load;
290     table[BC1_UNORM_SRGB]                  = LoadMacroTile<TilingTraits<TTileMode, 64>, BC1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
291     table[BC2_UNORM_SRGB]                  = LoadMacroTile<TilingTraits<TTileMode, 128>, BC2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
292     table[BC3_UNORM_SRGB]                  = LoadMacroTile<TilingTraits<TTileMode, 128>, BC3_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
293     table[YCRCB_SWAPUV]                    = LoadMacroTile<TilingTraits<TTileMode, 32>, YCRCB_SWAPUV, R32G32B32A32_FLOAT>::Load;
294     table[R8G8B8_UNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UNORM, R32G32B32A32_FLOAT>::Load;
295     table[R8G8B8_SNORM]                    = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SNORM, R32G32B32A32_FLOAT>::Load;
296     table[R8G8B8_SSCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SSCALED, R32G32B32A32_FLOAT>::Load;
297     table[R8G8B8_USCALED]                  = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_USCALED, R32G32B32A32_FLOAT>::Load;
298     table[BC4_SNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 64>, BC4_SNORM, R32G32B32A32_FLOAT>::Load;
299     table[BC5_SNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC5_SNORM, R32G32B32A32_FLOAT>::Load;
300     table[R16G16B16_FLOAT]                 = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_FLOAT, R32G32B32A32_FLOAT>::Load;
301     table[R16G16B16_UNORM]                 = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_UNORM, R32G32B32A32_FLOAT>::Load;
302     table[R16G16B16_SNORM]                 = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SNORM, R32G32B32A32_FLOAT>::Load;
303     table[R16G16B16_SSCALED]               = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SSCALED, R32G32B32A32_FLOAT>::Load;
304     table[R16G16B16_USCALED]               = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_USCALED, R32G32B32A32_FLOAT>::Load;
305     table[BC6H_SF16]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC6H_SF16, R32G32B32A32_FLOAT>::Load;
306     table[BC7_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC7_UNORM, R32G32B32A32_FLOAT>::Load;
307     table[BC7_UNORM_SRGB]                  = LoadMacroTile<TilingTraits<TTileMode, 128>, BC7_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
308     table[BC6H_UF16]                       = LoadMacroTile<TilingTraits<TTileMode, 128>, BC6H_UF16, R32G32B32A32_FLOAT>::Load;
309     table[R8G8B8_UNORM_SRGB]               = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
310     table[R16G16B16_UINT]                  = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_UINT, R32G32B32A32_FLOAT>::Load;
311     table[R16G16B16_SINT]                  = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SINT, R32G32B32A32_FLOAT>::Load;
312     table[R10G10B10A2_SNORM]               = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SNORM, R32G32B32A32_FLOAT>::Load;
313     table[R10G10B10A2_USCALED]             = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_USCALED, R32G32B32A32_FLOAT>::Load;
314     table[R10G10B10A2_SSCALED]             = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SSCALED, R32G32B32A32_FLOAT>::Load;
315     table[R10G10B10A2_SINT]                = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SINT, R32G32B32A32_FLOAT>::Load;
316     table[B10G10R10A2_SNORM]               = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SNORM, R32G32B32A32_FLOAT>::Load;
317     table[B10G10R10A2_USCALED]             = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_USCALED, R32G32B32A32_FLOAT>::Load;
318     table[B10G10R10A2_SSCALED]             = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SSCALED, R32G32B32A32_FLOAT>::Load;
319     table[B10G10R10A2_UINT]                = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UINT, R32G32B32A32_FLOAT>::Load;
320     table[B10G10R10A2_SINT]                = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SINT, R32G32B32A32_FLOAT>::Load;
321     table[R8G8B8_UINT]                     = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UINT, R32G32B32A32_FLOAT>::Load;
322     table[R8G8B8_SINT]                     = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SINT, R32G32B32A32_FLOAT>::Load;
323     table[RAW]                             = LoadMacroTile<TilingTraits<TTileMode, 8>, RAW, R32G32B32A32_FLOAT>::Load;
324 }
325 
326 //////////////////////////////////////////////////////////////////////////
327 /// InitLoadTileColorTable - Helper function for setting up the tables.
328 template<SWR_TILE_MODE TTileMode>
InitLoadTileDepthTable(PFN_LOAD_TILES (& table)[NUM_SWR_FORMATS])329 static INLINE void InitLoadTileDepthTable(PFN_LOAD_TILES(&table)[NUM_SWR_FORMATS])
330 {
331     memset(table, 0, sizeof(table));
332 
333    table[R32_FLOAT]                       = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32_FLOAT>::Load;
334    table[R32_FLOAT_X8X24_TYPELESS]        = LoadMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT_X8X24_TYPELESS, R32_FLOAT>::Load;
335    table[R24_UNORM_X8_TYPELESS]           = LoadMacroTile<TilingTraits<TTileMode, 32>, R24_UNORM_X8_TYPELESS, R32_FLOAT>::Load;
336    table[R16_UNORM]                       = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UNORM, R32_FLOAT>::Load;
337 }
338 
339 
340 //////////////////////////////////////////////////////////////////////////
341 /// @brief Loads a full hottile from a render surface
342 /// @param hPrivateContext - Handle to private DC
343 /// @param dstFormat - Format for hot tile.
344 /// @param renderTargetIndex - Index to src render target
345 /// @param x, y - Coordinates to raster tile.
346 /// @param pDstHotTile - Pointer to Hot Tile
347 void SwrLoadHotTile(
348         HANDLE hWorkerPrivateData,
349         const SWR_SURFACE_STATE *pSrcSurface,
350         BucketManager* pBucketMgr,
351         SWR_FORMAT dstFormat,
352         SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
353         uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
354         uint8_t *pDstHotTile);
355