1 /* 2 * Copyright (c) 2019, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 { 24 /* 25 Buffer layout after shuffle 26 _________________________________________________ 27 |_______Block0__________|_______Block1__________| 28 |_______Block2__________|_______Block3__________| 29 |_______Block4__________|_______Block5__________| 30 |_______Block6__________|_______Block7__________| 31 32 Write back buffer layout correlate to the block number#, each box stands for 1 GRF 33 _______________________________________________ 34 |____R0_________R1_____|____R2_________R3_____| 35 |____G0_________G1_____|____G2_________G3_____| 36 |____B0_________B1_____|____B2_________B3_____| 37 |____A0_________A1_____|____A2_________A3_____| 38 |____R4_________R5_____|____R6_________R7_____| 39 |____G4_________G5_____|____G6_________G7_____| 40 |____B4_________B5_____|____B6_________B7_____| 41 |____A4_________A5_____|____A6_________A7_____| 42 */ 43 44 matrix_ref<uint, 8, 8> Result = DataBuffer.format<uint, 96, 8>().select<8, 1, 8, 1>(64, 0); 45 46 SurfaceIndex Dst_Surface(MDF_FC_OUTPUT_BTI_START); 47 matrix_ref<uint, 4, 8> TempResult4x8_Top = Result.select<4, 1, 8, 1>(0, 0); 48 matrix_ref<uint, 4, 8> TempResult4x8_Bottom = Result.select<4, 1, 8, 1>(4, 0); 49 50 ushort TempA = DestinationRGBFormat << 8; 51 52 #pragma unroll 53 for (uchar i = 0; i < 2; i++, DstY += 8) 54 { 55 // First 8x8 56 { 57 58 // first 8x4 59 { 60 // R/G/B channel top half 61 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 0); 62 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 8); 63 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 16); 64 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 24); 65 66 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 32); 67 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 40); 68 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 48); 69 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 56); 70 71 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 0); 72 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 8); 73 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 16); 74 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 24); 75 76 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 77 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA << 16); 78 79 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 80 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA << 16); 81 82 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 83 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA << 16); 84 85 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 86 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA << 16); 87 88 // R/G/B channel bottom half 89 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 0); 90 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 8); 91 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 16); 92 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 24); 93 94 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 32); 95 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 40); 96 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 48); 97 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 56); 98 99 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 0); 100 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 8); 101 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 16); 102 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 24); 103 104 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 105 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA << 16); 106 107 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 108 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA << 16); 109 110 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 111 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA << 16); 112 113 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 114 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA << 16); 115 116 write(Dst_Surface, DstX * 8, DstY, Result); 117 } 118 119 // second 8x4 120 { 121 // R/G/B channel top half 122 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 0 + 4); 123 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 8 + 4); 124 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 16 + 4); 125 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 24 + 4); 126 127 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 32 + 4); 128 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 40 + 4); 129 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 48 + 4); 130 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 56 + 4); 131 132 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 0 + 4); 133 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 8 + 4); 134 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 16 + 4); 135 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 24 + 4); 136 137 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 138 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA << 16); 139 140 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 141 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA << 16); 142 143 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 144 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA << 16); 145 146 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 147 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA << 16); 148 149 // R/G/B channel bottom half 150 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 0 + 4); 151 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 8 + 4); 152 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 16 + 4); 153 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 24 + 4); 154 155 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 32 + 4); 156 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 40 + 4); 157 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 48 + 4); 158 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 56 + 4); 159 160 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 0 + 4); 161 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 8 + 4); 162 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 16 + 4); 163 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 24 + 4); 164 165 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 166 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA << 16); 167 168 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 169 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA << 16); 170 171 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 172 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA << 16); 173 174 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 175 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA << 16); 176 177 write(Dst_Surface, DstX * 8 + 32, DstY, Result); 178 } 179 } 180 181 // Second 8x8 182 { 183 // first 8x4 184 { 185 // R/G/B channel top half 186 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 0); 187 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 8); 188 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 16); 189 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 24); 190 191 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 32); 192 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 40); 193 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 48); 194 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 56); 195 196 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 0); 197 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 8); 198 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 16); 199 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 24); 200 201 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 202 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA << 16); 203 204 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 205 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA << 16); 206 207 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 208 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA << 16); 209 210 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 211 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA << 16); 212 213 // R/G/B channel bottom half 214 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 0); 215 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 8); 216 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 16); 217 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 24); 218 219 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 32); 220 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 40); 221 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 48); 222 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 56); 223 224 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 0); 225 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 8); 226 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 16); 227 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 24); 228 229 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 230 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA << 16); 231 232 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 233 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA << 16); 234 235 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 236 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA << 16); 237 238 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 239 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA << 16); 240 241 write(Dst_Surface, DstX * 8 + 64, DstY, Result); 242 } 243 244 // second 8x4 245 { 246 // R/G/B channel top half 247 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 0 + 4); 248 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 8 + 4); 249 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 16 + 4); 250 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 24 + 4); 251 252 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 32 + 4); 253 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 40 + 4); 254 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 48 + 4); 255 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 56 + 4); 256 257 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 0 + 4); 258 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 8 + 4); 259 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 16 + 4); 260 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 24 + 4); 261 262 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 263 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA << 16); 264 265 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 266 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA << 16); 267 268 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 269 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA << 16); 270 271 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 272 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA << 16); 273 274 // R/G/B channel bottom half 275 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 0 + 4); 276 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 8 + 4); 277 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 16 + 4); 278 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 24 + 4); 279 280 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 32 + 4); 281 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 40 + 4); 282 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 48 + 4); 283 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 56 + 4); 284 285 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 0 + 4); 286 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 8 + 4); 287 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 16 + 4); 288 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 24 + 4); 289 290 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 291 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA << 16); 292 293 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 294 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA << 16); 295 296 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 297 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA << 16); 298 299 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 300 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA << 16); 301 302 write(Dst_Surface, DstX * 8 + 96, DstY, Result); 303 } 304 } 305 } 306 }