/*
* Copyright (c) 2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
21 */ 22 23 { 24 /* 25 Buffer layout after shuffle 26 _________________________________________________ 27 |_______Block0__________|_______Block1__________| 28 |_______Block2__________|_______Block3__________| 29 |_______Block4__________|_______Block5__________| 30 |_______Block6__________|_______Block7__________| 31 32 Write back buffer layout correlate to the block number#, each box stands for 1 GRF 33 _______________________________________________ 34 |____R0_________R1_____|____R2_________R3_____| 35 |____G0_________G1_____|____G2_________G3_____| 36 |____B0_________B1_____|____B2_________B3_____| 37 |____A0_________A1_____|____A2_________A3_____| 38 |____R4_________R5_____|____R6_________R7_____| 39 |____G4_________G5_____|____G6_________G7_____| 40 |____B4_________B5_____|____B6_________B7_____| 41 |____A4_________A5_____|____A6_________A7_____| 42 */ 43 44 matrix_ref<uint, 8, 8> Result = DataBuffer.format<uint, 96, 8>().select<8, 1, 8, 1>(64, 0); 45 46 SurfaceIndex Dst_Surface(MDF_FC_OUTPUT_BTI_START); 47 matrix_ref<uint, 4, 8> TempResult4x8_Top = Result.select<4, 1, 8, 1>(0, 0); 48 matrix_ref<uint, 4, 8> TempResult4x8_Bottom = Result.select<4, 1, 8, 1>(4, 0); 49 50 #pragma unroll 51 for (uchar i = 0; i < 2; i++, DstY += 8) 52 { 53 // First 8x8 54 { 55 56 // first 8x4 57 { 58 // R/G/B channel top half 59 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 0); 60 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 8); 61 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 16); 62 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 24); 63 64 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 32); 65 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 40); 66 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 48); 67 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 56); 68 69 matrix_ref<ushort, 1, 4> 
TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 0); 70 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 8); 71 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 16); 72 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 24); 73 74 matrix_ref<ushort, 1, 4> TempA0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 32); 75 matrix_ref<ushort, 1, 4> TempA2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 40); 76 matrix_ref<ushort, 1, 4> TempA4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 48); 77 matrix_ref<ushort, 1, 4> TempA6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 56); 78 79 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 80 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA0 << 16); 81 82 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 83 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA2 << 16); 84 85 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 86 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA4 << 16); 87 88 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 89 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA6 << 16); 90 91 // R/G/B channel bottom half 92 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 0); 93 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 8); 94 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 16); 95 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 24); 96 97 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 32); 98 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 40); 99 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 48); 100 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 56); 101 102 matrix_ref<ushort, 1, 4> TempB8 = 
DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 0); 103 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 8); 104 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 16); 105 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 24); 106 107 matrix_ref<ushort, 1, 4> TempA8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 32); 108 matrix_ref<ushort, 1, 4> TempA10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 40); 109 matrix_ref<ushort, 1, 4> TempA12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 48); 110 matrix_ref<ushort, 1, 4> TempA14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 56); 111 112 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 113 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA8 << 16); 114 115 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 116 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA10 << 16); 117 118 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 119 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA12 << 16); 120 121 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 122 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA14 << 16); 123 124 write(Dst_Surface, DstX * 8, DstY, Result); 125 } 126 127 // second 8x4 128 { 129 // R/G/B channel top half 130 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 0 + 4); 131 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 8 + 4); 132 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 16 + 4); 133 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 24 + 4); 134 135 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i, 32 + 4); 136 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i, 40 + 4); 137 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i, 
48 + 4); 138 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i, 56 + 4); 139 140 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 0 + 4); 141 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 8 + 4); 142 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 16 + 4); 143 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 24 + 4); 144 145 matrix_ref<ushort, 1, 4> TempA0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 32 + 4); 146 matrix_ref<ushort, 1, 4> TempA2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 40 + 4); 147 matrix_ref<ushort, 1, 4> TempA4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 48 + 4); 148 matrix_ref<ushort, 1, 4> TempA6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 1, 56 + 4); 149 150 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 151 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA0 << 16); 152 153 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 154 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA2 << 16); 155 156 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 157 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA4 << 16); 158 159 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 160 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA6 << 16); 161 162 // R/G/B channel bottom half 163 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 0 + 4); 164 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 8 + 4); 165 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 16 + 4); 166 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 24 + 4); 167 168 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 32 + 4); 169 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 40 + 4); 
170 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 48 + 4); 171 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 4, 56 + 4); 172 173 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 0 + 4); 174 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 8 + 4); 175 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 16 + 4); 176 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 24 + 4); 177 178 matrix_ref<ushort, 1, 4> TempA8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 32 + 4); 179 matrix_ref<ushort, 1, 4> TempA10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 40 + 4); 180 matrix_ref<ushort, 1, 4> TempA12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 48 + 4); 181 matrix_ref<ushort, 1, 4> TempA14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 5, 56 + 4); 182 183 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 184 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA8 << 16); 185 186 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 187 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA10 << 16); 188 189 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 190 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA12 << 16); 191 192 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 193 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA14 << 16); 194 195 write(Dst_Surface, DstX * 8 + 32, DstY, Result); 196 } 197 } 198 199 // Second 8x8 200 { 201 // first 8x4 202 { 203 // R/G/B channel top half 204 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 0); 205 matrix_ref<ushort, 1, 4> TempR2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 8); 206 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 16); 207 matrix_ref<ushort, 1, 4> TempR6 = 
DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 24); 208 209 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 32); 210 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 40); 211 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 48); 212 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 56); 213 214 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 0); 215 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 8); 216 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 16); 217 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 24); 218 219 matrix_ref<ushort, 1, 4> TempA0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 32); 220 matrix_ref<ushort, 1, 4> TempA2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 40); 221 matrix_ref<ushort, 1, 4> TempA4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 48); 222 matrix_ref<ushort, 1, 4> TempA6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 56); 223 224 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 225 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA0 << 16); 226 227 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 228 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA2 << 16); 229 230 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 231 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA4 << 16); 232 233 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 234 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA6 << 16); 235 236 // R/G/B channel bottom half 237 matrix_ref<ushort, 1, 4> TempR8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 0); 238 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 8); 239 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 16); 240 matrix_ref<ushort, 
1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 24); 241 242 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 32); 243 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 40); 244 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 48); 245 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 56); 246 247 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 0); 248 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 8); 249 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 16); 250 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 24); 251 252 matrix_ref<ushort, 1, 4> TempA8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 32); 253 matrix_ref<ushort, 1, 4> TempA10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 40); 254 matrix_ref<ushort, 1, 4> TempA12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 48); 255 matrix_ref<ushort, 1, 4> TempA14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 56); 256 257 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 258 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA8 << 16); 259 260 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 261 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA10 << 16); 262 263 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 264 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA12 << 16); 265 266 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 267 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA14 << 16); 268 269 write(Dst_Surface, DstX * 8 + 64, DstY, Result); 270 } 271 272 // second 8x4 273 { 274 // R/G/B channel top half 275 matrix_ref<ushort, 1, 4> TempR0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 0 + 4); 276 matrix_ref<ushort, 1, 4> TempR2 = 
DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 8 + 4); 277 matrix_ref<ushort, 1, 4> TempR4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 16 + 4); 278 matrix_ref<ushort, 1, 4> TempR6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 24 + 4); 279 280 matrix_ref<ushort, 1, 4> TempG0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 32 + 4); 281 matrix_ref<ushort, 1, 4> TempG2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 40 + 4); 282 matrix_ref<ushort, 1, 4> TempG4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 48 + 4); 283 matrix_ref<ushort, 1, 4> TempG6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 2, 56 + 4); 284 285 matrix_ref<ushort, 1, 4> TempB0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 0 + 4); 286 matrix_ref<ushort, 1, 4> TempB2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 8 + 4); 287 matrix_ref<ushort, 1, 4> TempB4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 16 + 4); 288 matrix_ref<ushort, 1, 4> TempB6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 24 + 4); 289 290 matrix_ref<ushort, 1, 4> TempA0 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 32 + 4); 291 matrix_ref<ushort, 1, 4> TempA2 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 40 + 4); 292 matrix_ref<ushort, 1, 4> TempA4 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 48 + 4); 293 matrix_ref<ushort, 1, 4> TempA6 = DataBuffer.select<1, 1, 4, 1>(8 * i + 3, 56 + 4); 294 295 TempResult4x8_Top.select<1, 1, 4, 2>(0, 0) = (TempB0) + (TempG0 << 16); 296 TempResult4x8_Top.select<1, 1, 4, 2>(0, 1) = (TempR0) + (TempA0 << 16); 297 298 TempResult4x8_Top.select<1, 1, 4, 2>(1, 0) = (TempB2) + (TempG2 << 16); 299 TempResult4x8_Top.select<1, 1, 4, 2>(1, 1) = (TempR2) + (TempA2 << 16); 300 301 TempResult4x8_Top.select<1, 1, 4, 2>(2, 0) = (TempB4) + (TempG4 << 16); 302 TempResult4x8_Top.select<1, 1, 4, 2>(2, 1) = (TempR4) + (TempA4 << 16); 303 304 TempResult4x8_Top.select<1, 1, 4, 2>(3, 0) = (TempB6) + (TempG6 << 16); 305 TempResult4x8_Top.select<1, 1, 4, 2>(3, 1) = (TempR6) + (TempA6 << 16); 306 307 // R/G/B channel bottom half 308 matrix_ref<ushort, 1, 4> TempR8 = 
DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 0 + 4); 309 matrix_ref<ushort, 1, 4> TempR10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 8 + 4); 310 matrix_ref<ushort, 1, 4> TempR12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 16 + 4); 311 matrix_ref<ushort, 1, 4> TempR14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 24 + 4); 312 313 matrix_ref<ushort, 1, 4> TempG8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 32 + 4); 314 matrix_ref<ushort, 1, 4> TempG10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 40 + 4); 315 matrix_ref<ushort, 1, 4> TempG12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 48 + 4); 316 matrix_ref<ushort, 1, 4> TempG14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 6, 56 + 4); 317 318 matrix_ref<ushort, 1, 4> TempB8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 0 + 4); 319 matrix_ref<ushort, 1, 4> TempB10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 8 + 4); 320 matrix_ref<ushort, 1, 4> TempB12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 16 + 4); 321 matrix_ref<ushort, 1, 4> TempB14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 24 + 4); 322 323 matrix_ref<ushort, 1, 4> TempA8 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 32 + 4); 324 matrix_ref<ushort, 1, 4> TempA10 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 40 + 4); 325 matrix_ref<ushort, 1, 4> TempA12 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 48 + 4); 326 matrix_ref<ushort, 1, 4> TempA14 = DataBuffer.select<1, 1, 4, 1>(8 * i + 7, 56 + 4); 327 328 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 0) = (TempB8) + (TempG8 << 16); 329 TempResult4x8_Bottom.select<1, 1, 4, 2>(0, 1) = (TempR8) + (TempA8 << 16); 330 331 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 0) = (TempB10) + (TempG10 << 16); 332 TempResult4x8_Bottom.select<1, 1, 4, 2>(1, 1) = (TempR10) + (TempA10 << 16); 333 334 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 0) = (TempB12) + (TempG12 << 16); 335 TempResult4x8_Bottom.select<1, 1, 4, 2>(2, 1) = (TempR12) + (TempA12 << 16); 336 337 TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 0) = (TempB14) + (TempG14 << 16); 338 
TempResult4x8_Bottom.select<1, 1, 4, 2>(3, 1) = (TempR14) + (TempA14 << 16); 339 340 write(Dst_Surface, DstX * 8 + 96, DstY, Result); 341 } 342 } 343 } 344 }