1 /* 2 * Copyright (c) 2019, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 { 24 /* 25 Buffer layout after shuffle 26 _________________________________________________ 27 |_______Block0__________|_______Block1__________| 28 |_______Block2__________|_______Block3__________| 29 |_______Block4__________|_______Block5__________| 30 |_______Block6__________|_______Block7__________| 31 32 Write back buffer layout correlate to the block number#, each box stands for 1 GRF 33 _______________________________________________ 34 |____R0_________R1_____|____R2_________R3_____| 35 |____G0_________G1_____|____G2_________G3_____| 36 |____B0_________B1_____|____B2_________B3_____| 37 |____A0_________A1_____|____A2_________A3_____| 38 |____R4_________R5_____|____R6_________R7_____| 39 |____G4_________G5_____|____G6_________G7_____| 40 |____B4_________B5_____|____B6_________B7_____| 41 |____A4_________A5_____|____A6_________A7_____| 42 */ 43 44 matrix_ref<uint, 9, 8> Msg = DataBuffer.format<uint, 96, 8>().select<9, 1, 8, 1>(64, 0); 45 matrix_ref<uint, 8, 8> Result = Msg.select<8, 1, 8, 1>(1, 0); 46 uint descriptor; 47 48 Msg.select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>(); 49 Msg.select<1, 1, 1, 1>(0, 2) = nBLOCK_WIDTH_32 + nBLOCK_HEIGHT_8; 50 descriptor = MDF_FC_OUTPUT_BTI_START + nDPMW_MSGDSC + nMSGLEN_8; 51 uchar ChannelSwap = (WAFlag >> 16) & 0x01; 52 53 uint TempA = (DestinationRGBFormat >> 6) << 30; 54 55 matrix_ref<uint, 4, 8> TempResult4x8_Top = Result.select<4, 1, 8, 1>(0, 0); 56 matrix_ref<uint, 4, 8> TempResult4x8_Bottom = Result.select<4, 1, 8, 1>(4, 0); 57 #pragma unroll 58 for (uchar i = 0; i < 2; i++, DstY += 8) 59 { 60 // First 8x8 61 { 62 // R/G/B channel 1st half 63 matrix_ref<ushort, 1, 8> TempR0 = DataBuffer.select<1, 1, 8, 1>(8 * i, 0); 64 matrix_ref<ushort, 1, 8> TempR2 = DataBuffer.select<1, 1, 8, 1>(8 * i, 8); 65 matrix_ref<ushort, 1, 8> TempR4 = DataBuffer.select<1, 1, 8, 1>(8 * i, 16); 66 matrix_ref<ushort, 1, 8> TempR6 = DataBuffer.select<1, 1, 8, 1>(8 * i, 24); 67 68 matrix_ref<ushort, 1, 8> TempG0 = DataBuffer.select<1, 1, 8, 1>(8 * i, 32); 69 matrix_ref<ushort, 1, 8> TempG2 = DataBuffer.select<1, 1, 8, 1>(8 * i, 40); 70 matrix_ref<ushort, 1, 8> TempG4 = DataBuffer.select<1, 1, 8, 1>(8 * i, 48); 71 matrix_ref<ushort, 1, 8> TempG6 = DataBuffer.select<1, 1, 8, 1>(8 * i, 56); 72 73 matrix_ref<ushort, 1, 8> TempB0 = DataBuffer.select<1, 1, 8, 1>(8 * i + 1, 0); 74 matrix_ref<ushort, 1, 8> TempB2 = DataBuffer.select<1, 1, 8, 1>(8 * i + 1, 8); 75 matrix_ref<ushort, 1, 8> TempB4 = DataBuffer.select<1, 1, 8, 1>(8 * i + 1, 16); 76 matrix_ref<ushort, 1, 8> TempB6 = DataBuffer.select<1, 1, 8, 1>(8 * i + 1, 24); 77 78 // Rounding 79 TempR0 = cm_add<ushort>(TempR0, 0x20, SAT); 80 TempR0 = TempR0 & 0xFFC0; 81 TempR2 = cm_add<ushort>(TempR2, 0x20, SAT); 82 TempR2 = TempR2 & 0xFFC0; 83 TempR4 = cm_add<ushort>(TempR4, 0x20, SAT); 84 TempR4 = TempR4 & 0xFFC0; 85 TempR6 = cm_add<ushort>(TempR6, 0x20, SAT); 86 TempR6 = TempR6 & 0xFFC0; 87 88 TempG0 = cm_add<ushort>(TempG0, 0x20, SAT); 89 TempG0 = TempG0 & 0xFFC0; 90 TempG2 = cm_add<ushort>(TempG2, 0x20, SAT); 91 TempG2 = TempG2 & 0xFFC0; 92 TempG4 = cm_add<ushort>(TempG4, 0x20, SAT); 93 TempG4 = TempG4 & 0xFFC0; 94 TempG6 = cm_add<ushort>(TempG6, 0x20, SAT); 95 TempG6 = TempG6 & 0xFFC0; 96 97 TempB0 = cm_add<ushort>(TempB0, 0x20, SAT); 98 TempB0 = TempB0 & 0xFFC0; 99 TempB2 = cm_add<ushort>(TempB2, 0x20, SAT); 100 TempB2 = TempB2 & 0xFFC0; 101 TempB4 = cm_add<ushort>(TempB4, 0x20, SAT); 102 TempB4 = TempB4 & 0xFFC0; 103 TempB6 = cm_add<ushort>(TempB6, 0x20, SAT); 104 TempB6 = TempB6 & 0xFFC0; 105 106 if (ChannelSwap) 107 { 108 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempB0 >> 6; 109 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempG0 * 0x10; 110 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempR0 * 0x4000; 111 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempA; 112 113 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempB2 >> 6; 114 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempG2 * 0x10; 115 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempR2 * 0x4000; 116 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempA; 117 118 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempB4 >> 6; 119 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempG4 * 0x10; 120 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempR4 * 0x4000; 121 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempA; 122 123 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempB6 >> 6; 124 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempG6 * 0x10; 125 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempR6 * 0x4000; 126 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempA; 127 } 128 else 129 { 130 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempR0 >> 6; 131 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempG0 * 0x10; 132 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempB0 * 0x4000; 133 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempA; 134 135 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempR2 >> 6; 136 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempG2 * 0x10; 137 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempB2 * 0x4000; 138 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempA; 139 140 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempR4 >> 6; 141 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempG4 * 0x10; 142 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempB4 * 0x4000; 143 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempA; 144 145 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempR6 >> 6; 146 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempG6 * 0x10; 147 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempB6 * 0x4000; 148 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempA; 149 } 150 151 // R/G/B channel 2nd half 152 matrix_ref<ushort, 1, 8> TempR8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 0); 153 matrix_ref<ushort, 1, 8> TempR10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 8); 154 matrix_ref<ushort, 1, 8> TempR12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 16); 155 matrix_ref<ushort, 1, 8> TempR14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 24); 156 157 matrix_ref<ushort, 1, 8> TempG8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 32); 158 matrix_ref<ushort, 1, 8> TempG10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 40); 159 matrix_ref<ushort, 1, 8> TempG12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 48); 160 matrix_ref<ushort, 1, 8> TempG14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 4, 56); 161 162 matrix_ref<ushort, 1, 8> TempB8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 5, 0); 163 matrix_ref<ushort, 1, 8> TempB10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 5, 8); 164 matrix_ref<ushort, 1, 8> TempB12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 5, 16); 165 matrix_ref<ushort, 1, 8> TempB14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 5, 24); 166 167 // Rounding 168 TempR8 = cm_add<ushort>(TempR8, 0x20, SAT); 169 TempR8 = TempR8 & 0xFFC0; 170 TempR10 = cm_add<ushort>(TempR10, 0x20, SAT); 171 TempR10 = TempR10 & 0xFFC0; 172 TempR12 = cm_add<ushort>(TempR12, 0x20, SAT); 173 TempR12 = TempR12 & 0xFFC0; 174 TempR14 = cm_add<ushort>(TempR14, 0x20, SAT); 175 TempR14 = TempR14 & 0xFFC0; 176 177 TempG8 = cm_add<ushort>(TempG8, 0x20, SAT); 178 TempG8 = TempG8 & 0xFFC0; 179 TempG10 = cm_add<ushort>(TempG10, 0x20, SAT); 180 TempG10 = TempG10 & 0xFFC0; 181 TempG12 = cm_add<ushort>(TempG12, 0x20, SAT); 182 TempG12 = TempG12 & 0xFFC0; 183 TempG14 = cm_add<ushort>(TempG14, 0x20, SAT); 184 TempG14 = TempG14 & 0xFFC0; 185 186 TempB8 = cm_add<ushort>(TempB8, 0x20, SAT); 187 TempB8 = TempB8 & 0xFFC0; 188 TempB10 = cm_add<ushort>(TempB10, 0x20, SAT); 189 TempB10 = TempB10 & 0xFFC0; 190 TempB12 = cm_add<ushort>(TempB12, 0x20, SAT); 191 TempB12 = TempB12 & 0xFFC0; 192 TempB14 = cm_add<ushort>(TempB14, 0x20, SAT); 193 TempB14 = TempB14 & 0xFFC0; 194 195 if (ChannelSwap) 196 { 197 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempB8 >> 6; 198 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempG8 * 0x10; 199 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempR8 * 0x4000; 200 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempA; 201 202 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempB10 >> 6; 203 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempG10 * 0x10; 204 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempR10 * 0x4000; 205 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempA; 206 207 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempB12 >> 6; 208 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempG12 * 0x10; 209 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempR12 * 0x4000; 210 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempA; 211 212 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempB14 >> 6; 213 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempG14 * 0x10; 214 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempR14 * 0x4000; 215 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempA; 216 } 217 else 218 { 219 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempR8 >> 6; 220 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempG8 * 0x10; 221 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempB8 * 0x4000; 222 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempA; 223 224 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempR10 >> 6; 225 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempG10 * 0x10; 226 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempB10 * 0x4000; 227 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempA; 228 229 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempR12 >> 6; 230 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempG12 * 0x10; 231 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempB12 * 0x4000; 232 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempA; 233 234 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempR14 >> 6; 235 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempG14 * 0x10; 236 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempB14 * 0x4000; 237 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempA; 238 } 239 240 Msg.select<1, 1, 1, 1>(0, 0) = DstX * 4; 241 Msg.select<1, 1, 1, 1>(0, 1) = DstY; 242 243 cm_send(NULL, 244 Msg, 245 nDATAPORT_DC1, 246 descriptor, 247 0); 248 } 249 250 // Second 8x8 251 { 252 // R/G/B channel 1st half 253 matrix_ref<ushort, 1, 8> TempR0 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 0); 254 matrix_ref<ushort, 1, 8> TempR2 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 8); 255 matrix_ref<ushort, 1, 8> TempR4 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 16); 256 matrix_ref<ushort, 1, 8> TempR6 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 24); 257 258 matrix_ref<ushort, 1, 8> TempG0 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 32); 259 matrix_ref<ushort, 1, 8> TempG2 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 40); 260 matrix_ref<ushort, 1, 8> TempG4 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 48); 261 matrix_ref<ushort, 1, 8> TempG6 = DataBuffer.select<1, 1, 8, 1>(8 * i + 2, 56); 262 263 matrix_ref<ushort, 1, 8> TempB0 = DataBuffer.select<1, 1, 8, 1>(8 * i + 3, 0); 264 matrix_ref<ushort, 1, 8> TempB2 = DataBuffer.select<1, 1, 8, 1>(8 * i + 3, 8); 265 matrix_ref<ushort, 1, 8> TempB4 = DataBuffer.select<1, 1, 8, 1>(8 * i + 3, 16); 266 matrix_ref<ushort, 1, 8> TempB6 = DataBuffer.select<1, 1, 8, 1>(8 * i + 3, 24); 267 268 // Rounding 269 TempR0 = cm_add<ushort>(TempR0, 0x20, SAT); 270 TempR0 = TempR0 & 0xFFC0; 271 TempR2 = cm_add<ushort>(TempR2, 0x20, SAT); 272 TempR2 = TempR2 & 0xFFC0; 273 TempR4 = cm_add<ushort>(TempR4, 0x20, SAT); 274 TempR4 = TempR4 & 0xFFC0; 275 TempR6 = cm_add<ushort>(TempR6, 0x20, SAT); 276 TempR6 = TempR6 & 0xFFC0; 277 278 TempG0 = cm_add<ushort>(TempG0, 0x20, SAT); 279 TempG0 = TempG0 & 0xFFC0; 280 TempG2 = cm_add<ushort>(TempG2, 0x20, SAT); 281 TempG2 = TempG2 & 0xFFC0; 282 TempG4 = cm_add<ushort>(TempG4, 0x20, SAT); 283 TempG4 = TempG4 & 0xFFC0; 284 TempG6 = cm_add<ushort>(TempG6, 0x20, SAT); 285 TempG6 = TempG6 & 0xFFC0; 286 287 TempB0 = cm_add<ushort>(TempB0, 0x20, SAT); 288 TempB0 = TempB0 & 0xFFC0; 289 TempB2 = cm_add<ushort>(TempB2, 0x20, SAT); 290 TempB2 = TempB2 & 0xFFC0; 291 TempB4 = cm_add<ushort>(TempB4, 0x20, SAT); 292 TempB4 = TempB4 & 0xFFC0; 293 TempB6 = cm_add<ushort>(TempB6, 0x20, SAT); 294 TempB6 = TempB6 & 0xFFC0; 295 296 if (ChannelSwap) 297 { 298 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempB0 >> 6; 299 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempG0 * 0x10; 300 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempR0 * 0x4000; 301 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempA; 302 303 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempB2 >> 6; 304 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempG2 * 0x10; 305 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempR2 * 0x4000; 306 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempA; 307 308 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempB4 >> 6; 309 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempG4 * 0x10; 310 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempR4 * 0x4000; 311 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempA; 312 313 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempB6 >> 6; 314 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempG6 * 0x10; 315 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempR6 * 0x4000; 316 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempA; 317 } 318 else 319 { 320 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempR0 >> 6; 321 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempG0 * 0x10; 322 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempB0 * 0x4000; 323 TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(0, 0) + TempA; 324 325 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempR2 >> 6; 326 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempG2 * 0x10; 327 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempB2 * 0x4000; 328 TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(1, 0) + TempA; 329 330 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempR4 >> 6; 331 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempG4 * 0x10; 332 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempB4 * 0x4000; 333 TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(2, 0) + TempA; 334 335 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempR6 >> 6; 336 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempG6 * 0x10; 337 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempB6 * 0x4000; 338 TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Top.select<1, 1, 8, 1>(3, 0) + TempA; 339 } 340 341 // R/G/B channel 2nd half 342 matrix_ref<ushort, 1, 8> TempR8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 0); 343 matrix_ref<ushort, 1, 8> TempR10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 8); 344 matrix_ref<ushort, 1, 8> TempR12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 16); 345 matrix_ref<ushort, 1, 8> TempR14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 24); 346 347 matrix_ref<ushort, 1, 8> TempG8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 32); 348 matrix_ref<ushort, 1, 8> TempG10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 40); 349 matrix_ref<ushort, 1, 8> TempG12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 48); 350 matrix_ref<ushort, 1, 8> TempG14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 6, 56); 351 352 matrix_ref<ushort, 1, 8> TempB8 = DataBuffer.select<1, 1, 8, 1>(8 * i + 7, 0); 353 matrix_ref<ushort, 1, 8> TempB10 = DataBuffer.select<1, 1, 8, 1>(8 * i + 7, 8); 354 matrix_ref<ushort, 1, 8> TempB12 = DataBuffer.select<1, 1, 8, 1>(8 * i + 7, 16); 355 matrix_ref<ushort, 1, 8> TempB14 = DataBuffer.select<1, 1, 8, 1>(8 * i + 7, 24); 356 357 // Rounding 358 TempR8 = cm_add<ushort>(TempR8, 0x20, SAT); 359 TempR8 = TempR8 & 0xFFC0; 360 TempR10 = cm_add<ushort>(TempR10, 0x20, SAT); 361 TempR10 = TempR10 & 0xFFC0; 362 TempR12 = cm_add<ushort>(TempR12, 0x20, SAT); 363 TempR12 = TempR12 & 0xFFC0; 364 TempR14 = cm_add<ushort>(TempR14, 0x20, SAT); 365 TempR14 = TempR14 & 0xFFC0; 366 367 TempG8 = cm_add<ushort>(TempG8, 0x20, SAT); 368 TempG8 = TempG8 & 0xFFC0; 369 TempG10 = cm_add<ushort>(TempG10, 0x20, SAT); 370 TempG10 = TempG10 & 0xFFC0; 371 TempG12 = cm_add<ushort>(TempG12, 0x20, SAT); 372 TempG12 = TempG12 & 0xFFC0; 373 TempG14 = cm_add<ushort>(TempG14, 0x20, SAT); 374 TempG14 = TempG14 & 0xFFC0; 375 376 TempB8 = cm_add<ushort>(TempB8, 0x20, SAT); 377 TempB8 = TempB8 & 0xFFC0; 378 TempB10 = cm_add<ushort>(TempB10, 0x20, SAT); 379 TempB10 = TempB10 & 0xFFC0; 380 TempB12 = cm_add<ushort>(TempB12, 0x20, SAT); 381 TempB12 = TempB12 & 0xFFC0; 382 TempB14 = cm_add<ushort>(TempB14, 0x20, SAT); 383 TempB14 = TempB14 & 0xFFC0; 384 385 if (ChannelSwap) 386 { 387 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempB8 >> 6; 388 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempG8 * 0x10; 389 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempR8 * 0x4000; 390 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempA; 391 392 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempB10 >> 6; 393 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempG10 * 0x10; 394 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempR10 * 0x4000; 395 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempA; 396 397 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempB12 >> 6; 398 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempG12 * 0x10; 399 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempR12 * 0x4000; 400 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempA; 401 402 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempB14 >> 6; 403 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempG14 * 0x10; 404 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempR14 * 0x4000; 405 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempA; 406 } 407 else 408 { 409 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempR8 >> 6; 410 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempG8 * 0x10; 411 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempB8 * 0x4000; 412 TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(0, 0) + TempA; 413 414 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempR10 >> 6; 415 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempG10 * 0x10; 416 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempB10 * 0x4000; 417 TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(1, 0) + TempA; 418 419 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempR12 >> 6; 420 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempG12 * 0x10; 421 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempB12 * 0x4000; 422 TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(2, 0) + TempA; 423 424 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempR14 >> 6; 425 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempG14 * 0x10; 426 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempB14 * 0x4000; 427 TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) = TempResult4x8_Bottom.select<1, 1, 8, 1>(3, 0) + TempA; 428 } 429 430 Msg.select<1, 1, 1, 1>(0, 0) = DstX * 4 + 32; 431 Msg.select<1, 1, 1, 1>(0, 1) = DstY; 432 433 cm_send(NULL, 434 Msg, 435 nDATAPORT_DC1, 436 descriptor, 437 0); 438 } 439 } 440 //} 441 }