1 /* 2 * Copyright (c) 2019, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 23 vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0); 24 #elif (defined BUFFER_4) || (defined BUFFER_5) 25 uchar sec_half_shift = (Layer_Index >> 7) * 8; 26 vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0); 27 #endif 28 29 #if defined BUFFER_0 30 Buffer_Index = 0; 31 CalculationMask = cm_pack_mask(LoadMaskTemp) & 0x000F; 32 #elif defined BUFFER_1 33 Buffer_Index = 1; 34 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 4) & 0x000F; 35 #elif defined BUFFER_2 36 Buffer_Index = 2; 37 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 8) & 0x000F; 38 #elif defined BUFFER_3 39 Buffer_Index = 3; 40 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 12) & 0x000F; 41 #elif defined BUFFER_4 42 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> sec_half_shift) & 0x000F; 43 Buffer_Index = 4; 44 #elif defined BUFFER_5 45 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> (4 + sec_half_shift)) & 0x000F; 46 Buffer_Index = 5; 47 #endif 48 49 if (CalculationMask != 0) 50 { 51 float StartX; 52 float StartY; 53 float DeltaX; 54 float DeltaY; 55 uchar RotationFlag; 56 57 /* 58 AVS Sampler 16x4 write back buffer layout for R/V, G/Y, B/U channel, each box stands for 8x1 ushort write back pixel 59 16x4 pixle deviding to 8 8x1 pixel block 60 _________________________________________________ 61 |_______Block0__________|_______Block1__________| 62 |_______Block2__________|_______Block3__________| 63 |_______Block4__________|_______Block5__________| 64 |_______Block6__________|_______Block7__________| 65 66 Write back buffer layout correlate to the block number#, each box stands for 1 GRF 67 _______________________________________________ 68 |____R0_________R2_____|____R4_________R6_____| 69 |____G0_________G2_____|____G4_________G6_____| 70 |____B0_________B2_____|____B4_________B6_____| 71 |____A0_________A2_____|____A4_________A6_____| 72 |____R1_________R3_____|____R5_________R7_____| 73 |____G1_________G3_____|____G5_________G7_____| 74 |____B1_________B3_____|____B5_________B7_____| 75 |____A1_________A3_____|____A5_________A7_____| 76 */ 77 matrix <float, 5, 8> mesg; 78 matrix <float, 2, 16> temp_rg; 79 80 mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>(); 81 uint desc_y; 82 uchar Layer_Index_45; 83 84 #ifdef BUFFER_0 85 #define INIT_INCREMENT 0.0f 86 #elif defined BUFFER_1 87 #ifdef ROTATE_90 88 #define INIT_INCREMENT 4.0f 89 #elif defined ROTATE_180 90 #define INIT_INCREMENT -4.0f 91 #elif defined ROTATE_270 92 #define INIT_INCREMENT -4.0f 93 #else 94 #define INIT_INCREMENT 4.0f 95 #endif 96 #elif defined BUFFER_2 97 #ifdef ROTATE_90 98 #define INIT_INCREMENT 8.0f 99 #elif defined ROTATE_180 100 #define INIT_INCREMENT -8.0f 101 #elif defined ROTATE_270 102 #define INIT_INCREMENT -8.0f 103 #else 104 #define INIT_INCREMENT 8.0f 105 #endif 106 #elif defined BUFFER_3 107 #ifdef ROTATE_90 108 #define INIT_INCREMENT 12.0f 109 #elif defined ROTATE_180 110 #define INIT_INCREMENT -12.0f 111 #elif defined ROTATE_270 112 #define INIT_INCREMENT -12.0f 113 #else 114 #define INIT_INCREMENT 12.0f 115 #endif 116 #elif defined BUFFER_4 117 #define INIT_INCREMENT 0.0f 118 #elif defined BUFFER_5 119 #ifdef ROTATE_90 120 #define INIT_INCREMENT 4.0f 121 #elif defined ROTATE_180 122 #define INIT_INCREMENT -4.0f 123 #elif defined ROTATE_270 124 #define INIT_INCREMENT -4.0f 125 #else 126 #define INIT_INCREMENT 4.0f 127 #endif 128 #endif 129 130 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 131 #ifdef ROTATE_90 132 StartX = Start_X(0); 133 StartY = Start_Y(0); 134 DeltaX = Delta_X(0); 135 DeltaY = Delta_Y(0); 136 RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07); 137 138 StartX = StartX + DstY * DeltaX; 139 140 if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL) 141 { 142 StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY; 143 } 144 else 145 { 146 StartY = StartY + (Dst_Width - DstX) * DeltaY; 147 } 148 149 vector <float, 16> IncrementX(INIT_INCREMENT); 150 vector <float, 16> IncrementY(Dec); 151 #elif defined ROTATE_180 152 StartX = Start_X(0); 153 StartY = Start_Y(0); 154 DeltaX = Delta_X(0); 155 DeltaY = Delta_Y(0); 156 157 RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07); 158 if (RotationFlag == MDF_FC_MIRROR_VERTICAL) 159 { 160 StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX; 161 } 162 else 163 { 164 StartX += (Dst_Width - DstX) * DeltaX; 165 } 166 167 StartY = StartY + ((Dst_Height - DstY - 1)) * DeltaY; 168 169 vector <float, 16> IncrementX(Dec); 170 vector <float, 16> IncrementY(INIT_INCREMENT); 171 #elif defined ROTATE_270 172 StartX = Start_X(0); 173 StartY = Start_Y(0); 174 DeltaX = Delta_X(0); 175 DeltaY = Delta_Y(0); 176 RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07); 177 178 StartX = StartX + (Dst_Height - DstY - 1) * DeltaX; 179 180 if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL) 181 { 182 StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY; 183 } 184 else 185 { 186 StartY = StartY + DstX * DeltaY; 187 } 188 189 vector <float, 16> IncrementX(INIT_INCREMENT); 190 vector <float, 16> IncrementY(Inc); 191 #else 192 StartX = Start_X(0); 193 StartY = Start_Y(0); 194 DeltaX = Delta_X(0); 195 DeltaY = Delta_Y(0); 196 197 RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07); 198 if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL) 199 { 200 StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX; 201 } 202 else 203 { 204 StartX += DstX * DeltaX; 205 } 206 207 StartY += DstY * DeltaY; 208 209 vector <float, 16> IncrementX(Inc); 210 vector <float, 16> IncrementY(INIT_INCREMENT); 211 #endif 212 #endif 213 214 #if (defined BUFFER_4) || (defined BUFFER_5) 215 #ifdef ROTATE_90 216 Layer_Index_45 = Layer_Index & 0x7f; 217 StartX = Start_X(Layer_Index_45); 218 StartY = Start_Y(Layer_Index_45); 219 DeltaX = Delta_X(Layer_Index_45); 220 DeltaY = Delta_Y(Layer_Index_45); 221 222 StartX = StartX + (DstY + 8 * (Layer_Index >> 7)) * DeltaX; 223 RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07); 224 225 if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL) 226 { 227 StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY; 228 } 229 else 230 { 231 StartY = StartY + (Dst_Width - DstX) * DeltaY; 232 } 233 234 vector <float, 16> IncrementX(INIT_INCREMENT); 235 vector <float, 16> IncrementY(Dec); 236 #elif defined ROTATE_180 237 Layer_Index_45 = Layer_Index & 0x7f; 238 StartX = Start_X(Layer_Index_45); 239 StartY = Start_Y(Layer_Index_45); 240 DeltaX = Delta_X(Layer_Index_45); 241 DeltaY = Delta_Y(Layer_Index_45); 242 243 RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07); 244 if (RotationFlag == MDF_FC_MIRROR_VERTICAL) 245 { 246 StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX; 247 } 248 else 249 { 250 StartX += (Dst_Width - DstX) * DeltaX; 251 } 252 253 StartY = StartY + (Dst_Height - 1 - DstY - 8 * (Layer_Index >> 7)) * DeltaY; 254 255 vector <float, 16> IncrementX(Dec); 256 vector <float, 16> IncrementY(INIT_INCREMENT); 257 #elif defined ROTATE_270 258 Layer_Index_45 = Layer_Index & 0x7f; 259 260 StartX = Start_X(Layer_Index_45); 261 StartY = Start_Y(Layer_Index_45); 262 DeltaX = Delta_X(Layer_Index_45); 263 DeltaY = Delta_Y(Layer_Index_45); 264 265 RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07); 266 267 StartX = StartX + (Dst_Height - DstY - 1 - 8 * (Layer_Index >> 7)) * DeltaX; 268 if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL) 269 { 270 StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY; 271 } 272 else 273 { 274 StartY = StartY + DstX * DeltaY; 275 } 276 277 vector <float, 16> IncrementX(INIT_INCREMENT); 278 vector <float, 16> IncrementY(Inc); 279 #else 280 Layer_Index_45 = Layer_Index & 0x7f; 281 StartX = Start_X(Layer_Index_45); 282 StartY = Start_Y(Layer_Index_45); 283 DeltaX = Delta_X(Layer_Index_45); 284 DeltaY = Delta_Y(Layer_Index_45); 285 RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07); 286 if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL) 287 { 288 StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX; 289 } 290 else 291 { 292 StartX += DstX * DeltaX; 293 } 294 295 StartY = StartY + (DstY + (Layer_Index >> 7) * 8) * DeltaY; 296 297 vector <float, 16> IncrementX(Inc); 298 vector <float, 16> IncrementY(INIT_INCREMENT); 299 #endif 300 #endif 301 302 #ifdef BUFFER_0 303 #define WriteBackBuffer DataBuffer0 304 #endif 305 #ifdef BUFFER_1 306 #define WriteBackBuffer DataBuffer1 307 #endif 308 #ifdef BUFFER_2 309 #define WriteBackBuffer DataBuffer2 310 #endif 311 #ifdef BUFFER_3 312 #define WriteBackBuffer DataBuffer3 313 #endif 314 #ifdef BUFFER_4 315 #define WriteBackBuffer DataBuffer4 316 #endif 317 #ifdef BUFFER_5 318 #define WriteBackBuffer DataBuffer5 319 #endif 320 321 mesg.format<uint, 5, 8>().select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>(); 322 323 #pragma unroll 324 for (short j = 0; j < 4; j++) 325 { 326 // Y channel 327 mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_CHANNEL_444_16; 328 329 mesg.select<1, 1, 8, 1>(1, 0) = StartX + IncrementX.select<8, 1>(0) * DeltaX; 330 mesg.select<1, 1, 8, 1>(2, 0) = StartX + IncrementX.select<8, 1>(8) * DeltaX; 331 332 mesg.select<1, 1, 8, 1>(3, 0) = StartY + IncrementY.select<8, 1>(0) * DeltaY; 333 mesg.select<1, 1, 8, 1>(4, 0) = StartY + IncrementY.select<8, 1>(8) * DeltaY; 334 335 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 336 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START; 337 #endif 338 #if (defined BUFFER_4) || (defined BUFFER_5) 339 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45; 340 #endif 341 342 cm_send( 343 temp_rg.select<1, 1, 16, 1>(0, 0), 344 mesg.format<ushort, 5, 16>(), 345 nSMPL_ENGINE, 346 desc_y, 347 0); 348 349 vector<short, 4> row_offset(Row_Offset); 350 vector<short, 4> row_offset1(Row_offset1); 351 vector<short, 4> colonm_offset(Colomn_Offset); 352 353 // Y 354 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 355 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 356 357 // UV channel 358 mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_GREEN_CHANNEL_444_16; 359 360 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 361 desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_UV_PLANE_BTI_OFFSET; 362 #endif 363 #if (defined BUFFER_4) || (defined BUFFER_5) 364 desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45 + MDF_FC_UV_PLANE_BTI_OFFSET; 365 #endif 366 cm_send(temp_rg, 367 mesg.format<ushort, 5, 16>(), 368 nSMPL_ENGINE, 369 desc_y, 370 0); 371 372 // R 373 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 374 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 375 // B 376 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 377 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 378 379 #ifdef ROTATE_90 380 IncrementX = IncrementX + 1.0f; 381 #elif defined ROTATE_180 382 IncrementY = IncrementY - 1.0f; 383 #elif defined ROTATE_270 384 IncrementX = IncrementX - 1.0f; 385 #else 386 IncrementY = IncrementY + 1.0f; 387 #endif 388 } 389 390 WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(6, 0) = 0xffff; 391 WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(14, 0) = 0xffff; 392 #undef WriteBackBuffer 393 } 394