1 /* 2 * Copyright (c) 2019, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 23 vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0); 24 #elif (defined BUFFER_4) || (defined BUFFER_5) 25 uchar sec_half_shift = (Layer_Index >> 7) * 8; 26 vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0); 27 #endif 28 29 #if defined BUFFER_0 30 Buffer_Index = 0; 31 #elif defined BUFFER_1 32 Buffer_Index = 1; 33 #elif defined BUFFER_2 34 Buffer_Index = 2; 35 #elif defined BUFFER_3 36 Buffer_Index = 3; 37 #elif defined BUFFER_4 38 Buffer_Index = 4; 39 #elif defined BUFFER_5 40 Buffer_Index = 5; 41 #endif 42 43 ushort LoadMask = cm_pack_mask(LoadMaskTemp); 44 CalculationMask = LoadMask == 0 ? 0x00 : 0xFF; 45 46 if (CalculationMask != 0) 47 { 48 float StartX; 49 float StartY; 50 float DeltaX; 51 float DeltaY; 52 /* 53 AVS Sampler 16x4 write back buffer layout for R/V, G/Y, B/U channel, each box stands for 8x1 ushort write back pixel 54 16x4 pixle deviding to 8 8x1 pixel block 55 _________________________________________________ 56 |_______Block0__________|_______Block1__________| 57 |_______Block2__________|_______Block3__________| 58 |_______Block4__________|_______Block5__________| 59 |_______Block6__________|_______Block7__________| 60 61 Write back buffer layout correlate to the block number#, each box stands for 1 GRF 62 _______________________________________________ 63 |____R0_________R2_____|____R4_________R6_____| 64 |____G0_________G2_____|____G4_________G6_____| 65 |____B0_________B2_____|____B4_________B6_____| 66 |____A0_________A2_____|____A4_________A6_____| 67 |____R1_________R3_____|____R5_________R7_____| 68 |____G1_________G3_____|____G5_________G7_____| 69 |____B1_________B3_____|____B5_________B7_____| 70 |____A1_________A3_____|____A5_________A7_____| 71 */ 72 matrix <float, 5, 8> mesg; 73 matrix <float, 1, 16> temp_r; 74 75 mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>(); 76 uint desc_y; 77 uchar Layer_Index_45; 78 79 #if (defined BUFFER_0) || (defined BUFFER_1) 80 #define INIT_INCREMENT 0.0f 81 #elif (defined BUFFER_2) || (defined BUFFER_3) 82 #define INIT_INCREMENT 4.0f 83 #endif 84 85 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3) 86 StartX = Start_X(0); 87 StartY = Start_Y(0); 88 DeltaX = Delta_X(0); 89 DeltaY = Delta_Y(0); 90 91 StartX += DstX * DeltaX; 92 StartY += DstY * DeltaY; 93 94 vector <float, 16> IncrementX(Inc); 95 vector <float, 16> IncrementY(INIT_INCREMENT); 96 #endif 97 98 #ifdef BUFFER_0 99 #define WriteBackBuffer DataBuffer0 100 #endif 101 #ifdef BUFFER_1 102 #define WriteBackBuffer DataBuffer1 103 #endif 104 #ifdef BUFFER_2 105 #define WriteBackBuffer DataBuffer2 106 #endif 107 #ifdef BUFFER_3 108 #define WriteBackBuffer DataBuffer3 109 #endif 110 111 mesg.format<uint, 5, 8>().select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>(); 112 mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = 0x0; 113 114 #pragma unroll 115 for (short j = 0; j < 4; j++) 116 { 117 // Y channel 118 mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_CHANNEL_444_16; 119 120 mesg.select<1, 1, 8, 1>(1, 0) = StartX + IncrementX.select<8, 1>(0) * DeltaX; 121 mesg.select<1, 1, 8, 1>(2, 0) = StartX + IncrementX.select<8, 1>(8) * DeltaX; 122 123 mesg.select<1, 1, 8, 1>(3, 0) = StartY + IncrementY.select<8, 1>(0) * 2 * DeltaY; 124 mesg.select<1, 1, 8, 1>(4, 0) = StartY + IncrementY.select<8, 1>(8) * 2 * DeltaY; 125 126 #if (defined BUFFER_0) || (defined BUFFER_2) 127 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START; 128 #elif (defined BUFFER_1) || (defined BUFFER_3) 129 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2; 130 #endif 131 cm_send( 132 temp_r, 133 mesg.format<ushort, 5, 16>(), 134 nSMPL_ENGINE, 135 desc_y, 136 0); 137 138 vector<short, 4> row_offset(Row_Offset); 139 vector<short, 4> row_offset1(Row_offset1); 140 vector<short, 4> colonm_offset(Colomn_Offset); 141 142 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 143 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 144 145 // U channel 146 #if (defined BUFFER_0) || (defined BUFFER_2) 147 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_U_PLANE_BTI_OFFSET; 148 #elif (defined BUFFER_1) || (defined BUFFER_3) 149 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_F2 + MDF_FC_U_PLANE_BTI_OFFSET; 150 #endif 151 cm_send( 152 temp_r, 153 mesg.format<ushort, 5, 16>(), 154 nSMPL_ENGINE, 155 desc_y, 156 0); 157 158 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 159 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 160 161 // V channel 162 #if (defined BUFFER_0) || (defined BUFFER_2) 163 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_V << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_V_PLANE_BTI_OFFSET; 164 #elif (defined BUFFER_1) || (defined BUFFER_3) 165 desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_V << 8) + MDF_FC_INPUT_BTI_F2 + MDF_FC_V_PLANE_BTI_OFFSET; 166 #endif 167 cm_send( 168 temp_r, 169 mesg.format<ushort, 5, 16>(), 170 nSMPL_ENGINE, 171 desc_y, 172 0); 173 174 // V channel 175 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT); 176 WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT); 177 178 IncrementY = IncrementY + 1.0f; 179 } 180 181 WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(6, 0) = 0xffff; 182 WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(14, 0) = 0xffff; 183 184 #undef WriteBackBuffer 185 }