1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
23 vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0);
24 #elif (defined BUFFER_4) || (defined BUFFER_5)
25 uchar sec_half_shift = (Layer_Index >> 7) * 8;
26 vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0);
27 #endif
28 
29 #if defined BUFFER_0
30 Buffer_Index = 0;
31 CalculationMask = cm_pack_mask(LoadMaskTemp) & 0x000F;
32 #elif defined BUFFER_1
33 Buffer_Index = 1;
34 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 4) & 0x000F;
35 #elif defined BUFFER_2
36 Buffer_Index = 2;
37 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 8) & 0x000F;
38 #elif defined BUFFER_3
39 Buffer_Index = 3;
40 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 12) & 0x000F;
41 #elif defined BUFFER_4
42 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> sec_half_shift) & 0x000F;
43 Buffer_Index = 4;
44 #elif defined BUFFER_5
45 CalculationMask = (cm_pack_mask(LoadMaskTemp) >> (4 + sec_half_shift)) & 0x000F;
46 Buffer_Index = 5;
47 #endif
48 
49 if (CalculationMask != 0)
50 {
51     float StartX;
52     float StartY;
53     float DeltaX;
54     float DeltaY;
55     uchar RotationFlag;
56 
57     /*
58     AVS Sampler 16x4 write back buffer layout for R/V, G/Y, B/U channel, each box stands for 8x1 ushort write back pixel
59     16x4 pixle deviding to 8 8x1 pixel block
60     _________________________________________________
61     |_______Block0__________|_______Block1__________|
62     |_______Block2__________|_______Block3__________|
63     |_______Block4__________|_______Block5__________|
64     |_______Block6__________|_______Block7__________|
65 
66     Write back buffer layout correlate to the block number#, each box stands for 1 GRF
67     _______________________________________________
68     |____R0_________R2_____|____R4_________R6_____|
69     |____G0_________G2_____|____G4_________G6_____|
70     |____B0_________B2_____|____B4_________B6_____|
71     |____A0_________A2_____|____A4_________A6_____|
72     |____R1_________R3_____|____R5_________R7_____|
73     |____G1_________G3_____|____G5_________G7_____|
74     |____B1_________B3_____|____B5_________B7_____|
75     |____A1_________A3_____|____A5_________A7_____|
76     */
77     matrix <float, 5, 8> mesg;
78     matrix <float, 2, 16> temp_rg;
79 
80     mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>();
81     uint desc_y;
82     uchar Layer_Index_45;
83 
84 #ifdef BUFFER_0
85 #define INIT_INCREMENT 0.0f
86 #elif defined BUFFER_1
87 #ifdef ROTATE_90
88 #define INIT_INCREMENT 4.0f
89 #elif defined ROTATE_180
90 #define INIT_INCREMENT -4.0f
91 #elif defined ROTATE_270
92 #define INIT_INCREMENT -4.0f
93 #else
94 #define INIT_INCREMENT 4.0f
95 #endif
96 #elif defined BUFFER_2
97 #ifdef ROTATE_90
98 #define INIT_INCREMENT 8.0f
99 #elif defined ROTATE_180
100 #define INIT_INCREMENT -8.0f
101 #elif defined ROTATE_270
102 #define INIT_INCREMENT -8.0f
103 #else
104 #define INIT_INCREMENT 8.0f
105 #endif
106 #elif defined BUFFER_3
107 #ifdef ROTATE_90
108 #define INIT_INCREMENT 12.0f
109 #elif defined ROTATE_180
110 #define INIT_INCREMENT -12.0f
111 #elif defined ROTATE_270
112 #define INIT_INCREMENT -12.0f
113 #else
114 #define INIT_INCREMENT 12.0f
115 #endif
116 #elif defined BUFFER_4
117 #define INIT_INCREMENT 0.0f
118 #elif defined BUFFER_5
119 #ifdef ROTATE_90
120 #define INIT_INCREMENT 4.0f
121 #elif defined ROTATE_180
122 #define INIT_INCREMENT -4.0f
123 #elif defined ROTATE_270
124 #define INIT_INCREMENT -4.0f
125 #else
126 #define INIT_INCREMENT 4.0f
127 #endif
128 #endif
129 
130 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
131 #ifdef ROTATE_90
132     StartX = Start_X(0);
133     StartY = Start_Y(0);
134     DeltaX = Delta_X(0);
135     DeltaY = Delta_Y(0);
136     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
137 
138     StartX = StartX + DstY * DeltaX;
139 
140     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL)
141     {
142         StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY;
143     }
144     else
145     {
146         StartY = StartY + (Dst_Width - DstX) * DeltaY;
147     }
148 
149     vector <float, 16> IncrementX(INIT_INCREMENT);
150     vector <float, 16> IncrementY(Dec);
151 #elif defined ROTATE_180
152     StartX = Start_X(0);
153     StartY = Start_Y(0);
154     DeltaX = Delta_X(0);
155     DeltaY = Delta_Y(0);
156 
157     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
158     if (RotationFlag == MDF_FC_MIRROR_VERTICAL)
159     {
160         StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX;
161     }
162     else
163     {
164         StartX += (Dst_Width - DstX) * DeltaX;
165     }
166 
167     StartY = StartY + ((Dst_Height - DstY - 1)) * DeltaY;
168 
169     vector <float, 16> IncrementX(Dec);
170     vector <float, 16> IncrementY(INIT_INCREMENT);
171 #elif defined ROTATE_270
172     StartX = Start_X(0);
173     StartY = Start_Y(0);
174     DeltaX = Delta_X(0);
175     DeltaY = Delta_Y(0);
176     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
177 
178     StartX = StartX + (Dst_Height - DstY - 1) * DeltaX;
179 
180     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL)
181     {
182         StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;
183     }
184     else
185     {
186         StartY = StartY + DstX * DeltaY;
187     }
188 
189     vector <float, 16> IncrementX(INIT_INCREMENT);
190     vector <float, 16> IncrementY(Inc);
191 #else
192     StartX = Start_X(0);
193     StartY = Start_Y(0);
194     DeltaX = Delta_X(0);
195     DeltaY = Delta_Y(0);
196 
197     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
198     if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL)
199     {
200         StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
201     }
202     else
203     {
204         StartX += DstX * DeltaX;
205     }
206 
207     StartY += DstY * DeltaY;
208 
209     vector <float, 16> IncrementX(Inc);
210     vector <float, 16> IncrementY(INIT_INCREMENT);
211 #endif
212 #endif
213 
214 #if (defined BUFFER_4) || (defined BUFFER_5)
215 #ifdef ROTATE_90
216     Layer_Index_45 = Layer_Index & 0x7f;
217     StartX = Start_X(Layer_Index_45);
218     StartY = Start_Y(Layer_Index_45);
219     DeltaX = Delta_X(Layer_Index_45);
220     DeltaY = Delta_Y(Layer_Index_45);
221 
222     StartX = StartX + (DstY + 8 * (Layer_Index >> 7)) * DeltaX;
223     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
224 
225     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL)
226     {
227         StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY;
228     }
229     else
230     {
231         StartY = StartY + (Dst_Width - DstX) * DeltaY;
232     }
233 
234     vector <float, 16> IncrementX(INIT_INCREMENT);
235     vector <float, 16> IncrementY(Dec);
236 #elif defined ROTATE_180
237     Layer_Index_45 = Layer_Index & 0x7f;
238     StartX = Start_X(Layer_Index_45);
239     StartY = Start_Y(Layer_Index_45);
240     DeltaX = Delta_X(Layer_Index_45);
241     DeltaY = Delta_Y(Layer_Index_45);
242 
243     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
244     if (RotationFlag == MDF_FC_MIRROR_VERTICAL)
245     {
246         StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX;
247     }
248     else
249     {
250         StartX += (Dst_Width - DstX) * DeltaX;
251     }
252 
253     StartY = StartY + (Dst_Height - 1 - DstY - 8 * (Layer_Index >> 7)) * DeltaY;
254 
255     vector <float, 16> IncrementX(Dec);
256     vector <float, 16> IncrementY(INIT_INCREMENT);
257 #elif defined ROTATE_270
258     Layer_Index_45 = Layer_Index & 0x7f;
259 
260     StartX = Start_X(Layer_Index_45);
261     StartY = Start_Y(Layer_Index_45);
262     DeltaX = Delta_X(Layer_Index_45);
263     DeltaY = Delta_Y(Layer_Index_45);
264 
265     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
266 
267     StartX = StartX + (Dst_Height - 1 - DstY - 8 * (Layer_Index >> 7)) * DeltaX;
268     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL)
269     {
270         StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;
271     }
272     else
273     {
274         StartY = StartY + DstX * DeltaY;
275     }
276 
277     vector <float, 16> IncrementX(INIT_INCREMENT);
278     vector <float, 16> IncrementY(Inc);
279 #else
280     Layer_Index_45 = Layer_Index & 0x7f;
281     StartX = Start_X(Layer_Index_45);
282     StartY = Start_Y(Layer_Index_45);
283     DeltaX = Delta_X(Layer_Index_45);
284     DeltaY = Delta_Y(Layer_Index_45);
285     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
286     if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL)
287     {
288         StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
289     }
290     else
291     {
292         StartX += DstX * DeltaX;
293     }
294 
295     StartY = StartY + (DstY + (Layer_Index >> 7) * 8) * DeltaY;
296 
297     vector <float, 16> IncrementX(Inc);
298     vector <float, 16> IncrementY(INIT_INCREMENT);
299 #endif
300 #endif
301 
302 #ifdef BUFFER_0
303 #define WriteBackBuffer DataBuffer0
304 #endif
305 #ifdef BUFFER_1
306 #define WriteBackBuffer DataBuffer1
307 #endif
308 #ifdef BUFFER_2
309 #define WriteBackBuffer DataBuffer2
310 #endif
311 #ifdef BUFFER_3
312 #define WriteBackBuffer DataBuffer3
313 #endif
314 #ifdef BUFFER_4
315 #define WriteBackBuffer DataBuffer4
316 #endif
317 #ifdef BUFFER_5
318 #define WriteBackBuffer DataBuffer5
319 #endif
320 
321     mesg.format<uint, 5, 8>().select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>();
322 
323 #pragma unroll
324     for (short j = 0; j < 4; j++)
325     {
326         // Y channel
327         mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_CHANNEL_444_16;
328 
329         mesg.select<1, 1, 8, 1>(1, 0) = StartX + IncrementX.select<8, 1>(0) * DeltaX;
330         mesg.select<1, 1, 8, 1>(2, 0) = StartX + IncrementX.select<8, 1>(8) * DeltaX;
331 
332         mesg.select<1, 1, 8, 1>(3, 0) = StartY + IncrementY.select<8, 1>(0) * DeltaY;
333         mesg.select<1, 1, 8, 1>(4, 0) = StartY + IncrementY.select<8, 1>(8) * DeltaY;
334 
335 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
336         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
337 #endif
338 #if (defined BUFFER_4) || (defined BUFFER_5)
339         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45;
340 #endif
341 
342         cm_send(
343             temp_rg.select<1, 1, 16, 1>(0, 0),
344             mesg.format<ushort, 5, 16>(),
345             nSMPL_ENGINE,
346             desc_y,
347             0);
348 
349         vector<short, 4> row_offset(Row_Offset);
350         vector<short, 4> row_offset1(Row_offset1);
351         vector<short, 4> colonm_offset(Colomn_Offset);
352 
353         // Y
354         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
355         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
356 
357         // UV channel
358         mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_GREEN_CHANNEL_444_16;
359 
360 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
361         desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_UV_PLANE_BTI_OFFSET;
362 #endif
363 #if (defined BUFFER_4) || (defined BUFFER_5)
364         desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45 + MDF_FC_UV_PLANE_BTI_OFFSET;
365 #endif
366         cm_send(temp_rg,
367             mesg.format<ushort, 5, 16>(),
368             nSMPL_ENGINE,
369             desc_y,
370             0);
371 
372         // R
373         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
374         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
375         // B
376         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
377         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
378 
379 #ifdef ROTATE_90
380         IncrementX = IncrementX + 1.0f;
381 #elif defined ROTATE_180
382         IncrementY = IncrementY - 1.0f;
383 #elif defined ROTATE_270
384         IncrementX = IncrementX - 1.0f;
385 #else
386         IncrementY = IncrementY + 1.0f;
387 #endif
388     }
389 
390     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(6, 0) = 0xffff;
391     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(14, 0) = 0xffff;
392 #undef WriteBackBuffer
393 }
394