1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
// Turn the per-lane mask words into a 0/1 predicate vector (non-zero -> 1) so
// that cm_pack_mask() can fold the 16 lanes into a bit field.
#if defined(BUFFER_0) || defined(BUFFER_1) || defined(BUFFER_2) || defined(BUFFER_3)
vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0);
#elif defined(BUFFER_4) || defined(BUFFER_5)
// Bit 7 of Layer_Index moves the mask window up by 8 bits.
uchar sec_half_shift = (Layer_Index >> 7) * 8;
vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0);
#endif

// Each buffer owns one 4-bit nibble of the packed mask: buffers 0-3 take
// nibbles 0-3, buffers 4/5 take nibbles 0/1 offset by sec_half_shift.
#if defined(BUFFER_0)
Buffer_Index = 0;
CalculationMask = cm_pack_mask(LoadMaskTemp) & 0x000F;
#elif defined(BUFFER_1)
Buffer_Index = 1;
CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 4) & 0x000F;
#elif defined(BUFFER_2)
Buffer_Index = 2;
CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 8) & 0x000F;
#elif defined(BUFFER_3)
Buffer_Index = 3;
CalculationMask = (cm_pack_mask(LoadMaskTemp) >> 12) & 0x000F;
#elif defined(BUFFER_4)
Buffer_Index = 4;
CalculationMask = (cm_pack_mask(LoadMaskTemp) >> sec_half_shift) & 0x000F;
#elif defined(BUFFER_5)
Buffer_Index = 5;
CalculationMask = (cm_pack_mask(LoadMaskTemp) >> (sec_half_shift + 4)) & 0x000F;
#endif
48 
49 if (CalculationMask != 0)
50 {
51     float StartX;
52     float StartY;
53     float DeltaX;
54     float DeltaY;
55     uchar RotationFlag;
56 
57     /*
58     AVS Sampler 16x4 write back buffer layout for R/V, G/Y, B/U channel, each box stands for 8x1 ushort write back pixel
59     16x4 pixle deviding to 8 8x1 pixel block
60     _________________________________________________
61     |_______Block0__________|_______Block1__________|
62     |_______Block2__________|_______Block3__________|
63     |_______Block4__________|_______Block5__________|
64     |_______Block6__________|_______Block7__________|
65 
66     Write back buffer layout correlate to the block number#, each box stands for 1 GRF
67     _______________________________________________
68     |____R0_________R2_____|____R4_________R6_____|
69     |____G0_________G2_____|____G4_________G6_____|
70     |____B0_________B2_____|____B4_________B6_____|
71     |____A0_________A2_____|____A4_________A6_____|
72     |____R1_________R3_____|____R5_________R7_____|
73     |____G1_________G3_____|____G5_________G7_____|
74     |____B1_________B3_____|____B5_________B7_____|
75     |____A1_________A3_____|____A5_________A7_____|
76     */
77     matrix <float, 5, 8> mesg;
78     //vector <float, 16> IncrementX;
79     //vector <float, 16> IncrementY;
80     matrix <float, 1, 16> temp_r;
81 
82     mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>();
83     uint desc_y;
84     uchar Layer_Index_45;
85 
86 #ifdef BUFFER_0
87 #define INIT_INCREMENT 0.0f
88 #elif defined BUFFER_1
89 #ifdef ROTATE_90
90 #define INIT_INCREMENT 4.0f
91 #elif defined ROTATE_180
92 #define INIT_INCREMENT -4.0f
93 #elif defined ROTATE_270
94 #define INIT_INCREMENT -4.0f
95 #else
96 #define INIT_INCREMENT 4.0f
97 #endif
98 #elif defined BUFFER_2
99 #ifdef ROTATE_90
100 #define INIT_INCREMENT 8.0f
101 #elif defined ROTATE_180
102 #define INIT_INCREMENT -8.0f
103 #elif defined ROTATE_270
104 #define INIT_INCREMENT -8.0f
105 #else
106 #define INIT_INCREMENT 8.0f
107 #endif
108 #elif defined BUFFER_3
109 #ifdef ROTATE_90
110 #define INIT_INCREMENT 12.0f
111 #elif defined ROTATE_180
112 #define INIT_INCREMENT -12.0f
113 #elif defined ROTATE_270
114 #define INIT_INCREMENT -12.0f
115 #else
116 #define INIT_INCREMENT 12.0f
117 #endif
118 #elif defined BUFFER_4
119 #define INIT_INCREMENT 0.0f
120 #elif defined BUFFER_5
121 #ifdef ROTATE_90
122 #define INIT_INCREMENT 4.0f
123 #elif defined ROTATE_180
124 #define INIT_INCREMENT -4.0f
125 #elif defined ROTATE_270
126 #define INIT_INCREMENT -4.0f
127 #else
128 #define INIT_INCREMENT 4.0f
129 #endif
130 #endif
131 
132 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
133 #ifdef ROTATE_90
134     StartX = Start_X(0);
135     StartY = Start_Y(0);
136     DeltaX = Delta_X(0);
137     DeltaY = Delta_Y(0);
138     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
139 
140     StartX = StartX + DstY * DeltaX;
141 
142     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL)
143     {
144         StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY;
145     }
146     else
147     {
148         StartY = StartY + (Dst_Width - DstX) * DeltaY;
149     }
150 
151     vector <float, 16> IncrementX(INIT_INCREMENT);
152     vector <float, 16> IncrementY(Dec);
153 #elif defined ROTATE_180
154     StartX = Start_X(0);
155     StartY = Start_Y(0);
156     DeltaX = Delta_X(0);
157     DeltaY = Delta_Y(0);
158 
159     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
160     if (RotationFlag == MDF_FC_MIRROR_VERTICAL)
161     {
162         StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX;
163     }
164     else
165     {
166         StartX += (Dst_Width - DstX) * DeltaX;
167     }
168 
169     StartY = StartY + ((Dst_Height - DstY - 1)) * DeltaY;
170 
171     vector <float, 16> IncrementX(Dec);
172     vector <float, 16> IncrementY(INIT_INCREMENT);
173 #elif defined ROTATE_270
174     StartX = Start_X(0);
175     StartY = Start_Y(0);
176     DeltaX = Delta_X(0);
177     DeltaY = Delta_Y(0);
178     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
179 
180     StartX = StartX + (Dst_Height - DstY - 1) * DeltaX;
181 
182     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL)
183     {
184         StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;
185     }
186     else
187     {
188         StartY = StartY + DstX * DeltaY;
189     }
190 
191     vector <float, 16> IncrementX(INIT_INCREMENT);
192     vector <float, 16> IncrementY(Inc);
193 #else
194     StartX = Start_X(0);
195     StartY = Start_Y(0);
196     DeltaX = Delta_X(0);
197     DeltaY = Delta_Y(0);
198 
199     RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
200     if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL)
201     {
202         StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
203     }
204     else
205     {
206         StartX += DstX * DeltaX;
207     }
208 
209     StartY += DstY * DeltaY;
210 
211     vector <float, 16> IncrementX(Inc);
212     vector <float, 16> IncrementY(INIT_INCREMENT);
213 #endif
214 #endif
215 
216 #if (defined BUFFER_4) || (defined BUFFER_5)
217 #ifdef ROTATE_90
218     Layer_Index_45 = Layer_Index & 0x7f;
219     StartX = Start_X(Layer_Index_45);
220     StartY = Start_Y(Layer_Index_45);
221     DeltaX = Delta_X(Layer_Index_45);
222     DeltaY = Delta_Y(Layer_Index_45);
223 
224     StartX = StartX + (DstY + 8 * (Layer_Index >> 7)) * DeltaX;
225     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
226 
227     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_HORIZONTAL)
228     {
229         StartY = StartY + (DstX + MDF_FC_BLOCK_WIDTH) * DeltaY;
230     }
231     else
232     {
233         StartY = StartY + (Dst_Width - DstX) * DeltaY;
234     }
235 
236     vector <float, 16> IncrementX(INIT_INCREMENT);
237     vector <float, 16> IncrementY(Dec);
238 #elif defined ROTATE_180
239     Layer_Index_45 = Layer_Index & 0x7f;
240     StartX = Start_X(Layer_Index_45);
241     StartY = Start_Y(Layer_Index_45);
242     DeltaX = Delta_X(Layer_Index_45);
243     DeltaY = Delta_Y(Layer_Index_45);
244 
245     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
246     if (RotationFlag == MDF_FC_MIRROR_VERTICAL)
247     {
248         StartX += (DstX + MDF_FC_BLOCK_WIDTH) * DeltaX;
249     }
250     else
251     {
252         StartX += (Dst_Width - DstX) * DeltaX;
253     }
254 
255     StartY = StartY + (Dst_Height - DstY - 8 * (Layer_Index >> 7) - 1) * DeltaY;
256 
257     vector <float, 16> IncrementX(Dec);
258     vector <float, 16> IncrementY(INIT_INCREMENT);
259 #elif defined ROTATE_270
260     Layer_Index_45 = Layer_Index & 0x7f;
261 
262     StartX = Start_X(Layer_Index_45);
263     StartY = Start_Y(Layer_Index_45);
264     DeltaX = Delta_X(Layer_Index_45);
265     DeltaY = Delta_Y(Layer_Index_45);
266 
267     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
268 
269     StartX = StartX + (Dst_Height - 1 - DstY - 8 * (Layer_Index >> 7)) * DeltaX;
270     if (RotationFlag == MDF_FC_ROTATE_90_MIRROR_VERTICAL)
271     {
272         StartY = StartY + (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;
273     }
274     else
275     {
276         StartY = StartY + DstX * DeltaY;
277     }
278 
279     vector <float, 16> IncrementX(INIT_INCREMENT);
280     vector <float, 16> IncrementY(Inc);
281 #else
282     Layer_Index_45 = Layer_Index & 0x7f;
283     StartX = Start_X(Layer_Index_45);
284     StartY = Start_Y(Layer_Index_45);
285     DeltaX = Delta_X(Layer_Index_45);
286     DeltaY = Delta_Y(Layer_Index_45);
287     RotationFlag = (uchar)((RotationChromaSitingFlag >> (3 * Layer_Index_45)) & 0x07);
288     if (RotationFlag == MDF_FC_MIRROR_HORIZONTAL)
289     {
290         StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
291     }
292     else
293     {
294         StartX += DstX * DeltaX;
295     }
296 
297     StartY = StartY + (DstY + (Layer_Index >> 7) * 8) * DeltaY;
298 
299     vector <float, 16> IncrementX(Inc);
300     vector <float, 16> IncrementY(INIT_INCREMENT);
301 #endif
302 #endif
303 
304 #ifdef BUFFER_0
305 #define WriteBackBuffer DataBuffer0
306 #endif
307 #ifdef BUFFER_1
308 #define WriteBackBuffer DataBuffer1
309 #endif
310 #ifdef BUFFER_2
311 #define WriteBackBuffer DataBuffer2
312 #endif
313 #ifdef BUFFER_3
314 #define WriteBackBuffer DataBuffer3
315 #endif
316 #ifdef BUFFER_4
317 #define WriteBackBuffer DataBuffer4
318 #endif
319 #ifdef BUFFER_5
320 #define WriteBackBuffer DataBuffer5
321 #endif
322 
323     mesg.format<uint, 5, 8>().select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>();
324     mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = 0x0;
325 
326 #pragma unroll
327     for (short j = 0; j < 4; j++)
328     {
329         // Y channel
330         mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_CHANNEL_444_16;
331 
332         mesg.select<1, 1, 8, 1>(1, 0) = StartX + IncrementX.select<8, 1>(0) * DeltaX;
333         mesg.select<1, 1, 8, 1>(2, 0) = StartX + IncrementX.select<8, 1>(8) * DeltaX;
334 
335         mesg.select<1, 1, 8, 1>(3, 0) = StartY + IncrementY.select<8, 1>(0) * DeltaY;
336         mesg.select<1, 1, 8, 1>(4, 0) = StartY + IncrementY.select<8, 1>(8) * DeltaY;
337 
338 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
339         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
340 #endif
341 #if (defined BUFFER_4) || (defined BUFFER_5)
342         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45;
343 #endif
344 
345         cm_send(
346             temp_r,
347             mesg.format<ushort, 5, 16>(),
348             nSMPL_ENGINE,
349             desc_y,
350             0);
351 
352         vector<short, 4> row_offset(Row_Offset);
353         vector<short, 4> row_offset1(Row_offset1);
354         vector<short, 4> colonm_offset(Colomn_Offset);
355 
356         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
357         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
358 
359         // U channel
360 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
361         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_U_PLANE_BTI_OFFSET;
362 #endif
363 #if (defined BUFFER_4) || (defined BUFFER_5)
364         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45 + MDF_FC_U_PLANE_BTI_OFFSET;
365 #endif
366         cm_send(
367             temp_r,
368             mesg.format<ushort, 5, 16>(),
369             nSMPL_ENGINE,
370             desc_y,
371             0);
372 
373         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
374         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
375 
376         // V channel
377 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
378         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_V << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_V_PLANE_BTI_OFFSET;
379 #endif
380 #if (defined BUFFER_4) || (defined BUFFER_5)
381         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_V << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_INPUT_BTI_PER_LAYER * Layer_Index_45 + MDF_FC_V_PLANE_BTI_OFFSET;
382 #endif
383         cm_send(
384             temp_r,
385             mesg.format<ushort, 5, 16>(),
386             nSMPL_ENGINE,
387             desc_y,
388             0);
389 
390         // V channel
391         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
392         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_r.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
393 
394 #ifdef ROTATE_90
395         IncrementX = IncrementX + 1.0f;
396 #elif defined ROTATE_180
397         IncrementY = IncrementY - 1.0f;
398 #elif defined ROTATE_270
399         IncrementX = IncrementX - 1.0f;
400 #else
401         IncrementY = IncrementY + 1.0f;
402 #endif
403     }
404 
405     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(6, 0) = 0xffff;
406     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(14, 0) = 0xffff;
407 
408 #undef WriteBackBuffer
409 }