1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
// Per-lane load flags for this buffer: a lane is non-zero exactly when the
// matching entry of the source mask is non-zero.
#if defined(BUFFER_0) || defined(BUFFER_1) || defined(BUFFER_2) || defined(BUFFER_3)
// Buffers 0-3 take 16 consecutive entries starting at (0, 0) of TempMask0.
vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0);
#elif defined(BUFFER_4) || defined(BUFFER_5)
// Buffers 4-5 take the first 16 entries of TempMask and additionally derive
// sec_half_shift from Layer_Index (not referenced again in this fragment).
uchar sec_half_shift = (Layer_Index >> 7) * 8;
vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0);
#endif
28 
// Record which data buffer (0-5) this instantiation of the fragment serves.
#if defined(BUFFER_0)
Buffer_Index = 0;
#elif defined(BUFFER_1)
Buffer_Index = 1;
#elif defined(BUFFER_2)
Buffer_Index = 2;
#elif defined(BUFFER_3)
Buffer_Index = 3;
#elif defined(BUFFER_4)
Buffer_Index = 4;
#elif defined(BUFFER_5)
Buffer_Index = 5;
#endif
42 
43 ushort LoadMask = cm_pack_mask(LoadMaskTemp);
44 CalculationMask = LoadMask == 0 ? 0x00 : 0xFF;
45 
// Everything below is skipped when no lane of the load mask is set.
if (CalculationMask != 0)
{
    // Source coordinates written into the message for the first sampler read.
    float StartX;
    float StartY;
    // Source coordinates written into the message for the second sampler read.
    float StartX1;
    float StartY1;
    // Source steps, loaded from Delta_X(0) / Delta_Y(0) in the per-buffer sections.
    float DeltaX;
    float DeltaY;
    // Low three bits of RotationChromaSitingFlag; only assigned when no
    // ROTATE_* macro is defined.
    uchar RotationFlag;

    /*
    AVS Sampler 16x4 write-back buffer layout for the R/V, G/Y, B/U channels; each box stands for an 8x1 block of ushort write-back pixels
    16x4 pixels divided into eight 8x1 pixel blocks
    _________________________________________________
    |_______Block0__________|_______Block1__________|
    |_______Block2__________|_______Block3__________|
    |_______Block4__________|_______Block5__________|
    |_______Block6__________|_______Block7__________|

    Write-back buffer layout correlated to the block numbers; each box stands for 1 GRF
    _______________________________________________
    |____R0_________R2_____|____R4_________R6_____|
    |____G0_________G2_____|____G4_________G6_____|
    |____B0_________B2_____|____B4_________B6_____|
    |____A0_________A2_____|____A4_________A6_____|
    |____R1_________R3_____|____R5_________R7_____|
    |____G1_________G3_____|____G5_________G7_____|
    |____B1_________B3_____|____B5_________B7_____|
    |____A1_________A3_____|____A5_________A7_____|
    */
    // Two-row sampler message. Both rows start out as a raw (uint) copy of r0;
    // elements 2..5 of row 1 are overwritten before each send.
    matrix <float, 2, 8> mesg;
    mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>();
    mesg.select<1, 1, 8, 1>(1, 0).format<uint>() = cm_get_r0<uint>();
    // Message descriptor passed to cm_send for the reads of this buffer.
    uint desc_y;
    // NOTE(review): not referenced anywhere else in the visible fragment;
    // presumably kept for the BUFFER_4/BUFFER_5 variants - confirm before removing.
    uchar Layer_Index_45;
81 
#ifdef BUFFER_0
    // Layer-0 source origin and step; identical for every orientation.
    StartX = Start_X(0);
    StartY = Start_Y(0);
    DeltaX = Delta_X(0);
    DeltaY = Delta_Y(0);

#if defined(ROTATE_90)
    // Destination Y advances source X; destination X (mirrored) advances source Y.
    StartX += DstY * DeltaX;
    StartY += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;

    // Second read starts 4 steps further along X.
    StartX1 = StartX + 4 * DeltaX;
    StartY1 = StartY;
#elif defined(ROTATE_180)
    // Both axes use mirrored destination offsets.
    StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX + MDF_FC_SAMPLER_UNORM_WIDTH) * DeltaX;
    StartY += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 8) * DeltaY;

    // Second read starts one sampler width back along X.
    StartX1 = StartX - DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#elif defined(ROTATE_270)
    // Destination Y (mirrored) advances source X; destination X advances source Y.
    StartX += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 12) * DeltaX;
    StartY += DstX * DeltaY;

    // Second read starts 4 steps back along X.
    StartX1 = StartX - 4 * DeltaX;
    StartY1 = StartY;
#else
    // No rotation macro: the only orientation handled here is the horizontal
    // mirror, selected at run time from the low three flag bits.
    RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
    if (RotationFlag != MDF_FC_MIRROR_HORIZONTAL)
    {
        StartX += DstX * DeltaX;
    }
    else
    {
        StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
    }
    StartY += DstY * DeltaY;

    // Second read starts one sampler width further along X.
    StartX1 = StartX + MDF_FC_SAMPLER_UNORM_WIDTH * DeltaX;
    StartY1 = StartY;
#endif
#endif
138 
#ifdef BUFFER_1
    // Layer-0 source origin and step; identical for every orientation.
    StartX = Start_X(0);
    StartY = Start_Y(0);
    DeltaX = Delta_X(0);
    DeltaY = Delta_Y(0);

#if defined(ROTATE_90)
    // Destination Y advances source X; destination X (mirrored) advances source Y.
    StartX += DstY * DeltaX;
    StartY += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;

    // Second read starts 4 steps further along X.
    StartX1 = StartX + 4 * DeltaX;
    StartY1 = StartY;
#elif defined(ROTATE_180)
    // Both axes use mirrored destination offsets.
    StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX + MDF_FC_SAMPLER_UNORM_WIDTH) * DeltaX;
    StartY += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 8) * DeltaY;

    // Second read starts one sampler width back along X.
    StartX1 = StartX - DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#elif defined(ROTATE_270)
    // Destination Y (mirrored) advances source X; destination X advances source Y.
    StartX += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 12) * DeltaX;
    StartY += DstX * DeltaY;

    // Second read starts 4 steps back along X.
    StartX1 = StartX - 4 * DeltaX;
    StartY1 = StartY;
#else
    // No rotation macro: the only orientation handled here is the horizontal
    // mirror, selected at run time from the low three flag bits.
    RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
    if (RotationFlag != MDF_FC_MIRROR_HORIZONTAL)
    {
        StartX += DstX * DeltaX;
    }
    else
    {
        StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
    }
    StartY += DstY * DeltaY;

    // Second read starts one sampler width further along X.
    StartX1 = StartX + DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#endif
#endif
195 
#ifdef BUFFER_2
    // Layer-0 source origin and step; identical for every orientation.
    StartX = Start_X(0);
    StartY = Start_Y(0);
    DeltaX = Delta_X(0);
    DeltaY = Delta_Y(0);

#if defined(ROTATE_90)
    // Destination Y (plus 8) advances source X; destination X (mirrored)
    // advances source Y.
    StartX += (8 + DstY) * DeltaX;
    StartY += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;

    // Second read starts 4 steps further along X.
    StartX1 = StartX + 4 * DeltaX;
    StartY1 = StartY;
#elif defined(ROTATE_180)
    // Both axes use mirrored destination offsets.
    StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX + 8) * DeltaX;
    StartY += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY) * DeltaY;

    // Second read starts one sampler width back along X.
    StartX1 = StartX - DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#elif defined(ROTATE_270)
    // Destination Y (mirrored) advances source X; destination X advances source Y.
    StartX += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 4) * DeltaX;
    StartY += DstX * DeltaY;

    // Second read starts 4 steps back along X.
    StartX1 = StartX - 4 * DeltaX;
    StartY1 = StartY;
#else
    // No rotation macro: the only orientation handled here is the horizontal
    // mirror, selected at run time from the low three flag bits.
    RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
    if (RotationFlag != MDF_FC_MIRROR_HORIZONTAL)
    {
        StartX += DstX * DeltaX;
    }
    else
    {
        StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
    }
    // This buffer starts two sampler heights below DstY.
    StartY += (DstY + 2 * MDF_FC_SAMPLER_UNORM_HEIGHT) * DeltaY;

    // Second read starts one sampler width further along X.
    StartX1 = StartX + DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#endif
#endif
251 
#ifdef BUFFER_3
    // Layer-0 source origin and step; identical for every orientation.
    StartX = Start_X(0);
    StartY = Start_Y(0);
    DeltaX = Delta_X(0);
    DeltaY = Delta_Y(0);

#if defined(ROTATE_90)
    // Destination Y (plus 8) advances source X; destination X (mirrored)
    // advances source Y.
    StartX += (8 + DstY) * DeltaX;
    StartY += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaY;

    // Second read starts 4 steps further along X.
    StartX1 = StartX + 4 * DeltaX;
    StartY1 = StartY;
#elif defined(ROTATE_180)
    // Both axes use mirrored destination offsets.
    StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX + 8) * DeltaX;
    StartY += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY) * DeltaY;

    // Second read starts one sampler width back along X.
    StartX1 = StartX - DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#elif defined(ROTATE_270)
    // Destination Y (mirrored) advances source X; destination X advances source Y.
    StartX += (Dst_Height - MDF_FC_BLOCK_HEIGHT - DstY + 4) * DeltaX;
    StartY += DstX * DeltaY;

    // Second read starts 4 steps back along X.
    StartX1 = StartX - 4 * DeltaX;
    StartY1 = StartY;
#else
    // No rotation macro: the only orientation handled here is the horizontal
    // mirror, selected at run time from the low three flag bits.
    RotationFlag = (uchar)(RotationChromaSitingFlag & 0x07);
    if (RotationFlag != MDF_FC_MIRROR_HORIZONTAL)
    {
        StartX += DstX * DeltaX;
    }
    else
    {
        StartX += (Dst_Width - MDF_FC_BLOCK_WIDTH - DstX) * DeltaX;
    }
    // This buffer starts two sampler heights below DstY.
    StartY += (DstY + 2 * MDF_FC_SAMPLER_UNORM_HEIGHT) * DeltaY;

    // Second read starts one sampler width further along X.
    StartX1 = StartX + DeltaX * MDF_FC_SAMPLER_UNORM_WIDTH;
    StartY1 = StartY;
#endif
#endif
309 
#ifdef BUFFER_0
// Sampler results for this buffer are written to DataBuffer0. The alias stays
// defined until the #undef at the end of the enclosing block.
#define WriteBackBuffer DataBuffer0
    // First read: elements 2..5 of message row 1 carry StartX, StartY, DeltaX
    // and twice DeltaY.
    mesg.select<1, 1, 1, 1>(1, 2) = StartX;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;

    // Descriptor = base message descriptor + (MDF_FC_3D_SAMPLER_SI_Y << 8) + a
    // binding-table constant. For this buffer the 180/270 rotations use
    // MDF_FC_INPUT_BTI_F2 and every other case uses MDF_FC_INPUT_BTI_START.
    // NOTE(review): the reason for the per-rotation swap is not visible here -
    // presumably it selects between two input surfaces/fields; confirm.
#if (defined ROTATE_180) || (defined ROTATE_270)
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2;
#else
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
#endif
    // The response fills rows 0-7 of the buffer viewed as 16x16 ushort.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(0, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

    // Second read: same steps and descriptor, start point (StartX1, StartY1).
    mesg.select<1, 1, 1, 1>(1, 2) = StartX1;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY1;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;

    // The response fills rows 8-15 of the same 16x16 ushort view.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(8, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

#endif
#ifdef BUFFER_1
// Sampler results for this buffer are written to DataBuffer1. The alias stays
// defined until the #undef at the end of the enclosing block.
#define WriteBackBuffer DataBuffer1
    // First read: elements 2..5 of message row 1 carry StartX, StartY, DeltaX
    // and twice DeltaY.
    mesg.select<1, 1, 1, 1>(1, 2) = StartX;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;

    // Descriptor = base message descriptor + (MDF_FC_3D_SAMPLER_SI_Y << 8) + a
    // binding-table constant. For this buffer the 180/270 rotations use
    // MDF_FC_INPUT_BTI_START and every other case uses MDF_FC_INPUT_BTI_F2
    // (the opposite assignment to the BUFFER_0 section).
    // NOTE(review): the reason for the per-rotation swap is not visible here -
    // presumably it selects between two input surfaces/fields; confirm.
#if (defined ROTATE_180) || (defined ROTATE_270)
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
#else
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2;
#endif
    // The response fills rows 0-7 of the buffer viewed as 16x16 ushort.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(0, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

    // Second read: same steps and descriptor, start point (StartX1, StartY1).
    mesg.select<1, 1, 1, 1>(1, 2) = StartX1;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY1;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;
    // The response fills rows 8-15 of the same 16x16 ushort view.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(8, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

#endif
#ifdef BUFFER_2
// Sampler results for this buffer are written to DataBuffer2. The alias stays
// defined until the #undef at the end of the enclosing block.
#define WriteBackBuffer DataBuffer2
    // First read: elements 2..5 of message row 1 carry StartX, StartY, DeltaX
    // and twice DeltaY.
    mesg.select<1, 1, 1, 1>(1, 2) = StartX;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;

    // Descriptor = base message descriptor + (MDF_FC_3D_SAMPLER_SI_Y << 8) + a
    // binding-table constant. For this buffer the 180/270 rotations use
    // MDF_FC_INPUT_BTI_F2 and every other case uses MDF_FC_INPUT_BTI_START.
    // NOTE(review): the reason for the per-rotation swap is not visible here -
    // presumably it selects between two input surfaces/fields; confirm.
#if (defined ROTATE_180) || (defined ROTATE_270)
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2;
#else
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
#endif
    // The response fills rows 0-7 of the buffer viewed as 16x16 ushort.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(0, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

    // Second read: same steps and descriptor, start point (StartX1, StartY1).
    mesg.select<1, 1, 1, 1>(1, 2) = StartX1;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY1;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;
    // The response fills rows 8-15 of the same 16x16 ushort view.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(8, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

#endif
#ifdef BUFFER_3
// Sampler results for this buffer are written to DataBuffer3. The alias stays
// defined until the #undef at the end of the enclosing block.
#define WriteBackBuffer DataBuffer3
    // First read: elements 2..5 of message row 1 carry StartX, StartY, DeltaX
    // and twice DeltaY.
    mesg.select<1, 1, 1, 1>(1, 2) = StartX;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;

    // Descriptor = base message descriptor + (MDF_FC_3D_SAMPLER_SI_Y << 8) + a
    // binding-table constant. For this buffer the 180/270 rotations use
    // MDF_FC_INPUT_BTI_START and every other case uses MDF_FC_INPUT_BTI_F2
    // (the opposite assignment to the BUFFER_2 section).
    // NOTE(review): the reason for the per-rotation swap is not visible here -
    // presumably it selects between two input surfaces/fields; confirm.
#if (defined ROTATE_180) || (defined ROTATE_270)
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
#else
    desc_y = nSMPL_UNORM_444_16BITS_MSGDSC_4CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2;
#endif
    // The response fills rows 0-7 of the buffer viewed as 16x16 ushort.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(0, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

    // Second read: same steps and descriptor, start point (StartX1, StartY1).
    mesg.select<1, 1, 1, 1>(1, 2) = StartX1;
    mesg.select<1, 1, 1, 1>(1, 3) = StartY1;
    mesg.select<1, 1, 1, 1>(1, 4) = DeltaX;
    mesg.select<1, 1, 1, 1>(1, 5) = 2 * DeltaY;
    // The response fills rows 8-15 of the same 16x16 ushort view.
    cm_send(WriteBackBuffer.format<ushort, 16, 16>().select<8, 1, 16, 1>(8, 0),
        mesg.format<ushort, 2, 16>(),
        nSMPL_ENGINE,
        desc_y,
        0);

#endif
427 
    // Shuffle the write back of sampler
    // NOTE(review): no shuffle statements appear in this fragment between the
    // sends above and this point; presumably the reordering described below is
    // performed by code outside this file - confirm.
    /*
    Buffer layout after shuffle
    _________________________________________________
    |_______Block0__________|_______Block1__________|
    |_______Block2__________|_______Block3__________|
    |_______Block4__________|_______Block5__________|
    |_______Block6__________|_______Block7__________|

    Write-back buffer layout correlated to the block numbers; each box stands for 1 GRF
    _______________________________________________
    |____R0_________R1_____|____R2_________R3_____|
    |____G0_________G1_____|____G2_________G3_____|
    |____B0_________B1_____|____B2_________B3_____|
    |____A0_________A1_____|____A2_________A3_____|
    |____R4_________R5_____|____R6_________R7_____|
    |____G4_________G5_____|____G6_________G7_____|
    |____B4_________B5_____|____B6_________B7_____|
    |____A4_________A5_____|____A6_________A7_____|
    */
// Drop the per-buffer alias so the next inclusion of this fragment can redefine it.
#undef WriteBackBuffer
} // if (CalculationMask != 0)