1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
23 vector <ushort, 16> LoadMaskTemp = (TempMask0.select<1, 1, 16, 1>(0, 0) != 0);
24 #elif (defined BUFFER_4) || (defined BUFFER_5)
25 uchar sec_half_shift = (Layer_Index >> 7) * 8;
26 vector <ushort, 16> LoadMaskTemp = (TempMask.select<16, 1>(0) != 0);
27 #endif
28 
29 #if defined BUFFER_0
30 Buffer_Index = 0;
31 #elif defined BUFFER_1
32 Buffer_Index = 1;
33 #elif defined BUFFER_2
34 Buffer_Index = 2;
35 #elif defined BUFFER_3
36 Buffer_Index = 3;
37 #elif defined BUFFER_4
38 Buffer_Index = 4;
39 #elif defined BUFFER_5
40 Buffer_Index = 5;
41 #endif
42 
43 ushort LoadMask = cm_pack_mask(LoadMaskTemp);
44 CalculationMask = LoadMask == 0 ? 0x00 : 0xFF;
45 
46 if (CalculationMask != 0)
47 {
48     float StartX;
49     float StartY;
50     float DeltaX;
51     float DeltaY;
52     /*
53     AVS Sampler 16x4 write back buffer layout for R/V, G/Y, B/U channel, each box stands for 8x1 ushort write back pixel
54     16x4 pixle deviding to 8 8x1 pixel block
55     _________________________________________________
56     |_______Block0__________|_______Block1__________|
57     |_______Block2__________|_______Block3__________|
58     |_______Block4__________|_______Block5__________|
59     |_______Block6__________|_______Block7__________|
60 
61     Write back buffer layout correlate to the block number#, each box stands for 1 GRF
62     _______________________________________________
63     |____R0_________R2_____|____R4_________R6_____|
64     |____G0_________G2_____|____G4_________G6_____|
65     |____B0_________B2_____|____B4_________B6_____|
66     |____A0_________A2_____|____A4_________A6_____|
67     |____R1_________R3_____|____R5_________R7_____|
68     |____G1_________G3_____|____G5_________G7_____|
69     |____B1_________B3_____|____B5_________B7_____|
70     |____A1_________A3_____|____A5_________A7_____|
71     */
72     matrix <float, 5, 8> mesg;
73     matrix <float, 2, 16> temp_rg;
74 
75     mesg.select<1, 1, 8, 1>(0, 0).format<uint>() = cm_get_r0<uint>();
76     uint desc_y;
77     uchar Layer_Index_45;
78 
79 #if (defined BUFFER_0) || (defined BUFFER_1)
80 #define INIT_INCREMENT 0.0f
81 #elif (defined BUFFER_2) || (defined BUFFER_3)
82 #define INIT_INCREMENT 4.0f
83 #endif
84 
85 #if (defined BUFFER_0) || (defined BUFFER_1) || (defined BUFFER_2) || (defined BUFFER_3)
86     StartX = Start_X(0);
87     StartY = Start_Y(0);
88     DeltaX = Delta_X(0);
89     DeltaY = Delta_Y(0);
90 
91     StartX += DstX * DeltaX;
92     StartY += DstY * DeltaY;
93 
94     vector <float, 16> IncrementX(Inc);
95     vector <float, 16> IncrementY(INIT_INCREMENT);
96 #endif
97 
98 #ifdef BUFFER_0
99 #define WriteBackBuffer DataBuffer0
100 #endif
101 #ifdef BUFFER_1
102 #define WriteBackBuffer DataBuffer1
103 #endif
104 #ifdef BUFFER_2
105 #define WriteBackBuffer DataBuffer2
106 #endif
107 #ifdef BUFFER_3
108 #define WriteBackBuffer DataBuffer3
109 #endif
110 
111     mesg.format<uint, 5, 8>().select<1, 1, 8, 1>(0, 0) = cm_get_r0<uint>();
112     mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = 0x0;
113 
114 #pragma unroll
115     for (short j = 0; j < 4; j++)
116     {
117         // Y channel
118         mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_CHANNEL_444_16;
119 
120         mesg.select<1, 1, 8, 1>(1, 0) = StartX + IncrementX.select<8, 1>(0) * DeltaX;
121         mesg.select<1, 1, 8, 1>(2, 0) = StartX + IncrementX.select<8, 1>(8) * DeltaX;
122 
123         mesg.select<1, 1, 8, 1>(3, 0) = StartY + IncrementY.select<8, 1>(0) * 2 * DeltaY;
124         mesg.select<1, 1, 8, 1>(4, 0) = StartY + IncrementY.select<8, 1>(8) * 2 * DeltaY;
125 
126 #if (defined BUFFER_0) || (defined BUFFER_2)
127         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_START;
128 #elif (defined BUFFER_1) || (defined BUFFER_3)
129         desc_y = nSIMD16_0X_034X_MSG_DSC_1CH + (MDF_FC_3D_SAMPLER_SI_Y << 8) + MDF_FC_INPUT_BTI_F2;
130 #endif
131 
132         cm_send(
133             temp_rg.select<1, 1, 16, 1>(0, 0),
134             mesg.format<ushort, 5, 16>(),
135             nSMPL_ENGINE,
136             desc_y,
137             0);
138 
139         vector<short, 4> row_offset(Row_Offset);
140         vector<short, 4> row_offset1(Row_offset1);
141         vector<short, 4> colonm_offset(Colomn_Offset);
142 
143         // Y
144         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
145         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 2, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
146 
147         // UV channel
148         mesg.format<uint, 5, 8>().select<1, 1, 1, 1>(0, 2) = nSMPL_RED_GREEN_CHANNEL_444_16;
149 
150 #if (defined BUFFER_0) || (defined BUFFER_2)
151         desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_START + MDF_FC_UV_PLANE_BTI_OFFSET;
152 #elif (defined BUFFER_1) || (defined BUFFER_3)
153         desc_y = nSIMD16_0X_034X_MSG_DSC_2CH + (MDF_FC_3D_SAMPLER_SI_U << 8) + MDF_FC_INPUT_BTI_F2 + MDF_FC_UV_PLANE_BTI_OFFSET;
154 #endif
155         cm_send(temp_rg,
156             mesg.format<ushort, 5, 16>(),
157             nSMPL_ENGINE,
158             desc_y,
159             0);
160 
161         // R
162         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
163         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j], Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(1, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
164         // B
165         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 0) * MDF_FC_NORMALIZE_FACTOR, SAT);
166         WriteBackBuffer.format<ushort, 16, 16>().select<1, 1, 8, 1>(row_offset1[j] + 4, Colomn_Offset[j]) = matrix<ushort, 1, 8>(temp_rg.select<1, 1, 8, 1>(0, 8) * MDF_FC_NORMALIZE_FACTOR, SAT);
167 
168         IncrementY = IncrementY + 1.0f;
169     }
170 
171     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(6, 0) = 0xffff;
172     WriteBackBuffer.format<ushort, 16, 16>().select<2, 1, 16, 1>(14, 0) = 0xffff;
173 
174 #undef WriteBackBuffer
175 }