1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 {
23     uchar Buffer_Index1 = (Buffer_Index & 0x0f) << 4;
24     uchar Buffer_Index2 = (Buffer_Index >> 4) << 4;
25     /*
26     Buffer layout after shuffle
27     _________________________________________________
28     |_______Block0__________|_______Block1__________|
29     |_______Block2__________|_______Block3__________|
30     |_______Block4__________|_______Block5__________|
31     |_______Block6__________|_______Block7__________|
32 
33     Write back buffer layout correlate to the block number#, each box stands for 1 GRF
34     _______________________________________________
35     |____R0_________R1_____|____R2_________R3_____|
36     |____G0_________G1_____|____G2_________G3_____|
37     |____B0_________B1_____|____B2_________B3_____|
38     |____A0_________A1_____|____A2_________A3_____|
39     |____R4_________R5_____|____R6_________R7_____|
40     |____G4_________G5_____|____G6_________G7_____|
41     |____B4_________B5_____|____B6_________B7_____|
42     |____A4_________A5_____|____A6_________A7_____|
43     */
44     {
45         matrix<ushort, 1, 16> Temp;
46         if (TempMask[CalculationMask])
47         {
48             // R1/G1/B1/A1
49             matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 6, 0);
50 
51             Alpha = cm_add<ushort>(Alpha, -Alpha >> 4, SAT);
52 
53             {
54                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_0, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
55                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0), TempMask[CalculationMask]);
56             }
57 
58             {
59                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_0, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
60                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0), TempMask[CalculationMask]);
61             }
62 
63             {
64                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_0, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
65                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0), TempMask[CalculationMask]);
66             }
67         }
68 
69         if (TempMask[CalculationMask + 1])
70         {
71             // R2/G2/B2/A2
72             matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 7, 0);
73 
74             Alpha = cm_add<ushort>(Alpha, -Alpha >> 4, SAT);
75 
76             {
77                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_1, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
78                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0), TempMask[CalculationMask + 1]);
79             }
80 
81             {
82                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_1, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
83                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0), TempMask[CalculationMask + 1]);
84             }
85 
86             {
87                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_1, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
88                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0), TempMask[CalculationMask + 1]);
89             }
90         }
91 
92         if (TempMask[CalculationMask + 2])
93         {
94             // R3/G3/B3/A3
95             matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 14, 0);
96 
97             Alpha = cm_add<ushort>(Alpha, -Alpha >> 4, SAT);
98 
99             {
100                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_2, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
101                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0), TempMask[CalculationMask + 2]);
102             }
103 
104             {
105                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_2, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
106                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0), TempMask[CalculationMask + 2]);
107             }
108 
109             {
110                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_2, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
111                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0), TempMask[CalculationMask + 2]);
112             }
113         }
114 
115         if (TempMask[CalculationMask + 3])
116         {
117             // R4/G4/B4/A4
118             matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 15, 0);
119 
120             Alpha = cm_add<ushort>(Alpha, -Alpha >> 4, SAT);
121 
122             {
123                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_3, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
124                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0), TempMask[CalculationMask + 3]);
125             }
126 
127             {
128                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_3, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
129                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0), TempMask[CalculationMask + 3]);
130             }
131 
132             {
133                 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_3, 0) * (cm_add<ushort>(Alpha, 256, SAT))))) >> 16;
134                 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0).merge(Temp, DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0), TempMask[CalculationMask + 3]);
135             }
136         }
137     }
138 
139 }