/*
* Copyright (c) 2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "AlphaSrcBlendG.h"

AlphaSrcBlendG(CURBE_INPUT_OUTPUT,GLOBAL_BUFFER_INPUT_OUTPUT)24 _GENX_MAIN_ _CM_CALLABLE_ void AlphaSrcBlendG(
25 CURBE_INPUT_OUTPUT,
26 GLOBAL_BUFFER_INPUT_OUTPUT)
27 {
28 uchar Buffer_Index1 = (Buffer_Index & 0x0f) << 4;
29 uchar Buffer_Index2 = (Buffer_Index >> 4) << 4;
30
31 uchar ConstAlpha = (ConstantBlendingAlpha((Layer_Index & 0x7f) - 1));
32 ushort Alpha1 = cm_add<ushort>(ConstAlpha << 8, 256, SAT);
33
34 /*
35 Buffer layout after shuffle
36 _________________________________________________
37 |_______Block0__________|_______Block1__________|
38 |_______Block2__________|_______Block3__________|
39 |_______Block4__________|_______Block5__________|
40 |_______Block6__________|_______Block7__________|
41
42 Write back buffer layout correlate to the block number#, each box stands for 1 GRF
43 _______________________________________________
44 |____R0_________R1_____|____R2_________R3_____|
45 |____G0_________G1_____|____G2_________G3_____|
46 |____B0_________B1_____|____B2_________B3_____|
47 |____A0_________A1_____|____A2_________A3_____|
48 |____R4_________R5_____|____R6_________R7_____|
49 |____G4_________G5_____|____G6_________G7_____|
50 |____B4_________B5_____|____B6_________B7_____|
51 |____A4_________A5_____|____A6_________A7_____|
52 */
53 {
54 matrix<uint, 1, 16> Temp;
55
56 if (TempMask[CalculationMask])
57 {
58 // R1/G1/B1/A1
59 matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 6, 0);
60 Alpha = (Alpha * (cm_add<uchar>(ConstAlpha, 1, SAT))) >> 8;
61
62 {
63 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_0, 0) * Alpha1))) >> 16;
64 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_0, 0), TempMask[CalculationMask]);
65 }
66
67 {
68 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_0, 0) * Alpha1))) >> 16;
69 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_0, 0), TempMask[CalculationMask]);
70 }
71
72 {
73 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_0, 0) * Alpha1))) >> 16;
74 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_0, 0), TempMask[CalculationMask]);
75 }
76
77 {
78 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_0, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_A_0, 0) * Alpha1))) >> 16;
79 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_0, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_0, 0), TempMask[CalculationMask]);
80 }
81 }
82
83 if (TempMask[CalculationMask + 1])
84 {
85 // R2/G2/B2/A2
86 matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 7, 0);
87 Alpha = (Alpha * (cm_add<uchar>(ConstAlpha, 1, SAT))) >> 8;
88
89 {
90 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_1, 0) * Alpha1))) >> 16;
91 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_1, 0), TempMask[CalculationMask + 1]);
92 }
93
94 {
95 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_1, 0) * Alpha1))) >> 16;
96 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_1, 0), TempMask[CalculationMask + 1]);
97 }
98
99 {
100 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_1, 0) * Alpha1))) >> 16;
101 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_1, 0), TempMask[CalculationMask + 1]);
102 }
103
104 {
105 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_1, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_A_1, 0) * Alpha1))) >> 16;
106 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_1, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_1, 0), TempMask[CalculationMask + 1]);
107 }
108 }
109
110 if (TempMask[CalculationMask + 2])
111 {
112 // R3/G3/B3/A3
113 matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 14, 0);
114 Alpha = (Alpha * (cm_add<uchar>(ConstAlpha, 1, SAT))) >> 8;
115
116 {
117 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_2, 0) * Alpha1))) >> 16;
118 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_2, 0), TempMask[CalculationMask + 2]);
119 }
120
121 {
122 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_2, 0) * Alpha1))) >> 16;
123 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_2, 0), TempMask[CalculationMask + 2]);
124 }
125
126 {
127 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_2, 0) * Alpha1))) >> 16;
128 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_2, 0), TempMask[CalculationMask + 2]);
129 }
130
131 {
132 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_2, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_A_2, 0) * Alpha1))) >> 16;
133 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_2, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_2, 0), TempMask[CalculationMask + 2]);
134 }
135 }
136
137 if (TempMask[CalculationMask + 3])
138 {
139 // R4/G4/B4/A4
140 matrix<ushort, 1, 16> Alpha = DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + 15, 0);
141 Alpha = (Alpha * (cm_add<uchar>(ConstAlpha, 1, SAT))) >> 8;
142
143 {
144 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_RV_3, 0) * Alpha1))) >> 16;
145 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_RV_3, 0), TempMask[CalculationMask + 3]);
146 }
147
148 {
149 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_GY_3, 0) * Alpha1))) >> 16;
150 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_GY_3, 0), TempMask[CalculationMask + 3]);
151 }
152
153 {
154 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_BU_3, 0) * Alpha1))) >> 16;
155 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_BU_3, 0), TempMask[CalculationMask + 3]);
156 }
157
158 {
159 Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_3, 0) * (cm_add<ushort>(0xFF00, -Alpha, SAT)))) + ((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index2 + Channel_Offset_A_3, 0) * Alpha1))) >> 16;
160 DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_3, 0).merge(Temp.format<ushort, 1, 32>().select<1, 1, 16, 2>(0, 1), DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(Buffer_Index1 + Channel_Offset_A_3, 0), TempMask[CalculationMask + 3]);
161 }
162 }
163 }
164 }