1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 {
23     matrix<ushort, 1, 16> Temp;
24     ushort Alpha = (ushort)((ColorFill(3)) << 8);
25     Alpha = cm_add<ushort>(Alpha, 256, SAT);
26     ushort Alpha1  = cm_add<ushort>(0xff00, - Alpha, SAT);
27 
28 #pragma unroll
29     for (int i = 0; i < 4; i++)
30     {
31         /*
32         Buffer layout after shuffle
33         _________________________________________________
34         |_______Block0__________|_______Block1__________|
35         |_______Block2__________|_______Block3__________|
36         |_______Block4__________|_______Block5__________|
37         |_______Block6__________|_______Block7__________|
38 
39         Write back buffer layout correlate to the block number#, each box stands for 1 GRF
40         _______________________________________________
41         |____R0_________R1_____|____R2_________R3_____|
42         |____G0_________G1_____|____G2_________G3_____|
43         |____B0_________B1_____|____B2_________B3_____|
44         |____A0_________A1_____|____A2_________A3_____|
45         |____R4_________R5_____|____R6_________R7_____|
46         |____G4_________G5_____|____G6_________G7_____|
47         |____B4_________B5_____|____B6_________B7_____|
48         |____A4_________A5_____|____A6_________A7_____|
49         */
50 
51         // R0/G0/B0/A0
52         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_0, 0) * Alpha)) + (((ColorFill(0) << 8 )* Alpha1))) >> 16;
53         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_0, 0).merge(Temp, (ColorFill(0) << 8), TempMask0[0][4 * i]);
54         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_0, 0) * Alpha)) + (((ColorFill(1) << 8)* Alpha1))) >> 16;
55         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_0, 0).merge(Temp, (ColorFill(1) << 8), TempMask0[0][4 * i]);
56         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_0, 0) * Alpha)) + (((ColorFill(2) << 8)* Alpha1))) >> 16;
57         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_0, 0).merge(Temp, (ColorFill(2) << 8), TempMask0[0][4 * i]);
58         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_0, 0) * Alpha)) + (((ColorFill(3) << 8)* Alpha1))) >> 16;
59         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_0, 0).merge(Temp, (ColorFill(3) << 8), TempMask0[0][4 * i]);
60 
61         // R1/G1/B1/A1
62         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_1, 0) * Alpha)) + (((ColorFill(0) << 8)* Alpha1))) >> 16;
63         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_1, 0).merge(Temp, (ColorFill(0) << 8), TempMask0[0][4 * i + 1]);
64         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_1, 0) * Alpha)) + (((ColorFill(1) << 8)* Alpha1))) >> 16;
65         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_1, 0).merge(Temp, (ColorFill(1) << 8), TempMask0[0][4 * i + 1]);
66         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_1, 0) * Alpha)) + (((ColorFill(2) << 8)* Alpha1))) >> 16;
67         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_1, 0).merge(Temp, (ColorFill(2) << 8), TempMask0[0][4 * i + 1]);
68         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_1, 0) * Alpha)) + (((ColorFill(3) << 8)* Alpha1))) >> 16;
69         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_1, 0).merge(Temp, (ColorFill(3) << 8), TempMask0[0][4 * i + 1]);
70 
71         // R2/G2/B2/A2
72         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_2, 0) * Alpha)) + (((ColorFill(0) << 8)* Alpha1))) >> 16;
73         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_2, 0).merge(Temp, (ColorFill(0) << 8), TempMask0[0][4 * i + 2]);
74         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_2, 0) * Alpha)) + (((ColorFill(1) << 8)* Alpha1))) >> 16;
75         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_2, 0).merge(Temp, (ColorFill(1) << 8), TempMask0[0][4 * i + 2]);
76         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_2, 0) * Alpha)) + (((ColorFill(2) << 8)* Alpha1))) >> 16;
77         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_2, 0).merge(Temp, (ColorFill(2) << 8), TempMask0[0][4 * i + 2]);
78         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_2, 0) * Alpha)) + (((ColorFill(3) << 8)* Alpha1))) >> 16;
79         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_2, 0).merge(Temp, (ColorFill(3) << 8), TempMask0[0][4 * i + 2]);
80 
81         // R3/G3/B3/A3
82         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_3, 0) * Alpha)) + (((ColorFill(0) << 8)* Alpha1))) >> 16;
83         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_RV_3, 0).merge(Temp, (ColorFill(0) << 8), TempMask0[0][4 * i + 3]);
84         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_3, 0) * Alpha)) + (((ColorFill(1) << 8)* Alpha1))) >> 16;
85         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_GY_3, 0).merge(Temp, (ColorFill(1) << 8), TempMask0[0][4 * i + 3]);
86         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_3, 0) * Alpha)) + (((ColorFill(2) << 8)* Alpha1))) >> 16;
87         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_BU_3, 0).merge(Temp, (ColorFill(2) << 8), TempMask0[0][4 * i + 3]);
88         Temp = (((DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_3, 0) * Alpha)) + (((ColorFill(3) << 8)* Alpha1))) >> 16;
89         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + Channel_Offset_A_3, 0).merge(Temp, (ColorFill(3) << 8), TempMask0[0][4 * i + 3]);
90     }
91 
92     TempMask0 = 0xFFFFFFFF;
93 }