1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #include "MDF_FC_common_genx.h"
23 
Prepare_LumaKey_SampleUnorm(CURBE_INPUT_OUTPUT,GLOBAL_BUFFER_INPUT_OUTPUT)24 _GENX_MAIN_ void Prepare_LumaKey_SampleUnorm(
25     CURBE_INPUT_OUTPUT,
26     GLOBAL_BUFFER_INPUT_OUTPUT)
27 {
28     //            BYTE 3           |            BYTE 2         |          BYTE  1         |       BYTE 0
29     //   31 30 23 22 | 29 28 21 20 | 15 14 07 06 | 13 12 05 04 | 27 26 19 18 |25 24 17 16 | 11 10 03 02 | 09 08 01 00  -- Sampler returned format
30     //   31 30 29 28 | 23 22 21 20 | 27 26 25 24 | 19 18 17 16 | 15 14 13 12 |07 06 05 04 | 11 10 09 08 | 03 02 01 00  -- Intermediate Format
31     //   31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 |11 10 09 08 | 07 06 05 04 | 03 02 01 00  -- Destination format
32 
33     uchar Buffer_Index2 = (Buffer_Index >> 4) << 4;
34     uchar Mask_Index = cm_add<uchar>((Buffer_Index >> 4), -4, SAT);
35     Mask_Index = Mask_Index << 2;
36     Mask_Index = Mask_Index + ((Layer_Index & 0x80) >> 4); // Shift right Layer_Index 7 bits for sec half flag, left 3 bits for mask offset
37 
38     matrix<uchar, 1, 4> Sampler_Lumakey_Temp_Bit_01;
39     matrix<uchar, 1, 4> Sampler_Lumakey_Temp_Bit_23;
40     matrix<uchar, 1, 4> Sampler_Lumakey_Temp_Bit_45;
41     matrix<uchar, 1, 4> Sampler_Lumakey_Temp_Bit_67;
42 
43     // 1st half
44     Sampler_Lumakey_Temp_Bit_01.select<1, 1, 2, 2>(0, 0) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 0) & 0x03;
45     Sampler_Lumakey_Temp_Bit_01.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 0) & 0x0C) >> 2;
46 
47     Sampler_Lumakey_Temp_Bit_23.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 0) & 0x30) >> 2;
48     Sampler_Lumakey_Temp_Bit_23.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 0) & 0xC0) >> 4;
49 
50     Sampler_Lumakey_Temp_Bit_45.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 2) & 0x03) << 4;
51     Sampler_Lumakey_Temp_Bit_45.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 2) & 0x0C) << 2;
52 
53     Sampler_Lumakey_Temp_Bit_67.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 2) & 0x30) << 2;
54     Sampler_Lumakey_Temp_Bit_67.select<1, 1, 2, 2>(0, 1) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0, 2) & 0xC0;
55 
56     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_23;
57     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_45;
58     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_67;
59 
60     TempMask[Mask_Index] = TempMask[Mask_Index] & Sampler_Lumakey_Temp_Bit_01.format<ushort, 1, 2>().row(0)[0];
61     TempMask[Mask_Index + 1] = TempMask[Mask_Index + 1] & Sampler_Lumakey_Temp_Bit_01.format<ushort, 1, 2>().row(0)[1];
62 
63     // 2nd half
64     Sampler_Lumakey_Temp_Bit_01.select<1, 1, 2, 2>(0, 0) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 0) & 0x03;
65     Sampler_Lumakey_Temp_Bit_01.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 0) & 0x0C) >> 2;
66 
67     Sampler_Lumakey_Temp_Bit_23.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 0) & 0x30) >> 2;
68     Sampler_Lumakey_Temp_Bit_23.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 0) & 0xC0) >> 4;
69 
70     Sampler_Lumakey_Temp_Bit_45.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 2) & 0x03) << 4;
71     Sampler_Lumakey_Temp_Bit_45.select<1, 1, 2, 2>(0, 1) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 2) & 0x0C) << 2;
72 
73     Sampler_Lumakey_Temp_Bit_67.select<1, 1, 2, 2>(0, 0) = (DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 2) & 0x30) << 2;
74     Sampler_Lumakey_Temp_Bit_67.select<1, 1, 2, 2>(0, 1) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 2, 1>(Buffer_Index2 + Channel_Offset_A_0 + 1, 2) & 0xC0;
75 
76     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_23;
77     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_45;
78     Sampler_Lumakey_Temp_Bit_01 = Sampler_Lumakey_Temp_Bit_01 | Sampler_Lumakey_Temp_Bit_67;
79 
80     TempMask[Mask_Index + 2] = TempMask[Mask_Index + 2] & Sampler_Lumakey_Temp_Bit_01.format<ushort, 1, 2>().row(0)[0];
81     TempMask[Mask_Index + 3] = TempMask[Mask_Index + 3] & Sampler_Lumakey_Temp_Bit_01.format<ushort, 1, 2>().row(0)[1];
82 }