1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 {
24     /*
25     Buffer layout after shuffle
26     _________________________________________________
27     |_______Block0__________|_______Block1__________|
28     |_______Block2__________|_______Block3__________|
29     |_______Block4__________|_______Block5__________|
30     |_______Block6__________|_______Block7__________|
31 
32     Write back buffer layout correlate to the block number#, each box stands for 1 GRF
33     _______________________________________________
34     |____R0_________R1_____|____R2_________R3_____|
35     |____G0_________G1_____|____G2_________G3_____|
36     |____B0_________B1_____|____B2_________B3_____|
37     |____A0_________A1_____|____A2_________A3_____|
38     |____R4_________R5_____|____R6_________R7_____|
39     |____G4_________G5_____|____G6_________G7_____|
40     |____B4_________B5_____|____B6_________B7_____|
41     |____A4_________A5_____|____A6_________A7_____|
42     */
43 
44     matrix_ref<uchar, 16, 16> Result_Y    = DataBuffer.format<uchar, 192, 16>().select<16, 1, 16, 1>(128, 0);
45     matrix_ref<uchar, 8, 16> Result_UV    = DataBuffer.format<uchar, 192, 16>().select<8, 1, 16, 1>(160, 0);
46     matrix_ref<ushort, 2, 16> Result_Temp_V = DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(88, 0);
47     matrix_ref<ushort, 2, 16> Result_Temp_U = DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(90, 0);
48 
49     SurfaceIndex Dst_Surface_Y(MDF_FC_OUTPUT_BTI_START);
50     SurfaceIndex Dst_Surface_UV(MDF_FC_OUTPUT_BTI_START + MDF_FC_UV_PLANE_BTI_OFFSET);
51 
52 #pragma unroll
53     // Rounding Y plane
54     for (uchar i = 0; i < 4; i++)
55     {
56         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 2, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 2, 0), 0x80, SAT);
57         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 3, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 3, 0), 0x80, SAT);
58         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 10, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 10, 0), 0x80, SAT);
59         DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 11, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 16, 1>(16 * i + 11, 0), 0x80, SAT);
60     }
61 
62 #pragma unroll
63     // Write Y plane
64     for (uchar i = 0; i < 4; i++)
65     {
66         Result_Y.select<1, 1, 8, 1>(4 * i, 0)     = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 2, 1);
67         Result_Y.select<1, 1, 8, 1>(4 * i, 8)     = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 10, 1);
68         Result_Y.select<1, 1, 8, 1>(4 * i + 1, 0) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 2, 17);
69         Result_Y.select<1, 1, 8, 1>(4 * i + 1, 8) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 10, 17);
70         Result_Y.select<1, 1, 8, 1>(4 * i + 2, 0) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 3, 1);
71         Result_Y.select<1, 1, 8, 1>(4 * i + 2, 8) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 11, 1);
72         Result_Y.select<1, 1, 8, 1>(4 * i + 3, 0) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 3, 17);
73         Result_Y.select<1, 1, 8, 1>(4 * i + 3, 8) = DataBuffer.format<uchar, 96, 32>().select<1, 1, 8, 2>(16 * i + 11, 17);
74     }
75 
76     write(Dst_Surface_Y, DstX, DstY, Result_Y);
77 
78 #pragma unroll
79     for (uchar i = 0; i < 4; i++)
80     {
81         // Write UV plane
82         switch (RotationChromaSitingFlag & 0x07000000)
83         {
84         case CHROMA_SUBSAMPLING_TOP_CENTER:
85             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 1));
86             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 1));
87             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 1));
88             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 1));
89 
90             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 1));
91             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 1));
92             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 1));
93             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 1));
94 
95             Result_Temp_U = cm_add<ushort>(Result_Temp_U, 0x80, SAT);
96             Result_Temp_V = cm_add<ushort>(Result_Temp_V, 0x80, SAT);
97 
98             Result_UV.select<2, 1, 8, 2>(2 * i, 0) = Result_Temp_U.format<uchar, 4, 16>().select<2, 2, 8, 2>(0, 1);
99             Result_UV.select<2, 1, 8, 2>(2 * i, 1) = Result_Temp_V.format<uchar, 4, 16>().select<2, 2, 8, 2>(0, 1);
100             break;
101         case CHROMA_SUBSAMPLING_CENTER_CENTER:
102             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 1));
103             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 1));
104             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 9));
105             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 9));
106             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 1));
107             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 1));
108             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 9));
109             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 9));
110 
111             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 1));
112             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 1));
113             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 9));
114             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 9));
115             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 1));
116             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(0, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 1));
117             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 9));
118             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 9));
119 
120             Result_Temp_V.format<ushort, 4, 8>().select<2, 2, 8, 1>(0, 0) = cm_avg<ushort>(Result_Temp_V.format<ushort, 4, 8>().select<2, 2, 8, 1>(0, 0), Result_Temp_V.format<ushort, 4, 8>().select<2, 2, 8, 1>(1, 0));
121             Result_Temp_U.format<ushort, 4, 8>().select<2, 2, 8, 1>(0, 0) = cm_avg<ushort>(Result_Temp_U.format<ushort, 4, 8>().select<2, 2, 8, 1>(0, 0), Result_Temp_U.format<ushort, 4, 8>().select<2, 2, 8, 1>(1, 0));
122 
123             Result_Temp_U = cm_add<ushort>(Result_Temp_U, 0x80, SAT);
124             Result_Temp_V = cm_add<ushort>(Result_Temp_V, 0x80, SAT);
125 
126             Result_UV.select<2, 1, 8, 2>(2 * i, 0) = Result_Temp_U.format<uchar, 4, 16>().select<2, 2, 8, 2>(0, 1);
127             Result_UV.select<2, 1, 8, 2>(2 * i, 1) = Result_Temp_V.format<uchar, 4, 16>().select<2, 2, 8, 2>(0, 1);
128 
129             break;
130         case CHROMA_SUBSAMPLING_BOTTOM_CENTER:
131             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i, 9));
132             Result_Temp_V.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 8, 9));
133             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 1, 9));
134             Result_Temp_V.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 9, 9));
135 
136             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 4, 9));
137             Result_Temp_U.select<1, 1, 16, 1>(0, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 12, 9));
138             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 5, 9));
139             Result_Temp_U.select<1, 1, 16, 1>(1, 0).format<ushort, 2, 8>().select<1, 1, 4, 1>(1, 4) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 8), DataBuffer.format<ushort, 96, 16>().select<1, 1, 4, 2>(16 * i + 13, 9));
140 
141             Result_Temp_U = cm_add<ushort>(Result_Temp_U, 0x80, SAT);
142             Result_Temp_V = cm_add<ushort>(Result_Temp_V, 0x80, SAT);
143 
144             Result_UV.select<2, 1, 8, 2>(2 * i, 0) = Result_Temp_U.format<uchar, 4, 16>().select<2, 2, 8, 2>(1, 1);
145             Result_UV.select<2, 1, 8, 2>(2 * i, 1) = Result_Temp_V.format<uchar, 4, 16>().select<2, 2, 8, 2>(1, 1);
146             break;
147         case CHROMA_SUBSAMPLING_TOP_LEFT:
148             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i, 0)      = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i, 0), 0x80, SAT);
149             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 8, 0)  = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 8, 0), 0x80, SAT);
150 
151             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 4, 0)  = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 4, 0), 0x80, SAT);
152             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 12, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 12, 0), 0x80, SAT);
153 
154             Result_UV.select<2, 1, 4, 2>(2 * i, 1) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i, 1);
155             Result_UV.select<2, 1, 4, 2>(2 * i, 9) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 8, 1);
156 
157             Result_UV.select<2, 1, 4, 2>(2 * i, 0) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 4, 1);
158             Result_UV.select<2, 1, 4, 2>(2 * i, 8) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 12, 1);
159 
160             break;
161         case CHROMA_SUBSAMPLING_CENTER_LEFT:
162             Result_Temp_V.select<1, 1, 8, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i, 8));
163             Result_Temp_V.select<1, 1, 8, 1>(0, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 8, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 8, 8));
164             Result_Temp_V.select<1, 1, 8, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 1, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 1, 8));
165             Result_Temp_V.select<1, 1, 8, 1>(1, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 9, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 9, 8));
166 
167             Result_Temp_U.select<1, 1, 8, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 4, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 4, 8));
168             Result_Temp_U.select<1, 1, 8, 1>(0, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 12, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 12, 8));
169             Result_Temp_U.select<1, 1, 8, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 5, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 5, 8));
170             Result_Temp_U.select<1, 1, 8, 1>(1, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 13, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 1>(16 * i + 13, 8));
171 
172             Result_Temp_U = cm_add<ushort>(Result_Temp_U, 0x80, SAT);
173             Result_Temp_V = cm_add<ushort>(Result_Temp_V, 0x80, SAT);
174 
175             Result_UV.select<2, 1, 8, 2>(2 * i, 0) = Result_Temp_U.format<uchar, 2, 32>().select<2, 1, 8, 4>(0, 1);
176             Result_UV.select<2, 1, 8, 2>(2 * i, 1) = Result_Temp_V.format<uchar, 2, 32>().select<2, 1, 8, 4>(0, 1);
177 
178             break;
179         case CHROMA_SUBSAMPLING_BOTTOM_LEFT:
180             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i, 0), 0x80, SAT);
181             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 8, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 8, 0), 0x80, SAT);
182 
183             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 4, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 4, 0), 0x80, SAT);
184             DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 12, 0) = cm_add<ushort>(DataBuffer.format<ushort, 96, 16>().select<2, 1, 16, 1>(16 * i + 12, 0), 0x80, SAT);
185 
186             Result_UV.select<2, 1, 4, 2>(2 * i, 1) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i, 17);
187             Result_UV.select<2, 1, 4, 2>(2 * i, 9) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 8, 17);
188 
189             Result_UV.select<2, 1, 4, 2>(2 * i, 0) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 4, 17);
190             Result_UV.select<2, 1, 4, 2>(2 * i, 8) = DataBuffer.format<uchar, 96, 32>().select<2, 1, 4, 4>(16 * i + 12, 17);
191 
192             break;
193         default:
194             Result_Temp_V.select<1, 1, 8, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i, 1));
195             Result_Temp_V.select<1, 1, 8, 1>(0, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 8, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 8, 1));
196             Result_Temp_V.select<1, 1, 8, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 1, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 1, 1));
197             Result_Temp_V.select<1, 1, 8, 1>(1, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 9, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 9, 1));
198 
199             Result_Temp_U.select<1, 1, 8, 1>(0, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 4, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 4, 1));
200             Result_Temp_U.select<1, 1, 8, 1>(0, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 12, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 12, 1));
201             Result_Temp_U.select<1, 1, 8, 1>(1, 0) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 5, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 5, 1));
202             Result_Temp_U.select<1, 1, 8, 1>(1, 8) = cm_avg<ushort>(DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 13, 0), DataBuffer.format<ushort, 96, 16>().select<1, 1, 8, 2>(16 * i + 13, 1));
203 
204             Result_Temp_U = cm_add<ushort>(Result_Temp_U, 0x80, SAT);
205             Result_Temp_V = cm_add<ushort>(Result_Temp_V, 0x80, SAT);
206 
207             Result_UV.select<2, 1, 8, 2>(2 * i, 0) = Result_Temp_U.format<uchar, 2, 32>().select<2, 1, 8, 4>(0, 1);
208             Result_UV.select<2, 1, 8, 2>(2 * i, 1) = Result_Temp_V.format<uchar, 2, 32>().select<2, 1, 8, 4>(0, 1);
209             break;
210         }
211     }
212 
213     write(Dst_Surface_UV, DstX, DstY >> 1, Result_UV);
214 }