/*
 * Copyright © <2010>, Intel Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
// Module name: IntraFrame_gen8.asm
//
// Perform intra prediction estimation for intra frames on Gen8
//

//
//  Now, begin source code....
//

/*
 * __INTRA_START -- kernel entry point.
 *
 * Clears the scratch registers, then fills in the message headers that the
 * sends further down copy into their message registers:
 *   read0_header / read1_header : headers for the media block reads
 *   vme_m0                      : header for the VME message
 *   obw_m0                      : header for the OWord block write
 */
__INTRA_START:
/* Zero scratch space: four 16-dword moves based at tmp_reg0/2/4/6. */
mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1};
mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1};

/* read0_header: origin (x * 16 - 8, y * 16 - 1), block size BLOCK_32X1. */
shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};        /* (x, y) * 16 */
add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */
mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                     /* dispatch id */

/* read1_header: origin (x * 16 - 4, y * 16), block size BLOCK_4X16. */
shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};        /* (x, y) * 16 */
add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                     /* dispatch id */

/* vme_m0: the two words at offset 8 receive (x, y) * 16. */
shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};        /* (x, y) * 16 */
mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                     /* dispatch id */

/* obw_m0.8 = (w_in_mb_uw * y + x) * 2 */
mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                     /* dispatch id */

/*
 * Media Read Message -- fetch Luma neighbor edge pixels
 *
 * Both reads use BIND_IDX_INEP. msg_reg0 is reloaded with the relevant
 * header before each send.
 */
/* ROW: read0_header -> INEP_ROW (1 response register) */
mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};
send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};

/* COL: read1_header -> INEP_COL0 (2 response registers) */
mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};
send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};

/*
 * Media Read Message -- fetch Chroma neighbor edge pixels
 *
 * Reuses read0_header / read1_header with chroma coordinates; both reads
 * use BIND_IDX_CBCR.
 */
/* ROW: origin (x * 16 - 8, y * 8 - 1) -> CHROMA_ROW.
 * read0_header.8 is not rewritten here, so the block size stays at the
 * BLOCK_32X1 value stored at kernel entry. */
shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};        /* (x, y) * 8 */
mul  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D  2:W {align1};     /* x * 16 */
add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */
mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};
send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};

/* COL: origin (x * 16 - 4, y * 8), block size BLOCK_8X4 -> CHROMA_COL. */
shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};        /* (x, y) * 8 */
mul  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D  2:W {align1};     /* x * 16 */
add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
mov  (1) read1_header.8<1>:UD   BLOCK_8X4 {align1};
mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};
send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};

/*
 * VME message payload, m2 .. m7
 */

/* m2: get the MV/MB cost passed by constant buffer when creating the
 * EU thread by MEDIA_OBJECT (copied from r1). */
mov  (8) vme_msg_2<1>:UD        r1.0<8,8,1>:UD {align1};

/* m3: all zero. This is changed for FWD/BWD cost center */
mov  (8) vme_msg_3<1>:UD        0x0:UD {align1};

/* m4: all zero */
mov  (8) vme_msg_4<1>:UD        0x0:UD {align1};

/* m5: copy of INEP_ROW after clearing its first dword and keeping only
 * bits 31:24 of its second dword. */
mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1};
mov  (8) vme_msg_5<1>:UD        INEP_ROW.0<8,8,1>:UD {align1};

/* Use the Luma mode: m5 byte 5 = low byte of LUMA_CHROMA_MODE,
 * staged through tmp_reg0. */
mov  (1) tmp_reg0.0<1>:UW       LUMA_CHROMA_MODE:UW {align1};
mov  (1) vme_msg_5.5<1>:UB      tmp_reg0.0<0,1,0>:UB {align1};

/* m6: starts zeroed; its first 16 bytes then take every fourth byte of
 * INEP_COL0 beginning at offset 3 (region <32,8,4>).
 * NOTE(review): presumably the right-most pixel of each row of the
 * BLOCK_4X16 read -- confirm against the block-size defines. */
mov  (8) vme_msg_6<1>:UD        0x0:UD {align1};
mov (16) vme_msg_6.0<1>:UB      INEP_COL0.3<32,8,4>:UB {align1};
mov  (1) vme_msg_6.16<1>:UD     INTRA_PREDICTORE_MODE {align1};

/* the penalty for Intra mode */
mov  (1) vme_msg_6.28<1>:UD     0x010101:UD {align1};
/* one word from CHROMA_ROW offset 6 -> m6 offset 20 */
mov  (1) vme_msg_6.20<1>:UW     CHROMA_ROW.6<0,1,0>:UW {align1};


/* m7: offset 16 onwards takes four dwords from CHROMA_ROW offset 8;
 * the first eight words take every second word of CHROMA_COL beginning
 * at offset 2 (region <16,8,2>). */
mov  (4) vme_msg_7.16<1>:UD     CHROMA_ROW.8<4,4,1>:UD {align1};
mov  (8) vme_msg_7.0<1>:UW      CHROMA_COL.2<16,8,2>:UW {align1};

/*
 * VME message
 *
 * Finishes m1 and m0, then issues the VME send whose result lands in vme_wb.
 * NOTE(review): intra_flag, intra_part_mask_ub and mb_intra_struct_ub are
 * presumably fields of vme_m1 -- confirm against the register defines.
 */

/* m1 */
mov  (1) intra_flag<1>:UW       0x0:UW {align1};
/* f0.0 is set when bit 0 of transform_8x8_ub is clear; only then is the
 * intra partition mask overwritten with LUMA_INTRA_8x8_DISABLE. */
and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
(f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1};

/* assign MB intra struct from the thread payload */
mov  (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};

/* Disable the DC Haar component when calculating the Haar SATD block:
 * vme_m1 byte 30 = low byte of DC_HARR_DISABLE, staged through tmp_reg0. */
mov  (1) tmp_reg0.0<1>:UW       DC_HARR_DISABLE:UW {align1};
mov  (1) vme_m1.30<1>:UB        tmp_reg0.0<0,1,0>:UB {align1};

mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};

/* m0 */
/* 16x16 Source, Intra Haar: INTRA_SAD_HAAR is added into vme_m0 offset 12 */
add  (1) vme_m0.12<1>:UD        vme_m0.12<0,1,0>:UD INTRA_SAD_HAAR:UD {align1};
mov  (8) vme_msg_0<1>:UD        vme_m0.0<8,8,1>:UD {align1};

/* after verification it will be passed by using payload */
send (8)
        vme_msg_ind
        vme_wb<1>:UD
        null
        cre(
                BIND_IDX_VME,
                VME_SIC_MESSAGE_TYPE
        )
        mlen sic_vme_msg_length
        rlen vme_wb_length
        {align1};

/*
 * Post-process the VME writeback: set W0_TRANSFORM_8x8_FLAG in vme_wb.0
 * when the MB type bits are 0 and the intra MB mode is W0_INTRA_8x8.
 */

/* Check whether mb type is 0.
 * f0.0 is set when the masked bits are all zero; the inverted predicate
 * therefore skips the 8x8 check for any non-zero MB type. */
and.z.f0.0 (1) null<1>:UD vme_wb.0<0,1,0>:UD W0_INTRA_MB_TYPE_MASK {align1};
(-f0.0) jmpi (1) __write_intra_output;

/* Check whether intra mb mode is INTRA_8x8 */
and (1) tmp_reg2<1>:UD vme_wb.0<0,1,0>:UD W0_INTRA_MB_MODE_MASK {align1};
cmp.z.f0.0 (1) null<1>:UD tmp_reg2<0,1,0>:UD W0_INTRA_8x8 {align1};

/* Set transform 8x8 flag (only when the compare above matched) */
(f0.0) or (1) vme_wb.0<1>:UD vme_wb.0<0,1,0>:UD W0_TRANSFORM_8x8_FLAG {align1};

__write_intra_output:
/*
 * Oword Block Write message
 *
 * msg_reg0 carries the obw_m0 header; msg_reg1 is packed from the VME
 * writeback (vme_wb) field by field, as listed below.
 */
mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};

/* msg_reg1 offsets 0/4/8/12 <- vme_wb offsets 0/16/20/24 */
mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};

/* Distortion, Intra (17-16): one word from vme_wb offset 12 */
mov  (1) msg_reg1.16<1>:UW      vme_wb.12<0,1,0>:UW     {align1};

mov  (1) msg_reg1.20<1>:UD      vme_wb.8<0,1,0>:UD      {align1};
/* VME clock counts */
mov  (1) msg_reg1.24<1>:UD      vme_wb.28<0,1,0>:UD     {align1};

/* copy of obw_m0 offset 8 */
mov  (1) msg_reg1.28<1>:UD      obw_m0.8<0,1,0>:UD      {align1};

/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write)
 * NOTE(review): those numbers come from the original comment; the send
 * itself only names the OBW_* constants, which are defined elsewhere. */
send (16)
        msg_ind
        obw_wb
        null
        data_port(
                OBW_CACHE_TYPE,
                OBW_MESSAGE_TYPE,
                OBW_CONTROL_2,
                OBW_BIND_IDX,
                OBW_WRITE_COMMIT_CATEGORY,
                OBW_HEADER_PRESENT
        )
        mlen 2
        rlen obw_wb_length
        {align1};

__EXIT:
/*
 * kill thread
 *
 * Copies r0 into ts_msg_reg0 and sends it to the thread spawner with the
 * EOT option set.
 */
mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
send (16) ts_msg_ind acc0<1>:UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
215