1/*
2 * Copyright © <2010>, Intel Corporation.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 * Authors: Zhao Yakui <yakui.zhao@intel.com>
24 */
// Module name: Inter_bframe_haswell.asm
//
// Make inter prediction estimation for Inter frame for B-frame
//
29
30//
31//  Now, begin source code....
32//
33
/*
 * Software call/return helpers used together with jmpi.
 * SAVE_RET writes ip + 32 into RETURN_REG; RETURN moves RETURN_REG back
 * into ip.  Both expand to an instruction without options or terminator,
 * so every use site appends its own "{align1};".
 */
#define SAVE_RET        add (1) RETURN_REG<1>:ud ip:ud 32:ud
#define RETURN          mov (1) ip:ud RETURN_REG<0,1,0>:ud
36
/*
 * __INTER_START -- kernel entry point.
 *
 * Clears the scratch registers, builds the message headers (media block
 * reads, VME, OWord block write), fetches the luma and chroma neighbour edge
 * pixels, and finally tests bit 0x04 of mb_hwdep: when that bit is clear the
 * neighbour-MB walk is skipped and control goes straight to __mb_hwdep_end.
 */
__INTER_START:
        /* Clear the scratch registers (16 dwords per mov) */
        mov (16)        tmp_reg0.0<1>:ud 0x0:ud {align1};
        mov (16)        tmp_reg2.0<1>:ud 0x0:ud {align1};
        mov (16)        tmp_reg4.0<1>:ud 0x0:ud {align1};
        mov (16)        tmp_reg6.0<1>:ud 0x0:ud {align1};

        /* read0_header: origin (16*x - 8, 16*y - 1), block size BLOCK_32X1 */
        shl (2)         read0_header.0<1>:d orig_xy_ub<2,2,1>:ub 4:uw {align1};         /* (x, y) * 16 */
        add (1)         read0_header.0<1>:d read0_header.0<0,1,0>:d -8:w {align1};      /* X offset */
        add (1)         read0_header.4<1>:d read0_header.4<0,1,0>:d -1:w {align1};      /* Y offset */
        mov (1)         read0_header.8<1>:ud BLOCK_32X1 {align1};
        mov (1)         read0_header.20<1>:ub thread_id_ub {align1};                    /* dispatch id */

        /* read1_header: origin (16*x - 4, 16*y), block size BLOCK_4X16 */
        shl (2)         read1_header.0<1>:d orig_xy_ub<2,2,1>:ub 4:uw {align1};         /* (x, y) * 16 */
        add (1)         read1_header.0<1>:d read1_header.0<0,1,0>:d -4:w {align1};      /* X offset */
        mov (1)         read1_header.8<1>:ud BLOCK_4X16 {align1};
        mov (1)         read1_header.20<1>:ub thread_id_ub {align1};                    /* dispatch id */

        /* vme_m0.8: (x, y) * 16 stored as two words */
        shl (2)         vme_m0.8<1>:uw orig_xy_ub<2,2,1>:ub 4:uw {align1};
        mov (1)         vme_m0.20<1>:ub thread_id_ub {align1};                          /* dispatch id */

        /* obw_m0.8 = (y * w_in_mb + x) * 24 */
        mul (1)         obw_m0.8<1>:ud w_in_mb_uw<0,1,0>:uw orig_y_ub<0,1,0>:ub {align1};
        add (1)         obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud orig_x_ub<0,1,0>:ub {align1};
        mul (1)         obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 24:ud {align1};
        mov (1)         obw_m0.20<1>:ub thread_id_ub {align1};                          /* dispatch id */

/*
 * Media Read Message -- fetch Luma neighbor edge pixels
 */
        /* ROW */
        mov (8)         msg_reg0.0<1>:ud read0_header.0<8,8,1>:ud {align1};
        send (8)        msg_ind INEP_ROW<1>:ub null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};

        /* COL */
        mov (8)         msg_reg0.0<1>:ud read1_header.0<8,8,1>:ud {align1};
        send (8)        msg_ind INEP_COL0<1>:ub null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};

/*
 * Media Read Message -- fetch Chroma neighbor edge pixels
 */
        /* ROW: origin (16*x - 8, 8*y - 1); the block size left in read0_header.8 is reused */
        shl (2)         read0_header.0<1>:d orig_xy_ub<2,2,1>:ub 3:uw {align1};         /* (x, y) * 8 */
        mul (1)         read0_header.0<1>:d read0_header.0<0,1,0>:d 2:w {align1};       /* x * 16 */
        add (1)         read0_header.0<1>:d read0_header.0<0,1,0>:d -8:w {align1};      /* X offset */
        add (1)         read0_header.4<1>:d read0_header.4<0,1,0>:d -1:w {align1};      /* Y offset */
        mov (8)         msg_reg0.0<1>:ud read0_header.0<8,8,1>:ud {align1};
        send (8)        msg_ind CHROMA_ROW<1>:ub null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};

        /* COL: origin (16*x - 4, 8*y), block size BLOCK_8X4 */
        shl (2)         read1_header.0<1>:d orig_xy_ub<2,2,1>:ub 3:uw {align1};         /* (x, y) * 8 */
        mul (1)         read1_header.0<1>:d read1_header.0<0,1,0>:d 2:w {align1};       /* x * 16 */
        add (1)         read1_header.0<1>:d read1_header.0<0,1,0>:d -4:w {align1};      /* X offset */
        mov (1)         read1_header.8<1>:ud BLOCK_8X4 {align1};
        mov (8)         msg_reg0.0<1>:ud read1_header.0<8,8,1>:ud {align1};
        send (8)        msg_ind CHROMA_COL<1>:ub null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};

        /* Defaults: zero VME m1, the MV predictors and the reference window */
        mov (8)         vme_m1.0<1>:ud 0:ud {align1};
        mov (8)         mb_mvp_ref.0<1>:ud 0:ud {align1};
        mov (8)         mb_ref_win.0<1>:ud 0:ud {align1};

        /* Skip the neighbour-MB walk when bit 0x04 of mb_hwdep is clear */
        and.z.f0.0 (1)  null:uw mb_hwdep<0,1,0>:uw 0x04:uw {align1};
(f0.0)  jmpi (1)        __mb_hwdep_end;
100
/*
 * Read back the data for MB A (the MB at x - 1, same y).
 *
 * Layout of each neighbour result register (mba/mbb/mbc_result):
 *   rX.0  Available flag        rX.4  MV (L0)          rX.8  MV (L1)
 *   rX.16 Pred_L0 flag          rX.18 Pred_L1 flag
 *   rX.20 Forward reference ID  rX.22 Backward reference ID
 */
        mov (8)         mba_result.0<1>:ud 0x0:ud {align1};
        mov (8)         mbb_result.0<1>:ud 0x0:ud {align1};
        mov (8)         mbc_result.0<1>:ud 0x0:ud {align1};
mba_start:
        mov (8)         mb_msg0.0<1>:ud 0:ud {align1};
        and.z.f0.0 (2)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
        /* MB A doesn't exist: MV stays zero, available flag stays zero, both ref IDs = -1 */
(f0.0)  mov (2)         mba_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mbb_start;

        mov (1)         mba_result.0<1>:d MB_AVAIL {align1};
        /* mb_msg0.8 = (y * w_in_mb + (x - 1)) * 24 */
        mov (2)         tmp_reg0.0<1>:uw orig_xy_ub<2,2,1>:ub {align1};
        add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w -1:w {align1};
        mul (1)         mb_msg0.8<1>:ud w_in_mb_uw<0,1,0>:uw tmp_reg0.2<0,1,0>:uw {align1};
        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud tmp_reg0.0<0,1,0>:uw {align1};
        mul (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 24:ud {align1};
        mov (1)         mb_msg0.20<1>:ub thread_id_ub {align1};         /* dispatch id */

        /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_wb.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_4,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 2 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* mb_intra_wb.16 < mb_inter_wb.8: leave MB A without MV, ref IDs = -1 */
        cmp.l.f0.0 (2)  null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
(f0.0)  mov (2)         mba_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mbb_start;

        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 3:ud {align1};
        /* Read MV for MB A */
        /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_mv0.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_8,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 4 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* MV: ask mb_pred_func for the prediction mode of block INTER_BLOCK1 */
        mov (2)         mba_result.20<1>:w -1:w {align1};
        mov (1)         INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.8<1>:ud INTER_BLOCK1:ud {align1};
        SAVE_RET {align1};
        jmpi (1)        mb_pred_func;
        mov (1)         mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};

        /* PRED_L0: forward flag, forward ref ID 0, forward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
(f0.0)  mov (1)         mba_result.16<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mba_result.20<1>:w 0:w {align1};
(f0.0)  mov (1)         mba_result.4<1>:ud mb_mv1.8<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mbb_start;

        /* PRED_L1: backward flag, backward ref ID 0, backward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
(f0.0)  mov (1)         mba_result.18<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mba_result.22<1>:w 0:w {align1};
(f0.0)  mov (1)         mba_result.8<1>:ud mb_mv1.12<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mbb_start;

        /* Any other mode: both MVs, both flags, both ref IDs 0 */
        mov (2)         mba_result.4<1>:ud mb_mv1.8<2,2,1>:ud {align1};
        mov (2)         mba_result.16<1>:uw MB_PRED_FLAG {align1};
        mov (2)         mba_result.20<1>:w 0:w {align1};

/* Read back the data for MB B (the MB at same x, y - 1) into mbb_result */
mbb_start:
        mov (8)         mb_msg0.0<1>:ud 0:ud {align1};
        and.z.f0.0 (2)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
        /* MB B doesn't exist. Zero MV. The available flag of MB B stays zero */
        /* If MB B doesn't exist, neither MB C nor D exists */
(f0.0)  mov (2)         mbb_result.20<1>:w -1:w {align1};
(f0.0)  mov (2)         mbc_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        mov (1)         mbb_result.0<1>:d MB_AVAIL {align1};
        /* mb_msg0.8 = ((y - 1) * w_in_mb + x) * 24 */
        mov (2)         tmp_reg0.0<1>:uw orig_xy_ub<2,2,1>:ub {align1};
        add (1)         tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
        mul (1)         mb_msg0.8<1>:ud w_in_mb_uw<0,1,0>:uw tmp_reg0.2<0,1,0>:uw {align1};
        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud tmp_reg0.0<0,1,0>:uw {align1};
        mul (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 24:ud {align1};
        mov (1)         mb_msg0.20<1>:ub thread_id_ub {align1};         /* dispatch id */

        /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_wb.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_4,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 2 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* mb_intra_wb.16 < mb_inter_wb.8: leave MB B without MV, ref IDs = -1 */
        cmp.l.f0.0 (2)  null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
(f0.0)  mov (2)         mbb_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mbc_start;

        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 3:ud {align1};
        /* Read MV for MB B */
        /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_mv0.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_8,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 4 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* Ask mb_pred_func for the prediction mode of block INTER_BLOCK2 */
        mov (2)         mbb_result.20<1>:w -1:w {align1};
        mov (1)         INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1};
        SAVE_RET {align1};
        jmpi (1)        mb_pred_func;
        mov (1)         mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};

        /* PRED_L0: forward flag, forward ref ID 0, forward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
(f0.0)  mov (1)         mbb_result.16<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbb_result.20<1>:w 0:w {align1};
(f0.0)  mov (1)         mbb_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mbc_start;

        /* PRED_L1: backward flag, backward ref ID 0, backward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
(f0.0)  mov (1)         mbb_result.18<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbb_result.22<1>:w 0:w {align1};
(f0.0)  mov (1)         mbb_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mbc_start;

        /* Any other mode: both flags, both ref IDs 0, both MVs */
        mov (2)         mbb_result.16<1>:uw MB_PRED_FLAG {align1};
        mov (2)         mbb_result.20<1>:w 0:w {align1};
        mov (2)         mbb_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};

/* Read back the data for MB C (the MB at x + 1, y - 1) into mbc_result */
mbc_start:
        mov (8)         mb_msg0.0<1>:ud 0:ud {align1};
        and.z.f0.0 (1)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_C:uw {align1};
        /* MB C doesn't exist. Zero MV. The available flag of MB C stays zero */
        /* Based on h264 spec the MB D will be replaced if MB C doesn't exist */
(f0.0)  jmpi (1)        mbd_start;

        mov (1)         mbc_result.0<1>:d MB_AVAIL {align1};
        /* mb_msg0.8 = ((y - 1) * w_in_mb + (x + 1)) * 24 */
        mov (2)         tmp_reg0.0<1>:uw orig_xy_ub<2,2,1>:ub {align1};
        add (1)         tmp_reg0.2<1>:w tmp_reg0.2<0,1,0>:w -1:w {align1};
        add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
        mul (1)         mb_msg0.8<1>:ud w_in_mb_uw<0,1,0>:uw tmp_reg0.2<0,1,0>:uw {align1};
        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud tmp_reg0.0<0,1,0>:uw {align1};
        mul (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 24:ud {align1};
        mov (1)         mb_msg0.20<1>:ub thread_id_ub {align1};         /* dispatch id */

        /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_wb.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_4,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 2 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* mb_intra_wb.16 < mb_inter_wb.8: leave MB C without MV, ref IDs = -1 */
        cmp.l.f0.0 (2)  null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
(f0.0)  mov (2)         mbc_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 3:ud {align1};
        /* Read MV for MB C */
        /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_mv0.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_8,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 4 {align1};

        /* TODO: RefID is required after multi-references are added */
        /* Forward MV: ask mb_pred_func for the prediction mode of block INTER_BLOCK2 */
        mov (2)         mbc_result.20<1>:w -1:w {align1};
        mov (1)         INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.8<1>:ud INTER_BLOCK2:ud {align1};
        SAVE_RET {align1};
        jmpi (1)        mb_pred_func;
        mov (1)         mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};

        /* PRED_L0: forward flag, forward ref ID 0, forward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
(f0.0)  mov (1)         mbc_result.16<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbc_result.20<1>:w 0:w {align1};
(f0.0)  mov (1)         mbc_result.4<1>:ud mb_mv2.16<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        /* PRED_L1: backward flag, backward ref ID 0, backward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
(f0.0)  mov (1)         mbc_result.18<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbc_result.22<1>:w 0:w {align1};
(f0.0)  mov (1)         mbc_result.8<1>:ud mb_mv2.20<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        /* Any other mode: both flags, both ref IDs 0, both MVs */
        mov (2)         mbc_result.16<1>:uw MB_PRED_FLAG {align1};
        mov (2)         mbc_result.20<1>:w 0:w {align1};
        mov (2)         mbc_result.4<1>:ud mb_mv2.16<2,2,1>:ud {align1};

        /* MB C was used, so the MB D fallback below is skipped */
        jmpi (1)        mb_mvp_start;
/*
 * MB D (the MB at x - 1, y - 1) stands in for MB C when MB C is not
 * available; its data is stored into mbc_result.
 */
mbd_start:
        mov (8)         mb_msg0.0<1>:ud 0:ud {align1};
        and.z.f0.0 (2)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_D:uw {align1};
        /* MB D doesn't exist either: ref IDs = -1, MV and available flag stay zero */
(f0.0)  mov (2)         mbc_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        mov (1)         mbc_result.0<1>:d MB_AVAIL {align1};
        /* mb_msg0.8 = ((y - 1) * w_in_mb + (x - 1)) * 24 */
        mov (2)         tmp_reg0.0<1>:uw orig_xy_ub<2,2,1>:ub {align1};
        add (2)         tmp_reg0.0<1>:w tmp_reg0.0<2,2,1>:w -1:w {align1};
        mul (1)         mb_msg0.8<1>:ud w_in_mb_uw<0,1,0>:uw tmp_reg0.2<0,1,0>:uw {align1};
        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud tmp_reg0.0<0,1,0>:uw {align1};
        mul (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 24:ud {align1};
        mov (1)         mb_msg0.20<1>:ub thread_id_ub {align1};         /* dispatch id */

        /* bind index 3, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
        send (16)       mb_ind mb_wb.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_4,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 2 {align1};

        /* mb_intra_wb.16 < mb_inter_wb.8: leave the result without MV, ref IDs = -1 */
        cmp.l.f0.0 (2)  null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
(f0.0)  mov (2)         mbc_result.20<1>:w -1:w {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        add (1)         mb_msg0.8<1>:ud mb_msg0.8<0,1,0>:ud 3:ud {align1};
        /* Read MV for MB D */
        /* bind index 3, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
        /* The writeback destination is typed :ud like the MV reads for MB A, B and C */
        send (16)       mb_ind mb_mv0.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_TYPE, OBR_CONTROL_8,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 4 {align1};

        /* TODO: RefID is required after multi-references are added */

        /* Forward MV: ask mb_pred_func for the prediction mode of block INTER_BLOCK3 */
        mov (2)         mbc_result.20<1>:w -1:w {align1};
        mov (1)         INPUT_ARG0.0<1>:ud mb_inter_wb.4<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.4<1>:ud mb_inter_wb.0<0,1,0>:ud {align1};
        mov (1)         INPUT_ARG0.8<1>:ud INTER_BLOCK3:ud {align1};
        SAVE_RET {align1};
        jmpi (1)        mb_pred_func;
        mov (1)         mb_pred_mode.0<1>:uw RET_ARG<0,1,0>:uw {align1};

        /* PRED_L0: forward flag, forward ref ID 0, forward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L0 {align1};
(f0.0)  mov (1)         mbc_result.16<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbc_result.20<1>:w 0:w {align1};
(f0.0)  mov (1)         mbc_result.4<1>:ud mb_mv3.24<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        /* PRED_L1: backward flag, backward ref ID 0, backward MV only */
        cmp.e.f0.0 (1)  null:uw mb_pred_mode.0<0,1,0>:uw PRED_L1 {align1};
(f0.0)  mov (1)         mbc_result.18<1>:uw MB_PRED_FLAG {align1};
(f0.0)  mov (1)         mbc_result.22<1>:w 0:w {align1};
(f0.0)  mov (1)         mbc_result.8<1>:ud mb_mv3.28<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mb_mvp_start;

        /* Any other mode: both flags, both ref IDs 0, both MVs */
        mov (2)         mbc_result.16<1>:uw MB_PRED_FLAG {align1};
        mov (2)         mbc_result.20<1>:w 0:w {align1};
        mov (2)         mbc_result.4<1>:ud mb_mv3.24<2,2,1>:ud {align1};

/*
 * Choose the MV predictor.  When neither mbb_result nor mbc_result is marked
 * available, the predictor comes from MB A alone (or stays zero when MB A is
 * unavailable too) and the median step is skipped.
 */
mb_mvp_start:
        /*TODO: Add the skip prediction */
        /* Check whether both MB B and C are unavailable */
        add (1)         tmp_reg0.0<1>:d mbb_result.0<0,1,0>:d mbc_result.0<0,1,0>:d {align1};
        cmp.z.f0.0 (1)  null:d tmp_reg0.0<0,1,0>:d 0:d {align1};
(-f0.0) jmpi (1)        mb_median_start;

        /* MB A available: copy its MVs and ref IDs to B and C and use its MVs as predictor */
        cmp.nz.f0.0 (2) null:d mba_result.0<0,1,0>:d 0:d {align1};
(f0.0)  mov (2)         mbb_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0)  mov (2)         mbc_result.4<1>:ud mba_result.4<2,2,1>:ud {align1};
(f0.0)  mov (2)         mbb_result.20<1>:uw mba_result.20<2,2,1>:uw {align1};
(f0.0)  mov (2)         mbc_result.20<1>:uw mba_result.20<2,2,1>:uw {align1};
(f0.0)  mov (2)         mb_mvp_ref.0<1>:ud mba_result.4<2,2,1>:ud {align1};
        /* MB A unavailable as well: zero predictor */
(-f0.0) mov (2)         mb_mvp_ref.0<1>:ud 0:ud {align1};
        jmpi (1)        __mb_hwdep_end;

/*
 * forward_MVP: result goes to mb_mvp_ref.0 (x) and mb_mvp_ref.2 (y).
 * tmp_reg0.0 counts the neighbours whose forward ref ID (rX.20) is 0 and
 * tmp_reg0.4 remembers the forward MV of the last such neighbour.
 */
mb_median_start:
        /* check whether only one neighbour MB has the same ref ID with the current MB */
        mov (8)         tmp_reg0.0<1>:ud 0:ud {align1};

        cmp.z.f0.0 (1)  null:d mba_result.20<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mba_result.4<0,1,0>:ud {align1};

        cmp.z.f0.0 (1)  null:d mbb_result.20<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mbb_result.4<0,1,0>:ud {align1};

        cmp.z.f0.0 (1)  null:d mbc_result.20<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mbc_result.4<0,1,0>:ud {align1};

        /* Exactly one match: take that neighbour's MV and skip the median */
        cmp.e.f0.0 (1)  null:d tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         mb_mvp_ref.0<1>:ud tmp_reg0.4<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        mvp_backward;

        /* x component: word_imedian of the A/B/C forward MV words at rX.4 */
        mov (1)         INPUT_ARG0.0<1>:w mba_result.4<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.4<1>:w mbb_result.4<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.8<1>:w mbc_result.4<0,1,0>:w {align1};
        SAVE_RET {align1};
        jmpi (1)        word_imedian;
        mov (1)         mb_mvp_ref.0<1>:w RET_ARG<0,1,0>:w {align1};

        /* y component: word_imedian of the A/B/C forward MV words at rX.6 */
        mov (1)         INPUT_ARG0.0<1>:w mba_result.6<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.4<1>:w mbb_result.6<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.8<1>:w mbc_result.6<0,1,0>:w {align1};
        SAVE_RET {align1};
        jmpi (1)        word_imedian;
        mov (1)         mb_mvp_ref.2<1>:w RET_ARG<0,1,0>:w {align1};


/*
 * Backward MVP: result goes to mb_mvp_ref.4 (x) and mb_mvp_ref.6 (y).
 * Same scheme as the forward case, using the backward ref ID (rX.22) and the
 * backward MV (rX.8).
 */
mvp_backward:
        /* check whether only one neighbour MB has the same ref ID with the current MB */
        mov (8)         tmp_reg0.0<1>:ud 0:ud {align1};

        cmp.z.f0.0 (1)  null:d mba_result.22<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mba_result.8<0,1,0>:ud {align1};

        cmp.z.f0.0 (1)  null:d mbb_result.22<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mbb_result.8<0,1,0>:ud {align1};

        cmp.z.f0.0 (1)  null:d mbc_result.22<0,1,0>:w 0:w {align1};
(f0.0)  add (1)         tmp_reg0.0<1>:w tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         tmp_reg0.4<1>:ud mbc_result.8<0,1,0>:ud {align1};

        /* Exactly one match: take that neighbour's MV and skip the median */
        cmp.e.f0.0 (1)  null:d tmp_reg0.0<0,1,0>:w 1:w {align1};
(f0.0)  mov (1)         mb_mvp_ref.4<1>:ud tmp_reg0.4<0,1,0>:ud {align1};
(f0.0)  jmpi (1)        __mb_hwdep_end;

        /* x component: word_imedian of the A/B/C backward MV words at rX.8 */
        mov (1)         INPUT_ARG0.0<1>:w mba_result.8<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.4<1>:w mbb_result.8<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.8<1>:w mbc_result.8<0,1,0>:w {align1};
        SAVE_RET {align1};
        jmpi (1)        word_imedian;
        mov (1)         mb_mvp_ref.4<1>:w RET_ARG<0,1,0>:w {align1};

        /* y component: word_imedian of the A/B/C backward MV words at rX.10 */
        mov (1)         INPUT_ARG0.0<1>:w mba_result.10<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.4<1>:w mbb_result.10<0,1,0>:w {align1};
        mov (1)         INPUT_ARG0.8<1>:w mbc_result.10<0,1,0>:w {align1};
        SAVE_RET {align1};
        jmpi (1)        word_imedian;
        mov (1)         mb_mvp_ref.6<1>:w RET_ARG<0,1,0>:w {align1};

/*
 * Common continuation after the MV predictors are known: derive the reference
 * window offset from the predictors, build and send the SIC VME message, and
 * write the result with an OWord block write.
 */
__mb_hwdep_end:
        /* mb_ref_win.16 = ((mb_mvp_ref >> 2) + 3) & 0xFFFC for each of the four MV words */
        asr (4)         mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1};
        add (4)         mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1};
        and (4)         mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1};

        /* m2, get the MV/Mb cost passed from constant buffer when
           spawning thread by MEDIA_OBJECT */
        mov (8)         vme_m2<1>:ud r1.0<8,8,1>:ud {align1};
        mov (8)         vme_msg_2<1>:ud vme_m2.0<8,8,1>:ud {align1};

        /* m3 cost center */
        mov (8)         vme_m3.0<1>:ud 0x0:ud {align1};
        mov (8)         vme_msg_3<1>:ud vme_m3.0<8,8,1>:ud {align1};

        /* m4. skip center */
        mov (8)         vme_msg_4<1>:ud 0x0:ud {align1};

        /* m5: luma row edge pixels; dword 0 cleared, dword 1 masked to its top byte */
        mov (1)         INEP_ROW.0<1>:ud 0x0:ud {align1};
        and (1)         INEP_ROW.4<1>:ud INEP_ROW.4<0,1,0>:ud 0xFF000000:ud {align1};
        mov (8)         vme_msg_5<1>:ud INEP_ROW.0<8,8,1>:ud {align1};
        /* Use the Luma mode */
        mov (1)         tmp_reg0.0<1>:uw LUMA_INTRA_MODE:uw {align1};
        mov (1)         vme_msg_5.5<1>:ub tmp_reg0.0<0,1,0>:ub {align1};

        /* m6: every 4th byte of the luma column read, starting at byte 3 */
        mov (8)         vme_msg_6<1>:ud 0x0:ud {align1};
        mov (16)        vme_msg_6.0<1>:ub INEP_COL0.3<32,8,4>:ub {align1};
        mov (1)         vme_msg_6.16<1>:ud INTRA_PREDICTORE_MODE {align1};

        /* the penalty for Intra mode */
        mov (1)         vme_msg_6.28<1>:ud 0x010101:ud {align1};
        mov (1)         vme_msg_6.20<1>:uw CHROMA_ROW.6<0,1,0>:uw {align1};


        /* m7: chroma row and column edge pixels */
        mov (4)         vme_msg_7.16<1>:ud CHROMA_ROW.8<4,4,1>:ud {align1};
        mov (8)         vme_msg_7.0<1>:uw CHROMA_COL.2<16,8,2>:uw {align1};

/*
 * SIC VME message
 */
        /* m1 */
        mov (1)         intra_flag<1>:uw 0x0:uw {align1};
        /* Bit 0 of transform_8x8_ub clear: write LUMA_INTRA_8x8_DISABLE to the part mask */
        and.z.f0.0 (1)  null<1>:uw transform_8x8_ub<0,1,0>:ub 1:uw {align1};
(f0.0)  mov (1)         intra_part_mask_ub<1>:ub LUMA_INTRA_8x8_DISABLE {align1};

        /* assign MB intra struct from the thread payload */
        mov (1)         mb_intra_struct_ub<1>:ub input_mb_intra_ub<0,1,0>:ub {align1};

        /* Disable DC HAAR component when calculating HAAR SATD block */
        mov (1)         tmp_reg0.0<1>:uw DC_HARR_DISABLE:uw {align1};
        mov (1)         vme_m1.30<1>:ub tmp_reg0.0<0,1,0>:ub {align1};
        mov (8)         vme_msg_1<1>:ud vme_m1.0<8,8,1>:ud {align1};

        /* m0 */
        mov (1)         vme_m0.12<1>:ud INTRA_SAD_HAAR:ud {align1};     /* 16x16 Source, Intra_harr */
        mov (8)         vme_msg_0.0<1>:ud vme_m0.0<8,8,1>:ud {align1};

        /* after verification it will be passed by using payload */
        send (8)        vme_msg_ind vme_wb<1>:ud null
                        cre(BIND_IDX_VME, VME_SIC_MESSAGE_TYPE)
                        mlen sic_vme_msg_length rlen vme_wb_length {align1};

/*
 * Oword Block Write message
 */
        mov (8)         msg_reg0.0<1>:ud obw_m0<8,8,1>:ud {align1};

        mov (1)         msg_reg1.0<1>:ud vme_wb.0<0,1,0>:ud {align1};
        mov (1)         msg_reg1.4<1>:ud vme_wb.16<0,1,0>:ud {align1};
        mov (1)         msg_reg1.8<1>:ud vme_wb.20<0,1,0>:ud {align1};
        mov (1)         msg_reg1.12<1>:ud vme_wb.24<0,1,0>:ud {align1};

        /* Distortion, Intra (17-16), */
        mov (1)         msg_reg1.16<1>:uw vme_wb.12<0,1,0>:uw {align1};

        mov (1)         msg_reg1.20<1>:ud vme_wb.8<0,1,0>:ud {align1};
        /* VME clock counts */
        mov (1)         msg_reg1.24<1>:ud vme_wb.28<0,1,0>:ud {align1};

        mov (1)         msg_reg1.28<1>:ud obw_m0.8<0,1,0>:ud {align1};

        /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
        send (16)       msg_ind obw_wb null
                        data_port(OBW_CACHE_TYPE, OBW_MESSAGE_TYPE, OBW_CONTROL_2,
                                  OBW_BIND_IDX, OBW_WRITE_COMMIT_CATEGORY,
                                  OBW_HEADER_PRESENT)
                        mlen 2 rlen obw_wb_length {align1};

/*
 * IME search: build the m0..m5 payload for the VME IME message (dual
 * reference, cost centers taken from the MV predictors) and send it.
 */
        /* 16x16 Source, harr */
        mov (1)         vme_m0.12<1>:ud SEARCH_CTRL_DUAL_REFERENCE + INTER_PART_MASK + INTER_SAD_HAAR:ud {align1};
        /* Dual Reference Width&Height,32x32 */
        mov (1)         vme_m0.22<1>:uw DREF_REGION_SIZE {align1};

        /*
         * Reference window origin: both words of vme_m0.0 are set to -8.
         * The former "vme_m0.0 = vme_m0.8; add -8" sequence was dropped because
         * these two stores overwrote its result before anything read it.
         * NOTE(review): presumably (-8, -8) is relative to the source MB -- confirm.
         */
        mov (1)         vme_m0.0<1>:w -8:w {align1};
        mov (1)         vme_m0.2<1>:w -8:w {align1};

        /* Second reference (vme_m0.4) starts from the same origin */
        mov (1)         vme_m0.4<1>:ud vme_m0.0<0,1,0>:ud {align1};

        /* MB A not available: move both window X origins by +4 */
        and.z.f0.0 (1)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_AE:uw {align1};
(f0.0)  add (1)         vme_m0.0<1>:w vme_m0.0<0,1,0>:w 4:w {align1};
(f0.0)  add (1)         vme_m0.4<1>:w vme_m0.4<0,1,0>:w 4:w {align1};
        /* MB B not available: move both window Y origins by +4 */
        and.z.f0.0 (1)  null:uw input_mb_intra_ub<0,1,0>:ub INTRA_PRED_AVAIL_FLAG_B:uw {align1};
(f0.0)  add (1)         vme_m0.2<1>:w vme_m0.2<0,1,0>:w 4:w {align1};
(f0.0)  add (1)         vme_m0.6<1>:w vme_m0.6<0,1,0>:w 4:w {align1};

        /* Shift each window by the offset derived from its MV predictor */
        add (2)         vme_m0.0<1>:w vme_m0.0<2,2,1>:w mb_ref_win.16<2,2,1>:w {align1};
        add (2)         vme_m0.4<1>:w vme_m0.4<2,2,1>:w mb_ref_win.20<2,2,1>:w {align1};

        mov (8)         vme_msg_0.0<1>:ud vme_m0.0<8,8,1>:ud {align1};

        mov (1)         vme_m1.0<1>:ud ADAPTIVE_SEARCH_ENABLE:ud {align1};
        /* the Max MV number is passed by constant buffer */
        mov (1)         vme_m1.4<1>:ub r4.28<0,1,0>:ub {align1};
        mov (1)         vme_m1.8<1>:ud DSTART_CENTER + DSEARCH_PATH_LEN:ud {align1};
        mov (8)         vme_msg_1.0<1>:ud vme_m1.0<8,8,1>:ud {align1};

        mov (8)         vme_msg_2<1>:ud vme_m2.0<8,8,1>:ud {align1};


        /* Setup the Cost center */
        /* currently four 8x8 share the same cost center */
        /* even dwords of vme_m3 <- mb_mvp_ref.0, odd dwords <- mb_mvp_ref.4 */
        mov (4)         vme_m3.0<2>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
        mov (4)         vme_m3.4<2>:ud mb_mvp_ref.4<0,1,0>:ud {align1};

        /* M4/M5 search path */

        mov (1)         vme_msg_4.0<1>:ud 0x10010101:ud {align1};
        mov (1)         vme_msg_4.4<1>:ud 0x100F0F0F:ud {align1};
        mov (1)         vme_msg_4.8<1>:ud 0x10010101:ud {align1};
        mov (1)         vme_msg_4.12<1>:ud 0x000F0F0F:ud {align1};

        mov (4)         vme_msg_4.16<1>:ud 0x0:ud {align1};
        /* NOTE(review): 8 dwords starting at byte 16 of vme_msg_5 -- assuming
           32-byte registers this runs past the end of vme_msg_5; confirm that
           offset .16 (rather than .0) is intended. */
        mov (8)         vme_msg_5.16<1>:ud 0x0:ud {align1};

        send (8)        vme_msg_ind vme_wb<1>:ud null
                        vme(BIND_IDX_VME, 0, 0, VME_IME_MESSAGE_TYPE)
                        mlen ime_vme_msg_length rlen vme_wb_length {align1};

/*
 * FBR stage: feed the IME writeback into the FBR message, then write the
 * FME/BME summary, MVs and RefIDs out with OWord block writes and finish
 * with a message fence.
 */
        /* Set Macroblock-shape/mode for FBR */

        mov (1)         vme_m2.20<1>:ud 0x0:ud {align1};
        mov (1)         vme_m2.21<1>:ub vme_wb.25<0,1,0>:ub {align1};
        mov (1)         vme_m2.22<1>:ub vme_wb.26<0,1,0>:ub {align1};

        /* vme_m2.20 byte 0 = low two bits of vme_wb.0 */
        and (1)         tmp_reg0.0<1>:uw vme_wb.0<0,1,0>:uw 0x03:uw {align1};
        mov (1)         vme_m2.20<1>:ub tmp_reg0.0<0,1,0>:ub {align1};

        /* Send FBR message into CRE */

        mov (8)         vme_msg_4.0<1>:ud vme_wb1.0<8,8,1>:ud {align1};
        mov (8)         vme_msg_5.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
        mov (8)         vme_msg_6.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
        mov (8)         vme_msg_7.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};

        /* 16x16 Source, 1/4 pixel, harr, BME ENABLE */
        mov (1)         vme_m0.12<1>:ud INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_ENABLE:ud {align1};

        mov (8)         vme_msg_0.0<1>:ud vme_m0.0<8,8,1>:ud {align1};

        mov (1)         tmp_reg0.0<1>:uw BI_WEIGHT {align1};
        mov (1)         vme_m1.6<1>:ub tmp_reg0.0<0,1,0>:ub {align1};
        mov (8)         vme_msg_1.0<1>:ud vme_m1.0<8,8,1>:ud {align1};

        mov (8)         vme_msg_2.0<1>:ud vme_m2.0<8,8,1>:ud {align1};
        mov (8)         vme_msg_3.0<1>:ud vme_m3.0<8,8,1>:ud {align1};

        /* after verification it will be passed by using payload */
        send (8)        vme_msg_ind vme_wb<1>:ud null
                        cre(BIND_IDX_VME, VME_FBR_MESSAGE_TYPE)
                        mlen fbr_vme_msg_length rlen vme_wb_length {align1};

        /* Advance the output offset past the two owords written earlier */
        add (1)         obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 0x02:ud {align1};
        mov (8)         msg_reg0.0<1>:ud obw_m0<8,8,1>:ud {align1};
        /* write FME info */
        mov (1)         msg_reg1.0<1>:ud vme_wb.0<0,1,0>:ud {align1};

        mov (1)         msg_reg1.4<1>:ud vme_wb.24<0,1,0>:ud {align1};
        /* Inter distortion of FME */
        mov (1)         msg_reg1.8<1>:ud vme_wb.8<0,1,0>:ud {align1};

        mov (1)         msg_reg1.12<1>:ud vme_m2.20<0,1,0>:ud {align1};

        /* bind index 3, write 1 oword (16bytes), msg type: 8(OWord Block Write) */
        send (16)       msg_ind obw_wb null
                        data_port(OBW_CACHE_TYPE, OBW_MESSAGE_TYPE, OBW_CONTROL_0,
                                  OBW_BIND_IDX, OBW_WRITE_COMMIT_CATEGORY,
                                  OBW_HEADER_PRESENT)
                        mlen 2 rlen obw_wb_length {align1};

        /* Write FME/BME MV */
        add (1)         obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 0x01:ud {align1};
        mov (8)         msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1};


        mov (8)         msg_reg1.0<1>:ud vme_wb1.0<8,8,1>:ud {align1};
        mov (8)         msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
        mov (8)         msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
        mov (8)         msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
        /* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
        send (16)       msg_ind obw_wb null
                        data_port(OBW_CACHE_TYPE, OBW_MESSAGE_TYPE, OBW_CONTROL_8,
                                  OBW_BIND_IDX, OBW_WRITE_COMMIT_CATEGORY,
                                  OBW_HEADER_PRESENT)
                        mlen 5 rlen obw_wb_length {align1};

        /* Write FME/BME RefID */
        add (1)         obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 0x08:ud {align1};
        mov (8)         msg_reg0.0<1>:ud obw_m0<8,8,1>:ud {align1};

        mov (8)         msg_reg1.0<1>:ud vme_wb6.0<8,8,1>:ud {align1};

        /* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
        send (16)       msg_ind obw_wb null
                        data_port(OBW_CACHE_TYPE, OBW_MESSAGE_TYPE, OBW_CONTROL_2,
                                  OBW_BIND_IDX, OBW_WRITE_COMMIT_CATEGORY,
                                  OBW_HEADER_PRESENT)
                        mlen 2 rlen obw_wb_length {align1};


        /* Issue message fence so that the previous write message is committed */
        send (16)       mb_ind mb_wb.0<1>:ud NULL
                        data_port(OBR_CACHE_TYPE, OBR_MESSAGE_FENCE, OBR_MF_COMMIT,
                                  OBR_BIND_IDX, OBR_WRITE_COMMIT_CATEGORY,
                                  OBR_HEADER_PRESENT)
                        mlen 1 rlen 1 {align1};

__EXIT:
/*
 * End of thread: r0 is copied into the thread-spawner message register and
 * sent as a one-register message with the EOT flag set.  No reply is
 * requested (rlen 0).
 */
mov  (8)  ts_msg_reg0<1>:UD   r0<8,8,1>:UD  {align1};
send (16) ts_msg_ind acc0<1>UW null
        thread_spawner(0, 0, 1)
        mlen 1 rlen 0
        {align1 EOT};


/* Two filler instructions between the EOT send and the helper routines. */
	nop	;
	nop	;
/*
 * word_imin: smallest of three signed words.
 *   in : words at byte offsets 0, 4 and 8 of INPUT_ARG0
 *   out: RET_ARG (word)
 *   scratch: TEMP_VAR0.0 (word) and flag f0.0 are overwritten
 * Leaves through the RETURN macro (ip is reloaded from RETURN_REG).
 */
word_imin:
	/* TEMP_VAR0.0 = min(arg@0, arg@4); exactly one of the two movs runs. */
	cmp.le.f0.0 (1)	null:w	INPUT_ARG0.0<0,1,0>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(-f0.0) mov (1)	TEMP_VAR0.0<1>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(f0.0) mov (1)	TEMP_VAR0.0<1>:w	INPUT_ARG0.0<0,1,0>:w	{align1};
	/* RET_ARG = min(TEMP_VAR0.0, arg@8) */
	cmp.le.f0.0 (1)	null:w	TEMP_VAR0.0<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(-f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	TEMP_VAR0.0<0,1,0>:w	{align1};
	RETURN	{align1};
818
/*
 * word_imax: largest of three signed words.
 *   in : words at byte offsets 0, 4 and 8 of INPUT_ARG0
 *   out: RET_ARG (word)
 *   scratch: TEMP_VAR0.0 (word) and flag f0.0 are overwritten
 * Leaves through the RETURN macro (ip is reloaded from RETURN_REG).
 */
word_imax:
	/* TEMP_VAR0.0 = max(arg@0, arg@4); exactly one of the two movs runs. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.0<0,1,0>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(-f0.0) mov (1)	TEMP_VAR0.0<1>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(f0.0) mov (1)	TEMP_VAR0.0<1>:w	INPUT_ARG0.0<0,1,0>:w	{align1};
	/* RET_ARG = max(TEMP_VAR0.0, arg@8) */
	cmp.ge.f0.0 (1)	null:w	TEMP_VAR0.0<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(-f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	TEMP_VAR0.0<0,1,0>:w	{align1};
	RETURN	{align1};
828
/*
 * word_imedian: middle value of three signed words.
 *   in : a, b, c = words at byte offsets 0, 4 and 8 of INPUT_ARG0
 *   out: RET_ARG (word)
 *   scratch: flag f0.0 is overwritten
 * Leaves through the RETURN macro (ip is reloaded from RETURN_REG).
 */
word_imedian:
	/* Split on a >= b. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.0<0,1,0>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(f0.0) jmpi (1)	cmp_a_ge_b;

	/* a < b.  If c <= a then a sits in the middle. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.0<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.0<0,1,0>:w	{align1};
	(f0.0) jmpi (1)	cmp_end;
	/* a is the smallest, so the median is the smaller of b and c. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.4<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(-f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	jmpi (1)	cmp_end;

cmp_a_ge_b:
	/* a >= b.  If b >= c then b sits in the middle. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.4<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.4<0,1,0>:w	{align1};
	(f0.0) jmpi (1)	cmp_end;
	/* b is the smallest, so the median is the smaller of a and c. */
	cmp.ge.f0.0 (1)	null:w	INPUT_ARG0.0<0,1,0>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
	(-f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.0<0,1,0>:w	{align1};
	(f0.0) mov (1)	RET_ARG<1>:w	INPUT_ARG0.8<0,1,0>:w	{align1};
cmp_end:
	RETURN	{align1};
848
/*
 * mb_pred_func: pick the prediction-mode field that applies to one block.
 *   in : byte at INPUT_ARG0.2 - packed prediction modes
 *        word at INPUT_ARG0.4 - macroblock mode (masked with INTER_MASK)
 *        word at INPUT_ARG0.8 - block index
 *   out: RET_ARG (word) - selected field, masked with PRED_MASK
 *   scratch: TEMP_VAR0 (first 8 DWords cleared, then reused) and flag f0.0
 * Leaves through the RETURN macro (ip is reloaded from RETURN_REG).
 */
mb_pred_func:
	/* Clear the scratch register, then stage the mode and the packed bits. */
	mov (8)	TEMP_VAR0.0<1>:ud	0:ud	{align1};
	and (1)	TEMP_VAR0.4<1>:uw	INPUT_ARG0.4<0,1,0>:uw	INTER_MASK:uw	{align1};
	mov (1)	TEMP_VAR0.0<1>:ub	INPUT_ARG0.2<0,1,0>:ub	{align1};

	/* INTER16x16: the single partition uses bit1-0. */
	cmp.e.f0.0 (1)	null:uw	TEMP_VAR0.4<0,1,0>:uw	INTER_16X16MODE:uw	{align1};
	(f0.0) and (1)	RET_ARG<1>:uw	TEMP_VAR0.0<0,1,0>:uw	PRED_MASK	{align1};
	(f0.0) jmpi (1)	end_mb_pred;

	/* INTER8x8 is handled at mb_pred_func_8. */
	cmp.e.f0.0 (1)	null:uw	TEMP_VAR0.4<0,1,0>:uw	INTER_8X8MODE:uw	{align1};
	(f0.0) jmpi (1)	mb_pred_func_8;

	/* INTER16x8 is handled at mb_pred_func_168; anything else falls through. */
	cmp.e.f0.0 (1)	null:uw	TEMP_VAR0.4<0,1,0>:uw	INTER_16X8MODE:uw	{align1};
	(f0.0) jmpi (1)	mb_pred_func_168;

mb_pred_func_816:
	/*
	 * Block 0/2 uses bit1-0, block 1/3 uses bit3-2.
	 * f0.0 is set when (block index & INTER_BLOCK1) is zero; the shifted
	 * path runs under the inverse predicate instead of being jumped over.
	 */
	mov (1)	TEMP_VAR0.8<1>:uw	INPUT_ARG0.8<0,1,0>:uw	{align1};
	and.z.f0.0 (1)	null:uw	TEMP_VAR0.8<0,1,0>:uw	INTER_BLOCK1:uw	{align1};
	(f0.0) and (1)	RET_ARG<1>:uw	TEMP_VAR0.0<0,1,0>:uw	PRED_MASK	{align1};
	(-f0.0) shr (1)	TEMP_VAR0.16<1>:uw	TEMP_VAR0.0<0,1,0>:uw	2:uw	{align1};
	(-f0.0) and (1)	RET_ARG<1>:uw	TEMP_VAR0.16<0,1,0>:uw	PRED_MASK	{align1};
	jmpi (1)	end_mb_pred;

mb_pred_func_168:
	/*
	 * Block 0/1 uses bit1-0, block 2/3 uses bit3-2.
	 * f0.0 is set when the block index is below INTER_BLOCK2.
	 */
	mov (1)	TEMP_VAR0.8<1>:uw	INPUT_ARG0.8<0,1,0>:uw	{align1};
	cmp.l.f0.0 (1)	null:uw	TEMP_VAR0.8<0,1,0>:uw	INTER_BLOCK2:uw	{align1};
	(f0.0) and (1)	RET_ARG<1>:uw	TEMP_VAR0.0<0,1,0>:uw	PRED_MASK	{align1};
	(-f0.0) shr (1)	TEMP_VAR0.16<1>:uw	TEMP_VAR0.0<0,1,0>:uw	2:uw	{align1};
	(-f0.0) and (1)	RET_ARG<1>:uw	TEMP_VAR0.16<0,1,0>:uw	PRED_MASK	{align1};
	jmpi (1)	end_mb_pred;

mb_pred_func_8:
	/* 8x8: two bits per block, so shift right by 2 * block index. */
	mul (1)	TEMP_VAR0.8<1>:uw	INPUT_ARG0.8<0,1,0>:uw	2:uw	{align1};
	shr (1)	TEMP_VAR0.16<1>:uw	TEMP_VAR0.0<0,1,0>:uw	TEMP_VAR0.8<0,1,0>:uw	{align1};
	and (1)	RET_ARG<1>:uw	TEMP_VAR0.16<0,1,0>:uw	PRED_MASK	{align1};
end_mb_pred:
	RETURN	{align1};
891
892