1/*========================== begin_copyright_notice ============================
2
3Copyright (C) 2017-2021 Intel Corporation
4
5SPDX-License-Identifier: MIT
6
7============================= end_copyright_notice ===========================*/
8
9// This file defines helper builtins of OpenCL VME, VA extension functions.
10/*****************************************************************************/
11/*                       External device-side VME                            */
12/*****************************************************************************/
13#include "../../../Implementation/IGCBiF_Intrinsics.cl"
14
15// VME helper functions - conversion to and from opaque types.
16intel_sub_group_avc_mce_payload_t __builtin_IB_vme_helper_get_as_avc_mce_payload_t(uint4);
17uint4 __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(intel_sub_group_avc_mce_payload_t);
18intel_sub_group_avc_ime_payload_t __builtin_IB_vme_helper_get_as_avc_ime_payload_t(uint4);
19uint4 __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(intel_sub_group_avc_ime_payload_t);
20intel_sub_group_avc_ref_payload_t __builtin_IB_vme_helper_get_as_avc_ref_payload_t(uint4);
21uint4 __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(intel_sub_group_avc_ref_payload_t);
22intel_sub_group_avc_sic_payload_t __builtin_IB_vme_helper_get_as_avc_sic_payload_t(uint4);
23uint4 __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(intel_sub_group_avc_sic_payload_t);
24intel_sub_group_avc_mce_result_t __builtin_IB_vme_helper_get_as_avc_mce_result_t(uint4);
25uint4 __builtin_IB_vme_helper_get_handle_avc_mce_result_t(intel_sub_group_avc_mce_result_t);
26intel_sub_group_avc_ime_result_t __builtin_IB_vme_helper_get_as_avc_ime_result_t(uint4);
27uint4 __builtin_IB_vme_helper_get_handle_avc_ime_result_t(intel_sub_group_avc_ime_result_t);
28intel_sub_group_avc_ime_result_single_reference_streamout_t __builtin_IB_vme_helper_get_as_avc_ime_result_single_reference_streamout_t(uint8);
29uint8 __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(intel_sub_group_avc_ime_result_single_reference_streamout_t);
30intel_sub_group_avc_ime_result_dual_reference_streamout_t __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t(uint8);
31uint8 __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(intel_sub_group_avc_ime_result_dual_reference_streamout_t);
32intel_sub_group_avc_ime_single_reference_streamin_t __builtin_IB_vme_helper_get_as_avc_ime_single_reference_streamin_t(uint);
33uint __builtin_IB_vme_helper_get_handle_avc_ime_single_reference_streamin_t(intel_sub_group_avc_ime_single_reference_streamin_t);
34intel_sub_group_avc_ime_dual_reference_streamin_t __builtin_IB_vme_helper_get_as_avc_ime_dual_reference_streamin_t(uint2);
35uint2 __builtin_IB_vme_helper_get_handle_avc_ime_dual_reference_streamin_t(intel_sub_group_avc_ime_dual_reference_streamin_t);
36intel_sub_group_avc_ref_result_t __builtin_IB_vme_helper_get_as_avc_ref_result_t(uint4);
37uint4 __builtin_IB_vme_helper_get_handle_avc_ref_result_t(intel_sub_group_avc_ref_result_t);
38intel_sub_group_avc_sic_result_t __builtin_IB_vme_helper_get_as_avc_sic_result_t(uint4);
39uint4 __builtin_IB_vme_helper_get_handle_avc_sic_result_t(intel_sub_group_avc_sic_result_t);
40
41
42// defines
43
// Message size constants.
// NOTE(review): going by the names, the *_NUM_GRFS values are message lengths
// counted in GRF registers and NUM_DWORD_IN_GRF is the dword count of one
// GRF - confirm against the VME message documentation.
#define UNIVERSAL_INPUT_MESSAGE_NUM_GRFS  4
#define INPUT_MESSAGE_SIC_NUM_GRFS        4
#define RETURN_MESSAGE_NUM_GRFS           7
#define NUM_DWORD_IN_GRF                  8
48
// Stream mode passed as the last argument of the intel_vme_send_ime_new_*
// helpers. The values combine as a two-bit mask: bit 0 = OUT, bit 1 = IN,
// so VME_STREAM_INOUT equals VME_STREAM_OUT | VME_STREAM_IN.
enum STREAM_MODE
{
    VME_STREAM_DISABLE = 0,   // neither bit set
    VME_STREAM_OUT     = 1,   // bit 0
    VME_STREAM_IN      = 2,   // bit 1
    VME_STREAM_INOUT   = 3    // bits 0 and 1
};
55
// Anonymous enum of VME_MAJOR_* codes (0..3).
// NOTE(review): the names suggest major partition shapes (16x16, 16x8, 8x16,
// 8x8) - confirm against the users of these constants.
enum
{
    VME_MAJOR_16x16 = 0,
    VME_MAJOR_16x8  = 1,
    VME_MAJOR_8x16  = 2,
    VME_MAJOR_8x8   = 3
};
62
63// Helper overloads
64
65static INLINE OVERLOADABLE uint4 intel_vme_send_ime_new_uint4(uint4 payload, long src_image, long fwd_ref_image, long bwd_ref_image, long vme_accelerator, enum STREAM_MODE mode)
66{
67    return __builtin_IB_vme_send_ime_new_uint4_uint4(payload, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator, mode);
68}
69static INLINE OVERLOADABLE uint4 intel_vme_send_ime_new_uint4(uint8 payload, long src_image, long fwd_ref_image, long bwd_ref_image, long vme_accelerator, enum STREAM_MODE mode)
70{
71    return __builtin_IB_vme_send_ime_new_uint4_uint8(payload, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator, mode);
72}
73static INLINE OVERLOADABLE uint8 intel_vme_send_ime_new_uint8(uint4 payload, long src_image, long fwd_ref_image, long bwd_ref_image, long vme_accelerator, enum STREAM_MODE mode)
74{
75    return __builtin_IB_vme_send_ime_new_uint8_uint4(payload, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator, mode);
76}
77static INLINE OVERLOADABLE uint8 intel_vme_send_ime_new_uint8(uint8 payload, long src_image, long fwd_ref_image, long bwd_ref_image, long vme_accelerator, enum STREAM_MODE mode)
78{
79    return __builtin_IB_vme_send_ime_new_uint8_uint8(payload, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator, mode);
80}
81
82static INLINE OVERLOADABLE uint intel_get_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex)
83{
84    return __builtin_IB_get_message_phase_dw(messagePhases, phaseIndex, dwIndex);
85}
86static INLINE OVERLOADABLE uint intel_get_message_phase_dw(uint2 messagePhases, uint phaseIndex, uint dwIndex)
87{
88    return __builtin_IB_get_message_phase_dw_uint2(messagePhases, phaseIndex, dwIndex);
89}
90static INLINE OVERLOADABLE uint intel_get_message_phase_dw(uint4 messagePhases, uint phaseIndex, uint dwIndex)
91{
92    return __builtin_IB_get_message_phase_dw_uint4(messagePhases, phaseIndex, dwIndex);
93}
94static INLINE OVERLOADABLE uint intel_get_message_phase_dw(uint8 messagePhases, uint phaseIndex, uint dwIndex)
95{
96    return __builtin_IB_get_message_phase_dw_uint8(messagePhases, phaseIndex, dwIndex);
97}
98
99static INLINE OVERLOADABLE ulong intel_get_message_phase_uq(uint messagePhases, uint phaseIndex, uint dwIndex)
100{
101    return __builtin_IB_get_message_phase_uq(messagePhases, phaseIndex, dwIndex);
102}
103static INLINE OVERLOADABLE ulong intel_get_message_phase_uq(uint2 messagePhases, uint phaseIndex, uint dwIndex)
104{
105    return __builtin_IB_get_message_phase_uq_uint2(messagePhases, phaseIndex, dwIndex);
106}
107static INLINE OVERLOADABLE ulong intel_get_message_phase_uq(uint4 messagePhases, uint phaseIndex, uint dwIndex)
108{
109    return __builtin_IB_get_message_phase_uq_uint4(messagePhases, phaseIndex, dwIndex);
110}
111static INLINE OVERLOADABLE ulong intel_get_message_phase_uq(uint8 messagePhases, uint phaseIndex, uint dwIndex)
112{
113    return __builtin_IB_get_message_phase_uq_uint8(messagePhases, phaseIndex, dwIndex);
114}
115
116static INLINE OVERLOADABLE uint intel_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex, uint val)
117{
118    return __builtin_IB_set_message_phase_dw(messagePhases, phaseIndex, dwIndex, val);
119}
120static INLINE OVERLOADABLE uint2 intel_set_message_phase_dw(uint2 messagePhases, uint phaseIndex, uint dwIndex, uint val)
121{
122    return __builtin_IB_set_message_phase_dw_uint2(messagePhases, phaseIndex, dwIndex, val);
123}
124static INLINE OVERLOADABLE uint4 intel_set_message_phase_dw(uint4 messagePhases, uint phaseIndex, uint dwIndex, uint val)
125{
126    return __builtin_IB_set_message_phase_dw_uint4(messagePhases, phaseIndex, dwIndex, val);
127}
128static INLINE OVERLOADABLE uint8 intel_set_message_phase_dw(uint8 messagePhases, uint phaseIndex, uint dwIndex, uint val)
129{
130    return __builtin_IB_set_message_phase_dw_uint8(messagePhases, phaseIndex, dwIndex, val);
131}
132
133static INLINE OVERLOADABLE uint intel_get_message_phase(uint messagePhases, uint phaseIndex)
134{
135    return __builtin_IB_get_message_phase(messagePhases, phaseIndex);
136}
137static INLINE OVERLOADABLE uint intel_get_message_phase(uint2 messagePhases, uint phaseIndex)
138{
139    return __builtin_IB_get_message_phase_uint2(messagePhases, phaseIndex);
140}
141static INLINE OVERLOADABLE uint intel_get_message_phase(uint4 messagePhases, uint phaseIndex)
142{
143    return __builtin_IB_get_message_phase_uint4(messagePhases, phaseIndex);
144}
145static INLINE OVERLOADABLE uint intel_get_message_phase(uint8 messagePhases, uint phaseIndex)
146{
147    return __builtin_IB_get_message_phase_uint8(messagePhases, phaseIndex);
148}
149
150static INLINE OVERLOADABLE uint intel_set_message_phase(uint messagePhases, uint phaseIndex, uint val)
151{
152    return __builtin_IB_set_message_phase(messagePhases, phaseIndex, val);
153}
154static INLINE OVERLOADABLE uint2 intel_set_message_phase(uint2 messagePhases, uint phaseIndex, uint val)
155{
156    return __builtin_IB_set_message_phase_uint2(messagePhases, phaseIndex, val);
157}
158static INLINE OVERLOADABLE uint4 intel_set_message_phase(uint4 messagePhases, uint phaseIndex, uint val)
159{
160    return __builtin_IB_set_message_phase_uint4(messagePhases, phaseIndex, val);
161}
162static INLINE OVERLOADABLE uint8 intel_set_message_phase(uint8 messagePhases, uint phaseIndex, uint val)
163{
164    return __builtin_IB_set_message_phase_uint8(messagePhases, phaseIndex, val);
165}
166
167static INLINE OVERLOADABLE ushort intel_get_message_phase_uw(uint messagePhases, uint phaseIndex, uint wIndex)
168{
169    return __builtin_IB_get_message_phase_uw(messagePhases, phaseIndex, wIndex);
170}
171static INLINE OVERLOADABLE ushort intel_get_message_phase_uw(uint2 messagePhases, uint phaseIndex, uint wIndex)
172{
173    return __builtin_IB_get_message_phase_uw_uint2(messagePhases, phaseIndex, wIndex);
174}
175static INLINE OVERLOADABLE ushort intel_get_message_phase_uw(uint4 messagePhases, uint phaseIndex, uint wIndex)
176{
177    return __builtin_IB_get_message_phase_uw_uint4(messagePhases, phaseIndex, wIndex);
178}
179static INLINE OVERLOADABLE ushort intel_get_message_phase_uw(uint8 messagePhases, uint phaseIndex, uint wIndex)
180{
181    return __builtin_IB_get_message_phase_uw_uint8(messagePhases, phaseIndex, wIndex);
182}
183
184static INLINE OVERLOADABLE uint intel_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint dwIndex, ushort val)
185{
186    return __builtin_IB_set_message_phase_uw(messagePhases, phaseIndex, dwIndex, val);
187}
188static INLINE OVERLOADABLE uint2 intel_set_message_phase_uw(uint2 messagePhases, uint phaseIndex, uint dwIndex, ushort val)
189{
190    return __builtin_IB_set_message_phase_uw_uint2(messagePhases, phaseIndex, dwIndex, val);
191}
192static INLINE OVERLOADABLE uint4 intel_set_message_phase_uw(uint4 messagePhases, uint phaseIndex, uint dwIndex, ushort val)
193{
194    return __builtin_IB_set_message_phase_uw_uint4(messagePhases, phaseIndex, dwIndex, val);
195}
196static INLINE OVERLOADABLE uint8 intel_set_message_phase_uw(uint8 messagePhases, uint phaseIndex, uint dwIndex, ushort val)
197{
198    return __builtin_IB_set_message_phase_uw_uint8(messagePhases, phaseIndex, dwIndex, val);
199}
200
201static INLINE OVERLOADABLE uchar intel_get_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex)
202{
203    return __builtin_IB_get_message_phase_ub(messagePhases, phaseIndex, dwIndex);
204}
205static INLINE OVERLOADABLE uchar intel_get_message_phase_ub(uint2 messagePhases, uint phaseIndex, uint dwIndex)
206{
207    return __builtin_IB_get_message_phase_ub_uint2(messagePhases, phaseIndex, dwIndex);
208}
209static INLINE OVERLOADABLE uchar intel_get_message_phase_ub(uint4 messagePhases, uint phaseIndex, uint dwIndex)
210{
211    return __builtin_IB_get_message_phase_ub_uint4(messagePhases, phaseIndex, dwIndex);
212}
213static INLINE OVERLOADABLE uchar intel_get_message_phase_ub(uint8 messagePhases, uint phaseIndex, uint dwIndex)
214{
215    return __builtin_IB_get_message_phase_ub_uint8(messagePhases, phaseIndex, dwIndex);
216}
217
218static INLINE OVERLOADABLE uint intel_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex, uchar val)
219{
220    return __builtin_IB_set_message_phase_ub(messagePhases, phaseIndex, dwIndex, val);
221}
222static INLINE OVERLOADABLE uint2 intel_set_message_phase_ub(uint2 messagePhases, uint phaseIndex, uint dwIndex, uchar val)
223{
224    return __builtin_IB_set_message_phase_ub_uint2(messagePhases, phaseIndex, dwIndex, val);
225}
226static INLINE OVERLOADABLE uint4 intel_set_message_phase_ub(uint4 messagePhases, uint phaseIndex, uint dwIndex, uchar val)
227{
228    return __builtin_IB_set_message_phase_ub_uint4(messagePhases, phaseIndex, dwIndex, val);
229}
230static INLINE OVERLOADABLE uint8 intel_set_message_phase_ub(uint8 messagePhases, uint phaseIndex, uint dwIndex, uchar val)
231{
232    return __builtin_IB_set_message_phase_ub_uint8(messagePhases, phaseIndex, dwIndex, val);
233}
234
235static INLINE OVERLOADABLE ulong intel_simd_get_message_phase_uq(uint payload, uint phaseIndex, uint numPhases)
236{
237    return __builtin_IB_simd_get_message_phase_uq(payload, phaseIndex, numPhases);
238}
239static INLINE OVERLOADABLE ulong intel_simd_get_message_phase_uq(uint2 payload, uint phaseIndex, uint numPhases)
240{
241    return __builtin_IB_simd_get_message_phase_uq_uint2(payload, phaseIndex, numPhases);
242}
243static INLINE OVERLOADABLE ulong intel_simd_get_message_phase_uq(uint4 payload, uint phaseIndex, uint numPhases)
244{
245    return __builtin_IB_simd_get_message_phase_uq_uint4(payload, phaseIndex, numPhases);
246}
247static INLINE OVERLOADABLE ulong intel_simd_get_message_phase_uq(uint8 payload, uint phaseIndex, uint numPhases)
248{
249    return __builtin_IB_simd_get_message_phase_uq_uint8(payload, phaseIndex, numPhases);
250}
251
252static INLINE OVERLOADABLE ushort intel_simd_get_message_phase_uw(uint payload, uint phaseIndex, uint numPhases)
253{
254    return __builtin_IB_simd_get_message_phase_uw(payload, phaseIndex, numPhases);
255}
256static INLINE OVERLOADABLE ushort intel_simd_get_message_phase_uw(uint2 payload, uint phaseIndex, uint numPhases)
257{
258    return __builtin_IB_simd_get_message_phase_uw_uint2(payload, phaseIndex, numPhases);
259}
260static INLINE OVERLOADABLE ushort intel_simd_get_message_phase_uw(uint4 payload, uint phaseIndex, uint numPhases)
261{
262    return __builtin_IB_simd_get_message_phase_uw_uint4(payload, phaseIndex, numPhases);
263}
264static INLINE OVERLOADABLE ushort intel_simd_get_message_phase_uw(uint8 payload, uint phaseIndex, uint numPhases)
265{
266    return __builtin_IB_simd_get_message_phase_uw_uint8(payload, phaseIndex, numPhases);
267}
268
269static INLINE OVERLOADABLE ushort intel_broadcast_message_phase_uw(uint payload, uint phaseIndex, uint phaseSubindex, uint width)
270{
271    return __builtin_IB_broadcast_message_phase_uw(payload, phaseIndex, phaseSubindex, width);
272}
273static INLINE OVERLOADABLE ushort intel_broadcast_message_phase_uw(uint2 payload, uint phaseIndex, uint phaseSubindex, uint width)
274{
275    return __builtin_IB_broadcast_message_phase_uw_uint2(payload, phaseIndex, phaseSubindex, width);
276}
277static INLINE OVERLOADABLE ushort intel_broadcast_message_phase_uw(uint4 payload, uint phaseIndex, uint phaseSubindex, uint width)
278{
279    return __builtin_IB_broadcast_message_phase_uw_uint4(payload, phaseIndex, phaseSubindex, width);
280}
281static INLINE OVERLOADABLE ushort intel_broadcast_message_phase_uw(uint8 payload, uint phaseIndex, uint phaseSubindex, uint width)
282{
283    return __builtin_IB_broadcast_message_phase_uw_uint8(payload, phaseIndex, phaseSubindex, width);
284}
285
286static INLINE OVERLOADABLE ulong intel_broadcast_message_phase_uq(uint payload, uint phaseIndex, uint phaseSubindex, uint width)
287{
288    return __builtin_IB_broadcast_message_phase_uq(payload, phaseIndex, phaseSubindex, width);
289}
290static INLINE OVERLOADABLE ulong intel_broadcast_message_phase_uq(uint2 payload, uint phaseIndex, uint phaseSubindex, uint width)
291{
292    return __builtin_IB_broadcast_message_phase_uq_uint2(payload, phaseIndex, phaseSubindex, width);
293}
294static INLINE OVERLOADABLE ulong intel_broadcast_message_phase_uq(uint4 payload, uint phaseIndex, uint phaseSubindex, uint width)
295{
296    return __builtin_IB_broadcast_message_phase_uq_uint4(payload, phaseIndex, phaseSubindex, width);
297}
298static INLINE OVERLOADABLE ulong intel_broadcast_message_phase_uq(uint8 payload, uint phaseIndex, uint phaseSubindex, uint width)
299{
300    return __builtin_IB_broadcast_message_phase_uq_uint8(payload, phaseIndex, phaseSubindex, width);
301}
302
303static INLINE OVERLOADABLE uchar intel_broadcast_message_phase_ub(uint payload, uint phaseIndex, uint phaseSubindex, uint width)
304{
305    return __builtin_IB_broadcast_message_phase_ub(payload, phaseIndex, phaseSubindex, width);
306}
307static INLINE OVERLOADABLE uchar intel_broadcast_message_phase_ub(uint2 payload, uint phaseIndex, uint phaseSubindex, uint width)
308{
309    return __builtin_IB_broadcast_message_phase_ub_uint2(payload, phaseIndex, phaseSubindex, width);
310}
311static INLINE OVERLOADABLE uchar intel_broadcast_message_phase_ub(uint4 payload, uint phaseIndex, uint phaseSubindex, uint width)
312{
313    return __builtin_IB_broadcast_message_phase_ub_uint4(payload, phaseIndex, phaseSubindex, width);
314}
315static INLINE OVERLOADABLE uchar intel_broadcast_message_phase_ub(uint8 payload, uint phaseIndex, uint phaseSubindex, uint width)
316{
317    return __builtin_IB_broadcast_message_phase_ub_uint8(payload, phaseIndex, phaseSubindex, width);
318}
319
320static INLINE OVERLOADABLE uint intel_broadcast_message_phase_dw(uint payload, uint phaseIndex, uint phaseSubindex, uint width)
321{
322    return __builtin_IB_broadcast_message_phase_dw(payload, phaseIndex, phaseSubindex, width);
323}
324static INLINE OVERLOADABLE uint intel_broadcast_message_phase_dw(uint2 payload, uint phaseIndex, uint phaseSubindex, uint width)
325{
326    return __builtin_IB_broadcast_message_phase_dw_uint2(payload, phaseIndex, phaseSubindex, width);
327}
328static INLINE OVERLOADABLE uint intel_broadcast_message_phase_dw(uint4 payload, uint phaseIndex, uint phaseSubindex, uint width)
329{
330    return __builtin_IB_broadcast_message_phase_dw_uint4(payload, phaseIndex, phaseSubindex, width);
331}
332static INLINE OVERLOADABLE uint intel_broadcast_message_phase_dw(uint8 payload, uint phaseIndex, uint phaseSubindex, uint width)
333{
334    return __builtin_IB_broadcast_message_phase_dw_uint8(payload, phaseIndex, phaseSubindex, width);
335}
336
337static INLINE OVERLOADABLE uint intel_simd_set_message_phase_uq(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
338{
339    return __builtin_IB_simd_set_message_phase_uq(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
340}
341static INLINE OVERLOADABLE uint2 intel_simd_set_message_phase_uq(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
342{
343    return __builtin_IB_simd_set_message_phase_uq_uint2(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
344}
345static INLINE OVERLOADABLE uint4 intel_simd_set_message_phase_uq(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
346{
347    return __builtin_IB_simd_set_message_phase_uq_uint4(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
348}
349static INLINE OVERLOADABLE uint8 intel_simd_set_message_phase_uq(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
350{
351    return __builtin_IB_simd_set_message_phase_uq_uint8(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
352}
353
354static INLINE OVERLOADABLE uint intel_simd_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
355{
356    return __builtin_IB_simd_set_message_phase_dw(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
357}
358static INLINE OVERLOADABLE uint2 intel_simd_set_message_phase_dw(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
359{
360    return __builtin_IB_simd_set_message_phase_dw_uint2(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
361}
362static INLINE OVERLOADABLE uint4 intel_simd_set_message_phase_dw(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
363{
364    return __builtin_IB_simd_set_message_phase_dw_uint4(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
365}
366static INLINE OVERLOADABLE uint8 intel_simd_set_message_phase_dw(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
367{
368    return __builtin_IB_simd_set_message_phase_dw_uint8(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
369}
370
371static INLINE OVERLOADABLE uint intel_simd_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
372{
373    return __builtin_IB_simd_set_message_phase_ub(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
374}
375static INLINE OVERLOADABLE uint2 intel_simd_set_message_phase_ub(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
376{
377    return __builtin_IB_simd_set_message_phase_ub_uint2(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
378}
379static INLINE OVERLOADABLE uint4 intel_simd_set_message_phase_ub(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
380{
381    return __builtin_IB_simd_set_message_phase_ub_uint4(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
382}
383static INLINE OVERLOADABLE uint8 intel_simd_set_message_phase_ub(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
384{
385    return __builtin_IB_simd_set_message_phase_ub_uint8(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
386}
387
388static INLINE OVERLOADABLE uint intel_simd_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
389{
390    return __builtin_IB_simd_set_message_phase_uw(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
391}
392static INLINE OVERLOADABLE uint2 intel_simd_set_message_phase_uw(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
393{
394    return __builtin_IB_simd_set_message_phase_uw_uint2(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
395}
396static INLINE OVERLOADABLE uint4 intel_simd_set_message_phase_uw(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
397{
398    return __builtin_IB_simd_set_message_phase_uw_uint4(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
399}
400static INLINE OVERLOADABLE uint8 intel_simd_set_message_phase_uw(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val)
401{
402    return __builtin_IB_simd_set_message_phase_uw_uint8(messagePhases, phaseIndex, numPhases, subReg, numLanes, val);
403}
404
405static INLINE uint
406__calc_dual_ref_id_dword(
407    long src_image,
408    long fwd_ref_image,
409    long bwd_ref_image)
410{
411    uint srcBTI = __builtin_IB_get_image_bti(src_image);
412    uint fwdBTI = __builtin_IB_get_image_bti(fwd_ref_image);
413    uint bwdBTI = __builtin_IB_get_image_bti(bwd_ref_image);
414
415    uint imm0 = (fwdBTI - srcBTI - 1) >> 1;
416    uint imm1 = (bwdBTI - srcBTI - 1) >> 1;
417
418    uint imm = (imm0 << 0) | (imm1 << 4);
419    imm |= ( imm << 8 );
420    imm |= ( imm << 16 );
421
422    return imm;
423}
424
425static INLINE uint
426__calc_single_ref_id_dword(
427    long src_image,
428    long fwd_ref_image)
429{
430    uint srcBTI = __builtin_IB_get_image_bti(src_image);
431    uint fwdBTI = __builtin_IB_get_image_bti(fwd_ref_image);
432
433    uint imm = (fwdBTI - srcBTI - 1) >> 1;
434
435    imm |= ( imm << 4 );
436    imm |= ( imm << 8 );
437    imm |= ( imm << 16 );
438
439    return imm;
440}
441
442static INLINE uint4 OVERLOADABLE
443intel_sub_group_payload_set_single_ref_id(
444    long src_image,
445    long fwd_ref_image,
446    uint4 payload )
447{
448    uint imm = __calc_single_ref_id_dword(src_image, fwd_ref_image);
449    return intel_set_message_phase_dw(payload, 1, 6, imm);
450}
451
452static INLINE uint8 OVERLOADABLE
453intel_sub_group_payload_set_single_ref_id(
454    long src_image,
455    long fwd_ref_image,
456    uint8 payload )
457{
458    uint imm = __calc_single_ref_id_dword(src_image, fwd_ref_image);
459    return intel_set_message_phase_dw(payload, 1, 6, imm);
460}
461
462static INLINE uint4 OVERLOADABLE
463intel_sub_group_payload_set_dual_ref_id(
464    long src_image,
465    long fwd_ref_image,
466    long bwd_ref_image,
467    uint4 payload )
468{
469    uint imm = __calc_dual_ref_id_dword(src_image, fwd_ref_image, bwd_ref_image);
470    return intel_set_message_phase_dw(payload, 1, 6, imm);
471}
472
473static INLINE uint8 OVERLOADABLE
474intel_sub_group_payload_set_dual_ref_id(
475    long src_image,
476    long fwd_ref_image,
477    long bwd_ref_image,
478    uint8 payload )
479{
480    uint imm = __calc_dual_ref_id_dword(src_image, fwd_ref_image, bwd_ref_image);
481    return intel_set_message_phase_dw(payload, 1, 6, imm);
482}
483
484/*****************************************************************************\
485
486Description:
487    Set ref field polarities.
488    - set RefAccess (M0.3 :7)
489    - set RefIdpolarity (M1.1 :15:8)
490
491\*****************************************************************************/
492static INLINE uint4 OVERLOADABLE
493intel_sub_group_payload_set_ref_id_polarities_raw(
494    uchar ref_field_polarity,
495    uint4 payload)
496{
497    uint val = intel_get_message_phase_dw(payload, 0, 3);
498    val |= (0x1 << 7);
499    payload = intel_set_message_phase_dw(payload, 0, 3, val);
500
501    payload = intel_set_message_phase_ub(payload, 1, 1*4+1, ref_field_polarity);
502
503    return payload;
504}
505
506/*****************************************************************************\
507
508Description:
509    - set ref ids (M1.6)
510
511\*****************************************************************************/
512static INLINE uint4 OVERLOADABLE
513intel_sub_group_payload_set_ref_id_raw(
514    uint packed_ref_ids,
515    uint4 payload)
516{
517    return intel_set_message_phase_dw(payload, 1, 6, packed_ref_ids);
518}
519
520
521typedef ulong3 VMEImage_t;
522
523INLINE ulong getVMEImage( VMEImage_t VMEImage )
524{
525    return VMEImage.x;
526}
527INLINE ulong getVMEImageType( VMEImage_t VMEImage )
528{
529    return VMEImage.y;
530}
531INLINE ulong getVMESampler( VMEImage_t VMEImage )
532{
533    return VMEImage.z;
534}
535
536VMEImage_t __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ulong image, ulong imageType, ulong sampler)
537{
538  VMEImage_t vme_image = { image, imageType, sampler };
539  return vme_image;
540}
541
542
543// end helper overloads
544
545/////////////////////// User visible functions ///////////////////////
546
547uint2 __builtin_spirv_intel_sub_group_avc_mce_get_default_high_penalty_cost_table( )
548{
549    uint2 costTable;
550    costTable.s0 = 0x4E483D1D;
551    costTable.s1 = 0x5C5B5958;
552    return costTable;
553}
554
555INLINE uint2  OVERLOADABLE
556intel_sub_group_avc_mce_get_default_high_penalty_cost_table(void)
557{
558    return __builtin_spirv_intel_sub_group_avc_mce_get_default_high_penalty_cost_table();
559}
560
561uint2 __builtin_spirv_intel_sub_group_avc_mce_get_default_medium_penalty_cost_table( )
562{
563   uint2 costTable;
564   costTable.s0 = 0x2B1D1A05;
565   costTable.s1 = 0x39392F2D;
566   return costTable;
567}
568
569INLINE uint2  OVERLOADABLE
570intel_sub_group_avc_mce_get_default_medium_penalty_cost_table(void)
571{
572    return __builtin_spirv_intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();
573}
574
575uint2 __builtin_spirv_intel_sub_group_avc_mce_get_default_low_penalty_cost_table( )
576{
577   uint2 costTable;
578   costTable.s0 = 0x09050401;
579   costTable.s1 = 0x0F0E0C0A;
580   return costTable;
581}
582
583INLINE uint2  OVERLOADABLE
584intel_sub_group_avc_mce_get_default_low_penalty_cost_table(void)
585{
586    return __builtin_spirv_intel_sub_group_avc_mce_get_default_low_penalty_cost_table();
587}
588
589/*****************************************************************************\
590
591Description:
592    Setup motion vector costing for the search or check.
593
594    ExtendedCostRange has already been shifted by 6 bits.
595
596    - initialize M0 to M2 from payload src
597    - set Cost Center (M3.0-M3.7)
598    - set Cost Table (M2.3, M2.4)
599    - set MV Cost Scale (M1.7 17:16)
600    - set NonSkipZMVAdded (M1.7 :5)
601
602\*****************************************************************************/
603intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_mce_payload_t(
604    ulong packed_cost_center_delta,
605    uint2 packed_cost_table,
606    uchar cost_precision,
607    intel_sub_group_avc_mce_payload_t payload )
608{
609  uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
610
611  // Set Cost Center Delta (M3.0-M3.7)
612  const uint FWDCostCenter = (uint)packed_cost_center_delta;
613  const uint BWDCostCenter = (uint)(packed_cost_center_delta >> 32);
614
615  for (int comp = 0; comp <= 7; comp += 2) {
616    handle = intel_set_message_phase_dw(handle, 3, comp,   FWDCostCenter);
617    handle = intel_set_message_phase_dw(handle, 3, comp+1, BWDCostCenter);
618  }
619
620  // Set Cost Table (M2.3, M2.4)
621  handle = intel_set_message_phase_dw(handle, 2, 3, packed_cost_table.x);
622  handle = intel_set_message_phase_dw(handle, 2, 4, packed_cost_table.y);
623
624  // Set MV Cost Scale M1.7[17:16]
625  ushort MVCostScaleFactor = intel_get_message_phase_uw(handle, 1, 7 * 2 + 1) | cost_precision;
626  handle = intel_set_message_phase_uw(handle, 1, 7 * 2 + 1, MVCostScaleFactor);
627
628  // Set NonSkipZMVAdded M1.7[5:5].
629  const uint NonSkipZMvAdded = intel_get_message_phase_dw(handle, 1, 7) | 0x20;
630  handle = intel_set_message_phase_dw(handle, 1, 7, NonSkipZMvAdded);
631
632  intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t( handle );
633  return result;
634}
635
636INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
637intel_sub_group_avc_mce_set_motion_vector_cost_function(
638      ulong packed_cost_center_delta,
639      uint2 packed_cost_table,
640      uchar cost_precision,
641      intel_sub_group_avc_mce_payload_t payload )
642{
643    return __builtin_spirv_intel_sub_group_avc_mce_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_mce_payload_t(packed_cost_center_delta, packed_cost_table, cost_precision, payload);
644}
645
646uint2 __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table_i8_i8(
647    uchar slice_type,
648    uchar qp )
649{
650    uint2 penalty = 0;
651    if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_PRED_INTEL )
652    {
653        uint2 penalty_table[52] =
654        {
655            { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a },
656            { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a },
657            { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a },
658            { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a }, { 0x09050401, 0x0f0e0c0a },
659            { 0x190a0802, 0x1f1e1c1a }, { 0x190a0802, 0x1f1e1c1a }, { 0x190a0802, 0x1f1e1c1a }, { 0x190a0802, 0x1f1e1c1a },
660            { 0x1e0f0c03, 0x2b2b291f }, { 0x1e0f0c03, 0x2b2b291f }, { 0x1e0f0c03, 0x2b2b291f }, { 0x291a1804, 0x2f2e2c2a },
661            { 0x291a1804, 0x2f2e2c2a }, { 0x291a1804, 0x2f2e2c2a }, { 0x2b1d1a05, 0x39392f2d }, { 0x2e1f1c06, 0x3b3b392f },
662            { 0x2e1f1c06, 0x3b3b392f }, { 0x38291e07, 0x3d3c3b39 }, { 0x392a2808, 0x3f3e3c3a }, { 0x3a2b2909, 0x48483e3b },
663            { 0x3b2d2a0a, 0x49493f3d }, { 0x3c2e2b0b, 0x4a4a483e }, { 0x3f382d0d, 0x4c4b4a48 }, { 0x48392e0e, 0x4d4c4b49 },
664            { 0x493a3818, 0x4f4e4c4a }, { 0x4a3b3919, 0x58584e4b }, { 0x4b3d3a1a, 0x59594f4d }, { 0x4d3e3c1c, 0x5b5a594e },
665            { 0x4e483d1d, 0x5c5b5958 }, { 0x58493f1f, 0x5e5d5b59 }, { 0x594a4828, 0x5f5e5c5a }, { 0x5a4b4929, 0x68685e5b },
666            { 0x5b4d4a2a, 0x69695f5d }, { 0x5d4e4b2b, 0x6b6a685e }, { 0x5e584d2d, 0x6c6b6a68 }, { 0x68594e2e, 0x6d6c6b69 },
667            { 0x695a5838, 0x6f6e6c6a }, { 0x6a5b5939, 0x6f6f6e6b }, { 0x6b5d5a3a, 0x6f6f6f6d }, { 0x6d5e5b3b, 0x6f6f6f6e }
668        };
669        penalty = penalty_table[qp];
670    }
671    else if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL )
672    {
673        uint2 penalty_table[52] =
674        {
675            { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a },
676            { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a },
677            { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a },
678            { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a }, { 0x06020200, 0x180e0c0a },
679            { 0x0c040400, 0x281e1c1a }, { 0x0c040400, 0x281e1c1a }, { 0x0c040400, 0x281e1c1a }, { 0x0c040400, 0x281e1c1a },
680            { 0x19060600, 0x2c2b291f }, { 0x19060600, 0x2c2b291f }, { 0x19060600, 0x2c2b291f }, { 0x1c080800, 0x382e2c2a },
681            { 0x1c080800, 0x382e2c2a }, { 0x1c080800, 0x382e2c2a }, { 0x1f0a0a00, 0x3a392f2d }, { 0x290c0c00, 0x3c3b392f },
682            { 0x290c0c00, 0x3c3b392f }, { 0x2b0e0e00, 0x3e3c3b39 }, { 0x2c181800, 0x483e3c3a }, { 0x2e191900, 0x49483e3b },
683            { 0x2f1a1a00, 0x4a493f3d }, { 0x381b1b00, 0x4b4a483e }, { 0x3a1d1d00, 0x4d4b4a48 }, { 0x3b1e1e00, 0x4e4c4b49 },
684            { 0x3c282800, 0x584e4c4a }, { 0x3e292900, 0x59584e4b }, { 0x3f2a2a00, 0x5a594f4d }, { 0x492c2c00, 0x5c5a594e },
685            { 0x492d2d00, 0x5d5b5958 }, { 0x4b2f2f00, 0x5f5d5b59 }, { 0x4c383800, 0x685e5c5a }, { 0x4e393900, 0x69685e5b },
686            { 0x4f3a3a00, 0x6a695f5d }, { 0x583b3b00, 0x6b6a685e }, { 0x5a3d3d00, 0x6d6b6a68 }, { 0x5b3e3e00, 0x6e6c6b69 },
687            { 0x5c484800, 0x6f6e6c6a }, { 0x5e494900, 0x6f6f6e6b }, { 0x5f4a4a00, 0x6f6f6f6d }, { 0x694b4b00, 0x6f6f6f6e }
688        };
689        penalty = penalty_table[qp];
690    }
691    else
692    {
693        penalty = 0;
694    }
695
696    return penalty;
697}
698
699INLINE uint2 OVERLOADABLE
700intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(
701    uchar slice_type,
702    uchar qp )
703{
704    return __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table_i8_i8(slice_type, qp);
705}
706
707/*****************************************************************************\
708
709Description:
710    Enable AC only Haar transform.
711    - set AConlyHAAR M1.7[21:21]
712
713\*****************************************************************************/
714intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_ac_only_haar_intel_sub_group_avc_mce_payload_t(
715    intel_sub_group_avc_mce_payload_t payload )
716{
717    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
718
719    const uint AConlyHAAR = intel_get_message_phase_dw(handle, 1, 7) | (1<<21);
720    handle = intel_set_message_phase_dw(handle, 1, 7, AConlyHAAR);
721
722    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t( handle );
723    return result;
724}
725
726INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
727intel_sub_group_avc_mce_set_ac_only_haar(
728      intel_sub_group_avc_mce_payload_t payload )
729{
730    return __builtin_spirv_intel_sub_group_avc_mce_set_ac_only_haar_intel_sub_group_avc_mce_payload_t(payload);
731}
732
733/*****************************************************************************\
734
735Description:
736    Set field polarities.
737    - Set SrcAccess (M0.3 : 6)
738    - Set SrcFieldPolarity (M1.7 : 19)
739
740\*****************************************************************************/
741intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_mce_payload_t(
742    uchar src_field_polarity,
743    intel_sub_group_avc_mce_payload_t payload )
744{
745    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
746
747    uint val = intel_get_message_phase_dw(handle, 0, 3);
748    val |= (0x1 << 6);
749    handle = intel_set_message_phase_dw(handle, 0, 3, val);
750
751    val = intel_get_message_phase_dw(handle, 1, 7);
752    val &= ~(1 << 19);
753    val |= (src_field_polarity << 19);
754    handle = intel_set_message_phase_dw(handle, 1, 7, val);
755
756    intel_sub_group_avc_mce_payload_t npayload = __builtin_IB_vme_helper_get_as_avc_mce_payload_t( handle );
757    return npayload;
758}
759
760INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
761intel_sub_group_avc_mce_set_source_interlaced_field_polarity(
762     uchar  src_field_polarity,
763     intel_sub_group_avc_mce_payload_t payload )
764{
765    return __builtin_spirv_intel_sub_group_avc_mce_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_mce_payload_t(src_field_polarity, payload);
766}
767
768/*****************************************************************************\
769
770Description:
771    Set ref field polarities.
772    - set RefAccess (M0.3 :7)
773    - set RefIdpolarity (M1.1 :15:8)
774
775\*****************************************************************************/
776intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_mce_payload_t(
777    uchar ref_field_polarity,
778    intel_sub_group_avc_mce_payload_t payload )
779{
780    ref_field_polarity |= ( ref_field_polarity << 1 );
781    ref_field_polarity |= ( ref_field_polarity << 2 );
782
783    uint4 p = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
784    uint4 handle = intel_sub_group_payload_set_ref_id_polarities_raw(
785        ref_field_polarity,
786        p);
787
788    intel_sub_group_avc_mce_payload_t npayload = __builtin_IB_vme_helper_get_as_avc_mce_payload_t( handle );
789    return npayload;
790}
791
792INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
793intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(
794     uchar  ref_field_polarity,
795     intel_sub_group_avc_mce_payload_t payload )
796{
797    return __builtin_spirv_intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_mce_payload_t(ref_field_polarity, payload);
798}
799
800intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_mce_payload_t(
801    uchar fwd_ref_field_polarity,
802    uchar bwd_ref_field_polarity,
803    intel_sub_group_avc_mce_payload_t payload )
804{
805    fwd_ref_field_polarity |= ( fwd_ref_field_polarity << 1 );
806    fwd_ref_field_polarity |= ( fwd_ref_field_polarity << 2 );
807    bwd_ref_field_polarity |= ( bwd_ref_field_polarity << 1 );
808    bwd_ref_field_polarity |= ( bwd_ref_field_polarity << 2 );
809
810    uchar  ref_field_polarity =
811        ( fwd_ref_field_polarity << 0 ) | ( bwd_ref_field_polarity << 4 );
812
813    uint4 p = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
814    uint4 handle = intel_sub_group_payload_set_ref_id_polarities_raw(
815        ref_field_polarity,
816        p);
817
818    intel_sub_group_avc_mce_payload_t npayload = __builtin_IB_vme_helper_get_as_avc_mce_payload_t( handle );
819    return npayload;
820}
821
822INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
823intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
824     uchar  fwd_ref_field_polarity,
825     uchar  bwd_ref_field_polarity,
826     intel_sub_group_avc_mce_payload_t payload )
827{
828    return __builtin_spirv_intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_mce_payload_t(fwd_ref_field_polarity, bwd_ref_field_polarity, payload);
829}
830
831/*****************************************************************************\
832
833Description:
834    Get the motion vectors (W1 W2 W3 W4) from IME result payload.
835
836\*****************************************************************************/
837ulong __builtin_spirv_intel_sub_group_avc_mce_get_motion_vectors_intel_sub_group_avc_mce_result_t(
838    intel_sub_group_avc_mce_result_t result )
839{
840    uint4 r = __builtin_IB_vme_helper_get_handle_avc_mce_result_t(result);
841    const ulong MVb = intel_simd_get_message_phase_uq(r, 1, 4);
842    return MVb;
843}
844
845INLINE ulong OVERLOADABLE
846intel_sub_group_avc_mce_get_motion_vectors(
847    intel_sub_group_avc_mce_result_t  result )
848{
849    return __builtin_spirv_intel_sub_group_avc_mce_get_motion_vectors_intel_sub_group_avc_mce_result_t(result);
850}
851
852/*****************************************************************************\
853
854Description:
855    Get the inter distortions (W5) from IME result payload.
856
857\*****************************************************************************/
858ushort __builtin_spirv_intel_sub_group_avc_mce_get_inter_distortions_intel_sub_group_avc_mce_result_t(
859    intel_sub_group_avc_mce_result_t result )
860{
861    return intel_simd_get_message_phase_uw(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 5, 1);
862}
863
864INLINE ushort OVERLOADABLE
865intel_sub_group_avc_mce_get_inter_distortions(
866    intel_sub_group_avc_mce_result_t  result )
867{
868    return __builtin_spirv_intel_sub_group_avc_mce_get_inter_distortions_intel_sub_group_avc_mce_result_t(result);
869}
870
871/*****************************************************************************\
872
873Description:
874    Get the inter best distortion W0.2[15:0] from IME result payload.
875
876\*****************************************************************************/
877ushort __builtin_spirv_intel_sub_group_avc_mce_get_best_inter_distortion_intel_sub_group_avc_mce_result_t(
878    intel_sub_group_avc_mce_result_t result )
879{
880    return intel_get_message_phase_uw(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 0, 2*2);
881}
882
883INLINE ushort OVERLOADABLE
884intel_sub_group_avc_mce_get_best_inter_distortion(
885    intel_sub_group_avc_mce_result_t  result )
886{
887    return __builtin_spirv_intel_sub_group_avc_mce_get_best_inter_distortion_intel_sub_group_avc_mce_result_t(result);
888}
889
890/*****************************************************************************\
891
892Description:
893    Get the inter major shape W0.0[1:0]) from IME result payload.
894
895\*****************************************************************************/
896uchar __builtin_spirv_intel_sub_group_avc_mce_get_inter_major_shape_intel_sub_group_avc_mce_result_t(
897    intel_sub_group_avc_mce_result_t result )
898{
899    const uchar InterMbMode = intel_get_message_phase_ub(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 0, 0) & 0x3;
900    return InterMbMode;
901}
902
903INLINE uchar OVERLOADABLE
904intel_sub_group_avc_mce_get_inter_major_shape(
905    intel_sub_group_avc_mce_result_t  result )
906{
907    return __builtin_spirv_intel_sub_group_avc_mce_get_inter_major_shape_intel_sub_group_avc_mce_result_t(result);
908}
909
910/*****************************************************************************\
911
912Description:
913    Get the inter major shape W0.6[15:8] from IME result payload.
914
915\*****************************************************************************/
916uchar __builtin_spirv_intel_sub_group_avc_mce_get_inter_minor_shapes_intel_sub_group_avc_mce_result_t(
917    intel_sub_group_avc_mce_result_t result )
918{
919    const uchar SubMbShape = intel_get_message_phase_ub(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 0, 6*4+1);
920    return SubMbShape;
921}
922
923INLINE uchar OVERLOADABLE
924intel_sub_group_avc_mce_get_inter_minor_shapes(
925    intel_sub_group_avc_mce_result_t  result )
926{
927    return __builtin_spirv_intel_sub_group_avc_mce_get_inter_minor_shapes_intel_sub_group_avc_mce_result_t(result);
928}
929
930/*****************************************************************************\
931
932Description:
933    Get the inter major shape W0.6[23:16] from IME result payload.
934
935\*****************************************************************************/
936uchar __builtin_spirv_intel_sub_group_avc_mce_get_inter_directions_intel_sub_group_avc_mce_result_t(
937    intel_sub_group_avc_mce_result_t result )
938{
939    const uchar SubMbPredMode = intel_get_message_phase_ub(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 0, 6*4+2);
940    return SubMbPredMode;
941}
942
943INLINE uchar OVERLOADABLE
944intel_sub_group_avc_mce_get_inter_directions(
945    intel_sub_group_avc_mce_result_t  result )
946{
947    return __builtin_spirv_intel_sub_group_avc_mce_get_inter_directions_intel_sub_group_avc_mce_result_t(result);
948}
949
950/*****************************************************************************\
951
952Description:
953    Get the count of motion vectors (W0.0 :28:24) from MCE result payload.
954
955\*****************************************************************************/
956uchar __builtin_spirv_intel_sub_group_avc_mce_get_inter_motion_vector_count_intel_sub_group_avc_mce_result_t(
957    intel_sub_group_avc_mce_result_t result )
958{
959    return intel_get_message_phase_ub(__builtin_IB_vme_helper_get_handle_avc_mce_result_t(result), 0, 0*4+3) & 0x1F;
960}
961
962INLINE uchar OVERLOADABLE
963intel_sub_group_avc_mce_get_inter_motion_vector_count(
964    intel_sub_group_avc_mce_result_t  result )
965{
966    return __builtin_spirv_intel_sub_group_avc_mce_get_inter_motion_vector_count_intel_sub_group_avc_mce_result_t(result);
967}
968
969// ... IME functions ...
970
971ushort2 OVERLOADABLE intel_sub_group_avc_ime_ref_window_size(
972    uchar search_window_config,
973    char dual_ref )
974{
975    ushort2 ref_window_size = 0;
976
977    if( search_window_config == CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL    ||
978        search_window_config == CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL       ||
979        search_window_config == CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL ||
980        search_window_config == CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL ) {
981        if( dual_ref ) {
982            ref_window_size.x = 32;
983            ref_window_size.y = 32;
984        }
985        else {
986            ref_window_size.x = 48;
987            ref_window_size.y = 40;
988        }
989    }
990    else if( search_window_config == CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL ) {
991        ref_window_size.x = 28;
992        ref_window_size.y = 28;
993    }
994    else if( search_window_config == CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL       ||
995             search_window_config == CLK_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL ) {
996        ref_window_size.x = 24;
997        ref_window_size.y = 24;
998    }
999    else if( search_window_config == CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL ||
1000             search_window_config == CLK_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL ) {
1001        ref_window_size.x = 20;
1002        ref_window_size.y = 20;
1003    }
1004
1005    return ref_window_size;
1006}
1007
1008// This function is marked as a deprecated in specification, keeping for backward compatibility.
1009INLINE ushort2 OVERLOADABLE
1010intel_sub_group_ime_ref_window_size(
1011  uchar search_window_config,
1012  char  dual_ref )
1013{
1014    return intel_sub_group_avc_ime_ref_window_size(search_window_config, dual_ref);
1015}
1016
1017INLINE ushort2 __builtin_spirv_intel_sub_group_avc_ime_ref_window_size_i8_i8(
1018    uchar search_window_config,
1019    char dual_ref )
1020{
1021    return intel_sub_group_avc_ime_ref_window_size(search_window_config, dual_ref);
1022}
1023
1024INLINE ushort2 __builtin_spirv_intel_sub_group_ime_ref_window_size_i8_i8(
1025  uchar search_window_config,
1026  char  dual_ref )
1027{
1028    return intel_sub_group_avc_ime_ref_window_size(search_window_config, dual_ref);
1029}
1030
1031short2 __builtin_spirv_intel_sub_group_avc_ime_adjust_ref_offset_v2i16_v2i16_v2i16_v2i16(
1032    short2 ref_offset,
1033    ushort2 src_coord_us,
1034    ushort2 ref_window_size_us,
1035    ushort2 frame_size_us )
1036{
1037    short2 src_coord = as_short2( src_coord_us );
1038    short2 ref_window_size = as_short2( ref_window_size_us );
1039    short2 frame_size = as_short2( frame_size_us );
1040
1041    short2 block_size = 16;
1042    short2 search_window_size = ref_window_size - block_size;
1043    short2 ref_window_coord = src_coord - ( search_window_size >> 1 );
1044
1045    if( ref_window_coord.x + ref_offset.x >= frame_size.x ) {
1046        ref_offset.x = frame_size.x - ref_window_coord.x - search_window_size.x;
1047    }
1048    else if( ref_window_coord.x + ref_offset.x + search_window_size.x < 0 ) {
1049        ref_offset.x = -ref_window_coord.x;
1050    }
1051    if( ref_window_coord.y + ref_offset.y >= frame_size.y ) {
1052        ref_offset.y = frame_size.y - ref_window_coord.y - search_window_size.y;
1053    }
1054    else if( ref_window_coord.y + ref_offset.y + search_window_size.y < 0 ) {
1055        ref_offset.y = -ref_window_coord.y;
1056    }
1057
1058    return ref_offset;
1059}
1060
1061INLINE short2 OVERLOADABLE
1062intel_sub_group_avc_ime_adjust_ref_offset(
1063   short2  ref_offset,
1064   ushort2 src_coord_us,
1065   ushort2 ref_window_size_us,
1066   ushort2 frame_size_us )
1067{
1068    return __builtin_spirv_intel_sub_group_avc_ime_adjust_ref_offset_v2i16_v2i16_v2i16_v2i16(ref_offset, src_coord_us, ref_window_size_us, frame_size_us);
1069}
1070
1071/*****************************************************************************\
1072
1073Initialize IME payload M0:
1074- clear M0 and M1
1075- set SrcX M0.2[15:0] and SrcY M0.2[31:16]
1076- set SubMbPartMask M0.3[30:24]
1077- set InterSAD M0.3[21:20]
1078
1079\*****************************************************************************/
1080intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_initialize_v2i16_i8_i8(
1081    ushort2 src_coord,
1082    uchar partition_mask,
1083    uchar sad_adjustment )
1084{
1085    // Create and initialize IME payload
1086    uint4 payload = __builtin_IB_create_message_phases_uint4(UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+4);
1087
1088    // Set SrcX M0.2[15:0] and SrcY M0.2[31:16].
1089    payload = intel_set_message_phase_dw(payload, 0, 2, as_uint(src_coord));
1090
1091    // Set SubMbPartMask M0.3[30:24] and InterSAD M0.3[21:20]
1092    const uint immValue = (partition_mask << 24) | (sad_adjustment << 20);
1093    payload = intel_set_message_phase_dw(payload, 0, 3, immValue);
1094
1095    intel_sub_group_avc_ime_payload_t result = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(payload);
1096    return result;
1097}
1098
1099INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
1100intel_sub_group_avc_ime_initialize(
1101    ushort2 src_coord,
1102    uchar partition_mask,
1103    uchar sad_adjustment )
1104{
1105    return __builtin_spirv_intel_sub_group_avc_ime_initialize_v2i16_i8_i8(src_coord, partition_mask, sad_adjustment);
1106}
1107
/*****************************************************************************\

Description:
    Set IME search config.

    - initialize M0 M1 from payload src
    - set MaxNumMVs M1.1[5:0]
    - set Ref0X, Ref0Y (M0.0) and, for multiRef only, Ref1X, Ref1Y (M0.1)
    - set RefHeight and RefWidth M0.5[31:16]
    - set LenSP & MaxNumSU M1.2[15:0]
    - set search path delta M4.0 and M5.0
    - set Start0X, Start0Y, Start1X, Start1Y M1.2[31:16] (only when non-zero)
    - set AdaptiveEn M1.0[1:1] (DIAMOND / LARGE_DIAMOND only)
    - set SearchCtrl M0.3[10:8] (multiRef only)

Parameters:
    fwd_ref_offset       - offset added to the per-config window offset to
                           form Ref0.
    bwd_ref_offset       - same for Ref1; only read when multiRef is true.
    search_window_config - one of the CLK_AVC_ME_SEARCH_WINDOW_*_INTEL values.
                           A value matching none of the branches below leaves
                           the window offset, dimensions, LenSP/MaxNumSU and
                           start positions at 0 and writes no search path.
    multiRef             - true selects the dual-reference settings.
    payload              - incoming IME payload; the updated payload is
                           returned.

\*****************************************************************************/
intel_sub_group_avc_ime_payload_t __builtin_spirv_avc_ime_set_reference_v2i16_v2i16_i8_bool_intel_sub_group_avc_ime_payload_t(
    short2 fwd_ref_offset,
    short2 bwd_ref_offset,
    uchar search_window_config,
    bool multiRef,
    intel_sub_group_avc_ime_payload_t payload )
{
    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);

    // Set MaxNumMVs M1.1[5:0] (the whole low byte of M1.1 is written as 0x20).
    handle = intel_set_message_phase_ub(handle, 1, 1*4, 0x20);

    // Set the Ref0X, Ref0Y (M0.0),
    // RefHeight and RefWidth (M0.5[31:16]), and
    // LenSP & MaxNumSU M1.2[15:0]
    // based on search window config.

    const uint searchConfig = search_window_config;
    short2 offset = (short2)(0, 0);   // added to the caller's ref offsets
    ushort dimXY = 0;                 // written to M0.5[31:16]
    uint startX = 0;                  // low nibble of the start position
    uint startY = 0;                  // high nibble of the start position
    ushort lenSPMaxNumSU = 0;         // written to M1.2[15:0]
    uint adaptive = 0;                // non-zero => write AdaptiveEn

    if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL || searchConfig == CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL )
    {
        if( multiRef == false )
        {
            offset = (short2)(-16, -12);
            dimXY = 0x2830;        // 48, 40
            lenSPMaxNumSU = 0x3030;
            // EXHAUSTIVE uses a spiral SP.
            if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL )
            {
                startX = 4; // ((48-16) >> 3) & 0xf
                startY = 3; // ((40-16) >> 3) & 0xf
            }
        }
        else
        {
            offset = (short2)(-8, -8);
            dimXY = 0x2020;        // 32, 32
            lenSPMaxNumSU = 0x1010;
            // EXHAUSTIVE uses a spiral SP.
            if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL )
            {
                startX = 2; // ((32-16) >> 3) & 0xf
                startY = 2; // ((32-16) >> 3) & 0xf
            }
        }
    }
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL )
    {
        offset = (short2)(-6, -6);
        dimXY = 0x1C1C;        // 28, 28
        lenSPMaxNumSU = 0x0909;
        // SMALL uses a spiral SP.
        startX = 1;
        startY = 1;
    }
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL )
    {
        offset = (short2)(-4, -4);
        dimXY = 0x1818;       // 24, 24
        lenSPMaxNumSU = 0x404;
    }
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL || searchConfig == CLK_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL )
    {
        offset = (short2)(-2, -2);
        dimXY = 0x1414;        // 20, 20
        lenSPMaxNumSU = 0x0101;
    }
    // DIAMOND 48x40 or 32x32 for multiref
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL )
    {
        if( multiRef == false )
        {
            offset = (short2)(-16, -12);
            dimXY = 0x2830;        // 48, 40
            lenSPMaxNumSU = 0x3910;
            startX = 4;
            startY = 3;
        }
        else
        {
            offset = (short2)(-8, -8);
            dimXY = 0x2020;        // 32, 32
            lenSPMaxNumSU = 0x3907;
            startX = 2;
            startY = 2;
        }
        adaptive = 0x1;
    }
    // LARGE DIAMOND 48x40 or 32x32 for multiref
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL )
    {
        if( multiRef == false )
        {
            offset = (short2)(-16, -12);
            dimXY = 0x2830;        // 48, 40
            lenSPMaxNumSU = 0x3920;
            startX = 4; // ((48-16) >> 3) & 0xf
            startY = 3; // ((40-16) >> 3) & 0xf
        }
        else
        {
            offset = (short2)(-8, -8);
            dimXY = 0x2020;        // 32, 32
            lenSPMaxNumSU = 0x390A;
            startX = 2; // ((32-16) >> 3) & 0xf
            startY = 2; // ((32-16) >> 3) & 0xf
        }
        adaptive = 0x1;
    }

    // Set the reference window ref0 offset M0.0
    const short2 Ref0 = fwd_ref_offset + offset;
    handle = intel_set_message_phase_dw(handle, 0, 0, as_uint(Ref0));

    // Set the reference window ref1 offset M0.1
    // (single-reference calls leave M0.1 as it was in the incoming payload).
    if (multiRef) {
        const short2 Ref1 = bwd_ref_offset + offset;
        handle = intel_set_message_phase_dw(handle, 0, 1, as_uint(Ref1));
    }

    // Set reference window height and width M0.5[31:16]
    handle = intel_set_message_phase_uw(handle, 0, 5*2+1, dimXY);

    // Set LenSP & MaxNumSU (M1.2)...
    handle = intel_set_message_phase_uw(handle, 1, 2*2, lenSPMaxNumSU);

    // Set search path delta (M4.0, M5.0)

    // EXHAUSTIVE and SMALL use spiral search paths.
    if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL || searchConfig == CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL )
    {
        // 14 dwords: the first 8 fill M4, the remaining 6 go to M5.0-M5.5.
        const uint cSearchPathSpiral[14] =
        {
            0x0101F00F, 0x0F0F1010, 0xF0F0F00F, 0x01010101,
            0x10101010, 0x0F0F0F0F, 0xF0F0F00F, 0x0101F0F0,
            0x01010101, 0x10101010, 0x0F0F1010, 0x0F0F0F0F,
            0xF0F0F00F, 0xF0F0F0F0
        };
        // Set M4.0 - M4.7...
        for (int comp = 0; comp < 8; comp++) {
          handle = intel_set_message_phase_dw(handle, 4, comp, cSearchPathSpiral[comp]);
        }

        // Set M5.0 - M5.5...
        for (int comp = 8; comp < 14; comp++) {
          handle = intel_set_message_phase_dw(handle, 5, comp-8, cSearchPathSpiral[comp]);
        }

        // Clear M5.6-M5.7
        handle = intel_set_message_phase_dw(handle, 5, 6, 0);
        handle = intel_set_message_phase_dw(handle, 5, 7, 0);
    }
    // 16x12_RADIUS, 2x2_RADIUS & EXTRA TINY all use raster search paths.
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL ||
             searchConfig == CLK_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL   ||
             searchConfig == CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL )
    {
        // Entries 0-3, 4-7 and 8-10 repeat the same pattern; only the last
        // entry (index 11) differs, which is why the code below reuses
        // entries 0-3 and M4.0-M4.2 instead of indexing the table linearly.
        const uint cSearchPathRaster[12] = {
            0x01010101, 0x10010101, 0x0f0f0f0f, 0x100f0f0f,
            0x01010101, 0x10010101, 0x0f0f0f0f, 0x100f0f0f,
            0x01010101, 0x10010101, 0x0f0f0f0f, 0x000f0f0f
        };

        // Set M4.0 - M4.7 (entries 0-3 written to both halves of M4)...
        for (int comp = 0; comp < 4; comp++) {
          handle = intel_set_message_phase_dw(handle, 4, comp,     cSearchPathRaster[comp]);
          handle = intel_set_message_phase_dw(handle, 4, comp + 4, cSearchPathRaster[comp]);
        }

        // Set M5.0-M5.2 (copied from M4.0-M4.2)...
        handle = intel_set_message_phase_dw(handle, 5, 0, intel_get_message_phase_dw(handle, 4, 0));
        handle = intel_set_message_phase_dw(handle, 5, 1, intel_get_message_phase_dw(handle, 4, 1));
        handle = intel_set_message_phase_dw(handle, 5, 2, intel_get_message_phase_dw(handle, 4, 2));

        // Set M5.3 (the final table entry)...
        handle = intel_set_message_phase_dw(handle, 5, 3, cSearchPathRaster[11]);

        // Clear M5.4-M5.7
        for (int comp = 4; comp <= 7; ++comp) {
          handle = intel_set_message_phase_dw(handle, 5, comp, 0);
        }
    }
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL )
    {
        const uint cSearchPathTiny = 0x000f1001;

        // Clear M4 and M5
        handle = intel_set_message_phase(handle, 4, 0);
        handle = intel_set_message_phase(handle, 5, 0);

        // Set M4.0...
        handle = intel_set_message_phase_dw(handle, 4, 0, cSearchPathTiny);
    }
    else if( searchConfig == CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL || searchConfig == CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL )
    {
        // 11 dwords: the first 8 fill M4, the remaining 3 go to M5.0-M5.2.
        uint cSearchPathDiamond[ 11 ] = {
            0x120FF10F, 0x1E22E20D, 0x20E2FF10, 0x2EDD06FC,
            0x11D33FF1, 0xEB1FF33D, 0x4EF1F1F1, 0xF1F21211,
            0x0DFFFFE0, 0x11201F1F, 0x1105F1CF };

        // Clear M5.0-M5.7 (M5.0-M5.2 are overwritten again below).
        for (int comp = 0; comp <= 7; ++comp) {
          handle = intel_set_message_phase_dw(handle, 5, comp, 0);
        }

        // Set M4.0 - M4.7...
        for (int comp = 0; comp <= 7; comp++) {
          handle = intel_set_message_phase_dw(handle, 4, comp, cSearchPathDiamond[comp]);
        }

        // Set M5.0 - M5.2...
        for (int comp = 8; comp < 11; comp++) {
          handle = intel_set_message_phase_dw(handle, 5, comp-8, cSearchPathDiamond[comp]);
        }
    }

    // Set Start0X, Start0Y, Start1X, Start1Y M1.2[31:16]
    // Skipped entirely when both start values are 0, so the incoming
    // payload's M1.2[31:16] is kept in that case.

    if (( startX | startY )) {
        // X in the low nibble, Y in the next nibble.
        ushort startXY = ( startX ) | ( startY << 4 );
        if( multiRef ) {
            // Duplicate the same start position into the upper byte.
            startXY |= ( startXY << 8 );
        }
        handle = intel_set_message_phase_uw(handle, 1, 2*2+1, startXY);
    }

    // Set AdaptiveEn M1.0[1:1]
    // NOTE(review): this writes byte 0 of M1.0 as 0x2 rather than OR-ing in
    // bit 1 - confirm no other bits of M1.0[7:0] need to be preserved here.

    if (adaptive != 0) {
        handle = intel_set_message_phase_ub(handle, 1, 0, 0x2);
    }

    // Set SearchCtrl M0.3[10:8] to dual reference & dual record

    if (multiRef) {
      const uint SearchCtrl = intel_get_message_phase_dw(handle, 0, 3) | (0x7 << 8);
      handle = intel_set_message_phase_dw(handle, 0, 3, SearchCtrl);
    }

    intel_sub_group_avc_ime_payload_t result = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(handle);
    return result;
}
1371
1372INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
1373avc_ime_set_reference(
1374   short2 fwd_ref_offset,
1375   short2 bwd_ref_offset,
1376   uchar  search_window_config,
1377   bool   multiRef,
1378   intel_sub_group_avc_ime_payload_t payload )
1379{
1380    return __builtin_spirv_avc_ime_set_reference_v2i16_v2i16_i8_bool_intel_sub_group_avc_ime_payload_t(fwd_ref_offset, bwd_ref_offset, search_window_config, multiRef, payload);
1381}
1382
1383intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_single_reference_v2i16_i8_intel_sub_group_avc_ime_payload_t(
1384    short2 ref_offset,
1385    uchar search_window_config,
1386    intel_sub_group_avc_ime_payload_t payload )
1387{
1388    return avc_ime_set_reference(ref_offset, (short2)(0), search_window_config, false, payload);
1389}
1390
1391INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
1392intel_sub_group_avc_ime_set_single_reference(
1393    short2 ref_offset,
1394    uchar  search_window_config,
1395    intel_sub_group_avc_ime_payload_t payload )
1396{
1397    return __builtin_spirv_intel_sub_group_avc_ime_set_single_reference_v2i16_i8_intel_sub_group_avc_ime_payload_t(ref_offset, search_window_config, payload);
1398}
1399
// Dual-reference setup: delegates to avc_ime_set_reference with both offsets
// supplied by the caller and multiRef set to true.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_dual_reference_v2i16_v2i16_i8_intel_sub_group_avc_ime_payload_t(
    short2 fwd_ref_offset,
    short2 bwd_ref_offset,
    uchar search_window_config,
    intel_sub_group_avc_ime_payload_t payload )
{
    const bool use_multi_ref = true;

    return avc_ime_set_reference(
        fwd_ref_offset,
        bwd_ref_offset,
        search_window_config,
        use_multi_ref,
        payload);
}
1408
// Overloadable entry point; forwards to the matching SPIR-V builtin and
// returns the payload it produces.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_dual_reference(
   short2 fwd_ref_offset,
   short2 bwd_ref_offset,
   uchar  search_window_config,
   intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_dual_reference_v2i16_v2i16_i8_intel_sub_group_avc_ime_payload_t(
            fwd_ref_offset,
            bwd_ref_offset,
            search_window_config,
            payload);
    return updated;
}
1418
1419/*****************************************************************************\
1420
1421Description:
    Set MaxNumMVs (M1.1[5:0]).
1423
1424\*****************************************************************************/
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_max_motion_vector_count_i8_intel_sub_group_avc_ime_payload_t(
    uchar max_motion_vector_count,
    intel_sub_group_avc_ime_payload_t payload )
{
    // Read-modify-write of the byte at offset 1*4 of message phase 1.
    // Only the low six bits (the MaxNumMVs field) are replaced; the two
    // upper bits of that byte are carried over from the incoming payload.
    const uchar max_num_mvs_mask = 0x3F;

    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    uchar curr = intel_get_message_phase_ub(handle, 1, 1*4);

    // Mask the caller's value so an out-of-range count cannot spill into
    // bits 7:6 of the byte.
    curr = (curr & ~max_num_mvs_mask) | (max_motion_vector_count & max_num_mvs_mask);

    handle = intel_set_message_phase_ub(handle, 1, 1*4, curr);
    payload = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(handle);
    return payload;
}
1437
// Overloadable entry point; forwards to the matching SPIR-V builtin and
// returns the payload it produces.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_max_motion_vector_count(
    uchar  max_motion_vector_count,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_max_motion_vector_count_i8_intel_sub_group_avc_ime_payload_t(
            max_motion_vector_count,
            payload);
    return updated;
}
1445
// Re-types an IME payload as an MCE payload: the underlying uint4 handle is
// extracted and wrapped again without being modified.
intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_ime_convert_to_mce_payload_intel_sub_group_avc_ime_payload_t(
    intel_sub_group_avc_ime_payload_t payload )
{
    const uint4 raw = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    intel_sub_group_avc_mce_payload_t converted = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(raw);
    return converted;
}
1452
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
intel_sub_group_avc_ime_convert_to_mce_payload(
      intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t converted =
        __builtin_spirv_intel_sub_group_avc_ime_convert_to_mce_payload_intel_sub_group_avc_ime_payload_t(payload);
    return converted;
}
1459
// Re-types an MCE payload as an IME payload: the underlying uint4 handle is
// extracted and wrapped again without being modified.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_ime_payload_intel_sub_group_avc_mce_payload_t(
    intel_sub_group_avc_mce_payload_t payload )
{
    const uint4 raw = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
    intel_sub_group_avc_ime_payload_t converted = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(raw);
    return converted;
}
1466
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_mce_convert_to_ime_payload(
      intel_sub_group_avc_mce_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t converted =
        __builtin_spirv_intel_sub_group_avc_mce_convert_to_ime_payload_intel_sub_group_avc_mce_payload_t(payload);
    return converted;
}
1473
1474/*****************************************************************************\
1475
1476Description:
1477
    Evaluate multiple reference IME operation.
1479
1480  Inputs:  4 Universal phases + 2 IME phases for search path
1481  Outputs: 7 phases
1482
1483\*****************************************************************************/
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
    VMEImage_t src_image_vme,
    VMEImage_t fwd_ref_image_vme,
    VMEImage_t bwd_ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload )
{
    // Unpack the image ids; the sampler is taken from the source VME image.
    const long src = getVMEImage(src_image_vme);
    const long fwd = getVMEImage(fwd_ref_image_vme);
    const long bwd = getVMEImage(bwd_ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    // Record the reference ids in the payload, then issue the IME message
    // with streaming disabled.
    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    msg = intel_sub_group_payload_set_dual_ref_id(src, fwd, bwd, msg);
    uint4 reply = intel_vme_send_ime_new_uint4(msg, src, fwd, bwd, accelerator, VME_STREAM_DISABLE);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t( reply );
}
1502
// Image/sampler entry point. Each image and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_dual_reference(
      read_only image2d_t src_image,
      read_only image2d_t fwd_ref_image,
      read_only image2d_t bwd_ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long fwd_id   = (long)__builtin_astype(fwd_ref_image, void*);
    const long bwd_id   = (long)__builtin_astype(bwd_ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t fwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_id, 0, accel_id);
    VMEImage_t bwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
        src_vme, fwd_vme, bwd_vme, payload);
}
1521
// Single-reference IME evaluation with streaming disabled. The one reference
// image is passed to the send for both reference slots.
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
    VMEImage_t src_image_vme,
    VMEImage_t ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload )
{
    const long src = getVMEImage(src_image_vme);
    const long ref = getVMEImage(ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    msg = intel_sub_group_payload_set_single_ref_id(src, ref, msg);
    uint4 reply = intel_vme_send_ime_new_uint4(msg, src, ref, ref, accelerator, VME_STREAM_DISABLE);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t(reply);
}
1538
// Image/sampler entry point. The images and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_single_reference(
      read_only image2d_t src_image,
      read_only image2d_t ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long ref_id   = (long)__builtin_astype(ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t ref_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
        src_vme, ref_vme, payload);
}
1554
// Single-reference IME evaluation issued with VME_STREAM_OUT; the uint8
// reply is returned wrapped as a single-reference streamout result.
intel_sub_group_avc_ime_result_single_reference_streamout_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streamout_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
    VMEImage_t src_image_vme,
    VMEImage_t ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload )
{
    const long src = getVMEImage(src_image_vme);
    const long ref = getVMEImage(ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    msg = intel_sub_group_payload_set_single_ref_id(src, ref, msg);
    uint8 reply = intel_vme_send_ime_new_uint8(msg, src, ref, ref, accelerator, VME_STREAM_OUT);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_single_reference_streamout_t(reply);
}
1571
// Image/sampler entry point. The images and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_single_reference_streamout_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(
      read_only image2d_t src_image,
      read_only image2d_t ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long ref_id   = (long)__builtin_astype(ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t ref_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streamout_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
        src_vme, ref_vme, payload);
}
1587
// Dual-reference IME evaluation issued with VME_STREAM_OUT; the uint8 reply
// is returned wrapped as a dual-reference streamout result.
intel_sub_group_avc_ime_result_dual_reference_streamout_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
    VMEImage_t src_image_vme,
    VMEImage_t fwd_ref_image_vme,
    VMEImage_t bwd_ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload )
{
    const long src = getVMEImage(src_image_vme);
    const long fwd = getVMEImage(fwd_ref_image_vme);
    const long bwd = getVMEImage(bwd_ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    msg = intel_sub_group_payload_set_dual_ref_id(src, fwd, bwd, msg);
    uint8 reply = intel_vme_send_ime_new_uint8(msg, src, fwd, bwd, accelerator, VME_STREAM_OUT);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t( reply );
}
1606
// Image/sampler entry point. Each image and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_dual_reference_streamout_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(
      read_only image2d_t src_image,
      read_only image2d_t fwd_ref_image,
      read_only image2d_t bwd_ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long fwd_id   = (long)__builtin_astype(fwd_ref_image, void*);
    const long bwd_id   = (long)__builtin_astype(bwd_ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t fwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_id, 0, accel_id);
    VMEImage_t bwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t(
        src_vme, fwd_vme, bwd_vme, payload);
}
1625
// Single-reference IME evaluation issued with VME_STREAM_IN. The two
// stream-in phases are written into the payload at
// UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2 and +3 before the message is sent.
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streamin_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_single_reference_streamin_t(
    VMEImage_t src_image_vme,
    VMEImage_t ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_single_reference_streamin_t streamin )
{
    const long src = getVMEImage(src_image_vme);
    const long ref = getVMEImage(ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    const uint streamin_raw = __builtin_IB_vme_helper_get_handle_avc_ime_single_reference_streamin_t(streamin);

    // Stream-in phase 0 -> payload IME2, stream-in phase 1 -> payload IME3.
    msg = intel_set_message_phase(
        msg,
        UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2,
        intel_get_message_phase(streamin_raw, 0));
    msg = intel_set_message_phase(
        msg,
        UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+3,
        intel_get_message_phase(streamin_raw, 1));

    msg = intel_sub_group_payload_set_single_ref_id(src, ref, msg);
    uint4 reply = intel_vme_send_ime_new_uint4(msg, src, ref, ref, accelerator, VME_STREAM_IN);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t(reply);
}
1649
// Image/sampler entry point. The images and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin along with the
// payload and stream-in data.
intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(
      read_only image2d_t src_image,
      read_only image2d_t ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload,
      intel_sub_group_avc_ime_single_reference_streamin_t streamin )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long ref_id   = (long)__builtin_astype(ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t ref_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streamin_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_single_reference_streamin_t(
        src_vme, ref_vme, payload, streamin);
}
1666
// Dual-reference IME evaluation issued with VME_STREAM_IN. A fresh uint8
// message is assembled from the payload phases followed by the stream-in
// phases, because the combined message is built in a wider handle than the
// uint4 payload.
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_dual_reference_streamin_t(
    VMEImage_t src_image_vme,
    VMEImage_t fwd_ref_image_vme,
    VMEImage_t bwd_ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_dual_reference_streamin_t streamin )
{
    const long src = getVMEImage(src_image_vme);
    const long fwd = getVMEImage(fwd_ref_image_vme);
    const long bwd = getVMEImage(bwd_ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint8 msg = __builtin_IB_create_message_phases_no_init_uint8(16);

    const uint4 payload_raw  = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    const uint2 streamin_raw = __builtin_IB_vme_helper_get_handle_avc_ime_dual_reference_streamin_t(streamin);

    // Phases below this index come straight from the caller's payload.
    const int first_streamin_phase = UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2;

    for (int phase = 0; phase < first_streamin_phase; phase++)
    {
        msg = intel_set_message_phase(msg, phase, intel_get_message_phase(payload_raw, phase));
    }

    // Copy streamin to payload IME2 ~ IME5 (message phases up to, but not
    // including, phase 10).
    for (int s = 0; first_streamin_phase + s < 10; s++)
    {
        msg = intel_set_message_phase(msg, first_streamin_phase + s, intel_get_message_phase(streamin_raw, s));
    }

    msg = intel_sub_group_payload_set_dual_ref_id(src, fwd, bwd, msg);
    uint4 reply = intel_vme_send_ime_new_uint4(msg, src, fwd, bwd, accelerator, VME_STREAM_IN);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t( reply );
}
1701
// Image/sampler entry point. Each image and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin along with the
// payload and stream-in data.
intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(
      read_only image2d_t src_image,
      read_only image2d_t fwd_ref_image,
      read_only image2d_t bwd_ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload,
      intel_sub_group_avc_ime_dual_reference_streamin_t streamin )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long fwd_id   = (long)__builtin_astype(fwd_ref_image, void*);
    const long bwd_id   = (long)__builtin_astype(bwd_ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t fwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_id, 0, accel_id);
    VMEImage_t bwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_dual_reference_streamin_t(
        src_vme, fwd_vme, bwd_vme, payload, streamin);
}
1721
// Single-reference IME evaluation issued with VME_STREAM_INOUT. The two
// stream-in phases are written into the payload at
// UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2 and +3, and the uint8 reply is returned
// wrapped as a single-reference streamout result.
intel_sub_group_avc_ime_result_single_reference_streamout_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_single_reference_streamin_t(
    VMEImage_t src_image_vme,
    VMEImage_t ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_single_reference_streamin_t streamin )
{
    const long src = getVMEImage(src_image_vme);
    const long ref = getVMEImage(ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint4 msg = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    const uint streamin_raw = __builtin_IB_vme_helper_get_handle_avc_ime_single_reference_streamin_t(streamin);

    // Stream-in phase 0 -> payload IME2, stream-in phase 1 -> payload IME3.
    msg = intel_set_message_phase(
        msg,
        UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2,
        intel_get_message_phase(streamin_raw, 0));
    msg = intel_set_message_phase(
        msg,
        UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+3,
        intel_get_message_phase(streamin_raw, 1));

    msg = intel_sub_group_payload_set_single_ref_id(src, ref, msg);
    uint8 reply = intel_vme_send_ime_new_uint8(msg, src, ref, ref, accelerator, VME_STREAM_INOUT);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_single_reference_streamout_t( reply );
}
1744
// Image/sampler entry point. The images and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin along with the
// payload and stream-in data.
INLINE intel_sub_group_avc_ime_result_single_reference_streamout_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(
      read_only image2d_t src_image,
      read_only image2d_t ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload,
      intel_sub_group_avc_ime_single_reference_streamin_t streamin )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long ref_id   = (long)__builtin_astype(ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t ref_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_single_reference_streamin_t(
        src_vme, ref_vme, payload, streamin);
}
1761
// Dual-reference IME evaluation issued with VME_STREAM_INOUT. A fresh uint8
// message is assembled from the payload phases followed by the stream-in
// phases; the uint8 reply is returned wrapped as a dual-reference streamout
// result.
intel_sub_group_avc_ime_result_dual_reference_streamout_t __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_dual_reference_streamin_t(
    VMEImage_t src_image_vme,
    VMEImage_t fwd_ref_image_vme,
    VMEImage_t bwd_ref_image_vme,
    intel_sub_group_avc_ime_payload_t payload,
    intel_sub_group_avc_ime_dual_reference_streamin_t streamin )
{
    const long src = getVMEImage(src_image_vme);
    const long fwd = getVMEImage(fwd_ref_image_vme);
    const long bwd = getVMEImage(bwd_ref_image_vme);
    const long accelerator = getVMESampler(src_image_vme);

    uint8 msg = __builtin_IB_create_message_phases_no_init_uint8(16);

    const uint4 payload_raw  = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
    const uint2 streamin_raw = __builtin_IB_vme_helper_get_handle_avc_ime_dual_reference_streamin_t(streamin);

    // Phases below this index come straight from the caller's payload.
    const int first_streamin_phase = UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+2;

    for (int phase = 0; phase < first_streamin_phase; phase++)
    {
        msg = intel_set_message_phase(msg, phase, intel_get_message_phase(payload_raw, phase));
    }

    // Copy streamin to payload IME2 ~ IME5 (message phases up to, but not
    // including, phase 10).
    for (int s = 0; first_streamin_phase + s < 10; s++)
    {
        msg = intel_set_message_phase(msg, first_streamin_phase + s, intel_get_message_phase(streamin_raw, s));
    }

    msg = intel_sub_group_payload_set_dual_ref_id(src, fwd, bwd, msg);

    uint8 reply = intel_vme_send_ime_new_uint8(msg, src, fwd, bwd, accelerator, VME_STREAM_INOUT);

    return __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t( reply );
}
1797
// Image/sampler entry point. Each image and the sampler are reinterpreted as
// integer ids, paired into VMEImage_t values (second argument 0) together
// with the sampler id, and passed on to the SPIR-V builtin along with the
// payload and stream-in data.
INLINE intel_sub_group_avc_ime_result_dual_reference_streamout_t OVERLOADABLE
intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
      read_only image2d_t src_image,
      read_only image2d_t fwd_ref_image,
      read_only image2d_t bwd_ref_image,
      sampler_t vme_accelerator,
      intel_sub_group_avc_ime_payload_t payload,
      intel_sub_group_avc_ime_dual_reference_streamin_t streamin )
{
    const long accel_id = (long)__builtin_astype(vme_accelerator, void*);
    const long src_id   = (long)__builtin_astype(src_image, void*);
    const long fwd_id   = (long)__builtin_astype(fwd_ref_image, void*);
    const long bwd_id   = (long)__builtin_astype(bwd_ref_image, void*);

    VMEImage_t src_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_id, 0, accel_id);
    VMEImage_t fwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_id, 0, accel_id);
    VMEImage_t bwd_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_id, 0, accel_id);

    return __builtin_spirv_intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout_v3i64_v3i64_v3i64_intel_sub_group_avc_ime_payload_t_intel_sub_group_avc_ime_dual_reference_streamin_t(
        src_vme, fwd_vme, bwd_vme, payload, streamin);
}
1817
// Builds a two-phase stream-in object from a single-reference streamout
// result by copying result phases RETURN_MESSAGE_NUM_GRFS and
// RETURN_MESSAGE_NUM_GRFS + 1.
intel_sub_group_avc_ime_single_reference_streamin_t __builtin_spirv_intel_sub_group_avc_ime_get_single_reference_streamin_intel_sub_group_avc_ime_result_single_reference_streamout_t(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result )
{
    const uint8 result_raw = __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(result);
    uint streamin_raw = __builtin_IB_create_message_phases_no_init(2);

    // Copy IME2 from result W7
    streamin_raw = intel_set_message_phase(
        streamin_raw, 0, intel_get_message_phase(result_raw, RETURN_MESSAGE_NUM_GRFS));
    // Copy IME3 from result W8
    streamin_raw = intel_set_message_phase(
        streamin_raw, 1, intel_get_message_phase(result_raw, RETURN_MESSAGE_NUM_GRFS + 1));

    return __builtin_IB_vme_helper_get_as_avc_ime_single_reference_streamin_t(streamin_raw);
}
1831
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_ime_single_reference_streamin_t OVERLOADABLE
intel_sub_group_avc_ime_get_single_reference_streamin(
   intel_sub_group_avc_ime_result_single_reference_streamout_t result )
{
    intel_sub_group_avc_ime_single_reference_streamin_t streamin =
        __builtin_spirv_intel_sub_group_avc_ime_get_single_reference_streamin_intel_sub_group_avc_ime_result_single_reference_streamout_t(result);
    return streamin;
}
1838
// Builds a four-phase stream-in object from a dual-reference streamout
// result by copying the four result phases that start at
// RETURN_MESSAGE_NUM_GRFS.
intel_sub_group_avc_ime_dual_reference_streamin_t __builtin_spirv_intel_sub_group_avc_ime_get_dual_reference_streamin_intel_sub_group_avc_ime_result_dual_reference_streamout_t(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result )
{
    const uint8 result_raw = __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(result);
    uint2 streamin_raw = __builtin_IB_create_message_phases_no_init_uint2(4);

    // Copy IME2~IME5 from result W7~W10
    for (int phase = 0; phase < 4; phase++)
    {
        streamin_raw = intel_set_message_phase(
            streamin_raw, phase, intel_get_message_phase(result_raw, RETURN_MESSAGE_NUM_GRFS+phase));
    }

    return __builtin_IB_vme_helper_get_as_avc_ime_dual_reference_streamin_t(streamin_raw);
}
1852
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_ime_dual_reference_streamin_t OVERLOADABLE
intel_sub_group_avc_ime_get_dual_reference_streamin(
   intel_sub_group_avc_ime_result_dual_reference_streamout_t result )
{
    intel_sub_group_avc_ime_dual_reference_streamin_t streamin =
        __builtin_spirv_intel_sub_group_avc_ime_get_dual_reference_streamin_intel_sub_group_avc_ime_result_dual_reference_streamout_t(result);
    return streamin;
}
1859
// Produces a plain IME result from a single-reference streamout result by
// copying only the first RETURN_MESSAGE_NUM_GRFS phases; the streamout
// phases that follow are left out.
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_strip_single_reference_streamout_intel_sub_group_avc_ime_result_single_reference_streamout_t(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result )
{
    uint4 stripped = __builtin_IB_create_message_phases_no_init_uint4(RETURN_MESSAGE_NUM_GRFS+1);
    const uint8 result_raw = __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(result);

    for (int phase = 0; phase < RETURN_MESSAGE_NUM_GRFS; phase++)
    {
        stripped = intel_set_message_phase(stripped, phase, intel_get_message_phase(result_raw, phase));
    }

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t(stripped);
}
1873
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_strip_single_reference_streamout(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result )
{
    intel_sub_group_avc_ime_result_t stripped =
        __builtin_spirv_intel_sub_group_avc_ime_strip_single_reference_streamout_intel_sub_group_avc_ime_result_single_reference_streamout_t(result);
    return stripped;
}
1880
// Produces a plain IME result from a dual-reference streamout result by
// copying only the first RETURN_MESSAGE_NUM_GRFS phases; the streamout
// phases that follow are left out.
intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_ime_strip_dual_reference_streamout_intel_sub_group_avc_ime_result_dual_reference_streamout_t(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result )
{
    uint4 stripped = __builtin_IB_create_message_phases_no_init_uint4(RETURN_MESSAGE_NUM_GRFS + 1);
    const uint8 result_raw = __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(result);

    for (int phase = 0; phase < RETURN_MESSAGE_NUM_GRFS; phase++)
    {
        stripped = intel_set_message_phase(stripped, phase, intel_get_message_phase(result_raw, phase));
    }

    return __builtin_IB_vme_helper_get_as_avc_ime_result_t(stripped);
}
1894
// Overloadable entry point; forwards to the matching SPIR-V builtin.
INLINE intel_sub_group_avc_ime_result_t OVERLOADABLE
intel_sub_group_avc_ime_strip_dual_reference_streamout(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result )
{
    intel_sub_group_avc_ime_result_t stripped =
        __builtin_spirv_intel_sub_group_avc_ime_strip_dual_reference_streamout_intel_sub_group_avc_ime_result_dual_reference_streamout_t(result);
    return stripped;
}
1901
1902/*****************************************************************************\
1903
1904Description:
1905    Get the MVs from the streamout results based on the input major shape and
1906    direction.
1907
1908\*****************************************************************************/
uint __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
    uchar shape,
    uchar direction )
{
    // Returns 0 when `shape` matches none of the handled major shapes.
    uint retValue = 0;
    uint8 handle_r = __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(result);

    // IME Streamout follows the same format as the IME Streamin message phases (IME2-IME5).
    // Any direction other than CLK_AVC_ME_MAJOR_FORWARD_INTEL selects the
    // backward phases.
    const bool is_forward = (direction == CLK_AVC_ME_MAJOR_FORWARD_INTEL);

    // The 16x16 motion vector is read from a different phase than the
    // sub-shape motion vectors, so the two phase indices get distinct names
    // instead of one shadowing the other.
    const uint reg_16x16      = is_forward ? /*fwd*/(RETURN_MESSAGE_NUM_GRFS)   : /*bwd*/(RETURN_MESSAGE_NUM_GRFS+2);
    const uint reg_sub_shapes = is_forward ? /*fwd*/(RETURN_MESSAGE_NUM_GRFS+1) : /*bwd*/(RETURN_MESSAGE_NUM_GRFS+3);

    if (shape == VME_MAJOR_16x16) {
        // WX+2.5 fwd
        // WX+4.5 bwd
        retValue = intel_get_message_phase_dw(handle_r, reg_16x16, 5);
    }
    else if (shape == VME_MAJOR_16x8) {
        // WX+3.0 and WX+3.1 fwd
        // WX+5.0 and WX+5.1 bwd
        retValue = intel_broadcast_message_phase_dw(handle_r, reg_sub_shapes, 0, 2);
    }
    else if (shape == VME_MAJOR_8x16) {
        // WX+3.2 and WX+3.3
        // WX+5.2 and WX+5.3
        retValue = intel_broadcast_message_phase_dw(handle_r, reg_sub_shapes, 2, 2);
    }
    else if (shape == VME_MAJOR_8x8) {
        // WX+3.4 ~ WX+3.7
        // WX+5.4 ~ WX+5.7
        retValue = intel_broadcast_message_phase_dw(handle_r, reg_sub_shapes, 4, 4);
    }

    return retValue;
}
1943
// Overloadable entry point for dual-reference streamout results; forwards
// to the matching SPIR-V builtin.
INLINE uint OVERLOADABLE
intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
    uchar shape,
    uchar direction )
{
    const uint motion_vectors =
        __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(
            result,
            shape,
            direction);
    return motion_vectors;
}
1952
// Single-reference variant: the result's uint8 handle is re-wrapped as a
// dual-reference streamout result and queried in the forward direction.
uint __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
    uchar major_shape )
{
    const uint8 result_raw = __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(result);
    intel_sub_group_avc_ime_result_dual_reference_streamout_t as_dual =
        __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t(result_raw);

    return intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
        as_dual,
        major_shape,
        CLK_AVC_ME_MAJOR_FORWARD_INTEL);
}
1961
// Overloadable entry point for single-reference streamout results; forwards
// to the matching SPIR-V builtin.
INLINE uint OVERLOADABLE
intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(
    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
    uchar major_shape )
{
    const uint motion_vectors =
        __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(
            result,
            major_shape);
    return motion_vectors;
}
1969
1970/*****************************************************************************\
1971
1972Description:
1973    Get the distortions from the streamout results based on the input major shape and
1974    direction.
1975
1976\*****************************************************************************/
1977ushort __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_distortions_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(
1978    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
1979    uchar shape,
1980    uchar direction )
1981{
1982    ushort retValue = 0;
1983    uint8 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(result);
1984    // IME Streamout follows the same format as the IME Streamin message phases (IME2-IME5).
1985    const uint reg = (direction == CLK_AVC_ME_MAJOR_FORWARD_INTEL) ?
1986        /*fwd*/ RETURN_MESSAGE_NUM_GRFS :
1987        /*bwd*/(RETURN_MESSAGE_NUM_GRFS+2);
1988
1989    if (shape == VME_MAJOR_16x16) {
1990        // WX+2.4 [15:0] - Rec0 Shape 16x16 Distortion
1991        // WX+4.4 [15:0] - Rec1 Shape 16x16 Distortion
1992        retValue = intel_get_message_phase_uw(handle, reg, 4*2);
1993    }
1994    else if (shape == VME_MAJOR_16x8) {
1995        // WX+2.0 [15:00] - Rec0 Shape 16x8_0 Distortion
1996        // WX+4.0 [15:00] - Rec1 Shape 16x8_0 Distortion
1997        retValue = intel_broadcast_message_phase_uw(handle, reg, 0, 2);
1998    }
1999    else if (shape == VME_MAJOR_8x16) {
2000        // WX+2.1 [15:00] - Rec0 Shape 8x16_0 Distortion
2001        // WX+4.1 [15:00] - Rec1 Shape 8x16_0 Distortion
2002        retValue = intel_broadcast_message_phase_uw(handle, reg, 1*2, 2);
2003    }
2004    else if (shape == VME_MAJOR_8x8) {
2005        // WX+2.2 [15:00] - Rec0 Shape 8x8_0 Distortion
2006        // WX+4.2 [15:00] - Rec1 Shape 8x8_0 Distortion
2007        retValue = intel_broadcast_message_phase_uw(handle, reg, 2*2, 4);
2008    }
2009
2010    return retValue;
2011}
2012
2013INLINE ushort OVERLOADABLE
2014intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
2015    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
2016    uchar shape,
2017    uchar direction )
2018{
2019    return __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_distortions_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(result, shape, direction);
2020}
2021
2022ushort __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_distortions_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(
2023    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
2024    uchar major_shape )
2025{
2026    uint8 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(result);
2027    intel_sub_group_avc_ime_result_dual_reference_streamout_t nresult = __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t(handle);
2028    return intel_sub_group_avc_ime_get_streamout_major_shape_distortions(nresult, major_shape, 0);
2029}
2030
2031INLINE ushort OVERLOADABLE
2032intel_sub_group_avc_ime_get_streamout_major_shape_distortions(
2033    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
2034    uchar major_shape )
2035{
2036    return __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_distortions_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(result, major_shape);
2037}
2038
2039intel_sub_group_avc_mce_result_t __builtin_spirv_intel_sub_group_avc_ime_convert_to_mce_result_intel_sub_group_avc_ime_result_t(
2040    intel_sub_group_avc_ime_result_t result )
2041{
2042    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2043    return __builtin_IB_vme_helper_get_as_avc_mce_result_t(handle);
2044}
2045
2046INLINE intel_sub_group_avc_mce_result_t OVERLOADABLE
2047intel_sub_group_avc_ime_convert_to_mce_result(
2048      intel_sub_group_avc_ime_result_t  result )
2049{
2050    return __builtin_spirv_intel_sub_group_avc_ime_convert_to_mce_result_intel_sub_group_avc_ime_result_t(result);
2051}
2052
2053intel_sub_group_avc_ime_result_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_ime_result_intel_sub_group_avc_mce_result_t(
2054    intel_sub_group_avc_mce_result_t result )
2055{
2056    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_result_t(result);
2057    return __builtin_IB_vme_helper_get_as_avc_ime_result_t(handle);
2058}
2059
2060INLINE intel_sub_group_avc_ime_result_t OVERLOADABLE
2061intel_sub_group_avc_mce_convert_to_ime_result(
2062      intel_sub_group_avc_mce_result_t  result )
2063{
2064    return __builtin_spirv_intel_sub_group_avc_mce_convert_to_ime_result_intel_sub_group_avc_mce_result_t(result);
2065}
2066
2067/*****************************************************************************\
2068
2069Description:
2070    - set UniMixDis M1.1[28:28]
2071
2072\*****************************************************************************/
2073intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_unidirectional_mix_disable_intel_sub_group_avc_ime_payload_t(
2074    intel_sub_group_avc_ime_payload_t payload )
2075{
2076    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
2077    const uint UniMixDis = intel_get_message_phase_dw(handle, 1, 1) | (1<<28);
2078    handle = intel_set_message_phase_dw(handle, 1, 1, UniMixDis);
2079
2080    intel_sub_group_avc_ime_payload_t result = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(handle);
2081    return result;
2082}
2083
2084INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
2085intel_sub_group_avc_ime_set_unidirectional_mix_disable(
2086    intel_sub_group_avc_ime_payload_t payload )
2087{
2088    return __builtin_spirv_intel_sub_group_avc_ime_set_unidirectional_mix_disable_intel_sub_group_avc_ime_payload_t(payload);
2089}
2090
2091/*****************************************************************************\
2092
2093Description:
2094    - set EarlyIMESuccessEn (M1.0 :5)
2095    - set EarlyIMESTop (M1.0 : 31:24)
2096
2097\*****************************************************************************/
2098intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_early_search_termination_threshold_i8_intel_sub_group_avc_ime_payload_t(
2099    uchar threshold,
2100    intel_sub_group_avc_ime_payload_t payload )
2101{
2102    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
2103    uchar newval = intel_get_message_phase_ub(handle, 1, 0*4) | (1 << 5);
2104    handle       = intel_set_message_phase_ub(handle, 1, 0*4, newval);
2105    handle       = intel_set_message_phase_ub(handle, 1, 0*4+3, threshold);
2106
2107    return __builtin_IB_vme_helper_get_as_avc_ime_payload_t(handle);
2108}
2109
2110INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
2111intel_sub_group_avc_ime_set_early_search_termination_threshold(
2112    uchar threshold,
2113    intel_sub_group_avc_ime_payload_t payload )
2114{
2115    return __builtin_spirv_intel_sub_group_avc_ime_set_early_search_termination_threshold_i8_intel_sub_group_avc_ime_payload_t(threshold, payload);
2116}
2117
2118// Note: This function is not present in the VME specification, but our tests use it.
2119// TODO: a ticket to validation team has been submitted to remove the calls in tests. Remove this function when it is fixed.
2120// As Clang's spir-v generator doesn't recognize this function, it passses the call "as is".
2121// VME types are built-in in Clang 5.0, so the mangling changed from 4.0 version. Below mangled definiton is to workaround the mangling change for this function.
2122INLINE intel_sub_group_avc_ime_payload_t
2123_Z77intel_sub_group_avc_ime_set_early_unidirectional_search_termination_thresholdh37ocl_intel_sub_group_avc_ime_payload_t(
2124    uchar threshold,
2125    intel_sub_group_avc_ime_payload_t payload )
2126{
2127    return __builtin_spirv_intel_sub_group_avc_ime_set_early_search_termination_threshold_i8_intel_sub_group_avc_ime_payload_t(threshold, payload);
2128}
2129
2130
2131intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_early_unidirectional_search_termination_threshold_i8_intel_sub_group_avc_ime_payload_t(
2132    uchar threshold,
2133    intel_sub_group_avc_ime_payload_t payload )
2134{
2135    return intel_sub_group_avc_ime_set_early_search_termination_threshold(threshold, payload);
2136}
2137
2138/*****************************************************************************\
2139
2140Description:
2141    Get the early termination indication (W0.6 :24) from IME result payload.
2142
2143\*****************************************************************************/
2144uchar __builtin_spirv_intel_sub_group_avc_ime_get_unidirectional_early_search_termination_intel_sub_group_avc_ime_result_t(
2145    intel_sub_group_avc_ime_result_t result )
2146{
2147    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2148    return intel_get_message_phase_ub(handle, 0, 6*4+3) & 0x1;
2149}
2150
2151INLINE uchar OVERLOADABLE
2152intel_sub_group_avc_ime_get_unidirectional_early_search_termination(
2153    intel_sub_group_avc_ime_result_t  result )
2154{
2155    return __builtin_spirv_intel_sub_group_avc_ime_get_unidirectional_early_search_termination_intel_sub_group_avc_ime_result_t(result);
2156}
2157
2158/*****************************************************************************\
2159
2160Description:
2161    Get the truncated search indication (W0.6 :25) from IME result payload.
2162
2163\*****************************************************************************/
2164uchar __builtin_spirv_intel_sub_group_avc_ime_get_truncated_search_indication_intel_sub_group_avc_ime_result_t(
2165    intel_sub_group_avc_ime_result_t result )
2166{
2167    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2168    return (intel_get_message_phase_ub(handle, 0, 6*4+3) & (0x1 << 1)) >> 1;
2169}
2170
2171INLINE uchar OVERLOADABLE
2172intel_sub_group_avc_ime_get_truncated_search_indication(
2173    intel_sub_group_avc_ime_result_t result )
2174{
2175    return __builtin_spirv_intel_sub_group_avc_ime_get_truncated_search_indication_intel_sub_group_avc_ime_result_t(result);
2176}
2177
2178/*****************************************************************************\
2179
2180Description:
2181    - set WeightedSADHAAR M1.7 [20:20]
2182    - set Weighted SAD Control M1.3[31:0]
2183
2184\*****************************************************************************/
2185intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_weighted_sad_i32_intel_sub_group_avc_ime_payload_t(
2186    uint packed_sad_weights,
2187    intel_sub_group_avc_ime_payload_t payload )
2188{
2189    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_payload_t(payload);
2190    // Set WeightedSADHAAR M1.7[20:20]
2191    const uint WeightedSADHAAR = intel_get_message_phase_dw(handle, 1, 7) | (1<<20);
2192    handle = intel_set_message_phase_dw(handle, 1, 7, WeightedSADHAAR);
2193
2194    // Set Weighted SAD Control M1.3[31:0]
2195    handle = intel_set_message_phase_dw(handle, 1, 3, packed_sad_weights);
2196
2197    intel_sub_group_avc_ime_payload_t result = __builtin_IB_vme_helper_get_as_avc_ime_payload_t(handle);
2198    return result;
2199}
2200
2201INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
2202intel_sub_group_avc_ime_set_weighted_sad(
2203    uint packed_sad_weights,
2204    intel_sub_group_avc_ime_payload_t payload )
2205{
2206    return __builtin_spirv_intel_sub_group_avc_ime_set_weighted_sad_i32_intel_sub_group_avc_ime_payload_t(packed_sad_weights, payload);
2207}
2208
2209/*****************************************************************************\
2210
2211Description:
2212    Get the motion vector 0 W1.0 [31:0] from IME result payload.
2213
2214\*****************************************************************************/
2215uint __builtin_spirv_intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector_intel_sub_group_avc_ime_result_t(
2216    intel_sub_group_avc_ime_result_t result )
2217{
2218    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2219    return intel_get_message_phase_dw(handle, 1, 0);
2220}
2221
2222INLINE uint OVERLOADABLE
2223intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(
2224    intel_sub_group_avc_ime_result_t  result )
2225{
2226    return __builtin_spirv_intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector_intel_sub_group_avc_ime_result_t(result);
2227}
2228
2229/*****************************************************************************\
2230
2231Description:
2232    Get the inter border reached W5.0[15:0] from IME result payload.
2233
2234\*****************************************************************************/
2235ushort __builtin_spirv_intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion_intel_sub_group_avc_ime_result_t(
2236    intel_sub_group_avc_ime_result_t result )
2237{
2238    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2239    return intel_get_message_phase_uw(handle, 5, 0);
2240}
2241
2242INLINE ushort OVERLOADABLE
2243intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(
2244    intel_sub_group_avc_ime_result_t  result )
2245{
2246    return __builtin_spirv_intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion_intel_sub_group_avc_ime_result_t(result);
2247}
2248
2249/*****************************************************************************\
2250
2251Description:
2252    Get the inter border reached W0.1 from IME result payload.
2253
2254\*****************************************************************************/
2255uchar __builtin_spirv_intel_sub_group_avc_ime_get_border_reached_i8_intel_sub_group_avc_ime_result_t(
2256    uchar frame_select,
2257    intel_sub_group_avc_ime_result_t result )
2258{
2259    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_t(result);
2260    uchar boarder_reached = intel_get_message_phase_ub(handle, 0, 1*4);
2261
2262    // Ref0 select - W0.1[3:0]
2263    if (frame_select == CLK_AVC_ME_FRAME_FORWARD_INTEL) {
2264        boarder_reached &= 0x0F;
2265    }
2266    // Ref1 frame - W0.1[7:4]
2267    else if (frame_select == CLK_AVC_ME_FRAME_BACKWARD_INTEL) {
2268        boarder_reached >>= 4;
2269    }
2270    else {
2271        boarder_reached = 0;
2272    }
2273
2274    return boarder_reached;
2275}
2276
2277INLINE uchar OVERLOADABLE
2278intel_sub_group_avc_ime_get_border_reached(
2279    uchar frame_select,
2280    intel_sub_group_avc_ime_result_t  result )
2281{
2282    return __builtin_spirv_intel_sub_group_avc_ime_get_border_reached_i8_intel_sub_group_avc_ime_result_t(frame_select, result);
2283}
2284
2285// ... REF functions ...
2286
2287/*****************************************************************************\
2288
2289    Initialize BME payload:
2290        - set SrcX (M0.2 :15:0) and SrcY (M0.2 :31:16)
2291        - set SubBlockMV (M4 - M7)
2292        - set MbMode (M2.5 :1:0)
2293        - set SubMbShape (M2.5 :15:8)
2294        - set SubPredMode (M2.5 :23:16)
2295        - set SubPelMode (M0.3 :13:12)
2296        - set InterSAD (M0.3 :21:20)
2297        - set BMEDisableFBR (M0.3 :18) for FME
2298        - set BiWeight (M1.1 :21:16) for BME
2299
2300\*****************************************************************************/
2301intel_sub_group_avc_ref_payload_t __builtin_spirv_avc_me_initialize_v2i16_i64_i8_i8_i8_i8_i8_i8_bool(
2302    ushort2 src_coord,
2303    ulong motion_vectors,
2304    uchar major_shape,
2305    uchar minor_shapes,
2306    uchar directions,
2307    uchar pixel_resolution,
2308    uchar bidirectional_weight,
2309    uchar sad_adjustment,
2310    bool is_bme )
2311{
2312    uint4 payload = __builtin_IB_create_message_phases_uint4(UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+INPUT_MESSAGE_SIC_NUM_GRFS);
2313
2314    // Set SrcX M0.2[15:0] and SrcY M0.2[31:16].
2315    payload = intel_set_message_phase_dw(payload, 0, 2, as_uint(src_coord));
2316
2317    // set SubBlockMV (M4 - M7)
2318    payload = intel_simd_set_message_phase_uq(payload, 4, 4, 0, 4, motion_vectors);
2319
2320    // Set MbMode M2.5[1:0].
2321    payload = intel_set_message_phase_ub(payload, 2, 5*4, major_shape);
2322
2323    // Set SubMbShape M2.5[15:8].
2324    payload = intel_set_message_phase_ub(payload, 2, 5*4 + 1, minor_shapes);
2325
2326    // Set SubPredMode M2.5[23:16].
2327    payload = intel_set_message_phase_ub(payload, 2, 5*4 + 2, directions);
2328
2329    // Set SubPelMode M0.3[13:12], InterSAD M0.3[21:20]
2330    const uint imm = (sad_adjustment << 20) | (pixel_resolution << 12);
2331    payload = intel_set_message_phase_dw(payload, 0, 3, imm);
2332
2333    // Set BiWeight M1.1[21:16] for BME
2334    if (is_bme) {
2335        payload = intel_set_message_phase_ub(payload, 1, 1*4+2, bidirectional_weight);
2336    }
2337
2338    // Set BMEDisableFBR M0.3[18:18] for FME.
2339    if (!is_bme) {
2340        const uchar BMEDisableFBR = intel_get_message_phase_ub(payload, 0, 3*4+2) | (1<<2);
2341        payload = intel_set_message_phase_ub(payload, 0, 3*4+2, BMEDisableFBR);
2342    }
2343
2344    intel_sub_group_avc_ref_payload_t result = __builtin_IB_vme_helper_get_as_avc_ref_payload_t(payload);
2345    return result;
2346}
2347
2348INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2349avc_me_initialize(
2350    ushort2 src_coord,
2351    ulong motion_vectors,
2352    uchar major_shape,
2353    uchar minor_shapes,
2354    uchar directions,
2355    uchar pixel_resolution,
2356    uchar bidirectional_weight,
2357    uchar sad_adjustment,
2358    bool  is_bme )
2359{
2360    return __builtin_spirv_avc_me_initialize_v2i16_i64_i8_i8_i8_i8_i8_i8_bool(src_coord, motion_vectors, major_shape, minor_shapes, directions, pixel_resolution, bidirectional_weight, sad_adjustment, is_bme);
2361}
2362
2363intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_bme_initialize_v2i16_i64_i8_i8_i8_i8_i8_i8(
2364    ushort2 src_coord,
2365    ulong motion_vectors,
2366    uchar major_shape,
2367    uchar minor_shapes,
2368    uchar directions,
2369    uchar pixel_resolution,
2370    uchar bidirectional_weight,
2371    uchar sad_adjustment )
2372{
2373    return avc_me_initialize(src_coord, motion_vectors, major_shape,
2374                             minor_shapes, directions, pixel_resolution,
2375                             bidirectional_weight, sad_adjustment, true);
2376}
2377
2378INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2379intel_sub_group_avc_bme_initialize(
2380    ushort2 src_coord,
2381    ulong motion_vectors,
2382    uchar major_shape,
2383    uchar minor_shapes,
2384    uchar directions,
2385    uchar pixel_resolution,
2386    uchar bidirectional_weight,
2387    uchar sad_adjustment )
2388{
2389    return __builtin_spirv_intel_sub_group_avc_bme_initialize_v2i16_i64_i8_i8_i8_i8_i8_i8(src_coord, motion_vectors, major_shape, minor_shapes, directions, pixel_resolution, bidirectional_weight, sad_adjustment);
2390}
2391
2392intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_fme_initialize_v2i16_i64_i8_i8_i8_i8_i8(
2393    ushort2 src_coord,
2394    ulong motion_vectors,
2395    uchar major_shape,
2396    uchar minor_shapes,
2397    uchar directions,
2398    uchar pixel_resolution,
2399    uchar sad_adjustment )
2400{
2401    return avc_me_initialize(src_coord, motion_vectors, major_shape,
2402                             minor_shapes, directions, pixel_resolution,
2403                             0, sad_adjustment, false);
2404}
2405
2406INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2407intel_sub_group_avc_fme_initialize(
2408    ushort2 src_coord,
2409    ulong motion_vectors,
2410    uchar major_shape,
2411    uchar minor_shapes,
2412    uchar directions,
2413    uchar pixel_resolution,
2414    uchar sad_adjustment )
2415{
2416    return __builtin_spirv_intel_sub_group_avc_fme_initialize_v2i16_i64_i8_i8_i8_i8_i8(src_coord, motion_vectors, major_shape, minor_shapes, directions, pixel_resolution, sad_adjustment);
2417}
2418
2419intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_ref_convert_to_mce_payload_intel_sub_group_avc_ref_payload_t(
2420    intel_sub_group_avc_ref_payload_t payload )
2421{
2422    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
2423    return __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
2424}
2425
2426INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
2427intel_sub_group_avc_ref_convert_to_mce_payload(
2428      intel_sub_group_avc_ref_payload_t payload )
2429{
2430    return __builtin_spirv_intel_sub_group_avc_ref_convert_to_mce_payload_intel_sub_group_avc_ref_payload_t(payload);
2431}
2432
2433intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_ref_payload_intel_sub_group_avc_mce_payload_t(
2434    intel_sub_group_avc_mce_payload_t payload )
2435{
2436    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
2437    return __builtin_IB_vme_helper_get_as_avc_ref_payload_t(handle);
2438}
2439
2440INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2441intel_sub_group_avc_mce_convert_to_ref_payload(
2442      intel_sub_group_avc_mce_payload_t payload )
2443{
2444    return __builtin_spirv_intel_sub_group_avc_mce_convert_to_ref_payload_intel_sub_group_avc_mce_payload_t(payload);
2445}
2446
2447/*****************************************************************************\
2448
2449Description:
2450    - set BiMixDis M1.0[2:2]
2451
2452\*****************************************************************************/
2453intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_bidirectional_mix_disable_intel_sub_group_avc_ref_payload_t(
2454    intel_sub_group_avc_ref_payload_t payload )
2455{
2456    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
2457    const uint BiMixDis = intel_get_message_phase_dw(handle, 1, 0) | (1<<2);
2458    handle = intel_set_message_phase_dw(handle, 1, 0, BiMixDis);
2459
2460    intel_sub_group_avc_ref_payload_t result = __builtin_IB_vme_helper_get_as_avc_ref_payload_t( handle );
2461    return result;
2462}
2463
2464INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2465intel_sub_group_avc_ref_set_bidirectional_mix_disable(
2466    intel_sub_group_avc_ref_payload_t payload )
2467{
2468    return __builtin_spirv_intel_sub_group_avc_ref_set_bidirectional_mix_disable_intel_sub_group_avc_ref_payload_t(payload);
2469}
2470
2471/*****************************************************************************\
2472
2473Description:
2474    - set BilinearEnable M1.7[18:18]
2475
2476\*****************************************************************************/
2477intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_bilinear_filter_enable_intel_sub_group_avc_ref_payload_t(
2478    intel_sub_group_avc_ref_payload_t payload )
2479{
2480    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
2481    const uint BilinearEnable = intel_get_message_phase_dw(handle, 1, 7) | (1<<18);
2482    handle = intel_set_message_phase_dw(handle, 1, 7, BilinearEnable);
2483
2484    intel_sub_group_avc_ref_payload_t result = __builtin_IB_vme_helper_get_as_avc_ref_payload_t(handle);
2485    return result;
2486}
2487
2488INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
2489intel_sub_group_avc_ref_set_bilinear_filter_enable(
2490      intel_sub_group_avc_ref_payload_t payload )
2491{
2492    return __builtin_spirv_intel_sub_group_avc_ref_set_bilinear_filter_enable_intel_sub_group_avc_ref_payload_t(payload);
2493}
2494
2495/*****************************************************************************\
2496
2497    Evalulate a single reference REF operation.
2498
2499    (W0 W1 W2 W3 W4 W5 W6 W7) = (M0 M1 M2 M3 M4 M5) REF_EVALUATE_MULTI_REF ( (M0 M1) (M2 M3) (M4 M5) (M6 M7) )
2500
2501\*****************************************************************************/
2502intel_sub_group_avc_ref_result_t __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_ref_payload_t(
2503    VMEImage_t src_image_vme,
2504    VMEImage_t fwd_ref_image_vme,
2505    VMEImage_t bwd_ref_image_vme,
2506    intel_sub_group_avc_ref_payload_t payload )
2507{
2508    long src_image = getVMEImage(src_image_vme);
2509    long fwd_ref_image = getVMEImage(fwd_ref_image_vme);
2510    long bwd_ref_image = getVMEImage(bwd_ref_image_vme);
2511    long vme_accelerator = getVMESampler(src_image_vme);
2512
2513    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
2514
2515    handle = intel_sub_group_payload_set_dual_ref_id(src_image, fwd_ref_image, bwd_ref_image, handle);
2516    uint4 res = __builtin_IB_vme_send_fbr_new(handle, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator);
2517
2518    intel_sub_group_avc_ref_result_t result = __builtin_IB_vme_helper_get_as_avc_ref_result_t(res);
2519    return result;
2520}
2521
2522INLINE intel_sub_group_avc_ref_result_t OVERLOADABLE
2523intel_sub_group_avc_ref_evaluate_with_dual_reference(
2524      read_only image2d_t src_image,
2525      read_only image2d_t fwd_ref_image,
2526      read_only image2d_t bwd_ref_image,
2527      sampler_t vme_accelerator,
2528      intel_sub_group_avc_ref_payload_t payload )
2529{
2530    long src_image_id = (long)__builtin_astype(src_image, void*);
2531    long fwd_ref_image_id = (long)__builtin_astype(fwd_ref_image, void*);
2532    long bwd_ref_image_id = (long)__builtin_astype(bwd_ref_image, void*);
2533    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
2534    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
2535    VMEImage_t fwd_ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_ref_image_id, 0, vme_accelerator_id);
2536    VMEImage_t bwd_ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_ref_image_id, 0, vme_accelerator_id);
2537
2538    return __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_ref_payload_t(src_image_vme, fwd_ref_image_vme, bwd_ref_image_vme, payload);
2539}
2540
2541intel_sub_group_avc_ref_result_t __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_ref_payload_t(
2542    VMEImage_t src_image_vme,
2543    VMEImage_t ref_image_vme,
2544    intel_sub_group_avc_ref_payload_t payload )
2545{
2546    long src_image = getVMEImage(src_image_vme);
2547    long ref_image = getVMEImage(ref_image_vme);
2548    long vme_accelerator = getVMESampler(src_image_vme);
2549
2550    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
2551
2552    handle = intel_sub_group_payload_set_single_ref_id(src_image, ref_image, handle);
2553    uint4 res = __builtin_IB_vme_send_fbr_new(handle, src_image, ref_image, ref_image, vme_accelerator);
2554
2555    intel_sub_group_avc_ref_result_t result = __builtin_IB_vme_helper_get_as_avc_ref_result_t(res);
2556    return result;
2557}
2558
2559INLINE intel_sub_group_avc_ref_result_t OVERLOADABLE
2560intel_sub_group_avc_ref_evaluate_with_single_reference(
2561      read_only image2d_t src_image,
2562      read_only image2d_t ref_image,
2563      sampler_t vme_accelerator,
2564      intel_sub_group_avc_ref_payload_t payload )
2565{
2566    long src_image_id = (long)__builtin_astype(src_image, void*);
2567    long ref_image_id = (long)__builtin_astype(ref_image, void*);
2568    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
2569    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
2570    VMEImage_t ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_image_id, 0, vme_accelerator_id);
2571
2572    return __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_ref_payload_t(src_image_vme, ref_image_vme, payload);
2573}
2574
2575intel_sub_group_avc_mce_result_t __builtin_spirv_intel_sub_group_avc_ref_convert_to_mce_result_intel_sub_group_avc_ref_result_t(
2576    intel_sub_group_avc_ref_result_t result )
2577{
2578    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_ref_result_t(result);
2579    return __builtin_IB_vme_helper_get_as_avc_mce_result_t(handle);
2580}
2581
2582INLINE intel_sub_group_avc_mce_result_t OVERLOADABLE
2583intel_sub_group_avc_ref_convert_to_mce_result(
2584      intel_sub_group_avc_ref_result_t result )
2585{
2586    return __builtin_spirv_intel_sub_group_avc_ref_convert_to_mce_result_intel_sub_group_avc_ref_result_t(result);
2587}
2588
2589intel_sub_group_avc_ref_result_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_ref_result_intel_sub_group_avc_mce_result_t(
2590    intel_sub_group_avc_mce_result_t result )
2591{
2592    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_result_t(result);
2593    return __builtin_IB_vme_helper_get_as_avc_ref_result_t(handle);
2594}
2595
2596INLINE intel_sub_group_avc_ref_result_t OVERLOADABLE
2597intel_sub_group_avc_mce_convert_to_ref_result(
2598      intel_sub_group_avc_mce_result_t result )
2599{
2600    return __builtin_spirv_intel_sub_group_avc_mce_convert_to_ref_result_intel_sub_group_avc_mce_result_t(result);
2601}
2602
2603// ... SIC functions ...
2604
2605/*****************************************************************************\
2606
2607Description:
2608    Initialize SIC payload M0:
2609        - set SrcX M0.2[15:0] and SrcY M0.2[31:16]
2610
2611\*****************************************************************************/
2612intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_initialize_v2i16(
2613    ushort2 src_coord )
2614{
2615    // Create and initialize SIC payload
2616    uint4 payload = __builtin_IB_create_message_phases_uint4(UNIVERSAL_INPUT_MESSAGE_NUM_GRFS+INPUT_MESSAGE_SIC_NUM_GRFS);
2617
2618    // set SrcX M0.2[15:0] and SrcY M0.2[31:16]
2619    payload = intel_set_message_phase_dw(payload, 0, 2, as_uint(src_coord));
2620
2621    intel_sub_group_avc_sic_payload_t npayload = __builtin_IB_vme_helper_get_as_avc_sic_payload_t(payload);
2622    return npayload;
2623}
2624
2625INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2626intel_sub_group_avc_sic_initialize(
2627    ushort2 src_coord )
2628{
2629    return __builtin_spirv_intel_sub_group_avc_sic_initialize_v2i16(src_coord);
2630}
2631
2632uint __builtin_spirv_intel_sub_group_avc_sic_get_motion_vector_mask_i32_i8(
2633    uint skip_block_partition_type,
2634    uchar direction )
2635{
2636    uint mask =
2637        ( skip_block_partition_type == CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL ) ?
2638            direction + 1 :
2639            direction + 0x55;
2640    mask = ( mask << 24 );
2641    return mask;
2642}
2643
2644INLINE uint OVERLOADABLE
2645intel_sub_group_avc_sic_get_motion_vector_mask(
2646  uint skip_block_partition_type,
2647  uchar direction )
2648{
2649    return __builtin_spirv_intel_sub_group_avc_sic_get_motion_vector_mask_i32_i8(skip_block_partition_type, direction);
2650}
2651
2652/*****************************************************************************\
2653
2654Description:
2655    Configure SIC payload :
2656        - set Skip Mode Type (M0.3 :14)
2657        - set SkipModeEn (M1.0 :0)
2658        - set SkipCenterMask (M1.7 :31:24)
2659        - set BiWeight (M1.1 :21:16)
2660        - set InterSAD (M0.3 :21:20)
2661        - set SkipCenter Delta XY (M4.0)
2662        - set IntraComputeType (M5.1 :9:8)
2663
2664\*****************************************************************************/
2665intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_configure_skc_i32_i32_i64_i8_i8_intel_sub_group_avc_sic_payload_t(
2666    uint skip_block_partition_type,
2667    uint skip_motion_vector_mask,
2668    ulong motion_vectors,
2669    uchar bidirectional_weight,
2670    uchar skip_sad_adjustment,
2671    intel_sub_group_avc_sic_payload_t payload )
2672{
2673    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2674
2675    // Set Skip Mode Type M0.3[14:14] - the value is already shifted by 14 bits.
2676    handle = intel_set_message_phase_dw(handle, 0, 3, skip_block_partition_type);
2677
2678    // Set SkipModeEn M1.0[0:0].
2679    handle = intel_set_message_phase_ub(handle, 1, 0, 1);
2680
2681    // Set SkipCenterMask M1.7[31:24] - value is shifted by 24 bits already
2682    const uint SkipCenterMask = intel_get_message_phase_dw(handle, 1, 7) | skip_motion_vector_mask;
2683    handle = intel_set_message_phase_dw(handle, 1, 7, SkipCenterMask);
2684
2685    // Set BiWeight M1.1[21:16]
2686    handle = intel_set_message_phase_ub(handle, 1, 1*4+2, bidirectional_weight);
2687
2688    // Set InterSAD M0.3[21:20]
2689    const uchar InterSAD = intel_get_message_phase_ub(handle, 0, 3*4+2) | (skip_sad_adjustment << 4);
2690    handle = intel_set_message_phase_ub(handle, 0, 3*4+2, InterSAD);
2691
2692    // Set SkipCenter Delta XY (M4.0)
2693    handle = intel_simd_set_message_phase_uq(handle, 4, 1, 0, 4, motion_vectors);
2694
2695    // Set IntraComputeType M5.1[9:8].
2696    handle = intel_set_message_phase_ub(handle, 5, 1*4+1, 0x1);
2697
2698    intel_sub_group_avc_sic_payload_t result = __builtin_IB_vme_helper_get_as_avc_sic_payload_t( handle );
2699    return result;
2700}
2701
2702INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2703intel_sub_group_avc_sic_configure_skc(
2704    uint skip_block_partition_type,
2705    uint skip_motion_vector_mask,
2706    ulong motion_vectors,
2707    uchar bidirectional_weight,
2708    uchar skip_sad_adjustment,
2709    intel_sub_group_avc_sic_payload_t payload )
2710{
2711    return __builtin_spirv_intel_sub_group_avc_sic_configure_skc_i32_i32_i64_i8_i8_intel_sub_group_avc_sic_payload_t(skip_block_partition_type, skip_motion_vector_mask, motion_vectors, bidirectional_weight, skip_sad_adjustment, payload);
2712}
2713
2714/*****************************************************************************\
2715
2716Description:
2717    Configure SIC payload :
2718        - set IntraPartMask (M1.7 :4:0)
2719        - set MbIntraStruct (M1.7 :15:8)
2720        - set LeftEdgeLuma A [-1, 15] to [-1, 0] (M6.0-M6.3)
2721        - set LeftTopCornerLuma D (M5.1 :31:24)
2722        - set UpperEdgeLuma B [15, -1] to [0, -1] (M5.5-M5.2)
2723        - set UpperRightEdgeLuma C [23, -1] to [16, -1] (M5.7-M5.6)
2724        - set LeftEdgeChroma A [-1, 7] to [-1, 0] (M8.0-M3.3]
2725        - set LeftTopCornerChroma D (M7.5 :15:0)
2726        - set UpperEdgeChroma B [7, -1] to [0, -1] (M8.4-M8.7)
2727        - set IntraSAD (M0.3 :23:22)
2728
2729\*****************************************************************************/
2730intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_configure_ipe_i8_i8_i8_i8_i8_i8_i8_intel_sub_group_avc_sic_payload_t(
2731    uchar luma_intra_partition_mask,
2732    uchar intra_neighbour_availabilty,
2733    uchar left_edge_pixels,
2734    uchar left_upper_edge_pixel,
2735    uchar upper_edge_pixels,
2736    uchar upper_right_edge_pixels,
2737    uchar intra_sad_adjustment,
2738    intel_sub_group_avc_sic_payload_t sic_payload )
2739{
2740    uint4 payload = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(sic_payload);
2741
2742    // Set IntraPartMask (M1.7 :4:0). (M1.7 :7:4 are set to zero too)
2743    payload = intel_set_message_phase_ub(payload, 1, 7*4, luma_intra_partition_mask);
2744
2745    // Set MbIntraStruct (M1.7 :15:8).
2746    payload = intel_set_message_phase_ub(payload, 1, 7*4+1, intra_neighbour_availabilty);
2747
2748    // Set LeftEdge A [-1, 15] to [-1, 0] at M6.0-M6.3
2749    payload = intel_simd_set_message_phase_ub(payload, 6, 1, 0, 16, left_edge_pixels);
2750
2751    // Set LeftTopCorner D (M5.1 :31:24) - (M5.1 :23:16 MBZ)
2752    payload = intel_set_message_phase_ub(payload, 5, 1*4+3, left_upper_edge_pixel);
2753
2754    // Set UpperEdge B [15, -1] to [0, -1] (M5.5 - M5.2)
2755    payload = intel_simd_set_message_phase_ub(payload, 5, 1, 2*4, 16, upper_edge_pixels);
2756
2757    // Set UpperRightEdge C [23, -1] to [16, -1] (M5.7-M5.6).
2758    payload = intel_simd_set_message_phase_ub(payload, 5, 1, 6*4, 8, upper_right_edge_pixels);
2759
2760    // Set IntraSAD M0.3[23:22]
2761    const uint IntraSAD = intel_get_message_phase_dw(payload, 0, 3) | (intra_sad_adjustment << 22);
2762    payload = intel_set_message_phase_dw(payload, 0, 3, IntraSAD);
2763
2764    // Set IntraComputeType M5.1[9:8] to 01 for Luma only.
2765    payload = intel_set_message_phase_ub(payload, 5, 1*4+1, 1);
2766
2767    intel_sub_group_avc_sic_payload_t result = __builtin_IB_vme_helper_get_as_avc_sic_payload_t(payload);
2768    return result;
2769}
2770
2771INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2772intel_sub_group_avc_sic_configure_ipe(
2773    uchar luma_intra_partition_mask,
2774    uchar intra_neighbour_availabilty,
2775    uchar left_edge_pixels,
2776    uchar left_upper_edge_pixel,
2777    uchar upper_edge_pixels,
2778    uchar upper_right_edge_pixels,
2779    uchar intra_sad_adjustment,
2780    intel_sub_group_avc_sic_payload_t sic_payload)
2781{
2782    return __builtin_spirv_intel_sub_group_avc_sic_configure_ipe_i8_i8_i8_i8_i8_i8_i8_intel_sub_group_avc_sic_payload_t(luma_intra_partition_mask, intra_neighbour_availabilty, left_edge_pixels, left_upper_edge_pixel, upper_edge_pixels, upper_right_edge_pixels, intra_sad_adjustment, sic_payload);
2783}
2784
2785/*****************************************************************************\
2786
2787Description:
2788    Configure SIC payload :
2789        - set IntraPartMask (M1.7 :4:0)
2790        - set MbIntraStruct (M1.7 :15:8)
2791        - set LeftEdgeLuma A [-1, 15] to [-1, 0] (M6.0-M6.3)
2792        - set LeftTopCornerLuma D (M5.1 :31:24)
2793        - set UpperEdgeLuma B [15, -1] to [0, -1] (M5.5-M5.2)
2794        - set UpperRightEdgeLuma C [23, -1] to [16, -1] (M5.7-M5.6)
2795        - set LeftEdgeChroma A [-1, 7] to [-1, 0] (M8.0-M3.3]
2796        - set LeftTopCornerChroma D (M7.5 :15:0)
2797        - set UpperEdgeChroma B [7, -1] to [0, -1] (M8.4-M8.7)
2798        - set IntraSAD (M0.3 :23:22)
2799
2800\*****************************************************************************/
2801intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_configure_ipe_i8_i8_i8_i8_i8_i8_i16_i16_i16_i8_intel_sub_group_avc_sic_payload_t(
2802    uchar luma_intra_partition_mask,
2803    uchar intra_neighbour_availabilty,
2804    uchar left_edge_luma_pixels,
2805    uchar left_upper_edge_luma_pixel,
2806    uchar upper_edge_luma_pixels,
2807    uchar upper_right_edge_luma_pixels,
2808    ushort left_edge_chroma_pixels,
2809    ushort upper_left_corner_chroma_pixel,
2810    ushort upper_edge_chroma_pixels,
2811    uchar intra_sad_adjustment,
2812    intel_sub_group_avc_sic_payload_t sic_payload )
2813{
2814    uint4 payload = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(sic_payload);
2815
2816    // Set IntraPartMask (M1.7 [4:0]). (M1.7 [7:4] are set to zero too)
2817    payload = intel_set_message_phase_ub(payload, 1, 7*4, luma_intra_partition_mask);
2818
2819    // Set MbIntraStruct (M1.7 [15:8]).
2820    payload = intel_set_message_phase_ub(payload, 1, 7*4+1, intra_neighbour_availabilty);
2821
2822    // Set LeftEdge A [-1, 15] to [-1, 0] at M6.0-M6.3
2823    payload = intel_simd_set_message_phase_ub(payload, 6, 1, 0, 16, left_edge_luma_pixels);
2824
2825    // Set LeftTopCorner D (M5.1 :31:24) - (M5.1 :23:16 MBZ)
2826    payload = intel_set_message_phase_ub(payload, 5, 1*4+3, left_upper_edge_luma_pixel);
2827
2828    // Set UpperEdge B [15, -1] to [0, -1] (M5.5 - M5.2)
2829    payload = intel_simd_set_message_phase_ub(payload, 5, 1, 2*4, 16, upper_edge_luma_pixels);
2830
2831    // Set UpperRightEdge C [23, -1] to [16, -1] (M5.7-M5.6).
2832    payload = intel_simd_set_message_phase_ub(payload, 5, 1, 6*4, 8, upper_right_edge_luma_pixels);
2833
2834    // Set LeftEdgeChroma A [-1, 7] to [-1, 0] (M7.0-M7.3)
2835    payload = intel_simd_set_message_phase_uw(payload, 7, 1, 0, 8, left_edge_chroma_pixels);
2836
2837    // Set LeftTopCornerChroma D M6.5 [15:0]
2838    payload = intel_set_message_phase_uw(payload, 6, 5*2, upper_left_corner_chroma_pixel);
2839
2840    // Set UpperEdgeChroma B [7, -1] to [0, -1] (M7.7 - M7.4)
2841    payload = intel_simd_set_message_phase_uw(payload, 7, 1, 4*2, 8, upper_edge_chroma_pixels);
2842
2843    // Set IntraSAD M0.3[23:22]
2844    const uint IntraSAD = intel_get_message_phase_dw(payload, 0, 3) | (intra_sad_adjustment << 22);
2845    payload = intel_set_message_phase_dw(payload, 0, 3, IntraSAD);
2846
2847    // Set IntraComputeType M5.1[9:8] to 0 for Luma + Chroma enabled
2848    payload = intel_set_message_phase_uw(payload, 5, 1*2, 0);
2849
2850    intel_sub_group_avc_sic_payload_t result = __builtin_IB_vme_helper_get_as_avc_sic_payload_t( payload );
2851    return result;
2852}
2853
2854INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2855intel_sub_group_avc_sic_configure_ipe(
2856    uchar  luma_intra_partition_mask,
2857    uchar  intra_neighbour_availabilty,
2858    uchar  left_edge_luma_pixels,
2859    uchar  left_upper_edge_luma_pixel,
2860    uchar  upper_edge_luma_pixels,
2861    uchar  upper_right_edge_luma_pixels,
2862    ushort left_edge_chroma_pixels,
2863    ushort upper_left_corner_chroma_pixel,
2864    ushort upper_edge_chroma_pixels,
2865    uchar intra_sad_adjustment,
2866    intel_sub_group_avc_sic_payload_t sic_payload )
2867{
2868    return __builtin_spirv_intel_sub_group_avc_sic_configure_ipe_i8_i8_i8_i8_i8_i8_i16_i16_i16_i8_intel_sub_group_avc_sic_payload_t(luma_intra_partition_mask, intra_neighbour_availabilty, left_edge_luma_pixels, left_upper_edge_luma_pixel, upper_edge_luma_pixels, upper_right_edge_luma_pixels, left_edge_chroma_pixels, upper_left_corner_chroma_pixel, upper_edge_chroma_pixels, intra_sad_adjustment, sic_payload);
2869}
2870
2871intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_sic_convert_to_mce_payload_intel_sub_group_avc_sic_payload_t(
2872    intel_sub_group_avc_sic_payload_t payload )
2873{
2874    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2875    return __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
2876}
2877
2878INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
2879intel_sub_group_avc_sic_convert_to_mce_payload(
2880      intel_sub_group_avc_sic_payload_t payload )
2881{
2882    return __builtin_spirv_intel_sub_group_avc_sic_convert_to_mce_payload_intel_sub_group_avc_sic_payload_t(payload);
2883}
2884
2885intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_sic_payload_intel_sub_group_avc_mce_payload_t(
2886    intel_sub_group_avc_mce_payload_t payload )
2887{
2888    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
2889    return __builtin_IB_vme_helper_get_as_avc_sic_payload_t(handle);
2890}
2891
2892INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2893intel_sub_group_avc_mce_convert_to_sic_payload(
2894      intel_sub_group_avc_mce_payload_t payload )
2895{
2896    return __builtin_spirv_intel_sub_group_avc_mce_convert_to_sic_payload_intel_sub_group_avc_mce_payload_t(payload);
2897}
2898
2899/*****************************************************************************\
2900
2901Description:
2902    - set BilinearEnable M1.7[18:18]
2903
2904\*****************************************************************************/
2905intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_skc_bilinear_filter_enable_intel_sub_group_avc_sic_payload_t(
2906    intel_sub_group_avc_sic_payload_t payload )
2907{
2908    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2909    const uint BilinearEnable = intel_get_message_phase_dw(handle, 1, 7) | (1<<18);
2910    handle = intel_set_message_phase_dw(handle, 1, 7, BilinearEnable);
2911
2912    intel_sub_group_avc_sic_payload_t result = __builtin_IB_vme_helper_get_as_avc_sic_payload_t(handle);
2913    return result;
2914}
2915
2916INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2917intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(
2918      intel_sub_group_avc_sic_payload_t payload )
2919{
2920    return __builtin_spirv_intel_sub_group_avc_sic_set_skc_bilinear_filter_enable_intel_sub_group_avc_sic_payload_t(payload);
2921}
2922
2923/*****************************************************************************\
2924Description:
2925    - set FTEnable: M0.3[17:17]
2926    - set SIC Forward Transform Coeff Threshold Matrix[0] (M2.6, M2.7)
2927
2928\*****************************************************************************/
2929intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_skc_forward_transform_enable_i64_intel_sub_group_avc_sic_payload_t(
2930    ulong packed_sad_coefficients,
2931    intel_sub_group_avc_sic_payload_t payload )
2932{
2933    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2934    // Set FTEnable: M0.3[17:17]
2935    const uint FTEnable = intel_get_message_phase_dw(handle, 0, 3) | (1<<17);
2936    handle = intel_set_message_phase_dw(handle, 0, 3, FTEnable);
2937
2938    // Set SIC Forward Transform Coeff Threshold Matrix (M2.6, M2.7)
2939    handle = intel_set_message_phase_dw(handle, 2, 6, (uint)packed_sad_coefficients);
2940    handle = intel_set_message_phase_dw(handle, 2, 7, (uint)(packed_sad_coefficients >> 32));
2941
2942    intel_sub_group_avc_sic_payload_t result = __builtin_IB_vme_helper_get_as_avc_sic_payload_t(handle);
2943    return result;
2944}
2945
2946INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2947intel_sub_group_avc_sic_set_skc_forward_transform_enable(
2948      ulong  packed_sad_coefficients,
2949      intel_sub_group_avc_sic_payload_t payload )
2950{
2951    return __builtin_spirv_intel_sub_group_avc_sic_set_skc_forward_transform_enable_i64_intel_sub_group_avc_sic_payload_t(packed_sad_coefficients, payload);
2952}
2953
2954/*****************************************************************************\
2955Description:
2956    - set BlockBasedSkipEnabled (M0.3 :19)
2957    - set T8x8FlagForInterEn (M1.0 : 7)
2958
2959\*****************************************************************************/
2960intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_block_based_raw_skip_sad_i8_intel_sub_group_avc_sic_payload_t(
2961    uchar block_based_skip_block_type,
2962    intel_sub_group_avc_sic_payload_t payload )
2963{
2964    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2965    uint dw = intel_get_message_phase_dw(handle, 0, 3) | (0x1 << 19);
2966    handle = intel_set_message_phase_dw(handle, 0, 3, dw);
2967
2968    uchar flag = intel_get_message_phase_ub(handle, 1, 0*4);
2969    flag &= ~0x80;
2970    flag |= block_based_skip_block_type;
2971    handle = intel_set_message_phase_ub(handle, 1, 0*4, flag);
2972
2973    return __builtin_IB_vme_helper_get_as_avc_sic_payload_t(handle);
2974}
2975
2976INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
2977intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
2978      uchar block_based_skip_block_type,
2979      intel_sub_group_avc_sic_payload_t payload )
2980{
2981    return __builtin_spirv_intel_sub_group_avc_sic_set_block_based_raw_skip_sad_i8_intel_sub_group_avc_sic_payload_t(block_based_skip_block_type, payload);
2982}
2983
2984intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_sic_evaluate_ipe_v3i64_intel_sub_group_avc_sic_payload_t(
2985    VMEImage_t src_image_vme,
2986    intel_sub_group_avc_sic_payload_t payload )
2987{
2988    long src_image = getVMEImage(src_image_vme);
2989    long vme_accelerator = getVMESampler(src_image_vme);
2990
2991    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
2992    uint4 res = __builtin_IB_vme_send_sic_new(handle, src_image, src_image, src_image, vme_accelerator);
2993
2994    intel_sub_group_avc_sic_result_t result = __builtin_IB_vme_helper_get_as_avc_sic_result_t(res);
2995    return result;
2996}
2997
2998INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
2999intel_sub_group_avc_sic_evaluate_ipe(
3000      read_only image2d_t src_image,
3001      sampler_t vme_accelerator,
3002      intel_sub_group_avc_sic_payload_t payload )
3003{
3004    long src_image_id = (long)__builtin_astype(src_image, void*);
3005    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
3006    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
3007
3008    return __builtin_spirv_intel_sub_group_avc_sic_evaluate_ipe_v3i64_intel_sub_group_avc_sic_payload_t(src_image_vme, payload);
3009}
3010
3011intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_sic_payload_t(
3012    VMEImage_t src_image_vme,
3013    VMEImage_t ref_image_vme,
3014    intel_sub_group_avc_sic_payload_t payload )
3015{
3016    long src_image = getVMEImage(src_image_vme);
3017    long ref_image = getVMEImage(ref_image_vme);
3018    long vme_accelerator = getVMESampler(src_image_vme);
3019
3020    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
3021    handle = intel_sub_group_payload_set_single_ref_id(src_image, ref_image, handle);
3022    uint4 res = __builtin_IB_vme_send_sic_new(handle, src_image, ref_image, ref_image, vme_accelerator);
3023
3024    intel_sub_group_avc_sic_result_t result = __builtin_IB_vme_helper_get_as_avc_sic_result_t(res);
3025    return result;
3026}
3027
3028INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
3029intel_sub_group_avc_sic_evaluate_with_single_reference(
3030      read_only image2d_t src_image,
3031      read_only image2d_t ref_image,
3032      sampler_t vme_accelerator,
3033      intel_sub_group_avc_sic_payload_t payload )
3034{
3035    long src_image_id = (long)__builtin_astype(src_image, void*);
3036    long ref_image_id = (long)__builtin_astype(ref_image, void*);
3037    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
3038    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
3039    VMEImage_t ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(ref_image_id, 0, vme_accelerator_id);
3040
3041    return __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_single_reference_v3i64_v3i64_intel_sub_group_avc_sic_payload_t(src_image_vme, ref_image_vme, payload);
3042}
3043
3044intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_sic_payload_t(
3045    VMEImage_t src_image_vme,
3046    VMEImage_t fwd_ref_image_vme,
3047    VMEImage_t bwd_ref_image_vme,
3048    intel_sub_group_avc_sic_payload_t payload )
3049{
3050    long src_image = getVMEImage(src_image_vme);
3051    long fwd_ref_image = getVMEImage(fwd_ref_image_vme);
3052    long bwd_ref_image = getVMEImage(bwd_ref_image_vme);
3053    long vme_accelerator = getVMESampler(src_image_vme);
3054
3055    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
3056    handle = intel_sub_group_payload_set_dual_ref_id(src_image, fwd_ref_image, bwd_ref_image, handle);
3057    uint4 res = __builtin_IB_vme_send_sic_new(handle, src_image, fwd_ref_image, bwd_ref_image, vme_accelerator);
3058
3059    intel_sub_group_avc_sic_result_t result = __builtin_IB_vme_helper_get_as_avc_sic_result_t(res);
3060    return result;
3061}
3062
3063INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
3064intel_sub_group_avc_sic_evaluate_with_dual_reference(
3065      read_only image2d_t src_image,
3066      read_only image2d_t fwd_ref_image,
3067      read_only image2d_t bwd_ref_image,
3068      sampler_t vme_accelerator,
3069      intel_sub_group_avc_sic_payload_t payload )
3070{
3071    long src_image_id = (long)__builtin_astype(src_image, void*);
3072    long fwd_ref_image_id = (long)__builtin_astype(fwd_ref_image, void*);
3073    long bwd_ref_image_id = (long)__builtin_astype(bwd_ref_image, void*);
3074    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
3075    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
3076    VMEImage_t fwd_ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(fwd_ref_image_id, 0, vme_accelerator_id);
3077    VMEImage_t bwd_ref_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(bwd_ref_image_id, 0, vme_accelerator_id);
3078
3079    return __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_dual_reference_v3i64_v3i64_v3i64_intel_sub_group_avc_sic_payload_t(src_image_vme, fwd_ref_image_vme, bwd_ref_image_vme, payload);
3080}
3081
3082intel_sub_group_avc_mce_result_t __builtin_spirv_intel_sub_group_avc_sic_convert_to_mce_result_intel_sub_group_avc_sic_result_t(
3083    intel_sub_group_avc_sic_result_t result )
3084{
3085    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3086    return __builtin_IB_vme_helper_get_as_avc_mce_result_t(handle);
3087}
3088
3089INLINE intel_sub_group_avc_mce_result_t OVERLOADABLE
3090intel_sub_group_avc_sic_convert_to_mce_result(
3091      intel_sub_group_avc_sic_result_t result )
3092{
3093    return __builtin_spirv_intel_sub_group_avc_sic_convert_to_mce_result_intel_sub_group_avc_sic_result_t(result);
3094}
3095
3096intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_mce_convert_to_sic_result_intel_sub_group_avc_mce_result_t(
3097    intel_sub_group_avc_mce_result_t result )
3098{
3099    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_result_t(result);
3100    return __builtin_IB_vme_helper_get_as_avc_sic_result_t(handle);
3101}
3102
3103INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
3104intel_sub_group_avc_mce_convert_to_sic_result(
3105      intel_sub_group_avc_mce_result_t result )
3106{
3107    return __builtin_spirv_intel_sub_group_avc_mce_convert_to_sic_result_intel_sub_group_avc_mce_result_t(result);
3108}
3109
3110/*****************************************************************************\
3111
3112Description:
3113    Get the intra luma shape W0.0[5:4] from SIC result payload.
3114
3115\*****************************************************************************/
3116uchar __builtin_spirv_intel_sub_group_avc_sic_get_ipe_luma_shape_intel_sub_group_avc_sic_result_t(
3117    intel_sub_group_avc_sic_result_t result )
3118{
3119    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3120    const uchar IntraMbMode = intel_get_message_phase_ub(handle, 0, 0);
3121    return (IntraMbMode >> 4) & 0x3;
3122}
3123
3124INLINE uchar OVERLOADABLE
3125intel_sub_group_avc_sic_get_ipe_luma_shape(
3126      intel_sub_group_avc_sic_result_t result )
3127{
3128    return __builtin_spirv_intel_sub_group_avc_sic_get_ipe_luma_shape_intel_sub_group_avc_sic_result_t(result);
3129}
3130
3131/*****************************************************************************\
3132
3133Description:
3134    Get the intra luma distortion W0.3[15:0] from SIC result payload.
3135
3136\*****************************************************************************/
3137ushort __builtin_spirv_intel_sub_group_avc_sic_get_best_ipe_luma_distortion_intel_sub_group_avc_sic_result_t(
3138    intel_sub_group_avc_sic_result_t result )
3139{
3140    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3141    return intel_get_message_phase_uw(handle, 0, 3*2);
3142}
3143
3144INLINE ushort OVERLOADABLE
3145intel_sub_group_avc_sic_get_best_ipe_luma_distortion(
3146      intel_sub_group_avc_sic_result_t result )
3147{
3148    return __builtin_spirv_intel_sub_group_avc_sic_get_best_ipe_luma_distortion_intel_sub_group_avc_sic_result_t(result);
3149}
3150
3151/*****************************************************************************\
3152
3153Description:
3154    Get the intra chroma distortion (W0.3 :31:16) from SIC result payload.
3155
3156\*****************************************************************************/
3157ushort __builtin_spirv_intel_sub_group_avc_sic_get_best_ipe_chroma_distortion_intel_sub_group_avc_sic_result_t(
3158    intel_sub_group_avc_sic_result_t result )
3159{
3160    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3161    return intel_get_message_phase_uw(handle, 0, 3*2+1);
3162}
3163
3164INLINE ushort OVERLOADABLE
3165intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(
3166      intel_sub_group_avc_sic_result_t result )
3167{
3168    return __builtin_spirv_intel_sub_group_avc_sic_get_best_ipe_chroma_distortion_intel_sub_group_avc_sic_result_t(result);
3169}
3170
3171/*****************************************************************************\
3172
3173Description:
3174    Get the intra packed luma mode (W0.4-W0.5) from SIC result payload.
3175
3176\*****************************************************************************/
3177ulong __builtin_spirv_intel_sub_group_avc_sic_get_packed_ipe_luma_modes_intel_sub_group_avc_sic_result_t(
3178    intel_sub_group_avc_sic_result_t result )
3179{
3180    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3181    return intel_get_message_phase_uq(handle, 0, 4/2);
3182}
3183
3184INLINE ulong OVERLOADABLE
3185intel_sub_group_avc_sic_get_packed_ipe_luma_modes(
3186      intel_sub_group_avc_sic_result_t result )
3187{
3188    return __builtin_spirv_intel_sub_group_avc_sic_get_packed_ipe_luma_modes_intel_sub_group_avc_sic_result_t(result);
3189}
3190
3191/*****************************************************************************\
3192
3193Description:
3194    Get the intra luma mode W0.6[1:0] from SIC result payload.
3195
3196\*****************************************************************************/
3197uchar __builtin_spirv_intel_sub_group_avc_sic_get_ipe_chroma_mode_intel_sub_group_avc_sic_result_t(
3198    intel_sub_group_avc_sic_result_t result )
3199{
3200    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3201    const uchar MbIntraStruct = intel_get_message_phase_ub(handle, 0, 6*4);
3202    return (MbIntraStruct & 0x3);
3203}
3204
3205INLINE uchar OVERLOADABLE
3206intel_sub_group_avc_sic_get_ipe_chroma_mode(
3207      intel_sub_group_avc_sic_result_t result )
3208{
3209    return __builtin_spirv_intel_sub_group_avc_sic_get_ipe_chroma_mode_intel_sub_group_avc_sic_result_t(result);
3210}
3211
3212/*****************************************************************************\
3213
3214Description:
3215    Get the skc luma nzc W6.1 from SIC result payload.
3216
3217\*****************************************************************************/
3218uint __builtin_spirv_intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold_intel_sub_group_avc_sic_result_t(
3219    intel_sub_group_avc_sic_result_t result )
3220{
3221    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3222    return intel_get_message_phase_dw(handle, 6, 1);
3223}
3224
3225INLINE uint OVERLOADABLE
3226intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(
3227      intel_sub_group_avc_sic_result_t result )
3228{
3229    return __builtin_spirv_intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold_intel_sub_group_avc_sic_result_t(result);
3230}
3231
3232/*****************************************************************************\
3233
3234Description:
3235    Get the skc luma coeff mag clip sum (W6.2-W6.3) from SIC result payload.
3236
3237\*****************************************************************************/
3238ulong __builtin_spirv_intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold_intel_sub_group_avc_sic_result_t(
3239    intel_sub_group_avc_sic_result_t result )
3240{
3241    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3242    return intel_get_message_phase_uq(handle, 6, 2/2);
3243}
3244
3245INLINE ulong OVERLOADABLE
3246intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(
3247      intel_sub_group_avc_sic_result_t result )
3248{
3249    return __builtin_spirv_intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold_intel_sub_group_avc_sic_result_t(result);
3250}
3251
3252/*****************************************************************************\
3253
3254Description:
3255    Get the raw skip distortion (W0.2 :31:16) from SIC result payload.
3256
3257\*****************************************************************************/
3258ushort __builtin_spirv_intel_sub_group_avc_sic_get_inter_raw_sads_intel_sub_group_avc_sic_result_t(
3259    intel_sub_group_avc_sic_result_t result )
3260{
3261    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_result_t(result);
3262    return intel_get_message_phase_uw(handle, 0, 2*2+1);
3263}
3264
3265INLINE ushort OVERLOADABLE
3266intel_sub_group_avc_sic_get_inter_raw_sads(
3267      intel_sub_group_avc_sic_result_t result )
3268{
3269    return __builtin_spirv_intel_sub_group_avc_sic_get_inter_raw_sads_intel_sub_group_avc_sic_result_t(result);
3270}
3271
3272// ... Common function wrappers ...
3273
3274intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_ime_payload_t(
3275    ulong packed_cost_center_delta,
3276    uint2 packed_cost_table,
3277    uchar cost_precision,
3278    intel_sub_group_avc_ime_payload_t payload )
3279{
3280      intel_sub_group_avc_mce_payload_t mpayload =
3281          intel_sub_group_avc_ime_convert_to_mce_payload( payload );
3282      mpayload =
3283          intel_sub_group_avc_mce_set_motion_vector_cost_function(
3284              packed_cost_center_delta,
3285              packed_cost_table,
3286              cost_precision,
3287              mpayload );
3288      return intel_sub_group_avc_mce_convert_to_ime_payload( mpayload );
3289}
3290
3291INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
3292intel_sub_group_avc_ime_set_motion_vector_cost_function(
3293      ulong packed_cost_center_delta,
3294      uint2 packed_cost_table,
3295      uchar cost_precision,
3296      intel_sub_group_avc_ime_payload_t payload )
3297{
3298    return __builtin_spirv_intel_sub_group_avc_ime_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_ime_payload_t(packed_cost_center_delta, packed_cost_table, cost_precision, payload);
3299}
3300
3301intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_ref_payload_t(
3302    ulong packed_cost_center_delta,
3303    uint2 packed_cost_table,
3304    uchar cost_precision,
3305    intel_sub_group_avc_ref_payload_t payload )
3306{
3307      intel_sub_group_avc_mce_payload_t mpayload =
3308          intel_sub_group_avc_ref_convert_to_mce_payload( payload );
3309      mpayload =
3310          intel_sub_group_avc_mce_set_motion_vector_cost_function(
3311              packed_cost_center_delta,
3312              packed_cost_table,
3313              cost_precision,
3314              mpayload );
3315      return intel_sub_group_avc_mce_convert_to_ref_payload( mpayload );
3316}
3317
3318INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
3319intel_sub_group_avc_ref_set_motion_vector_cost_function(
3320      ulong packed_cost_center_delta,
3321      uint2 packed_cost_table,
3322      uchar cost_precision,
3323      intel_sub_group_avc_ref_payload_t payload )
3324{
3325    return __builtin_spirv_intel_sub_group_avc_ref_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_ref_payload_t(packed_cost_center_delta, packed_cost_table, cost_precision, payload);
3326}
3327
3328intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_sic_payload_t(
3329    ulong packed_cost_center_delta,
3330    uint2 packed_cost_table,
3331    uchar cost_precision,
3332    intel_sub_group_avc_sic_payload_t payload )
3333{
3334      intel_sub_group_avc_mce_payload_t mpayload =
3335          intel_sub_group_avc_sic_convert_to_mce_payload( payload );
3336      mpayload =
3337          intel_sub_group_avc_mce_set_motion_vector_cost_function(
3338              packed_cost_center_delta,
3339              packed_cost_table,
3340              cost_precision,
3341              mpayload );
3342      return intel_sub_group_avc_mce_convert_to_sic_payload( mpayload );
3343}
3344
3345INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
3346intel_sub_group_avc_sic_set_motion_vector_cost_function(
3347      ulong packed_cost_center_delta,
3348      uint2 packed_cost_table,
3349      uchar cost_precision,
3350      intel_sub_group_avc_sic_payload_t payload )
3351{
3352    return __builtin_spirv_intel_sub_group_avc_sic_set_motion_vector_cost_function_i64_v2i32_i8_intel_sub_group_avc_sic_payload_t(packed_cost_center_delta, packed_cost_table, cost_precision, payload);
3353}
3354
3355intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_ac_only_haar_intel_sub_group_avc_ime_payload_t(
3356    intel_sub_group_avc_ime_payload_t payload )
3357{
3358    intel_sub_group_avc_mce_payload_t mpayload =
3359        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
3360    mpayload =
3361      intel_sub_group_avc_mce_set_ac_only_haar( mpayload );
3362    return intel_sub_group_avc_mce_convert_to_ime_payload( mpayload );
3363}
3364
3365INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
3366intel_sub_group_avc_ime_set_ac_only_haar(
3367    intel_sub_group_avc_ime_payload_t payload )
3368{
3369    return __builtin_spirv_intel_sub_group_avc_ime_set_ac_only_haar_intel_sub_group_avc_ime_payload_t(payload);
3370}
3371
3372intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_ac_only_haar_intel_sub_group_avc_ref_payload_t(
3373    intel_sub_group_avc_ref_payload_t payload )
3374{
3375  intel_sub_group_avc_mce_payload_t mpayload =
3376        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
3377    mpayload =
3378      intel_sub_group_avc_mce_set_ac_only_haar( mpayload );
3379    return intel_sub_group_avc_mce_convert_to_ref_payload( mpayload );
3380}
3381
3382INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
3383intel_sub_group_avc_ref_set_ac_only_haar(
3384    intel_sub_group_avc_ref_payload_t payload )
3385{
3386    return __builtin_spirv_intel_sub_group_avc_ref_set_ac_only_haar_intel_sub_group_avc_ref_payload_t(payload);
3387}
3388
3389intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_ac_only_haar_intel_sub_group_avc_sic_payload_t(
3390    intel_sub_group_avc_sic_payload_t payload )
3391{
3392    intel_sub_group_avc_mce_payload_t mpayload =
3393        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
3394    mpayload =
3395      intel_sub_group_avc_mce_set_ac_only_haar( mpayload );
3396    return intel_sub_group_avc_mce_convert_to_sic_payload( mpayload );
3397}
3398
3399INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
3400intel_sub_group_avc_sic_set_ac_only_haar(
3401    intel_sub_group_avc_sic_payload_t payload )
3402{
3403    return __builtin_spirv_intel_sub_group_avc_sic_set_ac_only_haar_intel_sub_group_avc_sic_payload_t(payload);
3404}
3405
3406ulong __builtin_spirv_intel_sub_group_avc_ime_get_motion_vectors_intel_sub_group_avc_ime_result_t(
3407    intel_sub_group_avc_ime_result_t result )
3408{
3409    intel_sub_group_avc_mce_result_t mresult =
3410      intel_sub_group_avc_ime_convert_to_mce_result( result );
3411    return intel_sub_group_avc_mce_get_motion_vectors( mresult );
3412}
3413
3414INLINE ulong OVERLOADABLE
3415intel_sub_group_avc_ime_get_motion_vectors(
3416    intel_sub_group_avc_ime_result_t result )
3417{
3418    return __builtin_spirv_intel_sub_group_avc_ime_get_motion_vectors_intel_sub_group_avc_ime_result_t(result);
3419}
3420
3421ulong __builtin_spirv_intel_sub_group_avc_ref_get_motion_vectors_intel_sub_group_avc_ref_result_t(
3422    intel_sub_group_avc_ref_result_t result )
3423{
3424    intel_sub_group_avc_mce_result_t mresult =
3425      intel_sub_group_avc_ref_convert_to_mce_result( result );
3426    return intel_sub_group_avc_mce_get_motion_vectors( mresult );
3427}
3428
3429INLINE ulong OVERLOADABLE
3430intel_sub_group_avc_ref_get_motion_vectors(
3431    intel_sub_group_avc_ref_result_t result )
3432{
3433    return __builtin_spirv_intel_sub_group_avc_ref_get_motion_vectors_intel_sub_group_avc_ref_result_t(result);
3434}
3435
3436ushort __builtin_spirv_intel_sub_group_avc_ime_get_inter_distortions_intel_sub_group_avc_ime_result_t(
3437    intel_sub_group_avc_ime_result_t result )
3438{
3439    intel_sub_group_avc_mce_result_t mresult =
3440      intel_sub_group_avc_ime_convert_to_mce_result( result );
3441    return intel_sub_group_avc_mce_get_inter_distortions( mresult );
3442}
3443
3444INLINE ushort OVERLOADABLE
3445intel_sub_group_avc_ime_get_inter_distortions(
3446    intel_sub_group_avc_ime_result_t result )
3447{
3448    return __builtin_spirv_intel_sub_group_avc_ime_get_inter_distortions_intel_sub_group_avc_ime_result_t(result);
3449}
3450
3451ushort __builtin_spirv_intel_sub_group_avc_ref_get_inter_distortions_intel_sub_group_avc_ref_result_t(
3452    intel_sub_group_avc_ref_result_t result )
3453{
3454    intel_sub_group_avc_mce_result_t mresult =
3455      intel_sub_group_avc_ref_convert_to_mce_result( result );
3456    return intel_sub_group_avc_mce_get_inter_distortions( mresult );
3457}
3458
3459INLINE ushort OVERLOADABLE
3460intel_sub_group_avc_ref_get_inter_distortions(
3461    intel_sub_group_avc_ref_result_t result )
3462{
3463    return __builtin_spirv_intel_sub_group_avc_ref_get_inter_distortions_intel_sub_group_avc_ref_result_t(result);
3464}
3465
3466ushort __builtin_spirv_intel_sub_group_avc_sic_get_inter_distortions_intel_sub_group_avc_sic_result_t(
3467    intel_sub_group_avc_sic_result_t result )
3468{
3469    intel_sub_group_avc_mce_result_t mresult =
3470      intel_sub_group_avc_sic_convert_to_mce_result( result );
3471    return intel_sub_group_avc_mce_get_inter_distortions( mresult );
3472}
3473
3474INLINE ushort OVERLOADABLE
3475intel_sub_group_avc_sic_get_inter_distortions(
3476    intel_sub_group_avc_sic_result_t result )
3477{
3478    return __builtin_spirv_intel_sub_group_avc_sic_get_inter_distortions_intel_sub_group_avc_sic_result_t(result);
3479}
3480
3481ushort __builtin_spirv_intel_sub_group_avc_ime_get_best_inter_distortion_intel_sub_group_avc_ime_result_t(
3482    intel_sub_group_avc_ime_result_t result )
3483{
3484    intel_sub_group_avc_mce_result_t mresult =
3485      intel_sub_group_avc_ime_convert_to_mce_result( result );
3486    return intel_sub_group_avc_mce_get_best_inter_distortion( mresult );
3487}
3488
3489INLINE ushort OVERLOADABLE
3490intel_sub_group_avc_ime_get_best_inter_distortion(
3491    intel_sub_group_avc_ime_result_t result )
3492{
3493    return __builtin_spirv_intel_sub_group_avc_ime_get_best_inter_distortion_intel_sub_group_avc_ime_result_t(result);
3494}
3495
3496ushort __builtin_spirv_intel_sub_group_avc_ref_get_best_inter_distortion_intel_sub_group_avc_ref_result_t(
3497    intel_sub_group_avc_ref_result_t result )
3498{
3499    intel_sub_group_avc_mce_result_t mresult =
3500      intel_sub_group_avc_ref_convert_to_mce_result( result );
3501    return intel_sub_group_avc_mce_get_best_inter_distortion( mresult );
3502}
3503
3504INLINE ushort OVERLOADABLE
3505intel_sub_group_avc_ref_get_best_inter_distortion(
3506    intel_sub_group_avc_ref_result_t result )
3507{
3508    return __builtin_spirv_intel_sub_group_avc_ref_get_best_inter_distortion_intel_sub_group_avc_ref_result_t(result);
3509}
3510
3511uchar __builtin_spirv_intel_sub_group_avc_ime_get_inter_major_shape_intel_sub_group_avc_ime_result_t(
3512    intel_sub_group_avc_ime_result_t result )
3513{
3514    intel_sub_group_avc_mce_result_t mresult =
3515      intel_sub_group_avc_ime_convert_to_mce_result( result );
3516    return intel_sub_group_avc_mce_get_inter_major_shape( mresult );
3517}
3518
3519INLINE uchar OVERLOADABLE
3520intel_sub_group_avc_ime_get_inter_major_shape(
3521    intel_sub_group_avc_ime_result_t  result )
3522{
3523    return __builtin_spirv_intel_sub_group_avc_ime_get_inter_major_shape_intel_sub_group_avc_ime_result_t(result);
3524}
3525
3526uchar __builtin_spirv_intel_sub_group_avc_ref_get_inter_major_shape_intel_sub_group_avc_ref_result_t(
3527    intel_sub_group_avc_ref_result_t result )
3528{
3529    intel_sub_group_avc_mce_result_t mresult =
3530      intel_sub_group_avc_ref_convert_to_mce_result( result );
3531    return intel_sub_group_avc_mce_get_inter_major_shape( mresult );
3532}
3533
3534INLINE uchar OVERLOADABLE
3535intel_sub_group_avc_ref_get_inter_major_shape(
3536    intel_sub_group_avc_ref_result_t  result )
3537{
3538    return __builtin_spirv_intel_sub_group_avc_ref_get_inter_major_shape_intel_sub_group_avc_ref_result_t(result);
3539}
3540
3541uchar __builtin_spirv_intel_sub_group_avc_ime_get_inter_minor_shapes_intel_sub_group_avc_ime_result_t(
3542    intel_sub_group_avc_ime_result_t result )
3543{
3544    intel_sub_group_avc_mce_result_t mresult =
3545      intel_sub_group_avc_ime_convert_to_mce_result( result );
3546    return intel_sub_group_avc_mce_get_inter_minor_shapes( mresult );
3547}
3548
3549INLINE uchar OVERLOADABLE
3550intel_sub_group_avc_ime_get_inter_minor_shapes(
3551    intel_sub_group_avc_ime_result_t result )
3552{
3553    return __builtin_spirv_intel_sub_group_avc_ime_get_inter_minor_shapes_intel_sub_group_avc_ime_result_t(result);
3554}
3555
3556uchar __builtin_spirv_intel_sub_group_avc_ref_get_inter_minor_shapes_intel_sub_group_avc_ref_result_t(
3557    intel_sub_group_avc_ref_result_t result )
3558{
3559    intel_sub_group_avc_mce_result_t mresult =
3560      intel_sub_group_avc_ref_convert_to_mce_result( result );
3561    return intel_sub_group_avc_mce_get_inter_minor_shapes( mresult );
3562}
3563
3564INLINE uchar OVERLOADABLE
3565intel_sub_group_avc_ref_get_inter_minor_shapes(
3566    intel_sub_group_avc_ref_result_t result )
3567{
3568    return __builtin_spirv_intel_sub_group_avc_ref_get_inter_minor_shapes_intel_sub_group_avc_ref_result_t(result);
3569}
3570
3571uchar __builtin_spirv_intel_sub_group_avc_ime_get_inter_directions_intel_sub_group_avc_ime_result_t(
3572    intel_sub_group_avc_ime_result_t result )
3573{
3574    intel_sub_group_avc_mce_result_t mresult =
3575      intel_sub_group_avc_ime_convert_to_mce_result( result );
3576    return intel_sub_group_avc_mce_get_inter_directions( mresult );
3577}
3578
3579INLINE uchar OVERLOADABLE
3580intel_sub_group_avc_ime_get_inter_directions(
3581    intel_sub_group_avc_ime_result_t result )
3582{
3583    return __builtin_spirv_intel_sub_group_avc_ime_get_inter_directions_intel_sub_group_avc_ime_result_t(result);
3584}
3585
3586uchar __builtin_spirv_intel_sub_group_avc_ref_get_inter_directions_intel_sub_group_avc_ref_result_t(
3587    intel_sub_group_avc_ref_result_t result )
3588{
3589    intel_sub_group_avc_mce_result_t mresult =
3590      intel_sub_group_avc_ref_convert_to_mce_result( result );
3591    return intel_sub_group_avc_mce_get_inter_directions( mresult );
3592}
3593
3594INLINE uchar OVERLOADABLE
3595intel_sub_group_avc_ref_get_inter_directions(
3596    intel_sub_group_avc_ref_result_t result )
3597{
3598    return __builtin_spirv_intel_sub_group_avc_ref_get_inter_directions_intel_sub_group_avc_ref_result_t(result);
3599}
3600
3601uchar __builtin_spirv_intel_sub_group_avc_ime_get_inter_motion_vector_count_intel_sub_group_avc_ime_result_t(
3602    intel_sub_group_avc_ime_result_t result )
3603{
3604    intel_sub_group_avc_mce_result_t mresult =
3605      intel_sub_group_avc_ime_convert_to_mce_result( result );
3606    return intel_sub_group_avc_mce_get_inter_motion_vector_count( mresult );
3607}
3608
3609INLINE uchar OVERLOADABLE
3610intel_sub_group_avc_ime_get_inter_motion_vector_count(
3611    intel_sub_group_avc_ime_result_t  result )
3612{
3613    return __builtin_spirv_intel_sub_group_avc_ime_get_inter_motion_vector_count_intel_sub_group_avc_ime_result_t(result);
3614}
3615
3616uchar __builtin_spirv_intel_sub_group_avc_ref_get_inter_motion_vector_count_intel_sub_group_avc_ref_result_t(
3617    intel_sub_group_avc_ref_result_t result )
3618{
3619    intel_sub_group_avc_mce_result_t mresult =
3620      intel_sub_group_avc_ref_convert_to_mce_result( result );
3621    return intel_sub_group_avc_mce_get_inter_motion_vector_count( mresult );
3622}
3623
3624INLINE uchar OVERLOADABLE
3625intel_sub_group_avc_ref_get_inter_motion_vector_count(
3626    intel_sub_group_avc_ref_result_t  result )
3627{
3628    return __builtin_spirv_intel_sub_group_avc_ref_get_inter_motion_vector_count_intel_sub_group_avc_ref_result_t(result);
3629}
3630
3631uchar __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty_i8_i8(
3632    uchar slice_type,
3633    uchar qp )
3634{
3635    uchar penalty = 0;
3636    if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_PRED_INTEL ||
3637        slice_type ==  CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL ) {
3638        uchar penalty_table[52] = {
3639            0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
3640            0x04, 0x04, 0x04, 0x08, 0x08, 0x08, 0x08, 0x0c, 0x0c, 0x0c, 0x18, 0x18, 0x18,
3641            0x1a, 0x1c, 0x1c, 0x1e, 0x28, 0x29, 0x2a, 0x2b, 0x2d, 0x2e, 0x38, 0x39, 0x3a,
3642            0x3c, 0x3d, 0x3f, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x58, 0x59, 0x5a, 0x5b
3643        };
3644        penalty = penalty_table[qp];
3645    }
3646    else {
3647        penalty = 0;
3648    }
3649
3650    return penalty;
3651}
3652
3653INLINE uchar OVERLOADABLE
3654intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(
3655    uchar slice_type,
3656    uchar qp )
3657{
3658    return __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty_i8_i8(slice_type, qp);
3659}
3660
3661/*****************************************************************************\
3662
3663Description:
3664    Set the multi-reference base penalty when HW assisted multi-reference
3665    search is performed.
3666
3667    - set RefIDCost (M2.2 : 23:16)
3668    - set NonSkipZMVAdded (M1.7 :5)
3669
3670\*****************************************************************************/
3671
3672intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_mce_payload_t(
3673    uchar reference_base_penalty,
3674    intel_sub_group_avc_mce_payload_t payload )
3675{
3676    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
3677
3678    // Set RefIDCost (M2.2 : 23:16)
3679    handle = intel_set_message_phase_ub(handle, 2, 2*4+2, reference_base_penalty);
3680
3681    // Set NonSkipZMVAdded (M1.7 : 5)
3682    const uint NonSkipZMvAdded = intel_get_message_phase_dw(handle, 1, 7) | 0x20;
3683    handle = intel_set_message_phase_dw(handle, 1, 7, NonSkipZMvAdded);
3684
3685    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
3686    return result;
3687}
3688
3689INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
3690intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(
3691    uchar reference_base_penalty,
3692    intel_sub_group_avc_mce_payload_t payload )
3693{
3694    return __builtin_spirv_intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_mce_payload_t(reference_base_penalty, payload);
3695}
3696
3697ulong __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_shape_penalty_i8_i8(
3698    uchar slice_type,
3699    uchar qp )
3700{
3701    ulong packed_penalty = 0;
3702
3703    if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_PRED_INTEL ) {
3704        ulong packed_penalty_table[52] = {
3705            0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208, 0x0506040208,
3706            0x0506040208, 0x0506040208, 0x0506040208, 0x0b0d090519, 0x0b0d090519, 0x0b0d090519, 0x0b0d090519, 0x181a0d071d, 0x181a0d071d, 0x181a0d071d, 0x1b1d190a29, 0x1b1d190a29, 0x1b1d190a29,
3707            0x1e281c0d2b, 0x282a1e0f2d, 0x282a1e0f2d, 0x2a2c28192f, 0x2b2d291b39, 0x2c2f2a1c3a, 0x2e382c1d3b, 0x2f392d1f3c, 0x393b2f293e, 0x3a3c38293f, 0x3b3d392b49, 0x3c3f3a2c4a, 0x3e483c2d4b,
3708            0x484a3d2f4c, 0x494a3e384d, 0x4a4c483a4f, 0x4b4d493b59, 0x4c4f4a3c5a, 0x4e584c3d5b, 0x4f594d3f5c, 0x595b4f485e, 0x5a5c58495f, 0x5b5d594b69, 0x5c5f5a4c6a, 0x5e685c4d6b, 0x68695d4f6c
3709        };
3710        packed_penalty = packed_penalty_table[qp];
3711    }
3712    else if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL ) {
3713        ulong packed_penalty_table[52] = {
3714            0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c, 0x060a08060c,
3715            0x060a08060c, 0x060a08060c, 0x060a08060c, 0x0c1b190d1c, 0x0c1b190d1c, 0x0c1b190d1c, 0x0c1b190d1c, 0x19281d1a29, 0x19281d1a29, 0x19281d1a29, 0x1c2b291d2c, 0x1c2b291d2c, 0x1c2b291d2c,
3716            0x1f2d2b282f, 0x29382d2a39, 0x29382d2a39, 0x2b392f2b3b, 0x2c3b392d3c, 0x2e3c3a2f3e, 0x2f3d3b383f, 0x383e3c3948, 0x3a493e3b4a, 0x3b493f3b4b, 0x3c4b493d4c, 0x3e4c4a3f4e, 0x3f4d4b484f,
3717            0x494f4c4959, 0x49584d4a59, 0x4b5a4f4c5b, 0x4c5b594d5c, 0x4e5c5a4f5e, 0x4f5d5b585f, 0x585f5c5968, 0x5a685e5a6a, 0x5b695f5c6b, 0x5c6b695d6c, 0x5e6c6a5f6e, 0x5f6d6b686f, 0x696f6c6979
3718        };
3719        packed_penalty = packed_penalty_table[qp];
3720    }
3721    else {
3722        packed_penalty = 0;
3723    }
3724
3725    return packed_penalty;
3726}
3727
3728INLINE ulong OVERLOADABLE
3729intel_sub_group_avc_mce_get_default_inter_shape_penalty(
3730    uchar slice_type,
3731    uchar qp )
3732{
3733    return __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_shape_penalty_i8_i8(slice_type, qp);
3734}
3735
3736/*****************************************************************************\
3737
3738Description:
3739    Set inter shape penalty.
3740
3741    - set Mode Cost 4-8 (M2.1 : 31:0 & M2.2 : 7:0)
3742    - set NonSkipZModeAdded (M1.7 :6)
3743
3744\*****************************************************************************/
3745intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_inter_shape_penalty_i64_intel_sub_group_avc_mce_payload_t(
3746    ulong packed_shape_penalty,
3747    intel_sub_group_avc_mce_payload_t payload )
3748{
3749    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
3750
3751    // set Mode Cost 4-8 (M2.1 : 31:0 & M2.2 : 7:0)
3752    handle = intel_set_message_phase_dw(handle, 2, 1, (uint)packed_shape_penalty);
3753    handle = intel_set_message_phase_ub(handle, 2, 2*4, (uint)(packed_shape_penalty >> 32));
3754
3755    // set NonSkipZModeAdded (M1.7 :6)
3756    const uint NonSkipZModeAdded = intel_get_message_phase_dw(handle, 1, 7) | 0x40;
3757    handle = intel_set_message_phase_dw(handle, 1, 7, NonSkipZModeAdded);
3758
3759    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
3760    return result;
3761}
3762
3763INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
3764intel_sub_group_avc_mce_set_inter_shape_penalty(
3765     ulong packed_shape_penalty,
3766     intel_sub_group_avc_mce_payload_t payload )
3767{
3768    return __builtin_spirv_intel_sub_group_avc_mce_set_inter_shape_penalty_i64_intel_sub_group_avc_mce_payload_t(packed_shape_penalty, payload);
3769}
3770
3771uchar __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_direction_penalty_i8_i8(
3772    uchar slice_type,
3773    uchar qp )
3774{
3775    uchar penalty = 0;
3776    if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL ) {
3777        uchar penalty_table[52] = {
3778            0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
3779            0x02, 0x02, 0x02, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06, 0x08, 0x08, 0x08,
3780            0x0a, 0x0c, 0x0c, 0x0e, 0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x28, 0x29, 0x2a,
3781            0x2c, 0x2d, 0x2f, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x3e, 0x48, 0x49, 0x4a, 0x4b
3782        };
3783        penalty = penalty_table[qp];
3784    }
3785    else {
3786        penalty = 0;
3787    }
3788
3789    return penalty;
3790}
3791
3792INLINE uchar OVERLOADABLE
3793intel_sub_group_avc_mce_get_default_inter_direction_penalty(
3794    uchar slice_type,
3795    uchar qp )
3796{
3797    return __builtin_spirv_intel_sub_group_avc_mce_get_default_inter_direction_penalty_i8_i8(slice_type, qp);
3798}
3799
3800/*****************************************************************************\
3801
3802Description:
    Set inter direction penalty.
3804
3805    - set Mode 9 Cost : MODE_INTER_BWD (M2.2 : 15:8)
3806    - set NonSkipZModeAdded (M1.7 :6)
3807
3808\*****************************************************************************/
3809
3810intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_inter_direction_penalty_i8_intel_sub_group_avc_mce_payload_t(
3811    uchar direction_cost,
3812    intel_sub_group_avc_mce_payload_t payload )
3813{
3814    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
3815
3816    // set Mode 9 Cost : MODE_INTER_BWD (M2.2 : 15:8)
3817    handle = intel_set_message_phase_ub(handle, 2, 2*4+1, direction_cost);
3818
3819    // set NonSkipZModeAdded (M1.7 :6)
3820    const uint NonSkipZModeAdded = intel_get_message_phase_dw(handle, 1, 7) | 0x40;
3821    handle = intel_set_message_phase_dw(handle, 1, 7, NonSkipZModeAdded);
3822
3823    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
3824    return result;
3825}
3826
3827INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
3828intel_sub_group_avc_mce_set_inter_direction_penalty(
3829     uchar direction_cost,
3830     intel_sub_group_avc_mce_payload_t payload )
3831{
3832    return __builtin_spirv_intel_sub_group_avc_mce_set_inter_direction_penalty_i8_intel_sub_group_avc_mce_payload_t(direction_cost, payload);
3833}
3834
3835uint __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty_i8_i8(
3836    uchar slice_type,
3837    uchar qp )
3838{
3839    uint packed_penalty = 0;
3840
3841    if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_PRED_INTEL ) {
3842        uint packed_penalty_table[52] = {
3843            0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00, 0x391e1a00,
3844            0x391e1a00, 0x391e1a00, 0x391e1a00, 0x492e2a00, 0x492e2a00, 0x492e2a00, 0x492e2a00, 0x4d3b2f00, 0x4d3b2f00, 0x4d3b2f00, 0x593e3a00, 0x593e3a00, 0x593e3a00,
3845            0x5b493d00, 0x5d4b3f00, 0x5d4b3f00, 0x5f4c4900, 0x694e4a00, 0x6a584b00, 0x6b594d00, 0x6c5a4e00, 0x6e5b5800, 0x6f5c5900, 0x795e5a00, 0x7a685b00, 0x7b695d00,
3846            0x7d6a5e00, 0x7e6b6800, 0x886d6900, 0x896e6a00, 0x8a786b00, 0x8b796d00, 0x8c7a6e00, 0x8e7b7800, 0x8f7c7900, 0x8f7e7a00, 0x8f887b00, 0x8f897d00, 0x8f8a7e00
3847        };
3848        packed_penalty = packed_penalty_table[qp];
3849    }
3850    else if( slice_type ==  CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL ) {
3851        uint packed_penalty_table[52] = {
3852            0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x3a2a2900,
3853            0x3a2a2900, 0x3a2a2900, 0x3a2a2900, 0x4a3a3900, 0x4a3a3900, 0x4a3a3900, 0x4a3a3900, 0x4f3f3d00, 0x4f3f3d00, 0x4f3f3d00, 0x5a4a4900, 0x5a4a4900, 0x5a4a4900,
3854            0x5d4d4b00, 0x5f4f4d00, 0x5f4f4d00, 0x69594f00, 0x6a5a5900, 0x6b5b5a00, 0x6d5d5b00, 0x6e5e5c00, 0x78685e00, 0x79695f00, 0x7a6a6900, 0x7b6b6a00, 0x7d6d6b00,
3855            0x7e6e6c00, 0x88786d00, 0x89796f00, 0x8a7a7900, 0x8b7b7a00, 0x8d7d7b00, 0x8e7e7c00, 0x8f887e00, 0x8f897f00, 0x8f8a8900, 0x8f8b8a00, 0x8f8d8b00, 0x8f8e8c00
3856        };
3857        packed_penalty = packed_penalty_table[qp];
3858    }
3859    else {
3860        uint packed_penalty_table[52] = {
3861            0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000, 0x2f000000,
3862            0x2f000000, 0x2f000000, 0x2f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f020000, 0x4b030000, 0x4b030000, 0x4b030000, 0x4f050000, 0x4f050000, 0x4f050000,
3863            0x59060000, 0x5b070000, 0x5b070000, 0x5d080000, 0x5f0a0000, 0x68290000, 0x692a0000, 0x6a2b0000, 0x6c2d0000, 0x6d3b0000, 0x6f3c0000, 0x783e0000, 0x793f0000,
3864            0x7b480000, 0x7c490000, 0x7e4a0000, 0x7f4b0000, 0x884e0000, 0x89580000, 0x8b590000, 0x8c5a0000, 0x8d5d0000, 0x8f5f0000, 0x8f680000, 0x8f690000, 0x8f6a0000
3865        };
3866       packed_penalty = packed_penalty_table[qp];
3867    }
3868
3869    return packed_penalty;
3870}
3871
3872INLINE uint OVERLOADABLE
3873intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(
3874    uchar slice_type,
3875    uchar qp )
3876{
3877    return __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty_i8_i8(slice_type, qp);
3878}
3879
3880/*****************************************************************************\
3881
3882Description:
    Set intra luma shape penalty.
3884
3885    - set Mode Cost 1-3 (M2.0 : 31:8)
3886
3887\*****************************************************************************/
3888
3889intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_intra_luma_shape_penalty_i32_intel_sub_group_avc_mce_payload_t(
3890    uint packed_shape_cost,
3891    intel_sub_group_avc_mce_payload_t payload )
3892{
3893    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
3894
3895    // set Mode Cost 1-3 (M2.0 : 31:8)
3896    uchar b1 = (uchar)(packed_shape_cost >> 8);
3897    uchar b2 = (uchar)(packed_shape_cost >> 16);
3898    uchar b3 = (uchar)(packed_shape_cost >> 24);
3899    handle = intel_set_message_phase_ub(handle, 2, 1, b1);
3900    handle = intel_set_message_phase_ub(handle, 2, 2, b2);
3901    handle = intel_set_message_phase_ub(handle, 2, 3, b3);
3902
3903    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
3904    return result;
3905}
3906
3907INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
3908intel_sub_group_avc_mce_set_intra_luma_shape_penalty(
3909     uint packed_shape_cost,
3910     intel_sub_group_avc_mce_payload_t payload )
3911{
3912    return __builtin_spirv_intel_sub_group_avc_mce_set_intra_luma_shape_penalty_i32_intel_sub_group_avc_mce_payload_t(packed_shape_cost, payload);
3913}
3914
3915uchar __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty_i8_i8(
3916    uchar slice_type,
3917    uchar qp )
3918{
3919    uchar penalty = 0;
3920    if( slice_type == CLK_AVC_ME_SLICE_TYPE_PRED_INTEL ||
3921        slice_type == CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL ) {
3922        uchar penalty_table[52] =  {
3923            0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
3924            0x07, 0x07, 0x07, 0x0e, 0x0e, 0x0e, 0x0e, 0x1b, 0x1b, 0x1b, 0x1e, 0x1e, 0x1e,
3925            0x29, 0x2b, 0x2b, 0x2c, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3e, 0x48, 0x49,
3926            0x4a, 0x4b, 0x4d, 0x4e, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5e, 0x68, 0x69, 0x6a
3927        };
3928        penalty = penalty_table[qp];
3929    }
3930    else {
3931        uchar penalty_table[52] =  {
3932            0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
3933            0x0f, 0x0f, 0x18, 0x28, 0x28, 0x28, 0x28, 0x2c, 0x2c, 0x2c, 0x2d, 0x2d, 0x2d,
3934            0x38, 0x38, 0x38, 0x39, 0x3b, 0x3b, 0x3c, 0x3d, 0x3f, 0x3f, 0x49, 0x4a, 0x4b,
3935            0x4c, 0x4d, 0x4f, 0x59, 0x5a, 0x5b, 0x5c, 0x5e, 0x5c, 0x5d, 0x5f, 0x68, 0x69
3936        };
3937        penalty = penalty_table[qp];
3938    }
3939
3940    return penalty;
3941}
3942
3943INLINE uchar OVERLOADABLE
3944intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(
3945    uchar slice_type,
3946    uchar qp )
3947{
3948    return __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty_i8_i8(slice_type, qp);
3949}
3950
3951uint __builtin_spirv_intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty( )
3952{
3953    union {
3954        struct {
3955            uchar s0, s1, s2, s3;
3956        } x;
3957        uint y;
3958    } packed_penalty;
3959
3960    packed_penalty.y = 0;
3961
3962    packed_penalty.x.s0 = 36;
3963    packed_penalty.x.s1 = 12;
3964    packed_penalty.x.s2 = 4;
3965
3966    return packed_penalty.y;
3967}
3968
3969INLINE uint OVERLOADABLE
3970intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty( void )
3971{
3972    return __builtin_spirv_intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();
3973}
3974
3975/*****************************************************************************\
3976
3977Description:
    Set intra luma mode cost function.
3979
3980    - set Mode Cost 0 : MODE_INTRA_NONPRED (M2.0 : 7:0)
3981    - set IntraMxMPredMode (M6.4)
3982    - set NonDCPredMode (M6.7)
3983
3984\*****************************************************************************/
3985
3986intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_intra_luma_mode_cost_function_i8_i32_i32_intel_sub_group_avc_mce_payload_t(
3987    uchar luma_mode_penalty,
3988    uint luma_packed_neighbor_modes,
3989    uint luma_packed_non_dc_penalty,
3990    intel_sub_group_avc_mce_payload_t payload )
3991{
3992    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);
3993
3994    // set Mode Cost 0 : MODE_INTRA_NONPRED (M2.0 : 7:0)
3995    handle = intel_set_message_phase_ub(handle, 2, 0, luma_mode_penalty);
3996
3997    // set IntraMxMPredMode (M6.4)
3998    handle = intel_set_message_phase_dw(handle, 6, 4, luma_packed_neighbor_modes);
3999
4000    // set NonDCPredMode (M6.7)
4001    handle = intel_set_message_phase_dw(handle, 6, 7, luma_packed_non_dc_penalty);
4002
4003    intel_sub_group_avc_mce_payload_t result = __builtin_IB_vme_helper_get_as_avc_mce_payload_t(handle);
4004    return result;
4005}
4006
4007INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
4008intel_sub_group_avc_mce_set_intra_luma_mode_cost_function(
4009     uchar luma_mode_penalty,
4010     uint luma_packed_neighbor_modes,
4011     uint luma_packed_non_dc_penalty,
4012     intel_sub_group_avc_mce_payload_t payload )
4013{
4014    return __builtin_spirv_intel_sub_group_avc_mce_set_intra_luma_mode_cost_function_i8_i32_i32_intel_sub_group_avc_mce_payload_t(luma_mode_penalty, luma_packed_neighbor_modes, luma_packed_non_dc_penalty, payload);
4015}
4016
4017uchar __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty( )
4018{
4019    return 0;
4020}
4021
4022INLINE uchar OVERLOADABLE
4023intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty( void )
4024{
4025    return __builtin_spirv_intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();
4026}
4027
4028/*****************************************************************************\
4029
4030Description:
    Set intra chroma mode cost function.
4032
4033    - set Chroma Intra Mode Cost (M2.2 : 31:24)
4034
4035\*****************************************************************************/
4036
// Stores chroma_mode_base_penalty as the Chroma Intra Mode Cost
// (M2.2 : 31:24) of an MCE payload and returns the updated payload.
intel_sub_group_avc_mce_payload_t __builtin_spirv_intel_sub_group_avc_mce_set_intra_chroma_mode_cost_function_i8_intel_sub_group_avc_mce_payload_t(
    uchar chroma_mode_base_penalty,
    intel_sub_group_avc_mce_payload_t payload )
{
    uint4 phases = __builtin_IB_vme_helper_get_handle_avc_mce_payload_t(payload);

    // Byte offset within M2: dword 2 (2*4) plus byte 3, i.e. bits 31:24 of M2.2.
    phases = intel_set_message_phase_ub(phases, 2, 2*4+3, chroma_mode_base_penalty);

    return __builtin_IB_vme_helper_get_as_avc_mce_payload_t(phases);
}
4049
// Overloadable front end: forwards its arguments unchanged to the SPIR-V
// builtin that writes the chroma intra mode cost into the MCE payload.
INLINE intel_sub_group_avc_mce_payload_t OVERLOADABLE
intel_sub_group_avc_mce_set_intra_chroma_mode_cost_function(
    uchar chroma_mode_base_penalty,
    intel_sub_group_avc_mce_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_mce_set_intra_chroma_mode_cost_function_i8_intel_sub_group_avc_mce_payload_t(
            chroma_mode_base_penalty,
            payload );
    return updated;
}
4057
4058/*****************************************************************************\
4059
4060Description:
    Get the inter reference ids (W6.0 : 31:0) from the MCE result payload.
4062
4063\*****************************************************************************/
4064
// Reads dword 0 of phase 6 from the MCE result and returns it as the packed
// inter reference ids.
uint __builtin_spirv_intel_sub_group_avc_mce_get_inter_reference_ids_intel_sub_group_avc_mce_result_t(
    intel_sub_group_avc_mce_result_t result )
{
    uint4 writeback = __builtin_IB_vme_helper_get_handle_avc_mce_result_t(result);
    uint packed_ids = intel_get_message_phase_dw(writeback, 6, 0);
    return packed_ids;
}
4071
// Overloadable front end: returns the packed inter reference ids reported by
// the SPIR-V builtin for the given MCE result.
INLINE uint OVERLOADABLE
intel_sub_group_avc_mce_get_inter_reference_ids(
    intel_sub_group_avc_mce_result_t result )
{
    uint packed_ids =
        __builtin_spirv_intel_sub_group_avc_mce_get_inter_reference_ids_intel_sub_group_avc_mce_result_t(result);
    return packed_ids;
}
4078
// Builds the packed per-block field polarities for the selected references.
//
// packed_reference_ids holds one byte per block (4 blocks): the low nibble is
// the forward reference id, the high nibble the backward reference id.
// packed_reference_parameter_field_polarities holds two bits per reference
// id: bit (2*id) is looked up for forward references and bit (2*id + 1) for
// backward references.
//
// Return value: bits 3:0 are the forward polarities of blocks 0..3 and
// bits 7:4 are the backward polarities of blocks 0..3.
// The result argument is not read.
uchar __builtin_spirv_intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_mce_result_t(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_mce_result_t result )
{
    uchar field_polarities = 0;

    for( uchar blk = 0; blk < 4; ++blk ) {
        // Each block owns one byte of the packed ids.
        uint blk_ids = packed_reference_ids >> ( blk * 8 );

        uchar fwd_id = blk_ids & 0xF;
        uchar bwd_id = ( blk_ids >> 4 ) & 0xF;

        uchar fwd_bit = ( packed_reference_parameter_field_polarities >> ( fwd_id * 2 ) ) & 0x1;
        uchar bwd_bit = ( packed_reference_parameter_field_polarities >> ( bwd_id * 2 + 1 ) ) & 0x1;

        // Forward polarities occupy the low nibble, backward the high nibble.
        field_polarities |= ( fwd_bit << blk ) | ( bwd_bit << ( blk + 4 ) );
    }

    return field_polarities;
}
4108
// Overloadable front end: forwards its arguments unchanged to the SPIR-V
// builtin that computes the packed per-block field polarities.
INLINE uchar OVERLOADABLE
intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_mce_result_t result )
{
    uchar field_polarities =
        __builtin_spirv_intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_mce_result_t(
            packed_reference_ids,
            packed_reference_parameter_field_polarities,
            result );
    return field_polarities;
}
4117
// IME variant: converts the payload to MCE form, applies the base
// multi-reference penalty through the MCE setter, and converts back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_ime_payload_t(
    uchar reference_base_penalty,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty( reference_base_penalty, as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4130
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_ime_payload_t(
            reference_base_penalty,
            payload );
    return updated;
}
4138
// IME variant: converts the payload to MCE form, applies the packed inter
// shape cost through the MCE setter, and converts back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_inter_shape_penalty_i64_intel_sub_group_avc_ime_payload_t(
    ulong packed_shape_cost,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_shape_penalty( packed_shape_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4151
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_inter_shape_penalty(
    ulong packed_shape_cost,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_inter_shape_penalty_i64_intel_sub_group_avc_ime_payload_t(
            packed_shape_cost,
            payload );
    return updated;
}
4159
// IME variant: converts the payload to MCE form, applies the inter direction
// cost through the MCE setter, and converts back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_inter_direction_penalty_i8_intel_sub_group_avc_ime_payload_t(
    uchar direction_cost,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_direction_penalty( direction_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4172
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_inter_direction_penalty(
    uchar direction_cost,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_inter_direction_penalty_i8_intel_sub_group_avc_ime_payload_t(
            direction_cost,
            payload );
    return updated;
}
4180
// IME variant: converts the result to MCE form and returns the inter
// reference ids obtained from the MCE getter.
uint __builtin_spirv_intel_sub_group_avc_ime_get_inter_reference_ids_intel_sub_group_avc_ime_result_t(
    intel_sub_group_avc_ime_result_t result )
{
    intel_sub_group_avc_mce_result_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_result( result );
    uint packed_ids = intel_sub_group_avc_mce_get_inter_reference_ids( as_mce );
    return packed_ids;
}
4188
// Overloadable front end: returns the inter reference ids reported by the
// matching IME SPIR-V builtin.
INLINE uint OVERLOADABLE
intel_sub_group_avc_ime_get_inter_reference_ids(
    intel_sub_group_avc_ime_result_t result )
{
    uint packed_ids =
        __builtin_spirv_intel_sub_group_avc_ime_get_inter_reference_ids_intel_sub_group_avc_ime_result_t(result);
    return packed_ids;
}
4195
// REF variant: converts the payload to MCE form, applies the base
// multi-reference penalty through the MCE setter, and converts back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_ref_payload_t(
    uchar reference_base_penalty,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty( reference_base_penalty, as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4208
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_ref_payload_t(
            reference_base_penalty,
            payload );
    return updated;
}
4216
// REF variant: converts the payload to MCE form, applies the packed inter
// shape cost through the MCE setter, and converts back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_inter_shape_penalty_i64_intel_sub_group_avc_ref_payload_t(
    ulong packed_shape_cost,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_shape_penalty( packed_shape_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4229
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_inter_shape_penalty(
    ulong packed_shape_cost,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_inter_shape_penalty_i64_intel_sub_group_avc_ref_payload_t(
            packed_shape_cost,
            payload );
    return updated;
}
4237
// REF variant: converts the payload to MCE form, applies the inter direction
// cost through the MCE setter, and converts back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_inter_direction_penalty_i8_intel_sub_group_avc_ref_payload_t(
    uchar direction_cost,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_direction_penalty( direction_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4250
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_inter_direction_penalty(
    uchar direction_cost,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_inter_direction_penalty_i8_intel_sub_group_avc_ref_payload_t(
            direction_cost,
            payload );
    return updated;
}
4258
// Multi-reference REF evaluation (no field polarities).
// Writes packed_reference_ids into dword 6 of phase 1 of the REF payload,
// sends the FBR message with the source image supplied for all three image
// arguments, and returns the response wrapped as a REF result.
intel_sub_group_avc_ref_result_t __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_multi_reference_v3i64_i32_intel_sub_group_avc_ref_payload_t(
    VMEImage_t src_image_vme,
    uint packed_reference_ids,
    intel_sub_group_avc_ref_payload_t payload )
{
    long image_id = getVMEImage(src_image_vme);
    long accelerator_id = getVMESampler(src_image_vme);

    uint4 phases = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
    phases = intel_set_message_phase_dw(phases, 1, 6, packed_reference_ids);

    uint4 writeback = __builtin_IB_vme_send_fbr_new(phases, image_id, image_id, image_id, accelerator_id);

    return __builtin_IB_vme_helper_get_as_avc_ref_result_t(writeback);
}
4275
// Overloadable front end (no field polarities): reinterprets the image and
// sampler as 64-bit ids, combines them into a VME image (second operand 0),
// and forwards to the SPIR-V builtin.
INLINE intel_sub_group_avc_ref_result_t OVERLOADABLE
intel_sub_group_avc_ref_evaluate_with_multi_reference(
    read_only image2d_t src_image,
    uint packed_reference_ids,
    sampler_t vme_accelerator,
    intel_sub_group_avc_ref_payload_t payload )
{
    long image_id = (long)__builtin_astype(src_image, void*);
    long accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
    VMEImage_t vme_image = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(image_id, 0, accelerator_id);

    intel_sub_group_avc_ref_result_t evaluated =
        __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_multi_reference_v3i64_i32_intel_sub_group_avc_ref_payload_t(
            vme_image,
            packed_reference_ids,
            payload );
    return evaluated;
}
4289
// Multi-reference REF evaluation with field polarities.
// Applies the packed reference ids and then the packed reference field
// polarities to the raw REF payload, sends the FBR message with the source
// image supplied for all three image arguments, and returns the response
// wrapped as a REF result.
intel_sub_group_avc_ref_result_t __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_multi_reference_v3i64_i32_i8_intel_sub_group_avc_ref_payload_t(
    VMEImage_t src_image_vme,
    uint packed_reference_ids,
    uchar packed_reference_field_polarities,
    intel_sub_group_avc_ref_payload_t payload )
{
    long image_id = getVMEImage(src_image_vme);
    long accelerator_id = getVMESampler(src_image_vme);

    uint4 phases = __builtin_IB_vme_helper_get_handle_avc_ref_payload_t(payload);
    phases = intel_sub_group_payload_set_ref_id_raw(packed_reference_ids, phases);
    phases = intel_sub_group_payload_set_ref_id_polarities_raw(packed_reference_field_polarities, phases);

    uint4 writeback = __builtin_IB_vme_send_fbr_new(phases, image_id, image_id, image_id, accelerator_id);

    return __builtin_IB_vme_helper_get_as_avc_ref_result_t(writeback);
}
4312
// Overloadable front end (with field polarities): reinterprets the image and
// sampler as 64-bit ids, combines them into a VME image (second operand 0),
// and forwards to the SPIR-V builtin.
INLINE intel_sub_group_avc_ref_result_t OVERLOADABLE
intel_sub_group_avc_ref_evaluate_with_multi_reference(
    read_only image2d_t src_image,
    uint packed_reference_ids,
    uchar packed_reference_field_polarities,
    sampler_t vme_accelerator,
    intel_sub_group_avc_ref_payload_t payload )
{
    long image_id = (long)__builtin_astype(src_image, void*);
    long accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
    VMEImage_t vme_image = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(image_id, 0, accelerator_id);

    intel_sub_group_avc_ref_result_t evaluated =
        __builtin_spirv_intel_sub_group_avc_ref_evaluate_with_multi_reference_v3i64_i32_i8_intel_sub_group_avc_ref_payload_t(
            vme_image,
            packed_reference_ids,
            packed_reference_field_polarities,
            payload );
    return evaluated;
}
4327
// REF variant: converts the result to MCE form and returns the inter
// reference ids obtained from the MCE getter.
uint __builtin_spirv_intel_sub_group_avc_ref_get_inter_reference_ids_intel_sub_group_avc_ref_result_t(
    intel_sub_group_avc_ref_result_t result )
{
    intel_sub_group_avc_mce_result_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_result( result );
    uint packed_ids = intel_sub_group_avc_mce_get_inter_reference_ids( as_mce );
    return packed_ids;
}
4335
// Overloadable front end: returns the inter reference ids reported by the
// matching REF SPIR-V builtin.
INLINE uint OVERLOADABLE
intel_sub_group_avc_ref_get_inter_reference_ids(
    intel_sub_group_avc_ref_result_t result )
{
    uint packed_ids =
        __builtin_spirv_intel_sub_group_avc_ref_get_inter_reference_ids_intel_sub_group_avc_ref_result_t(result);
    return packed_ids;
}
4342
// IME variant: converts the result to MCE form and returns the packed field
// polarities computed by the MCE getter.
uchar __builtin_spirv_intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_ime_result_t(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ime_result_t result )
{
    intel_sub_group_avc_mce_result_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_result( result );
    uchar field_polarities =
        intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
            packed_reference_ids,
            packed_reference_parameter_field_polarities,
            as_mce );
    return field_polarities;
}
4352
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE uchar OVERLOADABLE
intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ime_result_t result )
{
    uchar field_polarities =
        __builtin_spirv_intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_ime_result_t(
            packed_reference_ids,
            packed_reference_parameter_field_polarities,
            result );
    return field_polarities;
}
4361
// REF variant: converts the result to MCE form and returns the packed field
// polarities computed by the MCE getter.
uchar __builtin_spirv_intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_ref_result_t(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ref_result_t result )
{
    intel_sub_group_avc_mce_result_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_result( result );
    uchar field_polarities =
        intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(
            packed_reference_ids,
            packed_reference_parameter_field_polarities,
            as_mce );
    return field_polarities;
}
4371
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE uchar OVERLOADABLE
intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(
    uint packed_reference_ids,
    uint packed_reference_parameter_field_polarities,
    intel_sub_group_avc_ref_result_t result )
{
    uchar field_polarities =
        __builtin_spirv_intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities_i32_i32_intel_sub_group_avc_ref_result_t(
            packed_reference_ids,
            packed_reference_parameter_field_polarities,
            result );
    return field_polarities;
}
4380
// SIC variant: converts the payload to MCE form, applies the base
// multi-reference penalty through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_sic_payload_t(
    uchar reference_base_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty( reference_base_penalty, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4393
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(
    uchar reference_base_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty_i8_intel_sub_group_avc_sic_payload_t(
            reference_base_penalty,
            payload );
    return updated;
}
4401
// SIC variant: converts the payload to MCE form, applies the packed inter
// shape cost through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_inter_shape_penalty_i64_intel_sub_group_avc_sic_payload_t(
    ulong packed_shape_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_shape_penalty( packed_shape_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4414
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_inter_shape_penalty(
    ulong packed_shape_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_inter_shape_penalty_i64_intel_sub_group_avc_sic_payload_t(
            packed_shape_cost,
            payload );
    return updated;
}
4422
// SIC variant: converts the payload to MCE form, applies the inter direction
// cost through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_inter_direction_penalty_i8_intel_sub_group_avc_sic_payload_t(
    uchar direction_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_inter_direction_penalty( direction_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4435
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_inter_direction_penalty(
    uchar direction_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_inter_direction_penalty_i8_intel_sub_group_avc_sic_payload_t(
            direction_cost,
            payload );
    return updated;
}
4443
// SIC variant: converts the payload to MCE form, applies the packed intra
// luma shape cost through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_intra_luma_shape_penalty_i32_intel_sub_group_avc_sic_payload_t(
    uint packed_shape_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_intra_luma_shape_penalty( packed_shape_cost, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4456
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_intra_luma_shape_penalty(
    uint packed_shape_cost,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_intra_luma_shape_penalty_i32_intel_sub_group_avc_sic_payload_t(
            packed_shape_cost,
            payload );
    return updated;
}
4464
// SIC variant: converts the payload to MCE form, applies the intra luma mode
// cost inputs through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_intra_luma_mode_cost_function_i8_i32_i32_intel_sub_group_avc_sic_payload_t(
    uchar luma_mode_penalty,
    uint luma_packed_neighbor_modes,
    uint luma_packed_non_dc_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_intra_luma_mode_cost_function(
            luma_mode_penalty,
            luma_packed_neighbor_modes,
            luma_packed_non_dc_penalty,
            as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4481
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(
    uchar luma_mode_penalty,
    uint luma_packed_neighbor_modes,
    uint luma_packed_non_dc_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_intra_luma_mode_cost_function_i8_i32_i32_intel_sub_group_avc_sic_payload_t(
            luma_mode_penalty,
            luma_packed_neighbor_modes,
            luma_packed_non_dc_penalty,
            payload );
    return updated;
}
4491
// SIC variant: converts the payload to MCE form, applies the chroma mode
// penalty through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function_i8_intel_sub_group_avc_sic_payload_t(
    uchar chroma_mode_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_intra_chroma_mode_cost_function( chroma_mode_penalty, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4504
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(
    uchar chroma_mode_penalty,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function_i8_intel_sub_group_avc_sic_payload_t(
            chroma_mode_penalty,
            payload );
    return updated;
}
4512
4513////
4514
// IME variant: converts the payload to MCE form, applies the source field
// polarity through the MCE setter, and converts back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_ime_payload_t(
    uchar src_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_source_interlaced_field_polarity( src_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4527
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_source_interlaced_field_polarity(
    uchar src_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_ime_payload_t(
            src_field_polarity,
            payload );
    return updated;
}
4535
// REF variant: converts the payload to MCE form, applies the source field
// polarity through the MCE setter, and converts back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_ref_payload_t(
    uchar src_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_source_interlaced_field_polarity( src_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4548
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_source_interlaced_field_polarity(
    uchar src_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_ref_payload_t(
            src_field_polarity,
            payload );
    return updated;
}
4556
// SIC variant: converts the payload to MCE form, applies the source field
// polarity through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_sic_payload_t(
    uchar src_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_source_interlaced_field_polarity( src_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4569
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_source_interlaced_field_polarity(
    uchar src_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_source_interlaced_field_polarity_i8_intel_sub_group_avc_sic_payload_t(
            src_field_polarity,
            payload );
    return updated;
}
4577
// IME variant: converts the payload to MCE form, applies the single-reference
// field polarity through the MCE setter, and converts back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_ime_payload_t(
    uchar ref_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity( ref_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4590
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_ime_payload_t(
            ref_field_polarity,
            payload );
    return updated;
}
4598
// REF variant: converts the payload to MCE form, applies the single-reference
// field polarity through the MCE setter, and converts back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_ref_payload_t(
    uchar ref_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity( ref_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4611
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_ref_payload_t(
            ref_field_polarity,
            payload );
    return updated;
}
4619
// SIC variant: converts the payload to MCE form, applies the single-reference
// field polarity through the MCE setter, and converts back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_sic_payload_t(
    uchar ref_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity( ref_field_polarity, as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4632
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(
    uchar ref_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity_i8_intel_sub_group_avc_sic_payload_t(
            ref_field_polarity,
            payload );
    return updated;
}
4640
// IME variant: converts the payload to MCE form, applies the forward and
// backward reference field polarities through the MCE setter, and converts
// back to IME.
intel_sub_group_avc_ime_payload_t __builtin_spirv_intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_ime_payload_t(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ime_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            as_mce );
    return intel_sub_group_avc_mce_convert_to_ime_payload( updated );
}
4654
// Overloadable front end: forwards its arguments unchanged to the matching
// IME SPIR-V builtin.
INLINE intel_sub_group_avc_ime_payload_t OVERLOADABLE
intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ime_payload_t payload )
{
    intel_sub_group_avc_ime_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_ime_payload_t(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            payload );
    return updated;
}
4663
// REF variant: converts the payload to MCE form, applies the forward and
// backward reference field polarities through the MCE setter, and converts
// back to REF.
intel_sub_group_avc_ref_payload_t __builtin_spirv_intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_ref_payload_t(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_ref_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            as_mce );
    return intel_sub_group_avc_mce_convert_to_ref_payload( updated );
}
4677
// Overloadable front end: forwards its arguments unchanged to the matching
// REF SPIR-V builtin.
INLINE intel_sub_group_avc_ref_payload_t OVERLOADABLE
intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_ref_payload_t payload )
{
    intel_sub_group_avc_ref_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_ref_payload_t(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            payload );
    return updated;
}
4686
// SIC variant: converts the payload to MCE form, applies the forward and
// backward reference field polarities through the MCE setter, and converts
// back to SIC.
intel_sub_group_avc_sic_payload_t __builtin_spirv_intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_sic_payload_t(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_mce_payload_t as_mce =
        intel_sub_group_avc_sic_convert_to_mce_payload( payload );
    intel_sub_group_avc_mce_payload_t updated =
        intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            as_mce );
    return intel_sub_group_avc_mce_convert_to_sic_payload( updated );
}
4700
// Overloadable front end: forwards its arguments unchanged to the matching
// SIC SPIR-V builtin.
INLINE intel_sub_group_avc_sic_payload_t OVERLOADABLE
intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(
    uchar fwd_ref_field_polarity,
    uchar bwd_ref_field_polarity,
    intel_sub_group_avc_sic_payload_t payload )
{
    intel_sub_group_avc_sic_payload_t updated =
        __builtin_spirv_intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities_i8_i8_intel_sub_group_avc_sic_payload_t(
            fwd_ref_field_polarity,
            bwd_ref_field_polarity,
            payload );
    return updated;
}
4709
4710////
4711
4712intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_multi_reference_v3i64_i32_intel_sub_group_avc_sic_payload_t(
4713    VMEImage_t src_image_vme,
4714    uint packed_reference_ids,
4715    intel_sub_group_avc_sic_payload_t payload )
4716{
4717    long src_image = getVMEImage(src_image_vme);
4718    long vme_accelerator = getVMESampler(src_image_vme);
4719
4720    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
4721    handle = intel_set_message_phase_dw(handle, 1, 6, packed_reference_ids);
4722
4723    uint4 res = __builtin_IB_vme_send_sic_new(handle, src_image, src_image, src_image, vme_accelerator);
4724
4725    intel_sub_group_avc_sic_result_t result = __builtin_IB_vme_helper_get_as_avc_sic_result_t(res);
4726    return result;
4727}
4728
4729INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
4730intel_sub_group_avc_sic_evaluate_with_multi_reference(
4731      read_only image2d_t src_image,
4732      uint packed_reference_ids,
4733      sampler_t vme_accelerator,
4734      intel_sub_group_avc_sic_payload_t payload )
4735{
4736    long src_image_id = (long)__builtin_astype(src_image, void*);
4737    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
4738    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
4739
4740    return __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_multi_reference_v3i64_i32_intel_sub_group_avc_sic_payload_t(src_image_vme, packed_reference_ids, payload);
4741}
4742
4743intel_sub_group_avc_sic_result_t __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_multi_reference_v3i64_i32_i8_intel_sub_group_avc_sic_payload_t(
4744    VMEImage_t src_image_vme,
4745    uint packed_reference_ids,
4746    uchar packed_reference_field_polarities,
4747    intel_sub_group_avc_sic_payload_t payload )
4748{
4749    long src_image = getVMEImage(src_image_vme);
4750    long vme_accelerator = getVMESampler(src_image_vme);
4751
4752    uint4 handle = __builtin_IB_vme_helper_get_handle_avc_sic_payload_t(payload);
4753
4754    handle = intel_sub_group_payload_set_ref_id_raw(
4755        packed_reference_ids,
4756        handle);
4757    handle = intel_sub_group_payload_set_ref_id_polarities_raw(
4758        packed_reference_field_polarities,
4759        handle);
4760
4761    uint4 res = __builtin_IB_vme_send_sic_new(handle, src_image, src_image, src_image, vme_accelerator);
4762
4763    intel_sub_group_avc_sic_result_t result = __builtin_IB_vme_helper_get_as_avc_sic_result_t(res);
4764    return result;
4765}
4766
4767INLINE intel_sub_group_avc_sic_result_t OVERLOADABLE
4768intel_sub_group_avc_sic_evaluate_with_multi_reference(
4769      read_only image2d_t src_image,
4770      uint packed_reference_ids,
4771      uchar packed_reference_field_polarities,
4772      sampler_t vme_accelerator,
4773      intel_sub_group_avc_sic_payload_t payload )
4774{
4775    long src_image_id = (long)__builtin_astype(src_image, void*);
4776    long vme_accelerator_id = (long)__builtin_astype(vme_accelerator, void*);
4777    VMEImage_t src_image_vme = __builtin_spirv_OpVmeImageINTEL_i64_i64_i64(src_image_id, 0, vme_accelerator_id);
4778
4779    return __builtin_spirv_intel_sub_group_avc_sic_evaluate_with_multi_reference_v3i64_i32_i8_intel_sub_group_avc_sic_payload_t(src_image_vme, packed_reference_ids, packed_reference_field_polarities, payload);
4780}
4781
4782uchar __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(
4783    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
4784    uchar major_shape,
4785    uchar direction )
4786{
4787    uchar retValue = 0;
4788    // IME Streamout follows the same format as the IME Streamin message phases (IME2-IME5).
4789    const uint reg = (direction == CLK_AVC_ME_MAJOR_FORWARD_INTEL) ?
4790        /*fwd*/ RETURN_MESSAGE_NUM_GRFS :
4791        /*bwd*/(RETURN_MESSAGE_NUM_GRFS+2);
4792
4793    uint8 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_dual_reference_streamout_t(result);
4794    if(major_shape == VME_MAJOR_16x16)
4795    {
4796        // WX+2.4 [19:16] - Rec0 Shape 16x16 RefID
4797        // WX+4.4 [19:16] - Rec1 Shape 16x16 RefID
4798        retValue = intel_get_message_phase_ub(handle, reg, 4*4+2) & 0xF;
4799    }
4800    else if(major_shape == VME_MAJOR_16x8)
4801    {
4802        // WX+2.6 [3:0] - Rec0 Shape 16x8_0 RefID
4803        // WX+2.6 [7:4] - Rec0 Shape 16x8_1 RefID
4804        // WX+4.6 [3:0] - Rec1 Shape 16x8_0 RefID
4805        // WX+4.6 [7:4] - Rec1 Shape 16x8_1 RefID
4806        uchar val = intel_get_message_phase_ub(handle, reg, 6*4);
4807        val >>= get_sub_group_local_id() * 4;
4808        retValue = val & 0xF;
4809    }
4810    else if(major_shape == VME_MAJOR_8x16)
4811    {
4812        // WX+2.6 [11:8]  - Rec0 Shape 8x16_0 RefID
4813        // WX+2.6 [15:12] - Rec0 Shape 8x16_1 RefID
4814        // WX+4.6 [11:8]  - Rec1 Shape 8x16_0 RefID
4815        // WX+4.6 [15:12] - Rec1 Shape 8x16_1 RefID
4816        uchar val = intel_get_message_phase_ub(handle, reg, 6*4+1);
4817        val >>= get_sub_group_local_id() * 4;
4818        retValue = val & 0xF;
4819    }
4820    else // 8x8
4821    {
4822        // WX+2.6 [19:16] - Rec0 Shape 8x8_0 RefID
4823        // WX+2.6 [23:20] - Rec0 Shape 8x8_1 RefID
4824        // WX+2.6 [27:24] - Rec0 Shape 8x8_2 RefID
4825        // WX+2.6 [31:28] - Rec0 Shape 8x8_3 RefID
4826        // WX+4.6 [19:16] - Rec1 Shape 8x8_0 RefID
4827        // WX+4.6 [23:20] - Rec1 Shape 8x8_1 RefID
4828        // WX+4.6 [27:24] - Rec1 Shape 8x8_2 RefID
4829        // WX+4.6 [31:28] - Rec1 Shape 8x8_3 RefID
4830        ushort val = intel_get_message_phase_uw(handle, reg, 6*2+1);
4831        val >>= get_sub_group_local_id() * 4;
4832        retValue = (uchar)(val & 0xF);
4833    }
4834
4835    return retValue;
4836}
4837
4838INLINE uchar OVERLOADABLE
4839intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
4840    intel_sub_group_avc_ime_result_dual_reference_streamout_t result,
4841    uchar major_shape,
4842    uchar direction)
4843{
4844    return __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids_intel_sub_group_avc_ime_result_dual_reference_streamout_t_i8_i8(result, major_shape, direction);
4845}
4846
4847uchar __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(
4848    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
4849    uchar major_shape )
4850{
4851    uint8 handle = __builtin_IB_vme_helper_get_handle_avc_ime_result_single_reference_streamout_t(result);
4852    intel_sub_group_avc_ime_result_dual_reference_streamout_t nresult = __builtin_IB_vme_helper_get_as_avc_ime_result_dual_reference_streamout_t(handle);
4853
4854    return intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
4855        nresult,
4856        major_shape,
4857        CLK_AVC_ME_MAJOR_FORWARD_INTEL);
4858}
4859
4860INLINE uchar OVERLOADABLE
4861intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(
4862    intel_sub_group_avc_ime_result_single_reference_streamout_t result,
4863    uchar major_shape )
4864{
4865    return __builtin_spirv_intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids_intel_sub_group_avc_ime_result_single_reference_streamout_t_i8(result, major_shape);
4866}
4867
4868
4869/*****************************************************************************/
4870/*                       VA (Video Analytics)                                */
4871/*****************************************************************************/
4872
// Integer-coordinate (int2) overload of intel_work_group_va_boolcentroid.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_boolcentroid(
    __local void* dst,
    int2 i_coord,
    int2 size,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4882
// Integer-coordinate (int2) overload of intel_work_group_va_boolsum.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_boolsum(
    __local void* dst,
    int2 i_coord,
    int2 size,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4892
// Integer-coordinate (int2) overload of intel_work_group_va_centroid.
// Unlike the bool variants, `size` is a scalar int here.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_centroid(
    __local void* dst,
    int2 i_coord,
    int size,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4902
// Integer-coordinate (int2) overload of intel_work_group_va_convolve_16x4
// that takes a __local destination buffer.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_convolve_16x4(
    __local void* dst,
    int2 i_coord,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4911
// Integer-coordinate (int2) overload of intel_work_group_va_dilate_64x4.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_dilate_64x4(
    __local void* dst,
    int2 i_coord,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4920
// Integer-coordinate (int2) overload of intel_work_group_va_erode_64x4.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_erode_64x4(
    __local void* dst,
    int2 i_coord,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4929
// Integer-coordinate (int2) overload of intel_work_group_va_minmax.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_minmax(
    __local void* dst,
    int2 i_coord,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4938
// Integer-coordinate (int2) overload of intel_work_group_va_minmaxfilter_16x4.
// Not implemented: the body is empty, so the call is a no-op and `dst` is
// never written. None of the parameters are read.
INLINE void OVERLOADABLE intel_work_group_va_minmaxfilter_16x4(
    __local void* dst,
    int2 i_coord,
    image2d_t image,
    sampler_t a )
{
    // placeholder for future implementation
}
4947
4948INLINE void OVERLOADABLE intel_work_group_va_boolcentroid(
4949    __local void* registers,
4950    float2 coordsNorm,
4951    int2 size,
4952    image2d_t srcImg,
4953    sampler_t accelerator )
4954{
4955    int i_image = (int)__builtin_astype( srcImg, __global void* );
4956    __builtin_IB_va_boolcentroid(
4957        registers,
4958        coordsNorm,
4959        size,
4960        i_image,
4961        __builtin_IB_convert_sampler_to_int(accelerator) );
4962}
4963
4964INLINE void OVERLOADABLE intel_work_group_va_boolsum(
4965    __local void* registers,
4966    float2 coordsNorm,
4967    int2 size,
4968    image2d_t srcImg,
4969    sampler_t accelerator )
4970{
4971    int i_image = (int)__builtin_astype( srcImg, __global void* );
4972    __builtin_IB_va_boolsum(
4973        registers,
4974        coordsNorm,
4975        size,
4976        i_image,
4977        __builtin_IB_convert_sampler_to_int(accelerator) );
4978}
4979
4980INLINE void OVERLOADABLE intel_work_group_va_centroid(
4981    __local void* registers,
4982    float2 coordsNorm,
4983    int size,
4984    image2d_t srcImg,
4985    sampler_t accelerator )
4986{
4987    int i_image = (int)__builtin_astype( srcImg, __global void* );
4988    __builtin_IB_va_centroid(
4989        registers,
4990        coordsNorm,
4991        size,
4992        i_image,
4993        __builtin_IB_convert_sampler_to_int(accelerator) );
4994}
4995
4996INLINE void OVERLOADABLE intel_work_group_va_convolve_16x4(
4997    __local void* registers,
4998    float2 coordsNorm,
4999    image2d_t srcImg,
5000    sampler_t accelerator )
5001{
5002    __builtin_IB_va_convolve_16x4_SLM(
5003        registers,
5004        coordsNorm,
5005        (int)__builtin_astype( srcImg, __global void* ),
5006        __builtin_IB_convert_sampler_to_int(accelerator) );
5007}
5008
5009INLINE void OVERLOADABLE intel_work_group_va_dilate_64x4(
5010    __local void* registers,
5011    float2 coordsNorm,
5012    image2d_t srcImg,
5013    sampler_t accelerator )
5014{
5015    int i_image = (int)__builtin_astype( srcImg, __global void* );
5016    __builtin_IB_va_dilate_64x4(
5017        registers,
5018        coordsNorm,
5019        i_image,
5020        __builtin_IB_convert_sampler_to_int(accelerator) );
5021}
5022
5023INLINE void OVERLOADABLE intel_work_group_va_erode_64x4(
5024    __local void* registers,
5025    float2 coordsNorm,
5026    image2d_t srcImg,
5027    sampler_t accelerator )
5028{
5029    int i_image = (int)__builtin_astype( srcImg, __global void* );
5030    __builtin_IB_va_erode_64x4(
5031        registers,
5032        coordsNorm,
5033        i_image,
5034        __builtin_IB_convert_sampler_to_int(accelerator) );
5035}
5036
5037INLINE void OVERLOADABLE intel_work_group_va_minmax(
5038    __local void* registers,
5039    float2 coordsNorm,
5040    image2d_t srcImg,
5041    sampler_t accelerator )
5042{
5043    int i_image = (int)__builtin_astype( srcImg, __global void* );
5044    __builtin_IB_va_minmax(
5045        registers,
5046        coordsNorm,
5047        i_image,
5048        __builtin_IB_convert_sampler_to_int(accelerator) );
5049}
5050
5051INLINE void OVERLOADABLE intel_work_group_va_minmaxfilter_16x4(
5052    __local void* registers,
5053    float2 coordsNorm,
5054    image2d_t srcImg,
5055    sampler_t accelerator )
5056{
5057    int i_image = (int)__builtin_astype( srcImg, __global void* );
5058    __builtin_IB_va_minmaxfilter_16x4_SLM(
5059        registers,
5060        coordsNorm,
5061        i_image,
5062        __builtin_IB_convert_sampler_to_int(accelerator) );
5063}
5064
5065INLINE short OVERLOADABLE intel_work_group_va_convolve_16x1(
5066    float2 coordsNorm,
5067    image2d_t srcImg,
5068    sampler_t accelerator )
5069{
5070    // placeholder for future implementation
5071}
5072
5073INLINE short4 OVERLOADABLE intel_work_group_va_convolve_16x4(
5074    float2 coordsNorm,
5075    image2d_t srcImg,
5076    sampler_t accelerator )
5077{
5078    // placeholder for future implementation
5079}
5080
5081INLINE uchar OVERLOADABLE intel_work_group_va_minfilter_16x1(
5082    float2 coordsNorm,
5083    image2d_t srcImg,
5084    sampler_t accelerator )
5085{
5086    // placeholder for future implementation
5087}
5088
5089INLINE uchar4 OVERLOADABLE intel_work_group_va_minfilter_16x4(
5090    float2 coordsNorm,
5091    image2d_t srcImg,
5092    sampler_t accelerator )
5093{
5094    // placeholder for future implementation
5095}
5096
5097INLINE uchar OVERLOADABLE intel_work_group_va_maxfilter_16x1(
5098    float2 coordsNorm,
5099    image2d_t srcImg,
5100    sampler_t accelerator )
5101{
5102    // placeholder for future implementation
5103}
5104
5105INLINE uchar4 OVERLOADABLE intel_work_group_va_maxfilter_16x4(
5106    float2 coordsNorm,
5107    image2d_t srcImg,
5108    sampler_t accelerator )
5109{
5110    // placeholder for future implementation
5111}
5112
5113