1/*========================== begin_copyright_notice ============================
2
3Copyright (C) 2017-2021 Intel Corporation
4
5SPDX-License-Identifier: MIT
6
7============================= end_copyright_notice ===========================*/
8
9#ifndef IGCBIF_INTRINSICS_CL
10#define IGCBIF_INTRINSICS_CL
11
12#pragma OPENCL EXTENSION cl_khr_fp16 : enable
13#pragma OPENCL EXTENSION cl_khr_fp64 : enable
14
15// Access of image and sampler parameters
16
17int    __builtin_IB_get_address_mode(int) __attribute__((const));
18int    __builtin_IB_is_normalized_coords(int) __attribute__((const));
19int    __builtin_IB_get_image_array_size(int) __attribute__((const));
20int    __builtin_IB_get_snap_wa_reqd(int) __attribute__((const));
21int    __builtin_IB_get_image_height(int) __attribute__((const));
22int    __builtin_IB_get_image_width(int) __attribute__((const));
23int    __builtin_IB_get_image_depth(int) __attribute__((const));
24int    __builtin_IB_get_image_channel_data_type(int) __attribute__((const));
25int    __builtin_IB_get_image_srgb_channel_order(int) __attribute__((const));
26int    __builtin_IB_get_image_channel_order(int) __attribute__((const));
27int    __builtin_IB_get_image_num_samples(int) __attribute__((const));
28int    __builtin_IB_get_image_num_mip_levels(int) __attribute__((const));
29long   __builtin_IB_get_flat_image_baseoffset(int) __attribute__((const));
30int    __builtin_IB_get_flat_image_width(int) __attribute__((const));
31int    __builtin_IB_get_flat_image_height(int) __attribute__((const));
32int    __builtin_IB_get_flat_image_pitch(int) __attribute__((const));
33
34// Image sampling and loads
35float4 __builtin_IB_OCL_1d_sample_l(int, int, float,  float);
36float4 __builtin_IB_OCL_1darr_sample_l(int, int, float2,  float);
37float4 __builtin_IB_OCL_2d_sample_l(int, int, float2, float);
38float4 __builtin_IB_OCL_2darr_sample_l(int, int, float4, float);
39float4 __builtin_IB_OCL_3d_sample_l(int, int, float4, float);
40
41float4 __builtin_IB_OCL_1d_sample_d(int, int, float,  float, float);
42float4 __builtin_IB_OCL_1darr_sample_d(int, int, float2,  float, float);
43float4 __builtin_IB_OCL_2d_sample_d(int, int, float2, float2, float2);
44float4 __builtin_IB_OCL_2darr_sample_d(int, int, float4, float2, float2);
45float4 __builtin_IB_OCL_3d_sample_d(int, int, float4, float4, float4);
46
47// versions that return uint for read_imageui
48uint4 __builtin_IB_OCL_1d_sample_lui(int, int, float,  float);
49uint4 __builtin_IB_OCL_1darr_sample_lui(int, int, float2,  float);
50uint4 __builtin_IB_OCL_2d_sample_lui(int, int, float2, float);
51uint4 __builtin_IB_OCL_2darr_sample_lui(int, int, float4, float);
52uint4 __builtin_IB_OCL_3d_sample_lui(int, int, float4, float);
53
54uint4 __builtin_IB_OCL_1d_sample_dui(int, int, float,  float, float);
55uint4 __builtin_IB_OCL_1darr_sample_dui(int, int, float2,  float, float);
56uint4 __builtin_IB_OCL_2d_sample_dui(int, int, float2, float2, float2);
57uint4 __builtin_IB_OCL_2darr_sample_dui(int, int, float4, float2, float2);
58uint4 __builtin_IB_OCL_3d_sample_dui(int, int, float4, float4, float4);
59
60uint4 __builtin_IB_OCL_1d_ldui(int, int,  int);
61uint4 __builtin_IB_OCL_1darr_ldui(int, int2,  int);
62uint4 __builtin_IB_OCL_2d_ldui(int, int2, int);
63uint4 __builtin_IB_OCL_2darr_ldui(int, int4, int);
64uint4 __builtin_IB_OCL_3d_ldui(int, int4, int);
65
66float4 __builtin_IB_OCL_1d_ld(int, int,  int);
67float4 __builtin_IB_OCL_1darr_ld(int, int2,  int);
68float4 __builtin_IB_OCL_2d_ld(int, int2, int);
69float4 __builtin_IB_OCL_2darr_ld(int, int4, int);
70float4 __builtin_IB_OCL_3d_ld(int, int4, int);
71
72float4 __builtin_IB_OCL_2d_ldmcs(int, int2);
73float4 __builtin_IB_OCL_2darr_ldmcs(int, int4);
74float4 __builtin_IB_OCL_2d_ld2dms(int, int2, int, float4);
75uint4  __builtin_IB_OCL_2d_ld2dmsui(int, int2, int, float4);
76float4 __builtin_IB_OCL_2darr_ld2dms(int, int4, int, float4);
77uint4  __builtin_IB_OCL_2darr_ld2dmsui(int, int4, int, float4);
78
79int __builtin_IB_convert_sampler_to_int(sampler_t);
80
81// Convert Functions for pipes and samplers
82#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
83__global void* __builtin_IB_convert_pipe_ro_to_intel_pipe(pipe int);
84__global void* __builtin_IB_convert_pipe_wo_to_intel_pipe(write_only pipe int);
85#endif
86
87// Image writes
88void     __builtin_IB_write_1darr_ui(int, int2, uint4, int);
89void     __builtin_IB_write_1d_ui(int, int, uint4, int);
90void     __builtin_IB_write_2darr_ui(int, int4, uint4, int);
91void     __builtin_IB_write_2d_ui(int, int2, uint4, int);
92void     __builtin_IB_write_3d_ui(int, int4, uint4, int);
93void     __builtin_IB_write_2darr_f(int, int4, float4, int);
94void     __builtin_IB_write_2d_f(int, int2, float4, int);
95
96// Workgroup functions
97local uchar* __builtin_IB_AllocLocalMemPool(bool allocAllWorkgroups, uint numAdditionalElements, uint elementSize);
98
99// Memory fences
100// See GenISAIntrinsics.td for documentation
101void     __builtin_IB_memfence(bool commitEnable, bool flushRW, bool flushConstant, bool flushTexture, bool flushIcache, bool isGlobal, bool invalidateL1);
102void     __builtin_IB_flush_sampler_cache(void);
103void     __builtin_IB_typedmemfence(bool invalidateCache);
104
105// Barrier
106void     __builtin_IB_thread_group_barrier(void) __attribute__((convergent));
107void     __builtin_IB_thread_group_barrier_signal(void) __attribute__((convergent));
108void     __builtin_IB_thread_group_barrier_wait(void) __attribute__((convergent));
109
110// Workitem functions
111uint     __builtin_IB_get_work_dim(void) __attribute__((const));
112uint     __builtin_IB_get_group_id(uint) __attribute__((const));
113uint     __builtin_IB_get_global_offset(uint) __attribute__((const));
114uint     __builtin_IB_get_local_size(uint) __attribute__((const));
115uint     __builtin_IB_get_local_id_x(void) __attribute__((const));
116uint     __builtin_IB_get_local_id_y(void) __attribute__((const));
117uint     __builtin_IB_get_local_id_z(void) __attribute__((const));
118uint     __builtin_IB_get_global_size(uint) __attribute__((const));
119uint     __builtin_IB_get_num_groups(uint) __attribute__((const));
120uint     __builtin_IB_get_enqueued_local_size(uint) __attribute__((const));
121
122// Double precision conversions
123half      __builtin_IB_ftoh_rtn(float) __attribute__((const));
124half      __builtin_IB_ftoh_rtp(float) __attribute__((const));
125half      __builtin_IB_ftoh_rtz(float)  __attribute__((const));
126#if defined(cl_khr_fp64)
127#endif // defined(cl_khr_fp64)
128
129// Debug/Testing Built-In Functions
130uint2     __builtin_IB_read_cycle_counter(void) __attribute__((const));
131void      __builtin_IB_source_value(uint reg);
132uint      __builtin_IB_set_dbg_register(uint dgb0_0);
133uint      __builtin_IB_movreg(uint reg) __attribute__((const));
134uint      __builtin_IB_movflag(uint flag) __attribute__((const));
135uint      __builtin_IB_movcr(uint reg) __attribute__((const));
136uint      __builtin_IB_hw_thread_id(void) __attribute__((const));
137uint      __builtin_IB_slice_id(void) __attribute__((const));
138uint      __builtin_IB_subslice_id(void) __attribute__((const));
139uint      __builtin_IB_dual_subslice_id(void) __attribute__((const));
140uint      __builtin_IB_eu_id(void) __attribute__((const));
141uint      __builtin_IB_get_sr0(uint DWNumber); // DWNumber=0|1|2|3
142uint      __builtin_IB_eu_thread_id(void) __attribute__((const));
143void      __builtin_IB_profile_snapshot(int point_type,int point_index) __attribute__((const));
144void      __builtin_IB_profile_aggregated(int point_type,int point_index) __attribute__((const));
145void      __builtin_IB_eu_thread_pause(uint value);
146
147// int -> float operations
148float __builtin_IB_itof_rtn(int);
149float __builtin_IB_itof_rtp(int);
150float __builtin_IB_itof_rtz(int);
151float __builtin_IB_uitof_rtn(uint);
152float __builtin_IB_uitof_rtp(uint);
153float __builtin_IB_uitof_rtz(uint);
154
155#if defined(cl_khr_fp64)
156// long -> double operations
157double __builtin_IB_itofp64_rtn(long);
158double __builtin_IB_itofp64_rtp(long);
159double __builtin_IB_itofp64_rtz(long);
160double __builtin_IB_uitofp64_rtn(ulong);
161double __builtin_IB_uitofp64_rtp(ulong);
162double __builtin_IB_uitofp64_rtz(ulong);
163#endif
164
165// Native integer operations
166uint     __builtin_IB_bfi(uint, uint, uint, uint) __attribute__((const));
167uint     __builtin_IB_ibfe(uint, uint, uint) __attribute__((const));
168uint     __builtin_IB_ubfe(uint, uint, uint) __attribute__((const));
169uint     __builtin_IB_bfrev(uint) __attribute__((const));
170
171char     __builtin_IB_popcount_1u8(char) __attribute__((const));
172short    __builtin_IB_popcount_1u16(short) __attribute__((const));
173int      __builtin_IB_popcount_1u32(int) __attribute__((const));
174
175// Native math operations - float version
176float    __builtin_IB_frnd_ne(float) __attribute__((const));
177float    __builtin_IB_frnd_ni(float) __attribute__((const));
178float    __builtin_IB_frnd_pi(float) __attribute__((const));
179float    __builtin_IB_frnd_zi(float) __attribute__((const));
180float    __builtin_IB_native_exp2f(float) __attribute__((const));
181float    __builtin_IB_native_cosf(float) __attribute__((const));
182float    __builtin_IB_native_log2f(float) __attribute__((const));
183float    __builtin_IB_native_powrf(float, float) __attribute__((const));
184float    __builtin_IB_native_sinf(float) __attribute__((const));
185float    __builtin_IB_native_sqrtf(float) __attribute__((const));
186float    __builtin_IB_fmax(float, float) __attribute__((const));
187float    __builtin_IB_fmin(float, float) __attribute__((const));
188half     __builtin_IB_HMAX(half, half) __attribute__((const));
189half     __builtin_IB_HMIN(half, half) __attribute__((const));
190
191// Native math operations - fp16 version
192half     __builtin_IB_native_cosh(half) __attribute__((const));
193half     __builtin_IB_native_exp2h(half) __attribute__((const));
194half     __builtin_IB_native_log2h(half) __attribute__((const));
195half     __builtin_IB_native_sinh(half) __attribute__((const));
196half     __builtin_IB_native_sqrth(half) __attribute__((const));
197half     __builtin_IB_fmah(half, half, half) __attribute__((const));
198
199// Native math operations - fp64 version
200#if defined(cl_khr_fp64)
201double    __builtin_IB_native_sqrtd(double) __attribute__((const));
202double    __builtin_IB_dmin(double, double) __attribute__((const));
203double    __builtin_IB_dmax(double, double) __attribute__((const));
204#endif
205
206// Atomic operations
207int      __builtin_IB_atomic_add_global_i32(__global int*, int);
208int      __builtin_IB_atomic_add_local_i32(__local int*, int);
209int      __builtin_IB_atomic_sub_global_i32(__global int*, int);
210int      __builtin_IB_atomic_sub_local_i32(__local int*, int);
211int      __builtin_IB_atomic_xchg_global_i32(__global int*, int);
212int      __builtin_IB_atomic_xchg_local_i32(__local int*, int);
213int      __builtin_IB_atomic_min_global_i32(__global int*, int);
214uint     __builtin_IB_atomic_min_global_u32(__global uint*, uint);
215float    __builtin_IB_atomic_min_global_f32(__global float*, float);
216int      __builtin_IB_atomic_min_local_i32(__local int*, int);
217uint     __builtin_IB_atomic_min_local_u32(__local uint*, uint);
218float    __builtin_IB_atomic_min_local_f32(__local float*, float);
219int      __builtin_IB_atomic_max_global_i32(__global int*, int);
220uint     __builtin_IB_atomic_max_global_u32(__global uint*, uint);
221float    __builtin_IB_atomic_max_global_f32(__global float*, float);
222int      __builtin_IB_atomic_max_local_i32(__local int*, int);
223uint     __builtin_IB_atomic_max_local_u32(__local uint*, uint);
224float    __builtin_IB_atomic_max_local_f32(__local float*, float);
225int      __builtin_IB_atomic_and_global_i32(__global int*, int);
226int      __builtin_IB_atomic_and_local_i32(__local int*, int);
227int      __builtin_IB_atomic_or_global_i32(__global int*, int);
228int      __builtin_IB_atomic_or_local_i32(__local int*, int);
229int      __builtin_IB_atomic_xor_global_i32(__global int*, int);
230int      __builtin_IB_atomic_xor_local_i32(__local int*, int);
231int      __builtin_IB_atomic_inc_global_i32(__global int*);
232int      __builtin_IB_atomic_inc_local_i32(__local int*);
233int      __builtin_IB_atomic_dec_global_i32(__global int*);
234int      __builtin_IB_atomic_dec_local_i32(__local int*);
235int      __builtin_IB_atomic_cmpxchg_global_i32(__global int*, int, int);
236float    __builtin_IB_atomic_cmpxchg_global_f32(__global float*, float, float);
237int      __builtin_IB_atomic_cmpxchg_local_i32(__local int*, int, int);
238float    __builtin_IB_atomic_cmpxchg_local_f32(__local float*, float, float);
239
240// Float Atomics
241#if defined(cl_intel_global_float_atomics)
242float    __builtin_IB_atomic_add_global_f32(__global float*, float);
243float    __builtin_IB_atomic_sub_global_f32(__global float*, float);
244#endif // defined(cl_intel_global_float_atomics)
245// 64bit Atomic operations
246#if defined(cl_intel_64bit_global_atomics_placeholder)
247long     __builtin_IB_atomic_add_global_i64(__global long*, long);
248long     __builtin_IB_atomic_sub_global_i64(__global long*, long);
249long     __builtin_IB_atomic_xchg_global_i64(__global long*, long);
250long     __builtin_IB_atomic_min_global_i64(__global long*, long);
251ulong    __builtin_IB_atomic_min_global_u64(__global ulong*, ulong);
252double   __builtin_IB_atomic_min_global_f64(__global double*, double);
253long     __builtin_IB_atomic_max_global_i64(__global long*, long);
254ulong    __builtin_IB_atomic_max_global_u64(__global ulong*, ulong);
255double   __builtin_IB_atomic_max_global_f64(__global double*, double);
256long     __builtin_IB_atomic_and_global_i64(__global long*, long);
257long     __builtin_IB_atomic_or_global_i64(__global long*, long);
258long     __builtin_IB_atomic_xor_global_i64(__global long*, long);
259long     __builtin_IB_atomic_inc_global_i64(__global long*);
260long     __builtin_IB_atomic_dec_global_i64(__global long*);
261long     __builtin_IB_atomic_cmpxchg_global_i64(__global long*, long, long);
262double   __builtin_IB_atomic_cmpxchg_global_f64(__global double*, double, double);
263#endif // defined(cl_intel_64bit_global_atomics_placeholder)
264
265// Atomic operations
266short    __builtin_IB_atomic_add_global_i16(__global short*, short);
267short    __builtin_IB_atomic_add_local_i16(__local short*, short);
268short    __builtin_IB_atomic_sub_global_i16(__global short*, short);
269short    __builtin_IB_atomic_sub_local_i16(__local short*, short);
270short    __builtin_IB_atomic_xchg_global_i16(__global short*, short);
271short    __builtin_IB_atomic_xchg_local_i16(__local short*, short);
272short    __builtin_IB_atomic_min_global_i16(__global short*, short);
273ushort   __builtin_IB_atomic_min_global_u16(__global ushort*, ushort);
274half     __builtin_IB_atomic_min_global_f16(__global half*, half);
275short    __builtin_IB_atomic_min_local_i16(__local short*, short);
276ushort   __builtin_IB_atomic_min_local_u16(__local ushort*, ushort);
277half     __builtin_IB_atomic_min_local_f16(__local half*, half);
278short    __builtin_IB_atomic_max_global_i16(__global short*, short);
279ushort   __builtin_IB_atomic_max_global_u16(__global ushort*, ushort);
280half     __builtin_IB_atomic_max_global_f16(__global half*, half);
281short    __builtin_IB_atomic_max_local_i16(__local short*, short);
282ushort   __builtin_IB_atomic_max_local_u16(__local ushort*, ushort);
283half     __builtin_IB_atomic_max_local_f16(__local half*, half);
284short    __builtin_IB_atomic_and_global_i16(__global short*, short);
285short    __builtin_IB_atomic_and_local_i16(__local short*, short);
286short    __builtin_IB_atomic_or_global_i16(__global short*, short);
287short    __builtin_IB_atomic_or_local_i16(__local short*, short);
288short    __builtin_IB_atomic_xor_global_i16(__global short*, short);
289short    __builtin_IB_atomic_xor_local_i16(__local short*, short);
290short    __builtin_IB_atomic_inc_global_i16(__global short*);
291short    __builtin_IB_atomic_inc_local_i16(__local short*);
292short    __builtin_IB_atomic_dec_global_i16(__global short*);
293short    __builtin_IB_atomic_dec_local_i16(__local short*);
294short    __builtin_IB_atomic_cmpxchg_global_i16(__global short*, short, short);
295half     __builtin_IB_atomic_cmpxchg_global_f16(__global half*, half, half);
296short    __builtin_IB_atomic_cmpxchg_local_i16(__local short*, short, short);
297half     __builtin_IB_atomic_cmpxchg_local_f16(__local half*, half, half);
298
299short    __builtin_IB_image_atomic_add_i16(int, int4, short);
300short    __builtin_IB_image_atomic_sub_i16(int, int4, short);
301short    __builtin_IB_image_atomic_xchg_i16(int, int4, short);
302short    __builtin_IB_image_atomic_min_i16(int, int4, short);
303ushort   __builtin_IB_image_atomic_min_u16(int, int4, ushort);
304short    __builtin_IB_image_atomic_max_i16(int, int4, short);
305ushort   __builtin_IB_image_atomic_max_u16(int, int4, ushort);
306short    __builtin_IB_image_atomic_and_i16(int, int4, short);
307short    __builtin_IB_image_atomic_or_i16(int, int4, short);
308short    __builtin_IB_image_atomic_xor_i16(int, int4, short);
309short    __builtin_IB_image_atomic_inc_i16(int, int4);
310short    __builtin_IB_image_atomic_cmpxchg_i16(int, int4, short, short);
311
312
313void __builtin_IB_kmp_acquire_lock(int *);
314void __builtin_IB_kmp_release_lock(int *);
315
316int      __builtin_IB_image_atomic_add_i32(int, int4, int);
317int      __builtin_IB_image_atomic_sub_i32(int, int4, int);
318int      __builtin_IB_image_atomic_xchg_i32(int, int4, int);
319int      __builtin_IB_image_atomic_min_i32(int, int4, int);
320uint     __builtin_IB_image_atomic_min_u32(int, int4, uint);
321int      __builtin_IB_image_atomic_max_i32(int, int4, int);
322uint     __builtin_IB_image_atomic_max_u32(int, int4, uint);
323int      __builtin_IB_image_atomic_and_i32(int, int4, int);
324int      __builtin_IB_image_atomic_or_i32(int, int4, int);
325int      __builtin_IB_image_atomic_xor_i32(int, int4, int);
326int      __builtin_IB_image_atomic_inc_i32(int, int4);
327int      __builtin_IB_image_atomic_dec_i32(int, int4);
328int      __builtin_IB_image_atomic_cmpxchg_i32(int, int4, int, int);
329
330void __builtin_IB_memcpy_global_to_private(private uchar *dst, global uchar *src, uint size, uint align);
331void __builtin_IB_memcpy_constant_to_private(private uchar *dst, constant uchar *src, uint size, uint align);
332void __builtin_IB_memcpy_local_to_private(private uchar *dst, local uchar *src, uint size, uint align);
333void __builtin_IB_memcpy_private_to_private(private uchar *dst, private uchar *src, uint size, uint align);
334#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
335void __builtin_IB_memcpy_generic_to_private(private uchar *dst, generic uchar *src, uint size, uint align);
336#endif
337
338void __builtin_IB_memcpy_private_to_global(global uchar *dst, private uchar *src, uint size, uint align);
339void __builtin_IB_memcpy_private_to_constant(constant uchar *dst, private uchar *src, uint size, uint align);
340void __builtin_IB_memcpy_private_to_local(local uchar *dst, private uchar *src, uint size, uint align);
341void __builtin_IB_memcpy_private_to_private(private uchar *dst, private uchar *src, uint size, uint align);
342#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
343void __builtin_IB_memcpy_private_to_generic(generic uchar *dst, private uchar *src, uint size, uint align);
344#endif
345
346// Correctly rounded sqrt and division
347float   __builtin_IB_ieee_sqrt(float) __attribute__((const));
348float   __builtin_IB_ieee_divide(float, float) __attribute__((const));
349
350#if defined(cl_khr_fp64)
351double   __builtin_IB_ieee_divide_f64(double, double) __attribute__((const));
352#endif
353
354// SIMD information
355ushort __builtin_IB_simd_lane_id() __attribute__((const));
356
357// an opaque handle pointing to a blob of registers.
358typedef uint GRFHandle;
359
360// legacy message phase builtins for old vme (not device side)
361void __builtin_IB_set_message_phase_legacy_dw(uint messagePhases, uint phaseIndex, uint dwIndex, uint val);
362void __builtin_IB_set_message_phase_legacy_uw(uint messagePhases, uint phaseIndex, uint dwIndex, ushort val);
363void __builtin_IB_set_message_phase_legacy_ub(uint messagePhases, uint phaseIndex, uint dwIndex, uchar val);
364
365void __builtin_IB_set_message_phase_legacy(uint messagePhases, uint phaseIndex, uint val);
366
367// Message Phases manipulation
368uint __builtin_IB_create_message_phases(uint numPhases);
369uint2 __builtin_IB_create_message_phases_uint2(uint numPhases);
370uint4 __builtin_IB_create_message_phases_uint4(uint numPhases);
371uint8 __builtin_IB_create_message_phases_uint8(uint numPhases);
372
373uint __builtin_IB_create_message_phases_no_init(uint numPhases);
374uint2 __builtin_IB_create_message_phases_no_init_uint2(uint numPhases);
375uint4 __builtin_IB_create_message_phases_no_init_uint4(uint numPhases);
376uint8 __builtin_IB_create_message_phases_no_init_uint8(uint numPhases);
377
378uint __builtin_IB_get_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex);
379uint __builtin_IB_get_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex);
380uint __builtin_IB_get_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex);
381uint __builtin_IB_get_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex);
382
383ulong __builtin_IB_get_message_phase_uq(uint messagePhases, uint phaseIndex, uint dwIndex);
384ulong __builtin_IB_get_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex);
385ulong __builtin_IB_get_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex);
386ulong __builtin_IB_get_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex);
387
388uint __builtin_IB_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex, uint val);
389uint2 __builtin_IB_set_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, uint val);
390uint4 __builtin_IB_set_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, uint val);
391uint8 __builtin_IB_set_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, uint val);
392
393uint __builtin_IB_get_message_phase(uint messagePhases, uint phaseIndex);
394uint __builtin_IB_get_message_phase_uint2(uint2 messagePhases, uint phaseIndex);
395uint __builtin_IB_get_message_phase_uint4(uint4 messagePhases, uint phaseIndex);
396uint __builtin_IB_get_message_phase_uint8(uint8 messagePhases, uint phaseIndex);
397
398uint __builtin_IB_set_message_phase(uint messagePhases, uint phaseIndex, uint val);
399uint2 __builtin_IB_set_message_phase_uint2(uint2 messagePhases, uint phaseIndex, uint val);
400uint4 __builtin_IB_set_message_phase_uint4(uint4 messagePhases, uint phaseIndex, uint val);
401uint8 __builtin_IB_set_message_phase_uint8(uint8 messagePhases, uint phaseIndex, uint val);
402
403ushort __builtin_IB_get_message_phase_uw(uint messagePhases, uint phaseIndex, uint wIndex);
404ushort __builtin_IB_get_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint wIndex);
405ushort __builtin_IB_get_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint wIndex);
406ushort __builtin_IB_get_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint wIndex);
407
408uint __builtin_IB_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint dwIndex, ushort val);
409uint2 __builtin_IB_set_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, ushort val);
410uint4 __builtin_IB_set_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, ushort val);
411uint8 __builtin_IB_set_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, ushort val);
412
413uchar __builtin_IB_get_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex);
414uchar __builtin_IB_get_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex);
415uchar __builtin_IB_get_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex);
416uchar __builtin_IB_get_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex);
417
418uint __builtin_IB_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex, uchar val);
419uint2 __builtin_IB_set_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, uchar val);
420uint4 __builtin_IB_set_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, uchar val);
421uint8 __builtin_IB_set_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, uchar val);
422
423// Broadcast a phase value to all work-items in a sub-group
424uchar  __builtin_IB_broadcast_message_phase_ub(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
425uchar  __builtin_IB_broadcast_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
426uchar  __builtin_IB_broadcast_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
427uchar  __builtin_IB_broadcast_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
428
429ushort __builtin_IB_broadcast_message_phase_uw(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
430ushort __builtin_IB_broadcast_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
431ushort __builtin_IB_broadcast_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
432ushort __builtin_IB_broadcast_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
433
434uint   __builtin_IB_broadcast_message_phase_dw(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
435uint   __builtin_IB_broadcast_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
436uint   __builtin_IB_broadcast_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
437uint   __builtin_IB_broadcast_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
438
439ulong  __builtin_IB_broadcast_message_phase_uq(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
440ulong  __builtin_IB_broadcast_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
441ulong  __builtin_IB_broadcast_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
442ulong  __builtin_IB_broadcast_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width);
443
444// Copy the value phase(s) to all work-items in a sub-group
445ushort __builtin_IB_simd_get_message_phase_uw(uint messagePhases, uint phaseIndex, uint numPhases);
446ushort __builtin_IB_simd_get_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases);
447ushort __builtin_IB_simd_get_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases);
448ushort __builtin_IB_simd_get_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases);
449
450ulong  __builtin_IB_simd_get_message_phase_uq(uint messagePhases, uint phaseIndex, uint numPhases);
451ulong  __builtin_IB_simd_get_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases);
452ulong  __builtin_IB_simd_get_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases);
453ulong  __builtin_IB_simd_get_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases);
454
455uint __builtin_IB_simd_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val);
456uint2 __builtin_IB_simd_set_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val);
457uint4 __builtin_IB_simd_set_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val);
458uint8 __builtin_IB_simd_set_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val);
459
460uint __builtin_IB_simd_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val);
461uint2 __builtin_IB_simd_set_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val);
462uint4 __builtin_IB_simd_set_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val);
463uint8 __builtin_IB_simd_set_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val);
464
465uint __builtin_IB_simd_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val);
466uint2 __builtin_IB_simd_set_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val);
467uint4 __builtin_IB_simd_set_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val);
468uint8 __builtin_IB_simd_set_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val);
469
470uint __builtin_IB_simd_set_message_phase_uq(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val);
471uint2 __builtin_IB_simd_set_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val);
472uint4 __builtin_IB_simd_set_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val);
473uint8 __builtin_IB_simd_set_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val);
474
475void __builtin_IB_simdMediaRegionCopy(GRFHandle dst, uint dbyteoffset, uint dstride, uint dnumelem,
476                                      GRFHandle src, uint sbyteoffset, uint vstride, uint width, uint hstride, uint typesize, uint execsize, uint snumelem);
477
478void __builtin_IB_extract_mv_and_sad(GRFHandle MVMin, GRFHandle SADMin, GRFHandle result, uint blockType);
479void __builtin_IB_cmp_sads(GRFHandle MVCurr, GRFHandle SADCurr, GRFHandle MVMin, GRFHandle SADMin);
480
481// VME
482uint __builtin_IB_vme_mb_block_type() __attribute__((const));
483uint __builtin_IB_vme_subpixel_mode() __attribute__((const));
484uint __builtin_IB_vme_sad_adjust_mode() __attribute__((const));
485uint __builtin_IB_vme_search_path_type() __attribute__((const));
486void __builtin_IB_vme_send_ime(GRFHandle res, GRFHandle universalInputMsg, GRFHandle imeMsg, long srcImg, long refImg, uint ref0Coord, uint ref1Coord, uint costCenter);
487void __builtin_IB_vme_send_fbr(GRFHandle res, GRFHandle universalInputMsg, GRFHandle fbrMsg, long srcImg, long refImg, uint interMbMode, uint subMbShape, uint subMbPredMode);
488void __builtin_IB_vme_send_sic(GRFHandle res, GRFHandle universalInputMsg, GRFHandle sicMsg, long srcImg, long refImg0, long refImg1);
489
490uint4 __builtin_IB_vme_send_ime_new_uint4_uint8(uint8 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode);
491uint8 __builtin_IB_vme_send_ime_new_uint8_uint8(uint8 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode);
492uint4 __builtin_IB_vme_send_ime_new_uint4_uint4(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode);
493uint8 __builtin_IB_vme_send_ime_new_uint8_uint4(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode);
494
495uint4 __builtin_IB_vme_send_fbr_new(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator);
496uint4 __builtin_IB_vme_send_sic_new(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator);
497
498uint  __builtin_IB_get_image_bti(uint img);
499
500// ballot intrinsic
501uint __builtin_IB_WaveBallot(bool p);
502
503// VA
504void   __builtin_IB_va_erode_64x4( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator );
505void   __builtin_IB_va_dilate_64x4( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator );
506void   __builtin_IB_va_minmaxfilter_16x4_SLM( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator );
507void   __builtin_IB_va_convolve_16x4_SLM( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator );
508void   __builtin_IB_va_minmax( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator );
509void   __builtin_IB_va_centroid( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator );
510void   __builtin_IB_va_boolcentroid( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator );
511void   __builtin_IB_va_boolsum( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator );
512short4 __builtin_IB_va_convolve_16x4( float2 coords, int srcImgId, int i_accelerator );
513
514// Device Enqueue
515__global void* __builtin_IB_get_default_device_queue();
516__global void* __builtin_IB_get_event_pool();
517uint __builtin_IB_get_max_workgroup_size();
518uint __builtin_IB_get_parent_event();
519uint __builtin_IB_get_prefered_workgroup_multiple();
520
521// Generic Address Space
522__local   void* __builtin_IB_to_local(void*);
523__private void* __builtin_IB_to_private(void*);
524
525// Internal program hint
526// facility for enforcing uniform property (@WIAnalysis) for
527// a local array residing in thread-private memory
528void __builtin_IB_assume_uniform(void*);
529
530// SubGroup Functions
531int     __builtin_IB_get_simd_size( void );
532int     __builtin_IB_get_simd_id( void );
533uint     __builtin_IB_simd_shuffle( uint, uint );
534bool    __builtin_IB_simd_shuffle_b(bool, uint);
535uchar    __builtin_IB_simd_shuffle_c( uchar, uint );
536ushort  __builtin_IB_simd_shuffle_us( ushort, uint );
537float     __builtin_IB_simd_shuffle_f( float, uint );
538half     __builtin_IB_simd_shuffle_h( half, uint );
539double  __builtin_IB_simd_shuffle_df(double, uint);
540uint    __builtin_IB_simd_shuffle_down( uint, uint, uint );
541ushort  __builtin_IB_simd_shuffle_down_us( ushort, ushort, uint );
542uchar  __builtin_IB_simd_shuffle_down_uc( uchar, uchar, uint );
543void   __builtin_IB_sub_group_barrier();
544
545// Block read : global address space
546uint    __builtin_IB_simd_block_read_1_global( const __global uint* );
547uint2   __builtin_IB_simd_block_read_2_global( const __global uint* );
548uint4   __builtin_IB_simd_block_read_4_global( const __global uint* );
549uint8   __builtin_IB_simd_block_read_8_global( const __global uint* );
550
551ushort    __builtin_IB_simd_block_read_1_global_h( const __global ushort* );
552ushort2   __builtin_IB_simd_block_read_2_global_h( const __global ushort* );
553ushort4   __builtin_IB_simd_block_read_4_global_h( const __global ushort* );
554ushort8   __builtin_IB_simd_block_read_8_global_h( const __global ushort* );
555ushort16  __builtin_IB_simd_block_read_16_global_h( const __global ushort* );
556
557uchar    __builtin_IB_simd_block_read_1_global_b( const __global uchar* );
558uchar2   __builtin_IB_simd_block_read_2_global_b( const __global uchar* );
559uchar4   __builtin_IB_simd_block_read_4_global_b( const __global uchar* );
560uchar8   __builtin_IB_simd_block_read_8_global_b( const __global uchar* );
561uchar16  __builtin_IB_simd_block_read_16_global_b( const __global uchar* );
562
563ulong    __builtin_IB_simd_block_read_1_global_l( const __global ulong* );
564ulong2   __builtin_IB_simd_block_read_2_global_l( const __global ulong* );
565ulong4   __builtin_IB_simd_block_read_4_global_l( const __global ulong* );
566ulong8   __builtin_IB_simd_block_read_8_global_l( const __global ulong* );
567
568void    __builtin_IB_simd_block_write_1_global( __global uint*, uint );
569void    __builtin_IB_simd_block_write_2_global( __global uint*, uint2 );
570void    __builtin_IB_simd_block_write_4_global( __global uint*, uint4 );
571void    __builtin_IB_simd_block_write_8_global( __global uint*, uint8 );
572
573void    __builtin_IB_simd_block_write_1_global_h( __global ushort*, ushort );
574void    __builtin_IB_simd_block_write_2_global_h( __global ushort*, ushort2 );
575void    __builtin_IB_simd_block_write_4_global_h( __global ushort*, ushort4 );
576void    __builtin_IB_simd_block_write_8_global_h( __global ushort*, ushort8 );
577void    __builtin_IB_simd_block_write_16_global_h( __global ushort*, ushort16 );
578
579void    __builtin_IB_simd_block_write_1_global_b( __global uchar*, uchar );
580void    __builtin_IB_simd_block_write_2_global_b( __global uchar*, uchar2 );
581void    __builtin_IB_simd_block_write_4_global_b( __global uchar*, uchar4 );
582void    __builtin_IB_simd_block_write_8_global_b( __global uchar*, uchar8 );
583void    __builtin_IB_simd_block_write_16_global_b( __global uchar*, uchar16 );
584
585void    __builtin_IB_simd_block_write_1_global_l( __global ulong*, ulong );
586void    __builtin_IB_simd_block_write_2_global_l( __global ulong*, ulong2 );
587void    __builtin_IB_simd_block_write_4_global_l( __global ulong*, ulong4 );
588void    __builtin_IB_simd_block_write_8_global_l( __global ulong*, ulong8 );
589
590// Block read : local address space
591uint    __builtin_IB_simd_block_read_1_local( const __local uint* );
592uint2   __builtin_IB_simd_block_read_2_local( const __local uint* );
593uint4   __builtin_IB_simd_block_read_4_local( const __local uint* );
594uint8   __builtin_IB_simd_block_read_8_local( const __local uint* );
595
596ushort    __builtin_IB_simd_block_read_1_local_h( const __local ushort* );
597ushort2   __builtin_IB_simd_block_read_2_local_h( const __local ushort* );
598ushort4   __builtin_IB_simd_block_read_4_local_h( const __local ushort* );
599ushort8   __builtin_IB_simd_block_read_8_local_h( const __local ushort* );
600ushort16  __builtin_IB_simd_block_read_16_local_h( const __local ushort* );
601
602uchar    __builtin_IB_simd_block_read_1_local_b( const __local uchar* );
603uchar2   __builtin_IB_simd_block_read_2_local_b( const __local uchar* );
604uchar4   __builtin_IB_simd_block_read_4_local_b( const __local uchar* );
605uchar8   __builtin_IB_simd_block_read_8_local_b( const __local uchar* );
606uchar16  __builtin_IB_simd_block_read_16_local_b( const __local uchar* );
607
608ulong    __builtin_IB_simd_block_read_1_local_l( const __local ulong* );
609ulong2   __builtin_IB_simd_block_read_2_local_l( const __local ulong* );
610ulong4   __builtin_IB_simd_block_read_4_local_l( const __local ulong* );
611ulong8   __builtin_IB_simd_block_read_8_local_l( const __local ulong* );
612
613void    __builtin_IB_simd_block_write_1_local( __local uint*, uint );
614void    __builtin_IB_simd_block_write_2_local( __local uint*, uint2 );
615void    __builtin_IB_simd_block_write_4_local( __local uint*, uint4 );
616void    __builtin_IB_simd_block_write_8_local( __local uint*, uint8 );
617
618void    __builtin_IB_simd_block_write_1_local_h( __local ushort*, ushort );
619void    __builtin_IB_simd_block_write_2_local_h( __local ushort*, ushort2 );
620void    __builtin_IB_simd_block_write_4_local_h( __local ushort*, ushort4 );
621void    __builtin_IB_simd_block_write_8_local_h( __local ushort*, ushort8 );
622void    __builtin_IB_simd_block_write_16_local_h( __local ushort*, ushort16 );
623
624void    __builtin_IB_simd_block_write_1_local_b( __local uchar*, uchar );
625void    __builtin_IB_simd_block_write_2_local_b( __local uchar*, uchar2 );
626void    __builtin_IB_simd_block_write_4_local_b( __local uchar*, uchar4 );
627void    __builtin_IB_simd_block_write_8_local_b( __local uchar*, uchar8 );
628void    __builtin_IB_simd_block_write_16_local_b( __local uchar*, uchar16 );
629
630void    __builtin_IB_simd_block_write_1_local_l( __local ulong*, ulong );
631void    __builtin_IB_simd_block_write_2_local_l( __local ulong*, ulong2 );
632void    __builtin_IB_simd_block_write_4_local_l( __local ulong*, ulong4 );
633void    __builtin_IB_simd_block_write_8_local_l( __local ulong*, ulong8 );
634
635uint    __builtin_IB_simd_media_block_read_1( int, int2 );
636uint2   __builtin_IB_simd_media_block_read_2( int, int2 );
637uint4   __builtin_IB_simd_media_block_read_4( int, int2 );
638uint8   __builtin_IB_simd_media_block_read_8( int, int2 );
639
640ushort   __builtin_IB_simd_media_block_read_1_h( int, int2 );
641ushort2  __builtin_IB_simd_media_block_read_2_h( int, int2 );
642ushort4  __builtin_IB_simd_media_block_read_4_h( int, int2 );
643ushort8  __builtin_IB_simd_media_block_read_8_h( int, int2 );
644
645uchar   __builtin_IB_simd_media_block_read_1_b( int, int2 );
646uchar2  __builtin_IB_simd_media_block_read_2_b( int, int2 );
647uchar4  __builtin_IB_simd_media_block_read_4_b( int, int2 );
648uchar8  __builtin_IB_simd_media_block_read_8_b( int, int2 );
649uchar16  __builtin_IB_simd_media_block_read_16_b( int, int2 );
650
651ulong   __builtin_IB_simd_media_block_read_1_l( int, int2 );
652ulong2  __builtin_IB_simd_media_block_read_2_l( int, int2 );
653ulong4  __builtin_IB_simd_media_block_read_4_l( int, int2 );
654ulong8  __builtin_IB_simd_media_block_read_8_l( int, int2 );
655
656void    __builtin_IB_media_block_rectangle_read( long image, int2 coords, int blockWidth, int blockHeight, GRFHandle destination );
657
658void    __builtin_IB_simd_media_block_write_1( int, int2, uint );
659void    __builtin_IB_simd_media_block_write_2( int, int2, uint2 );
660void    __builtin_IB_simd_media_block_write_4( int, int2, uint4 );
661void    __builtin_IB_simd_media_block_write_8( int, int2, uint8 );
662
663void    __builtin_IB_simd_media_block_write_1_h( int, int2, ushort );
664void    __builtin_IB_simd_media_block_write_2_h( int, int2, ushort2 );
665void    __builtin_IB_simd_media_block_write_4_h( int, int2, ushort4 );
666void    __builtin_IB_simd_media_block_write_8_h( int, int2, ushort8 );
667
668void    __builtin_IB_simd_media_block_write_1_b( int, int2, uchar );
669void    __builtin_IB_simd_media_block_write_2_b( int, int2, uchar2 );
670void    __builtin_IB_simd_media_block_write_4_b( int, int2, uchar4 );
671void    __builtin_IB_simd_media_block_write_8_b( int, int2, uchar8 );
672void    __builtin_IB_simd_media_block_write_16_b( int, int2, uchar16 );
673
674void    __builtin_IB_simd_media_block_write_1_l( int, int2, ulong );
675void    __builtin_IB_simd_media_block_write_2_l( int, int2, ulong2 );
676void    __builtin_IB_simd_media_block_write_4_l( int, int2, ulong4 );
677void    __builtin_IB_simd_media_block_write_8_l( int, int2, ulong8 );
678
679uchar   __builtin_IB_media_block_read_uchar(int image, int2 offset, int width, int height);
680uchar2  __builtin_IB_media_block_read_uchar2(int image, int2 offset, int width, int height);
681uchar4  __builtin_IB_media_block_read_uchar4(int image, int2 offset, int width, int height);
682uchar8  __builtin_IB_media_block_read_uchar8(int image, int2 offset, int width, int height);
683uchar16 __builtin_IB_media_block_read_uchar16(int image, int2 offset, int width, int height);
684
685ushort   __builtin_IB_media_block_read_ushort(int image, int2 offset, int width, int height);
686ushort2  __builtin_IB_media_block_read_ushort2(int image, int2 offset, int width, int height);
687ushort4  __builtin_IB_media_block_read_ushort4(int image, int2 offset, int width, int height);
688ushort8  __builtin_IB_media_block_read_ushort8(int image, int2 offset, int width, int height);
689ushort16 __builtin_IB_media_block_read_ushort16(int image, int2 offset, int width, int height);
690
691uint   __builtin_IB_media_block_read_uint(int image, int2 offset, int width, int height);
692uint2  __builtin_IB_media_block_read_uint2(int image, int2 offset, int width, int height);
693uint4  __builtin_IB_media_block_read_uint4(int image, int2 offset, int width, int height);
694uint8  __builtin_IB_media_block_read_uint8(int image, int2 offset, int width, int height);
695
696ulong   __builtin_IB_media_block_read_ulong(int image, int2 offset, int width, int height);
697ulong2  __builtin_IB_media_block_read_ulong2(int image, int2 offset, int width, int height);
698ulong4  __builtin_IB_media_block_read_ulong4(int image, int2 offset, int width, int height);
699ulong8  __builtin_IB_media_block_read_ulong8(int image, int2 offset, int width, int height);
700
701void __builtin_IB_media_block_write_uchar(int image, int2 offset, int width, int height, uchar pixels);
702void __builtin_IB_media_block_write_uchar2(int image, int2 offset, int width, int height, uchar2 pixels);
703void __builtin_IB_media_block_write_uchar4(int image, int2 offset, int width, int height, uchar4 pixels);
704void __builtin_IB_media_block_write_uchar8(int image, int2 offset, int width, int height, uchar8 pixels);
705void __builtin_IB_media_block_write_uchar16(int image, int2 offset, int width, int height, uchar16 pixels);
706
707void __builtin_IB_media_block_write_ushort(int image, int2 offset, int width, int height, ushort pixels);
708void __builtin_IB_media_block_write_ushort2(int image, int2 offset, int width, int height, ushort2 pixels);
709void __builtin_IB_media_block_write_ushort4(int image, int2 offset, int width, int height, ushort4 pixels);
710void __builtin_IB_media_block_write_ushort8(int image, int2 offset, int width, int height, ushort8 pixels);
711void __builtin_IB_media_block_write_ushort16(int image, int2 offset, int width, int height, ushort16 pixels);
712
713void __builtin_IB_media_block_write_uint(int image, int2 offset, int width, int height, uint pixels);
714void __builtin_IB_media_block_write_uint2(int image, int2 offset, int width, int height, uint2 pixels);
715void __builtin_IB_media_block_write_uint4(int image, int2 offset, int width, int height, uint4 pixels);
716void __builtin_IB_media_block_write_uint8(int image, int2 offset, int width, int height, uint8 pixels);
717
718void __builtin_IB_media_block_write_ulong(int image, int2 offset, int width, int height, ulong pixels);
719void __builtin_IB_media_block_write_ulong2(int image, int2 offset, int width, int height, ulong2 pixels);
720void __builtin_IB_media_block_write_ulong4(int image, int2 offset, int width, int height, ulong4 pixels);
721void __builtin_IB_media_block_write_ulong8(int image, int2 offset, int width, int height, ulong8 pixels);
722
723int __builtin_IB_dp4a_ss(int c, int a, int b) __attribute__((const));
724int __builtin_IB_dp4a_uu(int c, int a, int b) __attribute__((const));
725int __builtin_IB_dp4a_su(int c, int a, int b) __attribute__((const));
726int __builtin_IB_dp4a_us(int c, int a, int b) __attribute__((const));
727
728#define DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, group_type)  \
729type   __builtin_IB_sub_group_##group_type##_##operation##_##type_abbr(type x) __attribute__((const));
730
731#define DECL_SUB_GROUP_CLUSTERED_OPERATION(type, type_abbr, operation, group_type)  \
732type   __builtin_IB_sub_group_clustered_##group_type##_##operation##_##type_abbr(type x, int cluster_size) __attribute__((const));
733
734#define DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, operation)  \
735DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, reduce)   \
736DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, scan)     \
737DECL_SUB_GROUP_CLUSTERED_OPERATION(type, type_abbr, operation, reduce)
738
739// ARITHMETIC OPERATIONS
740// __builtin_IB_sub_group_reduce_IAdd/FAdd
741// __builtin_IB_sub_group_scan_IAdd/FAdd
742// __builtin_IB_sub_group_clustered_reduce_IAdd/FAdd
743// __builtin_IB_sub_group_reduce_IMul/FMul
744// __builtin_IB_sub_group_scan_IMul/FMul
745// __builtin_IB_sub_group_clustered_reduce_IMul/FMul
746#define DECL_SUB_GROUP_ADD_MUL(type, type_abbr, type_sign)  \
747DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Add)  \
748DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Mul)  \
749DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##MulINTEL)
750
751DECL_SUB_GROUP_ADD_MUL(char,   i8,  I)
752DECL_SUB_GROUP_ADD_MUL(short,  i16, I)
753DECL_SUB_GROUP_ADD_MUL(int,    i32, I)
754DECL_SUB_GROUP_ADD_MUL(long,   i64, I)
755DECL_SUB_GROUP_ADD_MUL(float,  f32, F)
756#if defined(cl_khr_fp64)
757DECL_SUB_GROUP_ADD_MUL(double, f64, F)
758#endif // defined(cl_khr_fp64)
759#if defined(cl_khr_fp16)
760DECL_SUB_GROUP_ADD_MUL(half,   f16, F)
761#endif // defined(cl_khr_fp16)
762
763// __builtin_IB_sub_group_reduce_SMin/UMin/FMin
764// __builtin_IB_sub_group_scan_SMin/UMin/FMin
765// __builtin_IB_sub_group_clustered_reduce_SMin/UMin/FMin
766// __builtin_IB_sub_group_reduce_SMax/UMax/FMax
767// __builtin_IB_sub_group_scan_SMax/UMax/FMax
768// __builtin_IB_sub_group_clustered_reduce_SMax/UMax/FMax
769#define DECL_SUB_GROUP_MIN_MAX(type, type_abbr, type_sign)  \
770DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Min)  \
771DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Max)
772
773DECL_SUB_GROUP_MIN_MAX(char,   i8, S)
774DECL_SUB_GROUP_MIN_MAX(uchar,  i8, U)
775DECL_SUB_GROUP_MIN_MAX(short,  i16, S)
776DECL_SUB_GROUP_MIN_MAX(ushort, i16, U)
777DECL_SUB_GROUP_MIN_MAX(int,    i32, S)
778DECL_SUB_GROUP_MIN_MAX(uint,   i32, U)
779DECL_SUB_GROUP_MIN_MAX(long,   i64, S)
780DECL_SUB_GROUP_MIN_MAX(ulong,  i64, U)
781DECL_SUB_GROUP_MIN_MAX(float,  f32, F)
782#if defined(cl_khr_fp64)
783DECL_SUB_GROUP_MIN_MAX(double, f64, F)
784#endif // defined(cl_khr_fp64)
785#if defined(cl_khr_fp16)
786DECL_SUB_GROUP_MIN_MAX(half,   f16, F)
787#endif // defined(cl_khr_fp16)
788
789// BITWISE OPERATIONS
790// __builtin_IB_sub_group_reduce_BitwiseAnd/Or/Xor
791// __builtin_IB_sub_group_scan_BitwiseAnd/Or/Xor
792// __builtin_IB_sub_group_clustered_reduce_BitwiseAnd/Or/Xor
793#define DECL_BITWISE_OPERATIONS(type, type_abbr)        \
794DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseAnd)  \
795DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseOr)   \
796DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseXor)  \
797DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseAndINTEL)  \
798DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseOrINTEL)   \
799DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseXorINTEL)
800
801DECL_BITWISE_OPERATIONS(char,  i8)
802DECL_BITWISE_OPERATIONS(short, i16)
803DECL_BITWISE_OPERATIONS(int,   i32)
804DECL_BITWISE_OPERATIONS(long,  i64)
805
806// LOGICAL OPERATIONS
807// __builtin_IB_sub_group_reduce_LogicalAnd/Or/Xor
808// __builtin_IB_sub_group_scan_LogicalAnd/Or/Xor
809// __builtin_IB_sub_group_clustered_reduce_LogicalAnd/Or/Xor
810#define DECL_LOGICAL_OPERATIONS(type, type_abbr)        \
811DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalAnd)  \
812DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalOr)   \
813DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalXor)  \
814DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalAndINTEL) \
815DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalOrINTEL)  \
816DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalXorINTEL)
817
818DECL_LOGICAL_OPERATIONS(bool, i1)
819
820// The following mul/fma with rtz is used internally for int div/rem emulation
821// x * y, using round-to-zero
822double __builtin_IB_mul_rtz_f64(double x, double y) __attribute__((const));
823float __builtin_IB_mul_rtz_f32(float x, float y) __attribute__((const));
824// x + y, using round-to-zero
825double __builtin_IB_add_rtz_f64(double x, double y) __attribute__((const));
826float __builtin_IB_add_rtz_f32(float x, float y) __attribute__((const));
827// x * y + z, using round-to-zero
828double __builtin_IB_fma_rtz_f64(double x, double y, double z) __attribute__((const));
829float __builtin_IB_fma_rtz_f32(float x, float y, float z) __attribute__((const));
830// x * y + z, using round-to-positive-infinity
831double __builtin_IB_fma_rtp_f64(double x, double y, double z) __attribute__((const));
832// x * y + z, using round-to-negative-infinity
833double __builtin_IB_fma_rtn_f64(double x, double y, double z) __attribute__((const));
834
835
836
837#include "IGCBiF_Intrinsics_Dpas.cl"
838
839
840
841#endif // IGCBIF_INTRINSICS_CL
842