1/*========================== begin_copyright_notice ============================ 2 3Copyright (C) 2017-2021 Intel Corporation 4 5SPDX-License-Identifier: MIT 6 7============================= end_copyright_notice ===========================*/ 8 9#ifndef IGCBIF_INTRINSICS_CL 10#define IGCBIF_INTRINSICS_CL 11 12#pragma OPENCL EXTENSION cl_khr_fp16 : enable 13#pragma OPENCL EXTENSION cl_khr_fp64 : enable 14 15// Access of image and sampler parameters 16 17int __builtin_IB_get_address_mode(int) __attribute__((const)); 18int __builtin_IB_is_normalized_coords(int) __attribute__((const)); 19int __builtin_IB_get_image_array_size(int) __attribute__((const)); 20int __builtin_IB_get_snap_wa_reqd(int) __attribute__((const)); 21int __builtin_IB_get_image_height(int) __attribute__((const)); 22int __builtin_IB_get_image_width(int) __attribute__((const)); 23int __builtin_IB_get_image_depth(int) __attribute__((const)); 24int __builtin_IB_get_image_channel_data_type(int) __attribute__((const)); 25int __builtin_IB_get_image_srgb_channel_order(int) __attribute__((const)); 26int __builtin_IB_get_image_channel_order(int) __attribute__((const)); 27int __builtin_IB_get_image_num_samples(int) __attribute__((const)); 28int __builtin_IB_get_image_num_mip_levels(int) __attribute__((const)); 29long __builtin_IB_get_flat_image_baseoffset(int) __attribute__((const)); 30int __builtin_IB_get_flat_image_width(int) __attribute__((const)); 31int __builtin_IB_get_flat_image_height(int) __attribute__((const)); 32int __builtin_IB_get_flat_image_pitch(int) __attribute__((const)); 33 34// Image sampling and loads 35float4 __builtin_IB_OCL_1d_sample_l(int, int, float, float); 36float4 __builtin_IB_OCL_1darr_sample_l(int, int, float2, float); 37float4 __builtin_IB_OCL_2d_sample_l(int, int, float2, float); 38float4 __builtin_IB_OCL_2darr_sample_l(int, int, float4, float); 39float4 __builtin_IB_OCL_3d_sample_l(int, int, float4, float); 40 41float4 __builtin_IB_OCL_1d_sample_d(int, int, float, float, float); 42float4 __builtin_IB_OCL_1darr_sample_d(int, int, float2, float, float); 43float4 __builtin_IB_OCL_2d_sample_d(int, int, float2, float2, float2); 44float4 __builtin_IB_OCL_2darr_sample_d(int, int, float4, float2, float2); 45float4 __builtin_IB_OCL_3d_sample_d(int, int, float4, float4, float4); 46 47// versions that return uint for read_imageui 48uint4 __builtin_IB_OCL_1d_sample_lui(int, int, float, float); 49uint4 __builtin_IB_OCL_1darr_sample_lui(int, int, float2, float); 50uint4 __builtin_IB_OCL_2d_sample_lui(int, int, float2, float); 51uint4 __builtin_IB_OCL_2darr_sample_lui(int, int, float4, float); 52uint4 __builtin_IB_OCL_3d_sample_lui(int, int, float4, float); 53 54uint4 __builtin_IB_OCL_1d_sample_dui(int, int, float, float, float); 55uint4 __builtin_IB_OCL_1darr_sample_dui(int, int, float2, float, float); 56uint4 __builtin_IB_OCL_2d_sample_dui(int, int, float2, float2, float2); 57uint4 __builtin_IB_OCL_2darr_sample_dui(int, int, float4, float2, float2); 58uint4 __builtin_IB_OCL_3d_sample_dui(int, int, float4, float4, float4); 59 60uint4 __builtin_IB_OCL_1d_ldui(int, int, int); 61uint4 __builtin_IB_OCL_1darr_ldui(int, int2, int); 62uint4 __builtin_IB_OCL_2d_ldui(int, int2, int); 63uint4 __builtin_IB_OCL_2darr_ldui(int, int4, int); 64uint4 __builtin_IB_OCL_3d_ldui(int, int4, int); 65 66float4 __builtin_IB_OCL_1d_ld(int, int, int); 67float4 __builtin_IB_OCL_1darr_ld(int, int2, int); 68float4 __builtin_IB_OCL_2d_ld(int, int2, int); 69float4 __builtin_IB_OCL_2darr_ld(int, int4, int); 70float4 __builtin_IB_OCL_3d_ld(int, int4, int); 71 72float4 __builtin_IB_OCL_2d_ldmcs(int, int2); 73float4 __builtin_IB_OCL_2darr_ldmcs(int, int4); 74float4 __builtin_IB_OCL_2d_ld2dms(int, int2, int, float4); 75uint4 __builtin_IB_OCL_2d_ld2dmsui(int, int2, int, float4); 76float4 __builtin_IB_OCL_2darr_ld2dms(int, int4, int, float4); 77uint4 __builtin_IB_OCL_2darr_ld2dmsui(int, int4, int, float4); 78 79int __builtin_IB_convert_sampler_to_int(sampler_t); 80 81// Convert Functions for pipes and samplers 82#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) 83__global void* __builtin_IB_convert_pipe_ro_to_intel_pipe(pipe int); 84__global void* __builtin_IB_convert_pipe_wo_to_intel_pipe(write_only pipe int); 85#endif 86 87// Image writes 88void __builtin_IB_write_1darr_ui(int, int2, uint4, int); 89void __builtin_IB_write_1d_ui(int, int, uint4, int); 90void __builtin_IB_write_2darr_ui(int, int4, uint4, int); 91void __builtin_IB_write_2d_ui(int, int2, uint4, int); 92void __builtin_IB_write_3d_ui(int, int4, uint4, int); 93void __builtin_IB_write_2darr_f(int, int4, float4, int); 94void __builtin_IB_write_2d_f(int, int2, float4, int); 95 96// Workgroup functions 97local uchar* __builtin_IB_AllocLocalMemPool(bool allocAllWorkgroups, uint numAdditionalElements, uint elementSize); 98 99// Memory fences 100// See GenISAIntrinsics.td for documentation 101void __builtin_IB_memfence(bool commitEnable, bool flushRW, bool flushConstant, bool flushTexture, bool flushIcache, bool isGlobal, bool invalidateL1); 102void __builtin_IB_flush_sampler_cache(void); 103void __builtin_IB_typedmemfence(bool invalidateCache); 104 105// Barrier 106void __builtin_IB_thread_group_barrier(void) __attribute__((convergent)); 107void __builtin_IB_thread_group_barrier_signal(void) __attribute__((convergent)); 108void __builtin_IB_thread_group_barrier_wait(void) __attribute__((convergent)); 109 110// Workitem functions 111uint __builtin_IB_get_work_dim(void) __attribute__((const)); 112uint __builtin_IB_get_group_id(uint) __attribute__((const)); 113uint __builtin_IB_get_global_offset(uint) __attribute__((const)); 114uint __builtin_IB_get_local_size(uint) __attribute__((const)); 115uint __builtin_IB_get_local_id_x(void) __attribute__((const)); 116uint __builtin_IB_get_local_id_y(void) __attribute__((const)); 117uint __builtin_IB_get_local_id_z(void) __attribute__((const)); 118uint __builtin_IB_get_global_size(uint) __attribute__((const)); 119uint __builtin_IB_get_num_groups(uint) __attribute__((const)); 120uint __builtin_IB_get_enqueued_local_size(uint) __attribute__((const)); 121 122// Double precision conversions 123half __builtin_IB_ftoh_rtn(float) __attribute__((const)); 124half __builtin_IB_ftoh_rtp(float) __attribute__((const)); 125half __builtin_IB_ftoh_rtz(float) __attribute__((const)); 126#if defined(cl_khr_fp64) 127#endif // defined(cl_khr_fp64) 128 129// Debug/Testing Built-In Functions 130uint2 __builtin_IB_read_cycle_counter(void) __attribute__((const)); 131void __builtin_IB_source_value(uint reg); 132uint __builtin_IB_set_dbg_register(uint dgb0_0); 133uint __builtin_IB_movreg(uint reg) __attribute__((const)); 134uint __builtin_IB_movflag(uint flag) __attribute__((const)); 135uint __builtin_IB_movcr(uint reg) __attribute__((const)); 136uint __builtin_IB_hw_thread_id(void) __attribute__((const)); 137uint __builtin_IB_slice_id(void) __attribute__((const)); 138uint __builtin_IB_subslice_id(void) __attribute__((const)); 139uint __builtin_IB_dual_subslice_id(void) __attribute__((const)); 140uint __builtin_IB_eu_id(void) __attribute__((const)); 141uint __builtin_IB_get_sr0(uint DWNumber); // DWNumber=0|1|2|3 142uint __builtin_IB_eu_thread_id(void) __attribute__((const)); 143void __builtin_IB_profile_snapshot(int point_type,int point_index) __attribute__((const)); 144void __builtin_IB_profile_aggregated(int point_type,int point_index) __attribute__((const)); 145void __builtin_IB_eu_thread_pause(uint value); 146 147// int -> float operations 148float __builtin_IB_itof_rtn(int); 149float __builtin_IB_itof_rtp(int); 150float __builtin_IB_itof_rtz(int); 151float __builtin_IB_uitof_rtn(uint); 152float __builtin_IB_uitof_rtp(uint); 153float __builtin_IB_uitof_rtz(uint); 154 155#if defined(cl_khr_fp64) 156// long -> double operations 157double __builtin_IB_itofp64_rtn(long); 158double __builtin_IB_itofp64_rtp(long); 159double __builtin_IB_itofp64_rtz(long); 160double __builtin_IB_uitofp64_rtn(ulong); 161double __builtin_IB_uitofp64_rtp(ulong); 162double __builtin_IB_uitofp64_rtz(ulong); 163#endif 164 165// Native integer operations 166uint __builtin_IB_bfi(uint, uint, uint, uint) __attribute__((const)); 167uint __builtin_IB_ibfe(uint, uint, uint) __attribute__((const)); 168uint __builtin_IB_ubfe(uint, uint, uint) __attribute__((const)); 169uint __builtin_IB_bfrev(uint) __attribute__((const)); 170 171char __builtin_IB_popcount_1u8(char) __attribute__((const)); 172short __builtin_IB_popcount_1u16(short) __attribute__((const)); 173int __builtin_IB_popcount_1u32(int) __attribute__((const)); 174 175// Native math operations - float version 176float __builtin_IB_frnd_ne(float) __attribute__((const)); 177float __builtin_IB_frnd_ni(float) __attribute__((const)); 178float __builtin_IB_frnd_pi(float) __attribute__((const)); 179float __builtin_IB_frnd_zi(float) __attribute__((const)); 180float __builtin_IB_native_exp2f(float) __attribute__((const)); 181float __builtin_IB_native_cosf(float) __attribute__((const)); 182float __builtin_IB_native_log2f(float) __attribute__((const)); 183float __builtin_IB_native_powrf(float, float) __attribute__((const)); 184float __builtin_IB_native_sinf(float) __attribute__((const)); 185float __builtin_IB_native_sqrtf(float) __attribute__((const)); 186float __builtin_IB_fmax(float, float) __attribute__((const)); 187float __builtin_IB_fmin(float, float) __attribute__((const)); 188half __builtin_IB_HMAX(half, half) __attribute__((const)); 189half __builtin_IB_HMIN(half, half) __attribute__((const)); 190 191// Native math operations - fp16 version 192half __builtin_IB_native_cosh(half) __attribute__((const)); 193half __builtin_IB_native_exp2h(half) __attribute__((const)); 194half __builtin_IB_native_log2h(half) __attribute__((const)); 195half __builtin_IB_native_sinh(half) __attribute__((const)); 196half __builtin_IB_native_sqrth(half) __attribute__((const)); 197half __builtin_IB_fmah(half, half, half) __attribute__((const)); 198 199// Native math operations - fp64 version 200#if defined(cl_khr_fp64) 201double __builtin_IB_native_sqrtd(double) __attribute__((const)); 202double __builtin_IB_dmin(double, double) __attribute__((const)); 203double __builtin_IB_dmax(double, double) __attribute__((const)); 204#endif 205 206// Atomic operations 207int __builtin_IB_atomic_add_global_i32(__global int*, int); 208int __builtin_IB_atomic_add_local_i32(__local int*, int); 209int __builtin_IB_atomic_sub_global_i32(__global int*, int); 210int __builtin_IB_atomic_sub_local_i32(__local int*, int); 211int __builtin_IB_atomic_xchg_global_i32(__global int*, int); 212int __builtin_IB_atomic_xchg_local_i32(__local int*, int); 213int __builtin_IB_atomic_min_global_i32(__global int*, int); 214uint __builtin_IB_atomic_min_global_u32(__global uint*, uint); 215float __builtin_IB_atomic_min_global_f32(__global float*, float); 216int __builtin_IB_atomic_min_local_i32(__local int*, int); 217uint __builtin_IB_atomic_min_local_u32(__local uint*, uint); 218float __builtin_IB_atomic_min_local_f32(__local float*, float); 219int __builtin_IB_atomic_max_global_i32(__global int*, int); 220uint __builtin_IB_atomic_max_global_u32(__global uint*, uint); 221float __builtin_IB_atomic_max_global_f32(__global float*, float); 222int __builtin_IB_atomic_max_local_i32(__local int*, int); 223uint __builtin_IB_atomic_max_local_u32(__local uint*, uint); 224float __builtin_IB_atomic_max_local_f32(__local float*, float); 225int __builtin_IB_atomic_and_global_i32(__global int*, int); 226int __builtin_IB_atomic_and_local_i32(__local int*, int); 227int __builtin_IB_atomic_or_global_i32(__global int*, int); 228int __builtin_IB_atomic_or_local_i32(__local int*, int); 229int __builtin_IB_atomic_xor_global_i32(__global int*, int); 230int __builtin_IB_atomic_xor_local_i32(__local int*, int); 231int __builtin_IB_atomic_inc_global_i32(__global int*); 232int __builtin_IB_atomic_inc_local_i32(__local int*); 233int __builtin_IB_atomic_dec_global_i32(__global int*); 234int __builtin_IB_atomic_dec_local_i32(__local int*); 235int __builtin_IB_atomic_cmpxchg_global_i32(__global int*, int, int); 236float __builtin_IB_atomic_cmpxchg_global_f32(__global float*, float, float); 237int __builtin_IB_atomic_cmpxchg_local_i32(__local int*, int, int); 238float __builtin_IB_atomic_cmpxchg_local_f32(__local float*, float, float); 239 240// Float Atomics 241#if defined(cl_intel_global_float_atomics) 242float __builtin_IB_atomic_add_global_f32(__global float*, float); 243float __builtin_IB_atomic_sub_global_f32(__global float*, float); 244#endif // defined(cl_intel_global_float_atomics) 245// 64bit Atomic operations 246#if defined(cl_intel_64bit_global_atomics_placeholder) 247long __builtin_IB_atomic_add_global_i64(__global long*, long); 248long __builtin_IB_atomic_sub_global_i64(__global long*, long); 249long __builtin_IB_atomic_xchg_global_i64(__global long*, long); 250long __builtin_IB_atomic_min_global_i64(__global long*, long); 251ulong __builtin_IB_atomic_min_global_u64(__global ulong*, ulong); 252double __builtin_IB_atomic_min_global_f64(__global double*, double); 253long __builtin_IB_atomic_max_global_i64(__global long*, long); 254ulong __builtin_IB_atomic_max_global_u64(__global ulong*, ulong); 255double __builtin_IB_atomic_max_global_f64(__global double*, double); 256long __builtin_IB_atomic_and_global_i64(__global long*, long); 257long __builtin_IB_atomic_or_global_i64(__global long*, long); 258long __builtin_IB_atomic_xor_global_i64(__global long*, long); 259long __builtin_IB_atomic_inc_global_i64(__global long*); 260long __builtin_IB_atomic_dec_global_i64(__global long*); 261long __builtin_IB_atomic_cmpxchg_global_i64(__global long*, long, long); 262double __builtin_IB_atomic_cmpxchg_global_f64(__global double*, double, double); 263#endif // defined(cl_intel_64bit_global_atomics_placeholder) 264 265// Atomic operations 266short __builtin_IB_atomic_add_global_i16(__global short*, short); 267short __builtin_IB_atomic_add_local_i16(__local short*, short); 268short __builtin_IB_atomic_sub_global_i16(__global short*, short); 269short __builtin_IB_atomic_sub_local_i16(__local short*, short); 270short __builtin_IB_atomic_xchg_global_i16(__global short*, short); 271short __builtin_IB_atomic_xchg_local_i16(__local short*, short); 272short __builtin_IB_atomic_min_global_i16(__global short*, short); 273ushort __builtin_IB_atomic_min_global_u16(__global ushort*, ushort); 274half __builtin_IB_atomic_min_global_f16(__global half*, half); 275short __builtin_IB_atomic_min_local_i16(__local short*, short); 276ushort __builtin_IB_atomic_min_local_u16(__local ushort*, ushort); 277half __builtin_IB_atomic_min_local_f16(__local half*, half); 278short __builtin_IB_atomic_max_global_i16(__global short*, short); 279ushort __builtin_IB_atomic_max_global_u16(__global ushort*, ushort); 280half __builtin_IB_atomic_max_global_f16(__global half*, half); 281short __builtin_IB_atomic_max_local_i16(__local short*, short); 282ushort __builtin_IB_atomic_max_local_u16(__local ushort*, ushort); 283half __builtin_IB_atomic_max_local_f16(__local half*, half); 284short __builtin_IB_atomic_and_global_i16(__global short*, short); 285short __builtin_IB_atomic_and_local_i16(__local short*, short); 286short __builtin_IB_atomic_or_global_i16(__global short*, short); 287short __builtin_IB_atomic_or_local_i16(__local short*, short); 288short __builtin_IB_atomic_xor_global_i16(__global short*, short); 289short __builtin_IB_atomic_xor_local_i16(__local short*, short); 290short __builtin_IB_atomic_inc_global_i16(__global short*); 291short __builtin_IB_atomic_inc_local_i16(__local short*); 292short __builtin_IB_atomic_dec_global_i16(__global short*); 293short __builtin_IB_atomic_dec_local_i16(__local short*); 294short __builtin_IB_atomic_cmpxchg_global_i16(__global short*, short, short); 295half __builtin_IB_atomic_cmpxchg_global_f16(__global half*, half, half); 296short __builtin_IB_atomic_cmpxchg_local_i16(__local short*, short, short); 297half __builtin_IB_atomic_cmpxchg_local_f16(__local half*, half, half); 298 299short __builtin_IB_image_atomic_add_i16(int, int4, short); 300short __builtin_IB_image_atomic_sub_i16(int, int4, short); 301short __builtin_IB_image_atomic_xchg_i16(int, int4, short); 302short __builtin_IB_image_atomic_min_i16(int, int4, short); 303ushort __builtin_IB_image_atomic_min_u16(int, int4, ushort); 304short __builtin_IB_image_atomic_max_i16(int, int4, short); 305ushort __builtin_IB_image_atomic_max_u16(int, int4, ushort); 306short __builtin_IB_image_atomic_and_i16(int, int4, short); 307short __builtin_IB_image_atomic_or_i16(int, int4, short); 308short __builtin_IB_image_atomic_xor_i16(int, int4, short); 309short __builtin_IB_image_atomic_inc_i16(int, int4); 310short __builtin_IB_image_atomic_cmpxchg_i16(int, int4, short, short); 311 312 313void __builtin_IB_kmp_acquire_lock(int *); 314void __builtin_IB_kmp_release_lock(int *); 315 316int __builtin_IB_image_atomic_add_i32(int, int4, int); 317int __builtin_IB_image_atomic_sub_i32(int, int4, int); 318int __builtin_IB_image_atomic_xchg_i32(int, int4, int); 319int __builtin_IB_image_atomic_min_i32(int, int4, int); 320uint __builtin_IB_image_atomic_min_u32(int, int4, uint); 321int __builtin_IB_image_atomic_max_i32(int, int4, int); 322uint __builtin_IB_image_atomic_max_u32(int, int4, uint); 323int __builtin_IB_image_atomic_and_i32(int, int4, int); 324int __builtin_IB_image_atomic_or_i32(int, int4, int); 325int __builtin_IB_image_atomic_xor_i32(int, int4, int); 326int __builtin_IB_image_atomic_inc_i32(int, int4); 327int __builtin_IB_image_atomic_dec_i32(int, int4); 328int __builtin_IB_image_atomic_cmpxchg_i32(int, int4, int, int); 329 330void __builtin_IB_memcpy_global_to_private(private uchar *dst, global uchar *src, uint size, uint align); 331void __builtin_IB_memcpy_constant_to_private(private uchar *dst, constant uchar *src, uint size, uint align); 332void __builtin_IB_memcpy_local_to_private(private uchar *dst, local uchar *src, uint size, uint align); 333void __builtin_IB_memcpy_private_to_private(private uchar *dst, private uchar *src, uint size, uint align); 334#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) 335void __builtin_IB_memcpy_generic_to_private(private uchar *dst, generic uchar *src, uint size, uint align); 336#endif 337 338void __builtin_IB_memcpy_private_to_global(global uchar *dst, private uchar *src, uint size, uint align); 339void __builtin_IB_memcpy_private_to_constant(constant uchar *dst, private uchar *src, uint size, uint align); 340void __builtin_IB_memcpy_private_to_local(local uchar *dst, private uchar *src, uint size, uint align); 341void __builtin_IB_memcpy_private_to_private(private uchar *dst, private uchar *src, uint size, uint align); 342#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) 343void __builtin_IB_memcpy_private_to_generic(generic uchar *dst, private uchar *src, uint size, uint align); 344#endif 345 346// Correctly rounded sqrt and division 347float __builtin_IB_ieee_sqrt(float) __attribute__((const)); 348float __builtin_IB_ieee_divide(float, float) __attribute__((const)); 349 350#if defined(cl_khr_fp64) 351double __builtin_IB_ieee_divide_f64(double, double) __attribute__((const)); 352#endif 353 354// SIMD information 355ushort __builtin_IB_simd_lane_id() __attribute__((const)); 356 357// an opaque handle pointing to a blob of registers. 358typedef uint GRFHandle; 359 360// legacy message phase builtins for old vme (not device side) 361void __builtin_IB_set_message_phase_legacy_dw(uint messagePhases, uint phaseIndex, uint dwIndex, uint val); 362void __builtin_IB_set_message_phase_legacy_uw(uint messagePhases, uint phaseIndex, uint dwIndex, ushort val); 363void __builtin_IB_set_message_phase_legacy_ub(uint messagePhases, uint phaseIndex, uint dwIndex, uchar val); 364 365void __builtin_IB_set_message_phase_legacy(uint messagePhases, uint phaseIndex, uint val); 366 367// Message Phases manipulation 368uint __builtin_IB_create_message_phases(uint numPhases); 369uint2 __builtin_IB_create_message_phases_uint2(uint numPhases); 370uint4 __builtin_IB_create_message_phases_uint4(uint numPhases); 371uint8 __builtin_IB_create_message_phases_uint8(uint numPhases); 372 373uint __builtin_IB_create_message_phases_no_init(uint numPhases); 374uint2 __builtin_IB_create_message_phases_no_init_uint2(uint numPhases); 375uint4 __builtin_IB_create_message_phases_no_init_uint4(uint numPhases); 376uint8 __builtin_IB_create_message_phases_no_init_uint8(uint numPhases); 377 378uint __builtin_IB_get_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex); 379uint __builtin_IB_get_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex); 380uint __builtin_IB_get_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex); 381uint __builtin_IB_get_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex); 382 383ulong __builtin_IB_get_message_phase_uq(uint messagePhases, uint phaseIndex, uint dwIndex); 384ulong __builtin_IB_get_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex); 385ulong __builtin_IB_get_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex); 386ulong __builtin_IB_get_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex); 387 388uint __builtin_IB_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint dwIndex, uint val); 389uint2 __builtin_IB_set_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, uint val); 390uint4 __builtin_IB_set_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, uint val); 391uint8 __builtin_IB_set_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, uint val); 392 393uint __builtin_IB_get_message_phase(uint messagePhases, uint phaseIndex); 394uint __builtin_IB_get_message_phase_uint2(uint2 messagePhases, uint phaseIndex); 395uint __builtin_IB_get_message_phase_uint4(uint4 messagePhases, uint phaseIndex); 396uint __builtin_IB_get_message_phase_uint8(uint8 messagePhases, uint phaseIndex); 397 398uint __builtin_IB_set_message_phase(uint messagePhases, uint phaseIndex, uint val); 399uint2 __builtin_IB_set_message_phase_uint2(uint2 messagePhases, uint phaseIndex, uint val); 400uint4 __builtin_IB_set_message_phase_uint4(uint4 messagePhases, uint phaseIndex, uint val); 401uint8 __builtin_IB_set_message_phase_uint8(uint8 messagePhases, uint phaseIndex, uint val); 402 403ushort __builtin_IB_get_message_phase_uw(uint messagePhases, uint phaseIndex, uint wIndex); 404ushort __builtin_IB_get_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint wIndex); 405ushort __builtin_IB_get_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint wIndex); 406ushort __builtin_IB_get_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint wIndex); 407 408uint __builtin_IB_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint dwIndex, ushort val); 409uint2 __builtin_IB_set_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, ushort val); 410uint4 __builtin_IB_set_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, ushort val); 411uint8 __builtin_IB_set_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, ushort val); 412 413uchar __builtin_IB_get_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex); 414uchar __builtin_IB_get_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex); 415uchar __builtin_IB_get_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex); 416uchar __builtin_IB_get_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex); 417 418uint __builtin_IB_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint dwIndex, uchar val); 419uint2 __builtin_IB_set_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint dwIndex, uchar val); 420uint4 __builtin_IB_set_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint dwIndex, uchar val); 421uint8 __builtin_IB_set_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint dwIndex, uchar val); 422 423// Broadcast a phase value to all work-items in a sub-group 424uchar __builtin_IB_broadcast_message_phase_ub(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 425uchar __builtin_IB_broadcast_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 426uchar __builtin_IB_broadcast_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 427uchar __builtin_IB_broadcast_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 428 429ushort __builtin_IB_broadcast_message_phase_uw(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 430ushort __builtin_IB_broadcast_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 431ushort __builtin_IB_broadcast_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 432ushort __builtin_IB_broadcast_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 433 434uint __builtin_IB_broadcast_message_phase_dw(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 435uint __builtin_IB_broadcast_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 436uint __builtin_IB_broadcast_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 437uint __builtin_IB_broadcast_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 438 439ulong __builtin_IB_broadcast_message_phase_uq(uint messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 440ulong __builtin_IB_broadcast_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 441ulong __builtin_IB_broadcast_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 442ulong __builtin_IB_broadcast_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint phaseSubindex, uint width); 443 444// Copy the value phase(s) to all work-items in a sub-group 445ushort __builtin_IB_simd_get_message_phase_uw(uint messagePhases, uint phaseIndex, uint numPhases); 446ushort __builtin_IB_simd_get_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases); 447ushort __builtin_IB_simd_get_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases); 448ushort __builtin_IB_simd_get_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases); 449 450ulong __builtin_IB_simd_get_message_phase_uq(uint messagePhases, uint phaseIndex, uint numPhases); 451ulong __builtin_IB_simd_get_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases); 452ulong __builtin_IB_simd_get_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases); 453ulong __builtin_IB_simd_get_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases); 454 455uint __builtin_IB_simd_set_message_phase_ub(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val); 456uint2 __builtin_IB_simd_set_message_phase_ub_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val); 457uint4 __builtin_IB_simd_set_message_phase_ub_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val); 458uint8 __builtin_IB_simd_set_message_phase_ub_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uchar val); 459 460uint __builtin_IB_simd_set_message_phase_uw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val); 461uint2 __builtin_IB_simd_set_message_phase_uw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val); 462uint4 __builtin_IB_simd_set_message_phase_uw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val); 463uint8 __builtin_IB_simd_set_message_phase_uw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ushort val); 464 465uint __builtin_IB_simd_set_message_phase_dw(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val); 466uint2 __builtin_IB_simd_set_message_phase_dw_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val); 467uint4 __builtin_IB_simd_set_message_phase_dw_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val); 468uint8 __builtin_IB_simd_set_message_phase_dw_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, uint val); 469 470uint __builtin_IB_simd_set_message_phase_uq(uint messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val); 471uint2 __builtin_IB_simd_set_message_phase_uq_uint2(uint2 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val); 472uint4 __builtin_IB_simd_set_message_phase_uq_uint4(uint4 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val); 473uint8 __builtin_IB_simd_set_message_phase_uq_uint8(uint8 messagePhases, uint phaseIndex, uint numPhases, uint subReg, uint numLanes, ulong val); 474 475void __builtin_IB_simdMediaRegionCopy(GRFHandle dst, uint dbyteoffset, uint dstride, uint dnumelem, 476 GRFHandle src, uint sbyteoffset, uint vstride, uint width, uint hstride, uint typesize, uint execsize, uint snumelem); 477 478void __builtin_IB_extract_mv_and_sad(GRFHandle MVMin, GRFHandle SADMin, GRFHandle result, uint blockType); 479void __builtin_IB_cmp_sads(GRFHandle MVCurr, GRFHandle SADCurr, GRFHandle MVMin, GRFHandle SADMin); 480 481// VME 482uint __builtin_IB_vme_mb_block_type() __attribute__((const)); 483uint __builtin_IB_vme_subpixel_mode() __attribute__((const)); 484uint __builtin_IB_vme_sad_adjust_mode() __attribute__((const)); 485uint __builtin_IB_vme_search_path_type() __attribute__((const)); 486void __builtin_IB_vme_send_ime(GRFHandle res, GRFHandle universalInputMsg, GRFHandle imeMsg, long srcImg, long refImg, uint ref0Coord, uint ref1Coord, uint costCenter); 487void __builtin_IB_vme_send_fbr(GRFHandle res, GRFHandle universalInputMsg, GRFHandle fbrMsg, long srcImg, long refImg, uint interMbMode, uint subMbShape, uint subMbPredMode); 488void __builtin_IB_vme_send_sic(GRFHandle res, GRFHandle universalInputMsg, GRFHandle sicMsg, long srcImg, long refImg0, long refImg1); 489 490uint4 __builtin_IB_vme_send_ime_new_uint4_uint8(uint8 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode); 491uint8 __builtin_IB_vme_send_ime_new_uint8_uint8(uint8 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode); 492uint4 __builtin_IB_vme_send_ime_new_uint4_uint4(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode); 493uint8 __builtin_IB_vme_send_ime_new_uint8_uint4(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator, uint streamMode); 494 495uint4 __builtin_IB_vme_send_fbr_new(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator); 496uint4 __builtin_IB_vme_send_sic_new(uint4 inputMsg, long srcImg, long fwdRefImg, long bwdRefImg, long accelerator); 497 498uint __builtin_IB_get_image_bti(uint img); 499 500// ballot intrinsic 501uint __builtin_IB_WaveBallot(bool p); 502 503// VA 504void __builtin_IB_va_erode_64x4( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator ); 505void __builtin_IB_va_dilate_64x4( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator ); 506void __builtin_IB_va_minmaxfilter_16x4_SLM( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator ); 507void __builtin_IB_va_convolve_16x4_SLM( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator ); 508void __builtin_IB_va_minmax( __local uchar* dst, float2 coords, int srcImgId, int i_accelerator ); 509void __builtin_IB_va_centroid( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator ); 510void __builtin_IB_va_boolcentroid( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator ); 511void __builtin_IB_va_boolsum( __local uchar* dst, float2 coords, int2 size, int srcImgId, int i_accelerator ); 512short4 __builtin_IB_va_convolve_16x4( float2 coords, int srcImgId, int i_accelerator ); 513 514// Device Enqueue 515__global void* __builtin_IB_get_default_device_queue(); 516__global void* __builtin_IB_get_event_pool(); 517uint __builtin_IB_get_max_workgroup_size(); 518uint __builtin_IB_get_parent_event(); 519uint __builtin_IB_get_prefered_workgroup_multiple(); 520 521// Generic Address Space 522__local void* __builtin_IB_to_local(void*); 523__private void* __builtin_IB_to_private(void*); 524 525// Internal program hint 526// facility for enforcing uniform property (@WIAnalysis) for 527// a local array residing in thread-private memory 528void __builtin_IB_assume_uniform(void*); 529 530// SubGroup Functions 531int __builtin_IB_get_simd_size( void ); 532int __builtin_IB_get_simd_id( void ); 533uint __builtin_IB_simd_shuffle( uint, uint ); 534bool __builtin_IB_simd_shuffle_b(bool, uint); 535uchar __builtin_IB_simd_shuffle_c( uchar, uint ); 536ushort __builtin_IB_simd_shuffle_us( ushort, uint ); 537float __builtin_IB_simd_shuffle_f( float, uint ); 538half __builtin_IB_simd_shuffle_h( half, uint ); 539double __builtin_IB_simd_shuffle_df(double, uint); 540uint __builtin_IB_simd_shuffle_down( uint, uint, uint ); 541ushort __builtin_IB_simd_shuffle_down_us( ushort, ushort, uint ); 542uchar __builtin_IB_simd_shuffle_down_uc( uchar, uchar, uint ); 543void __builtin_IB_sub_group_barrier(); 544 545// Block read : global address space 546uint __builtin_IB_simd_block_read_1_global( const __global uint* ); 547uint2 __builtin_IB_simd_block_read_2_global( const __global uint* ); 548uint4 __builtin_IB_simd_block_read_4_global( const __global uint* ); 549uint8 __builtin_IB_simd_block_read_8_global( const __global uint* ); 550 551ushort __builtin_IB_simd_block_read_1_global_h( const __global ushort* ); 552ushort2 __builtin_IB_simd_block_read_2_global_h( const __global ushort* ); 553ushort4 __builtin_IB_simd_block_read_4_global_h( const __global ushort* ); 554ushort8 __builtin_IB_simd_block_read_8_global_h( const __global ushort* ); 555ushort16 __builtin_IB_simd_block_read_16_global_h( const __global ushort* ); 556 557uchar __builtin_IB_simd_block_read_1_global_b( const __global uchar* ); 558uchar2 __builtin_IB_simd_block_read_2_global_b( const __global uchar* ); 559uchar4 __builtin_IB_simd_block_read_4_global_b( const __global uchar* ); 560uchar8 __builtin_IB_simd_block_read_8_global_b( const __global uchar* ); 561uchar16 __builtin_IB_simd_block_read_16_global_b( const __global uchar* ); 562 563ulong __builtin_IB_simd_block_read_1_global_l( const __global ulong* ); 564ulong2 __builtin_IB_simd_block_read_2_global_l( const __global ulong* ); 565ulong4 __builtin_IB_simd_block_read_4_global_l( const __global ulong* ); 566ulong8 __builtin_IB_simd_block_read_8_global_l( const __global ulong* ); 567 568void __builtin_IB_simd_block_write_1_global( __global uint*, uint ); 569void __builtin_IB_simd_block_write_2_global( __global uint*, uint2 ); 570void __builtin_IB_simd_block_write_4_global( __global uint*, uint4 ); 571void __builtin_IB_simd_block_write_8_global( __global uint*, uint8 ); 572 573void __builtin_IB_simd_block_write_1_global_h( __global ushort*, ushort ); 574void __builtin_IB_simd_block_write_2_global_h( __global ushort*, ushort2 ); 575void __builtin_IB_simd_block_write_4_global_h( __global ushort*, ushort4 ); 576void __builtin_IB_simd_block_write_8_global_h( __global ushort*, ushort8 ); 577void __builtin_IB_simd_block_write_16_global_h( __global ushort*, ushort16 ); 578 579void __builtin_IB_simd_block_write_1_global_b( __global uchar*, uchar ); 580void __builtin_IB_simd_block_write_2_global_b( __global uchar*, uchar2 ); 581void __builtin_IB_simd_block_write_4_global_b( __global uchar*, uchar4 ); 582void __builtin_IB_simd_block_write_8_global_b( __global uchar*, uchar8 ); 583void __builtin_IB_simd_block_write_16_global_b( __global uchar*, uchar16 ); 584 585void __builtin_IB_simd_block_write_1_global_l( __global ulong*, ulong ); 586void __builtin_IB_simd_block_write_2_global_l( __global ulong*, ulong2 ); 587void __builtin_IB_simd_block_write_4_global_l( __global ulong*, ulong4 ); 588void __builtin_IB_simd_block_write_8_global_l( __global ulong*, ulong8 ); 589 590// Block read : local address space 591uint __builtin_IB_simd_block_read_1_local( const __local uint* ); 592uint2 __builtin_IB_simd_block_read_2_local( const __local uint* ); 593uint4 __builtin_IB_simd_block_read_4_local( const __local uint* ); 594uint8 __builtin_IB_simd_block_read_8_local( const __local uint* ); 595 596ushort __builtin_IB_simd_block_read_1_local_h( const __local ushort* ); 597ushort2 __builtin_IB_simd_block_read_2_local_h( const __local ushort* ); 598ushort4 __builtin_IB_simd_block_read_4_local_h( const __local ushort* ); 599ushort8 __builtin_IB_simd_block_read_8_local_h( const __local ushort* ); 600ushort16 __builtin_IB_simd_block_read_16_local_h( const __local ushort* ); 601 602uchar __builtin_IB_simd_block_read_1_local_b( const __local uchar* ); 603uchar2 __builtin_IB_simd_block_read_2_local_b( const __local uchar* ); 604uchar4 __builtin_IB_simd_block_read_4_local_b( const __local uchar* ); 605uchar8 __builtin_IB_simd_block_read_8_local_b( const __local uchar* ); 606uchar16 __builtin_IB_simd_block_read_16_local_b( const __local uchar* ); 607 608ulong __builtin_IB_simd_block_read_1_local_l( const __local ulong* ); 609ulong2 __builtin_IB_simd_block_read_2_local_l( const __local ulong* ); 610ulong4 __builtin_IB_simd_block_read_4_local_l( const __local ulong* ); 611ulong8 __builtin_IB_simd_block_read_8_local_l( const __local ulong* ); 612 613void __builtin_IB_simd_block_write_1_local( __local uint*, uint ); 614void __builtin_IB_simd_block_write_2_local( __local uint*, uint2 ); 615void __builtin_IB_simd_block_write_4_local( __local uint*, uint4 ); 616void __builtin_IB_simd_block_write_8_local( __local uint*, uint8 ); 617 618void __builtin_IB_simd_block_write_1_local_h( __local ushort*, ushort ); 619void __builtin_IB_simd_block_write_2_local_h( __local ushort*, ushort2 ); 620void __builtin_IB_simd_block_write_4_local_h( __local ushort*, ushort4 ); 621void __builtin_IB_simd_block_write_8_local_h( __local ushort*, ushort8 ); 622void __builtin_IB_simd_block_write_16_local_h( __local ushort*, ushort16 ); 623 624void __builtin_IB_simd_block_write_1_local_b( __local uchar*, uchar ); 625void __builtin_IB_simd_block_write_2_local_b( __local uchar*, uchar2 ); 626void __builtin_IB_simd_block_write_4_local_b( __local uchar*, uchar4 ); 627void __builtin_IB_simd_block_write_8_local_b( __local uchar*, uchar8 ); 628void __builtin_IB_simd_block_write_16_local_b( __local uchar*, uchar16 ); 629 630void __builtin_IB_simd_block_write_1_local_l( __local ulong*, ulong ); 631void __builtin_IB_simd_block_write_2_local_l( __local ulong*, ulong2 ); 632void __builtin_IB_simd_block_write_4_local_l( __local ulong*, ulong4 ); 633void __builtin_IB_simd_block_write_8_local_l( __local ulong*, ulong8 ); 634 635uint __builtin_IB_simd_media_block_read_1( int, int2 ); 636uint2 __builtin_IB_simd_media_block_read_2( int, int2 ); 637uint4 __builtin_IB_simd_media_block_read_4( int, int2 ); 638uint8 __builtin_IB_simd_media_block_read_8( int, int2 ); 639 640ushort __builtin_IB_simd_media_block_read_1_h( int, int2 ); 641ushort2 __builtin_IB_simd_media_block_read_2_h( int, int2 ); 642ushort4 __builtin_IB_simd_media_block_read_4_h( int, int2 ); 643ushort8 __builtin_IB_simd_media_block_read_8_h( int, int2 ); 644 645uchar __builtin_IB_simd_media_block_read_1_b( int, int2 ); 646uchar2 __builtin_IB_simd_media_block_read_2_b( int, int2 ); 647uchar4 __builtin_IB_simd_media_block_read_4_b( int, int2 ); 648uchar8 __builtin_IB_simd_media_block_read_8_b( int, int2 ); 649uchar16 __builtin_IB_simd_media_block_read_16_b( int, int2 ); 650 651ulong __builtin_IB_simd_media_block_read_1_l( int, int2 ); 652ulong2 __builtin_IB_simd_media_block_read_2_l( int, int2 ); 653ulong4 __builtin_IB_simd_media_block_read_4_l( int, int2 ); 654ulong8 __builtin_IB_simd_media_block_read_8_l( int, int2 ); 655 656void __builtin_IB_media_block_rectangle_read( long image, int2 coords, int blockWidth, int blockHeight, GRFHandle destination ); 657 658void __builtin_IB_simd_media_block_write_1( int, int2, uint ); 659void __builtin_IB_simd_media_block_write_2( int, int2, uint2 ); 660void __builtin_IB_simd_media_block_write_4( int, int2, uint4 ); 661void __builtin_IB_simd_media_block_write_8( int, int2, uint8 ); 662 663void __builtin_IB_simd_media_block_write_1_h( int, int2, ushort ); 664void __builtin_IB_simd_media_block_write_2_h( int, int2, ushort2 ); 665void __builtin_IB_simd_media_block_write_4_h( int, int2, ushort4 ); 666void __builtin_IB_simd_media_block_write_8_h( int, int2, ushort8 ); 667 668void __builtin_IB_simd_media_block_write_1_b( int, int2, uchar ); 669void __builtin_IB_simd_media_block_write_2_b( int, int2, uchar2 ); 670void __builtin_IB_simd_media_block_write_4_b( int, int2, uchar4 ); 671void __builtin_IB_simd_media_block_write_8_b( int, int2, uchar8 ); 672void __builtin_IB_simd_media_block_write_16_b( int, int2, uchar16 ); 673 674void __builtin_IB_simd_media_block_write_1_l( int, int2, ulong ); 675void __builtin_IB_simd_media_block_write_2_l( int, int2, ulong2 ); 676void __builtin_IB_simd_media_block_write_4_l( int, int2, ulong4 ); 677void __builtin_IB_simd_media_block_write_8_l( int, int2, ulong8 ); 678 679uchar __builtin_IB_media_block_read_uchar(int image, int2 offset, int width, int height); 680uchar2 __builtin_IB_media_block_read_uchar2(int image, int2 offset, int width, int height); 681uchar4 __builtin_IB_media_block_read_uchar4(int image, int2 offset, int width, int height); 682uchar8 __builtin_IB_media_block_read_uchar8(int image, int2 offset, int width, int height); 683uchar16 __builtin_IB_media_block_read_uchar16(int image, int2 offset, int width, int height); 684 685ushort __builtin_IB_media_block_read_ushort(int image, int2 offset, int width, int height); 686ushort2 __builtin_IB_media_block_read_ushort2(int image, int2 offset, int width, int height); 687ushort4 __builtin_IB_media_block_read_ushort4(int image, int2 offset, int width, int height); 688ushort8 __builtin_IB_media_block_read_ushort8(int image, int2 offset, int width, int height); 689ushort16 __builtin_IB_media_block_read_ushort16(int image, int2 offset, int width, int height); 690 691uint __builtin_IB_media_block_read_uint(int image, int2 offset, int width, int height); 692uint2 __builtin_IB_media_block_read_uint2(int image, int2 offset, int width, int height); 693uint4 __builtin_IB_media_block_read_uint4(int image, int2 offset, int width, int height); 694uint8 __builtin_IB_media_block_read_uint8(int image, int2 offset, int width, int height); 695 696ulong __builtin_IB_media_block_read_ulong(int image, int2 offset, int width, int height); 697ulong2 __builtin_IB_media_block_read_ulong2(int image, int2 offset, int width, int height); 698ulong4 __builtin_IB_media_block_read_ulong4(int image, int2 offset, int width, int height); 699ulong8 __builtin_IB_media_block_read_ulong8(int image, int2 offset, int width, int height); 700 701void __builtin_IB_media_block_write_uchar(int image, int2 offset, int width, int height, uchar pixels); 702void __builtin_IB_media_block_write_uchar2(int image, int2 offset, int width, int height, uchar2 pixels); 703void __builtin_IB_media_block_write_uchar4(int image, int2 offset, int width, int height, uchar4 pixels); 704void __builtin_IB_media_block_write_uchar8(int image, int2 offset, int width, int height, uchar8 pixels); 705void __builtin_IB_media_block_write_uchar16(int image, int2 offset, int width, int height, uchar16 pixels); 706 707void __builtin_IB_media_block_write_ushort(int image, int2 offset, int width, int height, ushort pixels); 708void __builtin_IB_media_block_write_ushort2(int image, int2 offset, int width, int height, ushort2 pixels); 709void __builtin_IB_media_block_write_ushort4(int image, int2 offset, int width, int height, ushort4 pixels); 710void __builtin_IB_media_block_write_ushort8(int image, int2 offset, int width, int height, ushort8 pixels); 711void __builtin_IB_media_block_write_ushort16(int image, int2 offset, int width, int height, ushort16 pixels); 712 713void __builtin_IB_media_block_write_uint(int image, int2 offset, int width, int height, uint pixels); 714void __builtin_IB_media_block_write_uint2(int image, int2 offset, int width, int height, uint2 pixels); 715void __builtin_IB_media_block_write_uint4(int image, int2 offset, int width, int height, uint4 pixels); 716void __builtin_IB_media_block_write_uint8(int image, int2 offset, int width, int height, uint8 pixels); 717 718void __builtin_IB_media_block_write_ulong(int image, int2 offset, int width, int height, ulong pixels); 719void __builtin_IB_media_block_write_ulong2(int image, int2 offset, int width, int height, ulong2 pixels); 720void __builtin_IB_media_block_write_ulong4(int image, int2 offset, int width, int height, ulong4 pixels); 721void __builtin_IB_media_block_write_ulong8(int image, int2 offset, int width, int height, ulong8 pixels); 722 723int __builtin_IB_dp4a_ss(int c, int a, int b) __attribute__((const)); 724int __builtin_IB_dp4a_uu(int c, int a, int b) __attribute__((const)); 725int __builtin_IB_dp4a_su(int c, int a, int b) __attribute__((const)); 726int __builtin_IB_dp4a_us(int c, int a, int b) __attribute__((const)); 727 728#define DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, group_type) \ 729type __builtin_IB_sub_group_##group_type##_##operation##_##type_abbr(type x) __attribute__((const)); 730 731#define DECL_SUB_GROUP_CLUSTERED_OPERATION(type, type_abbr, operation, group_type) \ 732type __builtin_IB_sub_group_clustered_##group_type##_##operation##_##type_abbr(type x, int cluster_size) __attribute__((const)); 733 734#define DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, operation) \ 735DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, reduce) \ 736DECL_SUB_GROUP_OPERATION(type, type_abbr, operation, scan) \ 737DECL_SUB_GROUP_CLUSTERED_OPERATION(type, type_abbr, operation, reduce) 738 739// ARITHMETIC OPERATIONS 740// __builtin_IB_sub_group_reduce_IAdd/FAdd 741// __builtin_IB_sub_group_scan_IAdd/FAdd 742// __builtin_IB_sub_group_clustered_reduce_IAdd/FAdd 743// __builtin_IB_sub_group_reduce_IMul/FMul 744// __builtin_IB_sub_group_scan_IMul/FMul 745// __builtin_IB_sub_group_clustered_reduce_IMul/FMul 746#define DECL_SUB_GROUP_ADD_MUL(type, type_abbr, type_sign) \ 747DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Add) \ 748DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Mul) \ 749DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##MulINTEL) 750 751DECL_SUB_GROUP_ADD_MUL(char, i8, I) 752DECL_SUB_GROUP_ADD_MUL(short, i16, I) 753DECL_SUB_GROUP_ADD_MUL(int, i32, I) 754DECL_SUB_GROUP_ADD_MUL(long, i64, I) 755DECL_SUB_GROUP_ADD_MUL(float, f32, F) 756#if defined(cl_khr_fp64) 757DECL_SUB_GROUP_ADD_MUL(double, f64, F) 758#endif // defined(cl_khr_fp64) 759#if defined(cl_khr_fp16) 760DECL_SUB_GROUP_ADD_MUL(half, f16, F) 761#endif // defined(cl_khr_fp16) 762 763// __builtin_IB_sub_group_reduce_SMin/UMin/FMin 764// __builtin_IB_sub_group_scan_SMin/UMin/FMin 765// __builtin_IB_sub_group_clustered_reduce_SMin/UMin/FMin 766// __builtin_IB_sub_group_reduce_SMax/UMax/FMax 767// __builtin_IB_sub_group_scan_SMax/UMax/FMax 768// __builtin_IB_sub_group_clustered_reduce_SMax/UMax/FMax 769#define DECL_SUB_GROUP_MIN_MAX(type, type_abbr, type_sign) \ 770DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Min) \ 771DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, type_sign##Max) 772 773DECL_SUB_GROUP_MIN_MAX(char, i8, S) 774DECL_SUB_GROUP_MIN_MAX(uchar, i8, U) 775DECL_SUB_GROUP_MIN_MAX(short, i16, S) 776DECL_SUB_GROUP_MIN_MAX(ushort, i16, U) 777DECL_SUB_GROUP_MIN_MAX(int, i32, S) 778DECL_SUB_GROUP_MIN_MAX(uint, i32, U) 779DECL_SUB_GROUP_MIN_MAX(long, i64, S) 780DECL_SUB_GROUP_MIN_MAX(ulong, i64, U) 781DECL_SUB_GROUP_MIN_MAX(float, f32, F) 782#if defined(cl_khr_fp64) 783DECL_SUB_GROUP_MIN_MAX(double, f64, F) 784#endif // defined(cl_khr_fp64) 785#if defined(cl_khr_fp16) 786DECL_SUB_GROUP_MIN_MAX(half, f16, F) 787#endif // defined(cl_khr_fp16) 788 789// BITWISE OPERATIONS 790// __builtin_IB_sub_group_reduce_BitwiseAnd/Or/Xor 791// __builtin_IB_sub_group_scan_BitwiseAnd/Or/Xor 792// __builtin_IB_sub_group_clustered_reduce_BitwiseAnd/Or/Xor 793#define DECL_BITWISE_OPERATIONS(type, type_abbr) \ 794DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseAnd) \ 795DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseOr) \ 796DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseXor) \ 797DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseAndINTEL) \ 798DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseOrINTEL) \ 799DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, BitwiseXorINTEL) 800 801DECL_BITWISE_OPERATIONS(char, i8) 802DECL_BITWISE_OPERATIONS(short, i16) 803DECL_BITWISE_OPERATIONS(int, i32) 804DECL_BITWISE_OPERATIONS(long, i64) 805 806// LOGICAL OPERATIONS 807// __builtin_IB_sub_group_reduce_LogicalAnd/Or/Xor 808// __builtin_IB_sub_group_scan_LogicalAnd/Or/Xor 809// __builtin_IB_sub_group_clustered_reduce_LogicalAnd/Or/Xor 810#define DECL_LOGICAL_OPERATIONS(type, type_abbr) \ 811DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalAnd) \ 812DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalOr) \ 813DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalXor) \ 814DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalAndINTEL) \ 815DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalOrINTEL) \ 816DECL_SUB_GROUP_ALL_GROUPS(type, type_abbr, LogicalXorINTEL) 817 818DECL_LOGICAL_OPERATIONS(bool, i1) 819 820// The following mul/fma with rtz is used internally for int div/rem emulation 821// x * y, using round-to-zero 822double __builtin_IB_mul_rtz_f64(double x, double y) __attribute__((const)); 823float __builtin_IB_mul_rtz_f32(float x, float y) __attribute__((const)); 824// x + y, using round-to-zero 825double __builtin_IB_add_rtz_f64(double x, double y) __attribute__((const)); 826float __builtin_IB_add_rtz_f32(float x, float y) __attribute__((const)); 827// x * y + z, using round-to-zero 828double __builtin_IB_fma_rtz_f64(double x, double y, double z) __attribute__((const)); 829float __builtin_IB_fma_rtz_f32(float x, float y, float z) __attribute__((const)); 830// x * y + z, using round-to-positive-infinity 831double __builtin_IB_fma_rtp_f64(double x, double y, double z) __attribute__((const)); 832// x * y + z, using round-to-negative-infinity 833double __builtin_IB_fma_rtn_f64(double x, double y, double z) __attribute__((const)); 834 835 836 837#include "IGCBiF_Intrinsics_Dpas.cl" 838 839 840 841#endif // IGCBIF_INTRINSICS_CL 842