//===-- generic/lib/misc/shuffle.cl ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/clc.h>

// One switch arm: for index IDX, return component .s<SUFFIX> of VAR.
// SUFFIX is passed separately because components 10..15 are spelled sA..sF.
#define _CLC_ELEMENT_CASE(VAR, IDX, SUFFIX) \
  case IDX: return VAR.s##SUFFIX;

// Switch arms covering every component of a 2/4/8/16-wide vector. Each width
// reuses the arms of the next smaller width and appends the remaining ones.
#define _CLC_ELEMENT_CASES2(VAR) \
  _CLC_ELEMENT_CASE(VAR, 0, 0) \
  _CLC_ELEMENT_CASE(VAR, 1, 1)

#define _CLC_ELEMENT_CASES4(VAR) \
  _CLC_ELEMENT_CASES2(VAR) \
  _CLC_ELEMENT_CASE(VAR, 2, 2) \
  _CLC_ELEMENT_CASE(VAR, 3, 3)

#define _CLC_ELEMENT_CASES8(VAR) \
  _CLC_ELEMENT_CASES4(VAR) \
  _CLC_ELEMENT_CASE(VAR, 4, 4) \
  _CLC_ELEMENT_CASE(VAR, 5, 5) \
  _CLC_ELEMENT_CASE(VAR, 6, 6) \
  _CLC_ELEMENT_CASE(VAR, 7, 7)

#define _CLC_ELEMENT_CASES16(VAR) \
  _CLC_ELEMENT_CASES8(VAR) \
  _CLC_ELEMENT_CASE(VAR, 8, 8) \
  _CLC_ELEMENT_CASE(VAR, 9, 9) \
  _CLC_ELEMENT_CASE(VAR, 10, A) \
  _CLC_ELEMENT_CASE(VAR, 11, B) \
  _CLC_ELEMENT_CASE(VAR, 12, C) \
  _CLC_ELEMENT_CASE(VAR, 13, D) \
  _CLC_ELEMENT_CASE(VAR, 14, E) \
  _CLC_ELEMENT_CASE(VAR, 15, F)

// Defines the helper __clc_get_el_<ARGTYPE><ARGSIZE>_<IDXTYPE>(vec, lane),
// which returns the component of `vec` selected by the run-time index `lane`.
// An index that matches none of the switch arms yields 0.
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
  inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \
      ARGTYPE##ARGSIZE vec, IDXTYPE lane) { \
    switch (lane) { \
      _CLC_ELEMENT_CASES##ARGSIZE(vec) \
      default: return 0; \
    } \
  }

// Fills result.s<INDEX> with the component of `x` selected by mask.s<INDEX>.
// Expects `result`, `x` and `mask` to be in scope at the expansion site.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
  result.s##INDEX = \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);

// Assign all components of a 2/4/8/16-wide result, lowest component first.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 0, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 1, MASKTYPE)

#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 2, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 3, MASKTYPE)

#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 4, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 5, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 6, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 7, MASKTYPE)

#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 8, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 9, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, A, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, B, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, C, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, D, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, E, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, F, MASKTYPE)

// Defines one shuffle() overload taking an ARGSIZE-wide input `x` and a
// RETSIZE-wide `mask`, and returning a RETSIZE-wide vector of ARGTYPE.
// Every mask component is first ANDed with ARGSIZE-1, so the index handed to
// the element getter is always within 0..ARGSIZE-1.
#define _CLC_SHUFFLE_DEFINE(ARGTYPE, ARGSIZE, MASKTYPE, RETSIZE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##RETSIZE shuffle(ARGTYPE##ARGSIZE x, \
                                                  MASKTYPE##RETSIZE mask) { \
    ARGTYPE##RETSIZE result; \
    mask &= (MASKTYPE##RETSIZE)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_##RETSIZE##_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return result; \
  }

// For one (element type, input width, mask element type) combination, emit
// the element getter followed by the shuffle() overloads for result widths
// 2, 4, 8 and 16.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 2) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 4) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 8) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 16)

// Repeat the above for input widths 2, 4, 8 and 16.
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)

// Instantiations: each element type is paired with the unsigned integer mask
// type listed next to it.
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
#endif

// Drop every helper macro so none of them leaks past this file.
#undef _CLC_ELEMENT_CASE
#undef _CLC_ELEMENT_CASES2
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES16
#undef _CLC_GET_ELEMENT_DEFINE
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
#undef _CLC_SHUFFLE_DEFINE
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
#undef _CLC_VECTOR_SHUFFLE_INSIZE