1//===-- generic/lib/misc/shuffle.cl ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <clc/clc.h>
10
// Switch cases mapping indices 0 and 1 to components .s0 and .s1 of VEC.
// Expands to `case`/`return` statements, so it may only be used inside a
// switch statement in a function that returns one component of VEC.
#define _CLC_ELEMENT_CASES2(VEC) \
  case 0: \
    return (VEC).s0; \
  case 1: \
    return (VEC).s1;
14
// Switch cases for indices 0..3: the cases of _CLC_ELEMENT_CASES2 followed
// by indices 2 and 3 (components .s2 and .s3 of VEC).
#define _CLC_ELEMENT_CASES4(VEC) \
  _CLC_ELEMENT_CASES2(VEC) \
  case 2: \
    return (VEC).s2; \
  case 3: \
    return (VEC).s3;
19
// Switch cases for indices 0..7: the cases of _CLC_ELEMENT_CASES4 followed
// by indices 4..7 (components .s4 through .s7 of VEC).
#define _CLC_ELEMENT_CASES8(VEC) \
  _CLC_ELEMENT_CASES4(VEC) \
  case 4: \
    return (VEC).s4; \
  case 5: \
    return (VEC).s5; \
  case 6: \
    return (VEC).s6; \
  case 7: \
    return (VEC).s7;
26
// Switch cases for indices 0..15: the cases of _CLC_ELEMENT_CASES8 followed
// by indices 8..15 (components .s8, .s9 and .sA through .sF of VEC).
#define _CLC_ELEMENT_CASES16(VEC) \
  _CLC_ELEMENT_CASES8(VEC) \
  case 8: \
    return (VEC).s8; \
  case 9: \
    return (VEC).s9; \
  case 10: \
    return (VEC).sA; \
  case 11: \
    return (VEC).sB; \
  case 12: \
    return (VEC).sC; \
  case 13: \
    return (VEC).sD; \
  case 14: \
    return (VEC).sE; \
  case 15: \
    return (VEC).sF;
37
// Defines the file-local helper
//   ARGTYPE __clc_get_el_<ARGTYPE><ARGSIZE>_<IDXTYPE>(ARGTYPE<ARGSIZE> x,
//                                                     IDXTYPE idx)
// which returns component `idx` of `x` through a switch built from
// _CLC_ELEMENT_CASES<ARGSIZE>. Any `idx` outside 0..ARGSIZE-1 yields 0.
//
// The helper is `static inline` rather than plain `inline`: a C99 inline
// definition without `static`/`extern` provides no external definition, so a
// call the compiler chooses not to inline would reference a symbol that no
// translation unit defines. Internal linkage also keeps the helper name
// private to this file.
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
  static inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \
      ARGTYPE##ARGSIZE x, IDXTYPE idx) { \
    switch (idx) { \
      _CLC_ELEMENT_CASES##ARGSIZE(x) \
    default: \
      return 0; \
    } \
  }
45
// Assigns a single result component: ret_val.s<LANE> receives the element of
// x selected by mask.s<LANE>, fetched through the
// __clc_get_el_<ARGTYPE><ARGSIZE>_<MASKTYPE> helper. `ret_val`, `x` and
// `mask` must be in scope where the macro is expanded.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, LANE, MASKTYPE) \
  ret_val.s##LANE = \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##LANE);
48
// Fills ret_val.s0 and ret_val.s1 with the elements of x selected by mask.s0
// and mask.s1, one _CLC_SHUFFLE_SET_ONE_ELEMENT assignment per component.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 0, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 1, MASKTYPE)
52
// Fills ret_val.s0 through ret_val.s3: components 0 and 1 via
// _CLC_SHUFFLE_SET_2_ELEMENTS, then components 2 and 3 individually.
#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 2, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 3, MASKTYPE)
57
// Fills ret_val.s0 through ret_val.s7: components 0..3 via
// _CLC_SHUFFLE_SET_4_ELEMENTS, then components 4..7 individually.
#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 4, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 5, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 6, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 7, MASKTYPE)
64
// Fills ret_val.s0 through ret_val.sF: components 0..7 via
// _CLC_SHUFFLE_SET_8_ELEMENTS, then components 8, 9 and A..F individually.
#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 8, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 9, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, A, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, B, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, C, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, D, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, E, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, F, MASKTYPE)
75
// Defines the shuffle overload with a 2-component result:
//   ARGTYPE2 shuffle(ARGTYPE<ARGSIZE> x, MASKTYPE2 mask)
// Every mask component is first bitwise-ANDed with ARGSIZE-1; result
// component i then receives the element of x selected by mask component i.
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##2 mask) { \
    ARGTYPE##2 ret_val; \
    mask = mask & (MASKTYPE##2)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
83
// Defines the shuffle overload with a 4-component result:
//   ARGTYPE4 shuffle(ARGTYPE<ARGSIZE> x, MASKTYPE4 mask)
// Every mask component is first bitwise-ANDed with ARGSIZE-1; result
// component i then receives the element of x selected by mask component i.
#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##4 mask) { \
    ARGTYPE##4 ret_val; \
    mask = mask & (MASKTYPE##4)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
91
// Defines the shuffle overload with an 8-component result:
//   ARGTYPE8 shuffle(ARGTYPE<ARGSIZE> x, MASKTYPE8 mask)
// Every mask component is first bitwise-ANDed with ARGSIZE-1; result
// component i then receives the element of x selected by mask component i.
#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##8 mask) { \
    ARGTYPE##8 ret_val; \
    mask = mask & (MASKTYPE##8)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
99
// Defines the shuffle overload with a 16-component result:
//   ARGTYPE16 shuffle(ARGTYPE<ARGSIZE> x, MASKTYPE16 mask)
// Every mask component is first bitwise-ANDed with ARGSIZE-1; result
// component i then receives the element of x selected by mask component i.
#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, \
                                             MASKTYPE##16 mask) { \
    ARGTYPE##16 ret_val; \
    mask = mask & (MASKTYPE##16)(ARGSIZE - 1); \
    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
107
// For one (element type, input width, mask element type) combination, emits
// the element-getter helper followed by the four shuffle overloads whose
// result/mask widths are 2, 4, 8 and 16. The getter comes first because the
// overloads call it.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE)
114
// Emits every shuffle overload for element type ELEMTYPE with mask element
// type MASKTYPE, covering input vector widths 2, 4, 8 and 16.
#define _CLC_VECTOR_SHUFFLE_INSIZE(ELEMTYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELEMTYPE, 2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELEMTYPE, 4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELEMTYPE, 8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(ELEMTYPE, 16, MASKTYPE)
120
121
122
123_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
124_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
125_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
126_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
127_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
128_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
129_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
130_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
131_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
132#ifdef cl_khr_fp64
133#pragma OPENCL EXTENSION cl_khr_fp64 : enable
134_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
135#endif
136#ifdef cl_khr_fp16
137#pragma OPENCL EXTENSION cl_khr_fp16 : enable
138_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
139#endif
140
// Remove every helper macro defined above so none of them stays defined
// past this point.

// Instantiation drivers.
#undef _CLC_VECTOR_SHUFFLE_INSIZE
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE

// shuffle overload generators.
#undef _CLC_SHUFFLE_DEFINE16
#undef _CLC_SHUFFLE_DEFINE8
#undef _CLC_SHUFFLE_DEFINE4
#undef _CLC_SHUFFLE_DEFINE2

// Result-component assignment helpers.
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT

// Element getter generator and its switch-case tables.
#undef _CLC_GET_ELEMENT_DEFINE
#undef _CLC_ELEMENT_CASES16
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES2
157