/* { dg-do run } */
/* { dg-require-effective-target arm_neon } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */

#include <arm_neon.h>
#include <stdlib.h>

/* Thin, 16-byte-aligned wrapper around one 128-bit NEON vector.

   The value is stored as uint8x16_t.  The converting constructors and the
   conversion operators only reinterpret the same 128 bits with a different
   lane layout (vreinterpretq_*); they never convert lane values.  This lets
   a wrapper object be passed directly to NEON intrinsics of several element
   types and be built directly from their results.

   NOTE: this is a compiler regression test, so the member definitions are
   deliberately kept exactly as simple as they were.  */
struct __attribute__((aligned(16))) _v16u8_ {
  uint8x16_t val;

  /* Default construction leaves VAL uninitialized.  */
  _v16u8_() { }

  /* Wrap a vector, viewing its bits as sixteen unsigned bytes.  */
  _v16u8_( const uint8x16_t &src) { val = src; }
  _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
  _v16u8_( const uint32x4_t &src) { val = vreinterpretq_u8_u32(src); }

  /* View the stored bits with the requested lane layout.  */
  operator uint8x16_t () const { return val; }
  operator int8x16_t () const { return vreinterpretq_s8_u8 (val); }
  operator int16x8_t () const { return vreinterpretq_s16_u8(val); }
  operator uint32x4_t () const { return vreinterpretq_u32_u8(val); }
  operator int32x4_t () const { return vreinterpretq_s32_u8(val); }
};
/* Short names for the vector wrapper and its const-qualified form.  */
typedef struct _v16u8_ v16u8;
typedef const v16u8 cv16u8;

/* Lane-layout aliases.  All three name the very same wrapper type; the
   suffix only documents how the caller intends to view the lanes.  */
typedef v16u8 v16i8;
typedef v16u8 v8i16;
typedef v16u8 v4u32;

/* Interleave the low eight bytes of S with the low eight bytes of T.

   The result is s0,t0,s1,t1,...,s7,t7: vzip_u8 produces the two
   interleaved 64-bit halves and vcombine_u8 joins them back into one
   128-bit vector.  The high eight bytes of both inputs are ignored.  */
inline v16u8 __attribute__((always_inline)) mergelo( const v16u8 & s, const v16u8 & t )
{
  uint8x8x2_t r = vzip_u8( vget_low_u8(s), vget_low_u8(t) );
  return vcombine_u8( r.val[0], r.val[1] );
}

/* Sign-extend the low eight bytes of S, taken as signed 8-bit lanes, to
   eight signed 16-bit lanes (vmovl_s8).  The high eight bytes of S are
   ignored.  */
inline v8i16 __attribute__((always_inline)) unpacklo(const v16i8 & s)
{
  return vmovl_s8( vget_low_s8( s ) );
}

/* 16-byte-aligned input vectors for test_func, which loads them byte-wise
   with vld1q_u8.  */
const uint32_t __attribute__((aligned(16))) _InA [4] = { 0xFF020001, 0xFF020001, 0xFF000101, 0xFF000101 } ;
const uint32_t __attribute__((aligned(16))) _InB [4] = { 0xFF050002, 0xFF050002, 0xFF000303, 0xFF000203 } ;

/* Computation under test.  Kept out of line (noinline) so that main sees
   only the returned vector.

   Loads _InA and _InB as byte vectors, interleaves their low halves twice
   in both operand orders, widens the low eight bytes of each result to
   signed 16-bit lanes, subtracts 2 from every lane and adds the two
   vectors.  Only the low eight bytes of _InA/_InB reach the result.  With
   the little-endian byte layout of those constants the eight 16-bit lanes
   of the result are -1,-1,-1,-1,-4,-4,-4,-4.  */
__attribute__((noinline)) v16i8 test_func(void)
{
  /* vld1q_u8 takes a pointer to const bytes, so the casts keep the const
     qualification of the input arrays.  */
  v16u8 A = vld1q_u8( (const uint8_t *) _InA );
  v16u8 B = vld1q_u8( (const uint8_t *) _InB );
  v8i16 r = vdupq_n_s16(2);

  v16u8 _0 = mergelo( A, B );
  v16u8 _1 = mergelo( B, A );

  v16u8 _2 = mergelo( _0, _1 );
  v16u8 _3 = mergelo( _1, _0 );

  v8i16 _4 = vsubq_s16( unpacklo( _2 ), r );
  v8i16 _5 = vsubq_s16( unpacklo( _3 ), r );

  v8i16 ret = vaddq_s16( _4, _5 );

  return ( ret );
}

/* Test driver: run test_func and compare its result, viewed as four
   unsigned 32-bit lanes, with the expected bit patterns.  Calls abort ()
   on any mismatch and exit (0) on success.  ARGC and ARGV are unused.  */
int main (int argc, char **argv)
{
  v16u8 val = test_func();

  if (vgetq_lane_u32( val, 0 ) != 0xffffffff
      || vgetq_lane_u32( val, 1 ) != 0xffffffff
      || vgetq_lane_u32( val, 2 ) != 0xfffcfffc
      || vgetq_lane_u32( val, 3 ) != 0xfffcfffc)
    abort ();
  exit (0);
}
