/* { dg-do run { target arm*-*-* } } */
/* { dg-require-effective-target arm_neon } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
5 
6 #include <arm_neon.h>
7 #include <stdlib.h>
8 
/* Wrapper around a 128-bit NEON vector.  The lanes are stored as
   uint8x16_t; the struct type itself is declared with
   __attribute__ ((aligned(8))).

   Both conversions are bit reinterpretations (vreinterpretq_*), so no lane
   value is changed when moving between the wrapper and int16x8_t.  */
struct __attribute__ ((aligned(8))) _v16u8_ {
  uint8x16_t val;
  /* Converting constructor (not explicit): an int16x8_t, such as the result
     of vcombine_s16 in loadlo_8i16, can be returned directly as a v16u8.  */
  _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
  /* Implicit conversion back to int16x8_t, so a wrapper object can be
     passed straight to s16 intrinsics such as vget_low_s16.  */
  operator int16x8_t () const { return vreinterpretq_s16_u8(val); }
};
typedef struct _v16u8_ v16u8;
15 
/* Wrapper around a 64-bit NEON vector.  The lanes are stored as uint8x8_t;
   the struct type itself is declared with __attribute__ ((aligned(4))).  */
struct __attribute__ ((aligned(4))) _v8u8_ {
  uint8x8_t val;
  /* Converting constructor (not explicit): a uint8x8_t, such as the result
     of vdup_n_u8 in zero_64, can be returned directly as a v8u8.  */
  _v8u8_( const uint8x8_t &src) { val = src; }
  /* Implicit conversion to int16x4_t by bit reinterpretation, so a wrapper
     object can be used as an operand of vcombine_s16.  */
  operator int16x4_t () const { return vreinterpret_s16_u8(val); }
};
typedef struct _v8u8_ v8u8;
22 
/* Short aliases used by the load helpers and by test() below.
   v8i16 is the same wrapper type as v16u8; the name only signals that the
   128 bits are being treated as eight 16-bit lanes.  The cv* aliases are
   the const-qualified forms used for pointer casts.  */
typedef v16u8                v8i16;
typedef int32x4_t            v4i32;
typedef const short         cv1i16;
typedef const unsigned char cv1u8;
typedef const v8i16         cv8i16;
28 
/* Return a 64-bit vector whose eight byte lanes are all zero.  The
   uint8x8_t produced by vdup_n_u8 is wrapped through the converting
   constructor of v8u8.  */
static inline __attribute__((always_inline)) v8u8 zero_64(){ return vdup_n_u8( 0 ); }
30 
/* Load four 16-bit lanes from the address P into the low half of a
   128-bit vector and fill the high half with zeros.

   P is only used as an address: it is cast to a pointer to const short
   and read with vld1_s16, so just 64 bits are read even though the
   parameter is typed as a pointer to the 128-bit wrapper.  */
static inline __attribute__((always_inline)) v8i16 loadlo_8i16( cv8i16* p ){
  return vcombine_s16( vld1_s16( (cv1i16 *)p ), zero_64() );
}
/* Same as loadlo_8i16, but reads from P advanced by OFFSET.

   OFFSET is counted in bytes, not in 16-bit elements: P is indexed through
   a pointer to const unsigned char before being cast back.  */
static inline __attribute__((always_inline)) v8i16 _loadlo_8i16( cv8i16* p, int offset ){
  return loadlo_8i16( (cv8i16*)(&((cv1u8*)p)[offset]) );
}
37 
/* Accumulate groups of four 16-bit values read from _Inp and store the
   four 32-bit sums to _Out.

   _Inp     base address of the input data.
   _Out     destination for four int32_t results (written by vst1q_s32).
   s1v      step added to the byte offset after each iteration.
   dv0      byte offset at which the loop stops; dv0 == 0 means no
            iteration runs and _Out receives four zeros.
   smask_v  mask applied to the offset after each step.

   The offset starts at 0 and advances as (sv + s1v) & smask_v.  The loop
   ends only when the offset becomes exactly equal to dv0, so the caller
   must pick s1v/smask_v such that dv0 is reached.

   Kept noinline so the loop is compiled as a standalone function instead
   of being folded into main.  */
void __attribute__((noinline))
test(unsigned short *_Inp, int32_t *_Out,
     unsigned int s1v, unsigned int dv0,
     unsigned int smask_v)
{
  int32x4_t c = vdupq_n_s32(0);

  for(unsigned int sv=0 ; sv!=dv0 ; sv=(sv+s1v)&smask_v )
    {
      int32x4_t s;
      /* sv is a byte offset into _Inp.  The four lanes are read as signed
	 16-bit values and sign-extended to 32 bits by vmovl_s16.  */
      s = vmovl_s16( vget_low_s16( _loadlo_8i16( (cv8i16*) _Inp, sv ) ) );
      c = vaddq_s32( c, s );
    }
  vst1q_s32( _Out, c );
}
53 
/* Run test() for exactly one iteration and check the result.

   With s1v == 1, dv0 == 1 and an all-ones mask, the loop body executes
   once at offset 0 and then stops because the next offset equals dv0.
   The output must therefore be the four input values widened to 32 bits;
   anything else aborts.  */
int
main()
{
  unsigned short a[4] = {1, 2, 3, 4};
  int32_t b[4] = {0, 0, 0, 0};
  test(a, b, 1, 1, ~0);
  if (b[0] != 1 || b[1] != 2 || b[2] != 3 || b[3] != 4)
    abort();
  return 0;
}
64