1 /* { dg-do compile } */
2 /* { dg-require-effective-target arm_neon_ok } */
3 /* { dg-require-effective-target arm_thumb2_ok } */
4 /* { dg-options "-O2 -mthumb" } */
5 /* { dg-add-options arm_neon } */
6 /* { dg-prune-output "switch .* conflicts with" } */
7 
8 #include <arm_neon.h>
9 #include <stddef.h>
10 
/* Fill the first LENGTH bytes starting at DST with the byte value
   (unsigned char) C and return DST.

   Strategy:
     - fewer than 4 bytes: plain byte loop;
     - otherwise byte stores until the cursor is 4-byte aligned;
     - if at least 16 bytes remain: 32-bit stores up to 16-byte alignment,
       then 128-bit NEON stores in batches of 16 vectors, 4 vectors and
       1 vector;
     - whatever is left: 32-bit stores, then byte stores.

   The function is defined with a prototype because old-style parameter
   declaration lists are no longer valid as of C23, and it walks the buffer
   with an unsigned char cursor because ISO C does not define arithmetic
   on void pointers.  */
void *
memset (void *DST, int C, size_t LENGTH)
{
  unsigned char *p = DST;
  const unsigned char c_byte = (unsigned char) C;

  /* Tiny fills are handled byte by byte.  */
  if (__builtin_expect (LENGTH < 4, 1)) {
    for (size_t i = 0; i < LENGTH; i++) {
      p[i] = c_byte;
    }
    return DST;
  }

  unsigned char *const end = p + LENGTH;

  /* LENGTH >= 4 here, so the at most 3 byte stores needed to reach
     4-byte alignment stay inside the buffer.  */
  while ((uintptr_t) p % 4 != 0) {
    *p++ = c_byte;
  }

  /* The fill byte replicated into all four bytes of a 32-bit word.  */
  const uint32_t c_word32 = (uint32_t) c_byte * 0x01010101u;

  if (__builtin_expect (end - p >= 16, 0)) {
    /* p is 4-byte aligned, so at most three word stores (12 bytes) are
       needed to reach 16-byte alignment; at least 16 bytes remain.  */
    while ((uintptr_t) p % 16 != 0) {
      *(uint32_t *) p = c_word32;
      p += 4;
    }

    /* The fill byte replicated into all sixteen lanes.  */
    const uint8x16_t c_vec = vdupq_n_u8 (c_byte);
    const size_t remaining = (size_t) (end - p);
    size_t i = 0;

    /* 256 bytes per iteration, kept explicitly unrolled.  */
    while (i + 16 * 16 <= remaining) {
      uint8x16_t *v = (uint8x16_t *) (p + i);
      v[0] = c_vec;
      v[1] = c_vec;
      v[2] = c_vec;
      v[3] = c_vec;
      v[4] = c_vec;
      v[5] = c_vec;
      v[6] = c_vec;
      v[7] = c_vec;
      v[8] = c_vec;
      v[9] = c_vec;
      v[10] = c_vec;
      v[11] = c_vec;
      v[12] = c_vec;
      v[13] = c_vec;
      v[14] = c_vec;
      v[15] = c_vec;
      i += 16 * 16;
    }

    /* 64 bytes per iteration.  */
    while (i + 16 * 4 <= remaining) {
      uint8x16_t *v = (uint8x16_t *) (p + i);
      v[0] = c_vec;
      v[1] = c_vec;
      v[2] = c_vec;
      v[3] = c_vec;
      i += 16 * 4;
    }

    /* One vector (16 bytes) per iteration.  */
    while (i + 16 <= remaining) {
      *(uint8x16_t *) (p + i) = c_vec;
      i += 16;
    }

    p += i;
  }

  /* Tail: whole 32-bit words first (p is still 4-byte aligned here)...  */
  while (end - p >= 4) {
    *(uint32_t *) p = c_word32;
    p += 4;
  }

  /* ...then the last 0-3 bytes.  */
  while (p < end) {
    *p++ = c_byte;
  }

  return DST;
}
102