1 /* { dg-do compile } */ 2 /* { dg-require-effective-target arm_neon_ok } */ 3 /* { dg-require-effective-target arm_thumb2_ok } */ 4 /* { dg-options "-O2 -mthumb" } */ 5 /* { dg-add-options arm_neon } */ 6 /* { dg-prune-output "switch .* conflicts with" } */ 7 8 #include <arm_neon.h> 9 #include <stddef.h> 10 11 void * memset(DST,C,LENGTH)12memset (DST, C, LENGTH) 13 void *DST; 14 int C; 15 size_t LENGTH; 16 { 17 void* DST0 = DST; 18 unsigned char C_BYTE = C; 19 20 21 if (__builtin_expect(LENGTH < 4, 1)) { 22 size_t i = 0; 23 while (i < LENGTH) { 24 ((char*)DST)[i] = C_BYTE; 25 i++; 26 } 27 return DST; 28 } 29 30 const char* DST_end = (char*)DST + LENGTH; 31 32 33 while ((uintptr_t)DST % 4 != 0) { 34 *(char*) (DST++) = C_BYTE; 35 } 36 37 38 uint32_t C_SHORTWORD = (uint32_t)(unsigned char)(C_BYTE) * 0x01010101; 39 40 41 if (__builtin_expect(DST_end - (char*)DST >= 16, 0)) { 42 while ((uintptr_t)DST % 16 != 0) { 43 *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD; 44 DST += 4; 45 } 46 47 48 uint8x16_t C_WORD = vdupq_n_u8(C_BYTE); 49 50 51 52 53 54 size_t i = 0; 55 LENGTH = DST_end - (char*)DST; 56 while (i + 16 * 16 <= LENGTH) { 57 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD; 58 *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD; 59 *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD; 60 *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD; 61 *((uint8x16_t*)((char*)(DST) + (i + 16 * 4))) = C_WORD; 62 *((uint8x16_t*)((char*)(DST) + (i + 16 * 5))) = C_WORD; 63 *((uint8x16_t*)((char*)(DST) + (i + 16 * 6))) = C_WORD; 64 *((uint8x16_t*)((char*)(DST) + (i + 16 * 7))) = C_WORD; 65 *((uint8x16_t*)((char*)(DST) + (i + 16 * 8))) = C_WORD; 66 *((uint8x16_t*)((char*)(DST) + (i + 16 * 9))) = C_WORD; 67 *((uint8x16_t*)((char*)(DST) + (i + 16 * 10))) = C_WORD; 68 *((uint8x16_t*)((char*)(DST) + (i + 16 * 11))) = C_WORD; 69 *((uint8x16_t*)((char*)(DST) + (i + 16 * 12))) = C_WORD; 70 *((uint8x16_t*)((char*)(DST) + (i + 16 * 13))) = C_WORD; 71 *((uint8x16_t*)((char*)(DST) + (i + 16 * 14))) = C_WORD; 72 *((uint8x16_t*)((char*)(DST) + (i + 16 * 15))) = C_WORD; 73 i += 16 * 16; 74 } 75 while (i + 16 * 4 <= LENGTH) { 76 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD; 77 *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD; 78 *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD; 79 *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD; 80 i += 16 * 4; 81 } 82 while (i + 16 <= LENGTH) { 83 *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD; 84 i += 16; 85 } 86 DST += i; 87 } 88 89 while (4 <= DST_end - (char*)DST) { 90 *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD; 91 DST += 4; 92 } 93 94 95 while ((char*)DST < DST_end) { 96 *((char*)DST) = C_BYTE; 97 DST++; 98 } 99 100 return DST0; 101 } 102