/*
 * memset - fill memory with a constant byte
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 * C equivalent:  void *__memset_aarch64 (void *dst, int c, size_t n)
 * In:   x0 = dst, w1 = fill byte (only the low 8 bits matter),
 *       x2 = byte count
 * Out:  x0 = dst (left untouched throughout, so it doubles as the
 *       return value)
 * Clobbers: x1-x5, v0, NZCV.
 *
 * Strategy: small sizes (0..15) use overlapping scalar stores from both
 * ends; 16..96 use overlapping q-register stores; larger sizes use an
 * unrolled 64-byte store loop, switching to the DC ZVA cache-line-zeroing
 * instruction when the value is zero, the size is large enough, and the
 * ZVA block size is 64 bytes.
 */

#include "../asmdefs.h"

/* Register roles.  */
#define dstin	x0	/* original destination; never modified (return value) */
#define val	x1	/* fill byte replicated into a GP register */
#define valw	w1	/* 32-bit view of the fill value */
#define count	x2	/* byte count, later biased for the store loops */
#define dst	x3	/* running (aligned/biased) destination pointer */
#define dstend	x4	/* one past the last byte to be set */
#define zva_val	x5	/* scratch for reading DCZID_EL0 */

ENTRY (__memset_aarch64)

	/* Replicate the fill byte into all 16 lanes of v0.  */
	dup	v0.16B, valw
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	/* Copy the replicated byte back to a GP register for the
	   scalar small-size stores below.  */
	mov	val, v0.D[0]

	/* Set 0..15 bytes: branch on bits 3/2/1/0 of count and store
	   overlapping chunks from both ends so no loop is needed.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]	/* may overlap the store above */
	ret
	nop				/* alignment padding for the label below */
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes (count >= 16 via b.hs above).  All stores may
	   overlap; the first/last q stores cover the unaligned ends.  */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)	/* bit 6 set => count is 64..96 */
	str	q0, [dstend, -16]
	tbz	count, 5, 1f		/* bit 5 clear => 16..31 done */
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end (q0,[dstin] was already stored).  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	and	valw, valw, 255		/* isolate the fill byte for the zero test */
	bic	dst, dstin, 15		/* round dst down to 16-byte alignment */
	str	q0, [dstin]		/* head: covers the unaligned start */
	cmp	count, 160
	/* If count >= 160 (hs), test valw == 0; otherwise force NZCV=0
	   (Z clear => "ne").  So the branch below is taken when
	   count < 160 OR the fill byte is nonzero — DC ZVA is only
	   worthwhile for large zeroing.  */
	ccmp	valw, 0, 0, hs
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	/* DCZID_EL0[3:0] = log2(ZVA block size / 4); 4 => 64 bytes.
	   Bit 4 (DZP, ZVA prohibited) must also be clear, hence the
	   5-bit mask.  Bail out unless the block size is exactly 64.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	/* Fill up to the first 64-byte-aligned boundary with stores,
	   then round dst down so the loop below starts on a ZVA block.  */
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
	/* Zero one 64-byte cache block per iteration via DC ZVA.  */
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst
	subs	count, count, 64
	b.hi	L(zva_loop)
	/* Tail: last (up to) 64 bytes with ordinary stores, which also
	   covers the unaligned end.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
	/* 64 bytes per iteration; the pre-index writeback on the second
	   stp advances dst.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	/* Tail: final 64 bytes anchored at dstend (may overlap the loop's
	   last stores), covering the unaligned end.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)