1#ifdef __arm__
2/*
3 * Copyright (C) 2010 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/* Changes:
19 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
20 *    Added small changes to the two functions to make them work on the
21 *    specified number of 16- or 32-bit values rather than the original
22 *    code which was specified as a count of bytes. More verbose comments
23 *    to aid future maintenance.
24 */
25
    .text
    .align

    .global arm_memset32
    .type   arm_memset32, %function

/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * Fills `count` 32-bit words starting at `dst` with `value`.
 * In:      r0 = dst (word-aligned), r1 = value, r2 = count of 32-bit words
 * Clobbers r2, r3, ip; lr is saved/restored via the stack.
 * Note:    uses pre-UAL conditional mnemonics (stmcsia, strmih, ...);
 *          ARM (not Thumb) encoding.
 */
arm_memset32:
        .fnstart
        push        {lr}                    @ lr is reused below as a value copy

        /* Multiply count by 4 - go from the number of 32-bit words to
         * the number of bytes desired. */
        mov         r2, r2, lsl #2

.Lwork_32:
        /* Set up registers ready for writing them out.
         * ip and lr become extra copies of the fill value so each stm
         * can store several words at once.
         * (NOTE(review): the .Lwork_32 label looks like a shared entry
         * point for a 16-bit variant not visible in this chunk — confirm
         * against the full file.) */
        mov         ip, r1
        mov         lr, r1

        /* Try to align the destination to a cache line. Assume 32
         * byte (8 word) cache lines, it's the common case.
         * r3 = (-dst) & 0x1C: number of leading bytes (whole words only)
         * needed to bring r0 up to a 32-byte boundary. */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C           @ r3 = head length; Z set if already aligned
        beq         .Laligned32
        cmp         r3, r2
        andhi       r3, r2, #0x1C           @ total smaller than the gap: clamp head to total
        sub         r2, r2, r3              @ r2 = bytes remaining after the head

        /* (Optionally) write any unaligned leading bytes.
         * (0-28 bytes, length in r3)
         * Dispatch on r3's bits by shifting them into the flags:
         * lsl #28 puts bit 4 in C and bit 3 in N; the second shift
         * puts bit 2 in C. Each conditional store handles one bit. */
        movs        r3, r3, lsl #28
        stmcsia     r0!, {r1, lr}           @ C (bit 4): 16 bytes, as two 8-byte stms
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}           @ N (bit 3): 8 bytes
        movs        r3, r3, lsl #2
        strcs       r1, [r0], #4            @ C (bit 2): 4 bytes

        /* Now quickly loop through the cache-aligned data.
         * One 32-byte cache line per iteration; subs leaves C set (HS)
         * while at least 32 bytes remained before the subtraction. */
.Laligned32:
        mov         r3, r1                  @ 4th copy of value for 16-byte stms
1:      subs        r2, r2, #32
        stmhsia     r0!, {r1,r3,ip,lr}      @ only store on a full 32-byte iteration
        stmhsia     r0!, {r1,r3,ip,lr}
        bhs         1b
        add         r2, r2, #32             @ undo the final over-subtraction

        /* (Optionally) store any remaining trailing bytes.
         * (0-30 bytes, length in r2)
         * Same bit->flag dispatch as the head: bit 4 -> C (16 bytes),
         * bit 3 -> N (8), then bit 2 -> C (4), bit 1 -> N (2).
         * For arm_memset32 the byte count is always a multiple of 4,
         * so the final halfword store never executes here; presumably
         * it serves a 16-bit entry point sharing this tail — verify. */
        movs        r2, r2, lsl #28
        stmcsia     r0!, {r1,r3,ip,lr}
        stmmiia     r0!, {r1,lr}
        movs        r2, r2, lsl #2
        strcs       r1, [r0], #4
        strmih      lr, [r0], #2            @ 2-byte tail (dead for 32-bit fills)

        pop         {pc}                    @ return by popping saved lr into pc
        .fnend
89#endif
90
91