/* $NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $ */

/*-
 * Copyright (c) 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

__KERNEL_RCSID(7, "$NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $")

/*
 * Optimized pmap subroutines.
36 */ 37 38 .text 39inc7: .stabs __FILE__,132,0,0,inc7; .loc 1 __LINE__ 40 41/* 42 * pmap_zero_page: [ INTERFACE ] 43 * 44 * Zero the specified (machine independent) page by mapping the page 45 * into virtual memory and clear its contents, one machine dependent 46 * page at a time. 47 * 48 * Note: no locking is necessary in this function. 49 */ 50 .p2align 4 51LEAF(pmap_zero_page, 1) 52 /* No global references - skip LDGP() */ 53 54 /* 55 * Code here is arranged to keep branch targets on 16-byte 56 * boundaries, minimize result latencies in the loop, unroll 57 * the loop to at least 20 insns, and to dual-issue when 58 * feasible. 59 * 60 * In the setup, we use nop and unop to minimize pipline stalls 61 * on dependent instruction pairs. 62 */ 63 64 /* ---- */ 65 lda t0, -1 66 nop 67 sll t0, 42, t0 /* t0 = ALPHA_K0SEG_BASE */ 68 /* 69 * Loop counter: 70 * PAGE_SIZE / 8 bytes per store / 16 stores per iteration 71 */ 72 lda v0, ((ALPHA_PGBYTES / 8) / 16) 73 /* ---- */ 74 or a0, t0, a0 /* a0 = ALPHA_PHYS_TO_K0SEG(a0) */ 75 nop 76 addq a0, (8*8), a2 /* a2 = a0 + 8-quads */ 77 unop 78 /* ---- */ 791: stq zero, (0*8)(a0) /* 0 */ 80 stq zero, (1*8)(a0) /* 1 */ 81 stq zero, (2*8)(a0) /* 2 */ 82 stq zero, (3*8)(a0) /* 3 */ 83 /* ---- */ 84 stq zero, (4*8)(a0) /* 4 */ 85 stq zero, (5*8)(a0) /* 5 */ 86 stq zero, (6*8)(a0) /* 6 */ 87 stq zero, (7*8)(a0) /* 7 */ 88 /* ---- */ 89 addq a2, (8*8), a0 /* a0 = a2 + 8-quads */ 90 stq zero, (0*8)(a2) /* 8 */ 91 stq zero, (1*8)(a2) /* 9 */ 92 stq zero, (2*8)(a2) /* 10 */ 93 /* --- */ 94 subq v0, 1, v0 /* count-- */ 95 stq zero, (3*8)(a2) /* 11 */ 96 stq zero, (4*8)(a2) /* 12 */ 97 stq zero, (5*8)(a2) /* 13 */ 98 /* ---- */ 99 stq zero, (6*8)(a2) /* 14 */ 100 stq zero, (7*8)(a2) /* 15 */ 101 addq a0, (8*8), a2 /* a2 = a0 + 8-quads */ 102 bne v0, 1b /* loop around if count != 0 */ 103 /* ---- */ 104 105 RET 106 END(pmap_zero_page) 107 108/* 109 * pmap_copy_page: [ INTERFACE ] 110 * 111 * Copy the specified (machine independent) page by 
 mapping the page
 *	into virtual memory and copying the page, one machine dependent
 *	page at a time.
 *
 *	Note: no locking is necessary in this function.
 *
 *	In:	a0 = physical address of the source page
 *		a1 = physical address of the destination page
 *	Out:	nothing
 *	Clobbers: v0, t0-t7, a0, a1 (all caller-saved in the alpha ABI)
 */
	.p2align 4
LEAF(pmap_copy_page, 2)
	/* No global references - skip LDGP() */

	/*
	 * Scheduling notes are the same as for pmap_zero_page above:
	 * the branch target is kept 16-byte aligned, the loop moves
	 * 8 quadwords per iteration with all 8 loads issued before the
	 * stores (to hide load result latency), and the count/pointer
	 * bookkeeping is interleaved with the stores for dual issue.
	 */

	/* ---- */
	lda	t0, -1
	nop
	sll	t0, 42, t0		/* t0 = ALPHA_K0SEG_BASE (-1 << 42) */
	/*
	 * Loop counter:
	 *	PAGE_SIZE / 8 bytes per store / 8 stores per iteration
	 */
	lda	v0, ((ALPHA_PGBYTES / 8) / 8)
	/* ---- */
	or	a0, t0, a0		/* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
	unop
	or	a1, t0, a1		/* a1 = ALPHA_PHYS_TO_K0SEG(a1) */
	unop
	/* ---- */
1:	ldq	t0, (0*8)(a0)		/* load 0 */
	ldq	t1, (1*8)(a0)		/* load 1 */
	ldq	t2, (2*8)(a0)		/* load 2 */
	ldq	t3, (3*8)(a0)		/* load 3 */
	/* ---- */
	ldq	t4, (4*8)(a0)		/* load 4 */
	ldq	t5, (5*8)(a0)		/* load 5 */
	ldq	t6, (6*8)(a0)		/* load 6 */
	ldq	t7, (7*8)(a0)		/* load 7 */
	/* ---- */
	addq	a0, (8*8), a0		/* a0 = a0 + 8-quads */
	stq	t0, (0*8)(a1)		/* store 0 */
	stq	t1, (1*8)(a1)		/* store 1 */
	stq	t2, (2*8)(a1)		/* store 2 */
	/* ---- */
	subq	v0, 1, v0		/* count-- */
	stq	t3, (3*8)(a1)		/* store 3 */
	stq	t4, (4*8)(a1)		/* store 4 */
	stq	t5, (5*8)(a1)		/* store 5 */
	/* ---- */
	stq	t6, (6*8)(a1)		/* store 6 */
	stq	t7, (7*8)(a1)		/* store 7 */
	addq	a1, (8*8), a1		/* a1 = a1 + 8-quads */
	bne	v0, 1b			/* loop around if count != 0 */
	/* ---- */

	RET
	END(pmap_copy_page)