/*	$NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $	*/

/*
 * Copyright (c) 2001 Ben Harris.
 * Copyright (c) 1994 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * blockio.S
 *
 * optimised block read/write from/to IO routines.
 *
 * Created      : 08/10/94
 * Modified	: 22/01/99  -- R.Earnshaw
 *			       Faster, and small tweaks for StrongARM
 */

#include <machine/asm.h>

RCSID("$NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $")

/*
 * Read bytes from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (in bytes)
 *
 * Scratch: r3, r12 (ip).  r0 is never advanced: every byte is read from
 * the same I/O address.  An fp/ip/lr/pc frame is pushed on entry and the
 * routine returns by reloading {fp, sp, pc} from that frame.
 *
 * Strategy: copy 1-3 single bytes until the memory pointer is word
 * aligned, then assemble four byte reads into one word store per
 * iteration, then copy the 0-3 trailing bytes.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4		/* fp -> saved pc slot of the frame */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1..3) */
	cmp	r12, #2			/* flags select 1 (lt), 2 (eq), 3 (gt) */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]		/* first byte  -> bits  7:0  */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8	/* second byte -> bits 15:8  */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16	/* third byte  -> bits 23:16 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24	/* fourth byte -> bits 31:24 */
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* 1..3 bytes left, same trick as above */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}

/*
 * Write bytes to an I/O address from a block of memory
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (in bytes)
 *
 * Scratch: r3, r12 (ip).  r0 is never advanced: every byte is written to
 * the same I/O address.  Frame handling and overall strategy mirror
 * read_multi_1: align the memory pointer, move a word at a time, then
 * finish the 0-3 trailing bytes.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4		/* fp -> saved pc slot of the frame */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lwm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lwm1_main		/* aligned source */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1..3) */
	cmp	r12, #2			/* flags select 1 (lt), 2 (eq), 3 (gt) */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lwm1_l4
.Lwm1_main:
.Lwm1loop:
	ldr	r3, [r1], #4		/* one word = four bytes to emit */
	strb	r3, [r0]		/* bits  7:0  first */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* then bits 15:8  */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* then bits 23:16 */
	mov	r3, r3, lsr #8
	strb	r3, [r0]		/* then bits 31:24 */
	subs	r2, r2, #4
	bge	.Lwm1loop
.Lwm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* 1..3 bytes left */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	ldmdb	fp, {fp, sp, pc}

/*
 * Reads short ints (16 bits) from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * Scratch: r3, ip.  Leaf routine, returns with "mov pc, lr".
 * Returns immediately when the length is <= 0 (signed compare).
 */

ENTRY(insw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned and the count is even, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastinsw

/* Non aligned insw */

.Linswloop:
	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	strb	r3, [r1], #0x0001	/* low byte of the short */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001	/* high byte of the short */
	bgt	.Linswloop

	mov	pc, lr

/* Word aligned insw */

.Lfastinsw:

.Lfastinswloop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
					/* NOTE(review): relies on how the CPU
					 * handles a non word aligned LDR
					 * (presumably the rotating behaviour
					 * of older ARM cores) - confirm for
					 * the target */
	ldr	ip, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, ip, lsl #16
	str	r3, [r1], #0x0004	/* Store */
	subs	r2, r2, #0x00000002	/* Next */
	bgt	.Lfastinswloop

	mov	pc, lr


/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * Scratch: r3, ip.  Leaf routine, returns with "mov pc, lr".
 * Returns immediately when the length is <= 0 (signed compare).
 * Every store to the I/O address is a full word carrying the same
 * 16 bit value in both halves.
 */

ENTRY(outsw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned and the count is even, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastoutsw

/* Non aligned outsw */

.Loutswloop:
	ldrb	r3, [r1], #0x0001	/* low byte */
	ldrb	ip, [r1], #0x0001	/* high byte */
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	orr	r3, r3, ip, lsl #8	/* r3 = 16 bit value */
	orr	r3, r3, r3, lsl #16	/* replicate into the top half */
	str	r3, [r0]
	bgt	.Loutswloop

	mov	pc, lr

/* Word aligned outsw */

.Lfastoutsw:

.Lfastoutswloop:
	ldr	r3, [r1], #0x0004	/* r3 = (H)(L) */
	subs	r2, r2, #0x00000002	/* Loop test in load delay slot */

	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]		/* low short first */
	str	ip, [r0]		/* then high short */

/*	mov	ip, r3, lsl #16
 *	orr	ip, ip, ip, lsr #16
 *	str	ip, [r0]
 *
 *	mov	ip, r3, lsr #16
 *	orr	ip, ip, ip, lsl #16
 *	str	ip, [r0]
 */

	bgt	.Lfastoutswloop

	mov	pc, lr

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * with a length guaranteed to be a multiple of 16 bytes
 * with a word aligned destination address
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * If the count is not a multiple of 8 shorts or the destination is not
 * word aligned, control is handed to insw (tail branch, lr untouched).
 * Scratch: r3, ip, lr; r4 and r5 are saved and restored on the stack.
 */

ENTRY(insw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned and the size suitably
   aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(insw)

/* Word aligned insw */

	stmfd	sp!, {r4,r5,lr}

.Linsw16loop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r4, r4, lsr #16		/* Put the two shorts together */
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r5, r5, lsr #16		/* Put the two shorts together */
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	ip, ip, lsr #16		/* Put the two shorts together */
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5,ip}		/* 4 words = 8 shorts per iteration */
	subs	r2, r2, #0x00000008	/* Next */
	bgt	.Linsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* Restore regs and go home */


/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * If the count is not a multiple of 8 shorts or the source is not word
 * aligned, control is handed to outsw (tail branch, lr untouched).
 * Scratch: r3, ip, lr; r4 and r5 are saved and restored on the stack.
 * Each source word (A)(B) is turned into (B)(B) and (A)(A) and written
 * to the I/O address in that order.
 */

ENTRY(outsw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned and the size suitably
   aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(outsw)

/* Word aligned outsw */

	stmfd	sp!, {r4,r5,lr}

.Loutsw16loop:
	ldmia	r1!, {r4,r5,ip,lr}	/* 4 words = 8 shorts per iteration */

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

/*	mov	r3, r4, lsl #16
 *	orr	r3, r3, r3, lsr #16
 *	str	r3, [r0]
 *
 *	mov	r3, r4, lsr #16
 *	orr	r3, r3, r3, lsl #16
 *	str	r3, [r0]
 */

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #0x00000008
	bgt	.Loutsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* and go home */

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The destination address should be word aligned.
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * If the destination is not word aligned, control is handed to insw
 * (tail branch, lr untouched).
 * Scratch: r3, ip, lr; r4-r9 are saved and restored on the stack.
 * lr holds the mask 0xffff0000 used to clear the top half of each word
 * read from the device.  The count is consumed in chunks of 8, 4, 2 and
 * finally 1 short.
 */

ENTRY(inswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned, do it fast */

	tst	r1, #0x00000003

	bne	_C_LABEL(insw)

/* Word aligned insw */

	stmfd	sp!, {r4-r9,lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000 */

.Linswm8_loop8:
	cmp	r2, #8
	bcc	.Linswm8_l8

	ldmia	r0, {r3-r9,ip}		/* 8 words, one short in each */

	bic	r3, r3, lr		/* keep low short of 1st word */
	orr	r3, r3, r4, lsl #16	/* 2nd word's short on top */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Linswm8_loop8
	beq	.Linswm8_l1

.Linswm8_l8:
	cmp	r2, #4
	bcc	.Linswm8_l4

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #0x00000004
	beq	.Linswm8_l1

.Linswm8_l4:
	cmp	r2, #2
	bcc	.Linswm8_l2

	ldmia	r0, {r3-r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #0x0004

	subs	r2, r2, #0x00000002
	beq	.Linswm8_l1

.Linswm8_l2:
	cmp	r2, #1
	bcc	.Linswm8_l1

	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Test in load delay slot */
					/* XXX, why don't we use result?  */

	strb	r3, [r1], #0x0001
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001


.Linswm8_l1:
	ldmfd	sp!, {r4-r9,pc}		/* And go home */

/*
 * write short ints (16 bits) to an I/O address from a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The source address should be word aligned.
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16 bit transfers)
 *
 * If the source is not word aligned, control is handed to outsw
 * (tail branch, lr untouched).
 * Scratch: r3, ip, lr; r4-r8 are saved and restored on the stack.
 * Each source word (A)(B) is expanded into the pair (B)(B), (A)(A) and
 * the pairs are written with a single store-multiple starting at r0.
 * The count is consumed in chunks of 8, 4, 2 and finally 1 short.
 */

ENTRY(outswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned, do it fast */

	tst	r1, #0x00000003

	bne	_C_LABEL(outsw)

/* Word aligned outsw */

	stmfd	sp!, {r4-r8,lr}

.Loutswm8_loop8:
	cmp	r2, #8
	bcc	.Loutswm8_l8

	ldmia	r1!, {r3,r5,r7,ip}	/* 4 words = 8 shorts */

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8,ip,lr}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Loutswm8_loop8
	beq	.Loutswm8_l1

.Loutswm8_l8:
	cmp	r2, #4
	bcc	.Loutswm8_l4

	ldmia	r1!, {r3-r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #0x00000004
	beq	.Loutswm8_l1

.Loutswm8_l4:
	cmp	r2, #2
	bcc	.Loutswm8_l2

	ldr	r3, [r1], #0x0004	/* r3 = (A)(B) */
	subs	r2, r2, #0x00000002	/* Done test in Load delay slot */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B)*/
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_l1

.Loutswm8_l2:
	cmp	r2, #1
	bcc	.Loutswm8_l1

	ldrb	r3, [r1], #0x0001	/* low byte */
	ldrb	r4, [r1], #0x0001	/* high byte */
	subs	r2, r2, #0x00000001	/* Done test in load delay slot */
					/* XXX This test isn't used?  */
	orr	r3, r3, r4, lsl #8	/* r3 = 16 bit value */
	orr	r3, r3, r3, lsl #16	/* replicate into the top half */
	str	r3, [r0]

.Loutswm8_l1:
	ldmfd	sp!, {r4-r8,pc}		/* And go home */