/*
 * Range test script: checks integer ALU operations against a sweep of
 * power-of-two related constants, shifts by every valid count, and
 * indexed loads/stores at offsets of +/- 2^k around a 64 MiB buffer.
 *
 * NOTE(review): the mnemonics and directives (movi, stxr_c, prolog,
 * pushargi, .data/.code, $() constant expressions) look like the GNU
 * lightning check-script syntax run through a C preprocessor; confirm
 * against the harness that consumes this file.
 *
 * Every check has the same shape: compute a value, compare it with the
 * expected constant, branch over a call to abort when they match.
 * Reaching the end of the script prints "ok".
 */

/* Span, in bytes, covered by the largest offset (1 << 26). */
#define M64	67108864

/* aBk = 2^k : positive power-of-two constants. */
#define aB1	(1<<1)
#define aB2	(1<<2)
#define aB3	(1<<3)
#define aB4	(1<<4)
#define aB5	(1<<5)
#define aB6	(1<<6)
#define aB7	(1<<7)
#define aB8	(1<<8)
#define aB9	(1<<9)
#define aB10	(1<<10)
#define aB11	(1<<11)
#define aB12	(1<<12)
#define aB13	(1<<13)
#define aB14	(1<<14)
#define aB15	(1<<15)
#define aB16	(1<<16)
#define aB17	(1<<17)
#define aB18	(1<<18)
#define aB19	(1<<19)
#define aB20	(1<<20)
#define aB21	(1<<21)
#define aB22	(1<<22)
#define aB23	(1<<23)
#define aB24	(1<<24)
#define aB25	(1<<25)
#define aB26	(1<<26)

/* bBk = -(2^k) : negated powers of two. */
#define bB1	(-aB1)
#define bB2	(-aB2)
#define bB3	(-aB3)
#define bB4	(-aB4)
#define bB5	(-aB5)
#define bB6	(-aB6)
#define bB7	(-aB7)
#define bB8	(-aB8)
#define bB9	(-aB9)
#define bB10	(-aB10)
#define bB11	(-aB11)
#define bB12	(-aB12)
#define bB13	(-aB13)
#define bB14	(-aB14)
#define bB15	(-aB15)
#define bB16	(-aB16)
#define bB17	(-aB17)
#define bB18	(-aB18)
#define bB19	(-aB19)
#define bB20	(-aB20)
#define bB21	(-aB21)
#define bB22	(-aB22)
#define bB23	(-aB23)
#define bB24	(-aB24)
#define bB25	(-aB25)
#define bB26	(-aB26)

/* cBk = 2^k - 1 : all-ones masks of k bits. */
#define cB1	(aB1-1)
#define cB2	(aB2-1)
#define cB3	(aB3-1)
#define cB4	(aB4-1)
#define cB5	(aB5-1)
#define cB6	(aB6-1)
#define cB7	(aB7-1)
#define cB8	(aB8-1)
#define cB9	(aB9-1)
#define cB10	(aB10-1)
#define cB11	(aB11-1)
#define cB12	(aB12-1)
#define cB13	(aB13-1)
#define cB14	(aB14-1)
#define cB15	(aB15-1)
#define cB16	(aB16-1)
#define cB17	(aB17-1)
#define cB18	(aB18-1)
#define cB19	(aB19-1)
#define cB20	(aB20-1)
#define cB21	(aB21-1)
#define cB22	(aB22-1)
#define cB23	(aB23-1)
#define cB24	(aB24-1)
#define cB25	(aB25-1)
#define cB26	(aB26-1)

/* dBk = -(2^k) + 1 : negated powers of two, plus one. */
#define dB1	(-aB1+1)
#define dB2	(-aB2+1)
#define dB3	(-aB3+1)
#define dB4	(-aB4+1)
#define dB5	(-aB5+1)
#define dB6	(-aB6+1)
#define dB7	(-aB7+1)
#define dB8	(-aB8+1)
#define dB9	(-aB9+1)
#define dB10	(-aB10+1)
#define dB11	(-aB11+1)
#define dB12	(-aB12+1)
#define dB13	(-aB13+1)
#define dB14	(-aB14+1)
#define dB15	(-aB15+1)
#define dB16	(-aB16+1)
#define dB17	(-aB17+1)
#define dB18	(-aB18+1)
#define dB19	(-aB19+1)
#define dB20	(-aB20+1)
#define dB21	(-aB21+1)
#define dB22	(-aB22+1)
#define dB23	(-aB23+1)
#define dB24	(-aB24+1)
#define dB25	(-aB25+1)
#define dB26	(-aB26+1)

/*
 * Expected-value builders.  Each one is named after the instruction it
 * mirrors so that alu1() can spell the expected result as N(3, const).
 * rsb is the reversed subtraction: b - a.
 */
#define add(a, b)	$(a + b)
#define sub(a, b)	$(a - b)
#define rsb(a, b)	$(b - a)
#define mul(a, b)	$(a * b)
#define div(a, b)	$(a / b)
#define rem(a, b)	$(a % b)
#define and(a, b)	$(a & b)
#define or(a, b)	$(a | b)
#define xor(a, b)	$(a ^ b)

/*
 * alu2(N, X, L, R, V): one register-register check.
 *   N  instruction stem (emitted as N##r)
 *   X  unique label placed after the abort call
 *   L  left operand, loaded into %r1
 *   R  right operand, loaded into %r2
 *   V  expected result; execution skips the abort when %r0 == V
 */
#define alu2(N, X, L, R, V) \
	movi %r1 L \
	movi %r2 R \
	N##r %r0 %r1 %r2 \
	beqi X %r0 V \
	calli @abort \
X:

/*
 * alu1(N, M): run instruction N with left operand 3 against the 26
 * constants M##1 .. M##26 (M is one of aB, bB, cB, dB).
 */
#define alu1(N, M) \
	alu2(N, N##M##1, 3, $(M##1), N(3, M##1)) \
	alu2(N, N##M##2, 3, $(M##2), N(3, M##2)) \
	alu2(N, N##M##3, 3, $(M##3), N(3, M##3)) \
	alu2(N, N##M##4, 3, $(M##4), N(3, M##4)) \
	alu2(N, N##M##5, 3, $(M##5), N(3, M##5)) \
	alu2(N, N##M##6, 3, $(M##6), N(3, M##6)) \
	alu2(N, N##M##7, 3, $(M##7), N(3, M##7)) \
	alu2(N, N##M##8, 3, $(M##8), N(3, M##8)) \
	alu2(N, N##M##9, 3, $(M##9), N(3, M##9)) \
	alu2(N, N##M##10, 3, $(M##10), N(3, M##10)) \
	alu2(N, N##M##11, 3, $(M##11), N(3, M##11)) \
	alu2(N, N##M##12, 3, $(M##12), N(3, M##12)) \
	alu2(N, N##M##13, 3, $(M##13), N(3, M##13)) \
	alu2(N, N##M##14, 3, $(M##14), N(3, M##14)) \
	alu2(N, N##M##15, 3, $(M##15), N(3, M##15)) \
	alu2(N, N##M##16, 3, $(M##16), N(3, M##16)) \
	alu2(N, N##M##17, 3, $(M##17), N(3, M##17)) \
	alu2(N, N##M##18, 3, $(M##18), N(3, M##18)) \
	alu2(N, N##M##19, 3, $(M##19), N(3, M##19)) \
	alu2(N, N##M##20, 3, $(M##20), N(3, M##20)) \
	alu2(N, N##M##21, 3, $(M##21), N(3, M##21)) \
	alu2(N, N##M##22, 3, $(M##22), N(3, M##22)) \
	alu2(N, N##M##23, 3, $(M##23), N(3, M##23)) \
	alu2(N, N##M##24, 3, $(M##24), N(3, M##24)) \
	alu2(N, N##M##25, 3, $(M##25), N(3, M##25)) \
	alu2(N, N##M##26, 3, $(M##26), N(3, M##26))

/* alu(N): run instruction N against all four constant families. */
#define alu(N) \
	alu1(N, aB) \
	alu1(N, bB) \
	alu1(N, cB) \
	alu1(N, dB)

/* Left shift of 3 by N bits; label L<N>. */
#define _lsh(N) \
	alu2(lsh, L##N, 3, N, $(3<<N))
/* Right shift of (1<<63) by N bits; label R<N>. */
#define _rsh(N) \
	alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))

/* Shift counts 32..63 are only generated when the word size is not 32. */
#if __WORDSIZE == 32
#  define xsh64(X)	/**/
#else
#  define xsh64(X) \
	_##X##sh(32) \
	_##X##sh(33) \
	_##X##sh(34) \
	_##X##sh(35) \
	_##X##sh(36) \
	_##X##sh(37) \
	_##X##sh(38) \
	_##X##sh(39) \
	_##X##sh(40) \
	_##X##sh(41) \
	_##X##sh(42) \
	_##X##sh(43) \
	_##X##sh(44) \
	_##X##sh(45) \
	_##X##sh(46) \
	_##X##sh(47) \
	_##X##sh(48) \
	_##X##sh(49) \
	_##X##sh(50) \
	_##X##sh(51) \
	_##X##sh(52) \
	_##X##sh(53) \
	_##X##sh(54) \
	_##X##sh(55) \
	_##X##sh(56) \
	_##X##sh(57) \
	_##X##sh(58) \
	_##X##sh(59) \
	_##X##sh(60) \
	_##X##sh(61) \
	_##X##sh(62) \
	_##X##sh(63)
#endif

/* xsh(X): shift checks for counts 0..31, then xsh64(X); X is l or r. */
#define xsh(X) \
	_##X##sh(0) \
	_##X##sh(1) \
	_##X##sh(2) \
	_##X##sh(3) \
	_##X##sh(4) \
	_##X##sh(5) \
	_##X##sh(6) \
	_##X##sh(7) \
	_##X##sh(8) \
	_##X##sh(9) \
	_##X##sh(10) \
	_##X##sh(11) \
	_##X##sh(12) \
	_##X##sh(13) \
	_##X##sh(14) \
	_##X##sh(15) \
	_##X##sh(16) \
	_##X##sh(17) \
	_##X##sh(18) \
	_##X##sh(19) \
	_##X##sh(20) \
	_##X##sh(21) \
	_##X##sh(22) \
	_##X##sh(23) \
	_##X##sh(24) \
	_##X##sh(25) \
	_##X##sh(26) \
	_##X##sh(27) \
	_##X##sh(28) \
	_##X##sh(29) \
	_##X##sh(30) \
	_##X##sh(31) \
	xsh64(X)

#define lsh() \
	xsh(l)
#define rsh() \
	xsh(r)

/* reset(V): memset(buf, V, M64 + 8), i.e. refill the whole buffer. */
#define reset(V) \
	prepare \
		pushargi buf \
		pushargi V \
		pushargi $(M64 + 8) \
	finishi @memset

/*
 * stx(T, N, O, V): store V with the register-indexed store of type T
 * at base %v0 plus offset O (offset held in %r1).  N is unused here; it
 * only keeps the argument list parallel to the stxN tables below.
 */
#define stx(T, N, O, V) \
	movi %r0 V \
	movi %r1 O \
	stxr##T %r1 %v0 %r0
/* stx8/stx4/stx2: stores at offsets M##B3.., M##B2.., M##B1.. up to M##B26. */
#define stx8(T, M, V) \
	stx(T, 3, $(M##B3), V) \
	stx(T, 4, $(M##B4), V) \
	stx(T, 5, $(M##B5), V) \
	stx(T, 6, $(M##B6), V) \
	stx(T, 7, $(M##B7), V) \
	stx(T, 8, $(M##B8), V) \
	stx(T, 9, $(M##B9), V) \
	stx(T, 10, $(M##B10), V) \
	stx(T, 11, $(M##B11), V) \
	stx(T, 12, $(M##B12), V) \
	stx(T, 13, $(M##B13), V) \
	stx(T, 14, $(M##B14), V) \
	stx(T, 15, $(M##B15), V) \
	stx(T, 16, $(M##B16), V) \
	stx(T, 17, $(M##B17), V) \
	stx(T, 18, $(M##B18), V) \
	stx(T, 19, $(M##B19), V) \
	stx(T, 20, $(M##B20), V) \
	stx(T, 21, $(M##B21), V) \
	stx(T, 22, $(M##B22), V) \
	stx(T, 23, $(M##B23), V) \
	stx(T, 24, $(M##B24), V) \
	stx(T, 25, $(M##B25), V) \
	stx(T, 26, $(M##B26), V)
#define stx4(T, M, V) \
	stx(T, 2, $(M##B2), V) \
	stx8(T, M, V)
#define stx2(T, M, V) \
	stx(T, 1, $(M##B1), V) \
	stx4(T, M, V)

/*
 * ldx(T, N, M, O, V): clear %r0, load with the immediate-indexed load
 * of type T from %v0 + O, and abort unless the loaded value equals V.
 * The label ldx<T><N><M> must be unique per expansion.
 */
#define ldx(T, N, M, O, V) \
	movi %r0 0 \
	ldxi##T %r0 %v0 O \
	beqi ldx##T##N##M %r0 V \
	calli @abort \
ldx##T##N##M:
#define ldx8(T, M, V) \
	ldx(T, 3, M, $(M##B3), V) \
	ldx(T, 4, M, $(M##B4), V) \
	ldx(T, 5, M, $(M##B5), V) \
	ldx(T, 6, M, $(M##B6), V) \
	ldx(T, 7, M, $(M##B7), V) \
	ldx(T, 8, M, $(M##B8), V) \
	ldx(T, 9, M, $(M##B9), V) \
	ldx(T, 10, M, $(M##B10), V) \
	ldx(T, 11, M, $(M##B11), V) \
	ldx(T, 12, M, $(M##B12), V) \
	ldx(T, 13, M, $(M##B13), V) \
	ldx(T, 14, M, $(M##B14), V) \
	ldx(T, 15, M, $(M##B15), V) \
	ldx(T, 16, M, $(M##B16), V) \
	ldx(T, 17, M, $(M##B17), V) \
	ldx(T, 18, M, $(M##B18), V) \
	ldx(T, 19, M, $(M##B19), V) \
	ldx(T, 20, M, $(M##B20), V) \
	ldx(T, 21, M, $(M##B21), V) \
	ldx(T, 22, M, $(M##B22), V) \
	ldx(T, 23, M, $(M##B23), V) \
	ldx(T, 24, M, $(M##B24), V) \
	ldx(T, 25, M, $(M##B25), V) \
	ldx(T, 26, M, $(M##B26), V)
#define ldx4(T, M, V) \
	ldx(T, 2, M, $(M##B2), V) \
	ldx8(T, M, V)
#define ldx2(T, M, V) \
	ldx(T, 1, M, $(M##B1), V) \
	ldx4(T, M, V)

/*
 * stf/ldf: floating-point counterparts of stx/ldx.  The value travels
 * through %f0 and the store offset through %r0.
 */
#define stf(T, N, O, V) \
	movi##T %f0 V \
	movi %r0 O \
	stxr##T %r0 %v0 %f0
#define stf8(T, M, V) \
	stf(T, 3, $(M##B3), V) \
	stf(T, 4, $(M##B4), V) \
	stf(T, 5, $(M##B5), V) \
	stf(T, 6, $(M##B6), V) \
	stf(T, 7, $(M##B7), V) \
	stf(T, 8, $(M##B8), V) \
	stf(T, 9, $(M##B9), V) \
	stf(T, 10, $(M##B10), V) \
	stf(T, 11, $(M##B11), V) \
	stf(T, 12, $(M##B12), V) \
	stf(T, 13, $(M##B13), V) \
	stf(T, 14, $(M##B14), V) \
	stf(T, 15, $(M##B15), V) \
	stf(T, 16, $(M##B16), V) \
	stf(T, 17, $(M##B17), V) \
	stf(T, 18, $(M##B18), V) \
	stf(T, 19, $(M##B19), V) \
	stf(T, 20, $(M##B20), V) \
	stf(T, 21, $(M##B21), V) \
	stf(T, 22, $(M##B22), V) \
	stf(T, 23, $(M##B23), V) \
	stf(T, 24, $(M##B24), V) \
	stf(T, 25, $(M##B25), V) \
	stf(T, 26, $(M##B26), V)
#define stf4(T, M, V) \
	stf(T, 2, $(M##B2), V) \
	stf8(T, M, V)
#define ldf(T, N, M, O, V) \
	movi##T %f0 0 \
	ldxi##T %f0 %v0 O \
	beqi##T ldf##T##N##M %f0 V \
	calli @abort \
ldf##T##N##M:
#define ldf8(T, M, V) \
	ldf(T, 3, M, $(M##B3), V) \
	ldf(T, 4, M, $(M##B4), V) \
	ldf(T, 5, M, $(M##B5), V) \
	ldf(T, 6, M, $(M##B6), V) \
	ldf(T, 7, M, $(M##B7), V) \
	ldf(T, 8, M, $(M##B8), V) \
	ldf(T, 9, M, $(M##B9), V) \
	ldf(T, 10, M, $(M##B10), V) \
	ldf(T, 11, M, $(M##B11), V) \
	ldf(T, 12, M, $(M##B12), V) \
	ldf(T, 13, M, $(M##B13), V) \
	ldf(T, 14, M, $(M##B14), V) \
	ldf(T, 15, M, $(M##B15), V) \
	ldf(T, 16, M, $(M##B16), V) \
	ldf(T, 17, M, $(M##B17), V) \
	ldf(T, 18, M, $(M##B18), V) \
	ldf(T, 19, M, $(M##B19), V) \
	ldf(T, 20, M, $(M##B20), V) \
	ldf(T, 21, M, $(M##B21), V) \
	ldf(T, 22, M, $(M##B22), V) \
	ldf(T, 23, M, $(M##B23), V) \
	ldf(T, 24, M, $(M##B24), V) \
	ldf(T, 25, M, $(M##B25), V) \
	ldf(T, 26, M, $(M##B26), V)
#define ldf4(T, M, V) \
	ldf(T, 2, M, $(M##B2), V) \
	ldf8(T, M, V)

/*
 * ldst_<type>(): two passes per data type.
 *   pass 1: %v0 = buf,       positive offsets (aB family)
 *   pass 2: %v0 = buf + M64, negative offsets (bB family)
 * The buffer is refilled with 0xa5 before each pass so a load can only
 * see the test pattern if the store of the same pass really wrote it.
 */
#define ldst_c() \
	reset(0xa5) \
		movi %v0 buf \
		stx2(_c, a, 0x5a) \
		ldx2(_c, a, 0x5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx2(_c, b, 0x5a) \
		ldx2(_c, b, 0x5a)
/*
 * The second reset matters: buf + M64 + bB25 is the same address as
 * buf + aB25, so without it the 0x5a left by pass 1 at that address
 * would satisfy the pass 2 load even if the pass 2 store wrote nothing.
 */
#define ldst_uc() \
	reset(0xa5) \
		movi %v0 buf \
		stx2(_c, a, 0x5a) \
		ldx2(_uc, a, 0x5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx2(_c, b, 0x5a) \
		ldx2(_uc, b, 0x5a)
#define ldst_s() \
	reset(0xa5) \
		movi %v0 buf \
		stx2(_s, a, 0x5a5a) \
		ldx2(_s, a, 0x5a5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx2(_s, b, 0x5a5a) \
		ldx2(_s, b, 0x5a5a)
#define ldst_us() \
	reset(0xa5) \
		movi %v0 buf \
		stx2(_s, a, 0x5a5a) \
		ldx2(_us, a, 0x5a5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx2(_s, b, 0x5a5a) \
		ldx2(_us, b, 0x5a5a)
#define ldst_i() \
	reset(0xa5) \
		movi %v0 buf \
		stx4(_i, a, 0x5a5a5a5a) \
		ldx4(_i, a, 0x5a5a5a5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx4(_i, b, 0x5a5a5a5a) \
		ldx4(_i, b, 0x5a5a5a5a)
#define ldst_ui() \
	reset(0xa5) \
		movi %v0 buf \
		stx4(_i, a, 0x5a5a5a5a) \
		ldx4(_ui, a, 0x5a5a5a5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx4(_i, b, 0x5a5a5a5a) \
		ldx4(_ui, b, 0x5a5a5a5a)
#define ldst_l() \
	reset(0xa5) \
		movi %v0 buf \
		stx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
		ldx8(_l, a, 0x5a5a5a5a5a5a5a5a) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stx8(_l, b, 0x5a5a5a5a5a5a5a5a) \
		ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
#define ldst_f() \
	reset(0xa5) \
		movi %v0 buf \
		stf4(_f, a, 0.5) \
		ldf4(_f, a, 0.5) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stf4(_f, b, 0.5) \
		ldf4(_f, b, 0.5)
#define ldst_d() \
	reset(0xa5) \
		movi %v0 buf \
		stf8(_d, a, 0.5) \
		ldf8(_d, a, 0.5) \
	reset(0xa5) \
		movi %v0 $(buf + M64) \
		stf8(_d, b, 0.5) \
		ldf8(_d, b, 0.5)

/*
 * Data: buf is M64 + 8 bytes, so an 8-byte access at offset +M64 (aB26)
 * from buf still lands inside it; "ok" is the success message.
 */
.data	67112960
buf:
.size	M64
.size	8
ok:
.c	"ok"

.code
	prolog

	alu(add)
	alu(sub)
	alu(rsb)
	alu(mul)
	alu(div)
	alu(rem)
	lsh()
	rsh()
	alu(and)
	alu(or)
	alu(xor)
	ldst_c()
	ldst_uc()
	ldst_s()
	ldst_us()
	ldst_i()
#if __WORDSIZE == 64
	ldst_ui()
	ldst_l()
#endif
	ldst_f()
	ldst_d()

	prepare
		pushargi ok
	finishi @puts
	ret
	epilog