#! /usr/bin/env perl
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. October 2012. All rights reserved.
# ====================================================================

######################################################################
# Camellia for SPARC T4.
#
# As with AES, the results below [for aligned data] are virtually
# identical to critical path lengths for 3-cycle instruction latency:
#
#		128-bit key	192/256-
# CBC encrypt	4.14/4.21(*)	5.46/5.52
#			 (*)	numbers after slash are for
#				misaligned data.
#
# As with Intel AES-NI, the question is whether it's possible to improve
# performance of parallelizable modes by interleaving round
# instructions. In Camellia every instruction is dependent on the
# previous one, which means that there is room for 2 additional ones
# in between two dependent instructions. Can we expect 3x performance
# improvement? At least one can argue that it should be possible to
# break the 2x barrier... For some reason not even 2x appears to be
# possible:
#
#		128-bit key	192/256-
# CBC decrypt	2.21/2.74	2.99/3.40
# CTR		2.15/2.68(*)	2.93/3.34
#			 (*)	numbers after slash are for
#				misaligned data.
#
# This is for 2x interleave. But compared to 1x interleave CBC decrypt
# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
# So out-of-order execution logic can take non-interleaved code
# to 1.87x, but can't take 2x interleaved one any further. There
# surely is some explanation... As a result 3x interleave was not even
# attempted.
# Instead an effort was made to share specific modes
# implementations with AES module (therefore sparcv9_modes.pl).
#
# To anchor to something else, software C implementation processes
# one byte in 38 cycles with 128-bit key on same processor.

# Locate this script's directory so that sparcv9_modes.pl can be found
# either next to it or in ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument names the output file.  Use the
# three-argument form of open so the name is never parsed as a mode, and
# treat a failure as fatal instead of silently writing the assembly to
# whatever STDOUT happened to be inherited.  When no name is given the
# inherited STDOUT is used unchanged.
$output = pop;
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}

$::evp=1;	# if $evp is set to 0, script generates module with
# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
# entry points. These are fully compatible with openssl/camellia.h.

######################################################################
# single-round subroutines
#
# cmll_t4_encrypt and cmll_t4_decrypt each take (inp, out, key) in
# %o0-%o2, process one 16-byte block and cope with input and output
# pointers that are not 8-byte aligned.  The number of grand rounds is
# read from offset 272 of the key schedule.
#
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code=<<___;
#include "sparc_arch.h"

.text

.globl	cmll_t4_encrypt
.align	32
cmll_t4_encrypt:
	andcc	$inp, 7, %g1		! is input aligned?
	andn	$inp, 7, $inp

	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5

	ldx	[$inp + 0], %o4
	bz,pt	%icc, 1f
	ldx	[$inp + 8], %o5
	ldx	[$inp + 16], $inp
	sll	%g1, 3, %g1
	sub	%g0, %g1, %o3
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %o3, %o5
	srlx	$inp, %o3, %o3
	or	%o5, %o4, %o4
	or	%o3, %g1, %o5
1:
	ld	[$key + 272], $rounds	! grandRounds, 3 or 4
	ldd	[$key + 16], %f12
	ldd	[$key + 24], %f14
	xor	%g4, %o4, %o4
	xor	%g5, %o5, %o5
	ldd	[$key + 32], %f16
	ldd	[$key + 40], %f18
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	ldd	[$key + 48], %f20
	ldd	[$key + 56], %f22
	sub	$rounds, 1, $rounds
	ldd	[$key + 64], %f24
	ldd	[$key + 72], %f26
	add	$key, 80, $key

.Lenc:
	camellia_f	%f12, %f2, %f0, %f2
	ldd	[$key + 0], %f12
	sub	$rounds,1,$rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd	[$key + 8], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd	[$key + 16], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd	[$key + 32], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 40], %f22
	camellia_fl	%f24, %f0, %f0
	ldd	[$key + 48], %f24
	camellia_fli	%f26, %f2, %f2
	ldd	[$key + 56], %f26
	brnz,pt	$rounds, .Lenc
	add	$key, 64, $key

	andcc	$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor	%f24, %f4, %f0
	fxor	%f26, %f2, %f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [$out + 0]
	retl
	std	%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov	0xff, $mask
	srl	$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda	%f4, [$out + $mask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $mask, $mask
	retl
	stda	%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_encrypt,#function
.size	cmll_t4_encrypt,.-cmll_t4_encrypt

.globl	cmll_t4_decrypt
.align	32
cmll_t4_decrypt:
	ld	[$key + 272], $rounds	! grandRounds, 3 or 4
	andcc	$inp, 7, %g1		! is input aligned?
	andn	$inp, 7, $inp

	sll	$rounds, 6, $rounds
	add	$rounds, $key, $key

	ldx	[$inp + 0], %o4
	bz,pt	%icc, 1f
	ldx	[$inp + 8], %o5
	ldx	[$inp + 16], $inp
	sll	%g1, 3, %g1
	sub	%g0, %g1, %g4
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %g4, %o5
	srlx	$inp, %g4, %g4
	or	%o5, %o4, %o4
	or	%g4, %g1, %o5
1:
	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5
	ldd	[$key - 8], %f12
	ldd	[$key - 16], %f14
	xor	%g4, %o4, %o4
	xor	%g5, %o5, %o5
	ldd	[$key - 24], %f16
	ldd	[$key - 32], %f18
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	ldd	[$key - 40], %f20
	ldd	[$key - 48], %f22
	sub	$rounds, 64, $rounds
	ldd	[$key - 56], %f24
	ldd	[$key - 64], %f26
	sub	$key, 64, $key

.Ldec:
	camellia_f	%f12, %f2, %f0, %f2
	ldd	[$key - 8], %f12
	sub	$rounds, 64, $rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd	[$key - 16], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd	[$key - 24], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key - 32], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd	[$key - 40], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key - 48], %f22
	camellia_fl	%f24, %f0, %f0
	ldd	[$key - 56], %f24
	camellia_fli	%f26, %f2, %f2
	ldd	[$key - 64], %f26
	brnz,pt	$rounds, .Ldec
	sub	$key, 64, $key

	andcc	$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor	%f26, %f4, %f0
	fxor	%f24, %f2, %f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [$out + 0]
	retl
	std	%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov	0xff, $mask
	srl	$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda	%f4, [$out + $mask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $mask, $mask
	retl
	stda	%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_decrypt,#function
.size	cmll_t4_decrypt,.-cmll_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
# ROTL128($rot) returns assembly text that rotates the 128-bit value
# held in the register pair %o4:%o5 left by $rot bits, using %g4 and
# %g5 as scratch.  The text is meant to be spliced after a leading tab:
# instructions are joined with "\n\t" and there is no trailing newline.
# The shift counts are emitted as "64-$rot" and "$rot", so $rot has to
# be in the range 1..63.
sub ROTL128 {
    my $rot = shift;

    return join("\n\t",
	"srlx\t%o4, 64-$rot, %g4",
	"sllx\t%o4, $rot, %o4",
	"srlx\t%o5, 64-$rot, %g5",
	"sllx\t%o5, $rot, %o5",
	"or\t%o4, %g5, %o4",
	"or\t%o5, %g4, %o5");
}

# cmll_t4_set_key takes (inp, bits, out) in %o0-%o2.  It stores the
# expanded key at out, writes the grand-round count (3 for 128-bit keys,
# 4 otherwise) at out+0x110 and returns 0 in %o0.
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
$code.=<<___;
.globl	cmll_t4_set_key
.align	32
cmll_t4_set_key:
	and	$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp	$bits, 192
	ldd	[$inp + 0], %f0
	bl,pt	%icc,.L128
	ldd	[$inp + 8], %f2

	be,pt	%icc,.L192
	ldd	[$inp + 16], %f4

	brz,pt	$tmp, .L256aligned
	ldd	[$inp + 24], %f6

	ldd	[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	b	.L256aligned
	faligndata	%f6, %f8, %f6

.align	16
.L192:
	brz,a,pt	$tmp, .L256aligned
	fnot2	%f4, %f6

	ldd	[$inp + 24], %f6
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	fnot2	%f4, %f6

.L256aligned:
	std	%f0, [$out + 0]		! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [$out + 8]		! k[2, 3]
	fsrc2	%f2, %f30
	fxor	%f4, %f0, %f0
	b	.L128key
	fxor	%f6, %f2, %f2

.align	16
.L128:
	brz,pt	$tmp, .L128aligned
	nop

	ldd	[$inp + 16], %f4
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2

.L128aligned:
	std	%f0, [$out + 0]		! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [$out + 8]		! k[2, 3]
	fsrc2	%f2, %f30

.L128key:
	mov	%o7, %o5
1:	call	.+8
	add	%o7, SIGMA-1b, %o4
	mov	%o5, %o7

	ldd	[%o4 + 0], %f16
	ldd	[%o4 + 8], %f18
	ldd	[%o4 + 16], %f20
	ldd	[%o4 + 24], %f22

	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0

	bge,pn	%icc, .L256key
	nop
	std	%f0, [$out + 0x10]	! k[ 4,  5]
	std	%f2, [$out + 0x18]	! k[ 6,  7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xc0]	! k[48, 49]
	stx	%o5, [$out + 0xc8]	! k[50, 51]

	movdtox	%f28, %o4		! k[ 0,  1]
	movdtox	%f30, %o5		! k[ 2,  3]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8,  9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(15)`
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]

	mov	3, $tmp
	st	$tmp, [$out + 0x110]
	retl
	xor	%o0, %o0, %o0

.align	16
.L256key:
	ldd	[%o4 + 32], %f24
	ldd	[%o4 + 40], %f26

	std	%f0, [$out + 0x30]	! k[12, 13]
	std	%f2, [$out + 0x38]	! k[14, 15]

	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0

	std	%f0, [$out + 0x10]	! k[ 4,  5]
	std	%f2, [$out + 0x18]	! k[ 6,  7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(30)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(51)`
	stx	%o4, [$out + 0x100]	! k[64, 65]
	stx	%o5, [$out + 0x108]	! k[66, 67]

	movdtox	%f4, %o4		! k[ 8,  9]
	movdtox	%f6, %o5		! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8,  9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xd0]	! k[52, 53]
	stx	%o5, [$out + 0xd8]	! k[54, 55]
	ldx	[$out + 0x30], %o4	! k[12, 13]
	ldx	[$out + 0x38], %o5	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	srlx	%o4, 32, %g4
	srlx	%o5, 32, %g5
	st	%o4, [$out + 0xc0]	! k[48]
	st	%g5, [$out + 0xc4]	! k[49]
	st	%o5, [$out + 0xc8]	! k[50]
	st	%g4, [$out + 0xcc]	! k[51]
	`&ROTL128(49)`
	stx	%o4, [$out + 0xe0]	! k[56, 57]
	stx	%o5, [$out + 0xe8]	! k[58, 59]

	movdtox	%f28, %o4		! k[ 0,  1]
	movdtox	%f30, %o5		! k[ 2,  3]
	`&ROTL128(45)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xf0]	! k[60, 61]
	stx	%o5, [$out + 0xf8]	! k[62, 63]

	mov	4, $tmp
	st	$tmp, [$out + 0x110]
	retl
	xor	%o0, %o0, %o0
.type	cmll_t4_set_key,#function
.size	cmll_t4_set_key,.-cmll_t4_set_key
.align	32
SIGMA:
	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
.type	SIGMA,#object
.size	SIGMA,.-SIGMA
.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
___
}

######################################################################
# Helper subroutines named after the "cmll" algorithm prefix that is
# passed to the alg_*_implement generators called at the end of this
# block.
#
{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

# _cmll128_load_enckey: first 16 bytes of the schedule go to %g4/%g5,
# the doublewords at offsets 16..200 go to %f16..%f62.
$code.=<<___;
.align	32
_cmll128_load_enckey:
	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5
___
for ($i=2; $i<26;$i++) {			# load key schedule
    $code.=<<___;
	ldd	[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_cmll128_load_enckey,#function
.size	_cmll128_load_enckey,.-_cmll128_load_enckey
_cmll256_load_enckey=_cmll128_load_enckey

.align	32
_cmll256_load_deckey:
	ldd	[$key + 64], %f62
	ldd	[$key + 72], %f60
	b	.Load_deckey
	add	$key, 64, $key
_cmll128_load_deckey:
	ldd	[$key + 0], %f60
	ldd	[$key + 8], %f62
.Load_deckey:
___
# Decryption loads the same doublewords into descending registers
# (%f58 down to %f16), i.e. in the reverse order of the encrypt loader.
for ($i=2; $i<24;$i++) {			# load key schedule
    $code.=<<___;
	ldd	[$key + `8*$i`], %f`62-2*$i`
___
}
$code.=<<___;
	ldx	[$key + 192], %g4
	retl
	ldx	[$key + 200], %g5
.type	_cmll256_load_deckey,#function
.size	_cmll256_load_deckey,.-_cmll256_load_deckey

.align	32
_cmll128_encrypt_1x:
___
# Three groups of rounds; the first two are followed by the FL/FLI
# layer, the last one is finished by the tail emitted after the loop.
for ($i=0; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f4
	camellia_f	%f58, %f0, %f4, %f2
	fxor	%f60, %f4, %f0
	retl
	fxor	%f62, %f2, %f2
.type	_cmll128_encrypt_1x,#function
.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
_cmll128_decrypt_1x=_cmll128_encrypt_1x

.align	32
_cmll128_encrypt_2x:
___
# Same round structure as the 1x variant, with a second block held in
# %f4/%f6 interleaved instruction by instruction.
for ($i=0; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f8
	camellia_f	%f56, %f6, %f4, %f10
	camellia_f	%f58, %f0, %f8, %f2
	camellia_f	%f58, %f4, %f10, %f6
	fxor	%f60, %f8, %f0
	fxor	%f60, %f10, %f4
	fxor	%f62, %f2, %f2
	retl
	fxor	%f62, %f6, %f6
.type	_cmll128_encrypt_2x,#function
.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
_cmll128_decrypt_2x=_cmll128_encrypt_2x

.align	32
_cmll256_encrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 208], %f16
	ldd	[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 224], %f20
	ldd	[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd	[$key + 240], %f24
	ldd	[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd	[$key + 256], %f28
	ldd	[$key + 264], %f30
___
# The 256-bit variants reuse %f16-%f30: the registers are reloaded with
# the tail of the schedule as soon as their first use is done, and are
# restored from the head of the schedule before returning.
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 16], %f16
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 32], %f20
	ldd	[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd	[$key + 48], %f24
	ldd	[$key + 56], %f26
	fxor	%f28, %f4, %f0
	fxor	%f30, %f2, %f2
	ldd	[$key + 64], %f28
	retl
	ldd	[$key + 72], %f30
.type	_cmll256_encrypt_1x,#function
.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x

.align	32
_cmll256_encrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 208], %f16
	ldd	[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 224], %f20
	ldd	[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd	[$key + 240], %f24
	ldd	[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd	[$key + 256], %f28
	ldd	[$key + 264], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 16], %f16
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 32], %f20
	ldd	[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd	[$key + 48], %f24
	ldd	[$key + 56], %f26
	fxor	%f28, %f8, %f0
	fxor	%f28, %f10, %f4
	fxor	%f30, %f2, %f2
	fxor	%f30, %f6, %f6
	ldd	[$key + 64], %f28
	retl
	ldd	[$key + 72], %f30
.type	_cmll256_encrypt_2x,#function
.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x

.align	32
_cmll256_decrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key - 8], %f16
	ldd	[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key - 24], %f20
	ldd	[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd	[$key - 40], %f24
	ldd	[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd	[$key - 56], %f28
	ldd	[$key - 64], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 184], %f16
	ldd	[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 168], %f20
	ldd	[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd	[$key + 152], %f24
	ldd	[$key + 144], %f26
	fxor	%f30, %f4, %f0
	fxor	%f28, %f2, %f2
	ldd	[$key + 136], %f28
	retl
	ldd	[$key + 128], %f30
.type	_cmll256_decrypt_1x,#function
.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x

.align	32
_cmll256_decrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key - 8], %f16
	ldd	[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key - 24], %f20
	ldd	[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd	[$key - 40], %f24
	ldd	[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd	[$key - 56], %f28
	ldd	[$key - 64], %f30
___
for ($i=1; $i<3; $i++) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 184], %f16
	ldd	[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 168], %f20
	ldd	[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd	[$key + 152], %f24
	ldd	[$key + 144], %f26
	fxor	%f30, %f8, %f0
	fxor	%f30, %f10, %f4
	fxor	%f28, %f2, %f2
	fxor	%f28, %f6, %f6
	ldd	[$key + 136], %f28
	retl
	ldd	[$key + 128], %f30
.type	_cmll256_decrypt_2x,#function
.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
___

# Mode generators come from sparcv9_modes.pl (required above).
&alg_cbc_encrypt_implement("cmll",128);
&alg_cbc_encrypt_implement("cmll",256);

&alg_cbc_decrypt_implement("cmll",128);
&alg_cbc_decrypt_implement("cmll",256);

if ($::evp) {
    &alg_ctr32_implement("cmll",128);
    &alg_ctr32_implement("cmll",256);
}
}}}

# Without EVP, additionally emit the Camellia_* entry points: aliases
# for the single-block routines, an argument-checking front end for
# cmll_t4_set_key, and a CBC dispatcher keyed on the grand-round count
# stored at offset 272 of the key schedule.
if (!$::evp) {
$code.=<<___;
.global	Camellia_encrypt
Camellia_encrypt=cmll_t4_encrypt
.global	Camellia_decrypt
Camellia_decrypt=cmll_t4_decrypt
.global	Camellia_set_key
.align	32
Camellia_set_key:
	andcc	%o2, 7, %g0		! double-check alignment
	bnz,a,pn	%icc, 1f
	mov	-1, %o0
	brz,a,pn	%o0, 1f
	mov	-1, %o0
	brz,a,pn	%o2, 1f
	mov	-1, %o0
	andncc	%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov	-2, %o0
	cmp	%o1, 128
	bl,a,pn	%icc, 1f
	mov	-2, %o0
	b	cmll_t4_set_key
	nop
1:	retl
	nop
.type	Camellia_set_key,#function
.size	Camellia_set_key,.-Camellia_set_key
___

my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl	Camellia_cbc_encrypt
.align	32
Camellia_cbc_encrypt:
	ld	[$key + 272], %g1
	nop
	brz	$enc, .Lcbc_decrypt
	cmp	%g1, 3

	be,pt	%icc, cmll128_t4_cbc_encrypt
	nop
	ba	cmll256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	be,pt	%icc, cmll128_t4_cbc_decrypt
	nop
	ba	cmll256_t4_cbc_decrypt
	nop
.type	Camellia_cbc_encrypt,#function
.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
___
}

&emit_assembler();

# Buffered write errors only surface when the handle is closed; report
# them so that a truncated assembly file is never mistaken for success.
close STDOUT or die "error closing STDOUT: $!";