(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  *)

(* Shorthand types for vector elements.  *)
type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | P64 | P128 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64
          | Conv of elts * elts | Cast of elts * elts | NoElts

type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType

(* These vector types correspond directly to C types.  *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float16x4
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_poly64 | T_poly64x1
             | T_poly64x2 | T_poly128
             | T_float16 | T_float32
             | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI | T_intTI
             | T_floatHF | T_floatSF

(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode

type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands.  *)

type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt

type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype

type vecmode = V8QI | V4HI | V4HF | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI | TI
             | QI | HI | SI | SF
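(* A shape_form such as All (3, Dreg) therefore reads: three operands
   (the result plus two sources), all D registers; Use_operands spells
   out a per-operand shape explicitly when no canned form fits.  *)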
type opcode =
  (* Binary ops.  *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vfma
  | Vfms
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops.  *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops.  *)
  | Vrintn
  | Vrinta
  | Vrintp
  | Vrintm
  | Vrintz
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops.  *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high
  | Vget_low
  (* Convert vectors.  *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn
  | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp

let rev_elems revsize elsize nelts _ =
  let mask = (revsize / elsize) - 1 in
  let arr = Array.init nelts
    (fun i -> i lxor mask) in
  Array.to_list arr

let permute_range i stride nelts increment =
  let rec build i = function
    0 -> []
  | nelts -> i :: (i + stride) :: build (i + increment) (pred nelts) in
  build i nelts

(* Generate a list of integers suitable for vzip.  *)
let zip_range i stride nelts = permute_range i stride nelts 1

(* Generate a list of integers suitable for vunzip.  *)
let uzip_range i stride nelts = permute_range i stride nelts 4

(* Generate a list of integers suitable for trn.  *)
let trn_range i stride nelts = permute_range i stride nelts 2

let zip_elems _ nelts part =
  match part with
    `lo -> zip_range 0 nelts (nelts / 2)
  | `hi -> zip_range (nelts / 2) nelts (nelts / 2)

let uzip_elems _ nelts part =
  match part with
    `lo -> uzip_range 0 2 (nelts / 2)
  | `hi -> uzip_range 1 2 (nelts / 2)

let trn_elems _ nelts part =
  match part with
    `lo -> trn_range 0 nelts (nelts / 2)
  | `hi -> trn_range 1 nelts (nelts / 2)
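(* Worked examples of the masks these helpers produce, for an 8-element
   case (computed by hand from the definitions above; indices >= nelts
   select from the second input vector, as in __builtin_shuffle):
     rev_elems 32 8 8 ()  -> [3; 2; 1; 0; 7; 6; 5; 4]  (vrev32 on bytes)
     zip_elems  () 8 `lo  -> [0; 8; 1; 9; 2; 10; 3; 11]
     uzip_elems () 8 `lo  -> [0; 2; 4; 6; 8; 10; 12; 14]
     trn_elems  () 8 `lo  -> [0; 8; 2; 10; 4; 12; 6; 14]  *)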
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments).  *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments.  *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms).  *)
  (* Implement builtin as shuffle.  The parameter is a function which returns
     masks suitable for __builtin_shuffle: arguments are (element size,
     number of elements, high/low part selector).  *)
  | Use_shuffle of (int -> int -> [`lo|`hi] -> int list)
  (* A specification as to the shape of instruction expected upon
     disassembly, used if it differs from the shape used to build the
     intrinsic prototype.  Multiple entries in the constructor's argument
     indicate that the intrinsic expands to more than one assembly
     instruction, each with a corresponding shape specified here.  *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin.  *)
  (* Override the name of the instruction.  If more than one name
     is specified, it means that the instruction can have any of those
     names.  *)
  | Instruction_name of string list
  (* Mark that the intrinsic yields no instructions, or expands to yield
     behavior that the test generator cannot test.  *)
  | No_op
  (* Mark that the intrinsic has constant arguments that cannot be set
     to the defaults (zero for pointers and one otherwise) in the test
     cases.  The function supplied must return the integer to be written
     into the testcase for the argument number (0-based) supplied to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_vector_reg
  | Fixed_core_reg
  (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined.  *)
  | Requires_feature of string
  (* Mark that the intrinsic requires a particular architecture version.  *)
  | Requires_arch of int
  (* Mark that the intrinsic requires a particular bit in __ARM_FP to
     be set.  *)
  | Requires_FP_bit of int
  (* Compiler optimization level for the test.  *)
  | Compiler_optim of string

exception MixedMode of elts * elts

let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 | F16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | P64 | I64 | B64 -> 64
  | P128 -> 128
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else raise (MixedMode (a, b))
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"

let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 | P64 | P128 -> Poly
  | F16 | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType

let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 16 -> F16
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Poly, 64 -> P64
  | Poly, 128 -> P128
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"

(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)
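(* For instance (following the definitions above):
     elt_width P128             -> 128
     unsigned_of_elt S32        -> U32
     signed_of_elt U8           -> S8
     elt_of_class_width Poly 64 -> P64  *)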
(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)

let non_signed_variant = function
    S8 -> I8
  | S16 -> I16
  | S32 -> I32
  | S64 -> I64
  | U8 -> I8
  | U16 -> I16
  | U32 -> I32
  | U64 -> I64
  | x -> x

let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)

let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)

(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.  *)
let find_key_operand operands =
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno - 1)
  in
    scan ((Array.length operands) - 1)

(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
   SHAPE.  For a Use_operands shape, if ARGPOS is passed then return the mode
   for the given argument position, else determine which argument to return a
   mode for automatically.  *)

let rec mode_of_elt ?argpos elt shape =
  let flt = match elt_class elt with
    Float | ConvClass (_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 | 128 -> 4
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      if flt then
        [| V8QI; V4HF; V2SF; DI |].(idx)
      else
        [| V8QI; V4HI; V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI; TI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops ->
      begin match argpos with
        None -> mode_of_elt ?argpos elt (All (0, find_key_operand ops))
      | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
      end
  | _ -> failwith "invalid shape"

(* Modify an element type dependent on the shape of the instruction and the
   operand number.  *)

let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
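(* For instance:
     widen_elt S16  -> S32        narrow_elt U64 -> U32
     mode_of_elt S16 (All (3, Dreg)) -> V4HI
     mode_of_elt F32 (All (3, Qreg)) -> V4SF
   and for a Long shape, shapemap widens only operand 0 (the result), so
   "vaddl" maps e.g. S8 inputs to an S16 result element.  *)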
(* Register type (D/Q) of an operand, based on shape and operand number.  *)

let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)

let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | P64 -> T_poly64x1
        | P128 -> T_poly128
        | F16 -> T_float16x4
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type for Dreg"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | P64 -> T_poly64x2
        | P128 -> T_poly128
        | _ -> failwith "Bad elt type for Qreg"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | P64 -> T_poly64
        | P128 -> T_poly128
        | F32 -> T_float32
        | _ -> failwith "Bad elt type for Corereg"
        end
    | Immed ->
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt

(* Return size of a vector type, in bits.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly64x1 | T_poly16x4 | T_float16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly64x2 | T_poly16x8 -> 128
  | _ -> raise Not_found

let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
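(* For instance:
     type_for_elt (All (3, Dreg)) S8 0 -> T_int8x8
     type_for_elt Narrow U32 1         -> T_uint32x4  (operand 1 is a Qreg)
     inttype_for_array 2 T_int32x4     -> B_OImode    (2 * 128 / 32 = 8 words)
     inttype_for_array 3 T_int8x8      -> B_EImode    (3 * 64 / 32 = 6 words)  *)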
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types.  *)

let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1

(* Sign-sensitive comparison.  *)

let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt

(* Signed/unsigned invariant comparison.  *)

let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
    shape', elt''

(* Comparison (VTST) where only the element width matters.  *)

let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only

let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt

(* Genericised constant-shift type-generating function.  *)

let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt

(* Use for immediate right-shifts.  *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed.  *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt
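(* A worked example: for "vadd" below, with shape All (3, Dreg) and element
   S8, elts_same_2 yields (Arity2 (T_int8x8, T_int8x8, T_int8x8), S8);
   sign_invar_2 additionally maps the element to I8 via non_signed_variant,
   so the signed and unsigned intrinsics share one sign-agnostic builtin.  *)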
(* Immediate left-shift.  *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result.  *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters.  *)

let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt

(* Get/set lane.  *)

let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts

let create_vector shape elt =
  let vtype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, vtype), elt

let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))

(* Operation has an unsigned result even if operands are signed.  *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))

let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation.  *)

let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops.  *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))

(* Operations where only bits matter.  *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
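(* Note the element remapping in get_lane above: P8/P16 extract as U8/U16,
   and all 32-bit elements (S32, U32, F32) collapse to the B32 "bits"
   element, since moving a full 32-bit lane to a core register needs no
   sign or zero extension, so one builtin can serve all three types.  *)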
(* Store insns.  *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt

let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3

(* Bit-select operations (first operand is unsigned int).  *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts

(* Common lists of supported element types.  *)

let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_16 = [P8; P16; S8; S16; U8; U16]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
let suf_32 = [S32; U32; F32]

let ops =
  [
    (* Addition.  *)
    Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
    Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
    Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
    Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
    Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
    Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
    Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
      All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
      All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
    Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
    Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
    Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
    Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
      Narrow, "vRaddhn", sign_invar_2, su_16_64;

    (* Multiplication.  *)
    Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
    Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
    Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
      elts_same_2, [S16; S32];
    Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
      elts_same_2, [S16; S32];
    Vmul,
      [Saturating; Rounding; Doubling; High_half;
       Instruction_name ["vqrdmulh"]],
      All (3, Dreg), "vqRdmulh",
      elts_same_2, [S16; S32];
    Vmul,
      [Saturating; Rounding; Doubling; High_half;
       Instruction_name ["vqrdmulh"]],
      All (3, Qreg), "vqRdmulhQ",
      elts_same_2, [S16; S32];
    Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
    Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];

    (* Multiply-accumulate.  *)
    Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
    Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
    Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
    Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
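    (* Each row of this table reads: opcode, feature list, shape, intrinsic
       base name, typing function, supported element types.  The generators
       append an element-type suffix to the base name, so the first Vadd
       entry above yields vadd_s8 ... vadd_u32 and vadd_f32, all funnelled
       through sign-agnostic builtins by sign_invar_2.  *)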
    (* Multiply-subtract.  *)
    Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
    Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
    Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
    Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];

    (* Fused-multiply-accumulate.  *)
    Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32];
    Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32];
    Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32];
    Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32];

    (* Round to integral.  *)
    Vrintn, [Builtin_name "vrintn"; Requires_arch 8],
      Use_operands [| Dreg; Dreg |], "vrndn", elts_same_1, [F32];
    Vrintn, [Builtin_name "vrintn"; Requires_arch 8],
      Use_operands [| Qreg; Qreg |], "vrndqn", elts_same_1, [F32];
    Vrinta, [Builtin_name "vrinta"; Requires_arch 8],
      Use_operands [| Dreg; Dreg |], "vrnda", elts_same_1, [F32];
    Vrinta, [Builtin_name "vrinta"; Requires_arch 8],
      Use_operands [| Qreg; Qreg |], "vrndqa", elts_same_1, [F32];
    Vrintp, [Builtin_name "vrintp"; Requires_arch 8],
      Use_operands [| Dreg; Dreg |], "vrndp", elts_same_1, [F32];
    Vrintp, [Builtin_name "vrintp"; Requires_arch 8],
      Use_operands [| Qreg; Qreg |], "vrndqp", elts_same_1, [F32];
    Vrintm, [Builtin_name "vrintm"; Requires_arch 8],
      Use_operands [| Dreg; Dreg |], "vrndm", elts_same_1, [F32];
    Vrintm, [Builtin_name "vrintm"; Requires_arch 8],
      Use_operands [| Qreg; Qreg |], "vrndqm", elts_same_1, [F32];
    Vrintz, [Builtin_name "vrintz"; Requires_arch 8],
      Use_operands [| Dreg; Dreg |], "vrnd", elts_same_1, [F32];
    Vrintz, [Builtin_name "vrintz"; Requires_arch 8],
      Use_operands [| Qreg; Qreg |], "vrndq", elts_same_1, [F32];

    (* Subtraction.  *)
    Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
    Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
    Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
    Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
    Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
    Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
    Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
    Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
    Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
    Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
    Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
      Narrow, "vRsubhn", sign_invar_2, su_16_64;

    (* Comparison, equal.  *)
    Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
    Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;

    (* Comparison, greater-than or equal.  *)
    Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
    Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
      All (3, Dreg), "vcge", cmp_sign_matters,
      u_8_32;
    Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
    Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
      All (3, Qreg), "vcgeQ", cmp_sign_matters,
      u_8_32;
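    (* The less-than comparisons below carry no builtins of their own:
       Flipped "vcge" (etc.) tells the generator to call the named
       greater-than-or-equal builtin with the argument order swapped, so
       vcle (a, b) is implemented as vcge (b, a).  *)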
    (* Comparison, less-than or equal.  *)
    Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
      F32 :: s_8_32;
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"],
      All (3, Dreg), "vcle", cmp_sign_matters,
      u_8_32;
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
      All (3, Qreg), "vcleQ", cmp_sign_matters,
      F32 :: s_8_32;
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
      All (3, Qreg), "vcleQ", cmp_sign_matters,
      u_8_32;

    (* Comparison, greater-than.  *)
    Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
    Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
      All (3, Dreg), "vcgt", cmp_sign_matters,
      u_8_32;
    Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
    Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
      All (3, Qreg), "vcgtQ", cmp_sign_matters,
      u_8_32;

    (* Comparison, less-than.  *)
    Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
      F32 :: s_8_32;
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"],
      All (3, Dreg), "vclt", cmp_sign_matters,
      u_8_32;
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
      All (3, Qreg), "vcltQ", cmp_sign_matters,
      F32 :: s_8_32;
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
      All (3, Qreg), "vcltQ", cmp_sign_matters,
      u_8_32;

    (* Compare absolute greater-than or equal.  *)
    Vcage, [Instruction_name ["vacge"]],
      All (3, Dreg), "vcage", cmp_sign_matters, [F32];
    Vcage, [Instruction_name ["vacge"]],
      All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];

    (* Compare absolute less-than or equal.  *)
    Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
      All (3, Dreg), "vcale", cmp_sign_matters, [F32];
    Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
      All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];

    (* Compare absolute greater-than.  *)
    Vcagt, [Instruction_name ["vacgt"]],
      All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
    Vcagt, [Instruction_name ["vacgt"]],
      All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];

    (* Compare absolute less-than.  *)
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
      All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
      All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];

    (* Test bits.  *)
    Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
    Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;

    (* Absolute difference.  *)
    Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
    Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
    Vabd, [], Long, "vabdl", elts_same_2, su_8_32;

    (* Absolute difference and accumulate.  *)
    Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
    Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
    Vaba, [], Long, "vabal", elts_same_io, su_8_32;

    (* Max.  *)
    Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
    Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;

    (* Min.  *)
    Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
    Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;

    (* Pairwise add.  *)
    Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
    Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
    Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
    (* Pairwise add, widen and accumulate.  *)
    Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
    Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;

    (* Folding maximum, minimum.  *)
    Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
    Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;

    (* Reciprocal step.  *)
    Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
    Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
    Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
    Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];

    (* Vector shift left.  *)
    Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
    Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vrshl"]; Rounding],
      All (3, Dreg), "vRshl", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vrshl"]; Rounding],
      All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
    Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
    Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
      All (3, Dreg), "vqRshl", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
      All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;

    (* Vector shift right by constant.  *)
    Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
    Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
      "vRshr_n", shift_right, su_8_64;
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
      "vRshrQ_n", shift_right, su_8_64;
    Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
    Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
      shift_right_sign_invar, su_16_64;
    Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
    Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
      "vqRshrn_n", shift_right, su_16_64;
    Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
      shift_right_to_uns, [S16; S32; S64];
    Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
      Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];

    (* Vector shift left by constant.  *)
    Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
    Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
    Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
    Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
      shift_left_to_uns, [S8; S16; S32; S64];
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
      shift_left_to_uns, [S8; S16; S32; S64];
    Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;

    (* Vector shift right by constant and accumulate.  *)
    Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
    Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
      "vRsra_n", shift_right_acc, su_8_64;
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
      "vRsraQ_n", shift_right_acc, su_8_64;
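    (* The immediate operand types built by shift_right and shift_left
       encode the architectural ranges: T_immediate (1, width) for right
       shifts (VSHR accepts 1..element width) and T_immediate (0, width - 1)
       for left shifts (VSHL accepts 0..element width - 1).  *)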
    (* Vector shift right and insert.  *)
    Vsri, [Requires_feature "CRYPTO"],
      Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
      [P64];
    Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
      P8 :: P16 :: su_8_64;
    Vsri, [Requires_feature "CRYPTO"],
      Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
      [P64];
    Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
      P8 :: P16 :: su_8_64;

    (* Vector shift left and insert.  *)
    Vsli, [Requires_feature "CRYPTO"],
      Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
      [P64];
    Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
      P8 :: P16 :: su_8_64;
    Vsli, [Requires_feature "CRYPTO"],
      Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
      [P64];
    Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
      P8 :: P16 :: su_8_64;

    (* Absolute value.  *)
    Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
    Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
    Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
    Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];

    (* Negate.  *)
    Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
    Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
    Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
    Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];

    (* Bitwise not.  *)
    Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
    Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;

    (* Count leading sign bits.  *)
    Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
    Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];

    (* Count leading zeros.  *)
    Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
    Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;

    (* Count number of set bits.  *)
    Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
    Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];

    (* Reciprocal estimate.  *)
    Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
    Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];

    (* Reciprocal square-root estimate.  *)
    Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
    Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
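    (* The variants gated by Requires_feature "CRYPTO" above and below exist
       because the poly64/poly128 element types are only available when
       __ARM_FEATURE_CRYPTO is defined, so those intrinsics must be guarded
       in the generated header.  *)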
    (* Get lanes from a vector.  *)
    Vget_lane,
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
       Instruction_name ["vmov"]],
      Use_operands [| Corereg; Dreg; Immed |],
      "vget_lane", get_lane, pf_su_8_32;
    Vget_lane,
      [No_op;
       InfoWord;
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
       Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Corereg; Dreg; Immed |],
      "vget_lane", notype_2, [S64; U64];
    Vget_lane,
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
       Instruction_name ["vmov"]],
      Use_operands [| Corereg; Qreg; Immed |],
      "vgetQ_lane", get_lane, pf_su_8_32;
    Vget_lane,
      [InfoWord;
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
       Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0);
       Fixed_core_reg],
      Use_operands [| Corereg; Qreg; Immed |],
      "vgetQ_lane", notype_2, [S64; U64];

    (* Set lanes in a vector.  *)
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
                Instruction_name ["vmov"]],
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
      set_lane, pf_su_8_32;
    Vset_lane, [No_op;
                Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
      set_lane_notype, [S64; U64];
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
                Instruction_name ["vmov"]],
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
      set_lane, pf_su_8_32;
    Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
      set_lane_notype, [S64; U64];

    (* Create vector from literal bit pattern.  *)
    Vcreate,
      [Requires_feature "CRYPTO";
       No_op], (* Not really, but it can yield various things that are too
                  hard for the test generator at this time.  *)
      Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
      [P64];
    Vcreate,
      [No_op], (* Not really, but it can yield various things that are too
                  hard for the test generator at this time.  *)
      Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
      pf_su_8_64;
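    (* create_vector types operand 1 as a U64 core register regardless of
       the element type, so every variant has the prototype
       <vec>_t vcreate_<elt> (uint64_t), e.g. int8x8_t vcreate_s8 (uint64_t).  *)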
    (* Set all lanes to the same value.  *)
    Vdup_n,
      [Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
      pf_su_8_32;
    Vdup_n,
      [No_op; Requires_feature "CRYPTO";
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
      [P64];
    Vdup_n,
      [No_op;
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
      [S64; U64];
    Vdup_n,
      [No_op; Requires_feature "CRYPTO";
       Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
      [P64];
    Vdup_n,
      [Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
      pf_su_8_32;
    Vdup_n,
      [No_op;
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
                        Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
      [S64; U64];

    (* These are just aliases for the above.  *)
    Vmov_n,
      [Builtin_name "vdup_n";
       Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |],
      "vmov_n", bits_1, pf_su_8_32;
    Vmov_n,
      [No_op;
       Builtin_name "vdup_n";
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |],
      "vmov_n", notype_1, [S64; U64];
    Vmov_n,
      [Builtin_name "vdupQ_n";
       Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |],
      "vmovQ_n", bits_1, pf_su_8_32;
    Vmov_n,
      [No_op;
       Builtin_name "vdupQ_n";
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
                        Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Qreg; Corereg |],
      "vmovQ_n", notype_1, [S64; U64];

    (* Duplicate, lane version.  We can't use Use_operands here because the
       rightmost register (always Dreg) would be picked up by find_key_operand,
       when we want the leftmost register to be used in this case (otherwise
       the modes are indistinguishable in neon.md, etc.).  *)
    Vdup_lane,
      [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
      Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
    Vdup_lane,
      [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)],
      Unary_scalar Dreg, "vdup_lane", bits_2, [P64];
    Vdup_lane,
      [No_op; Const_valuator (fun _ -> 0)],
      Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
    Vdup_lane,
      [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
      Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
    Vdup_lane,
      [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)],
      Unary_scalar Qreg, "vdupQ_lane", bits_2, [P64];
    Vdup_lane,
      [No_op; Const_valuator (fun _ -> 0)],
      Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
    (* Combining vectors.  *)
    Vcombine, [Requires_feature "CRYPTO"; No_op],
      Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
      [P64];
    Vcombine, [No_op],
      Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
      pf_su_8_64;

    (* Splitting vectors.  *)
    Vget_high, [Requires_feature "CRYPTO"; No_op],
      Use_operands [| Dreg; Qreg |], "vget_high",
      notype_1, [P64];
    Vget_high, [No_op],
      Use_operands [| Dreg; Qreg |], "vget_high",
      notype_1, pf_su_8_64;
    Vget_low, [Instruction_name ["vmov"];
               Disassembles_as [Use_operands [| Dreg; Dreg |]];
               Fixed_vector_reg],
      Use_operands [| Dreg; Qreg |], "vget_low",
      notype_1, pf_su_8_32;
    Vget_low, [Requires_feature "CRYPTO"; No_op],
      Use_operands [| Dreg; Qreg |], "vget_low",
      notype_1, [P64];
    Vget_low, [No_op],
      Use_operands [| Dreg; Qreg |], "vget_low",
      notype_1, [S64; U64];

    (* Conversions.  *)
    Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt, [Builtin_name "vcvt"; Requires_FP_bit 1],
      Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
    Vcvt, [Builtin_name "vcvt"; Requires_FP_bit 1],
      Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
    Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];

    (* Move, narrowing.  *)
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
      Narrow, "vmovn", sign_invar_1, su_16_64;
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
      Narrow, "vqmovn", elts_same_1, su_16_64;
    Vmovn,
      [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
      Narrow, "vqmovun", dst_unsign_1,
      [S16; S32; S64];

    (* Move, long.  *)
    Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
      Long, "vmovl", elts_same_1, su_8_32;

    (* Table lookup.  *)
    Vtbl 1,
      [Instruction_name ["vtbl"];
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
    Vtbl 2, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
      [U8; S8; P8];
    Vtbl 3, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
      [U8; S8; P8];
    Vtbl 4, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
      [U8; S8; P8];
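    (* A VTBL lookup writes zero to result bytes whose index byte is out of
       range; the VTBX forms below instead leave those result bytes
       unchanged, which is why they use table_io and pass operand 0 as both
       input and output.  *)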
    (* Extended table lookup.  *)
    Vtbx 1,
      [Instruction_name ["vtbx"];
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
    Vtbx 2, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
      [U8; S8; P8];
    Vtbx 3, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
      [U8; S8; P8];
    Vtbx 4, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
      [U8; S8; P8];

    (* Multiply, lane.  (Note: these were undocumented at the time of
       writing.)  *)
    Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
      [S16; S32; U16; U32; F32];
    Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
      [S16; S32; U16; U32; F32];

    (* Multiply-accumulate, lane.  *)
    Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
      [S16; S32; U16; U32];
    Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
      elts_same_io_lane, [S16; S32];

    (* Multiply-subtract, lane.  *)
    Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
      [S16; S32; U16; U32];
    Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
      elts_same_io_lane, [S16; S32];

    (* Long multiply, lane.  *)
    Vmull_lane, [],
      Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];

    (* Saturating doubling long multiply, lane.  *)
    Vqdmull_lane, [Saturating; Doubling],
      Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];

    (* Saturating doubling multiply high, lane.  *)
    Vqdmulh_lane, [Saturating; Halving],
      By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving],
      By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving; Rounding;
                   Instruction_name ["vqrdmulh"]],
      By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving; Rounding;
                   Instruction_name ["vqrdmulh"]],
      By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];

    (* Vector multiply by scalar.  *)
    Vmul_n, [InfoWord;
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
      Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
      sign_invar_2, [S16; S32; U16; U32; F32];
    Vmul_n, [InfoWord;
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
      Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
      sign_invar_2, [S16; S32; U16; U32; F32];

    (* Vector long multiply by scalar.  *)
    Vmull_n, [Instruction_name ["vmull"];
              Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
      Wide_scalar, "vmull_n",
      elts_same_2, [S16; S32; U16; U32];
    (* Vector saturating doubling long multiply by scalar.  *)
    Vqdmull_n, [Saturating; Doubling;
                Disassembles_as [Use_operands [| Qreg; Dreg;
                                                 Element_of_dreg |]]],
      Wide_scalar, "vqdmull_n",
      elts_same_2, [S16; S32];

    (* Vector saturating doubling multiply high by scalar.  *)
    Vqdmulh_n,
      [Saturating; Halving; InfoWord;
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
      Use_operands [| Qreg; Qreg; Corereg |],
      "vqdmulhQ_n", elts_same_2, [S16; S32];
    Vqdmulh_n,
      [Saturating; Halving; InfoWord;
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
      Use_operands [| Dreg; Dreg; Corereg |],
      "vqdmulh_n", elts_same_2, [S16; S32];
    Vqdmulh_n,
      [Saturating; Halving; Rounding; InfoWord;
       Instruction_name ["vqrdmulh"];
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
      Use_operands [| Qreg; Qreg; Corereg |],
      "vqRdmulhQ_n", elts_same_2, [S16; S32];
    Vqdmulh_n,
      [Saturating; Halving; Rounding; InfoWord;
       Instruction_name ["vqrdmulh"];
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
      Use_operands [| Dreg; Dreg; Corereg |],
      "vqRdmulh_n", elts_same_2, [S16; S32];

    (* Vector multiply-accumulate by scalar.  *)
    Vmla_n, [InfoWord;
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
      Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
      sign_invar_io, [S16; S32; U16; U32; F32];
    Vmla_n, [InfoWord;
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
      Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
      sign_invar_io, [S16; S32; U16; U32; F32];
    Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
    Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
      [S16; S32];

    (* Vector multiply-subtract by scalar.  *)
    Vmls_n, [InfoWord;
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
      Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
      sign_invar_io, [S16; S32; U16; U32; F32];
    Vmls_n, [InfoWord;
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
      Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
      sign_invar_io, [S16; S32; U16; U32; F32];
    Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
    Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
      [S16; S32];

    (* Vector extract.  *)
    Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)],
      Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
      [P64];
    Vext, [Const_valuator (fun _ -> 0)],
      Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
      pf_su_8_64;
    Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)],
      Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
      [P64];
    Vext, [Const_valuator (fun _ -> 0)],
      Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
      pf_su_8_64;
    (* Reverse elements.  *)
    Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1,
      P8 :: P16 :: F32 :: su_8_32;
    Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1,
      P8 :: P16 :: F32 :: su_8_32;
    Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1,
      [P8; P16; S8; U8; S16; U16];
    Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1,
      [P8; P16; S8; U8; S16; U16];
    Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1,
      [P8; S8; U8];
    Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1,
      [P8; S8; U8];

    (* Bit selection.  *)
    Vbsl,
      [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"];
       Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
      [P64];
    Vbsl,
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
       Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
      pf_su_8_64;
    Vbsl,
      [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"];
       Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
      Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
      [P64];
    Vbsl,
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
       Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
      Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
      pf_su_8_64;

    (* Transpose elements.  *)
    Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2,
      pf_su_8_16;
    Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]],
      Pair_result Dreg, "vtrn", bits_2, suf_32;
    Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2,
      pf_su_8_32;

    (* Zip elements.  *)
    Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2,
      pf_su_8_16;
    Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]],
      Pair_result Dreg, "vzip", bits_2, suf_32;
    Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2,
      pf_su_8_32;

    (* Unzip elements.  *)
    Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2,
      pf_su_8_32;
    Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2,
      pf_su_8_32;
  (* Element/structure loads.  VLD1 variants. *)
  Vldx 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
    [P64];
  Vldx 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
    pf_su_8_64;
  Vldx 1, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
    [P64];
  Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
    pf_su_8_64;

  Vldx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
    "vld1_lane", bits_3, pf_su_8_32;
  Vldx_lane 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]];
     Const_valuator (fun _ -> 0)],
    Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
    "vld1_lane", bits_3, [P64];
  Vldx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]];
     Const_valuator (fun _ -> 0)],
    Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
    "vld1_lane", bits_3, [S64; U64];
  Vldx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
    "vld1Q_lane", bits_3, pf_su_8_32;
  Vldx_lane 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
    "vld1Q_lane", bits_3, [P64];
  Vldx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
    "vld1Q_lane", bits_3, [S64; U64];

  Vldx_dup 1,
    [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
    bits_1, pf_su_8_32;
  Vldx_dup 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
    bits_1, [P64];
  Vldx_dup 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
    bits_1, [S64; U64];
  Vldx_dup 1,
    [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
    bits_1, pf_su_8_32;
  (* Treated identically to vld1_dup above, as we now do a single load
     followed by a duplicate. *)
  Vldx_dup 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
    bits_1, [P64];
  Vldx_dup 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
    bits_1, [S64; U64];
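
  (* A D register holds exactly one 64-bit lane, so the 64-bit "lane" and
     "dup" variants above pin the lane index to 0 where applicable and
     disassemble as a plain vld1 of a single doubleword. *)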
  (* VST1 variants. *)
  Vstx 1, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Dreg |], "vst1",
    store_1, [P64];
  Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Dreg |], "vst1",
    store_1, pf_su_8_64;
  Vstx 1, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
    store_1, [P64];
  Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
    store_1, pf_su_8_64;

  Vstx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Dreg; Immed |],
    "vst1_lane", store_3, pf_su_8_32;
  Vstx_lane 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]];
     Const_valuator (fun _ -> 0)],
    Use_operands [| PtrTo Corereg; Dreg; Immed |],
    "vst1_lane", store_3, [P64];
  Vstx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]];
     Const_valuator (fun _ -> 0)],
    Use_operands [| PtrTo Corereg; Dreg; Immed |],
    "vst1_lane", store_3, [U64; S64];
  Vstx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Qreg; Immed |],
    "vst1Q_lane", store_3, pf_su_8_32;
  Vstx_lane 1,
    [Requires_feature "CRYPTO";
     Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Qreg; Immed |],
    "vst1Q_lane", store_3, [P64];
  Vstx_lane 1,
    [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; Qreg; Immed |],
    "vst1Q_lane", store_3, [U64; S64];
  (* VLD2 variants. *)
  Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2", bits_1, pf_su_8_32;
  Vldx 2, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2", bits_1, [P64];
  Vldx 2, [Instruction_name ["vld1"]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2", bits_1, [S64; U64];
  Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            CstPtrTo Corereg |];
                            Use_operands [| VecArray (2, Dreg);
                                            CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
    "vld2Q", bits_1, pf_su_8_32;

  Vldx_lane 2,
    [Disassembles_as [Use_operands
                        [| VecArray (2, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
                    VecArray (2, Dreg); Immed |],
    "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
  Vldx_lane 2,
    [Disassembles_as [Use_operands
                        [| VecArray (2, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
                    VecArray (2, Qreg); Immed |],
    "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

  Vldx_dup 2,
    [Disassembles_as [Use_operands
                        [| VecArray (2, All_elements_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2_dup", bits_1, pf_su_8_32;
  Vldx_dup 2,
    [Requires_feature "CRYPTO";
     Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2_dup", bits_1, [P64];
  Vldx_dup 2,
    [Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
    "vld2_dup", bits_1, [S64; U64];

  (* VST2 variants. *)
  Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
    store_1, pf_su_8_32;
  Vstx 2, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
    store_1, [P64];
  Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
    store_1, [S64; U64];
  Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |];
                            Use_operands [| VecArray (2, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
    store_1, pf_su_8_32;

  Vstx_lane 2,
    [Disassembles_as [Use_operands
                        [| VecArray (2, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
    store_3, P8 :: P16 :: F32 :: su_8_32;
  Vstx_lane 2,
    [Disassembles_as [Use_operands
                        [| VecArray (2, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
    store_3, [P16; F32; U16; U32; S16; S32];
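
  (* VLD2/VST2 (like the VLD3 and VLD4 analogues below) have no 64-bit
     element form, so the [P64] and [S64; U64] entries fall back to
     vld1/vst1 on an array of doublewords via Instruction_name. *)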
  (* VLD3 variants. *)
  Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3", bits_1, pf_su_8_32;
  Vldx 3, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3", bits_1, [P64];
  Vldx 3, [Instruction_name ["vld1"]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3", bits_1, [S64; U64];
  Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                            CstPtrTo Corereg |];
                            Use_operands [| VecArray (3, Dreg);
                                            CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
    "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;

  Vldx_lane 3,
    [Disassembles_as [Use_operands
                        [| VecArray (3, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
                    VecArray (3, Dreg); Immed |],
    "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
  Vldx_lane 3,
    [Disassembles_as [Use_operands
                        [| VecArray (3, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
                    VecArray (3, Qreg); Immed |],
    "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

  Vldx_dup 3,
    [Disassembles_as [Use_operands
                        [| VecArray (3, All_elements_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3_dup", bits_1, pf_su_8_32;
  Vldx_dup 3,
    [Requires_feature "CRYPTO";
     Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3_dup", bits_1, [P64];
  Vldx_dup 3,
    [Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
    "vld3_dup", bits_1, [S64; U64];

  (* VST3 variants. *)
  Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
    store_1, pf_su_8_32;
  Vstx 3, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
    store_1, [P64];
  Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
    store_1, [S64; U64];
  Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                            PtrTo Corereg |];
                            Use_operands [| VecArray (3, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
    store_1, pf_su_8_32;

  Vstx_lane 3,
    [Disassembles_as [Use_operands
                        [| VecArray (3, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
    store_3, P8 :: P16 :: F32 :: su_8_32;
  Vstx_lane 3,
    [Disassembles_as [Use_operands
                        [| VecArray (3, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
    store_3, [P16; F32; U16; U32; S16; S32];
  (* VLD4/VST4 variants. *)
  Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4", bits_1, pf_su_8_32;
  Vldx 4, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4", bits_1, [P64];
  Vldx 4, [Instruction_name ["vld1"]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4", bits_1, [S64; U64];
  Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            CstPtrTo Corereg |];
                            Use_operands [| VecArray (4, Dreg);
                                            CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
    "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;

  Vldx_lane 4,
    [Disassembles_as [Use_operands
                        [| VecArray (4, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
                    VecArray (4, Dreg); Immed |],
    "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
  Vldx_lane 4,
    [Disassembles_as [Use_operands
                        [| VecArray (4, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
                    VecArray (4, Qreg); Immed |],
    "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

  Vldx_dup 4,
    [Disassembles_as [Use_operands
                        [| VecArray (4, All_elements_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4_dup", bits_1, pf_su_8_32;
  Vldx_dup 4,
    [Requires_feature "CRYPTO";
     Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4_dup", bits_1, [P64];
  Vldx_dup 4,
    [Instruction_name ["vld1"];
     Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                      CstPtrTo Corereg |]]],
    Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
    "vld4_dup", bits_1, [S64; U64];

  Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
    store_1, pf_su_8_32;
  Vstx 4, [Requires_feature "CRYPTO";
           Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
    store_1, [P64];
  Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]];
           Instruction_name ["vst1"]],
    Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
    store_1, [S64; U64];
  Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |];
                            Use_operands [| VecArray (4, Dreg);
                                            PtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
    store_1, pf_su_8_32;

  Vstx_lane 4,
    [Disassembles_as [Use_operands
                        [| VecArray (4, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
    store_3, P8 :: P16 :: F32 :: su_8_32;
  Vstx_lane 4,
    [Disassembles_as [Use_operands
                        [| VecArray (4, Element_of_dreg);
                           CstPtrTo Corereg |]]],
    Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
    store_3, [P16; F32; U16; U32; S16; S32];
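
  (* Note that the Q-register "lane" variants of the structure ops above
     cover only 16- and 32-bit elements; VLDn/VSTn to a single lane of a
     quadword register is not available for 8-bit elements. *)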
  (* Logical operations.  And. *)
  Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
  Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
  Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;

  (* Or. *)
  Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
  Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
  Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;

  (* Eor. *)
  Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
  Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
  Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;

  (* Bic (And-not). *)
  Vbic, [Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2, su_8_32;
  Vbic, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2,
    [S64; U64];
  Vbic, [Compiler_optim "-O2"], All (3, Qreg), "vbicQ", notype_2, su_8_64;

  (* Or-not. *)
  Vorn, [Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2, su_8_32;
  Vorn, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2,
    [S64; U64];
  Vorn, [Compiler_optim "-O2"], All (3, Qreg), "vornQ", notype_2, su_8_64;
  ]

let type_in_crypto_only t
  = (t == P64) || (t == P128)

let cross_product s1 s2
  = List.filter (fun (e, e') -> e <> e')
      (List.concat (List.map (fun e1 -> List.map (fun e2 -> (e1, e2)) s1) s2))

let reinterp =
  let elems = P8 :: P16 :: F32 :: P64 :: su_8_64 in
  let casts = cross_product elems elems in
  List.map
    (fun (convto, convfrom) ->
       Vreinterp,
       (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom)
        then [Requires_feature "CRYPTO"] else []) @ [No_op],
       Use_operands [| Dreg; Dreg |],
       "vreinterpret", conv_1, [Cast (convto, convfrom)])
    casts

let reinterpq =
  let elems = P8 :: P16 :: F32 :: P64 :: P128 :: su_8_64 in
  let casts = cross_product elems elems in
  List.map
    (fun (convto, convfrom) ->
       Vreinterp,
       (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom)
        then [Requires_feature "CRYPTO"] else []) @ [No_op],
       Use_operands [| Qreg; Qreg |],
       "vreinterpretQ", conv_1, [Cast (convto, convfrom)])
    casts
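
(* Illustration only; the binding below is not used by the generators.
   cross_product builds every ordered pair with distinct components, so the
   reinterpret tables above contain one entry per direction of each
   conversion and never a self-cast.  For example: *)
let _example_casts = cross_product [S8; U8] [S8; U8]
  (* = [(S8, U8); (U8, S8)] *)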
(* Output routines. *)

let rec string_of_elt = function
    S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
  | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
  | P64 -> "p64" | P128 -> "p128"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"

let string_of_elt_dots elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt

let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float16x4 -> affix "float16x4"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_poly64 -> affix "poly64"
    | T_poly64x1 -> affix "poly64x1"
    | T_poly64x2 -> affix "poly64x2"
    | T_poly128 -> affix "poly128"
    | T_float16 -> affix "float16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_intTI -> "__builtin_neon_ti"
    | T_floatHF -> "__builtin_neon_hf"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
  name (fun x -> x ^ "_t") vt

let string_of_inttype = function
    B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"

let string_of_mode = function
    V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
  | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
  | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
  | HI -> "hi" | SI -> "si" | SF -> "sf" | TI -> "ti"

(* Use uppercase chars for letters which form part of the intrinsic name,
   but should be omitted from the builtin name (the info is passed in an
   extra argument, instead). *)
let intrinsic_name name = String.lowercase name
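
(* Illustrative bindings, not used elsewhere: the "_t" affix is applied to
   the innermost vector name, while array, pointer and const wrappers
   compose around it. *)
let _example_array_name = string_of_vectype (T_arrayof (4, T_uint16x4))
  (* = "uint16x4x4_t" *)
let _example_ptr_name = string_of_vectype (T_ptrto (T_const T_float32x2))
  (* = "const float32x2_t *" *)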
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list. *)
let builtin_name features name =
  let name = List.fold_right
               (fun el name ->
                  match el with
                    Flipped x | Builtin_name x -> x
                  | _ -> name)
               features name in
  let islower x = let str = String.make 1 x in (String.lowercase str) = str
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  Buffer.contents buf

(* Transform an arity into a list of strings. *)
let strings_of_arity a =
  match a with
  | Arity0 vt -> [string_of_vectype vt]
  | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
  | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
                               string_of_vectype vt2;
                               string_of_vectype vt3]
  | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
                                    string_of_vectype vt2;
                                    string_of_vectype vt3;
                                    string_of_vectype vt4]
  | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
                                         string_of_vectype vt2;
                                         string_of_vectype vt3;
                                         string_of_vectype vt4;
                                         string_of_vectype vt5]

(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore. *)
let suffixes_to_strip = [ "n"; "lane"; "dup" ]

(* Get the possible names of an instruction corresponding to a "name" from
   the ops table.  This is done by taking the equivalent builtin name and
   stripping any suffixes from the suffixes_to_strip list above, unless the
   features list contains an Instruction_name entry, in which case that is
   used, or a Flipped entry, in which case that is used.  If both such
   entries are present, the first in the list is chosen. *)
let get_insn_names features name =
  let names = try
    begin
      match List.find (fun feature -> match feature with
                                        Instruction_name _ -> true
                                      | Flipped _ -> true
                                      | _ -> false) features
      with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false
    end
  with Not_found -> [builtin_name features name]
  in
  List.map (fun name' ->
    try
      let underscore = String.rindex name' '_' in
      let our_suffix = String.sub name' (underscore + 1)
                                  ((String.length name') - underscore - 1)
      in
      let rec strip remaining_suffixes =
        match remaining_suffixes with
          [] -> name'
        | s :: _ when our_suffix = s -> String.sub name' 0 underscore
        | _ :: ss -> strip ss
      in
      strip suffixes_to_strip
    with (Not_found | Invalid_argument _) -> name') names

(* Apply a function to each element of a list and then comma-separate
   the resulting strings. *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [elt] -> acc ^ (f elt)
  | elt :: elts ->
      commas f elts (acc ^ (f elt) ^ ", ")
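
(* Worked example; the binding below is illustrative and unused.  For the
   "vmlaQ_n" entry in the ops table, builtin_name drops the uppercase 'Q',
   giving "vmla_n", and get_insn_names then strips the "_n" suffix, leaving
   the expected mnemonic "vmla".  This is also why entries such as
   "vqRdmulhQ_n" carry an explicit Instruction_name ["vqrdmulh"]: stripping
   the uppercase 'R' would otherwise predict the wrong mnemonic. *)
let _example_insn = get_insn_names [] "vmlaQ_n"
  (* = ["vmla"] *)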
(* Given a list of features and the shape specified in the "ops" table,
   apply a function to each possible shape the instruction may have.
   By default this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is
   more than one Disassembles_as entry, only the first is used.  (A small
   illustrative use appears at the end of this file.) *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
                       match feature with Disassembles_as _ -> true
                                        | _ -> false)
                    features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]

(* The crypto intrinsics have unconventional shapes and are not numerous
   enough to be worth the trouble of encoding in the tables above, so we
   write them out explicitly here. *)
let crypto_intrinsics =
"
#ifdef __ARM_FEATURE_CRYPTO

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vldrq_p128 (poly128_t const * __ptr)
{
#ifdef __ARM_BIG_ENDIAN
  poly64_t* __ptmp = (poly64_t*) __ptr;
  poly64_t __d0 = vld1_p64 (__ptmp);
  poly64_t __d1 = vld1_p64 (__ptmp + 1);
  return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0));
#else
  return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr));
#endif
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vstrq_p128 (poly128_t * __ptr, poly128_t __val)
{
#ifdef __ARM_BIG_ENDIAN
  poly64x2_t __tmp = vreinterpretq_p64_p128 (__val);
  poly64_t __d0 = vget_high_p64 (__tmp);
  poly64_t __d1 = vget_low_p64 (__tmp);
  vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1));
#else
  vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val));
#endif
}

/* The vceq_p64 intrinsic does not map to a single instruction.
   Instead we emulate it by performing a 32-bit variant of the vceq
   and applying a pairwise min reduction to the result.
   vceq_u32 will produce two 32-bit halves, each of which will contain
   either all ones or all zeros depending on whether the corresponding
   32-bit halves of the poly64_t were equal.  The whole poly64_t values
   are equal if and only if both halves are equal, i.e. vceq_u32 returns
   all ones.  If the result is all zeros for either half, then the whole
   result is zeros.  This is what the pairwise min reduction achieves.  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_p64 (poly64x1_t __a, poly64x1_t __b)
{
  uint32x2_t __t_a = vreinterpret_u32_p64 (__a);
  uint32x2_t __t_b = vreinterpret_u32_p64 (__b);
  uint32x2_t __c = vceq_u32 (__t_a, __t_b);
  uint32x2_t __m = vpmin_u32 (__c, __c);
  return vreinterpret_u64_u32 (__m);
}
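
/* Worked example for the reduction above: if only the low 32-bit halves
   of the operands differ, vceq_u32 yields { 0x00000000, 0xffffffff } and
   vpmin_u32 collapses that to { 0, 0 }, i.e. not-equal, as required.  */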
/* The vtst_p64 intrinsic does not map to a single instruction.
   We emulate it in a way similar to vceq_p64 above, but here we do a
   reduction with max, since if any two corresponding bits in the two
   poly64_t's match, then the whole result must be all ones.  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_p64 (poly64x1_t __a, poly64x1_t __b)
{
  uint32x2_t __t_a = vreinterpret_u32_p64 (__a);
  uint32x2_t __t_b = vreinterpret_u32_p64 (__b);
  uint32x2_t __c = vtst_u32 (__t_a, __t_b);
  uint32x2_t __m = vpmax_u32 (__c, __c);
  return vreinterpret_u64_u32 (__m);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
{
  return __builtin_arm_crypto_aese (__data, __key);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesdq_u8 (uint8x16_t __data, uint8x16_t __key)
{
  return __builtin_arm_crypto_aesd (__data, __key);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesmcq_u8 (uint8x16_t __data)
{
  return __builtin_arm_crypto_aesmc (__data);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesimcq_u8 (uint8x16_t __data)
{
  return __builtin_arm_crypto_aesimc (__data);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vsha1h_u32 (uint32_t __hash_e)
{
  uint32x4_t __t = vdupq_n_u32 (0);
  __t = vsetq_lane_u32 (__hash_e, __t, 0);
  __t = __builtin_arm_crypto_sha1h (__t);
  return vgetq_lane_u32 (__t, 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  uint32x4_t __t = vdupq_n_u32 (0);
  __t = vsetq_lane_u32 (__hash_e, __t, 0);
  return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  uint32x4_t __t = vdupq_n_u32 (0);
  __t = vsetq_lane_u32 (__hash_e, __t, 0);
  return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
  uint32x4_t __t = vdupq_n_u32 (0);
  __t = vsetq_lane_u32 (__hash_e, __t, 0);
  return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11)
{
  return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15)
{
  return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk)
{
  return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk)
{
  return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk);
}
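
/* In the SHA1 helpers above, the scalar __hash_e is widened to a vector by
   inserting it into lane 0 of a zeroed uint32x4_t, because the underlying
   crypto builtins operate on whole quadword registers.  */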
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7)
{
  return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15)
{
  return __builtin_arm_crypto_sha256su1 (__tw0_3, __w8_11, __w12_15);
}

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_p64 (poly64_t __a, poly64_t __b)
{
  return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b);
}

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
{
  poly64_t __t1 = vget_high_p64 (__a);
  poly64_t __t2 = vget_high_p64 (__b);

  return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2);
}

#endif
"
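
(* Illustrative use of analyze_all_shapes, as promised above; the binding is
   not part of the generators.  With no Disassembles_as feature present, the
   callback is simply applied to the intrinsic's own shape: *)
let _example_shapes = analyze_all_shapes [] (All (3, Dreg)) (fun s -> s)
  (* = [All (3, Dreg)] *)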