1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (C) 2016 Romain Dolbeau. All rights reserved. 23eda14cbcSMatt Macy */ 24eda14cbcSMatt Macy 25eda14cbcSMatt Macy #include <sys/types.h> 26eda14cbcSMatt Macy #include <sys/simd.h> 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #ifdef __linux__ 29eda14cbcSMatt Macy #define __asm __asm__ __volatile__ 30eda14cbcSMatt Macy #endif 31eda14cbcSMatt Macy 32eda14cbcSMatt Macy #define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N 33eda14cbcSMatt Macy #define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1) 34eda14cbcSMatt Macy 35eda14cbcSMatt Macy #define VR0_(REG, ...) "%[w"#REG"]" 36eda14cbcSMatt Macy #define VR1_(_1, REG, ...) "%[w"#REG"]" 37eda14cbcSMatt Macy #define VR2_(_1, _2, REG, ...) 
"%[w"#REG"]" 38eda14cbcSMatt Macy #define VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]" 39eda14cbcSMatt Macy #define VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]" 40eda14cbcSMatt Macy #define VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]" 41eda14cbcSMatt Macy #define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]" 42eda14cbcSMatt Macy #define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]" 43eda14cbcSMatt Macy 44eda14cbcSMatt Macy /* 45eda14cbcSMatt Macy * Here we need registers not used otherwise. 46eda14cbcSMatt Macy * They will be used in unused ASM for the case 47eda14cbcSMatt Macy * with more registers than required... but GCC 48eda14cbcSMatt Macy * will still need to make sure the constraints 49eda14cbcSMatt Macy * are correct, and duplicate constraints are illegal 50eda14cbcSMatt Macy * ... and we use the "register" number as a name 51eda14cbcSMatt Macy */ 52eda14cbcSMatt Macy 53eda14cbcSMatt Macy #define VR0(r...) VR0_(r) 54eda14cbcSMatt Macy #define VR1(r...) VR1_(r) 55eda14cbcSMatt Macy #define VR2(r...) VR2_(r, 36) 56eda14cbcSMatt Macy #define VR3(r...) VR3_(r, 36, 35) 57eda14cbcSMatt Macy #define VR4(r...) VR4_(r, 36, 35, 34, 33) 58eda14cbcSMatt Macy #define VR5(r...) VR5_(r, 36, 35, 34, 33, 32) 59eda14cbcSMatt Macy #define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31) 60eda14cbcSMatt Macy #define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30) 61eda14cbcSMatt Macy 62eda14cbcSMatt Macy #define VR(X) "%[w"#X"]" 63eda14cbcSMatt Macy 64eda14cbcSMatt Macy #define RVR0_(REG, ...) [w##REG] "w" (w##REG) 65eda14cbcSMatt Macy #define RVR1_(_1, REG, ...) [w##REG] "w" (w##REG) 66eda14cbcSMatt Macy #define RVR2_(_1, _2, REG, ...) [w##REG] "w" (w##REG) 67eda14cbcSMatt Macy #define RVR3_(_1, _2, _3, REG, ...) [w##REG] "w" (w##REG) 68eda14cbcSMatt Macy #define RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "w" (w##REG) 69eda14cbcSMatt Macy #define RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "w" (w##REG) 70eda14cbcSMatt Macy #define RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) 
[w##REG] "w" (w##REG) 71eda14cbcSMatt Macy #define RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "w" (w##REG) 72eda14cbcSMatt Macy 73eda14cbcSMatt Macy #define RVR0(r...) RVR0_(r) 74eda14cbcSMatt Macy #define RVR1(r...) RVR1_(r) 75eda14cbcSMatt Macy #define RVR2(r...) RVR2_(r, 36) 76eda14cbcSMatt Macy #define RVR3(r...) RVR3_(r, 36, 35) 77eda14cbcSMatt Macy #define RVR4(r...) RVR4_(r, 36, 35, 34, 33) 78eda14cbcSMatt Macy #define RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32) 79eda14cbcSMatt Macy #define RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31) 80eda14cbcSMatt Macy #define RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30) 81eda14cbcSMatt Macy 82eda14cbcSMatt Macy #define RVR(X) [w##X] "w" (w##X) 83eda14cbcSMatt Macy 84eda14cbcSMatt Macy #define WVR0_(REG, ...) [w##REG] "=w" (w##REG) 85eda14cbcSMatt Macy #define WVR1_(_1, REG, ...) [w##REG] "=w" (w##REG) 86eda14cbcSMatt Macy #define WVR2_(_1, _2, REG, ...) [w##REG] "=w" (w##REG) 87eda14cbcSMatt Macy #define WVR3_(_1, _2, _3, REG, ...) [w##REG] "=w" (w##REG) 88eda14cbcSMatt Macy #define WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=w" (w##REG) 89eda14cbcSMatt Macy #define WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=w" (w##REG) 90eda14cbcSMatt Macy #define WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=w" (w##REG) 91eda14cbcSMatt Macy #define WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=w" (w##REG) 92eda14cbcSMatt Macy 93eda14cbcSMatt Macy #define WVR0(r...) WVR0_(r) 94eda14cbcSMatt Macy #define WVR1(r...) WVR1_(r) 95eda14cbcSMatt Macy #define WVR2(r...) WVR2_(r, 36) 96eda14cbcSMatt Macy #define WVR3(r...) WVR3_(r, 36, 35) 97eda14cbcSMatt Macy #define WVR4(r...) WVR4_(r, 36, 35, 34, 33) 98eda14cbcSMatt Macy #define WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32) 99eda14cbcSMatt Macy #define WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31) 100eda14cbcSMatt Macy #define WVR7(r...) 
WVR7_(r, 36, 35, 34, 33, 32, 31, 30) 101eda14cbcSMatt Macy 102eda14cbcSMatt Macy #define WVR(X) [w##X] "=w" (w##X) 103eda14cbcSMatt Macy 104eda14cbcSMatt Macy #define UVR0_(REG, ...) [w##REG] "+&w" (w##REG) 105eda14cbcSMatt Macy #define UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG) 106eda14cbcSMatt Macy #define UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG) 107eda14cbcSMatt Macy #define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG) 108eda14cbcSMatt Macy #define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG) 109eda14cbcSMatt Macy #define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG) 110eda14cbcSMatt Macy #define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG) 111eda14cbcSMatt Macy #define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG) 112eda14cbcSMatt Macy 113eda14cbcSMatt Macy #define UVR0(r...) UVR0_(r) 114eda14cbcSMatt Macy #define UVR1(r...) UVR1_(r) 115eda14cbcSMatt Macy #define UVR2(r...) UVR2_(r, 36) 116eda14cbcSMatt Macy #define UVR3(r...) UVR3_(r, 36, 35) 117eda14cbcSMatt Macy #define UVR4(r...) UVR4_(r, 36, 35, 34, 33) 118eda14cbcSMatt Macy #define UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32) 119eda14cbcSMatt Macy #define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31) 120eda14cbcSMatt Macy #define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30) 121eda14cbcSMatt Macy 122eda14cbcSMatt Macy #define UVR(X) [w##X] "+&w" (w##X) 123eda14cbcSMatt Macy 124eda14cbcSMatt Macy #define R_01(REG1, REG2, ...) REG1, REG2 125eda14cbcSMatt Macy #define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3 126eda14cbcSMatt Macy #define R_23(REG...) 
_R_23(REG, 1, 2, 3) 127eda14cbcSMatt Macy 128eda14cbcSMatt Macy #define ZFS_ASM_BUG() ASSERT(0) 129eda14cbcSMatt Macy 130eda14cbcSMatt Macy #define OFFSET(ptr, val) (((unsigned char *)(ptr))+val) 131eda14cbcSMatt Macy 132eda14cbcSMatt Macy extern const uint8_t gf_clmul_mod_lt[4*256][16]; 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy #define ELEM_SIZE 16 135eda14cbcSMatt Macy 136eda14cbcSMatt Macy typedef struct v { 137eda14cbcSMatt Macy uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE))); 138eda14cbcSMatt Macy } v_t; 139eda14cbcSMatt Macy 140eda14cbcSMatt Macy #define XOR_ACC(src, r...) \ 141eda14cbcSMatt Macy { \ 142eda14cbcSMatt Macy switch (REG_CNT(r)) { \ 143eda14cbcSMatt Macy case 8: \ 144eda14cbcSMatt Macy __asm( \ 145eda14cbcSMatt Macy "ld1 { v21.4s },%[SRC0]\n" \ 146eda14cbcSMatt Macy "ld1 { v20.4s },%[SRC1]\n" \ 147eda14cbcSMatt Macy "ld1 { v19.4s },%[SRC2]\n" \ 148eda14cbcSMatt Macy "ld1 { v18.4s },%[SRC3]\n" \ 149eda14cbcSMatt Macy "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \ 150eda14cbcSMatt Macy "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \ 151eda14cbcSMatt Macy "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \ 152eda14cbcSMatt Macy "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \ 153eda14cbcSMatt Macy "ld1 { v21.4s },%[SRC4]\n" \ 154eda14cbcSMatt Macy "ld1 { v20.4s },%[SRC5]\n" \ 155eda14cbcSMatt Macy "ld1 { v19.4s },%[SRC6]\n" \ 156eda14cbcSMatt Macy "ld1 { v18.4s },%[SRC7]\n" \ 157eda14cbcSMatt Macy "eor " VR4(r) ".16b," VR4(r) ".16b,v21.16b\n" \ 158eda14cbcSMatt Macy "eor " VR5(r) ".16b," VR5(r) ".16b,v20.16b\n" \ 159eda14cbcSMatt Macy "eor " VR6(r) ".16b," VR6(r) ".16b,v19.16b\n" \ 160eda14cbcSMatt Macy "eor " VR7(r) ".16b," VR7(r) ".16b,v18.16b\n" \ 161eda14cbcSMatt Macy : UVR0(r), UVR1(r), UVR2(r), UVR3(r), \ 162eda14cbcSMatt Macy UVR4(r), UVR5(r), UVR6(r), UVR7(r) \ 163eda14cbcSMatt Macy : [SRC0] "Q" (*(OFFSET(src, 0))), \ 164eda14cbcSMatt Macy [SRC1] "Q" (*(OFFSET(src, 16))), \ 165eda14cbcSMatt Macy [SRC2] "Q" (*(OFFSET(src, 32))), \ 
166eda14cbcSMatt Macy [SRC3] "Q" (*(OFFSET(src, 48))), \ 167eda14cbcSMatt Macy [SRC4] "Q" (*(OFFSET(src, 64))), \ 168eda14cbcSMatt Macy [SRC5] "Q" (*(OFFSET(src, 80))), \ 169eda14cbcSMatt Macy [SRC6] "Q" (*(OFFSET(src, 96))), \ 170eda14cbcSMatt Macy [SRC7] "Q" (*(OFFSET(src, 112))) \ 171eda14cbcSMatt Macy : "v18", "v19", "v20", "v21"); \ 172eda14cbcSMatt Macy break; \ 173eda14cbcSMatt Macy case 4: \ 174eda14cbcSMatt Macy __asm( \ 175eda14cbcSMatt Macy "ld1 { v21.4s },%[SRC0]\n" \ 176eda14cbcSMatt Macy "ld1 { v20.4s },%[SRC1]\n" \ 177eda14cbcSMatt Macy "ld1 { v19.4s },%[SRC2]\n" \ 178eda14cbcSMatt Macy "ld1 { v18.4s },%[SRC3]\n" \ 179eda14cbcSMatt Macy "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \ 180eda14cbcSMatt Macy "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \ 181eda14cbcSMatt Macy "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \ 182eda14cbcSMatt Macy "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \ 183eda14cbcSMatt Macy : UVR0(r), UVR1(r), UVR2(r), UVR3(r) \ 184eda14cbcSMatt Macy : [SRC0] "Q" (*(OFFSET(src, 0))), \ 185eda14cbcSMatt Macy [SRC1] "Q" (*(OFFSET(src, 16))), \ 186eda14cbcSMatt Macy [SRC2] "Q" (*(OFFSET(src, 32))), \ 187eda14cbcSMatt Macy [SRC3] "Q" (*(OFFSET(src, 48))) \ 188eda14cbcSMatt Macy : "v18", "v19", "v20", "v21"); \ 189eda14cbcSMatt Macy break; \ 190eda14cbcSMatt Macy case 2: \ 191eda14cbcSMatt Macy __asm( \ 192eda14cbcSMatt Macy "ld1 { v21.4s },%[SRC0]\n" \ 193eda14cbcSMatt Macy "ld1 { v20.4s },%[SRC1]\n" \ 194eda14cbcSMatt Macy "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \ 195eda14cbcSMatt Macy "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \ 196eda14cbcSMatt Macy : UVR0(r), UVR1(r) \ 197eda14cbcSMatt Macy : [SRC0] "Q" (*(OFFSET(src, 0))), \ 198eda14cbcSMatt Macy [SRC1] "Q" (*(OFFSET(src, 16))) \ 199eda14cbcSMatt Macy : "v20", "v21"); \ 200eda14cbcSMatt Macy break; \ 201eda14cbcSMatt Macy default: \ 202eda14cbcSMatt Macy ZFS_ASM_BUG(); \ 203eda14cbcSMatt Macy } \ 204eda14cbcSMatt Macy } 205eda14cbcSMatt Macy 206eda14cbcSMatt Macy 
/*
 * XOR(r...): XOR the first half of the register list into the second half
 * (8 registers: r4..r7 ^= r0..r3; 4 registers: r2..r3 ^= r0..r1).
 */
#define	XOR(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR3(r) ".16b\n" \
		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm( \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR1(r) ".16b\n" \
		:	UVR2(r), UVR3(r) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * ZERO(r...): clear 2, 4 or 8 registers by XOR-ing each with itself.
 */
#define	ZERO(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n" \
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n" \
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n" \
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
		break; \
	case 4: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)); \
		break; \
	case 2: \
		__asm( \
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
		:	WVR0(r), WVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * COPY(r...): copy the first half of the register list into the second
 * half (8 registers: r4..r7 = r0..r3; 4 registers: r2..r3 = r0..r1).
 *
 * NOTE(review): each asm statement writes a destination before the
 * remaining sources have been read, so the output constraint produced by
 * WVRn() must keep destinations from sharing a register with a source.
 */
#define	COPY(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"mov " VR4(r) ".16b," VR0(r) ".16b\n" \
		"mov " VR5(r) ".16b," VR1(r) ".16b\n" \
		"mov " VR6(r) ".16b," VR2(r) ".16b\n" \
		"mov " VR7(r) ".16b," VR3(r) ".16b\n" \
		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 4: \
		__asm( \
		"mov " VR2(r) ".16b," VR0(r) ".16b\n" \
		"mov " VR3(r) ".16b," VR1(r) ".16b\n" \
		:	WVR2(r), WVR3(r) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * LOAD(src, r...): load consecutive 16-byte chunks from 'src' (offsets 0,
 * 16, 32, ...) into the 2, 4 or 8 registers listed in 'r'.
 */
#define	LOAD(src, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		"ld1 { " VR2(r) ".4s },%[SRC2]\n" \
		"ld1 { " VR3(r) ".4s },%[SRC3]\n" \
		"ld1 { " VR4(r) ".4s },%[SRC4]\n" \
		"ld1 { " VR5(r) ".4s },%[SRC5]\n" \
		"ld1 { " VR6(r) ".4s },%[SRC6]\n" \
		"ld1 { " VR7(r) ".4s },%[SRC7]\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
			WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
		[SRC1] "Q" (*(OFFSET(src, 16))), \
		[SRC2] "Q" (*(OFFSET(src, 32))), \
		[SRC3] "Q" (*(OFFSET(src, 48))), \
		[SRC4] "Q" (*(OFFSET(src, 64))), \
		[SRC5] "Q" (*(OFFSET(src, 80))), \
		[SRC6] "Q" (*(OFFSET(src, 96))), \
		[SRC7] "Q" (*(OFFSET(src, 112)))); \
		break; \
	case 4: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		"ld1 { " VR2(r) ".4s },%[SRC2]\n" \
		"ld1 { " VR3(r) ".4s },%[SRC3]\n" \
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
		[SRC1] "Q" (*(OFFSET(src, 16))), \
		[SRC2] "Q" (*(OFFSET(src, 32))), \
		[SRC3] "Q" (*(OFFSET(src, 48)))); \
		break; \
	case 2: \
		__asm( \
		"ld1 { " VR0(r) ".4s },%[SRC0]\n" \
		"ld1 { " VR1(r) ".4s },%[SRC1]\n" \
		:	WVR0(r), WVR1(r) \
		:	[SRC0] "Q" (*(OFFSET(src, 0))), \
		[SRC1] "Q" (*(OFFSET(src, 16)))); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * STORE(dst, r...): store the 2, 4 or 8 registers listed in 'r' to
 * consecutive 16-byte chunks at 'dst' (offsets 0, 16, 32, ...).
 */
#define	STORE(dst, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 8: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		"st1 { " VR2(r) ".4s },%[DST2]\n" \
		"st1 { " VR3(r) ".4s },%[DST3]\n" \
		"st1 { " VR4(r) ".4s },%[DST4]\n" \
		"st1 { " VR5(r) ".4s },%[DST5]\n" \
		"st1 { " VR6(r) ".4s },%[DST6]\n" \
		"st1 { " VR7(r) ".4s },%[DST7]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
		[DST1] "=Q" (*(OFFSET(dst, 16))), \
		[DST2] "=Q" (*(OFFSET(dst, 32))), \
		[DST3] "=Q" (*(OFFSET(dst, 48))), \
		[DST4] "=Q" (*(OFFSET(dst, 64))), \
		[DST5] "=Q" (*(OFFSET(dst, 80))), \
		[DST6] "=Q" (*(OFFSET(dst, 96))), \
		[DST7] "=Q" (*(OFFSET(dst, 112))) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r), \
			RVR4(r), RVR5(r), RVR6(r), RVR7(r)); \
		break; \
	case 4: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		"st1 { " VR2(r) ".4s },%[DST2]\n" \
		"st1 { " VR3(r) ".4s },%[DST3]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
		[DST1] "=Q" (*(OFFSET(dst, 16))), \
		[DST2] "=Q" (*(OFFSET(dst, 32))), \
		[DST3] "=Q" (*(OFFSET(dst, 48))) \
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
		break; \
	case 2: \
		__asm( \
		"st1 { " VR0(r) ".4s },%[DST0]\n" \
		"st1 { " VR1(r) ".4s },%[DST1]\n" \
		:	[DST0] "=Q" (*(OFFSET(dst, 0))), \
		[DST1] "=Q" (*(OFFSET(dst, 16))) \
		:	RVR0(r), RVR1(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * Unfortunately we cannot use these macros in the asm below, because GCC
 * would try to use the macro name and not its value later on.
 * They are kept as a reference to what each numbered variable is.
 */
#define	_00	"v17"
#define	_1d	"v16"
#define	_temp0	"v19"
#define	_temp1	"v18"

/*
 * MUL2_SETUP(): prepare the constants used by MUL2(): w17 is cleared to
 * all-zero bytes and every byte of w16 is set to 0x1d.
 */
#define	MUL2_SETUP() \
{ \
	__asm( \
	"eor " VR(17) ".16b," VR(17) ".16b," VR(17) ".16b\n" \
	"movi " VR(16) ".16b,#0x1d\n" \
	:	WVR(16), WVR(17)); \
}

/*
 * MUL2(r...): for every byte x of the 2 or 4 listed registers compute
 * (x << 1) ^ (0x1d if the top bit of x was set, else 0).
 *
 * Relies on the constants loaded by MUL2_SETUP(): "cmgt" compares w17
 * (zero) against x as signed bytes, giving an all-ones mask for bytes
 * whose top bit is set; the mask is ANDed with w16 (0x1d) and XOR-ed into
 * the shifted value.  v18-v21 are scratch registers (clobbered).
 */
#define	MUL2(r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		__asm( \
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
		"cmgt v21.16b," VR(17) ".16b," VR2(r) ".16b\n" \
		"cmgt v20.16b," VR(17) ".16b," VR3(r) ".16b\n" \
		"and v19.16b,v19.16b," VR(16) ".16b\n" \
		"and v18.16b,v18.16b," VR(16) ".16b\n" \
		"and v21.16b,v21.16b," VR(16) ".16b\n" \
		"and v20.16b,v20.16b," VR(16) ".16b\n" \
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
		"shl " VR2(r) ".16b," VR2(r) ".16b,#1\n" \
		"shl " VR3(r) ".16b," VR3(r) ".16b,#1\n" \
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
		"eor " VR2(r) ".16b,v21.16b," VR2(r) ".16b\n" \
		"eor " VR3(r) ".16b,v20.16b," VR3(r) ".16b\n" \
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
		:	RVR(17), RVR(16) \
		:	"v18", "v19", "v20", "v21"); \
		break; \
	case 2: \
		__asm( \
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
		"and v19.16b,v19.16b," VR(16) ".16b\n" \
		"and v18.16b,v18.16b," VR(16) ".16b\n" \
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
		:	UVR0(r), UVR1(r) \
		:	RVR(17), RVR(16) \
		:	"v18", "v19"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/* MUL4(r...): apply MUL2() twice. */
#define	MUL4(r...) \
{ \
	MUL2(r); \
	MUL2(r); \
}

/*
 * Unfortunately we cannot use these macros in the asm below, because GCC
 * would try to use the macro name and not its value later on.
 * They are kept as a reference to what each register is (here we are
 * using actual registers for the clobbered ones).
 */
#define	_0f		"v15"
#define	_a_save		"v14"
#define	_b_save		"v13"
#define	_lt_mod_a	"v12"
#define	_lt_clmul_a	"v11"
#define	_lt_mod_b	"v10"
#define	_lt_clmul_b	"v15"

/*
 * _MULx2(c, r...): multiply the bytes of exactly two registers by the
 * constant 'c' using the lookup tables gf_clmul_mod_lt[4*c + 0..3].
 *
 * Each byte is split into its high nibble (ushr #4) and low nibble
 * (and 0x0f).  The high nibbles index tables 0 and 1, the low nibbles
 * index tables 2 and 3 (via "tbl"), and the four lookups are XOR-ed
 * together.  v10-v15 are scratch registers (clobbered).
 */
#define	_MULx2(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 2: \
		__asm( \
		/* lts for upper part */ \
		"movi v15.16b,#0x0f\n" \
		"ld1 { v10.4s },%[lt0]\n" \
		"ld1 { v11.4s },%[lt1]\n" \
		/* upper part */ \
		"and v14.16b," VR0(r) ".16b,v15.16b\n" \
		"and v13.16b," VR1(r) ".16b,v15.16b\n" \
		"ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n" \
		"ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n" \
		\
		"tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \
		"tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \
		"tbl v15.16b,{v11.16b}," VR0(r) ".16b\n" \
		"tbl v11.16b,{v11.16b}," VR1(r) ".16b\n" \
		\
		"eor " VR0(r) ".16b,v15.16b,v12.16b\n" \
		"eor " VR1(r) ".16b,v11.16b,v10.16b\n" \
		/* lts for lower part */ \
		"ld1 { v10.4s },%[lt2]\n" \
		"ld1 { v15.4s },%[lt3]\n" \
		/* lower part */ \
		"tbl v12.16b,{v10.16b},v14.16b\n" \
		"tbl v10.16b,{v10.16b},v13.16b\n" \
		"tbl v11.16b,{v15.16b},v14.16b\n" \
		"tbl v15.16b,{v15.16b},v13.16b\n" \
		\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v12.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v10.16b\n" \
		"eor " VR0(r) ".16b," VR0(r) ".16b,v11.16b\n" \
		"eor " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
		:	UVR0(r), UVR1(r) \
		:	[lt0] "Q" ((gf_clmul_mod_lt[4*(c)+0][0])), \
		[lt1] "Q" ((gf_clmul_mod_lt[4*(c)+1][0])), \
		[lt2] "Q" ((gf_clmul_mod_lt[4*(c)+2][0])), \
		[lt3] "Q" ((gf_clmul_mod_lt[4*(c)+3][0])) \
		:	"v10", "v11", "v12", "v13", "v14", "v15"); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/*
 * MUL(c, r...): multiply 2 or 4 registers by the constant 'c', two
 * registers at a time via _MULx2().  With four registers the second pair
 * is processed first.
 */
#define	MUL(c, r...) \
{ \
	switch (REG_CNT(r)) { \
	case 4: \
		_MULx2(c, R_23(r)); \
		_MULx2(c, R_01(r)); \
		break; \
	case 2: \
		_MULx2(c, R_01(r)); \
		break; \
	default: \
		ZFS_ASM_BUG(); \
	} \
}

/* Bracket the SIMD code with kfpu_begin()/kfpu_end(). */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()

/*
 * Declarations of the wN vector variables used as asm operands.
 *
 * With _KERNEL defined, each wN is a register variable pinned to a
 * specific vector register: w0..w31 map to v0..v31, and w31..w38 all name
 * v31.  Otherwise they are plain 16-byte vector variables and register
 * selection is left to the compiler.
 */
/* Overkill... */
#if defined(_KERNEL)
#define	GEN_X_DEFINE_0_3() \
register unsigned char w0 asm("v0") __attribute__((vector_size(16))); \
register unsigned char w1 asm("v1") __attribute__((vector_size(16))); \
register unsigned char w2 asm("v2") __attribute__((vector_size(16))); \
register unsigned char w3 asm("v3") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5() \
register unsigned char w4 asm("v4") __attribute__((vector_size(16))); \
register unsigned char w5 asm("v5") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7() \
register unsigned char w6 asm("v6") __attribute__((vector_size(16))); \
register unsigned char w7 asm("v7") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9() \
register unsigned char w8 asm("v8") __attribute__((vector_size(16))); \
register unsigned char w9 asm("v9") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11() \
register unsigned char w10 asm("v10") __attribute__((vector_size(16))); \
register unsigned char w11 asm("v11") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15() \
register unsigned char w12 asm("v12") __attribute__((vector_size(16))); \
register unsigned char w13 asm("v13") __attribute__((vector_size(16))); \
register unsigned char w14 asm("v14") __attribute__((vector_size(16))); \
register unsigned char w15 asm("v15") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16() \
register unsigned char w16 asm("v16") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17() \
register unsigned char w17 asm("v17") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21() \
register unsigned char w18 asm("v18") __attribute__((vector_size(16))); \
register unsigned char w19 asm("v19") __attribute__((vector_size(16))); \
register unsigned char w20 asm("v20") __attribute__((vector_size(16))); \
register unsigned char w21 asm("v21") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23() \
register unsigned char w22 asm("v22") __attribute__((vector_size(16))); \
register unsigned char w23 asm("v23") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27() \
register unsigned char w24 asm("v24") __attribute__((vector_size(16))); \
register unsigned char w25 asm("v25") __attribute__((vector_size(16))); \
register unsigned char w26 asm("v26") __attribute__((vector_size(16))); \
register unsigned char w27 asm("v27") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30() \
register unsigned char w28 asm("v28") __attribute__((vector_size(16))); \
register unsigned char w29 asm("v29") __attribute__((vector_size(16))); \
register unsigned char w30 asm("v30") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31() \
register unsigned char w31 asm("v31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32() \
register unsigned char w32 asm("v31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36() \
register unsigned char w33 asm("v31") __attribute__((vector_size(16))); \
register unsigned char w34 asm("v31") __attribute__((vector_size(16))); \
register unsigned char w35 asm("v31") __attribute__((vector_size(16))); \
register unsigned char w36 asm("v31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38() \
register unsigned char w37 asm("v31") __attribute__((vector_size(16))); \
register unsigned char w38 asm("v31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_37_38()
#else
#define	GEN_X_DEFINE_0_3() \
unsigned char w0 __attribute__((vector_size(16))); \
unsigned char w1 __attribute__((vector_size(16))); \
unsigned char w2 __attribute__((vector_size(16))); \
unsigned char w3 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5() \
unsigned char w4 __attribute__((vector_size(16))); \
unsigned char w5 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7() \
unsigned char w6 __attribute__((vector_size(16))); \
unsigned char w7 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9() \
unsigned char w8 __attribute__((vector_size(16))); \
unsigned char w9 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11() \
unsigned char w10 __attribute__((vector_size(16))); \
unsigned char w11 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15() \
unsigned char w12 __attribute__((vector_size(16))); \
unsigned char w13 __attribute__((vector_size(16))); \
unsigned char w14 __attribute__((vector_size(16))); \
unsigned char w15 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16() \
unsigned char w16 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17() \
unsigned char w17 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21() \
unsigned char w18 __attribute__((vector_size(16))); \
unsigned char w19 __attribute__((vector_size(16))); \
unsigned char w20 __attribute__((vector_size(16))); \
unsigned char w21 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23() \
unsigned char w22 __attribute__((vector_size(16))); \
unsigned char w23 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27() \
unsigned char w24 __attribute__((vector_size(16))); \
unsigned char w25 __attribute__((vector_size(16))); \
unsigned char w26 __attribute__((vector_size(16))); \
unsigned char w27 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30() \
unsigned char w28 __attribute__((vector_size(16))); \
unsigned char w29 __attribute__((vector_size(16))); \
unsigned char w30 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31() \
unsigned char w31 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32() \
unsigned char w32 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36() \
unsigned char w33 __attribute__((vector_size(16))); \
unsigned char w34 __attribute__((vector_size(16))); \
unsigned char w35 __attribute__((vector_size(16))); \
unsigned char w36 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38() \
unsigned char w37 __attribute__((vector_size(16))); \
unsigned char w38 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_4_5() \
	GEN_X_DEFINE_6_7() \
	GEN_X_DEFINE_8_9() \
	GEN_X_DEFINE_10_11() \
	GEN_X_DEFINE_12_15() \
	GEN_X_DEFINE_16() \
	GEN_X_DEFINE_17() \
	GEN_X_DEFINE_18_21() \
	GEN_X_DEFINE_22_23() \
	GEN_X_DEFINE_24_27() \
	GEN_X_DEFINE_28_30() \
	GEN_X_DEFINE_31() \
	GEN_X_DEFINE_32() \
	GEN_X_DEFINE_33_36() \
	GEN_X_DEFINE_37_38()
#endif