/*
 ============================================================================
 Name        : MMIHelpers.h
 Author      : Heiher <r@hev.cc>
 Version     : 0.0.1
 Copyright   : Copyright (c) 2015 everyone.
 Description : The helpers for x86 SSE to Loongson MMI.
 ============================================================================
 */

/*
 * Usage notes
 * -----------
 * Every macro below expands to a single C string literal (adjacent literals
 * concatenated by the compiler) holding one instruction per "\n\t"-terminated
 * line.  The strings use the "%[name]" named-operand syntax, so they are
 * meant to be pasted into an extended inline-asm template.
 *
 * Operand naming: each macro argument is stringified and, for vector
 * operands, suffixed with "h" or "l".  Passing `x` therefore requires the
 * asm statement to declare operands named `xh` and `xl` (presumably the high
 * and low 64-bit halves of one 128-bit SSE value -- confirm against callers).
 * Shift counts, shuffle selectors and scratch operands are used without a
 * suffix.
 *
 * Macro naming: the macro names carry the MMI element suffixes, while the
 * "SSE:" comment on each macro gives the SSE mnemonic it stands in for
 * (e.g. _mm_psllh <-> psllw, _mm_psllw <-> pslld, _mm_pslld <-> psllq).
 *
 * Arguments are stringified with '#', so they must be plain operand names;
 * they are not macro-expanded by these helpers.
 */

#ifndef __MMI_HELPERS_H__
#define __MMI_HELPERS_H__

/*
 * Common body of the pack helpers.
 *   _f : pack mnemonic to emit (packsshb, packsswh, packushb)
 *   _D : destination (Dh/Dl), _d/_s : sources (h/l pairs)
 *   _t : scratch operand, overwritten
 * Sequence: pack the two high halves into _t, pack the two low halves into
 * Dl, then punpckhwd/punpcklwd of (Dl, _t) produce Dh and Dl.  Dh is written
 * before the final instruction, which reads only Dl and _t.
 */
#define __mm_packxxxx(_f, _D, _d, _s, _t)                         \
    #_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t"              \
    #_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"             \
    "punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t"         \
    "punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"

/* Bitwise OR of both halves: D = d | s */
#define _mm_or(_D, _d, _s)                                        \
    "or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"               \
    "or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* Bitwise XOR of both halves: D = d ^ s */
#define _mm_xor(_D, _d, _s)                                       \
    "xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"              \
    "xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* Bitwise AND of both halves: D = d & s */
#define _mm_and(_D, _d, _s)                                       \
    "and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"              \
    "and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pandn */
#define _mm_pandn(_D, _d, _s)                                     \
    "pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pshuflw -- high half is copied unchanged, low half is shuffled by
 * the (unsuffixed) selector operand _s. */
#define _mm_pshuflh(_D, _d, _s)                                   \
    "mov.d %[" #_D "h], %[" #_d "h] \n\t"                         \
    "pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psllw (bits) -- _s is one unsuffixed count operand for both halves */
#define _mm_psllh(_D, _d, _s)                                     \
    "psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"             \
    "psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: pslld (bits) */
#define _mm_psllw(_D, _d, _s)                                     \
    "psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"             \
    "psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psllq (bits) */
#define _mm_pslld(_D, _d, _s)                                     \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"              \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/*
 * SSE: pslldq (bytes)
 * Left shift across the h/l pair:
 *   _tf = _s64 - _s;  _tf = dl >> _tf;      (bits carried out of the low half)
 *   Dh  = dh << _s;   Dl  = dl << _s;   Dh |= _tf;
 * _tf is a scratch operand and is overwritten; it is computed from dl before
 * Dl is written.
 * NOTE(review): _s is handed straight to dsll/dsrl, so it is presumably a bit
 * count with _s64 holding 64 -- confirm that callers pre-scale the SSE byte
 * count, and that only shifts below one half-width are requested.
 */
#define _mm_psllq(_D, _d, _s, _s64, _tf)                          \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t"             \
    "dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t"             \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"              \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"              \
    "or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"

/* SSE: psrlw (bits) */
#define _mm_psrlh(_D, _d, _s)                                     \
    "psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"             \
    "psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psrld (bits) */
#define _mm_psrlw(_D, _d, _s)                                     \
    "psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"             \
    "psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psrlq (bits) */
#define _mm_psrld(_D, _d, _s)                                     \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"              \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/*
 * SSE: psrldq (bytes)
 * Right shift across the h/l pair (mirror of _mm_psllq):
 *   _tf = _s64 - _s;  _tf = dh << _tf;      (bits carried out of the high half)
 *   Dh  = dh >> _s;   Dl  = dl >> _s;   Dl |= _tf;
 * _tf is a scratch operand and is overwritten; it is computed from dh before
 * Dh is written.
 * NOTE(review): same bit-count / range assumptions as _mm_psllq -- confirm.
 */
#define _mm_psrlq(_D, _d, _s, _s64, _tf)                          \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t"             \
    "dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t"             \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"              \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"              \
    "or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"

/* SSE: psrad */
#define _mm_psraw(_D, _d, _s)                                     \
    "psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"             \
    "psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: paddb */
#define _mm_paddb(_D, _d, _s)                                     \
    "paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddw */
#define _mm_paddh(_D, _d, _s)                                     \
    "paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddd */
#define _mm_paddw(_D, _d, _s)                                     \
    "paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddq */
#define _mm_paddd(_D, _d, _s)                                     \
    "dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"             \
    "dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: psubw */
#define _mm_psubh(_D, _d, _s)                                     \
    "psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: psubd */
#define _mm_psubw(_D, _d, _s)                                     \
    "psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"            \
    "psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmaxub */
#define _mm_pmaxub(_D, _d, _s)                                    \
    "pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"           \
    "pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmullw */
#define _mm_pmullh(_D, _d, _s)                                    \
    "pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"           \
    "pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmulhw */
#define _mm_pmulhh(_D, _d, _s)                                    \
    "pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"           \
    "pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmuludq */
#define _mm_pmuluw(_D, _d, _s)                                    \
    "pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"           \
    "pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: packsswb (_t is a scratch operand, see __mm_packxxxx) */
#define _mm_packsshb(_D, _d, _s, _t) __mm_packxxxx(packsshb, _D, _d, _s, _t)

/* SSE: packssdw (_t is a scratch operand, see __mm_packxxxx) */
#define _mm_packsswh(_D, _d, _s, _t) __mm_packxxxx(packsswh, _D, _d, _s, _t)

/* SSE: packuswb (_t is a scratch operand, see __mm_packxxxx) */
#define _mm_packushb(_D, _d, _s, _t) __mm_packxxxx(packushb, _D, _d, _s, _t)

/*
 * The punpckl* helpers read only the "l" inputs.  Dh is written first and Dl
 * last, so the second instruction still sees the original dl when _D names
 * the same operand as _d.
 */

/* SSE: punpcklbw */
#define _mm_punpcklbh(_D, _d, _s)                                 \
    "punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"        \
    "punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpcklwd */
#define _mm_punpcklhw(_D, _d, _s)                                 \
    "punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"        \
    "punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpckldq */
#define _mm_punpcklwd(_D, _d, _s)                                 \
    "punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t"        \
    "punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpcklqdq -- Dh = sl, Dl = dl */
#define _mm_punpckldq(_D, _d, _s)                                 \
    "mov.d %[" #_D "h], %[" #_s "l] \n\t"                         \
    "mov.d %[" #_D "l], %[" #_d "l] \n\t"

/*
 * The punpckh* helpers read only the "h" inputs.  Dl is written first and Dh
 * last, so the second instruction still sees the original dh when _D names
 * the same operand as _d.
 */

/* SSE: punpckhbw */
#define _mm_punpckhbh(_D, _d, _s)                                 \
    "punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"        \
    "punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhwd */
#define _mm_punpckhhw(_D, _d, _s)                                 \
    "punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"        \
    "punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhdq */
#define _mm_punpckhwd(_D, _d, _s)                                 \
    "punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t"        \
    "punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhqdq -- Dl = dh, Dh = sh */
#define _mm_punpckhdq(_D, _d, _s)                                 \
    "mov.d %[" #_D "l], %[" #_d "h] \n\t"                         \
    "mov.d %[" #_D "h], %[" #_s "h] \n\t"

#endif /* __MMI_HELPERS_H__ */