/*
 ============================================================================
 Name        : MMIHelpers.h
 Author      : Heiher <r@hev.cc>
 Version     : 0.0.1
 Copyright   : Copyright (c) 2015 everyone.
 Description : The helpers for x86 SSE to Loongson MMI.
 ============================================================================
 */

#ifndef __MMI_HELPERS_H__
#define __MMI_HELPERS_H__

/*
 * Conventions shared by every macro in this header
 * ------------------------------------------------
 * - Each macro expands to a sequence of adjacent string literals, one
 *   instruction per literal, each terminated by " \n\t".  The result is
 *   meant to be pasted into an inline-asm template.
 * - Arguments are stringified as written (#arg) and spliced into "%[...]"
 *   operand references.  For a "wide" argument X the macros reference two
 *   operands, %[Xh] and %[Xl]; from the unpack/shift helpers below these are
 *   the high and low halves of the emulated SSE register.
 * - _D is the destination, _d the first source, _s the second source.
 *   Where _s appears without an h/l suffix (shifts, shuffles) it is a single
 *   operand used for both halves.
 * - Instruction order inside a macro is significant: a half is always read
 *   before the instruction that overwrites it, so the sequences stay valid
 *   when _D names the same operand pair as _d.
 */

/*
 * Shared body of the pack helpers.  _f is the pack mnemonic, _t a scratch
 * operand.  The mnemonic is applied once to the high halves (result in _t)
 * and once to the low halves (result in _D's low half); the two
 * intermediate results are then interleaved word-wise into _D's high and
 * low halves.
 */
#define __mm_packxxxx(_f, _D, _d, _s, _t) \
    #_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t" \
    #_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t" \
    "punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"

/* ---- Bitwise logic: same instruction on the high and the low half ------ */

/* Bitwise OR of both halves. */
#define _mm_or(_D, _d, _s) \
    "or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* Bitwise XOR of both halves. */
#define _mm_xor(_D, _d, _s) \
    "xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* Bitwise AND of both halves. */
#define _mm_and(_D, _d, _s) \
    "and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pandn */
#define _mm_pandn(_D, _d, _s) \
    "pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* ---- Shuffle ----------------------------------------------------------- */

/*
 * SSE: pshuflw
 * The high half is copied unchanged; only the low half is shuffled, with
 * _s as the (unsuffixed) control operand.
 */
#define _mm_pshuflh(_D, _d, _s) \
    "mov.d %[" #_D "h], %[" #_d "h] \n\t" \
    "pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* ---- Left shifts: _s is one operand applied to both halves ------------- */

/* SSE: psllw (bits) */
#define _mm_psllh(_D, _d, _s) \
    "psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: pslld (bits) */
#define _mm_psllw(_D, _d, _s) \
    "psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psllq (bits) */
#define _mm_pslld(_D, _d, _s) \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/*
 * SSE: pslldq (bytes)
 * Full-width left shift with carry from the low half into the high half:
 *   _tf    = _s64 - _s
 *   _tf    = low(_d) >> _tf      (bits that cross into the high half)
 *   high   = high(_d) << _s
 *   low    = low(_d)  << _s
 *   high  |= _tf
 * _tf is a scratch operand and is overwritten.
 * NOTE(review): _s is handed to dsll/dsrl unchanged, and _s64 presumably
 * holds the half width (64) -- confirm the units callers pass for _s, and
 * the behaviour for _s == 0 or _s >= _s64, against the call sites.
 */
#define _mm_psllq(_D, _d, _s, _s64, _tf) \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
    "dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t" \
    "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
    "or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"

/* ---- Right shifts: _s is one operand applied to both halves ------------ */

/* SSE: psrlw (bits) */
#define _mm_psrlh(_D, _d, _s) \
    "psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psrld (bits) */
#define _mm_psrlw(_D, _d, _s) \
    "psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* SSE: psrlq (bits) */
#define _mm_psrld(_D, _d, _s) \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/*
 * SSE: psrldq (bytes)
 * Mirror image of _mm_psllq: the bits shifted out of the high half are
 * captured in _tf (high(_d) << (_s64 - _s)) and OR-ed into the low half
 * after both halves have been shifted right by _s.
 * _tf is a scratch operand and is overwritten.
 * NOTE(review): same open questions about the units of _s and the value
 * of _s64 as for _mm_psllq -- confirm against the call sites.
 */
#define _mm_psrlq(_D, _d, _s, _s64, _tf) \
    "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
    "dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t" \
    "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
    "or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"

/* SSE: psrad */
#define _mm_psraw(_D, _d, _s) \
    "psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
    "psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"

/* ---- Packed add / subtract / max / multiply ---------------------------- */

/* SSE: paddb */
#define _mm_paddb(_D, _d, _s) \
    "paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddw */
#define _mm_paddh(_D, _d, _s) \
    "paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddd */
#define _mm_paddw(_D, _d, _s) \
    "paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: paddq */
#define _mm_paddd(_D, _d, _s) \
    "dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: psubw */
#define _mm_psubh(_D, _d, _s) \
    "psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: psubd */
#define _mm_psubw(_D, _d, _s) \
    "psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmaxub */
#define _mm_pmaxub(_D, _d, _s) \
    "pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmullw */
#define _mm_pmullh(_D, _d, _s) \
    "pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmulhw */
#define _mm_pmulhh(_D, _d, _s) \
    "pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: pmuludq */
#define _mm_pmuluw(_D, _d, _s) \
    "pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
    "pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* ---- Pack: thin wrappers that select the mnemonic for __mm_packxxxx ---- */

/* SSE: packsswb */
#define _mm_packsshb(_D, _d, _s, _t) \
    __mm_packxxxx(packsshb, _D, _d, _s, _t)

/* SSE: packssdw */
#define _mm_packsswh(_D, _d, _s, _t) \
    __mm_packxxxx(packsswh, _D, _d, _s, _t)

/* SSE: packuswb */
#define _mm_packushb(_D, _d, _s, _t) \
    __mm_packxxxx(packushb, _D, _d, _s, _t)

/*
 * ---- Unpack low: both result halves come from the LOW source halves ----
 * The high result half is written first so the low source half is still
 * intact for the second instruction when _D and _d are the same operand.
 */

/* SSE: punpcklbw */
#define _mm_punpcklbh(_D, _d, _s) \
    "punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpcklwd */
#define _mm_punpcklhw(_D, _d, _s) \
    "punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpckldq */
#define _mm_punpcklwd(_D, _d, _s) \
    "punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
    "punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"

/* SSE: punpcklqdq -- high result = low(_s), low result = low(_d). */
#define _mm_punpckldq(_D, _d, _s) \
    "mov.d %[" #_D "h], %[" #_s "l] \n\t" \
    "mov.d %[" #_D "l], %[" #_d "l] \n\t"

/*
 * ---- Unpack high: both result halves come from the HIGH source halves --
 * Here the low result half is written first, so the high source half is
 * still intact for the second instruction when _D and _d are the same
 * operand.
 */

/* SSE: punpckhbw */
#define _mm_punpckhbh(_D, _d, _s) \
    "punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhwd */
#define _mm_punpckhhw(_D, _d, _s) \
    "punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhdq */
#define _mm_punpckhwd(_D, _d, _s) \
    "punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
    "punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"

/* SSE: punpckhqdq -- low result = high(_d), high result = high(_s). */
#define _mm_punpckhdq(_D, _d, _s) \
    "mov.d %[" #_D "l], %[" #_d "h] \n\t" \
    "mov.d %[" #_D "h], %[" #_s "h] \n\t"

#endif /* __MMI_HELPERS_H__ */