1 /*
2  ============================================================================
3  Name        : MMIHelpers.h
4  Author      : Heiher <r@hev.cc>
5  Version     : 0.0.1
6  Copyright   : Copyright (c) 2015 everyone.
 Description : Helper macros for mapping x86 SSE operations onto Loongson MMI.
8  ============================================================================
9  */
10 
11 #ifndef __MMI_HELPERS_H__
12 #define __MMI_HELPERS_H__
13 
/*
 * Shared body of the pack helpers. _f is the pack mnemonic to emit; _D, _d
 * and _s are operand base names that get an "h"/"l" suffix, _t is a scratch
 * operand used without a suffix.
 *
 * Emitted sequence:
 *   _f        t,  dh, sh
 *   _f        Dl, dl, sl
 *   punpckhwd Dh, Dl, t
 *   punpcklwd Dl, Dl, t
 *
 * The scratch result is produced from dh/sh before Dl is written, and Dh is
 * rebuilt from Dl before Dl itself is overwritten by the final instruction.
 */
#define __mm_packxxxx(_f, _D, _d, _s, _t)               \
  #_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t"      \
  #_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"     \
  "punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t" \
  "punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"
21 
/* Bitwise OR: emits "or" once for the "h" operands and once for the "l"
 * operands of _D, _d and _s. */
#define _mm_or(_D, _d, _s)                        \
  "or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
26 
/* Bitwise XOR: emits "xor" once for the "h" operands and once for the "l"
 * operands of _D, _d and _s. */
#define _mm_xor(_D, _d, _s)                        \
  "xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
31 
/* Bitwise AND: emits "and" once for the "h" operands and once for the "l"
 * operands of _D, _d and _s. */
#define _mm_and(_D, _d, _s)                        \
  "and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
36 
/* SSE pandn counterpart: emits MMI "pandn" for the "h" operands, then for
 * the "l" operands of _D, _d and _s. */
#define _mm_pandn(_D, _d, _s)                        \
  "pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
42 
/* SSE pshuflw counterpart: the "h" operand is copied unchanged with mov.d,
 * while "pshufh" is applied to the "l" operand only. _s is the shuffle
 * control operand and is used without an h/l suffix. */
#define _mm_pshuflh(_D, _d, _s)         \
  "mov.d %[" #_D "h], %[" #_d "h] \n\t" \
  "pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
48 
/* SSE psllw counterpart (count in bits): emits "psllh" for the "h" and "l"
 * operands. The count operand _s is shared by both and has no suffix. */
#define _mm_psllh(_D, _d, _s)                       \
  "psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
54 
/* SSE pslld counterpart (count in bits): emits "psllw" for the "h" and "l"
 * operands. The count operand _s is shared by both and has no suffix. */
#define _mm_psllw(_D, _d, _s)                       \
  "psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
60 
/* SSE psllq counterpart (count in bits): emits "dsll" for the "h" and "l"
 * operands independently. The count operand _s has no suffix. */
#define _mm_pslld(_D, _d, _s)                      \
  "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
66 
/*
 * SSE pslldq counterpart: left shift across both halves, carrying the bits
 * that leave the "l" operand into the "h" operand.
 *
 *   tf = s64 - s
 *   tf = dl >> tf        (bits that cross from low to high)
 *   Dh = dh << s
 *   Dl = dl << s
 *   Dh = Dh | tf
 *
 * _s, _s64 and _tf are used without an h/l suffix; _tf is overwritten.
 * NOTE(review): _s64 presumably holds the constant 64, and the count is
 * consumed by dsll/dsrl (the same instructions the "(bits)" helpers use), so
 * callers emulating a byte shift likely need to pass a bit count - confirm.
 */
#define _mm_psllq(_D, _d, _s, _s64, _tf)            \
  "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
  "dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t" \
  "dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"  \
  "dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"  \
  "or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"
78 
/* SSE psrlw counterpart (count in bits): emits "psrlh" for the "h" and "l"
 * operands. The count operand _s is shared by both and has no suffix. */
#define _mm_psrlh(_D, _d, _s)                       \
  "psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
84 
/* SSE psrld counterpart (count in bits): emits "psrlw" for the "h" and "l"
 * operands. The count operand _s is shared by both and has no suffix. */
#define _mm_psrlw(_D, _d, _s)                       \
  "psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
90 
/* SSE psrlq counterpart (count in bits): emits "dsrl" for the "h" and "l"
 * operands independently. The count operand _s has no suffix. */
#define _mm_psrld(_D, _d, _s)                      \
  "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
96 
/*
 * SSE psrldq counterpart: right shift across both halves, carrying the bits
 * that leave the "h" operand into the "l" operand.
 *
 *   tf = s64 - s
 *   tf = dh << tf        (bits that cross from high to low)
 *   Dh = dh >> s
 *   Dl = dl >> s
 *   Dl = Dl | tf
 *
 * _s, _s64 and _tf are used without an h/l suffix; _tf is overwritten.
 * NOTE(review): _s64 presumably holds the constant 64, and the count is
 * consumed by dsll/dsrl (the same instructions the "(bits)" helpers use), so
 * callers emulating a byte shift likely need to pass a bit count - confirm.
 */
#define _mm_psrlq(_D, _d, _s, _s64, _tf)            \
  "subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
  "dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t" \
  "dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t"  \
  "dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"  \
  "or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"
108 
/* SSE psrad counterpart: emits "psraw" for the "h" and "l" operands. The
 * count operand _s is shared by both and has no suffix. */
#define _mm_psraw(_D, _d, _s)                       \
  "psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
  "psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
114 
/* SSE paddb counterpart: emits "paddb" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_paddb(_D, _d, _s)                        \
  "paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
120 
/* SSE paddw counterpart: emits "paddh" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_paddh(_D, _d, _s)                        \
  "paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
126 
/* SSE paddd counterpart: emits "paddw" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_paddw(_D, _d, _s)                        \
  "paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
132 
/* SSE paddq counterpart: emits "dadd" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_paddd(_D, _d, _s)                       \
  "dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
138 
/* SSE psubw counterpart: emits "psubh" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_psubh(_D, _d, _s)                        \
  "psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
144 
/* SSE psubd counterpart: emits "psubw" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_psubw(_D, _d, _s)                        \
  "psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
150 
/* SSE pmaxub counterpart: emits "pmaxub" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_pmaxub(_D, _d, _s)                        \
  "pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
156 
/* SSE pmullw counterpart: emits "pmullh" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_pmullh(_D, _d, _s)                        \
  "pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
162 
/* SSE pmulhw counterpart: emits "pmulhh" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_pmulhh(_D, _d, _s)                        \
  "pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
168 
/* SSE pmuludq counterpart: emits "pmuluw" for the "h" operands, then for the
 * "l" operands of _D, _d and _s. */
#define _mm_pmuluw(_D, _d, _s)                        \
  "pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
  "pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
174 
/* SSE packsswb counterpart: expands the shared pack sequence with the
 * "packsshb" mnemonic. _t is a scratch operand that gets overwritten. */
#define _mm_packsshb(_D, _d, _s, _t) \
  __mm_packxxxx(packsshb, _D, _d, _s, _t)
177 
/* SSE packssdw counterpart: expands the shared pack sequence with the
 * "packsswh" mnemonic. _t is a scratch operand that gets overwritten. */
#define _mm_packsswh(_D, _d, _s, _t) \
  __mm_packxxxx(packsswh, _D, _d, _s, _t)
180 
/* SSE packuswb counterpart: expands the shared pack sequence with the
 * "packushb" mnemonic. _t is a scratch operand that gets overwritten. */
#define _mm_packushb(_D, _d, _s, _t) \
  __mm_packxxxx(packushb, _D, _d, _s, _t)
183 
/* SSE punpcklbw counterpart: both result halves are built from the "l"
 * operands of _d and _s - "punpckhbh" produces Dh, "punpcklbh" produces Dl.
 * Dh is written first, so dl and sl are still unmodified when the second
 * instruction reads them. */
#define _mm_punpcklbh(_D, _d, _s)                        \
  "punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
  "punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
189 
/* SSE punpcklwd counterpart: both result halves are built from the "l"
 * operands of _d and _s - "punpckhhw" produces Dh, "punpcklhw" produces Dl.
 * Dh is written first, so dl and sl are still unmodified when the second
 * instruction reads them. */
#define _mm_punpcklhw(_D, _d, _s)                        \
  "punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
  "punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
195 
/* SSE punpckldq counterpart: both result halves are built from the "l"
 * operands of _d and _s - "punpckhwd" produces Dh, "punpcklwd" produces Dl.
 * Dh is written first, so dl and sl are still unmodified when the second
 * instruction reads them. */
#define _mm_punpcklwd(_D, _d, _s)                        \
  "punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
  "punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
201 
/* SSE punpcklqdq counterpart, done with two moves: Dh receives sl, then Dl
 * receives dl. Dh is written first, so the sequence still reads the original
 * sl when _D and _s name the same operand. */
#define _mm_punpckldq(_D, _d, _s)       \
  "mov.d %[" #_D "h], %[" #_s "l] \n\t" \
  "mov.d %[" #_D "l], %[" #_d "l] \n\t"
207 
/* SSE punpckhbw counterpart: both result halves are built from the "h"
 * operands of _d and _s - "punpcklbh" produces Dl, "punpckhbh" produces Dh.
 * Dl is written first, so dh and sh are still unmodified when the second
 * instruction reads them. */
#define _mm_punpckhbh(_D, _d, _s)                        \
  "punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
  "punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
213 
/* SSE punpckhwd counterpart: both result halves are built from the "h"
 * operands of _d and _s - "punpcklhw" produces Dl, "punpckhhw" produces Dh.
 * Dl is written first, so dh and sh are still unmodified when the second
 * instruction reads them. */
#define _mm_punpckhhw(_D, _d, _s)                        \
  "punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
  "punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
219 
/* SSE punpckhdq counterpart: both result halves are built from the "h"
 * operands of _d and _s - "punpcklwd" produces Dl, "punpckhwd" produces Dh.
 * Dl is written first, so dh and sh are still unmodified when the second
 * instruction reads them. */
#define _mm_punpckhwd(_D, _d, _s)                        \
  "punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
  "punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
225 
/* SSE punpckhqdq counterpart, done with two moves: Dl receives dh, then Dh
 * receives sh. Dl is written first, so the sequence still reads the original
 * dh when _D and _d name the same operand. */
#define _mm_punpckhdq(_D, _d, _s)       \
  "mov.d %[" #_D "l], %[" #_d "h] \n\t" \
  "mov.d %[" #_D "h], %[" #_s "h] \n\t"
231 
232 #endif /* __MMI_HELPERS_H__ */
233