1 /*
2  ============================================================================
3  Name        : MMIHelpers.h
4  Author      : Heiher <r@hev.cc>
5  Version     : 0.0.1
6  Copyright   : Copyright (c) 2015 everyone.
7  Description : The helpers for x86 SSE to Loongson MMI.
8  ============================================================================
9  */
10 
11 #ifndef __MMI_HELPERS_H__
12 #define __MMI_HELPERS_H__
13 
/*
 * Shared template behind the _mm_pack* helpers.
 *
 * Expands to an inline-asm string fragment of four instructions:
 *   1. <_f> on the `h` operands of _d and _s, result in the operand _t;
 *   2. <_f> on the `l` operands of _d and _s, result in %[<_D>l];
 *   3. punpckhwd of %[<_D>l] and %[<_t>] into %[<_D>h];
 *   4. punpcklwd of %[<_D>l] and %[<_t>] into %[<_D>l].
 *
 * _f is the instruction mnemonic (stringified as-is); _t names an operand
 * that is overwritten as scratch space.  Step 3 precedes step 4 because
 * step 4 overwrites %[<_D>l], which step 3 still has to read.
 */
#define __mm_packxxxx(_f, _D, _d, _s, _t) \
	#_f " %[" #_t "], %[" #_d "h], %[" #_s "h] \n\t" \
	#_f " %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t" \
	"punpckhwd %[" #_D "h], %[" #_D "l], %[" #_t "] \n\t" \
	"punpcklwd %[" #_D "l], %[" #_D "l], %[" #_t "] \n\t"
19 
/*
 * Bitwise OR.
 *
 * Expands to two `or` instructions as an inline-asm string fragment: one
 * on the `h`-suffixed named operands of _D, _d and _s, then one on the
 * `l`-suffixed operands.
 */
#define _mm_or(_D, _d, _s) \
	"or %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"or %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
23 
/*
 * Bitwise XOR.
 *
 * Expands to two `xor` instructions as an inline-asm string fragment: one
 * on the `h`-suffixed named operands of _D, _d and _s, then one on the
 * `l`-suffixed operands.
 */
#define _mm_xor(_D, _d, _s) \
	"xor %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"xor %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
27 
/*
 * Bitwise AND.
 *
 * Expands to two `and` instructions as an inline-asm string fragment: one
 * on the `h`-suffixed named operands of _D, _d and _s, then one on the
 * `l`-suffixed operands.
 */
#define _mm_and(_D, _d, _s) \
	"and %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"and %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
31 
/*
 * SSE: pandn
 *
 * Expands to two `pandn` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_pandn(_D, _d, _s) \
	"pandn %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"pandn %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
36 
/*
 * SSE: pshuflw
 *
 * Expands to an inline-asm string fragment that copies %[<_d>h] to
 * %[<_D>h] unchanged with `mov.d`, then applies `pshufh` to %[<_d>l] with
 * the (unsuffixed) operand %[<_s>] as the third operand, writing %[<_D>l].
 */
#define _mm_pshuflh(_D, _d, _s) \
	"mov.d %[" #_D "h], %[" #_d "h] \n\t" \
	"pshufh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
41 
/*
 * SSE: psllw (bits)
 *
 * Expands to two `psllh` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psllh(_D, _d, _s) \
	"psllh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"psllh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
46 
/*
 * SSE: pslld (bits)
 *
 * Expands to two `psllw` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psllw(_D, _d, _s) \
	"psllw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"psllw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
51 
/*
 * SSE: psllq (bits)
 *
 * Expands to two `dsll` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_pslld(_D, _d, _s) \
	"dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
56 
/*
 * SSE: pslldq (bytes)
 *
 * Expands to a five-instruction inline-asm string fragment:
 *   1. subu: %[<_tf>] is computed from %[<_s64>] and %[<_s>];
 *   2. dsrl: %[<_d>l] is shifted right by %[<_tf>], result kept in %[<_tf>];
 *   3. dsll: %[<_d>h] shifted left by %[<_s>] into %[<_D>h];
 *   4. dsll: %[<_d>l] shifted left by %[<_s>] into %[<_D>l];
 *   5. or:   %[<_tf>] is merged into %[<_D>h].
 *
 * _tf names an operand that is overwritten as scratch space.  Presumably
 * _s64 holds the constant 64 so that step 2 recovers the bits that step 4
 * shifts out of the `l` operand -- confirm against the callers.
 *
 * NOTE(review): _s is handed to dsll/dsrl unchanged; the "(bytes)" above
 * describes the SSE instruction, so verify which unit callers pass here.
 */
#define _mm_psllq(_D, _d, _s, _s64, _tf) \
	"subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
	"dsrl %[" #_tf "], %[" #_d "l], %[" #_tf "] \n\t" \
	"dsll %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"dsll %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
	"or %[" #_D "h], %[" #_D "h], %[" #_tf "] \n\t"
64 
/*
 * SSE: psrlw (bits)
 *
 * Expands to two `psrlh` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psrlh(_D, _d, _s) \
	"psrlh %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"psrlh %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
69 
/*
 * SSE: psrld (bits)
 *
 * Expands to two `psrlw` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psrlw(_D, _d, _s) \
	"psrlw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"psrlw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
74 
/*
 * SSE: psrlq (bits)
 *
 * Expands to two `dsrl` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psrld(_D, _d, _s) \
	"dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
79 
/*
 * SSE: psrldq (bytes)
 *
 * Expands to a five-instruction inline-asm string fragment:
 *   1. subu: %[<_tf>] is computed from %[<_s64>] and %[<_s>];
 *   2. dsll: %[<_d>h] is shifted left by %[<_tf>], result kept in %[<_tf>];
 *   3. dsrl: %[<_d>h] shifted right by %[<_s>] into %[<_D>h];
 *   4. dsrl: %[<_d>l] shifted right by %[<_s>] into %[<_D>l];
 *   5. or:   %[<_tf>] is merged into %[<_D>l].
 *
 * _tf names an operand that is overwritten as scratch space.  Presumably
 * _s64 holds the constant 64 so that step 2 recovers the bits that step 3
 * shifts out of the `h` operand -- confirm against the callers.
 *
 * NOTE(review): _s is handed to dsll/dsrl unchanged; the "(bytes)" above
 * describes the SSE instruction, so verify which unit callers pass here.
 */
#define _mm_psrlq(_D, _d, _s, _s64, _tf) \
	"subu %[" #_tf "], %[" #_s64 "], %[" #_s "] \n\t" \
	"dsll %[" #_tf "], %[" #_d "h], %[" #_tf "] \n\t" \
	"dsrl %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"dsrl %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t" \
	"or %[" #_D "l], %[" #_D "l], %[" #_tf "] \n\t"
87 
/*
 * SSE: psrad
 *
 * Expands to two `psraw` instructions as an inline-asm string fragment:
 * the `h` operand of _d and then the `l` operand of _d are each shifted by
 * the same unsuffixed operand %[<_s>] into the matching operand of _D.
 */
#define _mm_psraw(_D, _d, _s) \
	"psraw %[" #_D "h], %[" #_d "h], %[" #_s "] \n\t" \
	"psraw %[" #_D "l], %[" #_d "l], %[" #_s "] \n\t"
92 
/*
 * SSE: paddb
 *
 * Expands to two `paddb` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_paddb(_D, _d, _s) \
	"paddb %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"paddb %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
97 
/*
 * SSE: paddw
 *
 * Expands to two `paddh` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_paddh(_D, _d, _s) \
	"paddh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"paddh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
102 
/*
 * SSE: paddd
 *
 * Expands to two `paddw` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_paddw(_D, _d, _s) \
	"paddw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"paddw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
107 
/*
 * SSE: paddq
 *
 * Expands to two `dadd` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.  Note that the emitted mnemonic is `dadd`,
 * not one spelled like the macro name.
 */
#define _mm_paddd(_D, _d, _s) \
	"dadd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"dadd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
112 
/*
 * SSE: psubw
 *
 * Expands to two `psubh` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_psubh(_D, _d, _s) \
	"psubh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"psubh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
117 
/*
 * SSE: psubd
 *
 * Expands to two `psubw` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_psubw(_D, _d, _s) \
	"psubw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"psubw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
122 
/*
 * SSE: pmaxub
 *
 * Expands to two `pmaxub` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_pmaxub(_D, _d, _s) \
	"pmaxub %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"pmaxub %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
127 
/*
 * SSE: pmullw
 *
 * Expands to two `pmullh` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_pmullh(_D, _d, _s) \
	"pmullh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"pmullh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
132 
/*
 * SSE: pmulhw
 *
 * Expands to two `pmulhh` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_pmulhh(_D, _d, _s) \
	"pmulhh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"pmulhh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
137 
/*
 * SSE: pmuludq
 *
 * Expands to two `pmuluw` instructions as an inline-asm string fragment:
 * one on the `h`-suffixed named operands of _D, _d and _s, then one on
 * the `l`-suffixed operands.
 */
#define _mm_pmuluw(_D, _d, _s) \
	"pmuluw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t" \
	"pmuluw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
142 
/*
 * SSE: packsswb
 *
 * Thin wrapper: forwards _D, _d, _s and _t to __mm_packxxxx with the
 * instruction mnemonic `packsshb`.  _t names the operand that helper
 * overwrites as scratch space.
 */
#define _mm_packsshb(_D, _d, _s, _t) __mm_packxxxx(packsshb, _D, _d, _s, _t)
146 
/*
 * SSE: packssdw
 *
 * Thin wrapper: forwards _D, _d, _s and _t to __mm_packxxxx with the
 * instruction mnemonic `packsswh`.  _t names the operand that helper
 * overwrites as scratch space.
 */
#define _mm_packsswh(_D, _d, _s, _t) __mm_packxxxx(packsswh, _D, _d, _s, _t)
150 
/*
 * SSE: packuswb
 *
 * Thin wrapper: forwards _D, _d, _s and _t to __mm_packxxxx with the
 * instruction mnemonic `packushb`.  _t names the operand that helper
 * overwrites as scratch space.
 */
#define _mm_packushb(_D, _d, _s, _t) __mm_packxxxx(packushb, _D, _d, _s, _t)
154 
/*
 * SSE: punpcklbw
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `l` operands of _d and _s: `punpckhbh` writes %[<_D>h],
 * then `punpcklbh` writes %[<_D>l].
 *
 * %[<_D>l] is written by the last instruction only, so the `l` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpcklbh(_D, _d, _s) \
	"punpckhbh %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
	"punpcklbh %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
159 
/*
 * SSE: punpcklwd
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `l` operands of _d and _s: `punpckhhw` writes %[<_D>h],
 * then `punpcklhw` writes %[<_D>l].
 *
 * %[<_D>l] is written by the last instruction only, so the `l` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpcklhw(_D, _d, _s) \
	"punpckhhw %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
	"punpcklhw %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
164 
/*
 * SSE: punpckldq
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `l` operands of _d and _s: `punpckhwd` writes %[<_D>h],
 * then `punpcklwd` writes %[<_D>l].
 *
 * %[<_D>l] is written by the last instruction only, so the `l` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpcklwd(_D, _d, _s) \
	"punpckhwd %[" #_D "h], %[" #_d "l], %[" #_s "l] \n\t" \
	"punpcklwd %[" #_D "l], %[" #_d "l], %[" #_s "l] \n\t"
169 
/*
 * SSE: punpcklqdq
 *
 * Expands to an inline-asm string fragment of two `mov.d` copies:
 * %[<_s>l] goes to %[<_D>h], then %[<_d>l] goes to %[<_D>l].
 *
 * The copy into %[<_D>h] is emitted first; %[<_D>l] is only overwritten
 * afterwards, once %[<_s>l] has already been read.
 */
#define _mm_punpckldq(_D, _d, _s) \
	"mov.d %[" #_D "h], %[" #_s "l] \n\t" \
	"mov.d %[" #_D "l], %[" #_d "l] \n\t"
174 
/*
 * SSE: punpckhbw
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `h` operands of _d and _s: `punpcklbh` writes %[<_D>l],
 * then `punpckhbh` writes %[<_D>h].
 *
 * %[<_D>h] is written by the last instruction only, so the `h` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpckhbh(_D, _d, _s) \
	"punpcklbh %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
	"punpckhbh %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
179 
/*
 * SSE: punpckhwd
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `h` operands of _d and _s: `punpcklhw` writes %[<_D>l],
 * then `punpckhhw` writes %[<_D>h].
 *
 * %[<_D>h] is written by the last instruction only, so the `h` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpckhhw(_D, _d, _s) \
	"punpcklhw %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
	"punpckhhw %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
184 
/*
 * SSE: punpckhdq
 *
 * Expands to an inline-asm string fragment in which both instructions
 * read only the `h` operands of _d and _s: `punpcklwd` writes %[<_D>l],
 * then `punpckhwd` writes %[<_D>h].
 *
 * %[<_D>h] is written by the last instruction only, so the `h` inputs are
 * still intact for the first one even when _D names the same operand as
 * _d or _s.
 */
#define _mm_punpckhwd(_D, _d, _s) \
	"punpcklwd %[" #_D "l], %[" #_d "h], %[" #_s "h] \n\t" \
	"punpckhwd %[" #_D "h], %[" #_d "h], %[" #_s "h] \n\t"
189 
/*
 * SSE: punpckhqdq
 *
 * Expands to an inline-asm string fragment of two `mov.d` copies:
 * %[<_d>h] goes to %[<_D>l], then %[<_s>h] goes to %[<_D>h].
 *
 * The copy into %[<_D>l] is emitted first; %[<_D>h] is only overwritten
 * afterwards, once %[<_d>h] has already been read.
 */
#define _mm_punpckhdq(_D, _d, _s) \
	"mov.d %[" #_D "l], %[" #_d "h] \n\t" \
	"mov.d %[" #_D "h], %[" #_s "h] \n\t"
194 
195 #endif /* __MMI_HELPERS_H__ */
196 
197