1 /*  This file, swapproc.c, contains general utility routines that are      */
2 /*  used by other FITSIO routines to swap bytes.                           */
3 
4 /*  The FITSIO software was written by William Pence at the High Energy    */
/*  Astrophysics Science Archive Research Center (HEASARC) at the NASA     */
6 /*  Goddard Space Flight Center.                                           */
7 
8 /* The fast SSE2 and SSSE3 functions were provided by Julian Taylor, ESO */
9 
10 #include <string.h>
11 #include <stdlib.h>
12 #include "fitsio2.h"
13 
/* The __builtin_bswap32/__builtin_bswap64 builtins are available since
 * GCC 4.3.  Clang reports itself as GCC 4.2 through __GNUC__ and
 * __GNUC_MINOR__, so it is detected separately with __has_builtin. */
#if defined(__GNUC__) && \
    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define HAVE_BSWAP
#elif defined(__has_builtin)
#if __has_builtin(__builtin_bswap32) && __has_builtin(__builtin_bswap64)
#define HAVE_BSWAP
#endif
#endif
18 
19 #ifdef __SSSE3__
20 #include <tmmintrin.h>
/* Rearrange one 16-byte block in place.  The block is loaded, its bytes are
 * permuted as described by the shuffle mask, and the result is stored back
 * to the same address.  values must be 16 byte aligned because aligned
 * load/store instructions are used. */
static inline void swap_ssse3(char * values, __m128i mask)
{
    __m128i *const block = (__m128i *)values;

    _mm_store_si128(block, _mm_shuffle_epi8(_mm_load_si128(block), mask));
}
28 #endif
29 #ifdef __SSE2__
30 #include <emmintrin.h>
/* Byte-swap 8 consecutive shorts in place; values must be 16 byte aligned.
 * Each 16-bit lane is shifted right and left by 8 bits and the two halves
 * are OR-ed back together.
 * faster than ssse3 variant for shorts */
static inline void swap2_sse2(char * values)
{
    __m128i *const block = (__m128i *)values;
    const __m128i packed = _mm_load_si128(block);
    const __m128i high_to_low = _mm_srli_epi16(packed, 8);
    const __m128i low_to_high = _mm_slli_epi16(packed, 8);

    _mm_store_si128(block, _mm_or_si128(high_to_low, low_to_high));
}
42 /* the three shuffles required for 4 and 8 byte variants make
43  * SSE2 slower than bswap */
44 
45 
/* Number of leading elements (each esize bytes) that must be handled
 * separately before addr reaches the requested alignment.  The result is 0
 * when addr is already aligned and is never larger than nvals. */
static inline size_t get_peel(void * addr, size_t esize, size_t nvals,
                              size_t alignment)
{
    const size_t misalign = (size_t)addr % alignment;
    size_t lead = 0;

    if (misalign != 0) {
        lead = (alignment - misalign) / esize;
    }

    return (lead > nvals) ? nvals : lead;
}
55 #endif
56 
57 /*--------------------------------------------------------------------------*/
/* Portable byte swap of nvals 2-byte values, one value at a time.
 * Nothing is done when nvals is zero or negative. */
static void ffswap2_slow(short *svalues, long nvals)
{
    unsigned short *cur = (unsigned short *) svalues;
    long remaining;

    for (remaining = nvals; remaining > 0; remaining--, cur++)
    {
        const unsigned short word = *cur;

        /* exchange the high and low bytes */
        *cur = (word >> 8) | (word << 8);
    }
}
70 /*--------------------------------------------------------------------------*/
71 #if __SSE2__
void ffswap2(short *svalues,  /* IO - pointer to shorts to be swapped    */
             long nvals)     /* I  - number of shorts to be swapped     */
/*
  swap the bytes in the input short integers: ( 0 1 -> 1 0 )

  SSE2 version: the leading values are swapped one at a time until the
  address is 16-byte aligned, whole groups of 8 shorts are then swapped with
  swap2_sse2, and the remaining values are swapped one at a time.
  Nothing is done when nvals is zero or negative.
*/
{
    long ii, peel, vec_end;

    /* A negative count would become a huge unsigned value in the peel and */
    /* loop-bound arithmetic below, so reject it up front.                  */
    if (nvals <= 0) {
        return;
    }

    if ((size_t)svalues % 2 != 0) { /* should not happen */
        ffswap2_slow(svalues, nvals);
        return;
    }

    /* get_peel never returns more than nvals, so the cast is safe */
    peel = (long)get_peel((void*)&svalues[0], sizeof(svalues[0]),
                          (size_t)nvals, 16);

    /* index one past the last complete group of 8 aligned shorts */
    vec_end = peel + ((nvals - peel) / 8) * 8;

    ffswap2_slow(svalues, peel);
    for (ii = peel; ii < vec_end; ii += 8) {
        swap2_sse2((char*)&svalues[ii]);
    }
    ffswap2_slow(&svalues[ii], nvals - ii);
}
92 #else
void ffswap2(short *svalues,  /* IO - pointer to shorts to be swapped    */
             long nvals)     /* I  - number of shorts to be swapped     */
/*
  swap the bytes in the input short integers: ( 0 1 -> 1 0 )

  Version built when __SSE2__ is not set: all of the work is done by
  ffswap2_slow.
*/
{
    ffswap2_slow(svalues, nvals);
}
101 #endif
102 /*--------------------------------------------------------------------------*/
/* Byte swap of nvals 4-byte values, one value at a time
 * ( 0 1 2 3 -> 3 2 1 0 ).  A compiler byte-swap builtin/intrinsic is used
 * when one is available, otherwise the bytes are exchanged by hand.
 * Nothing is done when nvals is zero or negative. */
static void ffswap4_slow(INT32BIT *ivalues, long nvals)
{
    long idx;

#if defined(HAVE_BSWAP)
    for (idx = 0; idx < nvals; idx++)
    {
        ivalues[idx] = __builtin_bswap32(ivalues[idx]);
    }
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
    /* intrinsic byte swapping function in Microsoft Visual C++ 8.0 and later */
    unsigned int *words = (unsigned int *) ivalues;

    for (idx = 0; idx < nvals; idx++)
    {
        words[idx] = _byteswap_ulong(words[idx]);
    }
#else
    for (idx = 0; idx < nvals; idx++)
    {
        char *bytes = (char *)&ivalues[idx];
        const char outer = bytes[0];
        const char inner = bytes[1];

        /* mirror the four bytes around the centre of the value */
        bytes[0] = bytes[3];
        bytes[1] = bytes[2];
        bytes[2] = inner;
        bytes[3] = outer;
    }
#endif
}
136 /*--------------------------------------------------------------------------*/
137 #ifdef __SSSE3__
void ffswap4(INT32BIT *ivalues,  /* IO - pointer to INT*4 to be swapped    */
                 long nvals)     /* I  - number of 4-byte values to be swapped */
/*
  swap the bytes in the input 4-byte integer: ( 0 1 2 3 -> 3 2 1 0 )

  SSSE3 version: the leading values are swapped one at a time until the
  address is 16-byte aligned, whole groups of 4 values are then swapped with
  swap_ssse3, and the remaining values are swapped one at a time.
  Nothing is done when nvals is zero or negative.
*/
{
    long ii, peel, vec_end;

    /* A negative count would become a huge unsigned value in the peel and */
    /* loop-bound arithmetic below, so reject it up front.                  */
    if (nvals <= 0) {
        return;
    }

    if ((size_t)ivalues % 4 != 0) { /* should not happen */
        ffswap4_slow(ivalues, nvals);
        return;
    }

    /* shuffle mask that reverses the bytes inside each 4-byte group */
    const __m128i cmask4 = _mm_set_epi8(12, 13, 14, 15,
                                        8, 9, 10, 11,
                                        4, 5, 6, 7,
                                        0, 1, 2 ,3);

    /* get_peel never returns more than nvals, so the cast is safe */
    peel = (long)get_peel((void*)&ivalues[0], sizeof(ivalues[0]),
                          (size_t)nvals, 16);

    /* index one past the last complete group of 4 aligned values */
    vec_end = peel + ((nvals - peel) / 4) * 4;

    ffswap4_slow(ivalues, peel);
    for (ii = peel; ii < vec_end; ii += 4) {
        swap_ssse3((char*)&ivalues[ii], cmask4);
    }
    ffswap4_slow(&ivalues[ii], nvals - ii);
}
161 #else
void ffswap4(INT32BIT *ivalues,  /* IO - pointer to INT*4 to be swapped    */
                 long nvals)     /* I  - number of 4-byte values to be swapped */
/*
  swap the bytes in the input 4-byte integer: ( 0 1 2 3 -> 3 2 1 0 )

  Version built when __SSSE3__ is not defined: all of the work is done by
  ffswap4_slow.
*/
{
    ffswap4_slow(ivalues, nvals);
}
170 #endif
171 /*--------------------------------------------------------------------------*/
/* Byte swap of nvals 8-byte values, one value at a time
 * ( 01234567 -> 76543210 ).  A compiler byte-swap builtin/intrinsic is used
 * when one is available, otherwise the bytes are exchanged by hand.
 * Nothing is done when nvals is zero or negative. */
static void ffswap8_slow(double *dvalues, long nvals)
{
    long idx;
#ifdef HAVE_BSWAP
    LONGLONG *words = (LONGLONG *) dvalues;

    for (idx = 0; idx < nvals; idx++)
    {
        words[idx] = __builtin_bswap64(words[idx]);
    }
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
    /* intrinsic byte swapping function in Microsoft Visual C++ 8.0 and later */
    unsigned __int64 *words = (unsigned __int64 *) dvalues;

    for (idx = 0; idx < nvals; idx++)
    {
        words[idx] = _byteswap_uint64(words[idx]);
    }
#else
    char *bytes = (char *) dvalues;   /* first byte of the current value */

    for (idx = 0; idx < nvals; idx++, bytes += 8)
    {
        int lo, hi;

        /* exchange byte pairs working from the ends towards the middle */
        for (lo = 0, hi = 7; lo < hi; lo++, hi--)
        {
            const char held = bytes[lo];

            bytes[lo] = bytes[hi];
            bytes[hi] = held;
        }
    }
#endif
}
215 /*--------------------------------------------------------------------------*/
216 #ifdef __SSSE3__
void ffswap8(double *dvalues,  /* IO - pointer to doubles to be swapped     */
             long nvals)       /* I  - number of doubles to be swapped      */
/*
  swap the bytes in the input doubles: ( 01234567  -> 76543210 )

  SSSE3 version: the leading value is swapped on its own if the address is
  not 16-byte aligned, whole pairs of doubles are then swapped with
  swap_ssse3, and any remaining value is swapped on its own.
  Nothing is done when nvals is zero or negative.
*/
{
    long ii, peel, vec_end;

    /* A negative count would become a huge unsigned value in the peel and */
    /* loop-bound arithmetic below, so reject it up front.                  */
    if (nvals <= 0) {
        return;
    }

    if ((size_t)dvalues % 8 != 0) { /* should not happen on amd64 */
        ffswap8_slow(dvalues, nvals);
        return;
    }

    /* shuffle mask that reverses the bytes inside each 8-byte group */
    const __m128i cmask8 = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15,
                                        0, 1, 2 ,3, 4, 5, 6, 7);

    /* get_peel never returns more than nvals, so the cast is safe */
    peel = (long)get_peel((void*)&dvalues[0], sizeof(dvalues[0]),
                          (size_t)nvals, 16);

    /* index one past the last complete pair of aligned doubles */
    vec_end = peel + ((nvals - peel) / 2) * 2;

    ffswap8_slow(dvalues, peel);
    for (ii = peel; ii < vec_end; ii += 2) {
        swap_ssse3((char*)&dvalues[ii], cmask8);
    }
    ffswap8_slow(&dvalues[ii], nvals - ii);
}
238 #else
void ffswap8(double *dvalues,  /* IO - pointer to doubles to be swapped     */
             long nvals)       /* I  - number of doubles to be swapped      */
/*
  swap the bytes in the input doubles: ( 01234567  -> 76543210 )

  Version built when __SSSE3__ is not defined: all of the work is done by
  ffswap8_slow.
*/
{
    ffswap8_slow(dvalues, nvals);
}
247 #endif
248