1 /** @file float.h
2  ** @brief Float - Template
3  ** @author Andrea Vedaldi
4  ** @author David Novotny
5  **/
6 
7 /*
8 Copyright (C) 2014 Andrea Vedaldi.
9 Copyright (C) 2013 David Novotny.
10 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
11 All rights reserved.
12 
13 This file is part of the VLFeat library and is made available under
14 the terms of the BSD license (see the COPYING file).
15 */
16 
17 #include "generic.h"
18 
/*
 * Template parameters.
 *
 * FLT is compared against the VL_TYPE_* constants to pick the element
 * type for this instantiation. Every parameter macro owned by this
 * header is cleared first, presumably so that the header can be
 * included again with a different FLT.
 *
 *   T    element type (float, double, vl_uint32 or vl_int32)
 *   SFX  short suffix token matching T (f, d, ui32 or i32)
 *
 * If FLT matches none of the four cases, T and SFX stay undefined.
 */

/* scalar parameters */
#undef T
#undef SFX

/* SSE2 vector parameters */
#undef VSIZE
#undef VSFX
#undef VTYPE

/* AVX vector parameters */
#undef VSIZEavx
#undef VSFXavx
#undef VTYPEavx

#if FLT == VL_TYPE_FLOAT
#define T   float
#define SFX f
#elif FLT == VL_TYPE_DOUBLE
#define T   double
#define SFX d
#elif FLT == VL_TYPE_UINT32
#define T   vl_uint32
#define SFX ui32
#elif FLT == VL_TYPE_INT32
#define T   vl_int32
#define SFX i32
#endif
41 
42 /* ---------------------------------------------------------------- */
43 /*                                                              AVX */
44 /* ---------------------------------------------------------------- */
45 
#ifdef __AVX__

/*
 * AVX (256-bit) vector parameters for the element type selected by FLT.
 * Only the float and double cases are provided; for any other FLT the
 * three macros below stay undefined.
 *
 *   VSIZEavx  number of elements per vector
 *   VSFXavx   intrinsic-name suffix (s or d)
 *   VTYPEavx  vector type
 */
#if (FLT == VL_TYPE_FLOAT)
#  define VSIZEavx  8
#  define VSFXavx   s
#  define VTYPEavx  __m256
#elif (FLT == VL_TYPE_DOUBLE)
#  define VSIZEavx  4
#  define VSFXavx   d
#  define VTYPEavx  __m256d
#endif

/* Non-zero when the low five bits of address x are clear (32-byte multiple). */
#define VALIGNEDavx(x) (! (((vl_uintptr)(x)) & 0x1F))

/*
 * Intrinsic names, built by pasting VSFXavx onto a common prefix.
 *
 * VSFXavx is used here (rather than the SSE2 suffix VSFX) so that these
 * names resolve even when the SSE2 section of this header is not enabled.
 * Both suffixes have the same value for float and double, so the
 * expansion is unchanged wherever VSFX was available.
 */
#define VMULavx   VL_XCAT(_mm256_mul_p, VSFXavx)
#define VDIVavx   VL_XCAT(_mm256_div_p, VSFXavx)
#define VADDavx   VL_XCAT(_mm256_add_p, VSFXavx)
/* Note the _mm_ prefix: this is the 128-bit horizontal add. */
#define VHADDavx  VL_XCAT(_mm_hadd_p, VSFXavx)
#define VHADD2avx VL_XCAT(_mm256_hadd_p, VSFXavx)
#define VSUBavx   VL_XCAT(_mm256_sub_p, VSFXavx)
#define VSTZavx   VL_XCAT(_mm256_setzero_p, VSFXavx)
#define VLD1avx   VL_XCAT(_mm256_broadcast_s, VSFXavx)
#define VLDUavx   VL_XCAT(_mm256_loadu_p, VSFXavx)
/* NOTE(review): expands to _mm256_store_ss / _mm256_store_sd, which do not
   seem to exist as AVX intrinsics -- confirm whether this macro is used. */
#define VST1avx   VL_XCAT(_mm256_store_s, VSFXavx)
#define VST2avx   VL_XCAT(_mm256_store_p, VSFXavx)
#define VST2Uavx  VL_XCAT(_mm256_storeu_p, VSFXavx)
#define VPERMavx  VL_XCAT(_mm256_permute2f128_p, VSFXavx)
/* Assuming VL_XCAT5 pastes its five arguments, this yields
   _mm256_castps256_ps128 (float) or _mm256_castpd256_pd128 (double). */
#define VCSTavx   VL_XCAT5(_mm256_castp, VSFXavx, 256_p, VSFXavx, 128)

#endif /* __AVX__ */
78 
79 /* ---------------------------------------------------------------- */
80 /*                                                             SSE2 */
81 /* ---------------------------------------------------------------- */
82 
#ifdef __SSE2__

/*
 * SSE2 (128-bit) vector parameters for the element type selected by FLT.
 * Only float and double are handled; otherwise the three macros below
 * are left undefined.
 *
 *   VSIZE  number of elements per vector
 *   VSFX   intrinsic-name suffix (s or d)
 *   VTYPE  vector type
 */
#if FLT == VL_TYPE_FLOAT
#define VSIZE 4
#define VSFX  s
#define VTYPE __m128
#elif FLT == VL_TYPE_DOUBLE
#define VSIZE 2
#define VSFX  d
#define VTYPE __m128d
#endif

/* Non-zero when the low four bits of address x are clear (16-byte multiple). */
#define VALIGNED(x) ((((vl_uintptr)(x)) & 0xF) == 0)

/* Intrinsic names: a common prefix with VSFX pasted on by VL_XCAT. */

/* arithmetic */
#define VADD  VL_XCAT(_mm_add_p, VSFX)
#define VSUB  VL_XCAT(_mm_sub_p, VSFX)
#define VMUL  VL_XCAT(_mm_mul_p, VSFX)
#define VDIV  VL_XCAT(_mm_div_p, VSFX)
#define VMAX  VL_XCAT(_mm_max_p, VSFX)

/* comparison and bitwise logic */
#define VNEQ  VL_XCAT(_mm_cmpneq_p, VSFX)
#define VAND  VL_XCAT(_mm_and_p, VSFX)
#define VANDN VL_XCAT(_mm_andnot_p, VSFX)

/* initialisation, loads and shuffles */
#define VSTZ  VL_XCAT(_mm_setzero_p, VSFX)
#define VSET1 VL_XCAT(_mm_set_s, VSFX)
#define VLD1  VL_XCAT(_mm_load1_p, VSFX)
#define VLDU  VL_XCAT(_mm_loadu_p, VSFX)
#define VSHU  VL_XCAT(_mm_shuffle_p, VSFX)

/* stores */
#define VST1  VL_XCAT(_mm_store_s, VSFX)
#define VST2  VL_XCAT(_mm_store_p, VSFX)
#define VST2U VL_XCAT(_mm_storeu_p, VSFX)

#endif /* __SSE2__ */
116 
117