1 /** @file float.h 2 ** @brief Float - Template 3 ** @author Andrea Vedaldi 4 ** @author David Novotny 5 **/ 6 7 /* 8 Copyright (C) 2014 Andrea Vedaldi. 9 Copyright (C) 2013 David Novotny. 10 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson. 11 All rights reserved. 12 13 This file is part of the VLFeat library and is made available under 14 the terms of the BSD license (see the COPYING file). 15 */ 16 17 #include "generic.h" 18 19 #undef T 20 #undef SFX 21 #undef VSIZE 22 #undef VSFX 23 #undef VTYPE 24 #undef VSIZEavx 25 #undef VSFXavx 26 #undef VTYPEavx 27 28 #if (FLT == VL_TYPE_FLOAT) 29 # define T float 30 # define SFX f 31 #elif (FLT == VL_TYPE_DOUBLE) 32 # define T double 33 # define SFX d 34 #elif (FLT == VL_TYPE_UINT32) 35 # define T vl_uint32 36 # define SFX ui32 37 #elif (FLT == VL_TYPE_INT32) 38 # define T vl_int32 39 # define SFX i32 40 #endif 41 42 /* ---------------------------------------------------------------- */ 43 /* AVX */ 44 /* ---------------------------------------------------------------- */ 45 46 #ifdef __AVX__ 47 48 #if (FLT == VL_TYPE_FLOAT) 49 # define VSIZEavx 8 50 # define VSFXavx s 51 # define VTYPEavx __m256 52 #elif (FLT == VL_TYPE_DOUBLE) 53 # define VSIZEavx 4 54 # define VSFXavx d 55 # define VTYPEavx __m256d 56 #endif 57 58 #define VALIGNEDavx(x) (! (((vl_uintptr)(x)) & 0x1F)) 59 60 #define VMULavx VL_XCAT(_mm256_mul_p, VSFX) 61 #define VDIVavx VL_XCAT(_mm256_div_p, VSFX) 62 #define VADDavx VL_XCAT(_mm256_add_p, VSFX) 63 #define VHADDavx VL_XCAT(_mm_hadd_p, VSFX) 64 #define VHADD2avx VL_XCAT(_mm256_hadd_p, VSFX) 65 #define VSUBavx VL_XCAT(_mm256_sub_p, VSFX) 66 #define VSTZavx VL_XCAT(_mm256_setzero_p, VSFX) 67 #define VLD1avx VL_XCAT(_mm256_broadcast_s, VSFX) 68 #define VLDUavx VL_XCAT(_mm256_loadu_p, VSFX) 69 #define VST1avx VL_XCAT(_mm256_store_s, VSFX) 70 #define VST2avx VL_XCAT(_mm256_store_p, VSFX) 71 #define VST2Uavx VL_XCAT(_mm256_storeu_p, VSFX) 72 #define VPERMavx VL_XCAT(_mm256_permute2f128_p, VSFX) 73 //#define VCSTavx VL_XCAT( _mm256_castps256_ps128, VSFX) 74 #define VCSTavx VL_XCAT5(_mm256_castp,VSFX,256_p,VSFX,128) 75 76 /* __AVX__ */ 77 #endif 78 79 /* ---------------------------------------------------------------- */ 80 /* SSE2 */ 81 /* ---------------------------------------------------------------- */ 82 83 #ifdef __SSE2__ 84 85 #if (FLT == VL_TYPE_FLOAT) 86 # define VSIZE 4 87 # define VSFX s 88 # define VTYPE __m128 89 #elif (FLT == VL_TYPE_DOUBLE) 90 # define VSIZE 2 91 # define VSFX d 92 # define VTYPE __m128d 93 #endif 94 95 #define VALIGNED(x) (! (((vl_uintptr)(x)) & 0xF)) 96 97 #define VMAX VL_XCAT(_mm_max_p, VSFX) 98 #define VMUL VL_XCAT(_mm_mul_p, VSFX) 99 #define VDIV VL_XCAT(_mm_div_p, VSFX) 100 #define VADD VL_XCAT(_mm_add_p, VSFX) 101 #define VSUB VL_XCAT(_mm_sub_p, VSFX) 102 #define VSTZ VL_XCAT(_mm_setzero_p, VSFX) 103 #define VLD1 VL_XCAT(_mm_load1_p, VSFX) 104 #define VLDU VL_XCAT(_mm_loadu_p, VSFX) 105 #define VST1 VL_XCAT(_mm_store_s, VSFX) 106 #define VSET1 VL_XCAT(_mm_set_s, VSFX) 107 #define VSHU VL_XCAT(_mm_shuffle_p, VSFX) 108 #define VNEQ VL_XCAT(_mm_cmpneq_p, VSFX) 109 #define VAND VL_XCAT(_mm_and_p, VSFX) 110 #define VANDN VL_XCAT(_mm_andnot_p, VSFX) 111 #define VST2 VL_XCAT(_mm_store_p, VSFX) 112 #define VST2U VL_XCAT(_mm_storeu_p, VSFX) 113 114 /* __SSE2__ */ 115 #endif 116 117