1 /*
2  * Software License Agreement (Simplified BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2013-, Open Perception, Inc.
6  * Copyright (c) 2012, Piotr Dollar & Ron Appel.[pdollar-at-caltech.edu]
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice, this
14  *list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright notice,
17  *this list of conditions and the following disclaimer in the documentation
18  *and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation are those
32  * of the authors and should not be interpreted as representing official policies,
33  * either expressed or implied, of the FreeBSD Project.
34  *
 * Taken from Piotr Dollar's MATLAB Image&Video Toolbox, Version 3.00.
36  *
37  */
38 
39 #pragma once
40 
41 #if defined(__SSE2__)
42 #include <emmintrin.h> // SSE2:<e*.h>, SSE3:<p*.h>, SSE4:<s*.h>
43 
// Thin inline wrappers around SSE/SSE2 intrinsics.
//
// Return types are written out in full instead of going through helper macros,
// so including this header neither defines nor #undefs any preprocessor names
// in the including translation unit.

namespace pcl {

// set, load and store values

// Broadcasts x into all four float lanes.
inline __m128
sse_set(const float& x)
{
  return _mm_set1_ps(x);
}

// Forwards to _mm_set_ps(x, y, z, w). Note that _mm_set_ps takes the HIGHEST
// lane first: w ends up in lane 0 (the first float when stored to memory) and
// x in lane 3.
inline __m128
sse_set(float x, float y, float z, float w)
{
  return _mm_set_ps(x, y, z, w);
}

// Broadcasts x into all four 32-bit integer lanes.
inline __m128i
sse_set(const int& x)
{
  return _mm_set1_epi32(x);
}

// Aligned load of the four consecutive floats starting at x.
// The address of x must be 16-byte aligned (_mm_load_ps).
inline __m128
sse_ld(const float& x)
{
  return _mm_load_ps(&x);
}

// Unaligned load of the four consecutive floats starting at x.
inline __m128
sse_ldu(const float& x)
{
  return _mm_loadu_ps(&x);
}

// Aligned store of y into the four consecutive floats starting at x; returns y.
// The address of x must be 16-byte aligned (_mm_store_ps).
inline __m128
sse_str(float& x, const __m128 y)
{
  _mm_store_ps(&x, y);
  return y;
}

// Stores only the lowest lane of y into x; returns y unchanged.
inline __m128
sse_str1(float& x, const __m128 y)
{
  _mm_store_ss(&x, y);
  return y;
}

// Unaligned store of y into the four consecutive floats starting at x;
// returns y.
inline __m128
sse_stru(float& x, const __m128 y)
{
  _mm_storeu_ps(&x, y);
  return y;
}

// Fills the four consecutive floats starting at x with the scalar y (aligned
// store) and returns the broadcast vector.
inline __m128
sse_str(float& x, const float y)
{
  const __m128 broadcast = sse_set(y);
  return sse_str(x, broadcast);
}

// arithmetic operators

// Lane-wise 32-bit integer addition.
inline __m128i
sse_add(const __m128i x, const __m128i y)
{
  return _mm_add_epi32(x, y);
}

// Lane-wise float addition.
inline __m128
sse_add(const __m128 x, const __m128 y)
{
  return _mm_add_ps(x, y);
}

// Lane-wise sum of three vectors, evaluated as (x + y) + z.
inline __m128
sse_add(const __m128 x, const __m128 y, const __m128 z)
{
  const __m128 xy = sse_add(x, y);
  return sse_add(xy, z);
}

// Lane-wise sum of four vectors, evaluated as ((a + b) + c) + d.
// NOTE(review): d is deliberately kept as a const reference to preserve the
// existing signature; presumably this works around compilers that limit the
// number of by-value 16-byte-aligned parameters -- confirm before changing.
inline __m128
sse_add(const __m128 a, const __m128 b, const __m128 c, const __m128& d)
{
  const __m128 abc = sse_add(a, b, c);
  return sse_add(abc, d);
}

// Lane-wise float subtraction, x - y.
inline __m128
sse_sub(const __m128 x, const __m128 y)
{
  return _mm_sub_ps(x, y);
}

// Lane-wise float multiplication.
inline __m128
sse_mul(const __m128 x, const __m128 y)
{
  return _mm_mul_ps(x, y);
}

// Multiplies every lane of x by the scalar y.
inline __m128
sse_mul(const __m128 x, const float y)
{
  return sse_mul(x, sse_set(y));
}

// Multiplies every lane of y by the scalar x.
inline __m128
sse_mul(const float x, const __m128 y)
{
  return sse_mul(sse_set(x), y);
}

// In-register accumulate: x += y; returns the updated x.
inline __m128
sse_inc(__m128& x, const __m128 y)
{
  x = sse_add(x, y);
  return x;
}

// In-memory accumulate: adds y to the four consecutive floats starting at x
// (aligned load and store) and returns the stored sum.
inline __m128
sse_inc(float& x, const __m128 y)
{
  const __m128 sum = sse_add(sse_ld(x), y);
  return sse_str(x, sum);
}

// In-register decrement: x -= y; returns the updated x.
inline __m128
sse_dec(__m128& x, const __m128 y)
{
  x = sse_sub(x, y);
  return x;
}

// In-memory decrement: subtracts y from the four consecutive floats starting
// at x (aligned load and store) and returns the stored difference.
inline __m128
sse_dec(float& x, const __m128 y)
{
  const __m128 diff = sse_sub(sse_ld(x), y);
  return sse_str(x, diff);
}

// Lane-wise minimum.
inline __m128
sse_min(const __m128 x, const __m128 y)
{
  return _mm_min_ps(x, y);
}

// Lane-wise APPROXIMATE reciprocal (_mm_rcp_ps); not an exact 1/x.
inline __m128
sse_rcp(const __m128 x)
{
  return _mm_rcp_ps(x);
}

// Lane-wise APPROXIMATE reciprocal square root (_mm_rsqrt_ps).
inline __m128
sse_rcpsqrt(const __m128 x)
{
  return _mm_rsqrt_ps(x);
}

// logical operators

// Bitwise AND of two float vectors.
inline __m128
sse_and(const __m128 x, const __m128 y)
{
  return _mm_and_ps(x, y);
}

// Bitwise AND of two integer vectors.
inline __m128i
sse_and(const __m128i x, const __m128i y)
{
  return _mm_and_si128(x, y);
}

// Bitwise (NOT x) AND y -- note that it is the FIRST operand that is inverted.
inline __m128
sse_andnot(const __m128 x, const __m128 y)
{
  return _mm_andnot_ps(x, y);
}

// Bitwise OR of two float vectors.
inline __m128
sse_or(const __m128 x, const __m128 y)
{
  return _mm_or_ps(x, y);
}

// Bitwise XOR of two float vectors.
inline __m128
sse_xor(const __m128 x, const __m128 y)
{
  return _mm_xor_ps(x, y);
}

// comparison operators

// Lane-wise x > y on floats; each result lane is an all-ones or all-zeros mask.
inline __m128
sse_cmpgt(const __m128 x, const __m128 y)
{
  return _mm_cmpgt_ps(x, y);
}

// Lane-wise signed x > y on 32-bit integers; each result lane is an all-ones
// or all-zeros mask.
inline __m128i
sse_cmpgt(const __m128i x, const __m128i y)
{
  return _mm_cmpgt_epi32(x, y);
}

// conversion operators

// Converts four 32-bit integers to floats.
inline __m128
sse_cvt(const __m128i x)
{
  return _mm_cvtepi32_ps(x);
}

// Converts four floats to 32-bit integers, truncating toward zero
// (_mm_cvttps_epi32), not rounding.
inline __m128i
sse_cvt(const __m128 x)
{
  return _mm_cvttps_epi32(x);
}

} // namespace pcl
98 #endif /* defined(__SSE2__) */
99