1 /* primitives.h
2  * vi:ts=4 sw=4
3  *
4  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
5  * Licensed under the Apache License, Version 2.0 (the "License"); you may
6  * not use this file except in compliance with the License. You may obtain
7  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing
12  * permissions and limitations under the License.  Algorithms used by
13  * this code may be covered by patents by HP, Microsoft, or other parties.
14  */
15 
16 #ifdef __GNUC__
17 #pragma once
18 #endif
19 
20 #ifndef FREERDP_PRIMITIVES_H
21 #define FREERDP_PRIMITIVES_H
22 
23 #include <freerdp/api.h>
24 #include <freerdp/types.h>
25 #include <freerdp/codec/color.h>
26 
27 #include <winpr/platform.h>
28 
29 typedef INT32 pstatus_t;       /* match IppStatus. */
30 #define PRIMITIVES_SUCCESS (0) /* match ippStsNoErr */
31 
/*
 * Address of the 4-byte element at column _x_, row _y_ of a 2D memory block
 * that starts at _dst_. _span_ is the row pitch counted in 4-byte elements
 * (not in bytes): the element index is multiplied by four through the shift
 * before being added to the byte pointer. The result is a void*.
 */
#define PIXMAP4_ADDR(_dst_, _x_, _y_, _span_) \
	((void*)((BYTE*)(_dst_) + ((((_y_) * (_span_)) + (_x_)) << 2)))
35 
/*
 * x86 feature bits, one distinct bit per flag (bit 0 .. bit 12), all of
 * unsigned type so they can be OR-ed into a mask.
 */
#define PRIM_X86_MMX_AVAILABLE 0x0001U            /* bit 0 */
#define PRIM_X86_3DNOW_AVAILABLE 0x0002U          /* bit 1 */
#define PRIM_X86_3DNOW_PREFETCH_AVAILABLE 0x0004U /* bit 2 */
#define PRIM_X86_SSE_AVAILABLE 0x0008U            /* bit 3 */
#define PRIM_X86_SSE2_AVAILABLE 0x0010U           /* bit 4 */
#define PRIM_X86_SSE3_AVAILABLE 0x0020U           /* bit 5 */
#define PRIM_X86_SSSE3_AVAILABLE 0x0040U          /* bit 6 */
#define PRIM_X86_SSE41_AVAILABLE 0x0080U          /* bit 7 */
#define PRIM_X86_SSE42_AVAILABLE 0x0100U          /* bit 8 */
#define PRIM_X86_AVX_AVAILABLE 0x0200U            /* bit 9 */
#define PRIM_X86_FMA_AVAILABLE 0x0400U            /* bit 10 */
#define PRIM_X86_AVX_AES_AVAILABLE 0x0800U        /* bit 11 */
#define PRIM_X86_AVX2_AVAILABLE 0x1000U           /* bit 12 */
49 
/*
 * ARM feature bits, one distinct bit per flag (bit 0 .. bit 7), unsigned.
 * Note that these values start again at bit 0 and therefore coincide
 * numerically with the x86 feature bits.
 */
#define PRIM_ARM_VFP1_AVAILABLE 0x01U   /* bit 0 */
#define PRIM_ARM_VFP2_AVAILABLE 0x02U   /* bit 1 */
#define PRIM_ARM_VFP3_AVAILABLE 0x04U   /* bit 2 */
#define PRIM_ARM_VFP4_AVAILABLE 0x08U   /* bit 3 */
#define PRIM_ARM_FPA_AVAILABLE 0x10U    /* bit 4 */
#define PRIM_ARM_FPE_AVAILABLE 0x20U    /* bit 5 */
#define PRIM_ARM_IWMMXT_AVAILABLE 0x40U /* bit 6 */
#define PRIM_ARM_NEON_AVAILABLE 0x80U   /* bit 7 */
58 
/** @brief flags of primitives (single-bit values, combinable with OR) */
enum
{
	/* primitives are using CPU extensions */
	PRIM_FLAGS_HAVE_EXTCPU = 0x01,
	/* primitives are using the GPU */
	PRIM_FLAGS_HAVE_EXTGPU = 0x02,
};
65 
66 /* Structures compatible with IPP */
67 typedef struct
68 {
69 	UINT32 width;
70 	UINT32 height;
71 } prim_size_t; /* like IppiSize */
72 
/*
 * Selects which kind of frame is handed to the YUV420CombineToYUV444
 * primitive (its first parameter has this type). Values are consecutive
 * starting at zero.
 */
typedef enum
{
	AVC444_LUMA = 0,
	AVC444_CHROMAv1 = 1,
	AVC444_CHROMAv2 = 2
} avc444_frame_type;
79 
80 /* Function prototypes for all of the supported primitives. */
81 typedef pstatus_t (*__copy_t)(const void* pSrc, void* pDst, INT32 bytes);
82 typedef pstatus_t (*__copy_8u_t)(const BYTE* pSrc, BYTE* pDst, INT32 len);
83 typedef pstatus_t (*__copy_8u_AC4r_t)(const BYTE* pSrc, INT32 srcStep, /* bytes */
84                                       BYTE* pDst, INT32 dstStep,       /* bytes */
85                                       INT32 width, INT32 height);      /* pixels */
86 typedef pstatus_t (*__set_8u_t)(BYTE val, BYTE* pDst, UINT32 len);
87 typedef pstatus_t (*__set_32s_t)(INT32 val, INT32* pDst, UINT32 len);
88 typedef pstatus_t (*__set_32u_t)(UINT32 val, UINT32* pDst, UINT32 len);
89 typedef pstatus_t (*__zero_t)(void* pDst, size_t bytes);
90 typedef pstatus_t (*__alphaComp_argb_t)(const BYTE* pSrc1, UINT32 src1Step, const BYTE* pSrc2,
91                                         UINT32 src2Step, BYTE* pDst, UINT32 dstStep, UINT32 width,
92                                         UINT32 height);
93 typedef pstatus_t (*__add_16s_t)(const INT16* pSrc1, const INT16* pSrc2, INT16* pDst, UINT32 len);
94 typedef pstatus_t (*__lShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
95 typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
96 typedef pstatus_t (*__rShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
97 typedef pstatus_t (*__rShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
98 typedef pstatus_t (*__shiftC_16s_t)(const INT16* pSrc, INT32 val, INT16* pSrcDst, UINT32 len);
99 typedef pstatus_t (*__shiftC_16u_t)(const UINT16* pSrc, INT32 val, UINT16* pSrcDst, UINT32 len);
100 typedef pstatus_t (*__sign_16s_t)(const INT16* pSrc, INT16* pDst, UINT32 len);
101 typedef pstatus_t (*__yCbCrToRGB_16s8u_P3AC4R_t)(const INT16* const pSrc[3], UINT32 srcStep,
102                                                  BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
103                                                  const prim_size_t* roi);
104 typedef pstatus_t (*__yCbCrToRGB_16s16s_P3P3_t)(const INT16* const pSrc[3], INT32 srcStep,
105                                                 INT16* pDst[3], INT32 dstStep,
106                                                 const prim_size_t* roi);
107 typedef pstatus_t (*__RGBToYCbCr_16s16s_P3P3_t)(const INT16* const pSrc[3], INT32 srcStep,
108                                                 INT16* pDst[3], INT32 dstStep,
109                                                 const prim_size_t* roi);
110 typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)(const INT16* const pSrc[3], UINT32 srcStep,
111                                                BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
112                                                const prim_size_t* roi);
113 typedef pstatus_t (*__YCoCgToRGB_8u_AC4R_t)(const BYTE* pSrc, INT32 srcStep, BYTE* pDst,
114                                             UINT32 DstFormat, INT32 dstStep, UINT32 width,
115                                             UINT32 height, UINT8 shift, BOOL withAlpha);
116 typedef pstatus_t (*__RGB565ToARGB_16u32u_C3C4_t)(const UINT16* pSrc, INT32 srcStep, UINT32* pDst,
117                                                   INT32 dstStep, UINT32 width, UINT32 height,
118                                                   UINT32 format);
119 typedef pstatus_t (*__YUV420ToRGB_8u_P3AC4R_t)(const BYTE* const pSrc[3], const UINT32 srcStep[3],
120                                                BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
121                                                const prim_size_t* roi);
122 typedef pstatus_t (*__YUV444ToRGB_8u_P3AC4R_t)(const BYTE* const pSrc[3], const UINT32 srcStep[3],
123                                                BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
124                                                const prim_size_t* roi);
125 typedef pstatus_t (*__RGBToYUV420_8u_P3AC4R_t)(const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
126                                                BYTE* pDst[3], UINT32 dstStep[3],
127                                                const prim_size_t* roi);
128 typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)(const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
129                                                BYTE* pDst[3], UINT32 dstStep[3],
130                                                const prim_size_t* roi);
131 typedef pstatus_t (*__YUV420CombineToYUV444_t)(avc444_frame_type type, const BYTE* const pSrc[3],
132                                                const UINT32 srcStep[3], UINT32 nWidth,
133                                                UINT32 nHeight, BYTE* pDst[3],
134                                                const UINT32 dstStep[3], const RECTANGLE_16* roi);
135 typedef pstatus_t (*__YUV444SplitToYUV420_t)(const BYTE* const pSrc[3], const UINT32 srcStep[3],
136                                              BYTE* pMainDst[3], const UINT32 dstMainStep[3],
137                                              BYTE* pAuxDst[3], const UINT32 srcAuxStep[3],
138                                              const prim_size_t* roi);
139 typedef pstatus_t (*__RGBToAVC444YUV_t)(const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
140                                         BYTE* pMainDst[3], const UINT32 dstMainStep[3],
141                                         BYTE* pAuxDst[3], const UINT32 dstAuxStep[3],
142                                         const prim_size_t* roi);
143 typedef pstatus_t (*__andC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len);
144 typedef pstatus_t (*__orC_32u_t)(const UINT32* pSrc, UINT32 val, UINT32* pDst, INT32 len);
145 typedef pstatus_t (*primitives_uninit_t)(void);
146 
147 typedef struct
148 {
149 	/* Memory-to-memory copy routines */
150 	__copy_t copy;                 /* memcpy/memmove, basically */
151 	__copy_8u_t copy_8u;           /* more strongly typed */
152 	__copy_8u_AC4r_t copy_8u_AC4r; /* pixel copy function */
153 	/* Memory setting routines */
154 	__set_8u_t set_8u; /* memset, basically */
155 	__set_32s_t set_32s;
156 	__set_32u_t set_32u;
157 	__zero_t zero; /* bzero or faster */
158 	/* Arithmetic functions */
159 	__add_16s_t add_16s;
160 	/* And/or */
161 	__andC_32u_t andC_32u;
162 	__orC_32u_t orC_32u;
163 	/* Shifts */
164 	__lShiftC_16s_t lShiftC_16s;
165 	__lShiftC_16u_t lShiftC_16u;
166 	__rShiftC_16s_t rShiftC_16s;
167 	__rShiftC_16u_t rShiftC_16u;
168 	__shiftC_16s_t shiftC_16s;
169 	__shiftC_16u_t shiftC_16u;
170 	/* Alpha Composition */
171 	__alphaComp_argb_t alphaComp_argb;
172 	/* Sign */
173 	__sign_16s_t sign_16s;
174 	/* Color conversions */
175 	__yCbCrToRGB_16s8u_P3AC4R_t yCbCrToRGB_16s8u_P3AC4R;
176 	__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
177 	__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
178 	__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
179 	__YCoCgToRGB_8u_AC4R_t YCoCgToRGB_8u_AC4R;
180 	__YUV420ToRGB_8u_P3AC4R_t YUV420ToRGB_8u_P3AC4R;
181 	__RGBToYUV420_8u_P3AC4R_t RGBToYUV420_8u_P3AC4R;
182 	__RGBToYUV444_8u_P3AC4R_t RGBToYUV444_8u_P3AC4R;
183 	__YUV420CombineToYUV444_t YUV420CombineToYUV444;
184 	__YUV444SplitToYUV420_t YUV444SplitToYUV420;
185 	__YUV444ToRGB_8u_P3AC4R_t YUV444ToRGB_8u_P3AC4R;
186 	__RGBToAVC444YUV_t RGBToAVC444YUV;
187 	__RGBToAVC444YUV_t RGBToAVC444YUVv2;
188 	/* flags */
189 	DWORD flags;
190 	primitives_uninit_t uninit;
191 } primitives_t;
192 
/*
 * Hint describing which family of primitive implementations to use.
 * Values are consecutive starting at zero.
 */
typedef enum
{
	/** use generic software implementation */
	PRIMITIVES_PURE_SOFT = 0,
	/** use generic software or cpu optimized routines */
	PRIMITIVES_ONLY_CPU = 1,
	/** use opencl optimized routines */
	PRIMITIVES_ONLY_GPU = 2,
	/** detect the best routines */
	PRIMITIVES_AUTODETECT = 3
} primitive_hints;
200 
201 #ifdef __cplusplus
202 extern "C"
203 {
204 #endif
205 
206 	FREERDP_API primitives_t* primitives_get(void);
207 	FREERDP_API void primitives_set_hints(primitive_hints hints);
208 	FREERDP_API primitive_hints primitives_get_hints(void);
209 	FREERDP_API primitives_t* primitives_get_generic(void);
210 	FREERDP_API DWORD primitives_flags(primitives_t* p);
211 	FREERDP_API BOOL primitives_init(primitives_t* p, primitive_hints hints);
212 	FREERDP_API void primitives_uninit(void);
213 
214 #ifdef __cplusplus
215 }
216 #endif
217 
218 #endif /* FREERDP_PRIMITIVES_H */
219