1 /*
2 * The MIT License (MIT)
3 * This file is part of waifu2x-converter-cpp
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23 
24 #ifndef FILTERS_HPP
25 #define FILTERS_HPP
26 
27 #include "w2xconv.h"
28 #include "Buffer.hpp"
29 #include <vector>
30 
31 namespace w2xc
32 {
33 	void initOpenCLGlobal(std::vector<W2XConvProcessor> *proc_list);
34 	void initCUDAGlobal(std::vector<W2XConvProcessor> *proc_list);
35 
36 	bool initOpenCL(W2XConv *c, ComputeEnv *env, W2XConvProcessor *proc);
37 	void finiOpenCL(ComputeEnv *env);
38 	bool initCUDA(ComputeEnv *env, int dev_id);
39 	void finiCUDA(ComputeEnv *env);
40 
41 	extern void filter_SSE_impl
42 	(
43 		ComputeEnv *env,
44 		const float *packed_input,
45 		float *packed_output,
46 		int nInputPlanes,
47 		int nOutputPlanes,
48 		const float *biases,
49 		const float *weight,
50 		int ip_width,
51 		int ip_height,
52 		int nJob
53 	);
54 
55 	extern void filter_AVX_impl
56 	(
57 		ComputeEnv *env,
58 		const float *packed_input,
59 		float *packed_output,
60 		int nInputPlanes,
61 		int nOutputPlanes,
62 		const float *biases,
63 		const float *weight,
64 		int ip_width,
65 		int ip_height,
66 		int nJob
67 	);
68 
69 	extern void filter_FMA_impl
70 	(
71 		ComputeEnv *env,
72 		const float *packed_input,
73 		float *packed_output,
74 		int nInputPlanes,
75 		int nOutputPlanes,
76 		const float *biases,
77 		const float *weight,
78 		int ip_width,
79 		int ip_height,
80 		int nJob
81 	);
82 
83 	extern void filter_NEON_impl
84 	(
85 		ComputeEnv *env,
86 		const float *packed_input,
87 		float *packed_output,
88 		int nInputPlanes,
89 		int nOutputPlanes,
90 		const float *biases,
91 		const float *weight,
92 		int ip_width,
93 		int ip_height,
94 		int nJob
95 	);
96 
97 	extern void filter_OpenCL_impl
98 	(
99 		ComputeEnv *env,
100 		Buffer *packed_input,
101 		Buffer *packed_output,
102 		int nInputPlanes,
103 		int nOutputPlanes,
104 		const float *biases,
105 		const float *weight,
106 		int ip_width,
107 		int ip_height,
108 		int nJob
109 	);
110 
111 	extern void filter_AltiVec_impl
112 	(
113 		ComputeEnv *env,
114 		const float *packed_input,
115 		float *packed_output,
116 		int nInputPlanes,
117 		int nOutputPlanes,
118 		const float *biases,
119 		const float *weight,
120 		int ip_width,
121 		int ip_height,
122 		int nJob
123 	);
124 
125 	extern void filter_CUDA_impl
126 	(
127 		ComputeEnv *env,
128 		Buffer *packed_input,
129 		Buffer *packed_output,
130 		int nInputPlanes,
131 		int nOutputPlanes,
132 		const float *biases,
133 		const float *weight,
134 		int ip_width,
135 		int ip_height,
136 		int nJob
137 	);
138 
139 }
140 
141 #endif
142