1 #define NO_IMPORT_ARRAY
2 #include "numpy/ndarrayobject.h"
3 #include "sigtools.h"
4 #include <stdbool.h>
5 #include <stdint.h>
6
7 static int elsizes[] = {sizeof(npy_bool),
8 sizeof(npy_byte),
9 sizeof(npy_ubyte),
10 sizeof(npy_short),
11 sizeof(npy_ushort),
12 sizeof(int),
13 sizeof(npy_uint),
14 sizeof(long),
15 sizeof(npy_ulong),
16 sizeof(npy_longlong),
17 sizeof(npy_ulonglong),
18 sizeof(float),
19 sizeof(double),
20 sizeof(npy_longdouble),
21 sizeof(npy_cfloat),
22 sizeof(npy_cdouble),
23 sizeof(npy_clongdouble),
24 sizeof(void *),
25 0,0,0,0};
26
27 typedef void (OneMultAddFunction) (char *, char *, int64_t, char **, int64_t);
28
29 #define MAKE_ONEMULTADD(fname, type) \
30 static void fname ## _onemultadd(char *sum, char *term1, int64_t str, char **pvals, int64_t n) { \
31 type dsum = *(type*)sum; \
32 for (int64_t k=0; k < n; k++) { \
33 type tmp = *(type*)(term1 + k * str); \
34 dsum += tmp * *(type*)pvals[k]; \
35 } \
36 *(type*)(sum) = dsum; \
37 }
38
39 MAKE_ONEMULTADD(UBYTE, npy_ubyte)
40 MAKE_ONEMULTADD(USHORT, npy_ushort)
41 MAKE_ONEMULTADD(UINT, npy_uint)
42 MAKE_ONEMULTADD(ULONG, npy_ulong)
43 MAKE_ONEMULTADD(ULONGLONG, npy_ulonglong)
44
45 MAKE_ONEMULTADD(BYTE, npy_byte)
46 MAKE_ONEMULTADD(SHORT, short)
47 MAKE_ONEMULTADD(INT, int)
48 MAKE_ONEMULTADD(LONG, long)
49 MAKE_ONEMULTADD(LONGLONG, npy_longlong)
50
51 MAKE_ONEMULTADD(FLOAT, float)
52 MAKE_ONEMULTADD(DOUBLE, double)
53 MAKE_ONEMULTADD(LONGDOUBLE, npy_longdouble)
54
55 #ifdef __GNUC__
56 MAKE_ONEMULTADD(CFLOAT, __complex__ float)
57 MAKE_ONEMULTADD(CDOUBLE, __complex__ double)
58 MAKE_ONEMULTADD(CLONGDOUBLE, __complex__ long double)
59 #else
60 #define MAKE_C_ONEMULTADD(fname, type) \
61 static void fname ## _onemultadd2(char *sum, char *term1, char *term2) { \
62 ((type *) sum)[0] += ((type *) term1)[0] * ((type *) term2)[0] \
63 - ((type *) term1)[1] * ((type *) term2)[1]; \
64 ((type *) sum)[1] += ((type *) term1)[0] * ((type *) term2)[1] \
65 + ((type *) term1)[1] * ((type *) term2)[0]; \
66 return; }
67
68 #define MAKE_C_ONEMULTADD2(fname, type) \
69 static void fname ## _onemultadd(char *sum, char *term1, int64_t str, \
70 char **pvals, int64_t n) { \
71 for (int64_t k=0; k < n; k++) { \
72 fname ## _onemultadd2(sum, term1 + k * str, pvals[k]); \
73 } \
74 }
75 MAKE_C_ONEMULTADD(CFLOAT, float)
76 MAKE_C_ONEMULTADD(CDOUBLE, double)
77 MAKE_C_ONEMULTADD(CLONGDOUBLE, npy_longdouble)
78 MAKE_C_ONEMULTADD2(CFLOAT, float)
79 MAKE_C_ONEMULTADD2(CDOUBLE, double)
80 MAKE_C_ONEMULTADD2(CLONGDOUBLE, npy_longdouble)
81 #endif /* __GNUC__ */
82
83 static OneMultAddFunction *OneMultAdd[]={NULL,
84 BYTE_onemultadd,
85 UBYTE_onemultadd,
86 SHORT_onemultadd,
87 USHORT_onemultadd,
88 INT_onemultadd,
89 UINT_onemultadd,
90 LONG_onemultadd,
91 ULONG_onemultadd,
92 LONGLONG_onemultadd,
93 ULONGLONG_onemultadd,
94 FLOAT_onemultadd,
95 DOUBLE_onemultadd,
96 LONGDOUBLE_onemultadd,
97 CFLOAT_onemultadd,
98 CDOUBLE_onemultadd,
99 CLONGDOUBLE_onemultadd,
100 NULL, NULL, NULL, NULL};
101
102
103 /* This could definitely be more optimized... */
104
105
pylab_convolve_2d(char * in,npy_intp * instr,char * out,npy_intp * outstr,char * hvals,npy_intp * hstr,npy_intp * Nwin,npy_intp * Ns,int flag,char * fillvalue)106 int pylab_convolve_2d (char *in, /* Input data Ns[0] x Ns[1] */
107 npy_intp *instr, /* Input strides */
108 char *out, /* Output data */
109 npy_intp *outstr, /* Output strides */
110 char *hvals, /* coefficients in filter */
111 npy_intp *hstr, /* coefficients strides */
112 npy_intp *Nwin, /* Size of kernel Nwin[0] x Nwin[1] */
113 npy_intp *Ns, /* Size of image Ns[0] x Ns[1] */
114 int flag, /* convolution parameters */
115 char *fillvalue) /* fill value */
116 {
117 const int boundary = flag & BOUNDARY_MASK; /* flag can be fill, reflecting, circular */
118 const int outsize = flag & OUTSIZE_MASK;
119 const int convolve = flag & FLIP_MASK;
120 const int type_num = (flag & TYPE_MASK) >> TYPE_SHIFT;
121 /*type_size*/
122
123 OneMultAddFunction *mult_and_add = OneMultAdd[type_num];
124 if (mult_and_add == NULL) return -5; /* Not available for this type */
125
126 if (type_num < 0 || type_num > MAXTYPES) return -4; /* Invalid type */
127 const int type_size = elsizes[type_num];
128
129 int64_t Os[2];
130 if (outsize == FULL) {Os[0] = Ns[0]+Nwin[0]-1; Os[1] = Ns[1]+Nwin[1]-1;}
131 else if (outsize == SAME) {Os[0] = Ns[0]; Os[1] = Ns[1];}
132 else if (outsize == VALID) {Os[0] = Ns[0]-Nwin[0]+1; Os[1] = Ns[1]-Nwin[1]+1;}
133 else return -1; /* Invalid output flag */
134
135 if ((boundary != PAD) && (boundary != REFLECT) && (boundary != CIRCULAR))
136 return -2; /* Invalid boundary flag */
137
138 char **indices = malloc(Nwin[1] * sizeof(indices[0]));
139 if (indices == NULL) return -3; /* No memory */
140
141 /* Speed this up by not doing any if statements in the for loop. Need 3*3*2=18 different
142 loops executed for different conditions */
143
144 for (int64_t m=0; m < Os[0]; m++) {
145 /* Reposition index into input image based on requested output size */
146 int64_t new_m;
147 if (outsize == FULL) new_m = convolve ? m : (m-Nwin[0]+1);
148 else if (outsize == SAME) new_m = convolve ? (m+((Nwin[0]-1)>>1)) : (m-((Nwin[0]-1) >> 1));
149 else new_m = convolve ? (m+Nwin[0]-1) : m; /* VALID */
150
151 for (int64_t n=0; n < Os[1]; n++) { /* loop over columns */
152 char * sum = out+m*outstr[0]+n*outstr[1];
153 memset(sum, 0, type_size); /* sum = 0.0; */
154
155 int64_t new_n;
156 if (outsize == FULL) new_n = convolve ? n : (n-Nwin[1]+1);
157 else if (outsize == SAME) new_n = convolve ? (n+((Nwin[1]-1)>>1)) : (n-((Nwin[1]-1) >> 1));
158 else new_n = convolve ? (n+Nwin[1]-1) : n;
159
160 /* Sum over kernel, if index into image is out of bounds
161 handle it according to boundary flag */
162 for (int64_t j=0; j < Nwin[0]; j++) {
163 int64_t ind0 = convolve ? (new_m-j): (new_m+j);
164 bool bounds_pad_flag = false;
165
166 if (ind0 < 0) {
167 if (boundary == REFLECT) ind0 = -1-ind0;
168 else if (boundary == CIRCULAR) ind0 = Ns[0] + ind0;
169 else bounds_pad_flag = true;
170 }
171 else if (ind0 >= Ns[0]) {
172 if (boundary == REFLECT) ind0 = Ns[0]+Ns[0]-1-ind0;
173 else if (boundary == CIRCULAR) ind0 = ind0 - Ns[0];
174 else bounds_pad_flag = true;
175 }
176
177 const int64_t ind0_memory = ind0*instr[0];
178
179 if (bounds_pad_flag) {
180 for (int64_t k=0; k < Nwin[1]; k++) {
181 indices[k] = fillvalue;
182 }
183 }
184 else {
185 for (int64_t k=0; k < Nwin[1]; k++) {
186 int64_t ind1 = convolve ? (new_n-k) : (new_n+k);
187 if (ind1 < 0) {
188 if (boundary == REFLECT) ind1 = -1-ind1;
189 else if (boundary == CIRCULAR) ind1 = Ns[1] + ind1;
190 else bounds_pad_flag = true;
191 }
192 else if (ind1 >= Ns[1]) {
193 if (boundary == REFLECT) ind1 = Ns[1]+Ns[1]-1-ind1;
194 else if (boundary == CIRCULAR) ind1 = ind1 - Ns[1];
195 else bounds_pad_flag = true;
196 }
197
198 if (bounds_pad_flag) {
199 indices[k] = fillvalue;
200 }
201 else {
202 indices[k] = in+ind0_memory+ind1*instr[1];
203 }
204 bounds_pad_flag = false;
205 }
206 }
207 mult_and_add(sum, hvals+j*hstr[0], hstr[1], indices, Nwin[1]);
208 }
209 }
210 }
211 free(indices);
212 return 0;
213 }
214