1 #define NO_IMPORT_ARRAY
2 #include "numpy/ndarrayobject.h"
3 #include "sigtools.h"
4 #include <stdbool.h>
5 #include <stdint.h>
6 
7 static int elsizes[] = {sizeof(npy_bool),
8                         sizeof(npy_byte),
9                         sizeof(npy_ubyte),
10                         sizeof(npy_short),
11                         sizeof(npy_ushort),
12                         sizeof(int),
13                         sizeof(npy_uint),
14                         sizeof(long),
15                         sizeof(npy_ulong),
16                         sizeof(npy_longlong),
17                         sizeof(npy_ulonglong),
18                         sizeof(float),
19                         sizeof(double),
20                         sizeof(npy_longdouble),
21                         sizeof(npy_cfloat),
22                         sizeof(npy_cdouble),
23                         sizeof(npy_clongdouble),
24                         sizeof(void *),
25 			0,0,0,0};
26 
27 typedef void (OneMultAddFunction) (char *, char *, int64_t, char **, int64_t);
28 
29 #define MAKE_ONEMULTADD(fname, type) \
30 static void fname ## _onemultadd(char *sum, char *term1, int64_t str, char **pvals, int64_t n) { \
31         type dsum = *(type*)sum; \
32         for (int64_t k=0; k < n; k++) { \
33           type tmp = *(type*)(term1 + k * str); \
34           dsum += tmp * *(type*)pvals[k]; \
35         } \
36         *(type*)(sum) = dsum; \
37 }
38 
39 MAKE_ONEMULTADD(UBYTE, npy_ubyte)
40 MAKE_ONEMULTADD(USHORT, npy_ushort)
41 MAKE_ONEMULTADD(UINT, npy_uint)
42 MAKE_ONEMULTADD(ULONG, npy_ulong)
43 MAKE_ONEMULTADD(ULONGLONG, npy_ulonglong)
44 
45 MAKE_ONEMULTADD(BYTE, npy_byte)
46 MAKE_ONEMULTADD(SHORT, short)
47 MAKE_ONEMULTADD(INT, int)
48 MAKE_ONEMULTADD(LONG, long)
49 MAKE_ONEMULTADD(LONGLONG, npy_longlong)
50 
51 MAKE_ONEMULTADD(FLOAT, float)
52 MAKE_ONEMULTADD(DOUBLE, double)
53 MAKE_ONEMULTADD(LONGDOUBLE, npy_longdouble)
54 
55 #ifdef __GNUC__
56 MAKE_ONEMULTADD(CFLOAT, __complex__ float)
57 MAKE_ONEMULTADD(CDOUBLE, __complex__ double)
58 MAKE_ONEMULTADD(CLONGDOUBLE, __complex__ long double)
59 #else
60 #define MAKE_C_ONEMULTADD(fname, type) \
61 static void fname ## _onemultadd2(char *sum, char *term1, char *term2) { \
62   ((type *) sum)[0] += ((type *) term1)[0] * ((type *) term2)[0] \
63     - ((type *) term1)[1] * ((type *) term2)[1]; \
64   ((type *) sum)[1] += ((type *) term1)[0] * ((type *) term2)[1] \
65     + ((type *) term1)[1] * ((type *) term2)[0]; \
66   return; }
67 
68 #define MAKE_C_ONEMULTADD2(fname, type) \
69 static void fname ## _onemultadd(char *sum, char *term1, int64_t str, \
70                                  char **pvals, int64_t n) { \
71         for (int64_t k=0; k < n; k++) { \
72           fname ## _onemultadd2(sum, term1 + k * str, pvals[k]); \
73         } \
74 }
75 MAKE_C_ONEMULTADD(CFLOAT, float)
76 MAKE_C_ONEMULTADD(CDOUBLE, double)
77 MAKE_C_ONEMULTADD(CLONGDOUBLE, npy_longdouble)
78 MAKE_C_ONEMULTADD2(CFLOAT, float)
79 MAKE_C_ONEMULTADD2(CDOUBLE, double)
80 MAKE_C_ONEMULTADD2(CLONGDOUBLE, npy_longdouble)
81 #endif /* __GNUC__ */
82 
83 static OneMultAddFunction *OneMultAdd[]={NULL,
84 					 BYTE_onemultadd,
85 					 UBYTE_onemultadd,
86 					 SHORT_onemultadd,
87                                          USHORT_onemultadd,
88 					 INT_onemultadd,
89                                          UINT_onemultadd,
90 					 LONG_onemultadd,
91 					 ULONG_onemultadd,
92 					 LONGLONG_onemultadd,
93 					 ULONGLONG_onemultadd,
94 					 FLOAT_onemultadd,
95 					 DOUBLE_onemultadd,
96 					 LONGDOUBLE_onemultadd,
97 					 CFLOAT_onemultadd,
98 					 CDOUBLE_onemultadd,
99 					 CLONGDOUBLE_onemultadd,
100                                          NULL, NULL, NULL, NULL};
101 
102 
103 /* This could definitely be more optimized... */
104 
105 
pylab_convolve_2d(char * in,npy_intp * instr,char * out,npy_intp * outstr,char * hvals,npy_intp * hstr,npy_intp * Nwin,npy_intp * Ns,int flag,char * fillvalue)106 int pylab_convolve_2d (char  *in,        /* Input data Ns[0] x Ns[1] */
107 		       npy_intp   *instr,     /* Input strides */
108 		       char  *out,       /* Output data */
109 		       npy_intp   *outstr,    /* Output strides */
110 		       char  *hvals,     /* coefficients in filter */
111 		       npy_intp   *hstr,      /* coefficients strides */
112 		       npy_intp   *Nwin,     /* Size of kernel Nwin[0] x Nwin[1] */
113 		       npy_intp   *Ns,        /* Size of image Ns[0] x Ns[1] */
114 		       int   flag,       /* convolution parameters */
115 		       char  *fillvalue) /* fill value */
116 {
117   const int boundary = flag & BOUNDARY_MASK;  /* flag can be fill, reflecting, circular */
118   const int outsize = flag & OUTSIZE_MASK;
119   const int convolve = flag & FLIP_MASK;
120   const int type_num = (flag & TYPE_MASK) >> TYPE_SHIFT;
121   /*type_size*/
122 
123   OneMultAddFunction *mult_and_add = OneMultAdd[type_num];
124   if (mult_and_add == NULL) return -5;  /* Not available for this type */
125 
126   if (type_num < 0 || type_num > MAXTYPES) return -4;  /* Invalid type */
127   const int type_size = elsizes[type_num];
128 
129   int64_t Os[2];
130   if (outsize == FULL) {Os[0] = Ns[0]+Nwin[0]-1; Os[1] = Ns[1]+Nwin[1]-1;}
131   else if (outsize == SAME) {Os[0] = Ns[0]; Os[1] = Ns[1];}
132   else if (outsize == VALID) {Os[0] = Ns[0]-Nwin[0]+1; Os[1] = Ns[1]-Nwin[1]+1;}
133   else return -1; /* Invalid output flag */
134 
135   if ((boundary != PAD) && (boundary != REFLECT) && (boundary != CIRCULAR))
136     return -2; /* Invalid boundary flag */
137 
138   char **indices = malloc(Nwin[1] * sizeof(indices[0]));
139   if (indices == NULL) return -3; /* No memory */
140 
141   /* Speed this up by not doing any if statements in the for loop.  Need 3*3*2=18 different
142      loops executed for different conditions */
143 
144   for (int64_t m=0; m < Os[0]; m++) {
145     /* Reposition index into input image based on requested output size */
146     int64_t new_m;
147     if (outsize == FULL) new_m = convolve ? m : (m-Nwin[0]+1);
148     else if (outsize == SAME) new_m = convolve ? (m+((Nwin[0]-1)>>1)) : (m-((Nwin[0]-1) >> 1));
149     else new_m = convolve ? (m+Nwin[0]-1) : m; /* VALID */
150 
151     for (int64_t n=0; n < Os[1]; n++) {  /* loop over columns */
152       char * sum = out+m*outstr[0]+n*outstr[1];
153       memset(sum, 0, type_size); /* sum = 0.0; */
154 
155       int64_t new_n;
156       if (outsize == FULL) new_n = convolve ? n : (n-Nwin[1]+1);
157       else if (outsize == SAME) new_n = convolve ? (n+((Nwin[1]-1)>>1)) : (n-((Nwin[1]-1) >> 1));
158       else new_n = convolve ? (n+Nwin[1]-1) : n;
159 
160       /* Sum over kernel, if index into image is out of bounds
161 	 handle it according to boundary flag */
162       for (int64_t j=0; j < Nwin[0]; j++) {
163 	int64_t ind0 = convolve ? (new_m-j): (new_m+j);
164 	bool bounds_pad_flag = false;
165 
166 	if (ind0 < 0) {
167 	  if (boundary == REFLECT) ind0 = -1-ind0;
168 	  else if (boundary == CIRCULAR) ind0 = Ns[0] + ind0;
169 	  else bounds_pad_flag = true;
170 	}
171 	else if (ind0 >= Ns[0]) {
172 	  if (boundary == REFLECT) ind0 = Ns[0]+Ns[0]-1-ind0;
173 	  else if (boundary == CIRCULAR) ind0 = ind0 - Ns[0];
174 	  else bounds_pad_flag = true;
175 	}
176 
177 	const int64_t ind0_memory = ind0*instr[0];
178 
179 	if (bounds_pad_flag) {
180 	  for (int64_t k=0; k < Nwin[1]; k++) {
181 	      indices[k] = fillvalue;
182 	  }
183 	}
184 	else  {
185 	  for (int64_t k=0; k < Nwin[1]; k++) {
186 	    int64_t ind1 = convolve ? (new_n-k) : (new_n+k);
187 	    if (ind1 < 0) {
188 	      if (boundary == REFLECT) ind1 = -1-ind1;
189 	      else if (boundary == CIRCULAR) ind1 = Ns[1] + ind1;
190 	      else bounds_pad_flag = true;
191 	    }
192 	    else if (ind1 >= Ns[1]) {
193 	      if (boundary == REFLECT) ind1 = Ns[1]+Ns[1]-1-ind1;
194 	      else if (boundary == CIRCULAR) ind1 = ind1 - Ns[1];
195 	      else bounds_pad_flag = true;
196 	    }
197 
198 	    if (bounds_pad_flag) {
199 	      indices[k] = fillvalue;
200 	    }
201 	    else {
202 	      indices[k] = in+ind0_memory+ind1*instr[1];
203 	    }
204 	    bounds_pad_flag = false;
205 	  }
206 	}
207 	mult_and_add(sum, hvals+j*hstr[0], hstr[1], indices, Nwin[1]);
208       }
209     }
210   }
211   free(indices);
212   return 0;
213 }
214