1 /*
2  * vim:syntax=c
3  * vim:sw=4
4  */
5 #include <Python.h>
6 #define PY_ARRAY_UNIQUE_SYMBOL _scipy_signal_ARRAY_API
7 #define NO_IMPORT_ARRAY
8 #include <numpy/ndarrayobject.h>
9 
10 #include "sigtools.h"
11 static void FLOAT_filt(char *b, char *a, char *x, char *y, char *Z,
12                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
13                        npy_intp stride_Y);
14 static void CFLOAT_filt(char *b, char *a, char *x, char *y, char *Z,
15                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
16                         npy_intp stride_Y);
17 static void DOUBLE_filt(char *b, char *a, char *x, char *y, char *Z,
18                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
19                        npy_intp stride_Y);
20 static void CDOUBLE_filt(char *b, char *a, char *x, char *y, char *Z,
21                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
22                         npy_intp stride_Y);
23 static void EXTENDED_filt(char *b, char *a, char *x, char *y, char *Z,
24                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
25                        npy_intp stride_Y);
26 static void CEXTENDED_filt(char *b, char *a, char *x, char *y, char *Z,
27                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
28                         npy_intp stride_Y);
29 
30 static void OBJECT_filt(char *b, char *a, char *x, char *y, char *Z,
31                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
32                         npy_intp stride_Y);
33 
34 typedef void (BasicFilterFunction) (char *, char *,  char *, char *, char *,
35                                     npy_intp, npy_uintp, npy_intp, npy_intp);
36 
37 static BasicFilterFunction *BasicFilterFunctions[256];
38 
39 void
scipy_signal_sigtools_linear_filter_module_init(void)40 scipy_signal_sigtools_linear_filter_module_init(void)
41 {
42     int k;
43     for (k = 0; k < 256; ++k) {
44         BasicFilterFunctions[k] = NULL;
45     }
46     BasicFilterFunctions[NPY_FLOAT] = FLOAT_filt;
47     BasicFilterFunctions[NPY_DOUBLE] = DOUBLE_filt;
48     BasicFilterFunctions[NPY_LONGDOUBLE] = EXTENDED_filt;
49     BasicFilterFunctions[NPY_CFLOAT] = CFLOAT_filt;
50     BasicFilterFunctions[NPY_CDOUBLE] = CDOUBLE_filt;
51     BasicFilterFunctions[NPY_CLONGDOUBLE] = CEXTENDED_filt;
52     BasicFilterFunctions[NPY_OBJECT] = OBJECT_filt;
53 }
54 
55 /* There is the start of an OBJECT_filt, but it may need work */
56 
57 static int
58 RawFilter(const PyArrayObject * b, const PyArrayObject * a,
59           const PyArrayObject * x, const PyArrayObject * zi,
60           const PyArrayObject * zf, PyArrayObject * y, int axis,
61           BasicFilterFunction * filter_func);
62 
63 PyObject*
convert_shape_to_errmsg(npy_intp ndim,npy_intp * Xshape,npy_intp * Vishape,npy_intp theaxis,npy_intp val)64 convert_shape_to_errmsg(npy_intp ndim, npy_intp *Xshape, npy_intp *Vishape,
65                         npy_intp theaxis, npy_intp val)
66 {
67     npy_intp j, expect_size;
68     PyObject *msg, *tmp, *msg1, *tmp1;
69 
70     if (ndim == 1) {
71         msg = PyUnicode_FromFormat("Unexpected shape for zi: expected (%"   \
72                                     NPY_INTP_FMT ",), found (%" NPY_INTP_FMT \
73                                     ",).", val, Vishape[0]);
74         return msg;
75     }
76 
77     msg = PyUnicode_FromString("Unexpected shape for zi:  expected (");
78     if (!msg) {
79         return 0;
80     }
81 
82     msg1 = PyUnicode_FromString("), found (");
83     if (!msg1) {
84         Py_DECREF(msg);
85         return 0;
86     }
87 
88     for (j = 0; j < ndim; ++j) {
89         expect_size = j != theaxis ? Xshape[j] : val;
90 
91         if (j == ndim - 1) {
92             tmp = PyUnicode_FromFormat("%" NPY_INTP_FMT, expect_size);
93             tmp1 = PyUnicode_FromFormat("%" NPY_INTP_FMT, Vishape[j]);
94         } else {
95             tmp = PyUnicode_FromFormat("%" NPY_INTP_FMT ",", expect_size);
96             tmp1 = PyUnicode_FromFormat("%" NPY_INTP_FMT ",", Vishape[j]);
97         }
98         if (!tmp) {
99             Py_DECREF(msg);
100             Py_DECREF(msg1);
101             Py_XDECREF(tmp1);
102             return 0;
103         }
104         if (!tmp1) {
105             Py_DECREF(msg);
106             Py_DECREF(msg1);
107             Py_DECREF(tmp);
108             return 0;
109         }
110         Py_SETREF(msg, PyUnicode_Concat(msg, tmp));
111         Py_SETREF(msg1, PyUnicode_Concat(msg1, tmp1));
112         Py_DECREF(tmp);
113         Py_DECREF(tmp1);
114     }
115     tmp = PyUnicode_FromString(").");
116     if (!tmp) {
117         Py_DECREF(msg);
118         Py_DECREF(msg1);
119         return 0;
120     }
121     Py_SETREF(msg1, PyUnicode_Concat(msg1, tmp));
122     Py_SETREF(msg, PyUnicode_Concat(msg, msg1));
123     Py_DECREF(tmp);
124     Py_DECREF(msg1);
125     return msg;
126 }
127 
128 /*
129  * XXX: Error checking not done yet
130  */
131 PyObject*
scipy_signal_sigtools_linear_filter(PyObject * NPY_UNUSED (dummy),PyObject * args)132 scipy_signal_sigtools_linear_filter(PyObject * NPY_UNUSED(dummy), PyObject * args)
133 {
134     PyObject *b, *a, *X, *Vi;
135     PyArrayObject *arY, *arb, *ara, *arX, *arVi, *arVf;
136     int axis, typenum, theaxis, st, Vi_needs_broadcasted = 0;
137     char *ara_ptr, input_flag = 0, *azero;
138     npy_intp na, nb, nal, zi_size;
139     npy_intp zf_shape[NPY_MAXDIMS];
140     BasicFilterFunction *basic_filter;
141 
142     axis = -1;
143     Vi = NULL;
144     if (!PyArg_ParseTuple(args, "OOO|iO", &b, &a, &X, &axis, &Vi)) {
145         return NULL;
146     }
147 
148     typenum = PyArray_ObjectType(b, 0);
149     typenum = PyArray_ObjectType(a, typenum);
150     typenum = PyArray_ObjectType(X, typenum);
151     if (Vi != NULL) {
152         typenum = PyArray_ObjectType(Vi, typenum);
153     }
154 
155     arY = arVf = arVi = NULL;
156     ara = (PyArrayObject *) PyArray_ContiguousFromObject(a, typenum, 1, 1);
157     arb = (PyArrayObject *) PyArray_ContiguousFromObject(b, typenum, 1, 1);
158     arX = (PyArrayObject *) PyArray_FromObject(X, typenum, 0, 0);
159     /* XXX: fix failure handling here */
160     if (ara == NULL || arb == NULL || arX == NULL) {
161         PyErr_SetString(PyExc_ValueError,
162                         "could not convert b, a, and x to a common type");
163         goto fail;
164     }
165 
166     if (axis < -PyArray_NDIM(arX) || axis > PyArray_NDIM(arX) - 1) {
167         PyErr_SetString(PyExc_ValueError, "selected axis is out of range");
168         goto fail;
169     }
170     if (axis < 0) {
171         theaxis = PyArray_NDIM(arX) + axis;
172     } else {
173         theaxis = axis;
174     }
175 
176     if (Vi != NULL) {
177         arVi = (PyArrayObject *) PyArray_FromObject(Vi, typenum,
178                                                     PyArray_NDIM(arX),
179                                                     PyArray_NDIM(arX));
180         if (arVi == NULL)
181             goto fail;
182 
183         input_flag = 1;
184     }
185 
186     if (PyArray_TYPE(arX) < 256) {
187         basic_filter = BasicFilterFunctions[(int) (PyArray_TYPE(arX))];
188     }
189     else {
190         basic_filter = NULL;
191     }
192     if (basic_filter == NULL) {
193         PyObject *msg, *str;
194 
195         str = PyObject_Str((PyObject*)PyArray_DESCR(arX));
196         if (str == NULL) {
197             goto fail;
198         }
199         msg = PyUnicode_FromFormat(
200                         "input type '%U' not supported\n", str);
201         Py_DECREF(str);
202         if (msg == NULL) {
203             goto fail;
204         }
205         PyErr_SetObject(PyExc_NotImplementedError, msg);
206         Py_DECREF(msg);
207         goto fail;
208     }
209 
210     /* Skip over leading zeros in vector representing denominator (a) */
211     azero = PyArray_Zero(ara);
212     if (azero == NULL) {
213         goto fail;
214     }
215     ara_ptr = PyArray_DATA(ara);
216     nal = PyArray_ITEMSIZE(ara);
217     st = memcmp(ara_ptr, azero, nal);
218     PyDataMem_FREE(azero);
219     if (st == 0) {
220         PyErr_SetString(PyExc_ValueError,
221                         "BUG: filter coefficient a[0] == 0 not supported yet");
222         goto fail;
223     }
224 
225     na = PyArray_SIZE(ara);
226     nb = PyArray_SIZE(arb);
227     zi_size = (na > nb ? na : nb) - 1;
228     if (input_flag) {
229         npy_intp k, Vik, Xk;
230         for (k = 0; k < PyArray_NDIM(arX); ++k) {
231             Vik = PyArray_DIM(arVi, k);
232             Xk = PyArray_DIM(arX, k);
233             if (k == theaxis && Vik == zi_size) {
234                 zf_shape[k] = zi_size;
235             } else if (k != theaxis && Vik == Xk) {
236                 zf_shape[k] = Xk;
237             } else if (k != theaxis && Vik == 1) {
238                 zf_shape[k] = Xk;
239                 Vi_needs_broadcasted = 1;
240             } else {
241                 PyObject *msg = convert_shape_to_errmsg(PyArray_NDIM(arX),
242                         PyArray_DIMS(arX), PyArray_DIMS(arVi),
243                         theaxis, zi_size);
244                 if (!msg) {
245                     goto fail;
246                 }
247                 PyErr_SetObject(PyExc_ValueError, msg);
248                 Py_DECREF(msg);
249                 goto fail;
250             }
251         }
252 
253         if (Vi_needs_broadcasted) {
254             /* Expand the singleton dimensions of arVi without copying by
255              * creating a new view of arVi with the expanded dimensions
256              * but the corresponding stride = 0.
257              */
258             PyArrayObject *arVi_view;
259             PyArray_Descr *view_dtype;
260             npy_intp *arVi_shape = PyArray_DIMS(arVi);
261             npy_intp *arVi_strides = PyArray_STRIDES(arVi);
262             npy_intp ndim = PyArray_NDIM(arVi);
263             npy_intp strides[NPY_MAXDIMS];
264             npy_intp k;
265 
266             for (k = 0; k < ndim; ++k) {
267                 if (arVi_shape[k] == 1) {
268                     strides[k] = 0;
269                 } else {
270                     strides[k] = arVi_strides[k];
271                 }
272             }
273 
274             /* PyArray_DESCR borrows a reference, but it will be stolen
275              * by PyArray_NewFromDescr below, so increment it.
276              */
277             view_dtype = PyArray_DESCR(arVi);
278             Py_INCREF(view_dtype);
279 
280             arVi_view = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
281                             view_dtype, ndim, zf_shape, strides,
282                             PyArray_BYTES(arVi), 0, NULL);
283             if (!arVi_view) {
284                 goto fail;
285             }
286             /* Give our reference to arVi to arVi_view */
287 #if NPY_API_VERSION >= 0x00000007
288             if (PyArray_SetBaseObject(arVi_view, (PyObject *)arVi) == -1) {
289                 Py_DECREF(arVi_view);
290                 goto fail;
291             }
292 #else
293             PyArray_BASE(arVi_view) = arVi;
294 #endif
295             arVi = arVi_view;
296         }
297 
298         arVf = (PyArrayObject *) PyArray_SimpleNew(PyArray_NDIM(arVi),
299                                                    zf_shape,
300                                                    typenum);
301         if (arVf == NULL) {
302             goto fail;
303         }
304     }
305 
306     arY = (PyArrayObject *) PyArray_SimpleNew(PyArray_NDIM(arX),
307                                               PyArray_DIMS(arX), typenum);
308     if (arY == NULL) {
309         goto fail;
310     }
311 
312 
313     st = RawFilter(arb, ara, arX, arVi, arVf, arY, theaxis, basic_filter);
314     if (st) {
315         goto fail;
316     }
317 
318     Py_XDECREF(ara);
319     Py_XDECREF(arb);
320     Py_XDECREF(arX);
321     Py_XDECREF(arVi);
322 
323     if (!input_flag) {
324         return PyArray_Return(arY);
325     } else {
326         return Py_BuildValue("(NN)", arY, arVf);
327     }
328 
329 
330   fail:
331     Py_XDECREF(ara);
332     Py_XDECREF(arb);
333     Py_XDECREF(arX);
334     Py_XDECREF(arVi);
335     Py_XDECREF(arVf);
336     Py_XDECREF(arY);
337     return NULL;
338 }
339 
340 /*
341  * Copy the first nx items of x into xzfilled , and fill the rest with 0s
342  */
343 static int
zfill(const PyArrayObject * x,npy_intp nx,char * xzfilled,npy_intp nxzfilled)344 zfill(const PyArrayObject * x, npy_intp nx, char *xzfilled, npy_intp nxzfilled)
345 {
346     char *xzero;
347     npy_intp i, nxl;
348     PyArray_CopySwapFunc *copyswap = PyArray_DESCR((PyArrayObject *)x)->f->copyswap;
349 
350     nxl = PyArray_ITEMSIZE(x);
351 
352     /* PyArray_Zero does not take const pointer, hence the cast */
353     xzero = PyArray_Zero((PyArrayObject *) x);
354     if (xzero == NULL) return -1;
355 
356     if (nx > 0) {
357         for (i = 0; i < nx; ++i) {
358             copyswap(xzfilled + i * nxl,
359                      (char *)PyArray_DATA((PyArrayObject *)x) + i * nxl,
360                      0, NULL);
361         }
362     }
363     for (i = nx; i < nxzfilled; ++i) {
364         copyswap(xzfilled + i * nxl, xzero, 0, NULL);
365     }
366 
367     PyDataMem_FREE(xzero);
368 
369     return 0;
370 }
371 
372 /*
373  * a and b assumed to be contiguous
374  *
375  * XXX: this code is very conservative, and could be considerably sped up for
376  * the usual cases (like contiguity).
377  *
378  * XXX: the code should be refactored (at least with/without initial
379  * condition), some code is wasteful here
380  */
381 static int
RawFilter(const PyArrayObject * b,const PyArrayObject * a,const PyArrayObject * x,const PyArrayObject * zi,const PyArrayObject * zf,PyArrayObject * y,int axis,BasicFilterFunction * filter_func)382 RawFilter(const PyArrayObject * b, const PyArrayObject * a,
383           const PyArrayObject * x, const PyArrayObject * zi,
384           const PyArrayObject * zf, PyArrayObject * y, int axis,
385           BasicFilterFunction * filter_func)
386 {
387     PyArrayIterObject *itx, *ity, *itzi = NULL, *itzf = NULL;
388     npy_intp nitx, i, nxl, nzfl, j;
389     npy_intp na, nb, nal, nbl;
390     npy_intp nfilt;
391     char *azfilled, *bzfilled, *zfzfilled, *yoyo;
392     PyArray_CopySwapFunc *copyswap = PyArray_DESCR((PyArrayObject *)x)->f->copyswap;
393 
394     itx = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *) x,
395                                                        &axis);
396     if (itx == NULL) {
397         PyErr_SetString(PyExc_MemoryError, "Could not create itx");
398         goto fail;
399     }
400     nitx = itx->size;
401 
402     ity = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *) y,
403                                                        &axis);
404     if (ity == NULL) {
405         PyErr_SetString(PyExc_MemoryError, "Could not create ity");
406         goto clean_itx;
407     }
408 
409     if (zi != NULL) {
410         itzi = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)
411                                                             zi, &axis);
412         if (itzi == NULL) {
413             PyErr_SetString(PyExc_MemoryError, "Could not create itzi");
414             goto clean_ity;
415         }
416 
417         itzf = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)
418                                                             zf, &axis);
419         if (itzf == NULL) {
420             PyErr_SetString(PyExc_MemoryError, "Could not create itzf");
421             goto clean_itzi;
422         }
423     }
424 
425     na = PyArray_SIZE((PyArrayObject *)a);
426     nal = PyArray_ITEMSIZE(a);
427     nb = PyArray_SIZE((PyArrayObject *)b);
428     nbl = PyArray_ITEMSIZE(b);
429 
430     nfilt = na > nb ? na : nb;
431 
432     azfilled = malloc(nal * nfilt);
433     if (azfilled == NULL) {
434         PyErr_SetString(PyExc_MemoryError, "Could not create azfilled");
435         goto clean_itzf;
436     }
437     bzfilled = malloc(nbl * nfilt);
438     if (bzfilled == NULL) {
439         PyErr_SetString(PyExc_MemoryError, "Could not create bzfilled");
440         goto clean_azfilled;
441     }
442 
443     nxl = PyArray_ITEMSIZE(x);
444     zfzfilled = malloc(nxl * (nfilt - 1));
445     if (zfzfilled == NULL) {
446         PyErr_SetString(PyExc_MemoryError, "Could not create zfzfilled");
447         goto clean_bzfilled;
448     }
449     /* Initialize zero filled buffers to 0, so that we can use
450      * Py_XINCREF/Py_XDECREF on it for object arrays (necessary for
451      * copyswap to work correctly). Stricly speaking, it is not needed for
452      * fundamental types (as values are copied instead of pointers, without
453      * refcounts), but oh well...
454      */
455     memset(azfilled, 0, nal * nfilt);
456     memset(bzfilled, 0, nbl * nfilt);
457     memset(zfzfilled, 0, nxl * (nfilt - 1));
458 
459     if (zfill(a, na, azfilled, nfilt) == -1) goto clean_zfzfilled;
460     if (zfill(b, nb, bzfilled, nfilt) == -1) goto clean_zfzfilled;
461 
462     /* XXX: Check that zf and zi have same type ? */
463     if (zf != NULL) {
464         nzfl = PyArray_ITEMSIZE(zf);
465     } else {
466         nzfl = 0;
467     }
468 
469     /* Iterate over the input array */
470     for (i = 0; i < nitx; ++i) {
471         if (zi != NULL) {
472             yoyo = itzi->dataptr;
473             /* Copy initial conditions zi in zfzfilled buffer */
474             for (j = 0; j < nfilt - 1; ++j) {
475                 copyswap(zfzfilled + j * nzfl, yoyo, 0, NULL);
476                 yoyo += itzi->strides[axis];
477             }
478             PyArray_ITER_NEXT(itzi);
479         } else {
480             if (zfill(x, 0, zfzfilled, nfilt - 1) == -1) goto clean_zfzfilled;
481         }
482 
483         filter_func(bzfilled, azfilled,
484                     itx->dataptr, ity->dataptr, zfzfilled,
485                     nfilt, PyArray_DIM(x, axis), itx->strides[axis],
486                     ity->strides[axis]);
487 
488         if (PyErr_Occurred()) {
489             goto clean_zfzfilled;
490         }
491         PyArray_ITER_NEXT(itx);
492         PyArray_ITER_NEXT(ity);
493 
494         /* Copy tmp buffer fo final values back into zf output array */
495         if (zi != NULL) {
496             yoyo = itzf->dataptr;
497             for (j = 0; j < nfilt - 1; ++j) {
498                 copyswap(yoyo, zfzfilled + j * nzfl, 0, NULL);
499                 yoyo += itzf->strides[axis];
500             }
501             PyArray_ITER_NEXT(itzf);
502         }
503     }
504 
505     /* Free up allocated memory */
506     free(zfzfilled);
507     free(bzfilled);
508     free(azfilled);
509 
510     if (zi != NULL) {
511         Py_DECREF(itzf);
512         Py_DECREF(itzi);
513     }
514     Py_DECREF(ity);
515     Py_DECREF(itx);
516 
517     return 0;
518 
519 clean_zfzfilled:
520     free(zfzfilled);
521 clean_bzfilled:
522     free(bzfilled);
523 clean_azfilled:
524     free(azfilled);
525 clean_itzf:
526     if (zf != NULL) {
527         Py_DECREF(itzf);
528     }
529 clean_itzi:
530     if (zi != NULL) {
531         Py_DECREF(itzi);
532     }
533 clean_ity:
534     Py_DECREF(ity);
535 clean_itx:
536     Py_DECREF(itx);
537 fail:
538     return -1;
539 }
540 
541 /*****************************************************************
542  *   This is code for a 1-D linear-filter along an arbitrary     *
543  *   dimension of an N-D array.                                  *
544  *****************************************************************/
545 
FLOAT_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)546 static void FLOAT_filt(char *b, char *a, char *x, char *y, char *Z,
547                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
548                        npy_intp stride_Y)
549 {
550     Py_BEGIN_ALLOW_THREADS
551     char *ptr_x = x, *ptr_y = y;
552     float *ptr_Z;
553     float *ptr_b = (float*)b;
554     float *ptr_a = (float*)a;
555     float *xn, *yn;
556     const float a0 = *((float *) a);
557     npy_intp n;
558     npy_uintp k;
559 
560     /* normalize the filter coefs only once. */
561     for (n = 0; n < len_b; ++n) {
562         ptr_b[n] /= a0;
563         ptr_a[n] /= a0;
564     }
565 
566     for (k = 0; k < len_x; k++) {
567         ptr_b = (float *) b;   /* Reset a and b pointers */
568         ptr_a = (float *) a;
569         xn = (float *) ptr_x;
570         yn = (float *) ptr_y;
571         if (len_b > 1) {
572             ptr_Z = ((float *) Z);
573             *yn = *ptr_Z + *ptr_b  * *xn;   /* Calculate first delay (output) */
574             ptr_b++;
575             ptr_a++;
576             /* Fill in middle delays */
577             for (n = 0; n < len_b - 2; n++) {
578                 *ptr_Z =
579                     ptr_Z[1] + *xn * (*ptr_b) - *yn * (*ptr_a);
580                 ptr_b++;
581                 ptr_a++;
582                 ptr_Z++;
583             }
584             /* Calculate last delay */
585             *ptr_Z = *xn * (*ptr_b) - *yn * (*ptr_a);
586         } else {
587             *yn = *xn * (*ptr_b);
588         }
589 
590         ptr_y += stride_Y;      /* Move to next input/output point */
591         ptr_x += stride_X;
592     }
593     Py_END_ALLOW_THREADS
594 }
595 
596 
CFLOAT_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)597 static void CFLOAT_filt(char *b, char *a, char *x, char *y, char *Z,
598                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
599                         npy_intp stride_Y)
600 {
601     Py_BEGIN_ALLOW_THREADS
602     char *ptr_x = x, *ptr_y = y;
603     float *ptr_Z, *ptr_b;
604     float *ptr_a;
605     float *xn, *yn;
606     float a0r = ((float *) a)[0];
607     float a0i = ((float *) a)[1];
608     float a0_mag, tmpr, tmpi;
609     npy_intp n;
610     npy_uintp k;
611 
612     a0_mag = a0r * a0r + a0i * a0i;
613     for (k = 0; k < len_x; k++) {
614         ptr_b = (float *) b;   /* Reset a and b pointers */
615         ptr_a = (float *) a;
616         xn = (float *) ptr_x;
617         yn = (float *) ptr_y;
618         if (len_b > 1) {
619             ptr_Z = ((float *) Z);
620             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
621             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
622             /* Calculate first delay (output) */
623             yn[0] = ptr_Z[0] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
624             yn[1] = ptr_Z[1] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
625             ptr_b += 2;
626             ptr_a += 2;
627             /* Fill in middle delays */
628             for (n = 0; n < len_b - 2; n++) {
629                 tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
630                 tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
631                 ptr_Z[0] =
632                     ptr_Z[2] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
633                 ptr_Z[1] =
634                     ptr_Z[3] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
635                 tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
636                 tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
637                 ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
638                 ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
639                 ptr_b += 2;
640                 ptr_a += 2;
641                 ptr_Z += 2;
642             }
643             /* Calculate last delay */
644 
645             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
646             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
647             ptr_Z[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
648             ptr_Z[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
649             tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
650             tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
651             ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
652             ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
653         } else {
654             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
655             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
656             yn[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
657             yn[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
658         }
659 
660         ptr_y += stride_Y;      /* Move to next input/output point */
661         ptr_x += stride_X;
662 
663     }
664     Py_END_ALLOW_THREADS
665 }
DOUBLE_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)666 static void DOUBLE_filt(char *b, char *a, char *x, char *y, char *Z,
667                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
668                        npy_intp stride_Y)
669 {
670     Py_BEGIN_ALLOW_THREADS
671     char *ptr_x = x, *ptr_y = y;
672     double *ptr_Z;
673     double *ptr_b = (double*)b;
674     double *ptr_a = (double*)a;
675     double *xn, *yn;
676     const double a0 = *((double *) a);
677     npy_intp n;
678     npy_uintp k;
679 
680     /* normalize the filter coefs only once. */
681     for (n = 0; n < len_b; ++n) {
682         ptr_b[n] /= a0;
683         ptr_a[n] /= a0;
684     }
685 
686     for (k = 0; k < len_x; k++) {
687         ptr_b = (double *) b;   /* Reset a and b pointers */
688         ptr_a = (double *) a;
689         xn = (double *) ptr_x;
690         yn = (double *) ptr_y;
691         if (len_b > 1) {
692             ptr_Z = ((double *) Z);
693             *yn = *ptr_Z + *ptr_b  * *xn;   /* Calculate first delay (output) */
694             ptr_b++;
695             ptr_a++;
696             /* Fill in middle delays */
697             for (n = 0; n < len_b - 2; n++) {
698                 *ptr_Z =
699                     ptr_Z[1] + *xn * (*ptr_b) - *yn * (*ptr_a);
700                 ptr_b++;
701                 ptr_a++;
702                 ptr_Z++;
703             }
704             /* Calculate last delay */
705             *ptr_Z = *xn * (*ptr_b) - *yn * (*ptr_a);
706         } else {
707             *yn = *xn * (*ptr_b);
708         }
709 
710         ptr_y += stride_Y;      /* Move to next input/output point */
711         ptr_x += stride_X;
712     }
713     Py_END_ALLOW_THREADS
714 }
715 
716 
CDOUBLE_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)717 static void CDOUBLE_filt(char *b, char *a, char *x, char *y, char *Z,
718                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
719                         npy_intp stride_Y)
720 {
721     Py_BEGIN_ALLOW_THREADS
722     char *ptr_x = x, *ptr_y = y;
723     double *ptr_Z, *ptr_b;
724     double *ptr_a;
725     double *xn, *yn;
726     double a0r = ((double *) a)[0];
727     double a0i = ((double *) a)[1];
728     double a0_mag, tmpr, tmpi;
729     npy_intp n;
730     npy_uintp k;
731 
732     a0_mag = a0r * a0r + a0i * a0i;
733     for (k = 0; k < len_x; k++) {
734         ptr_b = (double *) b;   /* Reset a and b pointers */
735         ptr_a = (double *) a;
736         xn = (double *) ptr_x;
737         yn = (double *) ptr_y;
738         if (len_b > 1) {
739             ptr_Z = ((double *) Z);
740             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
741             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
742             /* Calculate first delay (output) */
743             yn[0] = ptr_Z[0] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
744             yn[1] = ptr_Z[1] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
745             ptr_b += 2;
746             ptr_a += 2;
747             /* Fill in middle delays */
748             for (n = 0; n < len_b - 2; n++) {
749                 tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
750                 tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
751                 ptr_Z[0] =
752                     ptr_Z[2] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
753                 ptr_Z[1] =
754                     ptr_Z[3] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
755                 tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
756                 tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
757                 ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
758                 ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
759                 ptr_b += 2;
760                 ptr_a += 2;
761                 ptr_Z += 2;
762             }
763             /* Calculate last delay */
764 
765             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
766             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
767             ptr_Z[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
768             ptr_Z[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
769             tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
770             tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
771             ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
772             ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
773         } else {
774             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
775             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
776             yn[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
777             yn[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
778         }
779 
780         ptr_y += stride_Y;      /* Move to next input/output point */
781         ptr_x += stride_X;
782 
783     }
784     Py_END_ALLOW_THREADS
785 }
EXTENDED_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)786 static void EXTENDED_filt(char *b, char *a, char *x, char *y, char *Z,
787                        npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
788                        npy_intp stride_Y)
789 {
790     Py_BEGIN_ALLOW_THREADS
791     char *ptr_x = x, *ptr_y = y;
792     npy_longdouble *ptr_Z;
793     npy_longdouble *ptr_b = (npy_longdouble*)b;
794     npy_longdouble *ptr_a = (npy_longdouble*)a;
795     npy_longdouble *xn, *yn;
796     const npy_longdouble a0 = *((npy_longdouble *) a);
797     npy_intp n;
798     npy_uintp k;
799 
800     /* normalize the filter coefs only once. */
801     for (n = 0; n < len_b; ++n) {
802         ptr_b[n] /= a0;
803         ptr_a[n] /= a0;
804     }
805 
806     for (k = 0; k < len_x; k++) {
807         ptr_b = (npy_longdouble *) b;   /* Reset a and b pointers */
808         ptr_a = (npy_longdouble *) a;
809         xn = (npy_longdouble *) ptr_x;
810         yn = (npy_longdouble *) ptr_y;
811         if (len_b > 1) {
812             ptr_Z = ((npy_longdouble *) Z);
813             *yn = *ptr_Z + *ptr_b  * *xn;   /* Calculate first delay (output) */
814             ptr_b++;
815             ptr_a++;
816             /* Fill in middle delays */
817             for (n = 0; n < len_b - 2; n++) {
818                 *ptr_Z =
819                     ptr_Z[1] + *xn * (*ptr_b) - *yn * (*ptr_a);
820                 ptr_b++;
821                 ptr_a++;
822                 ptr_Z++;
823             }
824             /* Calculate last delay */
825             *ptr_Z = *xn * (*ptr_b) - *yn * (*ptr_a);
826         } else {
827             *yn = *xn * (*ptr_b);
828         }
829 
830         ptr_y += stride_Y;      /* Move to next input/output point */
831         ptr_x += stride_X;
832     }
833     Py_END_ALLOW_THREADS
834 }
835 
836 
CEXTENDED_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)837 static void CEXTENDED_filt(char *b, char *a, char *x, char *y, char *Z,
838                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
839                         npy_intp stride_Y)
840 {
841     Py_BEGIN_ALLOW_THREADS
842     char *ptr_x = x, *ptr_y = y;
843     npy_longdouble *ptr_Z, *ptr_b;
844     npy_longdouble *ptr_a;
845     npy_longdouble *xn, *yn;
846     npy_longdouble a0r = ((npy_longdouble *) a)[0];
847     npy_longdouble a0i = ((npy_longdouble *) a)[1];
848     npy_longdouble a0_mag, tmpr, tmpi;
849     npy_intp n;
850     npy_uintp k;
851 
852     a0_mag = a0r * a0r + a0i * a0i;
853     for (k = 0; k < len_x; k++) {
854         ptr_b = (npy_longdouble *) b;   /* Reset a and b pointers */
855         ptr_a = (npy_longdouble *) a;
856         xn = (npy_longdouble *) ptr_x;
857         yn = (npy_longdouble *) ptr_y;
858         if (len_b > 1) {
859             ptr_Z = ((npy_longdouble *) Z);
860             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
861             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
862             /* Calculate first delay (output) */
863             yn[0] = ptr_Z[0] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
864             yn[1] = ptr_Z[1] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
865             ptr_b += 2;
866             ptr_a += 2;
867             /* Fill in middle delays */
868             for (n = 0; n < len_b - 2; n++) {
869                 tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
870                 tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
871                 ptr_Z[0] =
872                     ptr_Z[2] + (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
873                 ptr_Z[1] =
874                     ptr_Z[3] + (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
875                 tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
876                 tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
877                 ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
878                 ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
879                 ptr_b += 2;
880                 ptr_a += 2;
881                 ptr_Z += 2;
882             }
883             /* Calculate last delay */
884 
885             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
886             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
887             ptr_Z[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
888             ptr_Z[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
889             tmpr = ptr_a[0] * a0r + ptr_a[1] * a0i;
890             tmpi = ptr_a[1] * a0r - ptr_a[0] * a0i;
891             ptr_Z[0] -= (tmpr * yn[0] - tmpi * yn[1]) / a0_mag;
892             ptr_Z[1] -= (tmpi * yn[0] + tmpr * yn[1]) / a0_mag;
893         } else {
894             tmpr = ptr_b[0] * a0r + ptr_b[1] * a0i;
895             tmpi = ptr_b[1] * a0r - ptr_b[0] * a0i;
896             yn[0] = (tmpr * xn[0] - tmpi * xn[1]) / a0_mag;
897             yn[1] = (tmpi * xn[0] + tmpr * xn[1]) / a0_mag;
898         }
899 
900         ptr_y += stride_Y;      /* Move to next input/output point */
901         ptr_x += stride_X;
902 
903     }
904     Py_END_ALLOW_THREADS
905 }
906 
OBJECT_filt(char * b,char * a,char * x,char * y,char * Z,npy_intp len_b,npy_uintp len_x,npy_intp stride_X,npy_intp stride_Y)907 static void OBJECT_filt(char *b, char *a, char *x, char *y, char *Z,
908                         npy_intp len_b, npy_uintp len_x, npy_intp stride_X,
909                         npy_intp stride_Y)
910 {
911     char *ptr_x = x, *ptr_y = y;
912     PyObject **ptr_Z, **ptr_b;
913     PyObject **ptr_a;
914     PyObject **xn, **yn;
915     PyObject **a0 = (PyObject **) a;
916     PyObject *tmp1, *tmp2, *tmp3;
917     npy_intp n;
918     npy_uintp k;
919 
920     /* My reference counting might not be right */
921     for (k = 0; k < len_x; k++) {
922         ptr_b = (PyObject **) b;        /* Reset a and b pointers */
923         ptr_a = (PyObject **) a;
924         xn = (PyObject **) ptr_x;
925         yn = (PyObject **) ptr_y;
926         if (len_b > 1) {
927             ptr_Z = ((PyObject **) Z);
928             /* Calculate first delay (output) */
929             tmp1 = PyNumber_Multiply(*ptr_b, *xn);
930             if (tmp1 == NULL) return;
931             tmp2 = PyNumber_TrueDivide(tmp1, *a0);
932             if (tmp2 == NULL) {
933                 Py_DECREF(tmp1);
934                 return;
935             }
936             tmp3 = PyNumber_Add(tmp2, *ptr_Z);
937             Py_XDECREF(*yn);
938             *yn = tmp3; /* Steals the reference */
939             Py_DECREF(tmp1);
940             Py_DECREF(tmp2);
941             if (tmp3 == NULL) return;
942             ptr_b++;
943             ptr_a++;
944 
945             /* Fill in middle delays */
946             for (n = 0; n < len_b - 2; n++) {
947                 tmp1 = PyNumber_Multiply(*xn, *ptr_b);
948                 if (tmp1 == NULL) return;
949                 tmp2 = PyNumber_TrueDivide(tmp1, *a0);
950                 if (tmp2 == NULL) {
951                     Py_DECREF(tmp1);
952                     return;
953                 }
954                 tmp3 = PyNumber_Add(tmp2, ptr_Z[1]);
955                 Py_DECREF(tmp1);
956                 Py_DECREF(tmp2);
957                 if (tmp3 == NULL) return;
958                 tmp1 = PyNumber_Multiply(*yn, *ptr_a);
959                 if (tmp1 == NULL) {
960                     Py_DECREF(tmp3);
961                     return;
962                 }
963                 tmp2 = PyNumber_TrueDivide(tmp1, *a0);
964                 Py_DECREF(tmp1);
965                 if (tmp2 == NULL) {
966                     Py_DECREF(tmp3);
967                     return;
968                 }
969                 Py_XDECREF(*ptr_Z);
970                 *ptr_Z = PyNumber_Subtract(tmp3, tmp2);
971                 Py_DECREF(tmp2);
972                 Py_DECREF(tmp3);
973                 if (*ptr_Z == NULL) return;
974                 ptr_b++;
975                 ptr_a++;
976                 ptr_Z++;
977             }
978             /* Calculate last delay */
979             tmp1 = PyNumber_Multiply(*xn, *ptr_b);
980             if (tmp1 == NULL) return;
981             tmp3 = PyNumber_TrueDivide(tmp1, *a0);
982             Py_DECREF(tmp1);
983             if (tmp3 == NULL) return;
984             tmp1 = PyNumber_Multiply(*yn, *ptr_a);
985             if (tmp1 == NULL) {
986                 Py_DECREF(tmp3);
987                 return;
988             }
989             tmp2 = PyNumber_TrueDivide(tmp1, *a0);
990             Py_DECREF(tmp1);
991             if (tmp2 == NULL) {
992                 Py_DECREF(tmp3);
993                 return;
994             }
995             Py_XDECREF(*ptr_Z);
996             *ptr_Z = PyNumber_Subtract(tmp3, tmp2);
997             Py_DECREF(tmp2);
998             Py_DECREF(tmp3);
999         } else {
1000             tmp1 = PyNumber_Multiply(*xn, *ptr_b);
1001             if (tmp1 == NULL) return;
1002             Py_XDECREF(*yn);
1003             *yn = PyNumber_TrueDivide(tmp1, *a0);
1004             Py_DECREF(tmp1);
1005             if (*yn == NULL) return;
1006         }
1007 
1008         ptr_y += stride_Y;      /* Move to next input/output point */
1009         ptr_x += stride_X;
1010     }
1011 }
1012