1 /*
2  * This file implements most of the main API functions of NumPy's nditer.
3  * This excludes functions specialized using the templating system.
4  *
5  * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
6  * The University of British Columbia
7  *
8  * Copyright (c) 2011 Enthought, Inc
9  *
10  * See LICENSE.txt for the license.
11  */
12 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
13 
14 /* Indicate that this .c file is allowed to include the header */
15 #define NPY_ITERATOR_IMPLEMENTATION_CODE
16 #include "nditer_impl.h"
17 #include "templ_common.h"
18 #include "ctors.h"
19 
20 /* Internal helper functions private to this file */
21 static npy_intp
22 npyiter_checkreducesize(NpyIter *iter, npy_intp count,
23                                 npy_intp *reduce_innersize,
24                                 npy_intp *reduce_outerdim);
25 
26 /*NUMPY_API
27  * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
28  * was set for iterator creation, and does not work if buffering is
29  * enabled. This function also resets the iterator to its initial state.
30  *
31  * Returns NPY_SUCCEED or NPY_FAIL.
32  */
33 NPY_NO_EXPORT int
NpyIter_RemoveAxis(NpyIter * iter,int axis)34 NpyIter_RemoveAxis(NpyIter *iter, int axis)
35 {
36     npy_uint32 itflags = NIT_ITFLAGS(iter);
37     int idim, ndim = NIT_NDIM(iter);
38     int iop, nop = NIT_NOP(iter);
39 
40     int xdim = 0;
41     npy_int8 *perm = NIT_PERM(iter);
42     NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
43     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
44 
45     npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
46     char **resetdataptr = NIT_RESETDATAPTR(iter);
47 
48     if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
49         PyErr_SetString(PyExc_RuntimeError,
50                 "Iterator RemoveAxis may only be called "
51                 "if a multi-index is being tracked");
52         return NPY_FAIL;
53     }
54     else if (itflags&NPY_ITFLAG_HASINDEX) {
55         PyErr_SetString(PyExc_RuntimeError,
56                 "Iterator RemoveAxis may not be called on "
57                 "an index is being tracked");
58         return NPY_FAIL;
59     }
60     else if (itflags&NPY_ITFLAG_BUFFER) {
61         PyErr_SetString(PyExc_RuntimeError,
62                 "Iterator RemoveAxis may not be called on "
63                 "a buffered iterator");
64         return NPY_FAIL;
65     }
66     else if (axis < 0 || axis >= ndim) {
67         PyErr_SetString(PyExc_ValueError,
68                 "axis out of bounds in iterator RemoveAxis");
69         return NPY_FAIL;
70     }
71 
72     /* Reverse axis, since the iterator treats them that way */
73     axis = ndim - 1 - axis;
74 
75     /* First find the axis in question */
76     for (idim = 0; idim < ndim; ++idim) {
77         /* If this is it, and it's iterated forward, done */
78         if (perm[idim] == axis) {
79             xdim = idim;
80             break;
81         }
82         /* If this is it, but it's iterated backward, must reverse the axis */
83         else if (-1 - perm[idim] == axis) {
84             npy_intp *strides = NAD_STRIDES(axisdata_del);
85             npy_intp shape = NAD_SHAPE(axisdata_del), offset;
86 
87             xdim = idim;
88 
89             /*
90              * Adjust baseoffsets and resetbaseptr back to the start of
91              * this axis.
92              */
93             for (iop = 0; iop < nop; ++iop) {
94                 offset = (shape-1)*strides[iop];
95                 baseoffsets[iop] += offset;
96                 resetdataptr[iop] += offset;
97             }
98             break;
99         }
100 
101         NIT_ADVANCE_AXISDATA(axisdata_del, 1);
102     }
103 
104     if (idim == ndim) {
105         PyErr_SetString(PyExc_RuntimeError,
106                 "internal error in iterator perm");
107         return NPY_FAIL;
108     }
109 
110     /* Adjust the permutation */
111     for (idim = 0; idim < ndim-1; ++idim) {
112         npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
113         if (p >= 0) {
114             if (p > axis) {
115                 --p;
116             }
117         }
118         else if (p <= 0) {
119             if (p < -1-axis) {
120                 ++p;
121             }
122         }
123         perm[idim] = p;
124     }
125 
126     /* Shift all the axisdata structures by one */
127     axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
128     memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
129 
130     /* Adjust the iteration size and reset iterend */
131     NIT_ITERSIZE(iter) = 1;
132     axisdata = NIT_AXISDATA(iter);
133     for (idim = 0; idim < ndim-1; ++idim) {
134         if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
135                     NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) {
136             NIT_ITERSIZE(iter) = -1;
137             break;
138         }
139         NIT_ADVANCE_AXISDATA(axisdata, 1);
140     }
141     NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
142 
143     /* Shrink the iterator */
144     NIT_NDIM(iter) = ndim - 1;
145     /* If it is now 0-d fill the singleton dimension */
146     if (ndim == 1) {
147         npy_intp *strides = NAD_STRIDES(axisdata_del);
148         NAD_SHAPE(axisdata_del) = 1;
149         for (iop = 0; iop < nop; ++iop) {
150             strides[iop] = 0;
151         }
152         NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
153     }
154 
155     return NpyIter_Reset(iter, NULL);
156 }
157 
158 /*NUMPY_API
159  * Removes multi-index support from an iterator.
160  *
161  * Returns NPY_SUCCEED or NPY_FAIL.
162  */
163 NPY_NO_EXPORT int
NpyIter_RemoveMultiIndex(NpyIter * iter)164 NpyIter_RemoveMultiIndex(NpyIter *iter)
165 {
166     npy_uint32 itflags;
167 
168     /* Make sure the iterator is reset */
169     if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
170         return NPY_FAIL;
171     }
172 
173     itflags = NIT_ITFLAGS(iter);
174     if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
175         if (NIT_ITERSIZE(iter) < 0) {
176             PyErr_SetString(PyExc_ValueError, "iterator is too large");
177             return NPY_FAIL;
178         }
179 
180         NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
181         npyiter_coalesce_axes(iter);
182     }
183 
184     return NPY_SUCCEED;
185 }
186 
187 /*NUMPY_API
188  * Removes the inner loop handling (so HasExternalLoop returns true)
189  */
190 NPY_NO_EXPORT int
NpyIter_EnableExternalLoop(NpyIter * iter)191 NpyIter_EnableExternalLoop(NpyIter *iter)
192 {
193     npy_uint32 itflags = NIT_ITFLAGS(iter);
194     /*int ndim = NIT_NDIM(iter);*/
195     int nop = NIT_NOP(iter);
196 
197     /* Check conditions under which this can be done */
198     if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
199         PyErr_SetString(PyExc_ValueError,
200                 "Iterator flag EXTERNAL_LOOP cannot be used "
201                 "if an index or multi-index is being tracked");
202         return NPY_FAIL;
203     }
204     if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
205                         == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
206         PyErr_SetString(PyExc_ValueError,
207                 "Iterator flag EXTERNAL_LOOP cannot be used "
208                 "with ranged iteration unless buffering is also enabled");
209         return NPY_FAIL;
210     }
211     /* Set the flag */
212     if (!(itflags&NPY_ITFLAG_EXLOOP)) {
213         itflags |= NPY_ITFLAG_EXLOOP;
214         NIT_ITFLAGS(iter) = itflags;
215 
216         /*
217          * Check whether we can apply the single iteration
218          * optimization to the iternext function.
219          */
220         if (!(itflags&NPY_ITFLAG_BUFFER)) {
221             NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
222             if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
223                 NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
224             }
225         }
226     }
227 
228     /* Reset the iterator */
229     return NpyIter_Reset(iter, NULL);
230 }
231 
232 
/*
 * Message stored into the caller-provided errmsg slot when a cast fails
 * during a reset; the actual Python exception is set elsewhere (note the
 * GIL may be grabbed for that even in errmsg mode).
 */
static char *_reset_cast_error = (
        "Iterator reset failed due to a casting failure. "
        "This error is set as a Python error.");
236 
/*NUMPY_API
 * Resets the iterator to its initial state
 *
 * The use of errmsg is discouraged, it cannot be guaranteed that the GIL
 * will not be grabbed on casting errors even when this is passed.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_Reset(NpyIter *iter, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata;

        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /*
             * If the iterindex is already right, no need to
             * do anything (and no cast error has previously occurred).
             */
            bufferdata = NIT_BUFFERDATA(iter);
            if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
                    NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
                    NBF_SIZE(bufferdata) > 0) {
                return NPY_SUCCEED;
            }
            /* Write buffered data back to the operands before moving */
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /* Move the data pointers / per-axis indices back to iterstart */
    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
303 
/*NUMPY_API
 * Resets the iterator to its initial state, with new base data pointers.
 * This function requires great caution.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    char **resetdataptr = NIT_RESETDATAPTR(iter);
    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /* Flush buffered data back to the old operand memory first */
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /*
     * The new data pointers for resetting; baseoffsets account for
     * reversed axes, where iteration starts from the axis's far end.
     */
    for (iop = 0; iop < nop; ++iop) {
        resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
    }

    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
361 
/*NUMPY_API
 * Resets the iterator to a new iterator index range
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_ResetToIterIndexRange(NpyIter *iter,
                              npy_intp istart, npy_intp iend, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    /*int nop = NIT_NOP(iter);*/

    /* Ranged iteration must have been requested at construction time */
    if (!(itflags&NPY_ITFLAG_RANGE)) {
        if (errmsg == NULL) {
            PyErr_SetString(PyExc_ValueError,
                    "Cannot call ResetToIterIndexRange on an iterator without "
                    "requesting ranged iteration support in the constructor");
        }
        else {
            *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
                      "without requesting ranged iteration support in the "
                    "constructor";
        }
        return NPY_FAIL;
    }

    if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
        /* A negative ITERSIZE means the product of shapes overflowed */
        if (NIT_ITERSIZE(iter) < 0) {
            if (errmsg == NULL) {
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
            }
            else {
                *errmsg = "iterator is too large";
            }
            return NPY_FAIL;
        }
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
                    "ResetToIterIndexRange", istart, iend);
        }
        else {
            *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }
    else if (iend < istart) {
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
                    istart, iend);
        }
        else {
            *errmsg = "Invalid range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }

    /* Install the new half-open range, then do a normal reset */
    NIT_ITERSTART(iter) = istart;
    NIT_ITEREND(iter) = iend;

    return NpyIter_Reset(iter, errmsg);
}
430 
/*NUMPY_API
 * Sets the iterator to the specified multi-index, which must have the
 * correct number of entries for 'ndim'.  It is only valid
 * when NPY_ITER_MULTI_INDEX was passed to the constructor.  This operation
 * fails if the multi-index is out of bounds.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator without "
                "requesting a multi-index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    perm = NIT_PERM(iter);
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Compute the iterindex corresponding to the multi-index.
     * The axisdata is ordered fastest-varying first; perm maps each
     * iterator axis back to the caller's axis numbering, with negative
     * entries (-1-axis) marking axes iterated in reverse.
     */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_int8 p = perm[idim];
        npy_intp i, shape;

        shape = NAD_SHAPE(axisdata);
        if (p < 0) {
            /* If the perm entry is negative, reverse the index */
            i = shape - multi_index[ndim+p] - 1;
        }
        else {
            i = multi_index[ndim-p-1];
        }

        /* Bounds-check this index */
        if (i >= 0 && i < shape) {
            iterindex += factor * i;
            factor *= shape;
        }
        else {
            PyErr_SetString(PyExc_IndexError,
                    "Iterator GotoMultiIndex called with an out-of-bounds "
                    "multi-index");
            return NPY_FAIL;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /* The target must lie within the active [iterstart, iterend) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoMultiIndex called with a multi-index outside the "
                "restricted iteration range");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
522 
/*NUMPY_API
 * If the iterator is tracking an index, sets the iterator
 * to the specified index.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    if (!(itflags&NPY_ITFLAG_HASINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator without "
                "requesting a C or Fortran index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an out-of-bounds "
                "index");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Compute the iterindex corresponding to the flat_index.
     * The index "stride" is stored as the extra entry [nop] of each
     * axis's stride array; it is used to decompose flat_index into
     * per-axis coordinates.
     */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp i, shape, iterstride;

        iterstride = NAD_STRIDES(axisdata)[nop];
        shape = NAD_SHAPE(axisdata);

        /* Extract the index from the flat_index */
        if (iterstride == 0) {
            i = 0;
        }
        else if (iterstride < 0) {
            /* Negative index stride: axis is traversed in reverse */
            i = shape - (flat_index/(-iterstride))%shape - 1;
        }
        else {
            i = (flat_index/iterstride)%shape;
        }

        /* Add its contribution to iterindex */
        iterindex += factor * i;
        factor *= shape;

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /* The target must lie within the active [iterstart, iterend) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an index outside the "
                "restricted iteration range.");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
610 
/*NUMPY_API
 * Sets the iterator position to the specified iterindex,
 * which matches the iteration order of the iterator.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIterIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        /* A negative ITERSIZE indicates the size computation overflowed */
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIterIndex called with an iterindex outside the "
                "iteration range.");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        npy_intp bufiterend, size;

        size = NBF_SIZE(bufferdata);
        bufiterend = NBF_BUFITEREND(bufferdata);
        /*
         * Check if the new iterindex is already within the buffer;
         * the reduce double-loop layout makes this shortcut invalid
         * for reductions, hence the NPY_ITFLAG_REDUCE exclusion.
         */
        if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
                                        iterindex >= bufiterend - size) {
            npy_intp *strides, delta;
            char **ptrs;

            strides = NBF_STRIDES(bufferdata);
            ptrs = NBF_PTRS(bufferdata);
            delta = iterindex - NIT_ITERINDEX(iter);

            /* Slide the buffer pointers by the (possibly negative) delta */
            for (iop = 0; iop < nop; ++iop) {
                ptrs[iop] += delta * strides[iop];
            }

            NIT_ITERINDEX(iter) = iterindex;
        }
        /* Start the buffer at the provided iterindex */
        else {
            /* Write back to the arrays */
            if (npyiter_copy_from_buffers(iter) < 0) {
                return NPY_FAIL;
            }

            npyiter_goto_iterindex(iter, iterindex);

            /* Prepare the next buffers and set iterend/size */
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
                return NPY_FAIL;
            }
        }
    }
    else {
        npyiter_goto_iterindex(iter, iterindex);
    }

    return NPY_SUCCEED;
}
685 
/*NUMPY_API
 * Gets the current iteration index
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterIndex(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
    if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
        return NIT_ITERINDEX(iter);
    }
    else {
        npy_intp iterindex;
        NpyIter_AxisData *axisdata;
        npy_intp sizeof_axisdata;

        iterindex = 0;
        if (ndim == 0) {
            return 0;
        }
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);

        /*
         * Reconstruct the iterindex from the per-axis coordinates,
         * walking from the slowest-varying axis down to the fastest
         * (Horner-style accumulation).
         */
        for (idim = ndim-2; idim >= 0; --idim) {
            iterindex += NAD_INDEX(axisdata);
            NIT_ADVANCE_AXISDATA(axisdata, -1);
            iterindex *= NAD_SHAPE(axisdata);
        }
        iterindex += NAD_INDEX(axisdata);

        return iterindex;
    }
}
722 
723 /*NUMPY_API
724  * Whether the buffer allocation is being delayed
725  */
726 NPY_NO_EXPORT npy_bool
NpyIter_HasDelayedBufAlloc(NpyIter * iter)727 NpyIter_HasDelayedBufAlloc(NpyIter *iter)
728 {
729     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
730 }
731 
732 /*NUMPY_API
733  * Whether the iterator handles the inner loop
734  */
735 NPY_NO_EXPORT npy_bool
NpyIter_HasExternalLoop(NpyIter * iter)736 NpyIter_HasExternalLoop(NpyIter *iter)
737 {
738     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
739 }
740 
741 /*NUMPY_API
742  * Whether the iterator is tracking a multi-index
743  */
744 NPY_NO_EXPORT npy_bool
NpyIter_HasMultiIndex(NpyIter * iter)745 NpyIter_HasMultiIndex(NpyIter *iter)
746 {
747     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
748 }
749 
750 /*NUMPY_API
751  * Whether the iterator is tracking an index
752  */
753 NPY_NO_EXPORT npy_bool
NpyIter_HasIndex(NpyIter * iter)754 NpyIter_HasIndex(NpyIter *iter)
755 {
756     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
757 }
758 
/*NUMPY_API
 * Checks to see whether this is the first time the elements
 * of the specified reduction operand which the iterator points at are
 * being seen for the first time. The function returns
 * a reasonable answer for reduction operands and when buffering is
 * disabled. The answer may be incorrect for buffered non-reduction
 * operands.
 *
 * This function is intended to be used in EXTERNAL_LOOP mode only,
 * and will produce some wrong answers when that mode is not enabled.
 *
 * If this function returns true, the caller should also
 * check the inner loop stride of the operand, because if
 * that stride is 0, then only the first element of the innermost
 * external loop is being visited for the first time.
 *
 * WARNING: For performance reasons, 'iop' is not bounds-checked,
 *          it is not confirmed that 'iop' is actually a reduction
 *          operand, and it is not confirmed that EXTERNAL_LOOP
 *          mode is enabled. These checks are the responsibility of
 *          the caller, and should be done outside of any inner loops.
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsFirstVisit(NpyIter *iter, int iop)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    /* Scan every axis: stride 0 marks a broadcast/reduction axis for iop */
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp coord = NAD_INDEX(axisdata);
        npy_intp stride = NAD_STRIDES(axisdata)[iop];

        /*
         * If this is a reduction dimension and the coordinate
         * is not at the start, it's definitely not the first visit
         */
        if (stride == 0 && coord != 0) {
            return 0;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /*
     * In reduction buffering mode, there's a double loop being
     * tracked in the buffer part of the iterator data structure.
     * We only need to check the outer level of this two-level loop,
     * because of the requirement that EXTERNAL_LOOP be enabled.
     */
    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        /* The outer reduce loop */
        if (NBF_REDUCE_POS(bufferdata) != 0 &&
                NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) {
            return 0;
        }
    }

    return 1;
}
826 
827 /*NUMPY_API
828  * Whether the iteration could be done with no buffering.
829  */
830 NPY_NO_EXPORT npy_bool
NpyIter_RequiresBuffering(NpyIter * iter)831 NpyIter_RequiresBuffering(NpyIter *iter)
832 {
833     npy_uint32 itflags = NIT_ITFLAGS(iter);
834     /*int ndim = NIT_NDIM(iter);*/
835     int iop, nop = NIT_NOP(iter);
836 
837     npyiter_opitflags *op_itflags;
838 
839     if (!(itflags&NPY_ITFLAG_BUFFER)) {
840         return 0;
841     }
842 
843     op_itflags = NIT_OPITFLAGS(iter);
844 
845     /* If any operand requires a cast, buffering is mandatory */
846     for (iop = 0; iop < nop; ++iop) {
847         if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
848             return 1;
849         }
850     }
851 
852     return 0;
853 }
854 
855 /*NUMPY_API
856  * Whether the iteration loop, and in particular the iternext()
857  * function, needs API access.  If this is true, the GIL must
858  * be retained while iterating.
859  */
860 NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter * iter)861 NpyIter_IterationNeedsAPI(NpyIter *iter)
862 {
863     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
864 }
865 
866 /*NUMPY_API
867  * Gets the number of dimensions being iterated
868  */
869 NPY_NO_EXPORT int
NpyIter_GetNDim(NpyIter * iter)870 NpyIter_GetNDim(NpyIter *iter)
871 {
872     return NIT_NDIM(iter);
873 }
874 
875 /*NUMPY_API
876  * Gets the number of operands being iterated
877  */
878 NPY_NO_EXPORT int
NpyIter_GetNOp(NpyIter * iter)879 NpyIter_GetNOp(NpyIter *iter)
880 {
881     return NIT_NOP(iter);
882 }
883 
884 /*NUMPY_API
885  * Gets the number of elements being iterated
886  */
887 NPY_NO_EXPORT npy_intp
NpyIter_GetIterSize(NpyIter * iter)888 NpyIter_GetIterSize(NpyIter *iter)
889 {
890     return NIT_ITERSIZE(iter);
891 }
892 
893 /*NUMPY_API
894  * Whether the iterator is buffered
895  */
896 NPY_NO_EXPORT npy_bool
NpyIter_IsBuffered(NpyIter * iter)897 NpyIter_IsBuffered(NpyIter *iter)
898 {
899     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
900 }
901 
902 /*NUMPY_API
903  * Whether the inner loop can grow if buffering is unneeded
904  */
905 NPY_NO_EXPORT npy_bool
NpyIter_IsGrowInner(NpyIter * iter)906 NpyIter_IsGrowInner(NpyIter *iter)
907 {
908     return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
909 }
910 
911 /*NUMPY_API
912  * Gets the size of the buffer, or 0 if buffering is not enabled
913  */
914 NPY_NO_EXPORT npy_intp
NpyIter_GetBufferSize(NpyIter * iter)915 NpyIter_GetBufferSize(NpyIter *iter)
916 {
917     npy_uint32 itflags = NIT_ITFLAGS(iter);
918     /*int ndim = NIT_NDIM(iter);*/
919     int nop = NIT_NOP(iter);
920 
921     if (itflags&NPY_ITFLAG_BUFFER) {
922         NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
923         return NBF_BUFFERSIZE(bufferdata);
924     }
925     else {
926         return 0;
927     }
928 
929 }
930 
931 /*NUMPY_API
932  * Gets the range of iteration indices being iterated
933  */
934 NPY_NO_EXPORT void
NpyIter_GetIterIndexRange(NpyIter * iter,npy_intp * istart,npy_intp * iend)935 NpyIter_GetIterIndexRange(NpyIter *iter,
936                           npy_intp *istart, npy_intp *iend)
937 {
938     *istart = NIT_ITERSTART(iter);
939     *iend = NIT_ITEREND(iter);
940 }
941 
942 /*NUMPY_API
943  * Gets the broadcast shape if a multi-index is being tracked by the iterator,
944  * otherwise gets the shape of the iteration as Fortran-order
945  * (fastest-changing index first).
946  *
947  * The reason Fortran-order is returned when a multi-index
948  * is not enabled is that this is providing a direct view into how
949  * the iterator traverses the n-dimensional space. The iterator organizes
950  * its memory from fastest index to slowest index, and when
951  * a multi-index is enabled, it uses a permutation to recover the original
952  * order.
953  *
954  * Returns NPY_SUCCEED or NPY_FAIL.
955  */
956 NPY_NO_EXPORT int
NpyIter_GetShape(NpyIter * iter,npy_intp * outshape)957 NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
958 {
959     npy_uint32 itflags = NIT_ITFLAGS(iter);
960     int ndim = NIT_NDIM(iter);
961     int nop = NIT_NOP(iter);
962 
963     int idim, sizeof_axisdata;
964     NpyIter_AxisData *axisdata;
965     npy_int8 *perm;
966 
967     axisdata = NIT_AXISDATA(iter);
968     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
969 
970     if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
971         perm = NIT_PERM(iter);
972         for(idim = 0; idim < ndim; ++idim) {
973             int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
974             outshape[axis] = NAD_SHAPE(axisdata);
975 
976             NIT_ADVANCE_AXISDATA(axisdata, 1);
977         }
978     }
979     else {
980         for(idim = 0; idim < ndim; ++idim) {
981             outshape[idim] = NAD_SHAPE(axisdata);
982             NIT_ADVANCE_AXISDATA(axisdata, 1);
983         }
984     }
985 
986     return NPY_SUCCEED;
987 }
988 
989 /*NUMPY_API
990  * Builds a set of strides which are the same as the strides of an
991  * output array created using the NPY_ITER_ALLOCATE flag, where NULL
992  * was passed for op_axes.  This is for data packed contiguously,
993  * but not necessarily in C or Fortran order. This should be used
994  * together with NpyIter_GetShape and NpyIter_GetNDim.
995  *
996  * A use case for this function is to match the shape and layout of
997  * the iterator and tack on one or more dimensions.  For example,
998  * in order to generate a vector per input value for a numerical gradient,
999  * you pass in ndim*itemsize for itemsize, then add another dimension to
1000  * the end with size ndim and stride itemsize.  To do the Hessian matrix,
1001  * you do the same thing but add two dimensions, or take advantage of
1002  * the symmetry and pack it into 1 dimension with a particular encoding.
1003  *
1004  * This function may only be called if the iterator is tracking a multi-index
1005  * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
1006  * being iterated in reverse order.
1007  *
1008  * If an array is created with this method, simply adding 'itemsize'
1009  * for each iteration will traverse the new array matching the
1010  * iterator.
1011  *
1012  * Returns NPY_SUCCEED or NPY_FAIL.
1013  */
1014 NPY_NO_EXPORT int
NpyIter_CreateCompatibleStrides(NpyIter * iter,npy_intp itemsize,npy_intp * outstrides)1015 NpyIter_CreateCompatibleStrides(NpyIter *iter,
1016                             npy_intp itemsize, npy_intp *outstrides)
1017 {
1018     npy_uint32 itflags = NIT_ITFLAGS(iter);
1019     int idim, ndim = NIT_NDIM(iter);
1020     int nop = NIT_NOP(iter);
1021 
1022     npy_intp sizeof_axisdata;
1023     NpyIter_AxisData *axisdata;
1024     npy_int8 *perm;
1025 
1026     if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
1027         PyErr_SetString(PyExc_RuntimeError,
1028                 "Iterator CreateCompatibleStrides may only be called "
1029                 "if a multi-index is being tracked");
1030         return NPY_FAIL;
1031     }
1032 
1033     axisdata = NIT_AXISDATA(iter);
1034     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1035 
1036     perm = NIT_PERM(iter);
1037     for(idim = 0; idim < ndim; ++idim) {
1038         npy_bool flipped;
1039         npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped);
1040         if (flipped) {
1041             PyErr_SetString(PyExc_RuntimeError,
1042                     "Iterator CreateCompatibleStrides may only be called "
1043                     "if DONT_NEGATE_STRIDES was used to prevent reverse "
1044                     "iteration of an axis");
1045             return NPY_FAIL;
1046         }
1047         else {
1048             outstrides[axis] = itemsize;
1049         }
1050 
1051         itemsize *= NAD_SHAPE(axisdata);
1052         NIT_ADVANCE_AXISDATA(axisdata, 1);
1053     }
1054 
1055     return NPY_SUCCEED;
1056 }
1057 
1058 /*NUMPY_API
1059  * Get the array of data pointers (1 per object being iterated)
1060  *
1061  * This function may be safely called without holding the Python GIL.
1062  */
1063 NPY_NO_EXPORT char **
NpyIter_GetDataPtrArray(NpyIter * iter)1064 NpyIter_GetDataPtrArray(NpyIter *iter)
1065 {
1066     npy_uint32 itflags = NIT_ITFLAGS(iter);
1067     /*int ndim = NIT_NDIM(iter);*/
1068     int nop = NIT_NOP(iter);
1069 
1070     if (itflags&NPY_ITFLAG_BUFFER) {
1071         NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1072         return NBF_PTRS(bufferdata);
1073     }
1074     else {
1075         NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1076         return NAD_PTRS(axisdata);
1077     }
1078 }
1079 
1080 /*NUMPY_API
1081  * Get the array of data pointers (1 per object being iterated),
1082  * directly into the arrays (never pointing to a buffer), for starting
1083  * unbuffered iteration. This always returns the addresses for the
1084  * iterator position as reset to iterator index 0.
1085  *
1086  * These pointers are different from the pointers accepted by
1087  * NpyIter_ResetBasePointers, because the direction along some
1088  * axes may have been reversed, requiring base offsets.
1089  *
1090  * This function may be safely called without holding the Python GIL.
1091  */
1092 NPY_NO_EXPORT char **
NpyIter_GetInitialDataPtrArray(NpyIter * iter)1093 NpyIter_GetInitialDataPtrArray(NpyIter *iter)
1094 {
1095     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1096     /*int ndim = NIT_NDIM(iter);*/
1097     int nop = NIT_NOP(iter);
1098 
1099     return NIT_RESETDATAPTR(iter);
1100 }
1101 
1102 /*NUMPY_API
1103  * Get the array of data type pointers (1 per object being iterated)
1104  */
1105 NPY_NO_EXPORT PyArray_Descr **
NpyIter_GetDescrArray(NpyIter * iter)1106 NpyIter_GetDescrArray(NpyIter *iter)
1107 {
1108     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1109     /*int ndim = NIT_NDIM(iter);*/
1110     /*int nop = NIT_NOP(iter);*/
1111 
1112     return NIT_DTYPES(iter);
1113 }
1114 
1115 /*NUMPY_API
1116  * Get the array of objects being iterated
1117  */
1118 NPY_NO_EXPORT PyArrayObject **
NpyIter_GetOperandArray(NpyIter * iter)1119 NpyIter_GetOperandArray(NpyIter *iter)
1120 {
1121     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1122     /*int ndim = NIT_NDIM(iter);*/
1123     int nop = NIT_NOP(iter);
1124 
1125     return NIT_OPERANDS(iter);
1126 }
1127 
1128 /*NUMPY_API
1129  * Returns a view to the i-th object with the iterator's internal axes
1130  */
1131 NPY_NO_EXPORT PyArrayObject *
NpyIter_GetIterView(NpyIter * iter,npy_intp i)1132 NpyIter_GetIterView(NpyIter *iter, npy_intp i)
1133 {
1134     npy_uint32 itflags = NIT_ITFLAGS(iter);
1135     int idim, ndim = NIT_NDIM(iter);
1136     int nop = NIT_NOP(iter);
1137 
1138     npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
1139     PyArrayObject *obj, *view;
1140     PyArray_Descr *dtype;
1141     char *dataptr;
1142     NpyIter_AxisData *axisdata;
1143     npy_intp sizeof_axisdata;
1144     int writeable;
1145 
1146     if (i < 0) {
1147         PyErr_SetString(PyExc_IndexError,
1148                 "index provided for an iterator view was out of bounds");
1149         return NULL;
1150     }
1151 
1152     /* Don't provide views if buffering is enabled */
1153     if (itflags&NPY_ITFLAG_BUFFER) {
1154         PyErr_SetString(PyExc_ValueError,
1155                 "cannot provide an iterator view when buffering is enabled");
1156         return NULL;
1157     }
1158 
1159     obj = NIT_OPERANDS(iter)[i];
1160     dtype = PyArray_DESCR(obj);
1161     writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
1162     dataptr = NIT_RESETDATAPTR(iter)[i];
1163     axisdata = NIT_AXISDATA(iter);
1164     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1165 
1166     /* Retrieve the shape and strides from the axisdata */
1167     for (idim = 0; idim < ndim; ++idim) {
1168         shape[ndim-idim-1] = NAD_SHAPE(axisdata);
1169         strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];
1170 
1171         NIT_ADVANCE_AXISDATA(axisdata, 1);
1172     }
1173 
1174     Py_INCREF(dtype);
1175     view = (PyArrayObject *)PyArray_NewFromDescrAndBase(
1176             &PyArray_Type, dtype,
1177             ndim, shape, strides, dataptr,
1178             writeable ? NPY_ARRAY_WRITEABLE : 0, NULL, (PyObject *)obj);
1179 
1180     return view;
1181 }
1182 
1183 /*NUMPY_API
1184  * Get a pointer to the index, if it is being tracked
1185  */
1186 NPY_NO_EXPORT npy_intp *
NpyIter_GetIndexPtr(NpyIter * iter)1187 NpyIter_GetIndexPtr(NpyIter *iter)
1188 {
1189     npy_uint32 itflags = NIT_ITFLAGS(iter);
1190     /*int ndim = NIT_NDIM(iter);*/
1191     int nop = NIT_NOP(iter);
1192 
1193     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1194 
1195     if (itflags&NPY_ITFLAG_HASINDEX) {
1196         /* The index is just after the data pointers */
1197         return (npy_intp*)NAD_PTRS(axisdata) + nop;
1198     }
1199     else {
1200         return NULL;
1201     }
1202 }
1203 
1204 /*NUMPY_API
1205  * Gets an array of read flags (1 per object being iterated)
1206  */
1207 NPY_NO_EXPORT void
NpyIter_GetReadFlags(NpyIter * iter,char * outreadflags)1208 NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
1209 {
1210     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1211     /*int ndim = NIT_NDIM(iter);*/
1212     int iop, nop = NIT_NOP(iter);
1213 
1214     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1215 
1216     for (iop = 0; iop < nop; ++iop) {
1217         outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
1218     }
1219 }
1220 
1221 /*NUMPY_API
1222  * Gets an array of write flags (1 per object being iterated)
1223  */
1224 NPY_NO_EXPORT void
NpyIter_GetWriteFlags(NpyIter * iter,char * outwriteflags)1225 NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
1226 {
1227     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1228     /*int ndim = NIT_NDIM(iter);*/
1229     int iop, nop = NIT_NOP(iter);
1230 
1231     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1232 
1233     for (iop = 0; iop < nop; ++iop) {
1234         outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
1235     }
1236 }
1237 
1238 
1239 /*NUMPY_API
1240  * Get the array of strides for the inner loop (when HasExternalLoop is true)
1241  *
1242  * This function may be safely called without holding the Python GIL.
1243  */
1244 NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerStrideArray(NpyIter * iter)1245 NpyIter_GetInnerStrideArray(NpyIter *iter)
1246 {
1247     npy_uint32 itflags = NIT_ITFLAGS(iter);
1248     /*int ndim = NIT_NDIM(iter);*/
1249     int nop = NIT_NOP(iter);
1250 
1251     if (itflags&NPY_ITFLAG_BUFFER) {
1252         NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
1253         return NBF_STRIDES(data);
1254     }
1255     else {
1256         NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1257         return NAD_STRIDES(axisdata);
1258     }
1259 }
1260 
1261 /*NUMPY_API
1262  * Gets the array of strides for the specified axis.
1263  * If the iterator is tracking a multi-index, gets the strides
1264  * for the axis specified, otherwise gets the strides for
1265  * the iteration axis as Fortran order (fastest-changing axis first).
1266  *
1267  * Returns NULL if an error occurs.
1268  */
1269 NPY_NO_EXPORT npy_intp *
NpyIter_GetAxisStrideArray(NpyIter * iter,int axis)1270 NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
1271 {
1272     npy_uint32 itflags = NIT_ITFLAGS(iter);
1273     int idim, ndim = NIT_NDIM(iter);
1274     int nop = NIT_NOP(iter);
1275 
1276     npy_int8 *perm = NIT_PERM(iter);
1277     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1278     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1279 
1280     if (axis < 0 || axis >= ndim) {
1281         PyErr_SetString(PyExc_ValueError,
1282                 "axis out of bounds in iterator GetStrideAxisArray");
1283         return NULL;
1284     }
1285 
1286     if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
1287         /* Reverse axis, since the iterator treats them that way */
1288         axis = ndim-1-axis;
1289 
1290         /* First find the axis in question */
1291         for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1292             if (perm[idim] == axis || -1 - perm[idim] == axis) {
1293                 return NAD_STRIDES(axisdata);
1294             }
1295         }
1296     }
1297     else {
1298         return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
1299     }
1300 
1301     PyErr_SetString(PyExc_RuntimeError,
1302             "internal error in iterator perm");
1303     return  NULL;
1304 }
1305 
/*NUMPY_API
 * Get an array of strides which are fixed.  Any strides which may
 * change during iteration receive the value NPY_MAX_INTP.  Once
 * the iterator is ready to iterate, call this to get the strides
 * which will always be fixed in the inner loop, then choose optimized
 * inner loop functions which take advantage of those fixed strides.
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT void
NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
    /* sizeof_axisdata is consumed by the NIT_ADVANCE_AXISDATA macro below */
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
        npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
        npy_intp stride, *strides = NBF_STRIDES(data),
                *ad_strides = NAD_STRIDES(axisdata0);
        PyArray_Descr **dtypes = NIT_DTYPES(iter);

        /* Decide per operand whether its inner-loop stride can change */
        for (iop = 0; iop < nop; ++iop) {
            stride = strides[iop];
            /*
             * Operands which are always/never buffered have fixed strides,
             * and everything has fixed strides when ndim is 0 or 1
             */
            if (ndim <= 1 || (op_itflags[iop]&
                            (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
                out_strides[iop] = stride;
            }
            /* If it's a reduction, 0-stride inner loop may have fixed stride */
            else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
                /* If it's a reduction operand, definitely fixed stride */
                if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
                    out_strides[iop] = stride;
                }
                /*
                 * Otherwise it's guaranteed to be a fixed stride if the
                 * stride is 0 for all the dimensions.
                 */
                else {
                    /* Scan every axis; any non-zero stride breaks the case */
                    NpyIter_AxisData *axisdata = axisdata0;
                    int idim;
                    for (idim = 0; idim < ndim; ++idim) {
                        if (NAD_STRIDES(axisdata)[iop] != 0) {
                            break;
                        }
                        NIT_ADVANCE_AXISDATA(axisdata, 1);
                    }
                    /* If all the strides were 0, the stride won't change */
                    if (idim == ndim) {
                        out_strides[iop] = stride;
                    }
                    else {
                        out_strides[iop] = NPY_MAX_INTP;
                    }
                }
            }
            /*
             * Inner loop contiguous array means its stride won't change when
             * switching between buffering and not buffering
             */
            else if (ad_strides[iop] == dtypes[iop]->elsize) {
                out_strides[iop] = ad_strides[iop];
            }
            /*
             * Otherwise the strides can change if the operand is sometimes
             * buffered, sometimes not.
             */
            else {
                out_strides[iop] = NPY_MAX_INTP;
            }
        }
    }
    else {
        /* If there's no buffering, the strides are always fixed */
        memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
    }
}
1391 
1392 /*NUMPY_API
1393  * Get a pointer to the size of the inner loop  (when HasExternalLoop is true)
1394  *
1395  * This function may be safely called without holding the Python GIL.
1396  */
1397 NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerLoopSizePtr(NpyIter * iter)1398 NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
1399 {
1400     npy_uint32 itflags = NIT_ITFLAGS(iter);
1401     /*int ndim = NIT_NDIM(iter);*/
1402     int nop = NIT_NOP(iter);
1403 
1404     if (itflags&NPY_ITFLAG_BUFFER) {
1405         NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
1406         return &NBF_SIZE(data);
1407     }
1408     else {
1409         NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1410         return &NAD_SHAPE(axisdata);
1411     }
1412 }
1413 
1414 
/*NUMPY_API
 * For debugging
 *
 * Dumps the complete internal state of the iterator (flags, sizes,
 * permutation, per-operand data, buffer data, and per-axis data)
 * to stdout.
 */
NPY_NO_EXPORT void
NpyIter_DebugPrint(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    /* Printing the dtype objects below calls into Python; hold the GIL */
    NPY_ALLOW_C_API_DEF
    NPY_ALLOW_C_API

    printf("\n------ BEGIN ITERATOR DUMP ------\n");
    printf("| Iterator Address: %p\n", (void *)iter);
    /* Decode the global iterator flag bits */
    printf("| ItFlags: ");
    if (itflags&NPY_ITFLAG_IDENTPERM)
        printf("IDENTPERM ");
    if (itflags&NPY_ITFLAG_NEGPERM)
        printf("NEGPERM ");
    if (itflags&NPY_ITFLAG_HASINDEX)
        printf("HASINDEX ");
    if (itflags&NPY_ITFLAG_HASMULTIINDEX)
        printf("HASMULTIINDEX ");
    if (itflags&NPY_ITFLAG_FORCEDORDER)
        printf("FORCEDORDER ");
    if (itflags&NPY_ITFLAG_EXLOOP)
        printf("EXLOOP ");
    if (itflags&NPY_ITFLAG_RANGE)
        printf("RANGE ");
    if (itflags&NPY_ITFLAG_BUFFER)
        printf("BUFFER ");
    if (itflags&NPY_ITFLAG_GROWINNER)
        printf("GROWINNER ");
    if (itflags&NPY_ITFLAG_ONEITERATION)
        printf("ONEITERATION ");
    if (itflags&NPY_ITFLAG_DELAYBUF)
        printf("DELAYBUF ");
    if (itflags&NPY_ITFLAG_NEEDSAPI)
        printf("NEEDSAPI ");
    if (itflags&NPY_ITFLAG_REDUCE)
        printf("REDUCE ");
    if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
        printf("REUSE_REDUCE_LOOPS ");

    printf("\n");
    printf("| NDim: %d\n", ndim);
    printf("| NOp: %d\n", nop);
    if (NIT_MASKOP(iter) >= 0) {
        printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
    }
    printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
    printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
    printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
    printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
    printf("| Iterator SizeOf: %d\n",
                            (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
    printf("| BufferData SizeOf: %d\n",
                            (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
    printf("| AxisData SizeOf: %d\n",
                            (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
    printf("|\n");

    printf("| Perm: ");
    for (idim = 0; idim < ndim; ++idim) {
        printf("%d ", (int)NIT_PERM(iter)[idim]);
    }
    printf("\n");
    /* First the dtype pointers, then the dtypes printed as objects */
    printf("| DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
    }
    printf("\n");
    printf("| DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        if (NIT_DTYPES(iter)[iop] != NULL)
            PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
        else
            printf("(nil) ");
        printf(" ");
    }
    printf("\n");
    printf("| InitDataPtrs: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
    }
    printf("\n");
    printf("| BaseOffsets: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
    }
    printf("\n");
    if (itflags&NPY_ITFLAG_HASINDEX) {
        /* The initial index is stored as a pointer-sized slot after the
         * nop data pointers */
        printf("| InitIndex: %d\n",
                        (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
    }
    printf("| Operands: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
    }
    printf("\n");
    printf("| Operand DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        PyArray_Descr *dtype;
        if (NIT_OPERANDS(iter)[iop] != NULL) {
            dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
            if (dtype != NULL)
                PyObject_Print((PyObject *)dtype, stdout, 0);
            else
                printf("(nil) ");
        }
        else {
            printf("(op nil) ");
        }
        printf(" ");
    }
    printf("\n");
    /* Decode the per-operand flag bits */
    printf("| OpItFlags:\n");
    for (iop = 0; iop < nop; ++iop) {
        printf("|   Flags[%d]: ", (int)iop);
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
            printf("READ ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
            printf("WRITE ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
            printf("CAST ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
            printf("BUFNEVER ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
            printf("ALIGNED ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
            printf("REDUCE ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL)
            printf("VIRTUAL ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED)
            printf("WRITEMASKED ");
        printf("\n");
    }
    printf("|\n");

    /* Dump the buffering state, if buffering is enabled */
    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        printf("| BufferData:\n");
        printf("|   BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
        printf("|   Size: %d\n", (int)NBF_SIZE(bufferdata));
        printf("|   BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
        if (itflags&NPY_ITFLAG_REDUCE) {
            printf("|   REDUCE Pos: %d\n",
                        (int)NBF_REDUCE_POS(bufferdata));
            printf("|   REDUCE OuterSize: %d\n",
                        (int)NBF_REDUCE_OUTERSIZE(bufferdata));
            printf("|   REDUCE OuterDim: %d\n",
                        (int)NBF_REDUCE_OUTERDIM(bufferdata));
        }
        printf("|   Strides: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
        printf("\n");
        /* Print the fixed strides when there's no inner loop */
        if (itflags&NPY_ITFLAG_EXLOOP) {
            npy_intp fixedstrides[NPY_MAXDIMS];
            /*
             * NOTE(review): fixedstrides is filled per-operand (nop entries)
             * but sized NPY_MAXDIMS -- confirm NPY_MAXDIMS always covers the
             * maximum operand count.
             */
            printf("|   Fixed Strides: ");
            NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
            for (iop = 0; iop < nop; ++iop)
                printf("%d ", (int)fixedstrides[iop]);
            printf("\n");
        }
        printf("|   Ptrs: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
        printf("\n");
        if (itflags&NPY_ITFLAG_REDUCE) {
            printf("|   REDUCE Outer Strides: ");
            for (iop = 0; iop < nop; ++iop)
                printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
            printf("\n");
            printf("|   REDUCE Outer Ptrs: ");
            for (iop = 0; iop < nop; ++iop)
                printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
            printf("\n");
        }
        printf("|   ReadTransferFn: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
        printf("\n");
        printf("|   ReadTransferData: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
        printf("\n");
        printf("|   WriteTransferFn: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
        printf("\n");
        printf("|   WriteTransferData: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
        printf("\n");
        printf("|   Buffers: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
        printf("\n");
        printf("|\n");
    }

    /* Dump each per-axis data block, fastest-varying axis first */
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        printf("| AxisData[%d]:\n", (int)idim);
        printf("|   Shape: %d\n", (int)NAD_SHAPE(axisdata));
        printf("|   Index: %d\n", (int)NAD_INDEX(axisdata));
        printf("|   Strides: ");
        for (iop = 0; iop < nop; ++iop) {
            printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
        }
        printf("\n");
        if (itflags&NPY_ITFLAG_HASINDEX) {
            printf("|   Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
        }
        printf("|   Ptrs: ");
        for (iop = 0; iop < nop; ++iop) {
            printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
        }
        printf("\n");
        if (itflags&NPY_ITFLAG_HASINDEX) {
            printf("|   Index Value: %d\n",
                               (int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
        }
    }

    printf("------- END ITERATOR DUMP -------\n");
    fflush(stdout);

    NPY_DISABLE_C_API
}
1652 
/*
 * Merges adjacent axes whose memory layouts are contiguous with each
 * other into single larger axes, reducing the iterator's ndim.  Two
 * axes can merge when, for every stride slot, stepping through the
 * whole inner axis lands exactly at the next outer-axis element.
 * Compacts the axisdata array in place and resets the perm to identity
 * if the dimension count shrank.
 */
NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp istrides, nstrides = NAD_NSTRIDES();
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    /* ad_compress trails axisdata; merged axes accumulate into it */
    NpyIter_AxisData *ad_compress = axisdata;
    npy_intp new_ndim = 1;

    /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
    NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);

    for (idim = 0; idim < ndim-1; ++idim) {
        int can_coalesce = 1;
        /* Compare the current merged axis with the next axis */
        npy_intp shape0 = NAD_SHAPE(ad_compress);
        npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
        npy_intp *strides0 = NAD_STRIDES(ad_compress);
        npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));

        /* Check that all the axes can be coalesced */
        for (istrides = 0; istrides < nstrides; ++istrides) {
            /* A size-1 axis with 0 stride is compatible with anything */
            if (!((shape0 == 1 && strides0[istrides] == 0) ||
                  (shape1 == 1 && strides1[istrides] == 0)) &&
                     (strides0[istrides]*shape0 != strides1[istrides])) {
                can_coalesce = 0;
                break;
            }
        }

        if (can_coalesce) {
            npy_intp *strides = NAD_STRIDES(ad_compress);

            /* Fold the next axis into the current merged axis */
            NIT_ADVANCE_AXISDATA(axisdata, 1);
            NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
            for (istrides = 0; istrides < nstrides; ++istrides) {
                /* Pick up a real stride where the merged one was 0 */
                if (strides[istrides] == 0) {
                    strides[istrides] = NAD_STRIDES(axisdata)[istrides];
                }
            }
        }
        else {
            /* Keep this axis: advance both cursors, copying data down */
            NIT_ADVANCE_AXISDATA(axisdata, 1);
            NIT_ADVANCE_AXISDATA(ad_compress, 1);
            if (ad_compress != axisdata) {
                memcpy(ad_compress, axisdata, sizeof_axisdata);
            }
            ++new_ndim;
        }
    }

    /*
     * If the number of axes shrunk, reset the perm and
     * compress the data into the new layout.
     * (The axisdata entries were already compacted by the memcpy above.)
     */
    if (new_ndim < ndim) {
        npy_int8 *perm = NIT_PERM(iter);

        /* Reset to an identity perm */
        for (idim = 0; idim < new_ndim; ++idim) {
            perm[idim] = (npy_int8)idim;
        }
        NIT_NDIM(iter) = new_ndim;
    }
}
1721 
1722 /*
1723  * If errmsg is non-NULL, it should point to a variable which will
1724  * receive the error message, and no Python exception will be set.
1725  * This is so that the function can be called from code not holding
1726  * the GIL.
1727  */
1728 NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter * iter,char ** errmsg)1729 npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
1730 {
1731     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1732     /*int ndim = NIT_NDIM(iter);*/
1733     int iop = 0, nop = NIT_NOP(iter);
1734 
1735     npy_intp i;
1736     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1737     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1738     PyArray_Descr **op_dtype = NIT_DTYPES(iter);
1739     npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
1740     char *buffer, **buffers = NBF_BUFFERS(bufferdata);
1741 
1742     for (iop = 0; iop < nop; ++iop) {
1743         npyiter_opitflags flags = op_itflags[iop];
1744 
1745         /*
1746          * If we have determined that a buffer may be needed,
1747          * allocate one.
1748          */
1749         if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
1750             npy_intp itemsize = op_dtype[iop]->elsize;
1751             buffer = PyArray_malloc(itemsize*buffersize);
1752             if (buffer == NULL) {
1753                 if (errmsg == NULL) {
1754                     PyErr_NoMemory();
1755                 }
1756                 else {
1757                     *errmsg = "out of memory";
1758                 }
1759                 goto fail;
1760             }
1761             if (PyDataType_FLAGCHK(op_dtype[iop], NPY_NEEDS_INIT)) {
1762                 memset(buffer, '\0', itemsize*buffersize);
1763             }
1764             buffers[iop] = buffer;
1765         }
1766     }
1767 
1768     return 1;
1769 
1770 fail:
1771     for (i = 0; i < iop; ++i) {
1772         if (buffers[i] != NULL) {
1773             PyArray_free(buffers[i]);
1774             buffers[i] = NULL;
1775         }
1776     }
1777     return 0;
1778 }
1779 
1780 /*
1781  * This sets the AXISDATA portion of the iterator to the specified
1782  * iterindex, updating the pointers as well.  This function does
1783  * no error checking.
1784  */
1785 NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter * iter,npy_intp iterindex)1786 npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
1787 {
1788     npy_uint32 itflags = NIT_ITFLAGS(iter);
1789     int idim, ndim = NIT_NDIM(iter);
1790     int nop = NIT_NOP(iter);
1791 
1792     char **dataptr;
1793     NpyIter_AxisData *axisdata;
1794     npy_intp sizeof_axisdata;
1795     npy_intp istrides, nstrides, i, shape;
1796 
1797     axisdata = NIT_AXISDATA(iter);
1798     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1799     nstrides = NAD_NSTRIDES();
1800 
1801     NIT_ITERINDEX(iter) = iterindex;
1802 
1803     ndim = ndim ? ndim : 1;
1804 
1805     if (iterindex == 0) {
1806         dataptr = NIT_RESETDATAPTR(iter);
1807 
1808         for (idim = 0; idim < ndim; ++idim) {
1809             char **ptrs;
1810             NAD_INDEX(axisdata) = 0;
1811             ptrs = NAD_PTRS(axisdata);
1812             for (istrides = 0; istrides < nstrides; ++istrides) {
1813                 ptrs[istrides] = dataptr[istrides];
1814             }
1815 
1816             NIT_ADVANCE_AXISDATA(axisdata, 1);
1817         }
1818     }
1819     else {
1820         /*
1821          * Set the multi-index, from the fastest-changing to the
1822          * slowest-changing.
1823          */
1824         axisdata = NIT_AXISDATA(iter);
1825         shape = NAD_SHAPE(axisdata);
1826         i = iterindex;
1827         iterindex /= shape;
1828         NAD_INDEX(axisdata) = i - iterindex * shape;
1829         for (idim = 0; idim < ndim-1; ++idim) {
1830             NIT_ADVANCE_AXISDATA(axisdata, 1);
1831 
1832             shape = NAD_SHAPE(axisdata);
1833             i = iterindex;
1834             iterindex /= shape;
1835             NAD_INDEX(axisdata) = i - iterindex * shape;
1836         }
1837 
1838         dataptr = NIT_RESETDATAPTR(iter);
1839 
1840         /*
1841          * Accumulate the successive pointers with their
1842          * offsets in the opposite order, starting from the
1843          * original data pointers.
1844          */
1845         for (idim = 0; idim < ndim; ++idim) {
1846             npy_intp *strides;
1847             char **ptrs;
1848 
1849             strides = NAD_STRIDES(axisdata);
1850             ptrs = NAD_PTRS(axisdata);
1851 
1852             i = NAD_INDEX(axisdata);
1853 
1854             for (istrides = 0; istrides < nstrides; ++istrides) {
1855                 ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
1856             }
1857 
1858             dataptr = ptrs;
1859 
1860             NIT_ADVANCE_AXISDATA(axisdata, -1);
1861         }
1862     }
1863 }
1864 
1865 /*
1866  * This gets called after the buffers have been exhausted, and
1867  * their data needs to be written back to the arrays.  The multi-index
1868  * must be positioned for the beginning of the buffer.
1869  */
1870 NPY_NO_EXPORT int
npyiter_copy_from_buffers(NpyIter * iter)1871 npyiter_copy_from_buffers(NpyIter *iter)
1872 {
1873     npy_uint32 itflags = NIT_ITFLAGS(iter);
1874     int ndim = NIT_NDIM(iter);
1875     int iop, nop = NIT_NOP(iter);
1876     int maskop = NIT_MASKOP(iter);
1877 
1878     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1879     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1880     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
1881                     *reduce_outeraxisdata = NULL;
1882 
1883     PyArray_Descr **dtypes = NIT_DTYPES(iter);
1884     npy_intp transfersize = NBF_SIZE(bufferdata);
1885     npy_intp *strides = NBF_STRIDES(bufferdata),
1886              *ad_strides = NAD_STRIDES(axisdata);
1887     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1888     char **ad_ptrs = NAD_PTRS(axisdata);
1889     char **buffers = NBF_BUFFERS(bufferdata);
1890     char *buffer;
1891 
1892     npy_intp reduce_outerdim = 0;
1893     npy_intp *reduce_outerstrides = NULL;
1894 
1895     PyArray_StridedUnaryOp *stransfer = NULL;
1896     NpyAuxData *transferdata = NULL;
1897 
1898     npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
1899                                 NPY_SIZEOF_INTP;
1900 
1901     /* If we're past the end, nothing to copy */
1902     if (NBF_SIZE(bufferdata) == 0) {
1903         return 0;
1904     }
1905 
1906     NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
1907 
1908     if (itflags&NPY_ITFLAG_REDUCE) {
1909         reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
1910         reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
1911         reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
1912         transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
1913     }
1914 
1915     for (iop = 0; iop < nop; ++iop) {
1916         stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
1917         transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
1918         buffer = buffers[iop];
1919         /*
1920          * Copy the data back to the arrays.  If the type has refs,
1921          * this function moves them so the buffer's refs are released.
1922          *
1923          * The flag USINGBUFFER is set when the buffer was used, so
1924          * only copy back when this flag is on.
1925          */
1926         if ((stransfer != NULL) &&
1927                (op_itflags[iop]&(NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER))
1928                         == (NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER)) {
1929             npy_intp op_transfersize;
1930 
1931             npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
1932             int ndim_transfer;
1933 
1934             NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
1935                                         (int)iop);
1936 
1937             /*
1938              * If this operand is being reduced in the inner loop,
1939              * its buffering stride was set to zero, and just
1940              * one element was copied.
1941              */
1942             if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
1943                 if (strides[iop] == 0) {
1944                     if (reduce_outerstrides[iop] == 0) {
1945                         op_transfersize = 1;
1946                         src_stride = 0;
1947                         dst_strides = &src_stride;
1948                         dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1949                         dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1950                         ndim_transfer = 1;
1951                     }
1952                     else {
1953                         op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
1954                         src_stride = reduce_outerstrides[iop];
1955                         dst_strides =
1956                                 &NAD_STRIDES(reduce_outeraxisdata)[iop];
1957                         dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1958                         dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1959                         ndim_transfer = ndim - reduce_outerdim;
1960                     }
1961                 }
1962                 else {
1963                     if (reduce_outerstrides[iop] == 0) {
1964                         op_transfersize = NBF_SIZE(bufferdata);
1965                         src_stride = strides[iop];
1966                         dst_strides = &ad_strides[iop];
1967                         dst_coords = &NAD_INDEX(axisdata);
1968                         dst_shape = &NAD_SHAPE(axisdata);
1969                         ndim_transfer = reduce_outerdim ?
1970                                         reduce_outerdim : 1;
1971                     }
1972                     else {
1973                         op_transfersize = transfersize;
1974                         src_stride = strides[iop];
1975                         dst_strides = &ad_strides[iop];
1976                         dst_coords = &NAD_INDEX(axisdata);
1977                         dst_shape = &NAD_SHAPE(axisdata);
1978                         ndim_transfer = ndim;
1979                     }
1980                 }
1981             }
1982             else {
1983                 op_transfersize = transfersize;
1984                 src_stride = strides[iop];
1985                 dst_strides = &ad_strides[iop];
1986                 dst_coords = &NAD_INDEX(axisdata);
1987                 dst_shape = &NAD_SHAPE(axisdata);
1988                 ndim_transfer = ndim;
1989             }
1990 
1991             NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
1992                                 "operand %d (%d items)\n",
1993                                 (int)iop, (int)op_transfersize);
1994 
1995             /* WRITEMASKED operand */
1996             if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
1997                 npy_bool *maskptr;
1998 
1999                 /*
2000                  * The mask pointer may be in the buffer or in
2001                  * the array, detect which one.
2002                  */
2003                 if ((op_itflags[maskop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2004                     maskptr = (npy_bool *)buffers[maskop];
2005                 }
2006                 else {
2007                     maskptr = (npy_bool *)ad_ptrs[maskop];
2008                 }
2009 
2010                 if (PyArray_TransferMaskedStridedToNDim(ndim_transfer,
2011                         ad_ptrs[iop], dst_strides, axisdata_incr,
2012                         buffer, src_stride,
2013                         maskptr, strides[maskop],
2014                         dst_coords, axisdata_incr,
2015                         dst_shape, axisdata_incr,
2016                         op_transfersize, dtypes[iop]->elsize,
2017                         (PyArray_MaskedStridedUnaryOp *)stransfer,
2018                         transferdata) < 0) {
2019                     return -1;
2020                 }
2021             }
2022             /* Regular operand */
2023             else {
2024                 if (PyArray_TransferStridedToNDim(ndim_transfer,
2025                         ad_ptrs[iop], dst_strides, axisdata_incr,
2026                         buffer, src_stride,
2027                         dst_coords, axisdata_incr,
2028                         dst_shape, axisdata_incr,
2029                         op_transfersize, dtypes[iop]->elsize,
2030                         stransfer,
2031                         transferdata) < 0) {
2032                     return -1;
2033                 }
2034             }
2035         }
2036         /* If there's no copy back, we may have to decrement refs.  In
2037          * this case, the transfer function has a 'decsrcref' transfer
2038          * function, so we can use it to do the decrement.
2039          *
2040          * The flag USINGBUFFER is set when the buffer was used, so
2041          * only decrement refs when this flag is on.
2042          */
2043         else if (stransfer != NULL &&
2044                        (op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2045             NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
2046                                 "of operand %d\n", (int)iop);
2047             /* Decrement refs */
2048             if (stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
2049                           transfersize, dtypes[iop]->elsize,
2050                           transferdata) < 0) {
2051                 /* Since this should only decrement, it should never error */
2052                 assert(0);
2053                 return -1;
2054             }
2055             /*
2056              * Zero out the memory for safety.  For instance,
2057              * if during iteration some Python code copied an
2058              * array pointing into the buffer, it will get None
2059              * values for its references after this.
2060              */
2061             memset(buffer, 0, dtypes[iop]->elsize*transfersize);
2062         }
2063     }
2064 
2065     NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
2066     return 0;
2067 }
2068 
2069 /*
2070  * This gets called after the iterator has been positioned to a multi-index
2071  * for the start of a buffer.  It decides which operands need a buffer,
2072  * and copies the data into the buffers.
2073  */
2074 NPY_NO_EXPORT int
npyiter_copy_to_buffers(NpyIter * iter,char ** prev_dataptrs)2075 npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
2076 {
2077     npy_uint32 itflags = NIT_ITFLAGS(iter);
2078     int ndim = NIT_NDIM(iter);
2079     int iop, nop = NIT_NOP(iter);
2080 
2081     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2082     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2083     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
2084                     *reduce_outeraxisdata = NULL;
2085 
2086     PyArray_Descr **dtypes = NIT_DTYPES(iter);
2087     PyArrayObject **operands = NIT_OPERANDS(iter);
2088     npy_intp *strides = NBF_STRIDES(bufferdata),
2089              *ad_strides = NAD_STRIDES(axisdata);
2090     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2091     char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
2092     char **buffers = NBF_BUFFERS(bufferdata);
2093     npy_intp iterindex, iterend, transfersize,
2094             singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
2095     int is_onestride = 0, any_buffered = 0;
2096 
2097     npy_intp *reduce_outerstrides = NULL;
2098     char **reduce_outerptrs = NULL;
2099 
2100     PyArray_StridedUnaryOp *stransfer = NULL;
2101     NpyAuxData *transferdata = NULL;
2102 
2103     /*
2104      * Have to get this flag before npyiter_checkreducesize sets
2105      * it for the next iteration.
2106      */
2107     npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
2108                     ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
2109 
2110     npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
2111                                 NPY_SIZEOF_INTP;
2112 
2113     NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
2114 
2115     /* Calculate the size if using any buffers */
2116     iterindex = NIT_ITERINDEX(iter);
2117     iterend = NIT_ITEREND(iter);
2118     transfersize = NBF_BUFFERSIZE(bufferdata);
2119     if (transfersize > iterend - iterindex) {
2120         transfersize = iterend - iterindex;
2121     }
2122 
2123     /* If last time around, the reduce loop structure was full, we reuse it */
2124     if (reuse_reduce_loops) {
2125         npy_intp full_transfersize, prev_reduce_outersize;
2126 
2127         prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2128         reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2129         reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2130         reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
2131         reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2132         reduce_innersize = NBF_SIZE(bufferdata);
2133         NBF_REDUCE_POS(bufferdata) = 0;
2134         /*
2135          * Try to do make the outersize as big as possible. This allows
2136          * it to shrink when processing the last bit of the outer reduce loop,
2137          * then grow again at the beginnning of the next outer reduce loop.
2138          */
2139         NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
2140                                             NAD_INDEX(reduce_outeraxisdata));
2141         full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2142         /* If the full transfer size doesn't fit in the buffer, truncate it */
2143         if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
2144             NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2145             transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2146         }
2147         else {
2148             transfersize = full_transfersize;
2149         }
2150         if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
2151             /*
2152              * If the previous time around less data was copied it may not
2153              * be safe to reuse the buffers even if the pointers match.
2154              */
2155             reuse_reduce_loops = 0;
2156         }
2157         NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2158 
2159         NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
2160                         "itersize: %d\n",
2161                             (int)transfersize,
2162                             (int)reduce_innersize,
2163                             (int)NpyIter_GetIterSize(iter));
2164         NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
2165                             (int)NBF_REDUCE_OUTERSIZE(bufferdata));
2166     }
2167     /*
2168      * If there are any reduction operands, we may have to make
2169      * the size smaller so we don't copy the same value into
2170      * a buffer twice, as the buffering does not have a mechanism
2171      * to combine values itself.
2172      */
2173     else if (itflags&NPY_ITFLAG_REDUCE) {
2174         NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
2175         transfersize = npyiter_checkreducesize(iter, transfersize,
2176                                                 &reduce_innersize,
2177                                                 &reduce_outerdim);
2178         NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
2179                         "itersize: %d\n",
2180                             (int)transfersize,
2181                             (int)reduce_innersize,
2182                             (int)NpyIter_GetIterSize(iter));
2183 
2184         reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2185         reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2186         reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2187         NBF_SIZE(bufferdata) = reduce_innersize;
2188         NBF_REDUCE_POS(bufferdata) = 0;
2189         NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
2190         NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2191         if (reduce_innersize == 0) {
2192             NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
2193             return 0;
2194         }
2195         else {
2196             NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2197         }
2198     }
2199     else {
2200         NBF_SIZE(bufferdata) = transfersize;
2201         NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
2202     }
2203 
2204     /* Calculate the maximum size if using a single stride and no buffers */
2205     singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
2206     if (singlestridesize > iterend - iterindex) {
2207         singlestridesize = iterend - iterindex;
2208     }
2209     if (singlestridesize >= transfersize) {
2210         is_onestride = 1;
2211     }
2212 
2213     for (iop = 0; iop < nop; ++iop) {
2214         /*
2215          * If the buffer is write-only, these two are NULL, and the buffer
2216          * pointers will be set up but the read copy won't be done
2217          */
2218         stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
2219         transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
2220         switch (op_itflags[iop]&
2221                         (NPY_OP_ITFLAG_BUFNEVER|
2222                          NPY_OP_ITFLAG_CAST|
2223                          NPY_OP_ITFLAG_REDUCE)) {
2224             /* Never need to buffer this operand */
2225             case NPY_OP_ITFLAG_BUFNEVER:
2226                 ptrs[iop] = ad_ptrs[iop];
2227                 if (itflags&NPY_ITFLAG_REDUCE) {
2228                     reduce_outerstrides[iop] = reduce_innersize *
2229                                                  strides[iop];
2230                     reduce_outerptrs[iop] = ptrs[iop];
2231                 }
2232                 /*
2233                  * Should not adjust the stride - ad_strides[iop]
2234                  * could be zero, but strides[iop] was initialized
2235                  * to the first non-trivial stride.
2236                  */
2237                 stransfer = NULL;
2238                 /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2239                 break;
2240             /* Never need to buffer this operand */
2241             case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
2242                 ptrs[iop] = ad_ptrs[iop];
2243                 reduce_outerptrs[iop] = ptrs[iop];
2244                 reduce_outerstrides[iop] = 0;
2245                 /*
2246                  * Should not adjust the stride - ad_strides[iop]
2247                  * could be zero, but strides[iop] was initialized
2248                  * to the first non-trivial stride.
2249                  */
2250                 stransfer = NULL;
2251                 /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2252                 break;
2253             /* Just a copy */
2254             case 0:
2255                 /* Do not reuse buffer if it did not exist */
2256                 if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2257                                                 (prev_dataptrs != NULL)) {
2258                     prev_dataptrs[iop] = NULL;
2259                 }
2260                 /*
2261                  * No copyswap or cast was requested, so all we're
2262                  * doing is copying the data to fill the buffer and
2263                  * produce a single stride.  If the underlying data
2264                  * already does that, no need to copy it.
2265                  */
2266                 if (is_onestride) {
2267                     ptrs[iop] = ad_ptrs[iop];
2268                     strides[iop] = ad_strides[iop];
2269                     stransfer = NULL;
2270                     /* Signal that the buffer is not being used */
2271                     op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2272                 }
2273                 /* If some other op is reduced, we have a double reduce loop */
2274                 else if ((itflags&NPY_ITFLAG_REDUCE) &&
2275                                 (reduce_outerdim == 1) &&
2276                                 (transfersize/reduce_innersize <=
2277                                             NAD_SHAPE(reduce_outeraxisdata) -
2278                                             NAD_INDEX(reduce_outeraxisdata))) {
2279                     ptrs[iop] = ad_ptrs[iop];
2280                     reduce_outerptrs[iop] = ptrs[iop];
2281                     strides[iop] = ad_strides[iop];
2282                     reduce_outerstrides[iop] =
2283                                     NAD_STRIDES(reduce_outeraxisdata)[iop];
2284                     stransfer = NULL;
2285                     /* Signal that the buffer is not being used */
2286                     op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2287                 }
2288                 else {
2289                     /* In this case, the buffer is being used */
2290                     ptrs[iop] = buffers[iop];
2291                     strides[iop] = dtypes[iop]->elsize;
2292                     if (itflags&NPY_ITFLAG_REDUCE) {
2293                         reduce_outerstrides[iop] = reduce_innersize *
2294                                                      strides[iop];
2295                         reduce_outerptrs[iop] = ptrs[iop];
2296                     }
2297                     /* Signal that the buffer is being used */
2298                     op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2299                 }
2300                 break;
2301             /* Just a copy, but with a reduction */
2302             case NPY_OP_ITFLAG_REDUCE:
2303                 /* Do not reuse buffer if it did not exist */
2304                 if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2305                                                 (prev_dataptrs != NULL)) {
2306                     prev_dataptrs[iop] = NULL;
2307                 }
2308                 if (ad_strides[iop] == 0) {
2309                     strides[iop] = 0;
2310                     /* It's all in one stride in the inner loop dimension */
2311                     if (is_onestride) {
2312                         NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
2313                         ptrs[iop] = ad_ptrs[iop];
2314                         reduce_outerstrides[iop] = 0;
2315                         stransfer = NULL;
2316                         /* Signal that the buffer is not being used */
2317                         op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2318                     }
2319                     /* It's all in one stride in the reduce outer loop */
2320                     else if ((reduce_outerdim > 0) &&
2321                                     (transfersize/reduce_innersize <=
2322                                             NAD_SHAPE(reduce_outeraxisdata) -
2323                                             NAD_INDEX(reduce_outeraxisdata))) {
2324                         NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
2325                                                             (int)iop);
2326                         ptrs[iop] = ad_ptrs[iop];
2327                         /* Outer reduce loop advances by one item */
2328                         reduce_outerstrides[iop] =
2329                                 NAD_STRIDES(reduce_outeraxisdata)[iop];
2330                         stransfer = NULL;
2331                         /* Signal that the buffer is not being used */
2332                         op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2333                     }
2334                     /* In this case, the buffer is being used */
2335                     else {
2336                         NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
2337                         ptrs[iop] = buffers[iop];
2338                         /* Both outer and inner reduce loops have stride 0 */
2339                         if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2340                             reduce_outerstrides[iop] = 0;
2341                         }
2342                         /* Outer reduce loop advances by one item */
2343                         else {
2344                             reduce_outerstrides[iop] = dtypes[iop]->elsize;
2345                         }
2346                         /* Signal that the buffer is being used */
2347                         op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2348                     }
2349 
2350                 }
2351                 else if (is_onestride) {
2352                     NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
2353                     ptrs[iop] = ad_ptrs[iop];
2354                     strides[iop] = ad_strides[iop];
2355                     reduce_outerstrides[iop] = 0;
2356                     stransfer = NULL;
2357                     /* Signal that the buffer is not being used */
2358                     op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2359                 }
2360                 else {
2361                     /* It's all in one stride in the reduce outer loop */
2362                     if ((reduce_outerdim == 1) &&
2363                                     (transfersize/reduce_innersize <=
2364                                             NAD_SHAPE(reduce_outeraxisdata) -
2365                                             NAD_INDEX(reduce_outeraxisdata))) {
2366                         ptrs[iop] = ad_ptrs[iop];
2367                         strides[iop] = ad_strides[iop];
2368                         /* Outer reduce loop advances by one item */
2369                         reduce_outerstrides[iop] =
2370                                 NAD_STRIDES(reduce_outeraxisdata)[iop];
2371                         stransfer = NULL;
2372                         /* Signal that the buffer is not being used */
2373                         op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2374                     }
2375                     /* In this case, the buffer is being used */
2376                     else {
2377                         ptrs[iop] = buffers[iop];
2378                         strides[iop] = dtypes[iop]->elsize;
2379 
2380                         if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2381                             /* Reduction in outer reduce loop */
2382                             reduce_outerstrides[iop] = 0;
2383                         }
2384                         else {
2385                             /* Advance to next items in outer reduce loop */
2386                             reduce_outerstrides[iop] = reduce_innersize *
2387                                                          dtypes[iop]->elsize;
2388                         }
2389                         /* Signal that the buffer is being used */
2390                         op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2391                     }
2392                 }
2393                 reduce_outerptrs[iop] = ptrs[iop];
2394                 break;
2395             default:
2396                 /* In this case, the buffer is always being used */
2397                 any_buffered = 1;
2398 
2399                 /* Signal that the buffer is being used */
2400                 op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2401 
2402                 if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
2403                     ptrs[iop] = buffers[iop];
2404                     strides[iop] = dtypes[iop]->elsize;
2405                     if (itflags&NPY_ITFLAG_REDUCE) {
2406                         reduce_outerstrides[iop] = reduce_innersize *
2407                                                      strides[iop];
2408                         reduce_outerptrs[iop] = ptrs[iop];
2409                     }
2410                 }
2411                 /* The buffer is being used with reduction */
2412                 else {
2413                     ptrs[iop] = buffers[iop];
2414                     if (ad_strides[iop] == 0) {
2415                         NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
2416                         strides[iop] = 0;
2417                         /* Both outer and inner reduce loops have stride 0 */
2418                         if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2419                             NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2420                             reduce_outerstrides[iop] = 0;
2421                         }
2422                         /* Outer reduce loop advances by one item */
2423                         else {
2424                             NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2425                             reduce_outerstrides[iop] = dtypes[iop]->elsize;
2426                         }
2427                     }
2428                     else {
2429                         NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
2430                         strides[iop] = dtypes[iop]->elsize;
2431 
2432                         if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2433                             NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2434                             /* Reduction in outer reduce loop */
2435                             reduce_outerstrides[iop] = 0;
2436                         }
2437                         else {
2438                             NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2439                             /* Advance to next items in outer reduce loop */
2440                             reduce_outerstrides[iop] = reduce_innersize *
2441                                                          dtypes[iop]->elsize;
2442                         }
2443                     }
2444                     reduce_outerptrs[iop] = ptrs[iop];
2445                 }
2446                 break;
2447         }
2448 
2449         if (stransfer != NULL) {
2450             npy_intp src_itemsize;
2451             npy_intp op_transfersize;
2452 
2453             npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
2454             int ndim_transfer;
2455 
2456             npy_bool skip_transfer = 0;
2457 
2458             src_itemsize = PyArray_DTYPE(operands[iop])->elsize;
2459 
2460             /* If stransfer wasn't set to NULL, buffering is required */
2461             any_buffered = 1;
2462 
2463             /*
2464              * If this operand is being reduced in the inner loop,
2465              * set its buffering stride to zero, and just copy
2466              * one element.
2467              */
2468             if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
2469                 if (ad_strides[iop] == 0) {
2470                     strides[iop] = 0;
2471                     if (reduce_outerstrides[iop] == 0) {
2472                         op_transfersize = 1;
2473                         dst_stride = 0;
2474                         src_strides = &dst_stride;
2475                         src_coords = &NAD_INDEX(reduce_outeraxisdata);
2476                         src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2477                         ndim_transfer = 1;
2478 
2479                         /*
2480                          * When we're reducing a single element, and
2481                          * it's still the same element, don't overwrite
2482                          * it even when reuse reduce loops is unset.
2483                          * This preserves the precision of the
2484                          * intermediate calculation.
2485                          */
2486                         if (prev_dataptrs &&
2487                                     prev_dataptrs[iop] == ad_ptrs[iop]) {
2488                             NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
2489                                     " copy because it's a 1-element reduce\n",
2490                                     (int)iop);
2491 
2492                             skip_transfer = 1;
2493                         }
2494                     }
2495                     else {
2496                         op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2497                         dst_stride = reduce_outerstrides[iop];
2498                         src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
2499                         src_coords = &NAD_INDEX(reduce_outeraxisdata);
2500                         src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2501                         ndim_transfer = ndim - reduce_outerdim;
2502                     }
2503                 }
2504                 else {
2505                     if (reduce_outerstrides[iop] == 0) {
2506                         op_transfersize = NBF_SIZE(bufferdata);
2507                         dst_stride = strides[iop];
2508                         src_strides = &ad_strides[iop];
2509                         src_coords = &NAD_INDEX(axisdata);
2510                         src_shape = &NAD_SHAPE(axisdata);
2511                         ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
2512                     }
2513                     else {
2514                         op_transfersize = transfersize;
2515                         dst_stride = strides[iop];
2516                         src_strides = &ad_strides[iop];
2517                         src_coords = &NAD_INDEX(axisdata);
2518                         src_shape = &NAD_SHAPE(axisdata);
2519                         ndim_transfer = ndim;
2520                     }
2521                 }
2522             }
2523             else {
2524                 op_transfersize = transfersize;
2525                 dst_stride = strides[iop];
2526                 src_strides = &ad_strides[iop];
2527                 src_coords = &NAD_INDEX(axisdata);
2528                 src_shape = &NAD_SHAPE(axisdata);
2529                 ndim_transfer = ndim;
2530             }
2531 
2532             /*
2533              * If the whole buffered loop structure remains the same,
2534              * and the source pointer for this data didn't change,
2535              * we don't have to copy the data again.
2536              */
2537             if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
2538                 NPY_IT_DBG_PRINT2("Iterator: skipping operands %d "
2539                         "copy (%d items) because loops are reused and the data "
2540                         "pointer didn't change\n",
2541                         (int)iop, (int)op_transfersize);
2542                 skip_transfer = 1;
2543             }
2544 
2545             /*
2546              * Copy data to the buffers if necessary.
2547              *
2548              * We always copy if the operand has references. In that case
2549              * a "write" function must be in use that either copies or clears
2550              * the buffer.
2551              * This write from buffer call does not check for skip-transfer
2552              * so we have to assume the buffer is cleared.  For dtypes that
2553              * do not have references, we can assume that the write function
2554              * will leave the source (buffer) unmodified.
2555              */
2556             if (!skip_transfer || PyDataType_REFCHK(dtypes[iop])) {
2557                 NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
2558                                 "buffer (%d items)\n",
2559                                 (int)iop, (int)op_transfersize);
2560 
2561                 if (PyArray_TransferNDimToStrided(
2562                                 ndim_transfer, ptrs[iop], dst_stride,
2563                                 ad_ptrs[iop], src_strides, axisdata_incr,
2564                                 src_coords, axisdata_incr,
2565                                 src_shape, axisdata_incr,
2566                                 op_transfersize, src_itemsize,
2567                                 stransfer, transferdata) < 0) {
2568                     return -1;
2569                 }
2570             }
2571         }
2572     }
2573 
2574     /*
2575      * If buffering wasn't needed, we can grow the inner
2576      * loop to as large as possible.
2577      *
2578      * TODO: Could grow REDUCE loop too with some more logic above.
2579      */
2580     if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
2581                         !(itflags&NPY_ITFLAG_REDUCE)) {
2582         if (singlestridesize > transfersize) {
2583             NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
2584                     "from %d to %d since buffering wasn't needed\n",
2585                     (int)NBF_SIZE(bufferdata), (int)singlestridesize);
2586             NBF_SIZE(bufferdata) = singlestridesize;
2587             NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
2588         }
2589     }
2590 
2591     NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
2592 
2593     NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
2594                         "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
2595     return 0;
2596 }
2597 
2598 
2599 /**
2600  * This function clears any references still held by the buffers and should
2601  * only be used to discard buffers if an error occurred.
2602  *
2603  * @param iter Iterator
2604  */
2605 NPY_NO_EXPORT void
npyiter_clear_buffers(NpyIter * iter)2606 npyiter_clear_buffers(NpyIter *iter)
2607 {
2608     int nop = iter->nop;
2609     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2610 
2611     if (NBF_SIZE(bufferdata) == 0) {
2612         /* if the buffers are empty already, there is nothing to do */
2613         return;
2614     }
2615 
2616     if (!(NIT_ITFLAGS(iter) & NPY_ITFLAG_NEEDSAPI)) {
2617         /* Buffers do not require clearing, but should not be copied back */
2618         NBF_SIZE(bufferdata) = 0;
2619         return;
2620     }
2621 
2622     /*
2623      * The iterator may be using a dtype with references, which always
2624      * requires the API. In that case, further cleanup may be necessary.
2625      *
2626      * TODO: At this time, we assume that a dtype having references
2627      *       implies the need to hold the GIL at all times. In theory
2628      *       we could broaden this definition for a new
2629      *       `PyArray_Item_XDECREF` API and the assumption may become
2630      *       incorrect.
2631      */
2632     PyObject *type, *value, *traceback;
2633     PyErr_Fetch(&type,  &value, &traceback);
2634 
2635     /* Cleanup any buffers with references */
2636     char **buffers = NBF_BUFFERS(bufferdata);
2637     PyArray_Descr **dtypes = NIT_DTYPES(iter);
2638     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2639     for (int iop = 0; iop < nop; ++iop, ++buffers) {
2640         /*
2641          * We may want to find a better way to do this, on the other hand,
2642          * this cleanup seems rare and fairly special.  A dtype using
2643          * references (right now only us) must always keep the buffer in
2644          * a well defined state (either NULL or owning the reference).
2645          * Only we implement cleanup
2646          */
2647         if (!PyDataType_REFCHK(dtypes[iop]) ||
2648                 !(op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER)) {
2649             continue;
2650         }
2651         if (*buffers == 0) {
2652             continue;
2653         }
2654         int itemsize = dtypes[iop]->elsize;
2655         for (npy_intp i = 0; i < NBF_SIZE(bufferdata); i++) {
2656             /*
2657              * See above comment, if this API is expanded the GIL assumption
2658              * could become incorrect.
2659              */
2660             PyArray_Item_XDECREF(*buffers + (itemsize * i), dtypes[iop]);
2661         }
2662         /* Clear out the buffer just to be sure */
2663         memset(*buffers, 0, NBF_SIZE(bufferdata) * itemsize);
2664     }
2665     /* Signal that the buffers are empty */
2666     NBF_SIZE(bufferdata) = 0;
2667     PyErr_Restore(type, value, traceback);
2668 }
2669 
2670 
/*
 * This checks how much space can be buffered without encountering the
 * same value twice, or for operands whose innermost stride is zero,
 * without encountering a different value.  By reducing the buffered
 * amount to this size, reductions can be safely buffered.
 *
 * Reductions are buffered with two levels of looping, to avoid
 * frequent copying to the buffers.  The return value is the over-all
 * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
 * receives the size of the inner of the two levels of looping.
 *
 * The value placed in reduce_outerdim is the index into the AXISDATA
 * for where the second level of the double loop begins.
 *
 * The return value is always a multiple of the value placed in
 * reduce_innersize.
 *
 * As a side effect, this sets or clears NPY_ITFLAG_REUSE_REDUCE_LOOPS
 * in the iterator flags, depending on whether the computed loop
 * structure can be safely reused on the next buffer fill.
 */
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                                npy_intp *reduce_innersize,
                                npy_intp *reduce_outerdim)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    npy_intp coord, shape, *strides;
    /*
     * reducespace accumulates how many elements can be buffered safely;
     * factor is the product of the shapes of the dimensions walked so far,
     * i.e. the number of elements one step in the next dimension spans.
     */
    npy_intp reducespace = 1, factor;
    npy_bool nonzerocoord;

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    /* Per-operand flag: REDUCE operand with stride 0 at the current level */
    char stride0op[NPY_MAXARGS];

    /* Default to no outer axis */
    *reduce_outerdim = 0;

    /* If there's only one dimension, no need to calculate anything */
    if (ndim == 1 || count == 0) {
        *reduce_innersize = count;
        return count;
    }

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    /* Indicate which REDUCE operands have stride 0 in the inner loop */
    strides = NAD_STRIDES(axisdata);
    for (iop = 0; iop < nop; ++iop) {
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
                           (strides[iop] == 0);
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
                        "the inner loop? %d\n", iop, (int)stride0op[iop]);
    }
    /* Count the remaining elements of the innermost dimension */
    shape = NAD_SHAPE(axisdata);
    coord = NAD_INDEX(axisdata);
    reducespace += (shape-coord-1);
    factor = shape;
    NIT_ADVANCE_AXISDATA(axisdata, 1);

    /* Initialize nonzerocoord based on the first coordinate */
    nonzerocoord = (coord != 0);

    /* Go forward through axisdata, calculating the space available */
    for (idim = 1; idim < ndim && reducespace < count;
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
                                (int)reducespace, (int)count);

        strides = NAD_STRIDES(axisdata);
        for (iop = 0; iop < nop; ++iop) {
            /*
             * If a reduce stride switched from zero to non-zero, or
             * vice versa, that's the point where the data will stop
             * being the same element or will repeat, and if the
             * buffer starts with an all zero multi-index up to this
             * point, gives us the reduce_innersize.
             */
            if((stride0op[iop] && (strides[iop] != 0)) ||
                        (!stride0op[iop] &&
                         (strides[iop] == 0) &&
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
                                    "buffer to %d\n", (int)reducespace);
                /*
                 * If we already found more elements than count, or
                 * the starting coordinate wasn't zero, the two-level
                 * looping is unnecessary/can't be done, so return.
                 */
                if (count <= reducespace) {
                    *reduce_innersize = count;
                    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                    return count;
                }
                else if (nonzerocoord) {
                    if (reducespace < count) {
                        count = reducespace;
                    }
                    *reduce_innersize = count;
                    /* NOTE: This is similar to the (coord != 0) case below. */
                    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                    return count;
                }
                else {
                    /* Found the inner loop size; go set up the outer loop */
                    *reduce_innersize = reducespace;
                    break;
                }
            }
        }
        /* If we broke out of the loop early, we found reduce_innersize */
        if (iop != nop) {
            NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
                            "reduce (%d of %d)\n", iop, nop);
            break;
        }

        /* Fold this dimension's remaining elements into reducespace */
        shape = NAD_SHAPE(axisdata);
        coord = NAD_INDEX(axisdata);
        if (coord != 0) {
            nonzerocoord = 1;
        }
        reducespace += (shape-coord-1) * factor;
        factor *= shape;
    }

    /*
     * If there was any non-zero coordinate, the reduction inner
     * loop doesn't fit in the buffersize, or the reduction inner loop
     * covered the entire iteration size, can't do the double loop.
     */
    if (nonzerocoord || count < reducespace || idim == ndim) {
        if (reducespace < count) {
            count = reducespace;
        }
        *reduce_innersize = count;
        /* In this case, we can't reuse the reduce loops */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
        return count;
    }

    coord = NAD_INDEX(axisdata);
    if (coord != 0) {
        /*
         * In this case, it is only safe to reuse the buffer if the amount
         * of data copied is not more than the current axes, as is the
         * case when reuse_reduce_loops was active already.
         * It should be in principle OK when the idim loop returns immediately.
         */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }
    else {
        /* In this case, we can reuse the reduce loops */
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }

    /* From here on, count is measured in units of the inner loop size */
    *reduce_innersize = reducespace;
    count /= reducespace;

    NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
                    (int)reducespace, (int)count);

    /*
     * Continue through the rest of the dimensions.  If there are
     * two separated reduction axes, we may have to cut the buffer
     * short again.
     */
    *reduce_outerdim = idim;
    reducespace = 1;
    factor = 1;
    /* Indicate which REDUCE operands have stride 0 at the current level */
    strides = NAD_STRIDES(axisdata);
    for (iop = 0; iop < nop; ++iop) {
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
                           (strides[iop] == 0);
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
                        "the outer loop? %d\n", iop, (int)stride0op[iop]);
    }
    shape = NAD_SHAPE(axisdata);
    reducespace += (shape-coord-1) * factor;
    factor *= shape;
    NIT_ADVANCE_AXISDATA(axisdata, 1);
    ++idim;

    /* Same scan as above, now sizing the outer level of the double loop */
    for (; idim < ndim && reducespace < count;
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
                                (int)reducespace, (int)count);
        strides = NAD_STRIDES(axisdata);
        for (iop = 0; iop < nop; ++iop) {
            /*
             * If a reduce stride switched from zero to non-zero, or
             * vice versa, that's the point where the data will stop
             * being the same element or will repeat, and if the
             * buffer starts with an all zero multi-index up to this
             * point, gives us the reduce_innersize.
             */
            if((stride0op[iop] && (strides[iop] != 0)) ||
                        (!stride0op[iop] &&
                         (strides[iop] == 0) &&
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
                                    "buffer to %d\n", (int)reducespace);
                /*
                 * This terminates the outer level of our double loop.
                 */
                if (count <= reducespace) {
                    return count * (*reduce_innersize);
                }
                else {
                    return reducespace * (*reduce_innersize);
                }
            }
        }

        shape = NAD_SHAPE(axisdata);
        coord = NAD_INDEX(axisdata);
        if (coord != 0) {
            nonzerocoord = 1;
        }
        reducespace += (shape-coord-1) * factor;
        factor *= shape;
    }

    /* Clamp to the available space and convert back to an element count */
    if (reducespace < count) {
        count = reducespace;
    }
    return count * (*reduce_innersize);
}
2900 
2901 NPY_NO_EXPORT npy_bool
npyiter_has_writeback(NpyIter * iter)2902 npyiter_has_writeback(NpyIter *iter)
2903 {
2904     int iop, nop;
2905     npyiter_opitflags *op_itflags;
2906     if (iter == NULL) {
2907         return 0;
2908     }
2909     nop = NIT_NOP(iter);
2910     op_itflags = NIT_OPITFLAGS(iter);
2911 
2912     for (iop=0; iop<nop; iop++) {
2913         if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
2914             return NPY_TRUE;
2915         }
2916     }
2917     return NPY_FALSE;
2918 }
2919 #undef NPY_ITERATOR_IMPLEMENTATION_CODE
2920