1 /*
2 * This file implements most of the main API functions of NumPy's nditer.
3 * This excludes functions specialized using the templating system.
4 *
5 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
6 * The University of British Columbia
7 *
8 * Copyright (c) 2011 Enthought, Inc
9 *
10 * See LICENSE.txt for the license.
11 */
12 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
13
14 /* Indicate that this .c file is allowed to include the header */
15 #define NPY_ITERATOR_IMPLEMENTATION_CODE
16 #include "nditer_impl.h"
17 #include "templ_common.h"
18 #include "ctors.h"
19
20 /* Internal helper functions private to this file */
21 static npy_intp
22 npyiter_checkreducesize(NpyIter *iter, npy_intp count,
23 npy_intp *reduce_innersize,
24 npy_intp *reduce_outerdim);
25
26 /*NUMPY_API
27 * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
28 * was set for iterator creation, and does not work if buffering is
29 * enabled. This function also resets the iterator to its initial state.
30 *
31 * Returns NPY_SUCCEED or NPY_FAIL.
32 */
33 NPY_NO_EXPORT int
NpyIter_RemoveAxis(NpyIter * iter,int axis)34 NpyIter_RemoveAxis(NpyIter *iter, int axis)
35 {
36 npy_uint32 itflags = NIT_ITFLAGS(iter);
37 int idim, ndim = NIT_NDIM(iter);
38 int iop, nop = NIT_NOP(iter);
39
40 int xdim = 0;
41 npy_int8 *perm = NIT_PERM(iter);
42 NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
43 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
44
45 npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
46 char **resetdataptr = NIT_RESETDATAPTR(iter);
47
48 if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
49 PyErr_SetString(PyExc_RuntimeError,
50 "Iterator RemoveAxis may only be called "
51 "if a multi-index is being tracked");
52 return NPY_FAIL;
53 }
54 else if (itflags&NPY_ITFLAG_HASINDEX) {
55 PyErr_SetString(PyExc_RuntimeError,
56 "Iterator RemoveAxis may not be called on "
57 "an index is being tracked");
58 return NPY_FAIL;
59 }
60 else if (itflags&NPY_ITFLAG_BUFFER) {
61 PyErr_SetString(PyExc_RuntimeError,
62 "Iterator RemoveAxis may not be called on "
63 "a buffered iterator");
64 return NPY_FAIL;
65 }
66 else if (axis < 0 || axis >= ndim) {
67 PyErr_SetString(PyExc_ValueError,
68 "axis out of bounds in iterator RemoveAxis");
69 return NPY_FAIL;
70 }
71
72 /* Reverse axis, since the iterator treats them that way */
73 axis = ndim - 1 - axis;
74
75 /* First find the axis in question */
76 for (idim = 0; idim < ndim; ++idim) {
77 /* If this is it, and it's iterated forward, done */
78 if (perm[idim] == axis) {
79 xdim = idim;
80 break;
81 }
82 /* If this is it, but it's iterated backward, must reverse the axis */
83 else if (-1 - perm[idim] == axis) {
84 npy_intp *strides = NAD_STRIDES(axisdata_del);
85 npy_intp shape = NAD_SHAPE(axisdata_del), offset;
86
87 xdim = idim;
88
89 /*
90 * Adjust baseoffsets and resetbaseptr back to the start of
91 * this axis.
92 */
93 for (iop = 0; iop < nop; ++iop) {
94 offset = (shape-1)*strides[iop];
95 baseoffsets[iop] += offset;
96 resetdataptr[iop] += offset;
97 }
98 break;
99 }
100
101 NIT_ADVANCE_AXISDATA(axisdata_del, 1);
102 }
103
104 if (idim == ndim) {
105 PyErr_SetString(PyExc_RuntimeError,
106 "internal error in iterator perm");
107 return NPY_FAIL;
108 }
109
110 /* Adjust the permutation */
111 for (idim = 0; idim < ndim-1; ++idim) {
112 npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
113 if (p >= 0) {
114 if (p > axis) {
115 --p;
116 }
117 }
118 else if (p <= 0) {
119 if (p < -1-axis) {
120 ++p;
121 }
122 }
123 perm[idim] = p;
124 }
125
126 /* Shift all the axisdata structures by one */
127 axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
128 memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
129
130 /* Adjust the iteration size and reset iterend */
131 NIT_ITERSIZE(iter) = 1;
132 axisdata = NIT_AXISDATA(iter);
133 for (idim = 0; idim < ndim-1; ++idim) {
134 if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
135 NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) {
136 NIT_ITERSIZE(iter) = -1;
137 break;
138 }
139 NIT_ADVANCE_AXISDATA(axisdata, 1);
140 }
141 NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
142
143 /* Shrink the iterator */
144 NIT_NDIM(iter) = ndim - 1;
145 /* If it is now 0-d fill the singleton dimension */
146 if (ndim == 1) {
147 npy_intp *strides = NAD_STRIDES(axisdata_del);
148 NAD_SHAPE(axisdata_del) = 1;
149 for (iop = 0; iop < nop; ++iop) {
150 strides[iop] = 0;
151 }
152 NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
153 }
154
155 return NpyIter_Reset(iter, NULL);
156 }
157
158 /*NUMPY_API
159 * Removes multi-index support from an iterator.
160 *
161 * Returns NPY_SUCCEED or NPY_FAIL.
162 */
163 NPY_NO_EXPORT int
NpyIter_RemoveMultiIndex(NpyIter * iter)164 NpyIter_RemoveMultiIndex(NpyIter *iter)
165 {
166 npy_uint32 itflags;
167
168 /* Make sure the iterator is reset */
169 if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
170 return NPY_FAIL;
171 }
172
173 itflags = NIT_ITFLAGS(iter);
174 if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
175 if (NIT_ITERSIZE(iter) < 0) {
176 PyErr_SetString(PyExc_ValueError, "iterator is too large");
177 return NPY_FAIL;
178 }
179
180 NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
181 npyiter_coalesce_axes(iter);
182 }
183
184 return NPY_SUCCEED;
185 }
186
187 /*NUMPY_API
188 * Removes the inner loop handling (so HasExternalLoop returns true)
189 */
NPY_NO_EXPORT int
NpyIter_EnableExternalLoop(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    /* Check conditions under which this can be done */
    if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Iterator flag EXTERNAL_LOOP cannot be used "
                "if an index or multi-index is being tracked");
        return NPY_FAIL;
    }
    /* RANGE + EXLOOP without BUFFER is an unsupported combination */
    if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
                        == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
        PyErr_SetString(PyExc_ValueError,
                "Iterator flag EXTERNAL_LOOP cannot be used "
                "with ranged iteration unless buffering is also enabled");
        return NPY_FAIL;
    }
    /* Set the flag (idempotent: skip the work if it is already set) */
    if (!(itflags&NPY_ITFLAG_EXLOOP)) {
        itflags |= NPY_ITFLAG_EXLOOP;
        NIT_ITFLAGS(iter) = itflags;

        /*
         * Check whether we can apply the single iteration
         * optimization to the iternext function.
         */
        if (!(itflags&NPY_ITFLAG_BUFFER)) {
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
            /* True when the whole iteration fits in the innermost axis */
            if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
            }
        }
    }

    /* Reset the iterator */
    return NpyIter_Reset(iter, NULL);
}
231
232
/*
 * Generic message stored into the caller-supplied 'errmsg' by the Reset
 * functions when a buffer-copy (casting) step fails; the detailed error
 * is set as a Python exception at the point of failure.
 */
static char *_reset_cast_error = (
        "Iterator reset failed due to a casting failure. "
        "This error is set as a Python error.");
236
237 /*NUMPY_API
238 * Resets the iterator to its initial state
239 *
240 * The use of errmsg is discouraged, it cannot be guaranteed that the GIL
241 * will not be grabbed on casting errors even when this is passed.
242 *
243 * If errmsg is non-NULL, it should point to a variable which will
244 * receive the error message, and no Python exception will be set.
245 * This is so that the function can be called from code not holding
246 * the GIL. Note that cast errors may still lead to the GIL being
247 * grabbed temporarily.
248 */
249 NPY_NO_EXPORT int
NpyIter_Reset(NpyIter * iter,char ** errmsg)250 NpyIter_Reset(NpyIter *iter, char **errmsg)
251 {
252 npy_uint32 itflags = NIT_ITFLAGS(iter);
253 /*int ndim = NIT_NDIM(iter);*/
254 int nop = NIT_NOP(iter);
255
256 if (itflags&NPY_ITFLAG_BUFFER) {
257 NpyIter_BufferData *bufferdata;
258
259 /* If buffer allocation was delayed, do it now */
260 if (itflags&NPY_ITFLAG_DELAYBUF) {
261 if (!npyiter_allocate_buffers(iter, errmsg)) {
262 if (errmsg != NULL) {
263 *errmsg = _reset_cast_error;
264 }
265 return NPY_FAIL;
266 }
267 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
268 }
269 else {
270 /*
271 * If the iterindex is already right, no need to
272 * do anything (and no cast error has previously occurred).
273 */
274 bufferdata = NIT_BUFFERDATA(iter);
275 if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
276 NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
277 NBF_SIZE(bufferdata) > 0) {
278 return NPY_SUCCEED;
279 }
280 if (npyiter_copy_from_buffers(iter) < 0) {
281 if (errmsg != NULL) {
282 *errmsg = _reset_cast_error;
283 }
284 return NPY_FAIL;
285 }
286 }
287 }
288
289 npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
290
291 if (itflags&NPY_ITFLAG_BUFFER) {
292 /* Prepare the next buffers and set iterend/size */
293 if (npyiter_copy_to_buffers(iter, NULL) < 0) {
294 if (errmsg != NULL) {
295 *errmsg = _reset_cast_error;
296 }
297 return NPY_FAIL;
298 }
299 }
300
301 return NPY_SUCCEED;
302 }
303
304 /*NUMPY_API
305 * Resets the iterator to its initial state, with new base data pointers.
306 * This function requires great caution.
307 *
308 * If errmsg is non-NULL, it should point to a variable which will
309 * receive the error message, and no Python exception will be set.
310 * This is so that the function can be called from code not holding
311 * the GIL. Note that cast errors may still lead to the GIL being
312 * grabbed temporarily.
313 */
NPY_NO_EXPORT int
NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    char **resetdataptr = NIT_RESETDATAPTR(iter);
    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            /* npyiter_allocate_buffers fills in errmsg itself on failure */
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /* Flush pending buffered results back to the operands */
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /* The new data pointers for resetting */
    for (iop = 0; iop < nop; ++iop) {
        /*
         * baseoffsets compensate for axes iterated in reverse order, so
         * each resetdataptr points at iteration index 0 of its operand.
         */
        resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
    }

    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
361
362 /*NUMPY_API
363 * Resets the iterator to a new iterator index range
364 *
365 * If errmsg is non-NULL, it should point to a variable which will
366 * receive the error message, and no Python exception will be set.
367 * This is so that the function can be called from code not holding
368 * the GIL. Note that cast errors may still lead to the GIL being
369 * grabbed temporarily.
370 */
NPY_NO_EXPORT int
NpyIter_ResetToIterIndexRange(NpyIter *iter,
                    npy_intp istart, npy_intp iend, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    /*int nop = NIT_NOP(iter);*/

    /* Ranged iteration must have been requested at construction time */
    if (!(itflags&NPY_ITFLAG_RANGE)) {
        if (errmsg == NULL) {
            PyErr_SetString(PyExc_ValueError,
                    "Cannot call ResetToIterIndexRange on an iterator without "
                    "requesting ranged iteration support in the constructor");
        }
        else {
            *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
                      "without requesting ranged iteration support in the "
                      "constructor";
        }
        return NPY_FAIL;
    }

    if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
        /* A negative itersize marks npy_intp overflow of the total size */
        if (NIT_ITERSIZE(iter) < 0) {
            if (errmsg == NULL) {
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
            }
            else {
                *errmsg = "iterator is too large";
            }
            return NPY_FAIL;
        }
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
                    "ResetToIterIndexRange", istart, iend);
        }
        else {
            *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }
    else if (iend < istart) {
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
                    istart, iend);
        }
        else {
            *errmsg = "Invalid range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }

    /* Install the new half-open range, then do a full reset */
    NIT_ITERSTART(iter) = istart;
    NIT_ITEREND(iter) = iend;

    return NpyIter_Reset(iter, errmsg);
}
430
431 /*NUMPY_API
432 * Sets the iterator to the specified multi-index, which must have the
433 * correct number of entries for 'ndim'. It is only valid
434 * when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation
435 * fails if the multi-index is out of bounds.
436 *
437 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
438 */
NPY_NO_EXPORT int
NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    /* NOTE(review): sizeof_axisdata looks textually unused, but it is
     * presumably referenced by the NIT_ADVANCE_AXISDATA macro — confirm
     * against nditer_impl.h before removing. */
    npy_intp sizeof_axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator without "
                "requesting a multi-index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    perm = NIT_PERM(iter);
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Compute the iterindex corresponding to the multi-index.
     * iterindex accumulates as a mixed-radix number: each dimension
     * contributes i*factor, where factor is the product of all
     * previously-visited (faster-varying) shapes.
     */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_int8 p = perm[idim];
        npy_intp i, shape;

        shape = NAD_SHAPE(axisdata);
        if (p < 0) {
            /* If the perm entry is negative, reverse the index */
            i = shape - multi_index[ndim+p] - 1;
        }
        else {
            /* perm maps iteration order back to the caller's axis order */
            i = multi_index[ndim-p-1];
        }

        /* Bounds-check this index */
        if (i >= 0 && i < shape) {
            iterindex += factor * i;
            factor *= shape;
        }
        else {
            PyErr_SetString(PyExc_IndexError,
                    "Iterator GotoMultiIndex called with an out-of-bounds "
                    "multi-index");
            return NPY_FAIL;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /* The target must fall within the (possibly restricted) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoMultiIndex called with a multi-index outside the "
                "restricted iteration range");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
522
523 /*NUMPY_API
524 * If the iterator is tracking an index, sets the iterator
525 * to the specified index.
526 *
527 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
528 */
NPY_NO_EXPORT int
NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    /* NOTE(review): sizeof_axisdata looks textually unused, but it is
     * presumably referenced by the NIT_ADVANCE_AXISDATA macro — confirm
     * against nditer_impl.h before removing. */
    npy_intp sizeof_axisdata;

    if (!(itflags&NPY_ITFLAG_HASINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator without "
                "requesting a C or Fortran index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an out-of-bounds "
                "index");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Compute the iterindex corresponding to the flat_index.
     * Strides slot [nop] holds the tracked index's stride for each axis;
     * dividing the flat index by it and reducing modulo the shape
     * recovers that axis's coordinate.
     */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp i, shape, iterstride;

        iterstride = NAD_STRIDES(axisdata)[nop];
        shape = NAD_SHAPE(axisdata);

        /* Extract the index from the flat_index */
        if (iterstride == 0) {
            i = 0;
        }
        else if (iterstride < 0) {
            /* Negative index stride: the axis is iterated in reverse */
            i = shape - (flat_index/(-iterstride))%shape - 1;
        }
        else {
            i = (flat_index/iterstride)%shape;
        }

        /* Add its contribution to iterindex (mixed-radix accumulation) */
        iterindex += factor * i;
        factor *= shape;

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }


    /* The target must fall within the (possibly restricted) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an index outside the "
                "restricted iteration range.");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
610
611 /*NUMPY_API
612 * Sets the iterator position to the specified iterindex,
613 * which matches the iteration order of the iterator.
614 *
615 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
616 */
NPY_NO_EXPORT int
NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIterIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        /* A negative itersize marks npy_intp overflow of the total size */
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIterIndex called with an iterindex outside the "
                "iteration range.");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        npy_intp bufiterend, size;

        size = NBF_SIZE(bufferdata);
        bufiterend = NBF_BUFITEREND(bufferdata);
        /* Check if the new iterindex is already within the buffer */
        if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
                                        iterindex >= bufiterend - size) {
            npy_intp *strides, delta;
            char **ptrs;

            strides = NBF_STRIDES(bufferdata);
            ptrs = NBF_PTRS(bufferdata);
            delta = iterindex - NIT_ITERINDEX(iter);

            /* Fast path: slide the buffer pointers by the index delta */
            for (iop = 0; iop < nop; ++iop) {
                ptrs[iop] += delta * strides[iop];
            }

            NIT_ITERINDEX(iter) = iterindex;
        }
        /* Start the buffer at the provided iterindex */
        else {
            /* Write back to the arrays */
            if (npyiter_copy_from_buffers(iter) < 0) {
                return NPY_FAIL;
            }

            npyiter_goto_iterindex(iter, iterindex);

            /* Prepare the next buffers and set iterend/size */
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
                return NPY_FAIL;
            }
        }
    }
    else {
        npyiter_goto_iterindex(iter, iterindex);
    }

    return NPY_SUCCEED;
}
685
686 /*NUMPY_API
687 * Gets the current iteration index
688 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterIndex(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
    if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
        return NIT_ITERINDEX(iter);
    }
    else {
        npy_intp iterindex;
        NpyIter_AxisData *axisdata;
        /* NOTE(review): sizeof_axisdata looks textually unused, but it is
         * presumably referenced by the NIT_ADVANCE_AXISDATA /
         * NIT_INDEX_AXISDATA macros — confirm before removing. */
        npy_intp sizeof_axisdata;

        iterindex = 0;
        /* 0-d iterators have a single position */
        if (ndim == 0) {
            return 0;
        }
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);

        /*
         * Reconstruct the flat iterindex from the per-axis positions,
         * walking from the outermost axis inward (Horner-style
         * mixed-radix evaluation).
         */
        for (idim = ndim-2; idim >= 0; --idim) {
            iterindex += NAD_INDEX(axisdata);
            NIT_ADVANCE_AXISDATA(axisdata, -1);
            iterindex *= NAD_SHAPE(axisdata);
        }
        /* Innermost axis contributes its position directly */
        iterindex += NAD_INDEX(axisdata);

        return iterindex;
    }
}
722
723 /*NUMPY_API
724 * Whether the buffer allocation is being delayed
725 */
726 NPY_NO_EXPORT npy_bool
NpyIter_HasDelayedBufAlloc(NpyIter * iter)727 NpyIter_HasDelayedBufAlloc(NpyIter *iter)
728 {
729 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
730 }
731
732 /*NUMPY_API
733 * Whether the iterator handles the inner loop
734 */
735 NPY_NO_EXPORT npy_bool
NpyIter_HasExternalLoop(NpyIter * iter)736 NpyIter_HasExternalLoop(NpyIter *iter)
737 {
738 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
739 }
740
741 /*NUMPY_API
742 * Whether the iterator is tracking a multi-index
743 */
744 NPY_NO_EXPORT npy_bool
NpyIter_HasMultiIndex(NpyIter * iter)745 NpyIter_HasMultiIndex(NpyIter *iter)
746 {
747 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
748 }
749
750 /*NUMPY_API
751 * Whether the iterator is tracking an index
752 */
753 NPY_NO_EXPORT npy_bool
NpyIter_HasIndex(NpyIter * iter)754 NpyIter_HasIndex(NpyIter *iter)
755 {
756 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
757 }
758
759 /*NUMPY_API
760 * Checks to see whether this is the first time the elements
761 * of the specified reduction operand which the iterator points at are
762 * being seen for the first time. The function returns
763 * a reasonable answer for reduction operands and when buffering is
764 * disabled. The answer may be incorrect for buffered non-reduction
765 * operands.
766 *
767 * This function is intended to be used in EXTERNAL_LOOP mode only,
768 * and will produce some wrong answers when that mode is not enabled.
769 *
770 * If this function returns true, the caller should also
771 * check the inner loop stride of the operand, because if
772 * that stride is 0, then only the first element of the innermost
773 * external loop is being visited for the first time.
774 *
775 * WARNING: For performance reasons, 'iop' is not bounds-checked,
776 * it is not confirmed that 'iop' is actually a reduction
777 * operand, and it is not confirmed that EXTERNAL_LOOP
778 * mode is enabled. These checks are the responsibility of
779 * the caller, and should be done outside of any inner loops.
780 */
NPY_NO_EXPORT npy_bool
NpyIter_IsFirstVisit(NpyIter *iter, int iop)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    /* NOTE(review): sizeof_axisdata looks textually unused, but it is
     * presumably referenced by the NIT_ADVANCE_AXISDATA macro — confirm
     * against nditer_impl.h before removing. */
    npy_intp sizeof_axisdata;

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    for (idim = 0; idim < ndim; ++idim) {
        npy_intp coord = NAD_INDEX(axisdata);
        npy_intp stride = NAD_STRIDES(axisdata)[iop];

        /*
         * If this is a reduction dimension and the coordinate
         * is not at the start, it's definitely not the first visit
         */
        if (stride == 0 && coord != 0) {
            return 0;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /*
     * In reduction buffering mode, there's a double loop being
     * tracked in the buffer part of the iterator data structure.
     * We only need to check the outer level of this two-level loop,
     * because of the requirement that EXTERNAL_LOOP be enabled.
     */
    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        /* The outer reduce loop: zero outer stride past position 0
         * means this operand's data is being revisited */
        if (NBF_REDUCE_POS(bufferdata) != 0 &&
                NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) {
            return 0;
        }
    }

    return 1;
}
826
827 /*NUMPY_API
828 * Whether the iteration could be done with no buffering.
829 */
830 NPY_NO_EXPORT npy_bool
NpyIter_RequiresBuffering(NpyIter * iter)831 NpyIter_RequiresBuffering(NpyIter *iter)
832 {
833 npy_uint32 itflags = NIT_ITFLAGS(iter);
834 /*int ndim = NIT_NDIM(iter);*/
835 int iop, nop = NIT_NOP(iter);
836
837 npyiter_opitflags *op_itflags;
838
839 if (!(itflags&NPY_ITFLAG_BUFFER)) {
840 return 0;
841 }
842
843 op_itflags = NIT_OPITFLAGS(iter);
844
845 /* If any operand requires a cast, buffering is mandatory */
846 for (iop = 0; iop < nop; ++iop) {
847 if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
848 return 1;
849 }
850 }
851
852 return 0;
853 }
854
855 /*NUMPY_API
856 * Whether the iteration loop, and in particular the iternext()
857 * function, needs API access. If this is true, the GIL must
858 * be retained while iterating.
859 */
860 NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter * iter)861 NpyIter_IterationNeedsAPI(NpyIter *iter)
862 {
863 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
864 }
865
866 /*NUMPY_API
867 * Gets the number of dimensions being iterated
868 */
869 NPY_NO_EXPORT int
NpyIter_GetNDim(NpyIter * iter)870 NpyIter_GetNDim(NpyIter *iter)
871 {
872 return NIT_NDIM(iter);
873 }
874
875 /*NUMPY_API
876 * Gets the number of operands being iterated
877 */
878 NPY_NO_EXPORT int
NpyIter_GetNOp(NpyIter * iter)879 NpyIter_GetNOp(NpyIter *iter)
880 {
881 return NIT_NOP(iter);
882 }
883
884 /*NUMPY_API
885 * Gets the number of elements being iterated
886 */
887 NPY_NO_EXPORT npy_intp
NpyIter_GetIterSize(NpyIter * iter)888 NpyIter_GetIterSize(NpyIter *iter)
889 {
890 return NIT_ITERSIZE(iter);
891 }
892
893 /*NUMPY_API
894 * Whether the iterator is buffered
895 */
896 NPY_NO_EXPORT npy_bool
NpyIter_IsBuffered(NpyIter * iter)897 NpyIter_IsBuffered(NpyIter *iter)
898 {
899 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
900 }
901
902 /*NUMPY_API
903 * Whether the inner loop can grow if buffering is unneeded
904 */
905 NPY_NO_EXPORT npy_bool
NpyIter_IsGrowInner(NpyIter * iter)906 NpyIter_IsGrowInner(NpyIter *iter)
907 {
908 return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
909 }
910
911 /*NUMPY_API
912 * Gets the size of the buffer, or 0 if buffering is not enabled
913 */
914 NPY_NO_EXPORT npy_intp
NpyIter_GetBufferSize(NpyIter * iter)915 NpyIter_GetBufferSize(NpyIter *iter)
916 {
917 npy_uint32 itflags = NIT_ITFLAGS(iter);
918 /*int ndim = NIT_NDIM(iter);*/
919 int nop = NIT_NOP(iter);
920
921 if (itflags&NPY_ITFLAG_BUFFER) {
922 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
923 return NBF_BUFFERSIZE(bufferdata);
924 }
925 else {
926 return 0;
927 }
928
929 }
930
931 /*NUMPY_API
932 * Gets the range of iteration indices being iterated
933 */
934 NPY_NO_EXPORT void
NpyIter_GetIterIndexRange(NpyIter * iter,npy_intp * istart,npy_intp * iend)935 NpyIter_GetIterIndexRange(NpyIter *iter,
936 npy_intp *istart, npy_intp *iend)
937 {
938 *istart = NIT_ITERSTART(iter);
939 *iend = NIT_ITEREND(iter);
940 }
941
942 /*NUMPY_API
943 * Gets the broadcast shape if a multi-index is being tracked by the iterator,
944 * otherwise gets the shape of the iteration as Fortran-order
945 * (fastest-changing index first).
946 *
947 * The reason Fortran-order is returned when a multi-index
948 * is not enabled is that this is providing a direct view into how
949 * the iterator traverses the n-dimensional space. The iterator organizes
950 * its memory from fastest index to slowest index, and when
951 * a multi-index is enabled, it uses a permutation to recover the original
952 * order.
953 *
954 * Returns NPY_SUCCEED or NPY_FAIL.
955 */
956 NPY_NO_EXPORT int
NpyIter_GetShape(NpyIter * iter,npy_intp * outshape)957 NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
958 {
959 npy_uint32 itflags = NIT_ITFLAGS(iter);
960 int ndim = NIT_NDIM(iter);
961 int nop = NIT_NOP(iter);
962
963 int idim, sizeof_axisdata;
964 NpyIter_AxisData *axisdata;
965 npy_int8 *perm;
966
967 axisdata = NIT_AXISDATA(iter);
968 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
969
970 if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
971 perm = NIT_PERM(iter);
972 for(idim = 0; idim < ndim; ++idim) {
973 int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
974 outshape[axis] = NAD_SHAPE(axisdata);
975
976 NIT_ADVANCE_AXISDATA(axisdata, 1);
977 }
978 }
979 else {
980 for(idim = 0; idim < ndim; ++idim) {
981 outshape[idim] = NAD_SHAPE(axisdata);
982 NIT_ADVANCE_AXISDATA(axisdata, 1);
983 }
984 }
985
986 return NPY_SUCCEED;
987 }
988
989 /*NUMPY_API
990 * Builds a set of strides which are the same as the strides of an
991 * output array created using the NPY_ITER_ALLOCATE flag, where NULL
992 * was passed for op_axes. This is for data packed contiguously,
993 * but not necessarily in C or Fortran order. This should be used
994 * together with NpyIter_GetShape and NpyIter_GetNDim.
995 *
996 * A use case for this function is to match the shape and layout of
997 * the iterator and tack on one or more dimensions. For example,
998 * in order to generate a vector per input value for a numerical gradient,
999 * you pass in ndim*itemsize for itemsize, then add another dimension to
1000 * the end with size ndim and stride itemsize. To do the Hessian matrix,
1001 * you do the same thing but add two dimensions, or take advantage of
1002 * the symmetry and pack it into 1 dimension with a particular encoding.
1003 *
1004 * This function may only be called if the iterator is tracking a multi-index
1005 * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
1006 * being iterated in reverse order.
1007 *
1008 * If an array is created with this method, simply adding 'itemsize'
1009 * for each iteration will traverse the new array matching the
1010 * iterator.
1011 *
1012 * Returns NPY_SUCCEED or NPY_FAIL.
1013 */
NPY_NO_EXPORT int
NpyIter_CreateCompatibleStrides(NpyIter *iter,
                        npy_intp itemsize, npy_intp *outstrides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    /* NOTE(review): sizeof_axisdata looks textually unused, but it is
     * presumably referenced by the NIT_ADVANCE_AXISDATA macro — confirm
     * against nditer_impl.h before removing. */
    npy_intp sizeof_axisdata;
    NpyIter_AxisData *axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator CreateCompatibleStrides may only be called "
                "if a multi-index is being tracked");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    perm = NIT_PERM(iter);
    for(idim = 0; idim < ndim; ++idim) {
        npy_bool flipped;
        npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped);
        /* Reversed axes would need negative strides, which this API
         * does not produce — require DONT_NEGATE_STRIDES instead */
        if (flipped) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator CreateCompatibleStrides may only be called "
                    "if DONT_NEGATE_STRIDES was used to prevent reverse "
                    "iteration of an axis");
            return NPY_FAIL;
        }
        else {
            outstrides[axis] = itemsize;
        }

        /* itemsize doubles as the running stride accumulator: it grows
         * by each axis's shape so the next axis gets the packed stride */
        itemsize *= NAD_SHAPE(axisdata);
        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    return NPY_SUCCEED;
}
1057
1058 /*NUMPY_API
1059 * Get the array of data pointers (1 per object being iterated)
1060 *
1061 * This function may be safely called without holding the Python GIL.
1062 */
1063 NPY_NO_EXPORT char **
NpyIter_GetDataPtrArray(NpyIter * iter)1064 NpyIter_GetDataPtrArray(NpyIter *iter)
1065 {
1066 npy_uint32 itflags = NIT_ITFLAGS(iter);
1067 /*int ndim = NIT_NDIM(iter);*/
1068 int nop = NIT_NOP(iter);
1069
1070 if (itflags&NPY_ITFLAG_BUFFER) {
1071 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1072 return NBF_PTRS(bufferdata);
1073 }
1074 else {
1075 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1076 return NAD_PTRS(axisdata);
1077 }
1078 }
1079
1080 /*NUMPY_API
1081 * Get the array of data pointers (1 per object being iterated),
1082 * directly into the arrays (never pointing to a buffer), for starting
1083 * unbuffered iteration. This always returns the addresses for the
1084 * iterator position as reset to iterator index 0.
1085 *
1086 * These pointers are different from the pointers accepted by
1087 * NpyIter_ResetBasePointers, because the direction along some
1088 * axes may have been reversed, requiring base offsets.
1089 *
1090 * This function may be safely called without holding the Python GIL.
1091 */
1092 NPY_NO_EXPORT char **
NpyIter_GetInitialDataPtrArray(NpyIter * iter)1093 NpyIter_GetInitialDataPtrArray(NpyIter *iter)
1094 {
1095 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1096 /*int ndim = NIT_NDIM(iter);*/
1097 int nop = NIT_NOP(iter);
1098
1099 return NIT_RESETDATAPTR(iter);
1100 }
1101
1102 /*NUMPY_API
1103 * Get the array of data type pointers (1 per object being iterated)
1104 */
1105 NPY_NO_EXPORT PyArray_Descr **
NpyIter_GetDescrArray(NpyIter * iter)1106 NpyIter_GetDescrArray(NpyIter *iter)
1107 {
1108 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1109 /*int ndim = NIT_NDIM(iter);*/
1110 /*int nop = NIT_NOP(iter);*/
1111
1112 return NIT_DTYPES(iter);
1113 }
1114
1115 /*NUMPY_API
1116 * Get the array of objects being iterated
1117 */
1118 NPY_NO_EXPORT PyArrayObject **
NpyIter_GetOperandArray(NpyIter * iter)1119 NpyIter_GetOperandArray(NpyIter *iter)
1120 {
1121 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1122 /*int ndim = NIT_NDIM(iter);*/
1123 int nop = NIT_NOP(iter);
1124
1125 return NIT_OPERANDS(iter);
1126 }
1127
1128 /*NUMPY_API
1129 * Returns a view to the i-th object with the iterator's internal axes
1130 */
1131 NPY_NO_EXPORT PyArrayObject *
NpyIter_GetIterView(NpyIter * iter,npy_intp i)1132 NpyIter_GetIterView(NpyIter *iter, npy_intp i)
1133 {
1134 npy_uint32 itflags = NIT_ITFLAGS(iter);
1135 int idim, ndim = NIT_NDIM(iter);
1136 int nop = NIT_NOP(iter);
1137
1138 npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
1139 PyArrayObject *obj, *view;
1140 PyArray_Descr *dtype;
1141 char *dataptr;
1142 NpyIter_AxisData *axisdata;
1143 npy_intp sizeof_axisdata;
1144 int writeable;
1145
1146 if (i < 0) {
1147 PyErr_SetString(PyExc_IndexError,
1148 "index provided for an iterator view was out of bounds");
1149 return NULL;
1150 }
1151
1152 /* Don't provide views if buffering is enabled */
1153 if (itflags&NPY_ITFLAG_BUFFER) {
1154 PyErr_SetString(PyExc_ValueError,
1155 "cannot provide an iterator view when buffering is enabled");
1156 return NULL;
1157 }
1158
1159 obj = NIT_OPERANDS(iter)[i];
1160 dtype = PyArray_DESCR(obj);
1161 writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
1162 dataptr = NIT_RESETDATAPTR(iter)[i];
1163 axisdata = NIT_AXISDATA(iter);
1164 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1165
1166 /* Retrieve the shape and strides from the axisdata */
1167 for (idim = 0; idim < ndim; ++idim) {
1168 shape[ndim-idim-1] = NAD_SHAPE(axisdata);
1169 strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];
1170
1171 NIT_ADVANCE_AXISDATA(axisdata, 1);
1172 }
1173
1174 Py_INCREF(dtype);
1175 view = (PyArrayObject *)PyArray_NewFromDescrAndBase(
1176 &PyArray_Type, dtype,
1177 ndim, shape, strides, dataptr,
1178 writeable ? NPY_ARRAY_WRITEABLE : 0, NULL, (PyObject *)obj);
1179
1180 return view;
1181 }
1182
1183 /*NUMPY_API
1184 * Get a pointer to the index, if it is being tracked
1185 */
1186 NPY_NO_EXPORT npy_intp *
NpyIter_GetIndexPtr(NpyIter * iter)1187 NpyIter_GetIndexPtr(NpyIter *iter)
1188 {
1189 npy_uint32 itflags = NIT_ITFLAGS(iter);
1190 /*int ndim = NIT_NDIM(iter);*/
1191 int nop = NIT_NOP(iter);
1192
1193 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1194
1195 if (itflags&NPY_ITFLAG_HASINDEX) {
1196 /* The index is just after the data pointers */
1197 return (npy_intp*)NAD_PTRS(axisdata) + nop;
1198 }
1199 else {
1200 return NULL;
1201 }
1202 }
1203
1204 /*NUMPY_API
1205 * Gets an array of read flags (1 per object being iterated)
1206 */
1207 NPY_NO_EXPORT void
NpyIter_GetReadFlags(NpyIter * iter,char * outreadflags)1208 NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
1209 {
1210 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1211 /*int ndim = NIT_NDIM(iter);*/
1212 int iop, nop = NIT_NOP(iter);
1213
1214 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1215
1216 for (iop = 0; iop < nop; ++iop) {
1217 outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
1218 }
1219 }
1220
1221 /*NUMPY_API
1222 * Gets an array of write flags (1 per object being iterated)
1223 */
1224 NPY_NO_EXPORT void
NpyIter_GetWriteFlags(NpyIter * iter,char * outwriteflags)1225 NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
1226 {
1227 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1228 /*int ndim = NIT_NDIM(iter);*/
1229 int iop, nop = NIT_NOP(iter);
1230
1231 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1232
1233 for (iop = 0; iop < nop; ++iop) {
1234 outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
1235 }
1236 }
1237
1238
1239 /*NUMPY_API
1240 * Get the array of strides for the inner loop (when HasExternalLoop is true)
1241 *
1242 * This function may be safely called without holding the Python GIL.
1243 */
1244 NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerStrideArray(NpyIter * iter)1245 NpyIter_GetInnerStrideArray(NpyIter *iter)
1246 {
1247 npy_uint32 itflags = NIT_ITFLAGS(iter);
1248 /*int ndim = NIT_NDIM(iter);*/
1249 int nop = NIT_NOP(iter);
1250
1251 if (itflags&NPY_ITFLAG_BUFFER) {
1252 NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
1253 return NBF_STRIDES(data);
1254 }
1255 else {
1256 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1257 return NAD_STRIDES(axisdata);
1258 }
1259 }
1260
1261 /*NUMPY_API
1262 * Gets the array of strides for the specified axis.
1263 * If the iterator is tracking a multi-index, gets the strides
1264 * for the axis specified, otherwise gets the strides for
1265 * the iteration axis as Fortran order (fastest-changing axis first).
1266 *
1267 * Returns NULL if an error occurs.
1268 */
1269 NPY_NO_EXPORT npy_intp *
NpyIter_GetAxisStrideArray(NpyIter * iter,int axis)1270 NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
1271 {
1272 npy_uint32 itflags = NIT_ITFLAGS(iter);
1273 int idim, ndim = NIT_NDIM(iter);
1274 int nop = NIT_NOP(iter);
1275
1276 npy_int8 *perm = NIT_PERM(iter);
1277 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1278 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1279
1280 if (axis < 0 || axis >= ndim) {
1281 PyErr_SetString(PyExc_ValueError,
1282 "axis out of bounds in iterator GetStrideAxisArray");
1283 return NULL;
1284 }
1285
1286 if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
1287 /* Reverse axis, since the iterator treats them that way */
1288 axis = ndim-1-axis;
1289
1290 /* First find the axis in question */
1291 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1292 if (perm[idim] == axis || -1 - perm[idim] == axis) {
1293 return NAD_STRIDES(axisdata);
1294 }
1295 }
1296 }
1297 else {
1298 return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
1299 }
1300
1301 PyErr_SetString(PyExc_RuntimeError,
1302 "internal error in iterator perm");
1303 return NULL;
1304 }
1305
1306 /*NUMPY_API
1307 * Get an array of strides which are fixed. Any strides which may
1308 * change during iteration receive the value NPY_MAX_INTP. Once
1309 * the iterator is ready to iterate, call this to get the strides
1310 * which will always be fixed in the inner loop, then choose optimized
1311 * inner loop functions which take advantage of those fixed strides.
1312 *
1313 * This function may be safely called without holding the Python GIL.
1314 */
1315 NPY_NO_EXPORT void
NpyIter_GetInnerFixedStrideArray(NpyIter * iter,npy_intp * out_strides)1316 NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
1317 {
1318 npy_uint32 itflags = NIT_ITFLAGS(iter);
1319 int ndim = NIT_NDIM(iter);
1320 int iop, nop = NIT_NOP(iter);
1321
1322 NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
1323 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1324
1325 if (itflags&NPY_ITFLAG_BUFFER) {
1326 NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
1327 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1328 npy_intp stride, *strides = NBF_STRIDES(data),
1329 *ad_strides = NAD_STRIDES(axisdata0);
1330 PyArray_Descr **dtypes = NIT_DTYPES(iter);
1331
1332 for (iop = 0; iop < nop; ++iop) {
1333 stride = strides[iop];
1334 /*
1335 * Operands which are always/never buffered have fixed strides,
1336 * and everything has fixed strides when ndim is 0 or 1
1337 */
1338 if (ndim <= 1 || (op_itflags[iop]&
1339 (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
1340 out_strides[iop] = stride;
1341 }
1342 /* If it's a reduction, 0-stride inner loop may have fixed stride */
1343 else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
1344 /* If it's a reduction operand, definitely fixed stride */
1345 if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
1346 out_strides[iop] = stride;
1347 }
1348 /*
1349 * Otherwise it's guaranteed to be a fixed stride if the
1350 * stride is 0 for all the dimensions.
1351 */
1352 else {
1353 NpyIter_AxisData *axisdata = axisdata0;
1354 int idim;
1355 for (idim = 0; idim < ndim; ++idim) {
1356 if (NAD_STRIDES(axisdata)[iop] != 0) {
1357 break;
1358 }
1359 NIT_ADVANCE_AXISDATA(axisdata, 1);
1360 }
1361 /* If all the strides were 0, the stride won't change */
1362 if (idim == ndim) {
1363 out_strides[iop] = stride;
1364 }
1365 else {
1366 out_strides[iop] = NPY_MAX_INTP;
1367 }
1368 }
1369 }
1370 /*
1371 * Inner loop contiguous array means its stride won't change when
1372 * switching between buffering and not buffering
1373 */
1374 else if (ad_strides[iop] == dtypes[iop]->elsize) {
1375 out_strides[iop] = ad_strides[iop];
1376 }
1377 /*
1378 * Otherwise the strides can change if the operand is sometimes
1379 * buffered, sometimes not.
1380 */
1381 else {
1382 out_strides[iop] = NPY_MAX_INTP;
1383 }
1384 }
1385 }
1386 else {
1387 /* If there's no buffering, the strides are always fixed */
1388 memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
1389 }
1390 }
1391
1392 /*NUMPY_API
1393 * Get a pointer to the size of the inner loop (when HasExternalLoop is true)
1394 *
1395 * This function may be safely called without holding the Python GIL.
1396 */
1397 NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerLoopSizePtr(NpyIter * iter)1398 NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
1399 {
1400 npy_uint32 itflags = NIT_ITFLAGS(iter);
1401 /*int ndim = NIT_NDIM(iter);*/
1402 int nop = NIT_NOP(iter);
1403
1404 if (itflags&NPY_ITFLAG_BUFFER) {
1405 NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
1406 return &NBF_SIZE(data);
1407 }
1408 else {
1409 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1410 return &NAD_SHAPE(axisdata);
1411 }
1412 }
1413
1414
1415 /*NUMPY_API
1416 * For debugging
1417 */
1418 NPY_NO_EXPORT void
NpyIter_DebugPrint(NpyIter * iter)1419 NpyIter_DebugPrint(NpyIter *iter)
1420 {
1421 npy_uint32 itflags = NIT_ITFLAGS(iter);
1422 int idim, ndim = NIT_NDIM(iter);
1423 int iop, nop = NIT_NOP(iter);
1424
1425 NpyIter_AxisData *axisdata;
1426 npy_intp sizeof_axisdata;
1427
1428 NPY_ALLOW_C_API_DEF
1429 NPY_ALLOW_C_API
1430
1431 printf("\n------ BEGIN ITERATOR DUMP ------\n");
1432 printf("| Iterator Address: %p\n", (void *)iter);
1433 printf("| ItFlags: ");
1434 if (itflags&NPY_ITFLAG_IDENTPERM)
1435 printf("IDENTPERM ");
1436 if (itflags&NPY_ITFLAG_NEGPERM)
1437 printf("NEGPERM ");
1438 if (itflags&NPY_ITFLAG_HASINDEX)
1439 printf("HASINDEX ");
1440 if (itflags&NPY_ITFLAG_HASMULTIINDEX)
1441 printf("HASMULTIINDEX ");
1442 if (itflags&NPY_ITFLAG_FORCEDORDER)
1443 printf("FORCEDORDER ");
1444 if (itflags&NPY_ITFLAG_EXLOOP)
1445 printf("EXLOOP ");
1446 if (itflags&NPY_ITFLAG_RANGE)
1447 printf("RANGE ");
1448 if (itflags&NPY_ITFLAG_BUFFER)
1449 printf("BUFFER ");
1450 if (itflags&NPY_ITFLAG_GROWINNER)
1451 printf("GROWINNER ");
1452 if (itflags&NPY_ITFLAG_ONEITERATION)
1453 printf("ONEITERATION ");
1454 if (itflags&NPY_ITFLAG_DELAYBUF)
1455 printf("DELAYBUF ");
1456 if (itflags&NPY_ITFLAG_NEEDSAPI)
1457 printf("NEEDSAPI ");
1458 if (itflags&NPY_ITFLAG_REDUCE)
1459 printf("REDUCE ");
1460 if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
1461 printf("REUSE_REDUCE_LOOPS ");
1462
1463 printf("\n");
1464 printf("| NDim: %d\n", ndim);
1465 printf("| NOp: %d\n", nop);
1466 if (NIT_MASKOP(iter) >= 0) {
1467 printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
1468 }
1469 printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
1470 printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
1471 printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
1472 printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
1473 printf("| Iterator SizeOf: %d\n",
1474 (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
1475 printf("| BufferData SizeOf: %d\n",
1476 (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
1477 printf("| AxisData SizeOf: %d\n",
1478 (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
1479 printf("|\n");
1480
1481 printf("| Perm: ");
1482 for (idim = 0; idim < ndim; ++idim) {
1483 printf("%d ", (int)NIT_PERM(iter)[idim]);
1484 }
1485 printf("\n");
1486 printf("| DTypes: ");
1487 for (iop = 0; iop < nop; ++iop) {
1488 printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
1489 }
1490 printf("\n");
1491 printf("| DTypes: ");
1492 for (iop = 0; iop < nop; ++iop) {
1493 if (NIT_DTYPES(iter)[iop] != NULL)
1494 PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
1495 else
1496 printf("(nil) ");
1497 printf(" ");
1498 }
1499 printf("\n");
1500 printf("| InitDataPtrs: ");
1501 for (iop = 0; iop < nop; ++iop) {
1502 printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
1503 }
1504 printf("\n");
1505 printf("| BaseOffsets: ");
1506 for (iop = 0; iop < nop; ++iop) {
1507 printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
1508 }
1509 printf("\n");
1510 if (itflags&NPY_ITFLAG_HASINDEX) {
1511 printf("| InitIndex: %d\n",
1512 (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
1513 }
1514 printf("| Operands: ");
1515 for (iop = 0; iop < nop; ++iop) {
1516 printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
1517 }
1518 printf("\n");
1519 printf("| Operand DTypes: ");
1520 for (iop = 0; iop < nop; ++iop) {
1521 PyArray_Descr *dtype;
1522 if (NIT_OPERANDS(iter)[iop] != NULL) {
1523 dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
1524 if (dtype != NULL)
1525 PyObject_Print((PyObject *)dtype, stdout, 0);
1526 else
1527 printf("(nil) ");
1528 }
1529 else {
1530 printf("(op nil) ");
1531 }
1532 printf(" ");
1533 }
1534 printf("\n");
1535 printf("| OpItFlags:\n");
1536 for (iop = 0; iop < nop; ++iop) {
1537 printf("| Flags[%d]: ", (int)iop);
1538 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
1539 printf("READ ");
1540 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
1541 printf("WRITE ");
1542 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
1543 printf("CAST ");
1544 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
1545 printf("BUFNEVER ");
1546 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
1547 printf("ALIGNED ");
1548 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
1549 printf("REDUCE ");
1550 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL)
1551 printf("VIRTUAL ");
1552 if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED)
1553 printf("WRITEMASKED ");
1554 printf("\n");
1555 }
1556 printf("|\n");
1557
1558 if (itflags&NPY_ITFLAG_BUFFER) {
1559 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1560 printf("| BufferData:\n");
1561 printf("| BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
1562 printf("| Size: %d\n", (int)NBF_SIZE(bufferdata));
1563 printf("| BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
1564 if (itflags&NPY_ITFLAG_REDUCE) {
1565 printf("| REDUCE Pos: %d\n",
1566 (int)NBF_REDUCE_POS(bufferdata));
1567 printf("| REDUCE OuterSize: %d\n",
1568 (int)NBF_REDUCE_OUTERSIZE(bufferdata));
1569 printf("| REDUCE OuterDim: %d\n",
1570 (int)NBF_REDUCE_OUTERDIM(bufferdata));
1571 }
1572 printf("| Strides: ");
1573 for (iop = 0; iop < nop; ++iop)
1574 printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
1575 printf("\n");
1576 /* Print the fixed strides when there's no inner loop */
1577 if (itflags&NPY_ITFLAG_EXLOOP) {
1578 npy_intp fixedstrides[NPY_MAXDIMS];
1579 printf("| Fixed Strides: ");
1580 NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
1581 for (iop = 0; iop < nop; ++iop)
1582 printf("%d ", (int)fixedstrides[iop]);
1583 printf("\n");
1584 }
1585 printf("| Ptrs: ");
1586 for (iop = 0; iop < nop; ++iop)
1587 printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
1588 printf("\n");
1589 if (itflags&NPY_ITFLAG_REDUCE) {
1590 printf("| REDUCE Outer Strides: ");
1591 for (iop = 0; iop < nop; ++iop)
1592 printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
1593 printf("\n");
1594 printf("| REDUCE Outer Ptrs: ");
1595 for (iop = 0; iop < nop; ++iop)
1596 printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
1597 printf("\n");
1598 }
1599 printf("| ReadTransferFn: ");
1600 for (iop = 0; iop < nop; ++iop)
1601 printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
1602 printf("\n");
1603 printf("| ReadTransferData: ");
1604 for (iop = 0; iop < nop; ++iop)
1605 printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
1606 printf("\n");
1607 printf("| WriteTransferFn: ");
1608 for (iop = 0; iop < nop; ++iop)
1609 printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
1610 printf("\n");
1611 printf("| WriteTransferData: ");
1612 for (iop = 0; iop < nop; ++iop)
1613 printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
1614 printf("\n");
1615 printf("| Buffers: ");
1616 for (iop = 0; iop < nop; ++iop)
1617 printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
1618 printf("\n");
1619 printf("|\n");
1620 }
1621
1622 axisdata = NIT_AXISDATA(iter);
1623 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1624 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1625 printf("| AxisData[%d]:\n", (int)idim);
1626 printf("| Shape: %d\n", (int)NAD_SHAPE(axisdata));
1627 printf("| Index: %d\n", (int)NAD_INDEX(axisdata));
1628 printf("| Strides: ");
1629 for (iop = 0; iop < nop; ++iop) {
1630 printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
1631 }
1632 printf("\n");
1633 if (itflags&NPY_ITFLAG_HASINDEX) {
1634 printf("| Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
1635 }
1636 printf("| Ptrs: ");
1637 for (iop = 0; iop < nop; ++iop) {
1638 printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
1639 }
1640 printf("\n");
1641 if (itflags&NPY_ITFLAG_HASINDEX) {
1642 printf("| Index Value: %d\n",
1643 (int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
1644 }
1645 }
1646
1647 printf("------- END ITERATOR DUMP -------\n");
1648 fflush(stdout);
1649
1650 NPY_DISABLE_C_API
1651 }
1652
1653 NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter * iter)1654 npyiter_coalesce_axes(NpyIter *iter)
1655 {
1656 npy_uint32 itflags = NIT_ITFLAGS(iter);
1657 int idim, ndim = NIT_NDIM(iter);
1658 int nop = NIT_NOP(iter);
1659
1660 npy_intp istrides, nstrides = NAD_NSTRIDES();
1661 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1662 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1663 NpyIter_AxisData *ad_compress = axisdata;
1664 npy_intp new_ndim = 1;
1665
1666 /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
1667 NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
1668
1669 for (idim = 0; idim < ndim-1; ++idim) {
1670 int can_coalesce = 1;
1671 npy_intp shape0 = NAD_SHAPE(ad_compress);
1672 npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
1673 npy_intp *strides0 = NAD_STRIDES(ad_compress);
1674 npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));
1675
1676 /* Check that all the axes can be coalesced */
1677 for (istrides = 0; istrides < nstrides; ++istrides) {
1678 if (!((shape0 == 1 && strides0[istrides] == 0) ||
1679 (shape1 == 1 && strides1[istrides] == 0)) &&
1680 (strides0[istrides]*shape0 != strides1[istrides])) {
1681 can_coalesce = 0;
1682 break;
1683 }
1684 }
1685
1686 if (can_coalesce) {
1687 npy_intp *strides = NAD_STRIDES(ad_compress);
1688
1689 NIT_ADVANCE_AXISDATA(axisdata, 1);
1690 NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
1691 for (istrides = 0; istrides < nstrides; ++istrides) {
1692 if (strides[istrides] == 0) {
1693 strides[istrides] = NAD_STRIDES(axisdata)[istrides];
1694 }
1695 }
1696 }
1697 else {
1698 NIT_ADVANCE_AXISDATA(axisdata, 1);
1699 NIT_ADVANCE_AXISDATA(ad_compress, 1);
1700 if (ad_compress != axisdata) {
1701 memcpy(ad_compress, axisdata, sizeof_axisdata);
1702 }
1703 ++new_ndim;
1704 }
1705 }
1706
1707 /*
1708 * If the number of axes shrunk, reset the perm and
1709 * compress the data into the new layout.
1710 */
1711 if (new_ndim < ndim) {
1712 npy_int8 *perm = NIT_PERM(iter);
1713
1714 /* Reset to an identity perm */
1715 for (idim = 0; idim < new_ndim; ++idim) {
1716 perm[idim] = (npy_int8)idim;
1717 }
1718 NIT_NDIM(iter) = new_ndim;
1719 }
1720 }
1721
1722 /*
1723 * If errmsg is non-NULL, it should point to a variable which will
1724 * receive the error message, and no Python exception will be set.
1725 * This is so that the function can be called from code not holding
1726 * the GIL.
1727 */
1728 NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter * iter,char ** errmsg)1729 npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
1730 {
1731 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1732 /*int ndim = NIT_NDIM(iter);*/
1733 int iop = 0, nop = NIT_NOP(iter);
1734
1735 npy_intp i;
1736 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1737 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1738 PyArray_Descr **op_dtype = NIT_DTYPES(iter);
1739 npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
1740 char *buffer, **buffers = NBF_BUFFERS(bufferdata);
1741
1742 for (iop = 0; iop < nop; ++iop) {
1743 npyiter_opitflags flags = op_itflags[iop];
1744
1745 /*
1746 * If we have determined that a buffer may be needed,
1747 * allocate one.
1748 */
1749 if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
1750 npy_intp itemsize = op_dtype[iop]->elsize;
1751 buffer = PyArray_malloc(itemsize*buffersize);
1752 if (buffer == NULL) {
1753 if (errmsg == NULL) {
1754 PyErr_NoMemory();
1755 }
1756 else {
1757 *errmsg = "out of memory";
1758 }
1759 goto fail;
1760 }
1761 if (PyDataType_FLAGCHK(op_dtype[iop], NPY_NEEDS_INIT)) {
1762 memset(buffer, '\0', itemsize*buffersize);
1763 }
1764 buffers[iop] = buffer;
1765 }
1766 }
1767
1768 return 1;
1769
1770 fail:
1771 for (i = 0; i < iop; ++i) {
1772 if (buffers[i] != NULL) {
1773 PyArray_free(buffers[i]);
1774 buffers[i] = NULL;
1775 }
1776 }
1777 return 0;
1778 }
1779
1780 /*
1781 * This sets the AXISDATA portion of the iterator to the specified
1782 * iterindex, updating the pointers as well. This function does
1783 * no error checking.
1784 */
1785 NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter * iter,npy_intp iterindex)1786 npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
1787 {
1788 npy_uint32 itflags = NIT_ITFLAGS(iter);
1789 int idim, ndim = NIT_NDIM(iter);
1790 int nop = NIT_NOP(iter);
1791
1792 char **dataptr;
1793 NpyIter_AxisData *axisdata;
1794 npy_intp sizeof_axisdata;
1795 npy_intp istrides, nstrides, i, shape;
1796
1797 axisdata = NIT_AXISDATA(iter);
1798 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1799 nstrides = NAD_NSTRIDES();
1800
1801 NIT_ITERINDEX(iter) = iterindex;
1802
1803 ndim = ndim ? ndim : 1;
1804
1805 if (iterindex == 0) {
1806 dataptr = NIT_RESETDATAPTR(iter);
1807
1808 for (idim = 0; idim < ndim; ++idim) {
1809 char **ptrs;
1810 NAD_INDEX(axisdata) = 0;
1811 ptrs = NAD_PTRS(axisdata);
1812 for (istrides = 0; istrides < nstrides; ++istrides) {
1813 ptrs[istrides] = dataptr[istrides];
1814 }
1815
1816 NIT_ADVANCE_AXISDATA(axisdata, 1);
1817 }
1818 }
1819 else {
1820 /*
1821 * Set the multi-index, from the fastest-changing to the
1822 * slowest-changing.
1823 */
1824 axisdata = NIT_AXISDATA(iter);
1825 shape = NAD_SHAPE(axisdata);
1826 i = iterindex;
1827 iterindex /= shape;
1828 NAD_INDEX(axisdata) = i - iterindex * shape;
1829 for (idim = 0; idim < ndim-1; ++idim) {
1830 NIT_ADVANCE_AXISDATA(axisdata, 1);
1831
1832 shape = NAD_SHAPE(axisdata);
1833 i = iterindex;
1834 iterindex /= shape;
1835 NAD_INDEX(axisdata) = i - iterindex * shape;
1836 }
1837
1838 dataptr = NIT_RESETDATAPTR(iter);
1839
1840 /*
1841 * Accumulate the successive pointers with their
1842 * offsets in the opposite order, starting from the
1843 * original data pointers.
1844 */
1845 for (idim = 0; idim < ndim; ++idim) {
1846 npy_intp *strides;
1847 char **ptrs;
1848
1849 strides = NAD_STRIDES(axisdata);
1850 ptrs = NAD_PTRS(axisdata);
1851
1852 i = NAD_INDEX(axisdata);
1853
1854 for (istrides = 0; istrides < nstrides; ++istrides) {
1855 ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
1856 }
1857
1858 dataptr = ptrs;
1859
1860 NIT_ADVANCE_AXISDATA(axisdata, -1);
1861 }
1862 }
1863 }
1864
1865 /*
1866 * This gets called after the buffers have been exhausted, and
1867 * their data needs to be written back to the arrays. The multi-index
1868 * must be positioned for the beginning of the buffer.
1869 */
1870 NPY_NO_EXPORT int
npyiter_copy_from_buffers(NpyIter * iter)1871 npyiter_copy_from_buffers(NpyIter *iter)
1872 {
1873 npy_uint32 itflags = NIT_ITFLAGS(iter);
1874 int ndim = NIT_NDIM(iter);
1875 int iop, nop = NIT_NOP(iter);
1876 int maskop = NIT_MASKOP(iter);
1877
1878 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1879 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1880 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
1881 *reduce_outeraxisdata = NULL;
1882
1883 PyArray_Descr **dtypes = NIT_DTYPES(iter);
1884 npy_intp transfersize = NBF_SIZE(bufferdata);
1885 npy_intp *strides = NBF_STRIDES(bufferdata),
1886 *ad_strides = NAD_STRIDES(axisdata);
1887 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1888 char **ad_ptrs = NAD_PTRS(axisdata);
1889 char **buffers = NBF_BUFFERS(bufferdata);
1890 char *buffer;
1891
1892 npy_intp reduce_outerdim = 0;
1893 npy_intp *reduce_outerstrides = NULL;
1894
1895 PyArray_StridedUnaryOp *stransfer = NULL;
1896 NpyAuxData *transferdata = NULL;
1897
1898 npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
1899 NPY_SIZEOF_INTP;
1900
1901 /* If we're past the end, nothing to copy */
1902 if (NBF_SIZE(bufferdata) == 0) {
1903 return 0;
1904 }
1905
1906 NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
1907
1908 if (itflags&NPY_ITFLAG_REDUCE) {
1909 reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
1910 reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
1911 reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
1912 transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
1913 }
1914
1915 for (iop = 0; iop < nop; ++iop) {
1916 stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
1917 transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
1918 buffer = buffers[iop];
1919 /*
1920 * Copy the data back to the arrays. If the type has refs,
1921 * this function moves them so the buffer's refs are released.
1922 *
1923 * The flag USINGBUFFER is set when the buffer was used, so
1924 * only copy back when this flag is on.
1925 */
1926 if ((stransfer != NULL) &&
1927 (op_itflags[iop]&(NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER))
1928 == (NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER)) {
1929 npy_intp op_transfersize;
1930
1931 npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
1932 int ndim_transfer;
1933
1934 NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
1935 (int)iop);
1936
1937 /*
1938 * If this operand is being reduced in the inner loop,
1939 * its buffering stride was set to zero, and just
1940 * one element was copied.
1941 */
1942 if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
1943 if (strides[iop] == 0) {
1944 if (reduce_outerstrides[iop] == 0) {
1945 op_transfersize = 1;
1946 src_stride = 0;
1947 dst_strides = &src_stride;
1948 dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1949 dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1950 ndim_transfer = 1;
1951 }
1952 else {
1953 op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
1954 src_stride = reduce_outerstrides[iop];
1955 dst_strides =
1956 &NAD_STRIDES(reduce_outeraxisdata)[iop];
1957 dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1958 dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1959 ndim_transfer = ndim - reduce_outerdim;
1960 }
1961 }
1962 else {
1963 if (reduce_outerstrides[iop] == 0) {
1964 op_transfersize = NBF_SIZE(bufferdata);
1965 src_stride = strides[iop];
1966 dst_strides = &ad_strides[iop];
1967 dst_coords = &NAD_INDEX(axisdata);
1968 dst_shape = &NAD_SHAPE(axisdata);
1969 ndim_transfer = reduce_outerdim ?
1970 reduce_outerdim : 1;
1971 }
1972 else {
1973 op_transfersize = transfersize;
1974 src_stride = strides[iop];
1975 dst_strides = &ad_strides[iop];
1976 dst_coords = &NAD_INDEX(axisdata);
1977 dst_shape = &NAD_SHAPE(axisdata);
1978 ndim_transfer = ndim;
1979 }
1980 }
1981 }
1982 else {
1983 op_transfersize = transfersize;
1984 src_stride = strides[iop];
1985 dst_strides = &ad_strides[iop];
1986 dst_coords = &NAD_INDEX(axisdata);
1987 dst_shape = &NAD_SHAPE(axisdata);
1988 ndim_transfer = ndim;
1989 }
1990
1991 NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
1992 "operand %d (%d items)\n",
1993 (int)iop, (int)op_transfersize);
1994
1995 /* WRITEMASKED operand */
1996 if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
1997 npy_bool *maskptr;
1998
1999 /*
2000 * The mask pointer may be in the buffer or in
2001 * the array, detect which one.
2002 */
2003 if ((op_itflags[maskop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2004 maskptr = (npy_bool *)buffers[maskop];
2005 }
2006 else {
2007 maskptr = (npy_bool *)ad_ptrs[maskop];
2008 }
2009
2010 if (PyArray_TransferMaskedStridedToNDim(ndim_transfer,
2011 ad_ptrs[iop], dst_strides, axisdata_incr,
2012 buffer, src_stride,
2013 maskptr, strides[maskop],
2014 dst_coords, axisdata_incr,
2015 dst_shape, axisdata_incr,
2016 op_transfersize, dtypes[iop]->elsize,
2017 (PyArray_MaskedStridedUnaryOp *)stransfer,
2018 transferdata) < 0) {
2019 return -1;
2020 }
2021 }
2022 /* Regular operand */
2023 else {
2024 if (PyArray_TransferStridedToNDim(ndim_transfer,
2025 ad_ptrs[iop], dst_strides, axisdata_incr,
2026 buffer, src_stride,
2027 dst_coords, axisdata_incr,
2028 dst_shape, axisdata_incr,
2029 op_transfersize, dtypes[iop]->elsize,
2030 stransfer,
2031 transferdata) < 0) {
2032 return -1;
2033 }
2034 }
2035 }
2036 /* If there's no copy back, we may have to decrement refs. In
2037 * this case, the transfer function has a 'decsrcref' transfer
2038 * function, so we can use it to do the decrement.
2039 *
2040 * The flag USINGBUFFER is set when the buffer was used, so
2041 * only decrement refs when this flag is on.
2042 */
2043 else if (stransfer != NULL &&
2044 (op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2045 NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
2046 "of operand %d\n", (int)iop);
2047 /* Decrement refs */
2048 if (stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
2049 transfersize, dtypes[iop]->elsize,
2050 transferdata) < 0) {
2051 /* Since this should only decrement, it should never error */
2052 assert(0);
2053 return -1;
2054 }
2055 /*
2056 * Zero out the memory for safety. For instance,
2057 * if during iteration some Python code copied an
2058 * array pointing into the buffer, it will get None
2059 * values for its references after this.
2060 */
2061 memset(buffer, 0, dtypes[iop]->elsize*transfersize);
2062 }
2063 }
2064
2065 NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
2066 return 0;
2067 }
2068
2069 /*
2070 * This gets called after the iterator has been positioned to a multi-index
2071 * for the start of a buffer. It decides which operands need a buffer,
2072 * and copies the data into the buffers.
2073 */
2074 NPY_NO_EXPORT int
npyiter_copy_to_buffers(NpyIter * iter,char ** prev_dataptrs)2075 npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
2076 {
2077 npy_uint32 itflags = NIT_ITFLAGS(iter);
2078 int ndim = NIT_NDIM(iter);
2079 int iop, nop = NIT_NOP(iter);
2080
2081 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2082 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2083 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
2084 *reduce_outeraxisdata = NULL;
2085
2086 PyArray_Descr **dtypes = NIT_DTYPES(iter);
2087 PyArrayObject **operands = NIT_OPERANDS(iter);
2088 npy_intp *strides = NBF_STRIDES(bufferdata),
2089 *ad_strides = NAD_STRIDES(axisdata);
2090 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2091 char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
2092 char **buffers = NBF_BUFFERS(bufferdata);
2093 npy_intp iterindex, iterend, transfersize,
2094 singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
2095 int is_onestride = 0, any_buffered = 0;
2096
2097 npy_intp *reduce_outerstrides = NULL;
2098 char **reduce_outerptrs = NULL;
2099
2100 PyArray_StridedUnaryOp *stransfer = NULL;
2101 NpyAuxData *transferdata = NULL;
2102
2103 /*
2104 * Have to get this flag before npyiter_checkreducesize sets
2105 * it for the next iteration.
2106 */
2107 npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
2108 ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
2109
2110 npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
2111 NPY_SIZEOF_INTP;
2112
2113 NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
2114
2115 /* Calculate the size if using any buffers */
2116 iterindex = NIT_ITERINDEX(iter);
2117 iterend = NIT_ITEREND(iter);
2118 transfersize = NBF_BUFFERSIZE(bufferdata);
2119 if (transfersize > iterend - iterindex) {
2120 transfersize = iterend - iterindex;
2121 }
2122
2123 /* If last time around, the reduce loop structure was full, we reuse it */
2124 if (reuse_reduce_loops) {
2125 npy_intp full_transfersize, prev_reduce_outersize;
2126
2127 prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2128 reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2129 reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2130 reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
2131 reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2132 reduce_innersize = NBF_SIZE(bufferdata);
2133 NBF_REDUCE_POS(bufferdata) = 0;
2134 /*
2135 * Try to do make the outersize as big as possible. This allows
2136 * it to shrink when processing the last bit of the outer reduce loop,
2137 * then grow again at the beginnning of the next outer reduce loop.
2138 */
2139 NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
2140 NAD_INDEX(reduce_outeraxisdata));
2141 full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2142 /* If the full transfer size doesn't fit in the buffer, truncate it */
2143 if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
2144 NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2145 transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2146 }
2147 else {
2148 transfersize = full_transfersize;
2149 }
2150 if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
2151 /*
2152 * If the previous time around less data was copied it may not
2153 * be safe to reuse the buffers even if the pointers match.
2154 */
2155 reuse_reduce_loops = 0;
2156 }
2157 NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2158
2159 NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
2160 "itersize: %d\n",
2161 (int)transfersize,
2162 (int)reduce_innersize,
2163 (int)NpyIter_GetIterSize(iter));
2164 NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
2165 (int)NBF_REDUCE_OUTERSIZE(bufferdata));
2166 }
2167 /*
2168 * If there are any reduction operands, we may have to make
2169 * the size smaller so we don't copy the same value into
2170 * a buffer twice, as the buffering does not have a mechanism
2171 * to combine values itself.
2172 */
2173 else if (itflags&NPY_ITFLAG_REDUCE) {
2174 NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
2175 transfersize = npyiter_checkreducesize(iter, transfersize,
2176 &reduce_innersize,
2177 &reduce_outerdim);
2178 NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
2179 "itersize: %d\n",
2180 (int)transfersize,
2181 (int)reduce_innersize,
2182 (int)NpyIter_GetIterSize(iter));
2183
2184 reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2185 reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2186 reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2187 NBF_SIZE(bufferdata) = reduce_innersize;
2188 NBF_REDUCE_POS(bufferdata) = 0;
2189 NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
2190 NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2191 if (reduce_innersize == 0) {
2192 NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
2193 return 0;
2194 }
2195 else {
2196 NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2197 }
2198 }
2199 else {
2200 NBF_SIZE(bufferdata) = transfersize;
2201 NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
2202 }
2203
2204 /* Calculate the maximum size if using a single stride and no buffers */
2205 singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
2206 if (singlestridesize > iterend - iterindex) {
2207 singlestridesize = iterend - iterindex;
2208 }
2209 if (singlestridesize >= transfersize) {
2210 is_onestride = 1;
2211 }
2212
2213 for (iop = 0; iop < nop; ++iop) {
2214 /*
2215 * If the buffer is write-only, these two are NULL, and the buffer
2216 * pointers will be set up but the read copy won't be done
2217 */
2218 stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
2219 transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
2220 switch (op_itflags[iop]&
2221 (NPY_OP_ITFLAG_BUFNEVER|
2222 NPY_OP_ITFLAG_CAST|
2223 NPY_OP_ITFLAG_REDUCE)) {
2224 /* Never need to buffer this operand */
2225 case NPY_OP_ITFLAG_BUFNEVER:
2226 ptrs[iop] = ad_ptrs[iop];
2227 if (itflags&NPY_ITFLAG_REDUCE) {
2228 reduce_outerstrides[iop] = reduce_innersize *
2229 strides[iop];
2230 reduce_outerptrs[iop] = ptrs[iop];
2231 }
2232 /*
2233 * Should not adjust the stride - ad_strides[iop]
2234 * could be zero, but strides[iop] was initialized
2235 * to the first non-trivial stride.
2236 */
2237 stransfer = NULL;
2238 /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2239 break;
2240 /* Never need to buffer this operand */
2241 case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
2242 ptrs[iop] = ad_ptrs[iop];
2243 reduce_outerptrs[iop] = ptrs[iop];
2244 reduce_outerstrides[iop] = 0;
2245 /*
2246 * Should not adjust the stride - ad_strides[iop]
2247 * could be zero, but strides[iop] was initialized
2248 * to the first non-trivial stride.
2249 */
2250 stransfer = NULL;
2251 /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2252 break;
2253 /* Just a copy */
2254 case 0:
2255 /* Do not reuse buffer if it did not exist */
2256 if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2257 (prev_dataptrs != NULL)) {
2258 prev_dataptrs[iop] = NULL;
2259 }
2260 /*
2261 * No copyswap or cast was requested, so all we're
2262 * doing is copying the data to fill the buffer and
2263 * produce a single stride. If the underlying data
2264 * already does that, no need to copy it.
2265 */
2266 if (is_onestride) {
2267 ptrs[iop] = ad_ptrs[iop];
2268 strides[iop] = ad_strides[iop];
2269 stransfer = NULL;
2270 /* Signal that the buffer is not being used */
2271 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2272 }
2273 /* If some other op is reduced, we have a double reduce loop */
2274 else if ((itflags&NPY_ITFLAG_REDUCE) &&
2275 (reduce_outerdim == 1) &&
2276 (transfersize/reduce_innersize <=
2277 NAD_SHAPE(reduce_outeraxisdata) -
2278 NAD_INDEX(reduce_outeraxisdata))) {
2279 ptrs[iop] = ad_ptrs[iop];
2280 reduce_outerptrs[iop] = ptrs[iop];
2281 strides[iop] = ad_strides[iop];
2282 reduce_outerstrides[iop] =
2283 NAD_STRIDES(reduce_outeraxisdata)[iop];
2284 stransfer = NULL;
2285 /* Signal that the buffer is not being used */
2286 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2287 }
2288 else {
2289 /* In this case, the buffer is being used */
2290 ptrs[iop] = buffers[iop];
2291 strides[iop] = dtypes[iop]->elsize;
2292 if (itflags&NPY_ITFLAG_REDUCE) {
2293 reduce_outerstrides[iop] = reduce_innersize *
2294 strides[iop];
2295 reduce_outerptrs[iop] = ptrs[iop];
2296 }
2297 /* Signal that the buffer is being used */
2298 op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2299 }
2300 break;
2301 /* Just a copy, but with a reduction */
2302 case NPY_OP_ITFLAG_REDUCE:
2303 /* Do not reuse buffer if it did not exist */
2304 if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2305 (prev_dataptrs != NULL)) {
2306 prev_dataptrs[iop] = NULL;
2307 }
2308 if (ad_strides[iop] == 0) {
2309 strides[iop] = 0;
2310 /* It's all in one stride in the inner loop dimension */
2311 if (is_onestride) {
2312 NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
2313 ptrs[iop] = ad_ptrs[iop];
2314 reduce_outerstrides[iop] = 0;
2315 stransfer = NULL;
2316 /* Signal that the buffer is not being used */
2317 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2318 }
2319 /* It's all in one stride in the reduce outer loop */
2320 else if ((reduce_outerdim > 0) &&
2321 (transfersize/reduce_innersize <=
2322 NAD_SHAPE(reduce_outeraxisdata) -
2323 NAD_INDEX(reduce_outeraxisdata))) {
2324 NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
2325 (int)iop);
2326 ptrs[iop] = ad_ptrs[iop];
2327 /* Outer reduce loop advances by one item */
2328 reduce_outerstrides[iop] =
2329 NAD_STRIDES(reduce_outeraxisdata)[iop];
2330 stransfer = NULL;
2331 /* Signal that the buffer is not being used */
2332 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2333 }
2334 /* In this case, the buffer is being used */
2335 else {
2336 NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
2337 ptrs[iop] = buffers[iop];
2338 /* Both outer and inner reduce loops have stride 0 */
2339 if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2340 reduce_outerstrides[iop] = 0;
2341 }
2342 /* Outer reduce loop advances by one item */
2343 else {
2344 reduce_outerstrides[iop] = dtypes[iop]->elsize;
2345 }
2346 /* Signal that the buffer is being used */
2347 op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2348 }
2349
2350 }
2351 else if (is_onestride) {
2352 NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
2353 ptrs[iop] = ad_ptrs[iop];
2354 strides[iop] = ad_strides[iop];
2355 reduce_outerstrides[iop] = 0;
2356 stransfer = NULL;
2357 /* Signal that the buffer is not being used */
2358 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2359 }
2360 else {
2361 /* It's all in one stride in the reduce outer loop */
2362 if ((reduce_outerdim == 1) &&
2363 (transfersize/reduce_innersize <=
2364 NAD_SHAPE(reduce_outeraxisdata) -
2365 NAD_INDEX(reduce_outeraxisdata))) {
2366 ptrs[iop] = ad_ptrs[iop];
2367 strides[iop] = ad_strides[iop];
2368 /* Outer reduce loop advances by one item */
2369 reduce_outerstrides[iop] =
2370 NAD_STRIDES(reduce_outeraxisdata)[iop];
2371 stransfer = NULL;
2372 /* Signal that the buffer is not being used */
2373 op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2374 }
2375 /* In this case, the buffer is being used */
2376 else {
2377 ptrs[iop] = buffers[iop];
2378 strides[iop] = dtypes[iop]->elsize;
2379
2380 if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2381 /* Reduction in outer reduce loop */
2382 reduce_outerstrides[iop] = 0;
2383 }
2384 else {
2385 /* Advance to next items in outer reduce loop */
2386 reduce_outerstrides[iop] = reduce_innersize *
2387 dtypes[iop]->elsize;
2388 }
2389 /* Signal that the buffer is being used */
2390 op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2391 }
2392 }
2393 reduce_outerptrs[iop] = ptrs[iop];
2394 break;
2395 default:
2396 /* In this case, the buffer is always being used */
2397 any_buffered = 1;
2398
2399 /* Signal that the buffer is being used */
2400 op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2401
2402 if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
2403 ptrs[iop] = buffers[iop];
2404 strides[iop] = dtypes[iop]->elsize;
2405 if (itflags&NPY_ITFLAG_REDUCE) {
2406 reduce_outerstrides[iop] = reduce_innersize *
2407 strides[iop];
2408 reduce_outerptrs[iop] = ptrs[iop];
2409 }
2410 }
2411 /* The buffer is being used with reduction */
2412 else {
2413 ptrs[iop] = buffers[iop];
2414 if (ad_strides[iop] == 0) {
2415 NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
2416 strides[iop] = 0;
2417 /* Both outer and inner reduce loops have stride 0 */
2418 if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2419 NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2420 reduce_outerstrides[iop] = 0;
2421 }
2422 /* Outer reduce loop advances by one item */
2423 else {
2424 NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2425 reduce_outerstrides[iop] = dtypes[iop]->elsize;
2426 }
2427 }
2428 else {
2429 NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
2430 strides[iop] = dtypes[iop]->elsize;
2431
2432 if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2433 NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2434 /* Reduction in outer reduce loop */
2435 reduce_outerstrides[iop] = 0;
2436 }
2437 else {
2438 NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2439 /* Advance to next items in outer reduce loop */
2440 reduce_outerstrides[iop] = reduce_innersize *
2441 dtypes[iop]->elsize;
2442 }
2443 }
2444 reduce_outerptrs[iop] = ptrs[iop];
2445 }
2446 break;
2447 }
2448
2449 if (stransfer != NULL) {
2450 npy_intp src_itemsize;
2451 npy_intp op_transfersize;
2452
2453 npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
2454 int ndim_transfer;
2455
2456 npy_bool skip_transfer = 0;
2457
2458 src_itemsize = PyArray_DTYPE(operands[iop])->elsize;
2459
2460 /* If stransfer wasn't set to NULL, buffering is required */
2461 any_buffered = 1;
2462
2463 /*
2464 * If this operand is being reduced in the inner loop,
2465 * set its buffering stride to zero, and just copy
2466 * one element.
2467 */
2468 if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
2469 if (ad_strides[iop] == 0) {
2470 strides[iop] = 0;
2471 if (reduce_outerstrides[iop] == 0) {
2472 op_transfersize = 1;
2473 dst_stride = 0;
2474 src_strides = &dst_stride;
2475 src_coords = &NAD_INDEX(reduce_outeraxisdata);
2476 src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2477 ndim_transfer = 1;
2478
2479 /*
2480 * When we're reducing a single element, and
2481 * it's still the same element, don't overwrite
2482 * it even when reuse reduce loops is unset.
2483 * This preserves the precision of the
2484 * intermediate calculation.
2485 */
2486 if (prev_dataptrs &&
2487 prev_dataptrs[iop] == ad_ptrs[iop]) {
2488 NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
2489 " copy because it's a 1-element reduce\n",
2490 (int)iop);
2491
2492 skip_transfer = 1;
2493 }
2494 }
2495 else {
2496 op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2497 dst_stride = reduce_outerstrides[iop];
2498 src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
2499 src_coords = &NAD_INDEX(reduce_outeraxisdata);
2500 src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2501 ndim_transfer = ndim - reduce_outerdim;
2502 }
2503 }
2504 else {
2505 if (reduce_outerstrides[iop] == 0) {
2506 op_transfersize = NBF_SIZE(bufferdata);
2507 dst_stride = strides[iop];
2508 src_strides = &ad_strides[iop];
2509 src_coords = &NAD_INDEX(axisdata);
2510 src_shape = &NAD_SHAPE(axisdata);
2511 ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
2512 }
2513 else {
2514 op_transfersize = transfersize;
2515 dst_stride = strides[iop];
2516 src_strides = &ad_strides[iop];
2517 src_coords = &NAD_INDEX(axisdata);
2518 src_shape = &NAD_SHAPE(axisdata);
2519 ndim_transfer = ndim;
2520 }
2521 }
2522 }
2523 else {
2524 op_transfersize = transfersize;
2525 dst_stride = strides[iop];
2526 src_strides = &ad_strides[iop];
2527 src_coords = &NAD_INDEX(axisdata);
2528 src_shape = &NAD_SHAPE(axisdata);
2529 ndim_transfer = ndim;
2530 }
2531
2532 /*
2533 * If the whole buffered loop structure remains the same,
2534 * and the source pointer for this data didn't change,
2535 * we don't have to copy the data again.
2536 */
2537 if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
2538 NPY_IT_DBG_PRINT2("Iterator: skipping operands %d "
2539 "copy (%d items) because loops are reused and the data "
2540 "pointer didn't change\n",
2541 (int)iop, (int)op_transfersize);
2542 skip_transfer = 1;
2543 }
2544
2545 /*
2546 * Copy data to the buffers if necessary.
2547 *
2548 * We always copy if the operand has references. In that case
2549 * a "write" function must be in use that either copies or clears
2550 * the buffer.
2551 * This write from buffer call does not check for skip-transfer
2552 * so we have to assume the buffer is cleared. For dtypes that
2553 * do not have references, we can assume that the write function
2554 * will leave the source (buffer) unmodified.
2555 */
2556 if (!skip_transfer || PyDataType_REFCHK(dtypes[iop])) {
2557 NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
2558 "buffer (%d items)\n",
2559 (int)iop, (int)op_transfersize);
2560
2561 if (PyArray_TransferNDimToStrided(
2562 ndim_transfer, ptrs[iop], dst_stride,
2563 ad_ptrs[iop], src_strides, axisdata_incr,
2564 src_coords, axisdata_incr,
2565 src_shape, axisdata_incr,
2566 op_transfersize, src_itemsize,
2567 stransfer, transferdata) < 0) {
2568 return -1;
2569 }
2570 }
2571 }
2572 }
2573
2574 /*
2575 * If buffering wasn't needed, we can grow the inner
2576 * loop to as large as possible.
2577 *
2578 * TODO: Could grow REDUCE loop too with some more logic above.
2579 */
2580 if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
2581 !(itflags&NPY_ITFLAG_REDUCE)) {
2582 if (singlestridesize > transfersize) {
2583 NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
2584 "from %d to %d since buffering wasn't needed\n",
2585 (int)NBF_SIZE(bufferdata), (int)singlestridesize);
2586 NBF_SIZE(bufferdata) = singlestridesize;
2587 NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
2588 }
2589 }
2590
2591 NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
2592
2593 NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
2594 "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
2595 return 0;
2596 }
2597
2598
2599 /**
2600 * This function clears any references still held by the buffers and should
2601 * only be used to discard buffers if an error occurred.
2602 *
2603 * @param iter Iterator
2604 */
2605 NPY_NO_EXPORT void
npyiter_clear_buffers(NpyIter * iter)2606 npyiter_clear_buffers(NpyIter *iter)
2607 {
2608 int nop = iter->nop;
2609 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2610
2611 if (NBF_SIZE(bufferdata) == 0) {
2612 /* if the buffers are empty already, there is nothing to do */
2613 return;
2614 }
2615
2616 if (!(NIT_ITFLAGS(iter) & NPY_ITFLAG_NEEDSAPI)) {
2617 /* Buffers do not require clearing, but should not be copied back */
2618 NBF_SIZE(bufferdata) = 0;
2619 return;
2620 }
2621
2622 /*
2623 * The iterator may be using a dtype with references, which always
2624 * requires the API. In that case, further cleanup may be necessary.
2625 *
2626 * TODO: At this time, we assume that a dtype having references
2627 * implies the need to hold the GIL at all times. In theory
2628 * we could broaden this definition for a new
2629 * `PyArray_Item_XDECREF` API and the assumption may become
2630 * incorrect.
2631 */
2632 PyObject *type, *value, *traceback;
2633 PyErr_Fetch(&type, &value, &traceback);
2634
2635 /* Cleanup any buffers with references */
2636 char **buffers = NBF_BUFFERS(bufferdata);
2637 PyArray_Descr **dtypes = NIT_DTYPES(iter);
2638 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2639 for (int iop = 0; iop < nop; ++iop, ++buffers) {
2640 /*
2641 * We may want to find a better way to do this, on the other hand,
2642 * this cleanup seems rare and fairly special. A dtype using
2643 * references (right now only us) must always keep the buffer in
2644 * a well defined state (either NULL or owning the reference).
2645 * Only we implement cleanup
2646 */
2647 if (!PyDataType_REFCHK(dtypes[iop]) ||
2648 !(op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER)) {
2649 continue;
2650 }
2651 if (*buffers == 0) {
2652 continue;
2653 }
2654 int itemsize = dtypes[iop]->elsize;
2655 for (npy_intp i = 0; i < NBF_SIZE(bufferdata); i++) {
2656 /*
2657 * See above comment, if this API is expanded the GIL assumption
2658 * could become incorrect.
2659 */
2660 PyArray_Item_XDECREF(*buffers + (itemsize * i), dtypes[iop]);
2661 }
2662 /* Clear out the buffer just to be sure */
2663 memset(*buffers, 0, NBF_SIZE(bufferdata) * itemsize);
2664 }
2665 /* Signal that the buffers are empty */
2666 NBF_SIZE(bufferdata) = 0;
2667 PyErr_Restore(type, value, traceback);
2668 }
2669
2670
2671 /*
2672 * This checks how much space can be buffered without encountering the
2673 * same value twice, or for operands whose innermost stride is zero,
2674 * without encountering a different value. By reducing the buffered
2675 * amount to this size, reductions can be safely buffered.
2676 *
2677 * Reductions are buffered with two levels of looping, to avoid
2678 * frequent copying to the buffers. The return value is the over-all
2679 * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
2680 * receives the size of the inner of the two levels of looping.
2681 *
2682 * The value placed in reduce_outerdim is the index into the AXISDATA
2683 * for where the second level of the double loop begins.
2684 *
2685 * The return value is always a multiple of the value placed in
2686 * reduce_innersize.
2687 */
2688 static npy_intp
npyiter_checkreducesize(NpyIter * iter,npy_intp count,npy_intp * reduce_innersize,npy_intp * reduce_outerdim)2689 npyiter_checkreducesize(NpyIter *iter, npy_intp count,
2690 npy_intp *reduce_innersize,
2691 npy_intp *reduce_outerdim)
2692 {
2693 npy_uint32 itflags = NIT_ITFLAGS(iter);
2694 int idim, ndim = NIT_NDIM(iter);
2695 int iop, nop = NIT_NOP(iter);
2696
2697 NpyIter_AxisData *axisdata;
2698 npy_intp sizeof_axisdata;
2699 npy_intp coord, shape, *strides;
2700 npy_intp reducespace = 1, factor;
2701 npy_bool nonzerocoord;
2702
2703 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2704 char stride0op[NPY_MAXARGS];
2705
2706 /* Default to no outer axis */
2707 *reduce_outerdim = 0;
2708
2709 /* If there's only one dimension, no need to calculate anything */
2710 if (ndim == 1 || count == 0) {
2711 *reduce_innersize = count;
2712 return count;
2713 }
2714
2715 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2716 axisdata = NIT_AXISDATA(iter);
2717
2718 /* Indicate which REDUCE operands have stride 0 in the inner loop */
2719 strides = NAD_STRIDES(axisdata);
2720 for (iop = 0; iop < nop; ++iop) {
2721 stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
2722 (strides[iop] == 0);
2723 NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
2724 "the inner loop? %d\n", iop, (int)stride0op[iop]);
2725 }
2726 shape = NAD_SHAPE(axisdata);
2727 coord = NAD_INDEX(axisdata);
2728 reducespace += (shape-coord-1);
2729 factor = shape;
2730 NIT_ADVANCE_AXISDATA(axisdata, 1);
2731
2732 /* Initialize nonzerocoord based on the first coordinate */
2733 nonzerocoord = (coord != 0);
2734
2735 /* Go forward through axisdata, calculating the space available */
2736 for (idim = 1; idim < ndim && reducespace < count;
2737 ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2738 NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
2739 (int)reducespace, (int)count);
2740
2741 strides = NAD_STRIDES(axisdata);
2742 for (iop = 0; iop < nop; ++iop) {
2743 /*
2744 * If a reduce stride switched from zero to non-zero, or
2745 * vice versa, that's the point where the data will stop
2746 * being the same element or will repeat, and if the
2747 * buffer starts with an all zero multi-index up to this
2748 * point, gives us the reduce_innersize.
2749 */
2750 if((stride0op[iop] && (strides[iop] != 0)) ||
2751 (!stride0op[iop] &&
2752 (strides[iop] == 0) &&
2753 (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
2754 NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
2755 "buffer to %d\n", (int)reducespace);
2756 /*
2757 * If we already found more elements than count, or
2758 * the starting coordinate wasn't zero, the two-level
2759 * looping is unnecessary/can't be done, so return.
2760 */
2761 if (count <= reducespace) {
2762 *reduce_innersize = count;
2763 NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2764 return count;
2765 }
2766 else if (nonzerocoord) {
2767 if (reducespace < count) {
2768 count = reducespace;
2769 }
2770 *reduce_innersize = count;
2771 /* NOTE: This is similar to the (coord != 0) case below. */
2772 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2773 return count;
2774 }
2775 else {
2776 *reduce_innersize = reducespace;
2777 break;
2778 }
2779 }
2780 }
2781 /* If we broke out of the loop early, we found reduce_innersize */
2782 if (iop != nop) {
2783 NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
2784 "reduce (%d of %d)\n", iop, nop);
2785 break;
2786 }
2787
2788 shape = NAD_SHAPE(axisdata);
2789 coord = NAD_INDEX(axisdata);
2790 if (coord != 0) {
2791 nonzerocoord = 1;
2792 }
2793 reducespace += (shape-coord-1) * factor;
2794 factor *= shape;
2795 }
2796
2797 /*
2798 * If there was any non-zero coordinate, the reduction inner
2799 * loop doesn't fit in the buffersize, or the reduction inner loop
2800 * covered the entire iteration size, can't do the double loop.
2801 */
2802 if (nonzerocoord || count < reducespace || idim == ndim) {
2803 if (reducespace < count) {
2804 count = reducespace;
2805 }
2806 *reduce_innersize = count;
2807 /* In this case, we can't reuse the reduce loops */
2808 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2809 return count;
2810 }
2811
2812 coord = NAD_INDEX(axisdata);
2813 if (coord != 0) {
2814 /*
2815 * In this case, it is only safe to reuse the buffer if the amount
2816 * of data copied is not more then the current axes, as is the
2817 * case when reuse_reduce_loops was active already.
2818 * It should be in principle OK when the idim loop returns immidiatly.
2819 */
2820 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2821 }
2822 else {
2823 /* In this case, we can reuse the reduce loops */
2824 NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2825 }
2826
2827 *reduce_innersize = reducespace;
2828 count /= reducespace;
2829
2830 NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
2831 (int)reducespace, (int)count);
2832
2833 /*
2834 * Continue through the rest of the dimensions. If there are
2835 * two separated reduction axes, we may have to cut the buffer
2836 * short again.
2837 */
2838 *reduce_outerdim = idim;
2839 reducespace = 1;
2840 factor = 1;
2841 /* Indicate which REDUCE operands have stride 0 at the current level */
2842 strides = NAD_STRIDES(axisdata);
2843 for (iop = 0; iop < nop; ++iop) {
2844 stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
2845 (strides[iop] == 0);
2846 NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
2847 "the outer loop? %d\n", iop, (int)stride0op[iop]);
2848 }
2849 shape = NAD_SHAPE(axisdata);
2850 reducespace += (shape-coord-1) * factor;
2851 factor *= shape;
2852 NIT_ADVANCE_AXISDATA(axisdata, 1);
2853 ++idim;
2854
2855 for (; idim < ndim && reducespace < count;
2856 ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2857 NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
2858 (int)reducespace, (int)count);
2859 strides = NAD_STRIDES(axisdata);
2860 for (iop = 0; iop < nop; ++iop) {
2861 /*
2862 * If a reduce stride switched from zero to non-zero, or
2863 * vice versa, that's the point where the data will stop
2864 * being the same element or will repeat, and if the
2865 * buffer starts with an all zero multi-index up to this
2866 * point, gives us the reduce_innersize.
2867 */
2868 if((stride0op[iop] && (strides[iop] != 0)) ||
2869 (!stride0op[iop] &&
2870 (strides[iop] == 0) &&
2871 (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
2872 NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
2873 "buffer to %d\n", (int)reducespace);
2874 /*
2875 * This terminates the outer level of our double loop.
2876 */
2877 if (count <= reducespace) {
2878 return count * (*reduce_innersize);
2879 }
2880 else {
2881 return reducespace * (*reduce_innersize);
2882 }
2883 }
2884 }
2885
2886 shape = NAD_SHAPE(axisdata);
2887 coord = NAD_INDEX(axisdata);
2888 if (coord != 0) {
2889 nonzerocoord = 1;
2890 }
2891 reducespace += (shape-coord-1) * factor;
2892 factor *= shape;
2893 }
2894
2895 if (reducespace < count) {
2896 count = reducespace;
2897 }
2898 return count * (*reduce_innersize);
2899 }
2900
2901 NPY_NO_EXPORT npy_bool
npyiter_has_writeback(NpyIter * iter)2902 npyiter_has_writeback(NpyIter *iter)
2903 {
2904 int iop, nop;
2905 npyiter_opitflags *op_itflags;
2906 if (iter == NULL) {
2907 return 0;
2908 }
2909 nop = NIT_NOP(iter);
2910 op_itflags = NIT_OPITFLAGS(iter);
2911
2912 for (iop=0; iop<nop; iop++) {
2913 if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
2914 return NPY_TRUE;
2915 }
2916 }
2917 return NPY_FALSE;
2918 }
2919 #undef NPY_ITERATOR_IMPLEMENTATION_CODE
2920