1 /*
2  * This file implements the construction, copying, and destruction
3  * aspects of NumPy's nditer.
4  *
5  * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
6  * The University of British Columbia
7  *
8  * Copyright (c) 2011 Enthought, Inc
9  *
10  * See LICENSE.txt for the license.
11  */
12 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
13 
14 /* Indicate that this .c file is allowed to include the header */
15 #define NPY_ITERATOR_IMPLEMENTATION_CODE
16 #include "nditer_impl.h"
17 
18 #include "arrayobject.h"
19 #include "array_coercion.h"
20 #include "templ_common.h"
21 #include "array_assign.h"
22 
23 /* Internal helper functions private to this file */
24 static int
25 npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
26 static int
27 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
28                         const npy_intp *itershape);
29 static int
30 npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
31                        int oa_ndim);
32 static int
33 npyiter_check_per_op_flags(npy_uint32 flags, npyiter_opitflags *op_itflags);
34 static int
35 npyiter_prepare_one_operand(PyArrayObject **op,
36                         char **op_dataptr,
37                         PyArray_Descr *op_request_dtype,
38                         PyArray_Descr** op_dtype,
39                         npy_uint32 flags,
40                         npy_uint32 op_flags, npyiter_opitflags *op_itflags);
41 static int
42 npyiter_prepare_operands(int nop,
43                     PyArrayObject **op_in,
44                     PyArrayObject **op,
45                     char **op_dataptr,
46                     PyArray_Descr **op_request_dtypes,
47                     PyArray_Descr **op_dtype,
48                     npy_uint32 flags,
49                     npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
50                     npy_int8 *out_maskop);
51 static int
52 npyiter_check_casting(int nop, PyArrayObject **op,
53                     PyArray_Descr **op_dtype,
54                     NPY_CASTING casting,
55                     npyiter_opitflags *op_itflags);
56 static int
57 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
58                     char **op_dataptr,
59                     const npy_uint32 *op_flags, int **op_axes,
60                     npy_intp const *itershape);
61 static NPY_INLINE int
62 npyiter_get_op_axis(int axis, npy_bool *reduction_axis);
63 static void
64 npyiter_replace_axisdata(
65         NpyIter *iter, int iop, PyArrayObject *op,
66         int orig_op_ndim, const int *op_axes);
67 static void
68 npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);
69 static void
70 npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order);
71 static void
72 npyiter_flip_negative_strides(NpyIter *iter);
73 static void
74 npyiter_reverse_axis_ordering(NpyIter *iter);
75 static void
76 npyiter_find_best_axis_ordering(NpyIter *iter);
77 static PyArray_Descr *
78 npyiter_get_common_dtype(int nop, PyArrayObject **op,
79                         const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
80                         PyArray_Descr **op_request_dtypes,
81                         int only_inputs);
82 static PyArrayObject *
83 npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
84                 npy_uint32 flags, npyiter_opitflags *op_itflags,
85                 int op_ndim, npy_intp const *shape,
86                 PyArray_Descr *op_dtype, const int *op_axes);
87 static int
88 npyiter_allocate_arrays(NpyIter *iter,
89                         npy_uint32 flags,
90                         PyArray_Descr **op_dtype, PyTypeObject *subtype,
91                         const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
92                         int **op_axes);
93 static void
94 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
95                             const npyiter_opitflags *op_itflags,
96                             double *subtype_priority, PyTypeObject **subtype);
97 static int
98 npyiter_allocate_transfer_functions(NpyIter *iter);
99 
100 
101 /*NUMPY_API
102  * Allocate a new iterator for multiple array objects, and advanced
103  * options for controlling the broadcasting, shape, and buffer size.
104  */
105 NPY_NO_EXPORT NpyIter *
NpyIter_AdvancedNew(int nop,PyArrayObject ** op_in,npy_uint32 flags,NPY_ORDER order,NPY_CASTING casting,npy_uint32 * op_flags,PyArray_Descr ** op_request_dtypes,int oa_ndim,int ** op_axes,npy_intp * itershape,npy_intp buffersize)106 NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
107                  NPY_ORDER order, NPY_CASTING casting,
108                  npy_uint32 *op_flags,
109                  PyArray_Descr **op_request_dtypes,
110                  int oa_ndim, int **op_axes, npy_intp *itershape,
111                  npy_intp buffersize)
112 {
113     npy_uint32 itflags = NPY_ITFLAG_IDENTPERM;
114     int idim, ndim;
115     int iop;
116 
117     /* The iterator being constructed */
118     NpyIter *iter;
119 
120     /* Per-operand values */
121     PyArrayObject **op;
122     PyArray_Descr **op_dtype;
123     npyiter_opitflags *op_itflags;
124     char **op_dataptr;
125 
126     npy_int8 *perm;
127     NpyIter_BufferData *bufferdata = NULL;
128     int any_allocate = 0, any_missing_dtypes = 0, need_subtype = 0;
129 
130     /* The subtype for automatically allocated outputs */
131     double subtype_priority = NPY_PRIORITY;
132     PyTypeObject *subtype = &PyArray_Type;
133 
134 #if NPY_IT_CONSTRUCTION_TIMING
135     npy_intp c_temp,
136             c_start,
137             c_check_op_axes,
138             c_check_global_flags,
139             c_calculate_ndim,
140             c_malloc,
141             c_prepare_operands,
142             c_fill_axisdata,
143             c_compute_index_strides,
144             c_apply_forced_iteration_order,
145             c_find_best_axis_ordering,
146             c_get_priority_subtype,
147             c_find_output_common_dtype,
148             c_check_casting,
149             c_allocate_arrays,
150             c_coalesce_axes,
151             c_prepare_buffers;
152 #endif
153 
154     NPY_IT_TIME_POINT(c_start);
155 
156     if (nop > NPY_MAXARGS) {
157         PyErr_Format(PyExc_ValueError,
158             "Cannot construct an iterator with more than %d operands "
159             "(%d were requested)", NPY_MAXARGS, nop);
160         return NULL;
161     }
162 
163     /*
164      * Before 1.8, if `oa_ndim == 0`, this meant `op_axes != NULL` was an error.
165      * With 1.8, `oa_ndim == -1` takes this role, while op_axes in that case
166      * enforces a 0-d iterator. Using `oa_ndim == 0` with `op_axes == NULL`
167      * is thus an error in 1.13 after deprecation.
168      */
169     if ((oa_ndim == 0) && (op_axes == NULL)) {
170         PyErr_Format(PyExc_ValueError,
171             "Using `oa_ndim == 0` when `op_axes` is NULL. "
172             "Use `oa_ndim == -1` or the MultiNew "
173             "iterator for NumPy <1.8 compatibility");
174         return NULL;
175     }
176 
177     /* Error check 'oa_ndim' and 'op_axes', which must be used together */
178     if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) {
179         return NULL;
180     }
181 
182     NPY_IT_TIME_POINT(c_check_op_axes);
183 
184     /* Check the global iterator flags */
185     if (!npyiter_check_global_flags(flags, &itflags)) {
186         return NULL;
187     }
188 
189     NPY_IT_TIME_POINT(c_check_global_flags);
190 
191     /* Calculate how many dimensions the iterator should have */
192     ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim);
193 
194     NPY_IT_TIME_POINT(c_calculate_ndim);
195 
196     /* Allocate memory for the iterator */
197     iter = (NpyIter*)
198                 PyObject_Malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
199 
200     NPY_IT_TIME_POINT(c_malloc);
201 
202     /* Fill in the basic data */
203     NIT_ITFLAGS(iter) = itflags;
204     NIT_NDIM(iter) = ndim;
205     NIT_NOP(iter) = nop;
206     NIT_MASKOP(iter) = -1;
207     NIT_ITERINDEX(iter) = 0;
208     memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
209 
210     op = NIT_OPERANDS(iter);
211     op_dtype = NIT_DTYPES(iter);
212     op_itflags = NIT_OPITFLAGS(iter);
213     op_dataptr = NIT_RESETDATAPTR(iter);
214 
215     /* Prepare all the operands */
216     if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr,
217                         op_request_dtypes, op_dtype,
218                         flags,
219                         op_flags, op_itflags,
220                         &NIT_MASKOP(iter))) {
221         PyObject_Free(iter);
222         return NULL;
223     }
224     /* Set resetindex to zero as well (it's just after the resetdataptr) */
225     op_dataptr[nop] = 0;
226 
227     NPY_IT_TIME_POINT(c_prepare_operands);
228 
229     /*
230      * Initialize buffer data (must set the buffers and transferdata
231      * to NULL before we might deallocate the iterator).
232      */
233     if (itflags & NPY_ITFLAG_BUFFER) {
234         bufferdata = NIT_BUFFERDATA(iter);
235         NBF_SIZE(bufferdata) = 0;
236         memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
237         memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
238         memset(NBF_READTRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
239         memset(NBF_WRITETRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
240     }
241 
242     /* Fill in the AXISDATA arrays and set the ITERSIZE field */
243     if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr,
244                                         op_flags, op_axes, itershape)) {
245         NpyIter_Deallocate(iter);
246         return NULL;
247     }
248 
249     NPY_IT_TIME_POINT(c_fill_axisdata);
250 
251     if (itflags & NPY_ITFLAG_BUFFER) {
252         /*
253          * If buffering is enabled and no buffersize was given, use a default
254          * chosen to be big enough to get some amortization benefits, but
255          * small enough to be cache-friendly.
256          */
257         if (buffersize <= 0) {
258             buffersize = NPY_BUFSIZE;
259         }
260         /* No point in a buffer bigger than the iteration size */
261         if (buffersize > NIT_ITERSIZE(iter)) {
262             buffersize = NIT_ITERSIZE(iter);
263         }
264         NBF_BUFFERSIZE(bufferdata) = buffersize;
265 
266         /*
267          * Initialize for use in FirstVisit, which may be called before
268          * the buffers are filled and the reduce pos is updated.
269          */
270         NBF_REDUCE_POS(bufferdata) = 0;
271     }
272 
273     /*
274      * If an index was requested, compute the strides for it.
275      * Note that we must do this before changing the order of the
276      * axes
277      */
278     npyiter_compute_index_strides(iter, flags);
279 
280     NPY_IT_TIME_POINT(c_compute_index_strides);
281 
282     /* Initialize the perm to the identity */
283     perm = NIT_PERM(iter);
284     for(idim = 0; idim < ndim; ++idim) {
285         perm[idim] = (npy_int8)idim;
286     }
287 
288     /*
289      * If an iteration order is being forced, apply it.
290      */
291     npyiter_apply_forced_iteration_order(iter, order);
292     itflags = NIT_ITFLAGS(iter);
293 
294     NPY_IT_TIME_POINT(c_apply_forced_iteration_order);
295 
296     /* Set some flags for allocated outputs */
297     for (iop = 0; iop < nop; ++iop) {
298         if (op[iop] == NULL) {
299             /* Flag this so later we can avoid flipping axes */
300             any_allocate = 1;
301             /* If a subtype may be used, indicate so */
302             if (!(op_flags[iop] & NPY_ITER_NO_SUBTYPE)) {
303                 need_subtype = 1;
304             }
305             /*
306              * If the data type wasn't provided, will need to
307              * calculate it.
308              */
309             if (op_dtype[iop] == NULL) {
310                 any_missing_dtypes = 1;
311             }
312         }
313     }
314 
315     /*
316      * If the ordering was not forced, reorder the axes
317      * and flip negative strides to find the best one.
318      */
319     if (!(itflags & NPY_ITFLAG_FORCEDORDER)) {
320         if (ndim > 1) {
321             npyiter_find_best_axis_ordering(iter);
322         }
323         /*
324          * If there's an output being allocated, we must not negate
325          * any strides.
326          */
327         if (!any_allocate && !(flags & NPY_ITER_DONT_NEGATE_STRIDES)) {
328             npyiter_flip_negative_strides(iter);
329         }
330         itflags = NIT_ITFLAGS(iter);
331     }
332 
333     NPY_IT_TIME_POINT(c_find_best_axis_ordering);
334 
335     if (need_subtype) {
336         npyiter_get_priority_subtype(nop, op, op_itflags,
337                                      &subtype_priority, &subtype);
338     }
339 
340     NPY_IT_TIME_POINT(c_get_priority_subtype);
341 
342     /*
343      * If an automatically allocated output didn't have a specified
344      * dtype, we need to figure it out now, before allocating the outputs.
345      */
346     if (any_missing_dtypes || (flags & NPY_ITER_COMMON_DTYPE)) {
347         PyArray_Descr *dtype;
348         int only_inputs = !(flags & NPY_ITER_COMMON_DTYPE);
349 
350         op = NIT_OPERANDS(iter);
351         op_dtype = NIT_DTYPES(iter);
352 
353         dtype = npyiter_get_common_dtype(nop, op,
354                                     op_itflags, op_dtype,
355                                     op_request_dtypes,
356                                     only_inputs);
357         if (dtype == NULL) {
358             NpyIter_Deallocate(iter);
359             return NULL;
360         }
361         if (flags & NPY_ITER_COMMON_DTYPE) {
362             NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n");
363             /* Replace all the data types */
364             for (iop = 0; iop < nop; ++iop) {
365                 if (op_dtype[iop] != dtype) {
366                     Py_XDECREF(op_dtype[iop]);
367                     Py_INCREF(dtype);
368                     op_dtype[iop] = dtype;
369                 }
370             }
371         }
372         else {
373             NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n");
374             /* Replace the NULL data types */
375             for (iop = 0; iop < nop; ++iop) {
376                 if (op_dtype[iop] == NULL) {
377                     Py_INCREF(dtype);
378                     op_dtype[iop] = dtype;
379                 }
380             }
381         }
382         Py_DECREF(dtype);
383     }
384 
385     NPY_IT_TIME_POINT(c_find_output_common_dtype);
386 
387     /*
388      * All of the data types have been settled, so it's time
389      * to check that data type conversions are following the
390      * casting rules.
391      */
392     if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) {
393         NpyIter_Deallocate(iter);
394         return NULL;
395     }
396 
397     NPY_IT_TIME_POINT(c_check_casting);
398 
399     /*
400      * At this point, the iteration order has been finalized. so
401      * any allocation of ops that were NULL, or any temporary
402      * copying due to casting/byte order/alignment can be
403      * done now using a memory layout matching the iterator.
404      */
405     if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags,
406                             op_itflags, op_axes)) {
407         NpyIter_Deallocate(iter);
408         return NULL;
409     }
410 
411     NPY_IT_TIME_POINT(c_allocate_arrays);
412 
413     /*
414      * Finally, if a multi-index wasn't requested,
415      * it may be possible to coalesce some axes together.
416      */
417     if (ndim > 1 && !(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
418         npyiter_coalesce_axes(iter);
419         /*
420          * The operation may have changed the layout, so we have to
421          * get the internal pointers again.
422          */
423         itflags = NIT_ITFLAGS(iter);
424         ndim = NIT_NDIM(iter);
425         op = NIT_OPERANDS(iter);
426         op_dtype = NIT_DTYPES(iter);
427         op_itflags = NIT_OPITFLAGS(iter);
428         op_dataptr = NIT_RESETDATAPTR(iter);
429     }
430 
431     NPY_IT_TIME_POINT(c_coalesce_axes);
432 
433     /*
434      * Now that the axes are finished, check whether we can apply
435      * the single iteration optimization to the iternext function.
436      */
437     if (!(itflags & NPY_ITFLAG_BUFFER)) {
438         NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
439         if (itflags & NPY_ITFLAG_EXLOOP) {
440             if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
441                 NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
442             }
443         }
444         else if (NIT_ITERSIZE(iter) == 1) {
445             NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
446         }
447     }
448 
449     /*
450      * If REFS_OK was specified, check whether there are any
451      * reference arrays and flag it if so.
452      */
453     if (flags & NPY_ITER_REFS_OK) {
454         for (iop = 0; iop < nop; ++iop) {
455             PyArray_Descr *rdt = op_dtype[iop];
456             if ((rdt->flags & (NPY_ITEM_REFCOUNT |
457                                      NPY_ITEM_IS_POINTER |
458                                      NPY_NEEDS_PYAPI)) != 0) {
459                 /* Iteration needs API access */
460                 NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
461             }
462         }
463     }
464 
465     /* If buffering is set without delayed allocation */
466     if (itflags & NPY_ITFLAG_BUFFER) {
467         if (!npyiter_allocate_transfer_functions(iter)) {
468             NpyIter_Deallocate(iter);
469             return NULL;
470         }
471         if (!(itflags & NPY_ITFLAG_DELAYBUF)) {
472             /* Allocate the buffers */
473             if (!npyiter_allocate_buffers(iter, NULL)) {
474                 NpyIter_Deallocate(iter);
475                 return NULL;
476             }
477 
478             /* Prepare the next buffers and set iterend/size */
479             if (npyiter_copy_to_buffers(iter, NULL) < 0) {
480                 NpyIter_Deallocate(iter);
481                 return NULL;
482             }
483         }
484     }
485 
486     NPY_IT_TIME_POINT(c_prepare_buffers);
487 
488 #if NPY_IT_CONSTRUCTION_TIMING
489     printf("\nIterator construction timing:\n");
490     NPY_IT_PRINT_TIME_START(c_start);
491     NPY_IT_PRINT_TIME_VAR(c_check_op_axes);
492     NPY_IT_PRINT_TIME_VAR(c_check_global_flags);
493     NPY_IT_PRINT_TIME_VAR(c_calculate_ndim);
494     NPY_IT_PRINT_TIME_VAR(c_malloc);
495     NPY_IT_PRINT_TIME_VAR(c_prepare_operands);
496     NPY_IT_PRINT_TIME_VAR(c_fill_axisdata);
497     NPY_IT_PRINT_TIME_VAR(c_compute_index_strides);
498     NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order);
499     NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering);
500     NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype);
501     NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype);
502     NPY_IT_PRINT_TIME_VAR(c_check_casting);
503     NPY_IT_PRINT_TIME_VAR(c_allocate_arrays);
504     NPY_IT_PRINT_TIME_VAR(c_coalesce_axes);
505     NPY_IT_PRINT_TIME_VAR(c_prepare_buffers);
506     printf("\n");
507 #endif
508 
509     return iter;
510 }
511 
512 /*NUMPY_API
513  * Allocate a new iterator for more than one array object, using
514  * standard NumPy broadcasting rules and the default buffer size.
515  */
516 NPY_NO_EXPORT NpyIter *
NpyIter_MultiNew(int nop,PyArrayObject ** op_in,npy_uint32 flags,NPY_ORDER order,NPY_CASTING casting,npy_uint32 * op_flags,PyArray_Descr ** op_request_dtypes)517 NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
518                  NPY_ORDER order, NPY_CASTING casting,
519                  npy_uint32 *op_flags,
520                  PyArray_Descr **op_request_dtypes)
521 {
522     return NpyIter_AdvancedNew(nop, op_in, flags, order, casting,
523                             op_flags, op_request_dtypes,
524                             -1, NULL, NULL, 0);
525 }
526 
527 /*NUMPY_API
528  * Allocate a new iterator for one array object.
529  */
530 NPY_NO_EXPORT NpyIter *
NpyIter_New(PyArrayObject * op,npy_uint32 flags,NPY_ORDER order,NPY_CASTING casting,PyArray_Descr * dtype)531 NpyIter_New(PyArrayObject *op, npy_uint32 flags,
532                   NPY_ORDER order, NPY_CASTING casting,
533                   PyArray_Descr* dtype)
534 {
535     /* Split the flags into separate global and op flags */
536     npy_uint32 op_flags = flags & NPY_ITER_PER_OP_FLAGS;
537     flags &= NPY_ITER_GLOBAL_FLAGS;
538 
539     return NpyIter_AdvancedNew(1, &op, flags, order, casting,
540                             &op_flags, &dtype,
541                             -1, NULL, NULL, 0);
542 }
543 
544 /*NUMPY_API
545  * Makes a copy of the iterator
546  */
547 NPY_NO_EXPORT NpyIter *
NpyIter_Copy(NpyIter * iter)548 NpyIter_Copy(NpyIter *iter)
549 {
550     npy_uint32 itflags = NIT_ITFLAGS(iter);
551     int ndim = NIT_NDIM(iter);
552     int iop, nop = NIT_NOP(iter);
553     int out_of_memory = 0;
554 
555     npy_intp size;
556     NpyIter *newiter;
557     PyArrayObject **objects;
558     PyArray_Descr **dtypes;
559 
560     /* Allocate memory for the new iterator */
561     size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop);
562     newiter = (NpyIter*)PyObject_Malloc(size);
563 
564     /* Copy the raw values to the new iterator */
565     memcpy(newiter, iter, size);
566 
567     /* Take ownership of references to the operands and dtypes */
568     objects = NIT_OPERANDS(newiter);
569     dtypes = NIT_DTYPES(newiter);
570     for (iop = 0; iop < nop; ++iop) {
571         Py_INCREF(objects[iop]);
572         Py_INCREF(dtypes[iop]);
573     }
574 
575     /* Allocate buffers and make copies of the transfer data if necessary */
576     if (itflags & NPY_ITFLAG_BUFFER) {
577         NpyIter_BufferData *bufferdata;
578         npy_intp buffersize, itemsize;
579         char **buffers;
580         NpyAuxData **readtransferdata, **writetransferdata;
581 
582         bufferdata = NIT_BUFFERDATA(newiter);
583         buffers = NBF_BUFFERS(bufferdata);
584         readtransferdata = NBF_READTRANSFERDATA(bufferdata);
585         writetransferdata = NBF_WRITETRANSFERDATA(bufferdata);
586         buffersize = NBF_BUFFERSIZE(bufferdata);
587 
588         for (iop = 0; iop < nop; ++iop) {
589             if (buffers[iop] != NULL) {
590                 if (out_of_memory) {
591                     buffers[iop] = NULL;
592                 }
593                 else {
594                     itemsize = dtypes[iop]->elsize;
595                     buffers[iop] = PyArray_malloc(itemsize*buffersize);
596                     if (buffers[iop] == NULL) {
597                         out_of_memory = 1;
598                     }
599                     if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
600                         memset(buffers[iop], '\0', itemsize*buffersize);
601                     }
602                 }
603             }
604 
605             if (readtransferdata[iop] != NULL) {
606                 if (out_of_memory) {
607                     readtransferdata[iop] = NULL;
608                 }
609                 else {
610                     readtransferdata[iop] =
611                           NPY_AUXDATA_CLONE(readtransferdata[iop]);
612                     if (readtransferdata[iop] == NULL) {
613                         out_of_memory = 1;
614                     }
615                 }
616             }
617 
618             if (writetransferdata[iop] != NULL) {
619                 if (out_of_memory) {
620                     writetransferdata[iop] = NULL;
621                 }
622                 else {
623                     writetransferdata[iop] =
624                           NPY_AUXDATA_CLONE(writetransferdata[iop]);
625                     if (writetransferdata[iop] == NULL) {
626                         out_of_memory = 1;
627                     }
628                 }
629             }
630         }
631 
632         /* Initialize the buffers to the current iterindex */
633         if (!out_of_memory && NBF_SIZE(bufferdata) > 0) {
634             npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter));
635 
636             /* Prepare the next buffers and set iterend/size */
637             npyiter_copy_to_buffers(newiter, NULL);
638         }
639     }
640 
641     if (out_of_memory) {
642         NpyIter_Deallocate(newiter);
643         PyErr_NoMemory();
644         return NULL;
645     }
646 
647     return newiter;
648 }
649 
650 /*NUMPY_API
651  * Deallocate an iterator.
652  *
653  * To correctly work when an error is in progress, we have to check
654  * `PyErr_Occurred()`. This is necessary when buffers are not finalized
655  * or WritebackIfCopy is used. We could avoid that check by exposing a new
656  * function which is passed in whether or not a Python error is already set.
657  */
NPY_NO_EXPORT int
NpyIter_Deallocate(NpyIter *iter)
{
    /*
     * Start from "success" only if no Python error is already pending;
     * the buffer drain and writeback resolution below can also flip
     * this to failure.
     */
    int success = PyErr_Occurred() == NULL;

    npy_uint32 itflags;
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop;
    PyArray_Descr **dtype;
    PyArrayObject **object;
    npyiter_opitflags *op_itflags;

    /* Deallocating NULL is a no-op */
    if (iter == NULL) {
        return success;
    }

    itflags = NIT_ITFLAGS(iter);
    nop = NIT_NOP(iter);
    dtype = NIT_DTYPES(iter);
    object = NIT_OPERANDS(iter);
    op_itflags = NIT_OPITFLAGS(iter);

    /* Deallocate any buffers and buffering data */
    if (itflags & NPY_ITFLAG_BUFFER) {
        /* Ensure no data is held by the buffers before they are cleared */
        if (success) {
            /* Flush pending buffered writes back to the operands first */
            if (npyiter_copy_from_buffers(iter) < 0) {
                success = NPY_FAIL;
            }
        }
        else {
            /* An error is in progress: discard buffer contents instead */
            npyiter_clear_buffers(iter);
        }

        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        char **buffers;
        NpyAuxData **transferdata;

        /* buffers */
        buffers = NBF_BUFFERS(bufferdata);
        for (iop = 0; iop < nop; ++iop, ++buffers) {
            PyArray_free(*buffers);
        }
        /* read bufferdata */
        transferdata = NBF_READTRANSFERDATA(bufferdata);
        for(iop = 0; iop < nop; ++iop, ++transferdata) {
            if (*transferdata) {
                NPY_AUXDATA_FREE(*transferdata);
            }
        }
        /* write bufferdata */
        transferdata = NBF_WRITETRANSFERDATA(bufferdata);
        for(iop = 0; iop < nop; ++iop, ++transferdata) {
            if (*transferdata) {
                NPY_AUXDATA_FREE(*transferdata);
            }
        }
    }

    /*
     * Deallocate all the dtypes and objects that were iterated and resolve
     * any writeback buffers created by the iterator.
     */
    for (iop = 0; iop < nop; ++iop, ++dtype, ++object) {
        if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
            /* Commit the writeback on success, discard it on failure */
            if (success && PyArray_ResolveWritebackIfCopy(*object) < 0) {
                success = 0;
            }
            else {
                PyArray_DiscardWritebackIfCopy(*object);
            }
        }
        Py_XDECREF(*dtype);
        Py_XDECREF(*object);
    }

    /* Deallocate the iterator memory */
    PyObject_Free(iter);
    return success;
}
738 
739 
740 /* Checks 'flags' for (C|F)_ORDER_INDEX, MULTI_INDEX, and EXTERNAL_LOOP,
741  * setting the appropriate internal flags in 'itflags'.
742  *
743  * Returns 1 on success, 0 on error.
744  */
745 static int
npyiter_check_global_flags(npy_uint32 flags,npy_uint32 * itflags)746 npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
747 {
748     if ((flags & NPY_ITER_PER_OP_FLAGS) != 0) {
749         PyErr_SetString(PyExc_ValueError,
750                     "A per-operand flag was passed as a global flag "
751                     "to the iterator constructor");
752         return 0;
753     }
754 
755     /* Check for an index */
756     if (flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
757         if ((flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) ==
758                     (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
759             PyErr_SetString(PyExc_ValueError,
760                     "Iterator flags C_INDEX and "
761                     "F_INDEX cannot both be specified");
762             return 0;
763         }
764         (*itflags) |= NPY_ITFLAG_HASINDEX;
765     }
766     /* Check if a multi-index was requested */
767     if (flags & NPY_ITER_MULTI_INDEX) {
768         /*
769          * This flag primarily disables dimension manipulations that
770          * would produce an incorrect multi-index.
771          */
772         (*itflags) |= NPY_ITFLAG_HASMULTIINDEX;
773     }
774     /* Check if the caller wants to handle inner iteration */
775     if (flags & NPY_ITER_EXTERNAL_LOOP) {
776         if ((*itflags) & (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_HASMULTIINDEX)) {
777             PyErr_SetString(PyExc_ValueError,
778                     "Iterator flag EXTERNAL_LOOP cannot be used "
779                     "if an index or multi-index is being tracked");
780             return 0;
781         }
782         (*itflags) |= NPY_ITFLAG_EXLOOP;
783     }
784     /* Ranged */
785     if (flags & NPY_ITER_RANGED) {
786         (*itflags) |= NPY_ITFLAG_RANGE;
787         if ((flags & NPY_ITER_EXTERNAL_LOOP) &&
788                                     !(flags & NPY_ITER_BUFFERED)) {
789             PyErr_SetString(PyExc_ValueError,
790                     "Iterator flag RANGED cannot be used with "
791                     "the flag EXTERNAL_LOOP unless "
792                     "BUFFERED is also enabled");
793             return 0;
794         }
795     }
796     /* Buffering */
797     if (flags & NPY_ITER_BUFFERED) {
798         (*itflags) |= NPY_ITFLAG_BUFFER;
799         if (flags & NPY_ITER_GROWINNER) {
800             (*itflags) |= NPY_ITFLAG_GROWINNER;
801         }
802         if (flags & NPY_ITER_DELAY_BUFALLOC) {
803             (*itflags) |= NPY_ITFLAG_DELAYBUF;
804         }
805     }
806 
807     return 1;
808 }
809 
810 static int
npyiter_check_op_axes(int nop,int oa_ndim,int ** op_axes,const npy_intp * itershape)811 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
812                         const npy_intp *itershape)
813 {
814     char axes_dupcheck[NPY_MAXDIMS];
815     int iop, idim;
816 
817     if (oa_ndim < 0) {
818         /*
819          * If `oa_ndim < 0`, `op_axes` and `itershape` are signalled to
820          * be unused and should be NULL. (Before NumPy 1.8 this was
821          * signalled by `oa_ndim == 0`.)
822          */
823         if (op_axes != NULL || itershape != NULL) {
824             PyErr_Format(PyExc_ValueError,
825                     "If 'op_axes' or 'itershape' is not NULL in the iterator "
826                     "constructor, 'oa_ndim' must be zero or greater");
827             return 0;
828         }
829         return 1;
830     }
831     if (oa_ndim > NPY_MAXDIMS) {
832         PyErr_Format(PyExc_ValueError,
833                 "Cannot construct an iterator with more than %d dimensions "
834                 "(%d were requested for op_axes)",
835                 NPY_MAXDIMS, oa_ndim);
836         return 0;
837     }
838     if (op_axes == NULL) {
839         PyErr_Format(PyExc_ValueError,
840                 "If 'oa_ndim' is zero or greater in the iterator "
841                 "constructor, then op_axes cannot be NULL");
842         return 0;
843     }
844 
845     /* Check that there are no duplicates in op_axes */
846     for (iop = 0; iop < nop; ++iop) {
847         int *axes = op_axes[iop];
848         if (axes != NULL) {
849             memset(axes_dupcheck, 0, NPY_MAXDIMS);
850             for (idim = 0; idim < oa_ndim; ++idim) {
851                 int i = npyiter_get_op_axis(axes[idim], NULL);
852 
853                 if (i >= 0) {
854                     if (i >= NPY_MAXDIMS) {
855                         PyErr_Format(PyExc_ValueError,
856                                 "The 'op_axes' provided to the iterator "
857                                 "constructor for operand %d "
858                                 "contained invalid "
859                                 "values %d", iop, i);
860                         return 0;
861                     }
862                     else if (axes_dupcheck[i] == 1) {
863                         PyErr_Format(PyExc_ValueError,
864                                 "The 'op_axes' provided to the iterator "
865                                 "constructor for operand %d "
866                                 "contained duplicate "
867                                 "value %d", iop, i);
868                         return 0;
869                     }
870                     else {
871                         axes_dupcheck[i] = 1;
872                     }
873                 }
874             }
875         }
876     }
877 
878     return 1;
879 }
880 
881 static int
npyiter_calculate_ndim(int nop,PyArrayObject ** op_in,int oa_ndim)882 npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
883                        int oa_ndim)
884 {
885     /* If 'op_axes' is being used, force 'ndim' */
886     if (oa_ndim >= 0 ) {
887         return oa_ndim;
888     }
889     /* Otherwise it's the maximum 'ndim' from the operands */
890     else {
891         int ndim = 0, iop;
892 
893         for (iop = 0; iop < nop; ++iop) {
894             if (op_in[iop] != NULL) {
895                 int ondim = PyArray_NDIM(op_in[iop]);
896                 if (ondim > ndim) {
897                     ndim = ondim;
898                 }
899             }
900 
901         }
902 
903         return ndim;
904     }
905 }
906 
907 /*
908  * Checks the per-operand input flags, and fills in op_itflags.
909  *
910  * Returns 1 on success, 0 on failure.
911  */
912 static int
npyiter_check_per_op_flags(npy_uint32 op_flags,npyiter_opitflags * op_itflags)913 npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
914 {
915     if ((op_flags & NPY_ITER_GLOBAL_FLAGS) != 0) {
916         PyErr_SetString(PyExc_ValueError,
917                     "A global iterator flag was passed as a per-operand flag "
918                     "to the iterator constructor");
919         return 0;
920     }
921 
922     /* Check the read/write flags */
923     if (op_flags & NPY_ITER_READONLY) {
924         /* The read/write flags are mutually exclusive */
925         if (op_flags & (NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) {
926             PyErr_SetString(PyExc_ValueError,
927                     "Only one of the iterator flags READWRITE, "
928                     "READONLY, and WRITEONLY may be "
929                     "specified for an operand");
930             return 0;
931         }
932 
933         *op_itflags = NPY_OP_ITFLAG_READ;
934     }
935     else if (op_flags & NPY_ITER_READWRITE) {
936         /* The read/write flags are mutually exclusive */
937         if (op_flags & NPY_ITER_WRITEONLY) {
938             PyErr_SetString(PyExc_ValueError,
939                     "Only one of the iterator flags READWRITE, "
940                     "READONLY, and WRITEONLY may be "
941                     "specified for an operand");
942             return 0;
943         }
944 
945         *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE;
946     }
947     else if(op_flags & NPY_ITER_WRITEONLY) {
948         *op_itflags = NPY_OP_ITFLAG_WRITE;
949     }
950     else {
951         PyErr_SetString(PyExc_ValueError,
952                 "None of the iterator flags READWRITE, "
953                 "READONLY, or WRITEONLY were "
954                 "specified for an operand");
955         return 0;
956     }
957 
958     /* Check the flags for temporary copies */
959     if (((*op_itflags) & NPY_OP_ITFLAG_WRITE) &&
960                 (op_flags & (NPY_ITER_COPY |
961                            NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) {
962         PyErr_SetString(PyExc_ValueError,
963                 "If an iterator operand is writeable, must use "
964                 "the flag UPDATEIFCOPY instead of "
965                 "COPY");
966         return 0;
967     }
968 
969     /* Check the flag for a write masked operands */
970     if (op_flags & NPY_ITER_WRITEMASKED) {
971         if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
972             PyErr_SetString(PyExc_ValueError,
973                 "The iterator flag WRITEMASKED may only "
974                 "be used with READWRITE or WRITEONLY");
975             return 0;
976         }
977         if ((op_flags & NPY_ITER_ARRAYMASK) != 0) {
978             PyErr_SetString(PyExc_ValueError,
979                 "The iterator flag WRITEMASKED may not "
980                 "be used together with ARRAYMASK");
981             return 0;
982         }
983         *op_itflags |= NPY_OP_ITFLAG_WRITEMASKED;
984     }
985 
986     if ((op_flags & NPY_ITER_VIRTUAL) != 0) {
987         if ((op_flags & NPY_ITER_READWRITE) == 0) {
988             PyErr_SetString(PyExc_ValueError,
989                 "The iterator flag VIRTUAL should be "
990                 "be used together with READWRITE");
991             return 0;
992         }
993         *op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
994     }
995 
996     return 1;
997 }
998 
999 /*
1000  * Prepares a a constructor operand.  Assumes a reference to 'op'
1001  * is owned, and that 'op' may be replaced.  Fills in 'op_dataptr',
1002  * 'op_dtype', and may modify 'op_itflags'.
1003  *
1004  * Returns 1 on success, 0 on failure.
1005  */
1006 static int
npyiter_prepare_one_operand(PyArrayObject ** op,char ** op_dataptr,PyArray_Descr * op_request_dtype,PyArray_Descr ** op_dtype,npy_uint32 flags,npy_uint32 op_flags,npyiter_opitflags * op_itflags)1007 npyiter_prepare_one_operand(PyArrayObject **op,
1008                         char **op_dataptr,
1009                         PyArray_Descr *op_request_dtype,
1010                         PyArray_Descr **op_dtype,
1011                         npy_uint32 flags,
1012                         npy_uint32 op_flags, npyiter_opitflags *op_itflags)
1013 {
1014     /* NULL operands must be automatically allocated outputs */
1015     if (*op == NULL) {
1016         /* ALLOCATE or VIRTUAL should be enabled */
1017         if ((op_flags & (NPY_ITER_ALLOCATE|NPY_ITER_VIRTUAL)) == 0) {
1018             PyErr_SetString(PyExc_ValueError,
1019                     "Iterator operand was NULL, but neither the "
1020                     "ALLOCATE nor the VIRTUAL flag was specified");
1021             return 0;
1022         }
1023 
1024         if (op_flags & NPY_ITER_ALLOCATE) {
1025             /* Writing should be enabled */
1026             if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
1027                 PyErr_SetString(PyExc_ValueError,
1028                         "Automatic allocation was requested for an iterator "
1029                         "operand, but it wasn't flagged for writing");
1030                 return 0;
1031             }
1032             /*
1033              * Reading should be disabled if buffering is enabled without
1034              * also enabling NPY_ITER_DELAY_BUFALLOC.  In all other cases,
1035              * the caller may initialize the allocated operand to a value
1036              * before beginning iteration.
1037              */
1038             if (((flags & (NPY_ITER_BUFFERED |
1039                             NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) &&
1040                     ((*op_itflags) & NPY_OP_ITFLAG_READ)) {
1041                 PyErr_SetString(PyExc_ValueError,
1042                         "Automatic allocation was requested for an iterator "
1043                         "operand, and it was flagged as readable, but "
1044                         "buffering  without delayed allocation was enabled");
1045                 return 0;
1046             }
1047 
1048             /* If a requested dtype was provided, use it, otherwise NULL */
1049             Py_XINCREF(op_request_dtype);
1050             *op_dtype = op_request_dtype;
1051         }
1052         else {
1053             *op_dtype = NULL;
1054         }
1055 
1056         /* Specify bool if no dtype was requested for the mask */
1057         if (op_flags & NPY_ITER_ARRAYMASK) {
1058             if (*op_dtype == NULL) {
1059                 *op_dtype = PyArray_DescrFromType(NPY_BOOL);
1060                 if (*op_dtype == NULL) {
1061                     return 0;
1062                 }
1063             }
1064         }
1065 
1066         *op_dataptr = NULL;
1067 
1068         return 1;
1069     }
1070 
1071     /* VIRTUAL operands must be NULL */
1072     if (op_flags & NPY_ITER_VIRTUAL) {
1073         PyErr_SetString(PyExc_ValueError,
1074                 "Iterator operand flag VIRTUAL was specified, "
1075                 "but the operand was not NULL");
1076         return 0;
1077     }
1078 
1079 
1080     if (PyArray_Check(*op)) {
1081 
1082         if ((*op_itflags) & NPY_OP_ITFLAG_WRITE
1083             && PyArray_FailUnlessWriteable(*op, "operand array with iterator "
1084                                            "write flag set") < 0) {
1085             return 0;
1086         }
1087         if (!(flags & NPY_ITER_ZEROSIZE_OK) && PyArray_SIZE(*op) == 0) {
1088             PyErr_SetString(PyExc_ValueError,
1089                     "Iteration of zero-sized operands is not enabled");
1090             return 0;
1091         }
1092         *op_dataptr = PyArray_BYTES(*op);
1093         /* PyArray_DESCR does not give us a reference */
1094         *op_dtype = PyArray_DESCR(*op);
1095         if (*op_dtype == NULL) {
1096             PyErr_SetString(PyExc_ValueError,
1097                     "Iterator input operand has no dtype descr");
1098             return 0;
1099         }
1100         Py_INCREF(*op_dtype);
1101         /*
1102          * If references weren't specifically allowed, make sure there
1103          * are no references in the inputs or requested dtypes.
1104          */
1105         if (!(flags & NPY_ITER_REFS_OK)) {
1106             PyArray_Descr *dt = PyArray_DESCR(*op);
1107             if (((dt->flags & (NPY_ITEM_REFCOUNT |
1108                            NPY_ITEM_IS_POINTER)) != 0) ||
1109                     (dt != *op_dtype &&
1110                         (((*op_dtype)->flags & (NPY_ITEM_REFCOUNT |
1111                                              NPY_ITEM_IS_POINTER))) != 0)) {
1112                 PyErr_SetString(PyExc_TypeError,
1113                         "Iterator operand or requested dtype holds "
1114                         "references, but the REFS_OK flag was not enabled");
1115                 return 0;
1116             }
1117         }
1118         /*
1119          * Checking whether casts are valid is done later, once the
1120          * final data types have been selected.  For now, just store the
1121          * requested type.
1122          */
1123         if (op_request_dtype != NULL) {
1124             /* We just have a borrowed reference to op_request_dtype */
1125             Py_SETREF(*op_dtype, PyArray_AdaptDescriptorToArray(
1126                                             *op, (PyObject *)op_request_dtype));
1127             if (*op_dtype == NULL) {
1128                 return 0;
1129             }
1130         }
1131 
1132         /* Check if the operand is in the byte order requested */
1133         if (op_flags & NPY_ITER_NBO) {
1134             /* Check byte order */
1135             if (!PyArray_ISNBO((*op_dtype)->byteorder)) {
1136                 PyArray_Descr *nbo_dtype;
1137 
1138                 /* Replace with a new descr which is in native byte order */
1139                 nbo_dtype = PyArray_DescrNewByteorder(*op_dtype, NPY_NATIVE);
1140                 Py_DECREF(*op_dtype);
1141                 *op_dtype = nbo_dtype;
1142 
1143                 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1144                                     "because of NPY_ITER_NBO\n");
1145                 /* Indicate that byte order or alignment needs fixing */
1146                 *op_itflags |= NPY_OP_ITFLAG_CAST;
1147             }
1148         }
1149         /* Check if the operand is aligned */
1150         if (op_flags & NPY_ITER_ALIGNED) {
1151             /* Check alignment */
1152             if (!IsAligned(*op)) {
1153                 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1154                                     "because of NPY_ITER_ALIGNED\n");
1155                 *op_itflags |= NPY_OP_ITFLAG_CAST;
1156             }
1157         }
1158         /*
1159          * The check for NPY_ITER_CONTIG can only be done later,
1160          * once the final iteration order is settled.
1161          */
1162     }
1163     else {
1164         PyErr_SetString(PyExc_ValueError,
1165                 "Iterator inputs must be ndarrays");
1166         return 0;
1167     }
1168 
1169     return 1;
1170 }
1171 
/*
 * Process all the operands, copying new references so further processing
 * can replace the arrays if copying is necessary.
 *
 * On success, each op[iop] holds a new reference (which may be NULL for
 * ALLOCATE/VIRTUAL operands) and each op_dtype[iop] holds a new dtype
 * reference.  On failure, every reference taken so far is released and
 * a Python exception is set.
 *
 * Returns 1 on success, 0 on failure.
 */
static int
npyiter_prepare_operands(int nop, PyArrayObject **op_in,
                    PyArrayObject **op,
                    char **op_dataptr,
                    PyArray_Descr **op_request_dtypes,
                    PyArray_Descr **op_dtype,
                    npy_uint32 flags,
                    npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                    npy_int8 *out_maskop)
{
    int iop, i;
    /* Index of the ARRAYMASK operand; -1 until one is found */
    npy_int8 maskop = -1;
    int any_writemasked_ops = 0;

    /*
     * Here we just prepare the provided operands.
     */
    for (iop = 0; iop < nop; ++iop) {
        /* Take our own reference; later stages may replace op[iop] */
        op[iop] = op_in[iop];
        Py_XINCREF(op[iop]);
        /* NULL now so the failure path can safely XDECREF slot iop */
        op_dtype[iop] = NULL;

        /* Check the readonly/writeonly flags, and fill in op_itflags */
        if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) {
            goto fail_iop;
        }

        /* Extract the operand which is for masked iteration */
        if ((op_flags[iop] & NPY_ITER_ARRAYMASK) != 0) {
            if (maskop != -1) {
                PyErr_SetString(PyExc_ValueError,
                        "Only one iterator operand may receive an "
                        "ARRAYMASK flag");
                goto fail_iop;
            }

            maskop = iop;
            *out_maskop = iop;
        }

        if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
            any_writemasked_ops = 1;
        }

        /*
         * Prepare the operand.  This produces an op_dtype[iop] reference
         * on success.
         */
        if (!npyiter_prepare_one_operand(&op[iop],
                        &op_dataptr[iop],
                        op_request_dtypes ? op_request_dtypes[iop] : NULL,
                        &op_dtype[iop],
                        flags,
                        op_flags[iop], &op_itflags[iop])) {
            goto fail_iop;
        }
    }

    /* If all the operands were NULL, it's an error */
    if (op[0] == NULL) {
        int all_null = 1;
        for (iop = 1; iop < nop; ++iop) {
            if (op[iop] != NULL) {
                all_null = 0;
                break;
            }
        }
        if (all_null) {
            PyErr_SetString(PyExc_ValueError,
                    "At least one iterator operand must be non-NULL");
            goto fail_nop;
        }
    }

    /* WRITEMASKED operands and an ARRAYMASK operand must come together */
    if (any_writemasked_ops && maskop < 0) {
        PyErr_SetString(PyExc_ValueError,
                "An iterator operand was flagged as WRITEMASKED, "
                "but no ARRAYMASK operand was given to supply "
                "the mask");
        goto fail_nop;
    }
    else if (!any_writemasked_ops && maskop >= 0) {
        PyErr_SetString(PyExc_ValueError,
                "An iterator operand was flagged as the ARRAYMASK, "
                "but no WRITEMASKED operands were given to use "
                "the mask");
        goto fail_nop;
    }

    return 1;

    /* fail_nop: all nop slots were filled; release everything */
  fail_nop:
    iop = nop - 1;
    /* fail_iop: slots 0..iop hold references needing release */
  fail_iop:
    for (i = 0; i < iop+1; ++i) {
        Py_XDECREF(op[i]);
        Py_XDECREF(op_dtype[i]);
    }
    return 0;
}
1276 
1277 static const char *
npyiter_casting_to_string(NPY_CASTING casting)1278 npyiter_casting_to_string(NPY_CASTING casting)
1279 {
1280     switch (casting) {
1281         case NPY_NO_CASTING:
1282             return "'no'";
1283         case NPY_EQUIV_CASTING:
1284             return "'equiv'";
1285         case NPY_SAFE_CASTING:
1286             return "'safe'";
1287         case NPY_SAME_KIND_CASTING:
1288             return "'same_kind'";
1289         case NPY_UNSAFE_CASTING:
1290             return "'unsafe'";
1291         default:
1292             return "<unknown>";
1293     }
1294 }
1295 
1296 
1297 static int
npyiter_check_casting(int nop,PyArrayObject ** op,PyArray_Descr ** op_dtype,NPY_CASTING casting,npyiter_opitflags * op_itflags)1298 npyiter_check_casting(int nop, PyArrayObject **op,
1299                     PyArray_Descr **op_dtype,
1300                     NPY_CASTING casting,
1301                     npyiter_opitflags *op_itflags)
1302 {
1303     int iop;
1304 
1305     for(iop = 0; iop < nop; ++iop) {
1306         NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n",
1307                             (int)iop);
1308 #if NPY_IT_DBG_TRACING
1309         printf("op: ");
1310         if (op[iop] != NULL) {
1311             PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0);
1312         }
1313         else {
1314             printf("<null>");
1315         }
1316         printf(", iter: ");
1317         PyObject_Print((PyObject *)op_dtype[iop], stdout, 0);
1318         printf("\n");
1319 #endif
1320         /* If the types aren't equivalent, a cast is necessary */
1321         if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]),
1322                                                      op_dtype[iop])) {
1323             /* Check read (op -> temp) casting */
1324             if ((op_itflags[iop] & NPY_OP_ITFLAG_READ) &&
1325                         !PyArray_CanCastArrayTo(op[iop],
1326                                           op_dtype[iop],
1327                                           casting)) {
1328                 PyErr_Format(PyExc_TypeError,
1329                         "Iterator operand %d dtype could not be cast from "
1330                         "%R to %R according to the rule %s",
1331                         iop, PyArray_DESCR(op[iop]), op_dtype[iop],
1332                         npyiter_casting_to_string(casting));
1333                 return 0;
1334             }
1335             /* Check write (temp -> op) casting */
1336             if ((op_itflags[iop] & NPY_OP_ITFLAG_WRITE) &&
1337                         !PyArray_CanCastTypeTo(op_dtype[iop],
1338                                           PyArray_DESCR(op[iop]),
1339                                           casting)) {
1340                 PyErr_Format(PyExc_TypeError,
1341                         "Iterator requested dtype could not be cast from "
1342                         "%R to %R, the operand %d dtype, "
1343                         "according to the rule %s",
1344                         op_dtype[iop], PyArray_DESCR(op[iop]), iop,
1345                         npyiter_casting_to_string(casting));
1346                 return 0;
1347             }
1348 
1349             NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1350                                 "because the types aren't equivalent\n");
1351             /* Indicate that this operand needs casting */
1352             op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
1353         }
1354     }
1355 
1356     return 1;
1357 }
1358 
1359 /*
1360  * Checks that the mask broadcasts to the WRITEMASK REDUCE
1361  * operand 'iop', but 'iop' never broadcasts to the mask.
1362  * If 'iop' broadcasts to the mask, the result would be more
1363  * than one mask value per reduction element, something which
1364  * is invalid.
1365  *
1366  * This check should only be called after all the operands
1367  * have been filled in.
1368  *
1369  * Returns 1 on success, 0 on error.
1370  */
1371 static int
check_mask_for_writemasked_reduction(NpyIter * iter,int iop)1372 check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
1373 {
1374     npy_uint32 itflags = NIT_ITFLAGS(iter);
1375     int idim, ndim = NIT_NDIM(iter);
1376     int nop = NIT_NOP(iter);
1377     int maskop = NIT_MASKOP(iter);
1378 
1379     NpyIter_AxisData *axisdata;
1380     npy_intp sizeof_axisdata;
1381 
1382     axisdata = NIT_AXISDATA(iter);
1383     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1384 
1385     for(idim = 0; idim < ndim; ++idim) {
1386         npy_intp maskstride, istride;
1387 
1388         istride = NAD_STRIDES(axisdata)[iop];
1389         maskstride = NAD_STRIDES(axisdata)[maskop];
1390 
1391         /*
1392          * If 'iop' is being broadcast to 'maskop', we have
1393          * the invalid situation described above.
1394          */
1395         if (maskstride != 0 && istride == 0) {
1396             PyErr_SetString(PyExc_ValueError,
1397                     "Iterator reduction operand is WRITEMASKED, "
1398                     "but also broadcasts to multiple mask values. "
1399                     "There can be only one mask value per WRITEMASKED "
1400                     "element.");
1401             return 0;
1402         }
1403 
1404         NIT_ADVANCE_AXISDATA(axisdata, 1);
1405     }
1406 
1407     return 1;
1408 }
1409 
1410 /*
1411  * Check whether a reduction is OK based on the flags and the operand being
1412  * readwrite. This path is deprecated, since usually only specific axes
1413  * should be reduced. If axes are specified explicitely, the flag is
1414  * unnecessary.
1415  */
1416 static int
npyiter_check_reduce_ok_and_set_flags(NpyIter * iter,npy_uint32 flags,npyiter_opitflags * op_itflags,int dim)1417 npyiter_check_reduce_ok_and_set_flags(
1418         NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1419         int dim) {
1420     /* If it's writeable, this means a reduction */
1421     if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
1422         if (!(flags & NPY_ITER_REDUCE_OK)) {
1423             PyErr_Format(PyExc_ValueError,
1424                     "output operand requires a reduction along dimension %d, "
1425                     "but the reduction is not enabled. The dimension size of 1 "
1426                     "does not match the expected output shape.", dim);
1427             return 0;
1428         }
1429         if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
1430             PyErr_SetString(PyExc_ValueError,
1431                     "output operand requires a reduction, but is flagged as "
1432                     "write-only, not read-write");
1433             return 0;
1434         }
1435         NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is"
1436                          "occurring\n");
1437 
1438         NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1439         *op_itflags |= NPY_OP_ITFLAG_REDUCE;
1440     }
1441     return 1;
1442 }
1443 
1444 /**
1445  * Removes the (additive) NPY_ITER_REDUCTION_AXIS indication and sets
1446  * is_forced_broadcast to 1 if it is set. Otherwise to 0.
1447  *
1448  * @param axis The op_axes[i] to normalize.
1449  * @param reduction_axis Output 1 if a reduction axis, otherwise 0.
1450  * @returns The normalized axis (without reduce axis flag).
1451  */
1452 static NPY_INLINE int
npyiter_get_op_axis(int axis,npy_bool * reduction_axis)1453 npyiter_get_op_axis(int axis, npy_bool *reduction_axis) {
1454     npy_bool forced_broadcast = axis >= NPY_ITER_REDUCTION_AXIS(-1);
1455 
1456     if (reduction_axis != NULL) {
1457         *reduction_axis = forced_broadcast;
1458     }
1459     if (forced_broadcast) {
1460         return axis - NPY_ITER_REDUCTION_AXIS(0);
1461     }
1462     return axis;
1463 }
1464 
1465 /*
1466  * Fills in the AXISDATA for the 'nop' operands, broadcasting
1467  * the dimensionas as necessary.  Also fills
1468  * in the ITERSIZE data member.
1469  *
1470  * If op_axes is not NULL, it should point to an array of ndim-sized
1471  * arrays, one for each op.
1472  *
1473  * Returns 1 on success, 0 on failure.
1474  */
1475 static int
npyiter_fill_axisdata(NpyIter * iter,npy_uint32 flags,npyiter_opitflags * op_itflags,char ** op_dataptr,const npy_uint32 * op_flags,int ** op_axes,npy_intp const * itershape)1476 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1477                     char **op_dataptr,
1478                     const npy_uint32 *op_flags, int **op_axes,
1479                     npy_intp const *itershape)
1480 {
1481     npy_uint32 itflags = NIT_ITFLAGS(iter);
1482     int idim, ndim = NIT_NDIM(iter);
1483     int iop, nop = NIT_NOP(iter);
1484     int maskop = NIT_MASKOP(iter);
1485 
1486     int ondim;
1487     NpyIter_AxisData *axisdata;
1488     npy_intp sizeof_axisdata;
1489     PyArrayObject **op = NIT_OPERANDS(iter), *op_cur;
1490     npy_intp broadcast_shape[NPY_MAXDIMS];
1491 
1492     /* First broadcast the shapes together */
1493     if (itershape == NULL) {
1494         for (idim = 0; idim < ndim; ++idim) {
1495             broadcast_shape[idim] = 1;
1496         }
1497     }
1498     else {
1499         for (idim = 0; idim < ndim; ++idim) {
1500             broadcast_shape[idim] = itershape[idim];
1501             /* Negative shape entries are deduced from the operands */
1502             if (broadcast_shape[idim] < 0) {
1503                 broadcast_shape[idim] = 1;
1504             }
1505         }
1506     }
1507     for (iop = 0; iop < nop; ++iop) {
1508         op_cur = op[iop];
1509         if (op_cur != NULL) {
1510             npy_intp *shape = PyArray_DIMS(op_cur);
1511             ondim = PyArray_NDIM(op_cur);
1512 
1513             if (op_axes == NULL || op_axes[iop] == NULL) {
1514                 /*
1515                  * Possible if op_axes are being used, but
1516                  * op_axes[iop] is NULL
1517                  */
1518                 if (ondim > ndim) {
1519                     PyErr_SetString(PyExc_ValueError,
1520                             "input operand has more dimensions than allowed "
1521                             "by the axis remapping");
1522                     return 0;
1523                 }
1524                 for (idim = 0; idim < ondim; ++idim) {
1525                     npy_intp bshape = broadcast_shape[idim+ndim-ondim];
1526                     npy_intp op_shape = shape[idim];
1527 
1528                     if (bshape == 1) {
1529                         broadcast_shape[idim+ndim-ondim] = op_shape;
1530                     }
1531                     else if (bshape != op_shape && op_shape != 1) {
1532                         goto broadcast_error;
1533                     }
1534                 }
1535             }
1536             else {
1537                 int *axes = op_axes[iop];
1538                 for (idim = 0; idim < ndim; ++idim) {
1539                     int i = npyiter_get_op_axis(axes[idim], NULL);
1540 
1541                     if (i >= 0) {
1542                         if (i < ondim) {
1543                             npy_intp bshape = broadcast_shape[idim];
1544                             npy_intp op_shape = shape[i];
1545 
1546                             if (bshape == 1) {
1547                                 broadcast_shape[idim] = op_shape;
1548                             }
1549                             else if (bshape != op_shape && op_shape != 1) {
1550                                 goto broadcast_error;
1551                             }
1552                         }
1553                         else {
1554                             PyErr_Format(PyExc_ValueError,
1555                                     "Iterator input op_axes[%d][%d] (==%d) "
1556                                     "is not a valid axis of op[%d], which "
1557                                     "has %d dimensions ",
1558                                     iop, (ndim-idim-1), i,
1559                                     iop, ondim);
1560                             return 0;
1561                         }
1562                     }
1563                 }
1564             }
1565         }
1566     }
1567     /*
1568      * If a shape was provided with a 1 entry, make sure that entry didn't
1569      * get expanded by broadcasting.
1570      */
1571     if (itershape != NULL) {
1572         for (idim = 0; idim < ndim; ++idim) {
1573             if (itershape[idim] == 1 && broadcast_shape[idim] != 1) {
1574                 goto broadcast_error;
1575             }
1576         }
1577     }
1578 
1579     axisdata = NIT_AXISDATA(iter);
1580     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1581 
1582     if (ndim == 0) {
1583         /* Need to fill the first axisdata, even if the iterator is 0-d */
1584         NAD_SHAPE(axisdata) = 1;
1585         NAD_INDEX(axisdata) = 0;
1586         memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1587         memset(NAD_STRIDES(axisdata), 0, NPY_SIZEOF_INTP*nop);
1588     }
1589 
1590     /* Now process the operands, filling in the axisdata */
1591     for (idim = 0; idim < ndim; ++idim) {
1592         npy_intp bshape = broadcast_shape[ndim-idim-1];
1593         npy_intp *strides = NAD_STRIDES(axisdata);
1594 
1595         NAD_SHAPE(axisdata) = bshape;
1596         NAD_INDEX(axisdata) = 0;
1597         memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1598 
1599         for (iop = 0; iop < nop; ++iop) {
1600             op_cur = op[iop];
1601 
1602             if (op_axes == NULL || op_axes[iop] == NULL) {
1603                 if (op_cur == NULL) {
1604                     strides[iop] = 0;
1605                 }
1606                 else {
1607                     ondim = PyArray_NDIM(op_cur);
1608                     if (bshape == 1) {
1609                         strides[iop] = 0;
1610                         if (idim >= ondim &&
1611                                     (op_flags[iop] & NPY_ITER_NO_BROADCAST)) {
1612                             goto operand_different_than_broadcast;
1613                         }
1614                     }
1615                     else if (idim >= ondim ||
1616                                     PyArray_DIM(op_cur, ondim-idim-1) == 1) {
1617                         strides[iop] = 0;
1618                         if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1619                             goto operand_different_than_broadcast;
1620                         }
1621                         /* If it's writeable, this means a reduction */
1622                         if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
1623                             if (!(flags & NPY_ITER_REDUCE_OK)) {
1624                                 PyErr_SetString(PyExc_ValueError,
1625                                         "output operand requires a "
1626                                         "reduction, but reduction is "
1627                                         "not enabled");
1628                                 return 0;
1629                             }
1630                             if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
1631                                 PyErr_SetString(PyExc_ValueError,
1632                                         "output operand requires a "
1633                                         "reduction, but is flagged as "
1634                                         "write-only, not read-write");
1635                                 return 0;
1636                             }
1637                             /*
1638                              * The ARRAYMASK can't be a reduction, because
1639                              * it would be possible to write back to the
1640                              * array once when the ARRAYMASK says 'True',
1641                              * then have the reduction on the ARRAYMASK
1642                              * later flip to 'False', indicating that the
1643                              * write back should never have been done,
1644                              * and violating the strict masking semantics
1645                              */
1646                             if (iop == maskop) {
1647                                 PyErr_SetString(PyExc_ValueError,
1648                                         "output operand requires a "
1649                                         "reduction, but is flagged as "
1650                                         "the ARRAYMASK operand which "
1651                                         "is not permitted to be the "
1652                                         "result of a reduction");
1653                                 return 0;
1654                             }
1655 
1656                             NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1657                             op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1658                         }
1659                     }
1660                     else {
1661                         strides[iop] = PyArray_STRIDE(op_cur, ondim-idim-1);
1662                     }
1663                 }
1664             }
1665             else {
1666                 int *axes = op_axes[iop];
1667                 npy_bool reduction_axis;
1668                 int i;
1669                 i = npyiter_get_op_axis(axes[ndim - idim - 1], &reduction_axis);
1670 
1671                 if (reduction_axis) {
1672                     /* This is explicitly a reduction axis */
1673                     strides[iop] = 0;
1674                     NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1675                     op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1676 
1677                     if (NPY_UNLIKELY((i >= 0) && (op_cur != NULL) &&
1678                             (PyArray_DIM(op_cur, i) != 1))) {
1679                         PyErr_Format(PyExc_ValueError,
1680                                 "operand was set up as a reduction along axis "
1681                                 "%d, but the length of the axis is %zd "
1682                                 "(it has to be 1)",
1683                                 i, (Py_ssize_t)PyArray_DIM(op_cur, i));
1684                         return 0;
1685                     }
1686                 }
1687                 else if (bshape == 1) {
1688                     /*
1689                      * If the full iterator shape is 1, zero always works.
1690                      * NOTE: We thus always allow broadcast dimensions (i = -1)
1691                      *       if the shape is 1.
1692                      */
1693                     strides[iop] = 0;
1694                 }
1695                 else if (i >= 0) {
1696                     if (op_cur == NULL) {
1697                         /* stride is filled later, shape will match `bshape` */
1698                         strides[iop] = 0;
1699                     }
1700                     else if (PyArray_DIM(op_cur, i) == 1) {
1701                         strides[iop] = 0;
1702                         if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1703                             goto operand_different_than_broadcast;
1704                         }
1705                         if (!npyiter_check_reduce_ok_and_set_flags(
1706                                 iter, flags, &op_itflags[iop], i)) {
1707                             return 0;
1708                         }
1709                     }
1710                     else {
1711                         strides[iop] = PyArray_STRIDE(op_cur, i);
1712                     }
1713                 }
1714                 else {
1715                     strides[iop] = 0;
1716                     if (!npyiter_check_reduce_ok_and_set_flags(
1717                             iter, flags, &op_itflags[iop], i)) {
1718                         return 0;
1719                     }
1720                 }
1721             }
1722         }
1723 
1724         NIT_ADVANCE_AXISDATA(axisdata, 1);
1725     }
1726 
1727     /* Now fill in the ITERSIZE member */
1728     NIT_ITERSIZE(iter) = 1;
1729     for (idim = 0; idim < ndim; ++idim) {
1730         if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
1731                     NIT_ITERSIZE(iter), broadcast_shape[idim])) {
1732             if ((itflags & NPY_ITFLAG_HASMULTIINDEX) &&
1733                     !(itflags & NPY_ITFLAG_HASINDEX) &&
1734                     !(itflags & NPY_ITFLAG_BUFFER)) {
1735                 /*
1736                  * If RemoveAxis may be called, the size check is delayed
1737                  * until either the multi index is removed, or GetIterNext
1738                  * is called.
1739                  */
1740                 NIT_ITERSIZE(iter) = -1;
1741                 break;
1742             }
1743             else {
1744                 PyErr_SetString(PyExc_ValueError, "iterator is too large");
1745                 return 0;
1746             }
1747         }
1748     }
1749     /* The range defaults to everything */
1750     NIT_ITERSTART(iter) = 0;
1751     NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
1752 
1753     return 1;
1754 
1755 broadcast_error: {
1756         npy_intp remdims[NPY_MAXDIMS];
1757 
1758         if (op_axes == NULL) {
1759             PyObject *shape1 = PyUnicode_FromString("");
1760             if (shape1 == NULL) {
1761                 return 0;
1762             }
1763             for (iop = 0; iop < nop; ++iop) {
1764                 if (op[iop] != NULL) {
1765                     int ndims = PyArray_NDIM(op[iop]);
1766                     npy_intp *dims = PyArray_DIMS(op[iop]);
1767                     PyObject *tmp = convert_shape_to_string(ndims, dims, " ");
1768                     if (tmp == NULL) {
1769                         Py_DECREF(shape1);
1770                         return 0;
1771                     }
1772                     Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1773                     Py_DECREF(tmp);
1774                     if (shape1 == NULL) {
1775                         return 0;
1776                     }
1777                 }
1778             }
1779             if (itershape == NULL) {
1780                 PyErr_Format(PyExc_ValueError,
1781                         "operands could not be broadcast together with "
1782                         "shapes %S", shape1);
1783                 Py_DECREF(shape1);
1784                 return 0;
1785             }
1786             else {
1787                 PyObject *shape2 = convert_shape_to_string(ndim, itershape, "");
1788                 if (shape2 == NULL) {
1789                     Py_DECREF(shape1);
1790                     return 0;
1791                 }
1792                 PyErr_Format(PyExc_ValueError,
1793                         "operands could not be broadcast together with "
1794                         "shapes %S and requested shape %S", shape1, shape2);
1795                 Py_DECREF(shape1);
1796                 Py_DECREF(shape2);
1797                 return 0;
1798             }
1799         }
1800         else {
1801             PyObject *shape1 = PyUnicode_FromString("");
1802             if (shape1 == NULL) {
1803                 return 0;
1804             }
1805             for (iop = 0; iop < nop; ++iop) {
1806                 if (op[iop] != NULL) {
1807                     int *axes = op_axes[iop];
1808                     int ndims = PyArray_NDIM(op[iop]);
1809                     npy_intp *dims = PyArray_DIMS(op[iop]);
1810                     char *tmpstr = (axes == NULL) ? " " : "->";
1811 
1812                     PyObject *tmp = convert_shape_to_string(ndims, dims, tmpstr);
1813                     if (tmp == NULL) {
1814                         Py_DECREF(shape1);
1815                         return 0;
1816                     }
1817                     Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1818                     Py_DECREF(tmp);
1819                     if (shape1 == NULL) {
1820                         return 0;
1821                     }
1822 
1823                     if (axes != NULL) {
1824                         for (idim = 0; idim < ndim; ++idim) {
1825                             int i = npyiter_get_op_axis(axes[idim], NULL);
1826 
1827                             if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1828                                 remdims[idim] = PyArray_DIM(op[iop], i);
1829                             }
1830                             else {
1831                                 remdims[idim] = -1;
1832                             }
1833                         }
1834                         PyObject *tmp = convert_shape_to_string(ndim, remdims, " ");
1835                         if (tmp == NULL) {
1836                             Py_DECREF(shape1);
1837                             return 0;
1838                         }
1839                         Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1840                         Py_DECREF(tmp);
1841                         if (shape1 == NULL) {
1842                             return 0;
1843                         }
1844                     }
1845                 }
1846             }
1847             if (itershape == NULL) {
1848                 PyErr_Format(PyExc_ValueError,
1849                         "operands could not be broadcast together with "
1850                         "remapped shapes [original->remapped]: %S", shape1);
1851                 Py_DECREF(shape1);
1852                 return 0;
1853             }
1854             else {
1855                 PyObject *shape2 = convert_shape_to_string(ndim, itershape, "");
1856                 if (shape2 == NULL) {
1857                     Py_DECREF(shape1);
1858                     return 0;
1859                 }
1860                 PyErr_Format(PyExc_ValueError,
1861                         "operands could not be broadcast together with "
1862                         "remapped shapes [original->remapped]: %S and "
1863                         "requested shape %S", shape1, shape2);
1864                 Py_DECREF(shape1);
1865                 Py_DECREF(shape2);
1866                 return 0;
1867             }
1868         }
1869     }
1870 
1871 operand_different_than_broadcast: {
1872         /* operand shape */
1873         int ndims = PyArray_NDIM(op[iop]);
1874         npy_intp *dims = PyArray_DIMS(op[iop]);
1875         PyObject *shape1 = convert_shape_to_string(ndims, dims, "");
1876         if (shape1 == NULL) {
1877             return 0;
1878         }
1879 
1880         /* Broadcast shape */
1881         PyObject *shape2 = convert_shape_to_string(ndim, broadcast_shape, "");
1882         if (shape2 == NULL) {
1883             Py_DECREF(shape1);
1884             return 0;
1885         }
1886 
1887         if (op_axes == NULL || op_axes[iop] == NULL) {
1888             /* operand shape not remapped */
1889 
1890             if (op_flags[iop] & NPY_ITER_READONLY) {
1891                 PyErr_Format(PyExc_ValueError,
1892                     "non-broadcastable operand with shape %S doesn't "
1893                     "match the broadcast shape %S", shape1, shape2);
1894             }
1895             else {
1896                 PyErr_Format(PyExc_ValueError,
1897                     "non-broadcastable output operand with shape %S doesn't "
1898                     "match the broadcast shape %S", shape1, shape2);
1899             }
1900             Py_DECREF(shape1);
1901             Py_DECREF(shape2);
1902             return 0;
1903         }
1904         else {
1905             /* operand shape remapped */
1906 
1907             npy_intp remdims[NPY_MAXDIMS];
1908             int *axes = op_axes[iop];
1909             for (idim = 0; idim < ndim; ++idim) {
1910                 npy_intp i = axes[ndim - idim - 1];
1911                 if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1912                     remdims[idim] = PyArray_DIM(op[iop], i);
1913                 }
1914                 else {
1915                     remdims[idim] = -1;
1916                 }
1917             }
1918 
1919             PyObject *shape3 = convert_shape_to_string(ndim, remdims, "");
1920             if (shape3 == NULL) {
1921                 Py_DECREF(shape1);
1922                 Py_DECREF(shape2);
1923                 return 0;
1924             }
1925 
1926             if (op_flags[iop] & NPY_ITER_READONLY) {
1927                 PyErr_Format(PyExc_ValueError,
1928                     "non-broadcastable operand with shape %S "
1929                     "[remapped to %S] doesn't match the broadcast shape %S",
1930                     shape1, shape3, shape2);
1931             }
1932             else {
1933                 PyErr_Format(PyExc_ValueError,
1934                     "non-broadcastable output operand with shape %S "
1935                     "[remapped to %S] doesn't match the broadcast shape %S",
1936                     shape1, shape3, shape2);
1937             }
1938             Py_DECREF(shape1);
1939             Py_DECREF(shape2);
1940             Py_DECREF(shape3);
1941             return 0;
1942         }
1943     }
1944 }
1945 
1946 /*
1947  * Replaces the AXISDATA for the iop'th operand, broadcasting
1948  * the dimensions as necessary.  Assumes the replacement array is
1949  * exactly the same shape as the original array used when
1950  * npy_fill_axisdata was called.
1951  *
1952  * If op_axes is not NULL, it should point to an ndim-sized
1953  * array.
1954  */
1955 static void
npyiter_replace_axisdata(NpyIter * iter,int iop,PyArrayObject * op,int orig_op_ndim,const int * op_axes)1956 npyiter_replace_axisdata(
1957         NpyIter *iter, int iop, PyArrayObject *op,
1958         int orig_op_ndim, const int *op_axes)
1959 {
1960     npy_uint32 itflags = NIT_ITFLAGS(iter);
1961     int idim, ndim = NIT_NDIM(iter);
1962     int nop = NIT_NOP(iter);
1963     char *op_dataptr = PyArray_DATA(op);
1964 
1965     NpyIter_AxisData *axisdata0, *axisdata;
1966     npy_intp sizeof_axisdata;
1967     npy_int8 *perm;
1968     npy_intp baseoffset = 0;
1969 
1970     perm = NIT_PERM(iter);
1971     axisdata0 = NIT_AXISDATA(iter);
1972     sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1973 
1974     /*
1975      * Replace just the strides which were non-zero, and compute
1976      * the base data address.
1977      */
1978     axisdata = axisdata0;
1979 
1980     if (op_axes != NULL) {
1981         for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1982             int i;
1983             npy_bool axis_flipped;
1984             npy_intp shape;
1985 
1986             /* Apply perm to get the original axis, and check if its flipped */
1987             i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped);
1988 
1989             i = npyiter_get_op_axis(op_axes[i], NULL);
1990             assert(i < orig_op_ndim);
1991             if (i >= 0) {
1992                 shape = PyArray_DIM(op, i);
1993                 if (shape != 1) {
1994                     npy_intp stride = PyArray_STRIDE(op, i);
1995                     if (axis_flipped) {
1996                         NAD_STRIDES(axisdata)[iop] = -stride;
1997                         baseoffset += stride*(shape-1);
1998                     }
1999                     else {
2000                         NAD_STRIDES(axisdata)[iop] = stride;
2001                     }
2002                 }
2003             }
2004         }
2005     }
2006     else {
2007         for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2008             int i;
2009             npy_bool axis_flipped;
2010             npy_intp shape;
2011 
2012             i = npyiter_undo_iter_axis_perm(
2013                     idim, orig_op_ndim, perm, &axis_flipped);
2014 
2015             if (i >= 0) {
2016                 shape = PyArray_DIM(op, i);
2017                 if (shape != 1) {
2018                     npy_intp stride = PyArray_STRIDE(op, i);
2019                     if (axis_flipped) {
2020                         NAD_STRIDES(axisdata)[iop] = -stride;
2021                         baseoffset += stride*(shape-1);
2022                     }
2023                     else {
2024                         NAD_STRIDES(axisdata)[iop] = stride;
2025                     }
2026                 }
2027             }
2028         }
2029     }
2030 
2031     op_dataptr += baseoffset;
2032 
2033     /* Now the base data pointer is calculated, set it everywhere it's needed */
2034     NIT_RESETDATAPTR(iter)[iop] = op_dataptr;
2035     NIT_BASEOFFSETS(iter)[iop] = baseoffset;
2036     axisdata = axisdata0;
2037     /* Fill at least one axisdata, for the 0-d case */
2038     NAD_PTRS(axisdata)[iop] = op_dataptr;
2039     NIT_ADVANCE_AXISDATA(axisdata, 1);
2040     for (idim = 1; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2041         NAD_PTRS(axisdata)[iop] = op_dataptr;
2042     }
2043 }
2044 
2045 /*
2046  * Computes the iterator's index strides and initializes the index values
2047  * to zero.
2048  *
2049  * This must be called before the axes (i.e. the AXISDATA array) may
2050  * be reordered.
2051  */
2052 static void
npyiter_compute_index_strides(NpyIter * iter,npy_uint32 flags)2053 npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags)
2054 {
2055     npy_uint32 itflags = NIT_ITFLAGS(iter);
2056     int idim, ndim = NIT_NDIM(iter);
2057     int nop = NIT_NOP(iter);
2058 
2059     npy_intp indexstride;
2060     NpyIter_AxisData *axisdata;
2061     npy_intp sizeof_axisdata;
2062 
2063     /*
2064      * If there is only one element being iterated, we just have
2065      * to touch the first AXISDATA because nothing will ever be
2066      * incremented. This also initializes the data for the 0-d case.
2067      */
2068     if (NIT_ITERSIZE(iter) == 1) {
2069         if (itflags & NPY_ITFLAG_HASINDEX) {
2070             axisdata = NIT_AXISDATA(iter);
2071             NAD_PTRS(axisdata)[nop] = 0;
2072         }
2073         return;
2074     }
2075 
2076     if (flags & NPY_ITER_C_INDEX) {
2077         sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2078         axisdata = NIT_AXISDATA(iter);
2079         indexstride = 1;
2080         for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2081             npy_intp shape = NAD_SHAPE(axisdata);
2082 
2083             if (shape == 1) {
2084                 NAD_STRIDES(axisdata)[nop] = 0;
2085             }
2086             else {
2087                 NAD_STRIDES(axisdata)[nop] = indexstride;
2088             }
2089             NAD_PTRS(axisdata)[nop] = 0;
2090             indexstride *= shape;
2091         }
2092     }
2093     else if (flags & NPY_ITER_F_INDEX) {
2094         sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2095         axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
2096         indexstride = 1;
2097         for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) {
2098             npy_intp shape = NAD_SHAPE(axisdata);
2099 
2100             if (shape == 1) {
2101                 NAD_STRIDES(axisdata)[nop] = 0;
2102             }
2103             else {
2104                 NAD_STRIDES(axisdata)[nop] = indexstride;
2105             }
2106             NAD_PTRS(axisdata)[nop] = 0;
2107             indexstride *= shape;
2108         }
2109     }
2110 }
2111 
2112 /*
2113  * If the order is NPY_KEEPORDER, lets the iterator find the best
2114  * iteration order, otherwise forces it.  Indicates in the itflags that
2115  * whether the iteration order was forced.
2116  */
2117 static void
npyiter_apply_forced_iteration_order(NpyIter * iter,NPY_ORDER order)2118 npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order)
2119 {
2120     /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
2121     int ndim = NIT_NDIM(iter);
2122     int iop, nop = NIT_NOP(iter);
2123 
2124     switch (order) {
2125     case NPY_CORDER:
2126         NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2127         break;
2128     case NPY_FORTRANORDER:
2129         NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2130         /* Only need to actually do something if there is more than 1 dim */
2131         if (ndim > 1) {
2132             npyiter_reverse_axis_ordering(iter);
2133         }
2134         break;
2135     case NPY_ANYORDER:
2136         NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2137         /* Only need to actually do something if there is more than 1 dim */
2138         if (ndim > 1) {
2139             PyArrayObject **op = NIT_OPERANDS(iter);
2140             int forder = 1;
2141 
2142             /* Check that all the array inputs are fortran order */
2143             for (iop = 0; iop < nop; ++iop, ++op) {
2144                 if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) {
2145                     forder = 0;
2146                     break;
2147                 }
2148             }
2149 
2150             if (forder) {
2151                 npyiter_reverse_axis_ordering(iter);
2152             }
2153         }
2154         break;
2155     case NPY_KEEPORDER:
2156         /* Don't set the forced order flag here... */
2157         break;
2158     }
2159 }
2160 
2161 /*
2162  * This function negates any strides in the iterator
2163  * which are negative.  When iterating more than one
2164  * object, it only flips strides when they are all
2165  * negative or zero.
2166  */
2167 static void
npyiter_flip_negative_strides(NpyIter * iter)2168 npyiter_flip_negative_strides(NpyIter *iter)
2169 {
2170     npy_uint32 itflags = NIT_ITFLAGS(iter);
2171     int idim, ndim = NIT_NDIM(iter);
2172     int iop, nop = NIT_NOP(iter);
2173 
2174     npy_intp istrides, nstrides = NAD_NSTRIDES();
2175     NpyIter_AxisData *axisdata, *axisdata0;
2176     npy_intp *baseoffsets;
2177     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2178     int any_flipped = 0;
2179 
2180     axisdata0 = axisdata = NIT_AXISDATA(iter);
2181     baseoffsets = NIT_BASEOFFSETS(iter);
2182     for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2183         npy_intp *strides = NAD_STRIDES(axisdata);
2184         int any_negative = 0;
2185 
2186         /*
2187          * Check the signs of all the operand strides.
2188          */
2189         for (iop = 0; iop < nop; ++iop) {
2190             if (strides[iop] < 0) {
2191                 any_negative = 1;
2192             }
2193             else if (strides[iop] != 0) {
2194                 break;
2195             }
2196         }
2197         /*
2198          * If at least one stride is negative and none are positive,
2199          * flip all the strides for this dimension.
2200          */
2201         if (any_negative && iop == nop) {
2202             npy_intp shapem1 = NAD_SHAPE(axisdata) - 1;
2203 
2204             for (istrides = 0; istrides < nstrides; ++istrides) {
2205                 npy_intp stride = strides[istrides];
2206 
2207                 /* Adjust the base pointers to start at the end */
2208                 baseoffsets[istrides] += shapem1 * stride;
2209                 /* Flip the stride */
2210                 strides[istrides] = -stride;
2211             }
2212             /*
2213              * Make the perm entry negative so get_multi_index
2214              * knows it's flipped
2215              */
2216             NIT_PERM(iter)[idim] = -1-NIT_PERM(iter)[idim];
2217 
2218             any_flipped = 1;
2219         }
2220     }
2221 
2222     /*
2223      * If any strides were flipped, the base pointers were adjusted
2224      * in the first AXISDATA, and need to be copied to all the rest
2225      */
2226     if (any_flipped) {
2227         char **resetdataptr = NIT_RESETDATAPTR(iter);
2228 
2229         for (istrides = 0; istrides < nstrides; ++istrides) {
2230             resetdataptr[istrides] += baseoffsets[istrides];
2231         }
2232         axisdata = axisdata0;
2233         for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2234             char **ptrs = NAD_PTRS(axisdata);
2235             for (istrides = 0; istrides < nstrides; ++istrides) {
2236                 ptrs[istrides] = resetdataptr[istrides];
2237             }
2238         }
2239         /*
2240          * Indicate that some of the perm entries are negative,
2241          * and that it's not (strictly speaking) the identity perm.
2242          */
2243         NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter)|NPY_ITFLAG_NEGPERM) &
2244                             ~NPY_ITFLAG_IDENTPERM;
2245     }
2246 }
2247 
2248 static void
npyiter_reverse_axis_ordering(NpyIter * iter)2249 npyiter_reverse_axis_ordering(NpyIter *iter)
2250 {
2251     npy_uint32 itflags = NIT_ITFLAGS(iter);
2252     int ndim = NIT_NDIM(iter);
2253     int nop = NIT_NOP(iter);
2254 
2255     npy_intp i, temp, size;
2256     npy_intp *first, *last;
2257     npy_int8 *perm;
2258 
2259     size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop)/NPY_SIZEOF_INTP;
2260     first = (npy_intp*)NIT_AXISDATA(iter);
2261     last = first + (ndim-1)*size;
2262 
2263     /* This loop reverses the order of the AXISDATA array */
2264     while (first < last) {
2265         for (i = 0; i < size; ++i) {
2266             temp = first[i];
2267             first[i] = last[i];
2268             last[i] = temp;
2269         }
2270         first += size;
2271         last -= size;
2272     }
2273 
2274     /* Store the perm we applied */
2275     perm = NIT_PERM(iter);
2276     for(i = ndim-1; i >= 0; --i, ++perm) {
2277         *perm = (npy_int8)i;
2278     }
2279 
2280     NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
2281 }
2282 
2283 static NPY_INLINE npy_intp
intp_abs(npy_intp x)2284 intp_abs(npy_intp x)
2285 {
2286     return (x < 0) ? -x : x;
2287 }
2288 
/*
 * Reorders the iterator's axes to get the most cache-friendly memory
 * access pattern, using a stable insertion sort on the operand strides.
 * The computed permutation is stored in NIT_PERM and applied to the
 * AXISDATA array in place.
 */
static void
npyiter_find_best_axis_ordering(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    npy_intp ax_i0, ax_i1, ax_ipos;
    npy_int8 ax_j0, ax_j1;
    npy_int8 *perm;
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    int permuted = 0;

    perm = NIT_PERM(iter);

    /*
     * Do a custom stable insertion sort.  Note that because
     * the AXISDATA has been reversed from C order, this
     * is sorting from smallest stride to biggest stride.
     */
    for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) {
        npy_intp *strides0;

        /* 'ax_ipos' is where perm[ax_i0] will get inserted */
        ax_ipos = ax_i0;
        ax_j0 = perm[ax_i0];

        strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0));
        /* Scan backwards through already-sorted entries */
        for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) {
            /* 'ambig' means no operand gave a usable stride comparison */
            int ambig = 1, shouldswap = 0;
            npy_intp *strides1;

            ax_j1 = perm[ax_i1];

            strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1));

            /* Vote across operands; zero strides don't participate */
            for (iop = 0; iop < nop; ++iop) {
                if (strides0[iop] != 0 && strides1[iop] != 0) {
                    if (intp_abs(strides1[iop]) <=
                                            intp_abs(strides0[iop])) {
                        /*
                         * Set swap even if it's not ambiguous already,
                         * because in the case of conflicts between
                         * different operands, C-order wins.
                         */
                        shouldswap = 0;
                    }
                    else {
                        /* Only set swap if it's still ambiguous */
                        if (ambig) {
                            shouldswap = 1;
                        }
                    }

                    /*
                     * A comparison has been done, so it's
                     * no longer ambiguous
                     */
                    ambig = 0;
                }
            }
            /*
             * If the comparison was unambiguous, either shift
             * 'ax_ipos' to 'ax_i1' or stop looking for an insertion
             * point
             */
            if (!ambig) {
                if (shouldswap) {
                    ax_ipos = ax_i1;
                }
                else {
                    break;
                }
            }
        }

        /* Insert perm[ax_i0] into the right place */
        if (ax_ipos != ax_i0) {
            for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) {
                perm[ax_i1] = perm[ax_i1-1];
            }
            perm[ax_ipos] = ax_j0;
            permuted = 1;
        }
    }

    /* Apply the computed permutation to the AXISDATA array */
    if (permuted == 1) {
        npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP;
        NpyIter_AxisData *ad_i;

        /* Use the index as a flag, set each to 1 */
        ad_i = axisdata;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) {
            NAD_INDEX(ad_i) = 1;
        }
        /* Apply the permutation by following the cycles */
        for (idim = 0; idim < ndim; ++idim) {
            ad_i = NIT_INDEX_AXISDATA(axisdata, idim);

            /* If this axis hasn't been touched yet, process it */
            if (NAD_INDEX(ad_i) == 1) {
                npy_int8 pidim = perm[idim];
                npy_intp tmp;
                NpyIter_AxisData *ad_p, *ad_q;

                if (pidim != idim) {
                    /* Follow the cycle, copying the data */
                    /* Each npy_intp-sized word is rotated along the cycle */
                    for (i = 0; i < size; ++i) {
                        pidim = perm[idim];
                        ad_q = ad_i;
                        tmp = *((npy_intp*)ad_q + i);
                        while (pidim != idim) {
                            ad_p = NIT_INDEX_AXISDATA(axisdata, pidim);
                            *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i);

                            ad_q = ad_p;
                            pidim = perm[(int)pidim];
                        }
                        *((npy_intp*)ad_q + i) = tmp;
                    }
                    /* Follow the cycle again, marking it as done */
                    pidim = perm[idim];

                    while (pidim != idim) {
                        NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0;
                        pidim = perm[(int)pidim];
                    }
                }
                /* Mark this axis as processed */
                NAD_INDEX(ad_i) = 0;
            }
        }
        /* Clear the identity perm flag */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
    }
}
2426 
2427 /*
2428  * Calculates a dtype that all the types can be promoted to, using the
2429  * ufunc rules.  If only_inputs is 1, it leaves any operands that
2430  * are not read from out of the calculation.
2431  */
2432 static PyArray_Descr *
npyiter_get_common_dtype(int nop,PyArrayObject ** op,const npyiter_opitflags * op_itflags,PyArray_Descr ** op_dtype,PyArray_Descr ** op_request_dtypes,int only_inputs)2433 npyiter_get_common_dtype(int nop, PyArrayObject **op,
2434                         const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
2435                         PyArray_Descr **op_request_dtypes,
2436                         int only_inputs)
2437 {
2438     int iop;
2439     npy_intp narrs = 0, ndtypes = 0;
2440     PyArrayObject *arrs[NPY_MAXARGS];
2441     PyArray_Descr *dtypes[NPY_MAXARGS];
2442     PyArray_Descr *ret;
2443 
2444     NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n");
2445 
2446     for (iop = 0; iop < nop; ++iop) {
2447         if (op_dtype[iop] != NULL &&
2448                     (!only_inputs || (op_itflags[iop] & NPY_OP_ITFLAG_READ))) {
2449             /* If no dtype was requested and the op is a scalar, pass the op */
2450             if ((op_request_dtypes == NULL ||
2451                             op_request_dtypes[iop] == NULL) &&
2452                                             PyArray_NDIM(op[iop]) == 0) {
2453                 arrs[narrs++] = op[iop];
2454             }
2455             /* Otherwise just pass in the dtype */
2456             else {
2457                 dtypes[ndtypes++] = op_dtype[iop];
2458             }
2459         }
2460     }
2461 
2462     if (narrs == 0) {
2463         npy_intp i;
2464         ret = dtypes[0];
2465         for (i = 1; i < ndtypes; ++i) {
2466             if (ret != dtypes[i])
2467                 break;
2468         }
2469         if (i == ndtypes) {
2470             if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) {
2471                 Py_INCREF(ret);
2472             }
2473             else {
2474                 ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE);
2475             }
2476         }
2477         else {
2478             ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2479         }
2480     }
2481     else {
2482         ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2483     }
2484 
2485     return ret;
2486 }
2487 
2488 /*
2489  * Allocates a temporary array which can be used to replace op
2490  * in the iteration.  Its dtype will be op_dtype.
2491  *
2492  * The result array has a memory ordering which matches the iterator,
2493  * which may or may not match that of op.  The parameter 'shape' may be
2494  * NULL, in which case it is filled in from the iterator's shape.
2495  *
2496  * This function must be called before any axes are coalesced.
2497  */
static PyArrayObject *
npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                npy_uint32 flags, npyiter_opitflags *op_itflags,
                int op_ndim, npy_intp const *shape,
                PyArray_Descr *op_dtype, const int *op_axes)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int used_op_ndim;
    int nop = NIT_NOP(iter);

    /* The axis permutation computed by npyiter_find_best_axis_ordering */
    npy_int8 *perm = NIT_PERM(iter);
    npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    npy_intp stride = op_dtype->elsize;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    int i;

    PyArrayObject *ret;

    /*
     * There is an interaction with array-dtypes here, which
     * generally works. Let's say you make an nditer with an
     * output dtype of a 2-double array. All-scalar inputs
     * will result in a 1-dimensional output with shape (2).
     * Everything still works out in the nditer, because the
     * new dimension is always added on the end, and it cares
     * about what happens at the beginning.
     */

    /* If it's a scalar, don't need to check the axes */
    if (op_ndim == 0) {
        Py_INCREF(op_dtype);
        ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0,
                               NULL, NULL, NULL, 0, NULL);

        return ret;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Initialize the strides to invalid values; NPY_MAX_INTP acts as a
     * sentinel marking operand axes not assigned by the loops below.
     */
    for (i = 0; i < op_ndim; ++i) {
        strides[i] = NPY_MAX_INTP;
    }

    if (op_axes != NULL) {
        /* A custom iterator-axis -> operand-axis mapping was given */
        used_op_ndim = 0;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            npy_bool reduction_axis;

            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
            i = npyiter_get_op_axis(op_axes[i], &reduction_axis);

            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                used_op_ndim += 1;
                /*
                 * NOTE(review): strides[i] is written before the i >= ndim
                 * check below; this stays in bounds only if op_axes entries
                 * were pre-validated against NPY_MAXDIMS -- presumably done
                 * by npyiter_check_op_axes, TODO confirm.
                 */
                strides[i] = stride;
                if (shape == NULL) {
                    if (reduction_axis) {
                        /* reduction axes always have a length of 1 */
                        new_shape[i] = 1;
                    }
                    else {
                        new_shape[i] = NAD_SHAPE(axisdata);
                    }
                    stride *= new_shape[i];
                    if (i >= ndim) {
                        PyErr_Format(PyExc_ValueError,
                                "automatically allocated output array "
                                "specified with an inconsistent axis mapping; "
                                "the axis mapping cannot include dimension %d "
                                "which is too large for the iterator dimension "
                                "of %d.", i, ndim);
                        return NULL;
                    }
                }
                else {
                    assert(!reduction_axis || shape[i] == 1);
                    stride *= shape[i];
                }
            }
            else {
                /* This iterator dimension is a new-axis for the operand */
                if (shape == NULL) {
                    /*
                     * If deleting this axis produces a reduction, but
                     * reduction wasn't enabled, throw an error.
                     * NOTE: We currently always allow new-axis if the iteration
                     *       size is 1 (thus allowing broadcasting sometimes).
                     */
                    if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
                        if (!npyiter_check_reduce_ok_and_set_flags(
                                iter, flags, op_itflags, i)) {
                            return NULL;
                        }
                    }
                }
            }
        }
    }
    else {
        /* No op_axes: iterator axes map directly onto the operand's axes */
        used_op_ndim = ndim;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL);

            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                strides[i] = stride;
                if (shape == NULL) {
                    new_shape[i] = NAD_SHAPE(axisdata);
                    stride *= new_shape[i];
                }
                else {
                    stride *= shape[i];
                }
            }
        }
    }

    if (shape == NULL) {
        /* If shape was NULL, use the shape we calculated */
        op_ndim = used_op_ndim;
        shape = new_shape;
        /*
         * If there's a gap in the array's dimensions, it's an error.
         * For instance, if op_axes [0, 2] is specified, there will a place
         * in the strides array where the value is not set.
         */
        for (i = 0; i < op_ndim; i++) {
            if (strides[i] == NPY_MAX_INTP) {
                PyErr_Format(PyExc_ValueError,
                        "automatically allocated output array "
                        "specified with an inconsistent axis mapping; "
                        "the axis mapping is missing an entry for "
                        "dimension %d.", i);
                return NULL;
            }
        }
    }
    else if (used_op_ndim < op_ndim) {
        /*
         * If custom axes were specified, some dimensions may not have
         * been used. These are additional axes which are ignored in the
         * iterator but need to be handled here.
         */
        npy_intp factor, itemsize, new_strides[NPY_MAXDIMS];

        /* Fill in the missing strides in C order */
        factor = 1;
        itemsize = op_dtype->elsize;
        for (i = op_ndim-1; i >= 0; --i) {
            if (strides[i] == NPY_MAX_INTP) {
                new_strides[i] = factor * itemsize;
                factor *= shape[i];
            }
        }

        /*
         * Copy the missing strides, and multiply the existing strides
         * by the calculated factor.  This way, the missing strides
         * are tighter together in memory, which is good for nested
         * loops.
         */
        for (i = 0; i < op_ndim; ++i) {
            if (strides[i] == NPY_MAX_INTP) {
                strides[i] = new_strides[i];
            }
            else {
                strides[i] *= factor;
            }
        }
    }

    /* Allocate the temporary array */
    Py_INCREF(op_dtype);
    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim,
                               shape, strides, NULL, 0, NULL);
    if (ret == NULL) {
        return NULL;
    }

    /* Double-check that the subtype didn't mess with the dimensions */
    if (subtype != &PyArray_Type) {
        /*
         * TODO: the dtype could have a subarray, which adds new dimensions
         *       to `ret`, that should typically be fine, but will break
         *       in this branch.
         */
        if (PyArray_NDIM(ret) != op_ndim ||
                    !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator automatic output has an array subtype "
                    "which changed the dimensions of the output");
            Py_DECREF(ret);
            return NULL;
        }
    }

    return ret;
}
2705 
/*
 * Allocates any operands that were passed in as NULL (outputs the
 * iterator must create) and makes temporary copies of operands where
 * needed: read-only scalars requiring a cast, operands with the
 * COPY/UPDATEIFCOPY flags, and write operands forced to copy by the
 * overlap analysis.  The iterator's per-operand pointers and strides
 * are updated to refer to the new arrays.  Returns 1 on success,
 * 0 on failure with a Python exception set.
 */
static int
npyiter_allocate_arrays(NpyIter *iter,
                        npy_uint32 flags,
                        PyArray_Descr **op_dtype, PyTypeObject *subtype,
                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                        int **op_axes)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    int check_writemasked_reductions = 0;

    NpyIter_BufferData *bufferdata = NULL;
    PyArrayObject **op = NIT_OPERANDS(iter);

    if (itflags & NPY_ITFLAG_BUFFER) {
        bufferdata = NIT_BUFFERDATA(iter);
    }

    if (flags & NPY_ITER_COPY_IF_OVERLAP) {
        /*
         * Perform operand memory overlap checks, if requested.
         *
         * If any write operand has memory overlap with any read operand,
         * eliminate all overlap by making temporary copies, by enabling
         * NPY_OP_ITFLAG_FORCECOPY for the write operand to force WRITEBACKIFCOPY.
         *
         * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not
         * considered overlapping if the arrays are exactly the same. In this
         * case, the iterator loops through them in the same order element by
         * element.  (As usual, the user-provided inner loop is assumed to be
         * able to deal with this level of simple aliasing.)
         */
        for (iop = 0; iop < nop; ++iop) {
            int may_share_memory = 0;
            int iother;

            if (op[iop] == NULL) {
                /* Iterator will always allocate */
                continue;
            }

            if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) {
                /*
                 * Copy output operands only, not inputs.
                 * A more sophisticated heuristic could be
                 * substituted here later.
                 */
                continue;
            }

            for (iother = 0; iother < nop; ++iother) {
                if (iother == iop || op[iother] == NULL) {
                    continue;
                }

                if (!(op_itflags[iother] & NPY_OP_ITFLAG_READ)) {
                    /* No data dependence for arrays not read from */
                    continue;
                }

                if (op_itflags[iother] & NPY_OP_ITFLAG_FORCECOPY) {
                    /* Already copied */
                    continue;
                }

                /*
                 * If the arrays are views to exactly the same data, no need
                 * to make copies, if the caller (eg ufunc) says it accesses
                 * data only in the iterator order.
                 *
                 * However, if there is internal overlap (e.g. a zero stride on
                 * a non-unit dimension), a copy cannot be avoided.
                 */
                if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
                    (op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
                    PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) &&
                    PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) &&
                    PyArray_CompareLists(PyArray_DIMS(op[iop]),
                                         PyArray_DIMS(op[iother]),
                                         PyArray_NDIM(op[iop])) &&
                    PyArray_CompareLists(PyArray_STRIDES(op[iop]),
                                         PyArray_STRIDES(op[iother]),
                                         PyArray_NDIM(op[iop])) &&
                    PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) &&
                    solve_may_have_internal_overlap(op[iop], 1) == 0) {

                    continue;
                }

                /*
                 * Use max work = 1. If the arrays are large, it might
                 * make sense to go further.
                 */
                may_share_memory = solve_may_share_memory(op[iop],
                                                          op[iother],
                                                          1);

                if (may_share_memory) {
                    op_itflags[iop] |= NPY_OP_ITFLAG_FORCECOPY;
                    break;
                }
            }
        }
    }

    for (iop = 0; iop < nop; ++iop) {
        /*
         * Check whether there are any WRITEMASKED REDUCE operands
         * which should be validated after all the strides are filled
         * in.
         */
        if ((op_itflags[iop] &
                (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
            check_writemasked_reductions = 1;
        }

        /* NULL means an output the iterator should allocate */
        if (op[iop] == NULL) {
            PyArrayObject *out;
            PyTypeObject *op_subtype;

            /* Check whether the subtype was disabled */
            op_subtype = (op_flags[iop] & NPY_ITER_NO_SUBTYPE) ?
                                                &PyArray_Type : subtype;

            /*
             * Allocate the output array.
             *
             * Note that here, ndim is always correct if no op_axes was given
             * (but the actual dimension of op can be larger). If op_axes
             * is given, ndim is not actually used.
             */
            out = npyiter_new_temp_array(iter, op_subtype,
                                        flags, &op_itflags[iop],
                                        ndim,
                                        NULL,
                                        op_dtype[iop],
                                        op_axes ? op_axes[iop] : NULL);
            if (out == NULL) {
                return 0;
            }

            op[iop] = out;

            /*
             * Now we need to replace the pointers and strides with values
             * from the new array.
             */
            npyiter_replace_axisdata(iter, iop, op[iop], ndim,
                    op_axes ? op_axes[iop] : NULL);

            /*
             * New arrays are guaranteed true-aligned, but copy/cast code
             * needs uint-alignment in addition.
             */
            if (IsUintAligned(out)) {
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
            }
            /* New arrays need no cast */
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
        }
        /*
         * If casting is required, the operand is read-only, and
         * it's an array scalar, make a copy whether or not the
         * copy flag is enabled.
         */
        else if ((op_itflags[iop] & (NPY_OP_ITFLAG_CAST |
                         NPY_OP_ITFLAG_READ |
                         NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST |
                                                   NPY_OP_ITFLAG_READ) &&
                          PyArray_NDIM(op[iop]) == 0) {
            PyArrayObject *temp;
            Py_INCREF(op_dtype[iop]);
            temp = (PyArrayObject *)PyArray_NewFromDescr(
                                        &PyArray_Type, op_dtype[iop],
                                        0, NULL, NULL, NULL, 0, NULL);
            if (temp == NULL) {
                return 0;
            }
            if (PyArray_CopyInto(temp, op[iop]) != 0) {
                Py_DECREF(temp);
                return 0;
            }
            /* The copy replaces the original operand in the iterator */
            Py_DECREF(op[iop]);
            op[iop] = temp;

            /*
             * Now we need to replace the pointers and strides with values
             * from the temporary array.
             */
            npyiter_replace_axisdata(iter, iop, op[iop], 0, NULL);

            /*
             * New arrays are guaranteed true-aligned, but copy/cast code
             * needs uint-alignment in addition.
             */
            if (IsUintAligned(temp)) {
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
            }
            /*
             * New arrays need no cast, and in the case
             * of scalars, always have stride 0 so never need buffering
             */
            op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
            if (itflags & NPY_ITFLAG_BUFFER) {
                NBF_STRIDES(bufferdata)[iop] = 0;
            }
        }
        /*
         * Make a temporary copy if,
         * 1. If casting is required and permitted, or,
         * 2. If force-copy is requested
         */
        else if (((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
                        (op_flags[iop] &
                        (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) ||
                 (op_itflags[iop] & NPY_OP_ITFLAG_FORCECOPY)) {
            PyArrayObject *temp;
            int ondim = PyArray_NDIM(op[iop]);

            /* Allocate the temporary array, if possible */
            temp = npyiter_new_temp_array(iter, &PyArray_Type,
                                        flags, &op_itflags[iop],
                                        ondim,
                                        PyArray_DIMS(op[iop]),
                                        op_dtype[iop],
                                        op_axes ? op_axes[iop] : NULL);
            if (temp == NULL) {
                return 0;
            }

            /*
             * If the data will be read, copy it into temp.
             * TODO: It might be possible to do a view into
             *       op[iop]'s mask instead here.
             */
            if (op_itflags[iop] & NPY_OP_ITFLAG_READ) {
                if (PyArray_CopyInto(temp, op[iop]) != 0) {
                    Py_DECREF(temp);
                    return 0;
                }
            }
            /* If the data will be written to, set WRITEBACKIFCOPY
               and require a context manager */
            if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
                Py_INCREF(op[iop]);
                if (PyArray_SetWritebackIfCopyBase(temp, op[iop]) < 0) {
                    Py_DECREF(temp);
                    return 0;
                }
                op_itflags[iop] |= NPY_OP_ITFLAG_HAS_WRITEBACK;
            }

            Py_DECREF(op[iop]);
            op[iop] = temp;

            /*
             * Now we need to replace the pointers and strides with values
             * from the temporary array.
             */
            npyiter_replace_axisdata(iter, iop, op[iop], ondim,
                    op_axes ? op_axes[iop] : NULL);

            /*
             * New arrays are guaranteed true-aligned, but copy/cast code
             * needs uint-alignment in addition.
             */
            if (IsUintAligned(temp)) {
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
            }
            /* The temporary copy needs no cast */
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
        }
        else {
            /*
             * Buffering must be enabled for casting/conversion if copy
             * wasn't specified.
             */
            if ((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
                                  !(itflags & NPY_ITFLAG_BUFFER)) {
                PyErr_SetString(PyExc_TypeError,
                        "Iterator operand required copying or buffering, "
                        "but neither copying nor buffering was enabled");
                return 0;
            }

            /*
             * If the operand is aligned, any buffering can use aligned
             * optimizations.
             */
            if (IsUintAligned(op[iop])) {
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
            }
        }

        /* Here we can finally check for contiguous iteration */
        if (op_flags[iop] & NPY_ITER_CONTIG) {
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
            npy_intp stride = NAD_STRIDES(axisdata)[iop];

            /* A non-elsize inner stride means buffering must make it contiguous */
            if (stride != op_dtype[iop]->elsize) {
                NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
                                    "because of NPY_ITER_CONTIG\n");
                op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
                if (!(itflags & NPY_ITFLAG_BUFFER)) {
                    PyErr_SetString(PyExc_TypeError,
                            "Iterator operand required buffering, "
                            "to be contiguous as requested, but "
                            "buffering is not enabled");
                    return 0;
                }
            }
        }

        /*
         * If no alignment, byte swap, or casting is needed,
         * the inner stride of this operand works for the whole
         * array, we can set NPY_OP_ITFLAG_BUFNEVER.
         */
        if ((itflags & NPY_ITFLAG_BUFFER) &&
                                !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
            if (ndim <= 1) {
                op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
                NBF_STRIDES(bufferdata)[iop] = NAD_STRIDES(axisdata)[iop];
            }
            else if (PyArray_NDIM(op[iop]) > 0) {
                npy_intp stride, shape, innerstride = 0, innershape;
                npy_intp sizeof_axisdata =
                                    NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
                /* Find stride of the first non-empty shape */
                for (idim = 0; idim < ndim; ++idim) {
                    innershape = NAD_SHAPE(axisdata);
                    if (innershape != 1) {
                        innerstride = NAD_STRIDES(axisdata)[iop];
                        break;
                    }
                    NIT_ADVANCE_AXISDATA(axisdata, 1);
                }
                ++idim;
                NIT_ADVANCE_AXISDATA(axisdata, 1);
                /* Check that everything could have coalesced together */
                for (; idim < ndim; ++idim) {
                    stride = NAD_STRIDES(axisdata)[iop];
                    shape = NAD_SHAPE(axisdata);
                    if (shape != 1) {
                        /*
                         * If N times the inner stride doesn't equal this
                         * stride, the multi-dimensionality is needed.
                         */
                        if (innerstride*innershape != stride) {
                            break;
                        }
                        else {
                            innershape *= shape;
                        }
                    }
                    NIT_ADVANCE_AXISDATA(axisdata, 1);
                }
                /*
                 * If we looped all the way to the end, one stride works.
                 * Set that stride, because it may not belong to the first
                 * dimension.
                 */
                if (idim == ndim) {
                    op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
                    NBF_STRIDES(bufferdata)[iop] = innerstride;
                }
            }
        }
    }

    if (check_writemasked_reductions) {
        for (iop = 0; iop < nop; ++iop) {
            /*
             * Check whether there are any WRITEMASKED REDUCE operands
             * which should be validated now that all the strides are filled
             * in.
             */
            if ((op_itflags[iop] &
                    (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
                /*
                 * If the ARRAYMASK has 'bigger' dimensions
                 * than this REDUCE WRITEMASKED operand,
                 * the result would be more than one mask
                 * value per reduction element, something which
                 * is invalid. This function provides validation
                 * for that.
                 */
                if (!check_mask_for_writemasked_reduction(iter, iop)) {
                    return 0;
                }
            }
        }
    }

    return 1;
}
3109 
3110 /*
3111  * The __array_priority__ attribute of the inputs determines
3112  * the subtype of any output arrays.  This function finds the
3113  * subtype of the input array with highest priority.
3114  */
3115 static void
npyiter_get_priority_subtype(int nop,PyArrayObject ** op,const npyiter_opitflags * op_itflags,double * subtype_priority,PyTypeObject ** subtype)3116 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
3117                             const npyiter_opitflags *op_itflags,
3118                             double *subtype_priority,
3119                             PyTypeObject **subtype)
3120 {
3121     int iop;
3122 
3123     for (iop = 0; iop < nop; ++iop) {
3124         if (op[iop] != NULL && op_itflags[iop] & NPY_OP_ITFLAG_READ) {
3125             double priority = PyArray_GetPriority((PyObject *)op[iop], 0.0);
3126             if (priority > *subtype_priority) {
3127                 *subtype_priority = priority;
3128                 *subtype = Py_TYPE(op[iop]);
3129             }
3130         }
3131     }
3132 }
3133 
3134 static int
npyiter_allocate_transfer_functions(NpyIter * iter)3135 npyiter_allocate_transfer_functions(NpyIter *iter)
3136 {
3137     npy_uint32 itflags = NIT_ITFLAGS(iter);
3138     /*int ndim = NIT_NDIM(iter);*/
3139     int iop = 0, nop = NIT_NOP(iter);
3140 
3141     npy_intp i;
3142     npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
3143     NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
3144     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3145     PyArrayObject **op = NIT_OPERANDS(iter);
3146     PyArray_Descr **op_dtype = NIT_DTYPES(iter);
3147     npy_intp *strides = NAD_STRIDES(axisdata), op_stride;
3148     PyArray_StridedUnaryOp **readtransferfn = NBF_READTRANSFERFN(bufferdata),
3149                         **writetransferfn = NBF_WRITETRANSFERFN(bufferdata);
3150     NpyAuxData **readtransferdata = NBF_READTRANSFERDATA(bufferdata),
3151                **writetransferdata = NBF_WRITETRANSFERDATA(bufferdata);
3152 
3153     PyArray_StridedUnaryOp *stransfer = NULL;
3154     NpyAuxData *transferdata = NULL;
3155     int needs_api = 0;
3156 
3157     for (iop = 0; iop < nop; ++iop) {
3158         npyiter_opitflags flags = op_itflags[iop];
3159         /*
3160          * Reduction operands may be buffered with a different stride,
3161          * so we must pass NPY_MAX_INTP to the transfer function factory.
3162          */
3163         op_stride = (flags & NPY_OP_ITFLAG_REDUCE) ? NPY_MAX_INTP :
3164                                                    strides[iop];
3165 
3166         /*
3167          * If we have determined that a buffer may be needed,
3168          * allocate the appropriate transfer functions
3169          */
3170         if (!(flags & NPY_OP_ITFLAG_BUFNEVER)) {
3171             if (flags & NPY_OP_ITFLAG_READ) {
3172                 int move_references = 0;
3173                 if (PyArray_GetDTypeTransferFunction(
3174                                         (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3175                                         op_stride,
3176                                         op_dtype[iop]->elsize,
3177                                         PyArray_DESCR(op[iop]),
3178                                         op_dtype[iop],
3179                                         move_references,
3180                                         &stransfer,
3181                                         &transferdata,
3182                                         &needs_api) != NPY_SUCCEED) {
3183                     iop -= 1;  /* This one cannot be cleaned up yet. */
3184                     goto fail;
3185                 }
3186                 readtransferfn[iop] = stransfer;
3187                 readtransferdata[iop] = transferdata;
3188             }
3189             else {
3190                 readtransferfn[iop] = NULL;
3191             }
3192             if (flags & NPY_OP_ITFLAG_WRITE) {
3193                 int move_references = 1;
3194 
3195                 /* If the operand is WRITEMASKED, use a masked transfer fn */
3196                 if (flags & NPY_OP_ITFLAG_WRITEMASKED) {
3197                     int maskop = NIT_MASKOP(iter);
3198                     PyArray_Descr *mask_dtype = PyArray_DESCR(op[maskop]);
3199 
3200                     /*
3201                      * If the mask's stride is contiguous, use it, otherwise
3202                      * the mask may or may not be buffered, so the stride
3203                      * could be inconsistent.
3204                      */
3205                     if (PyArray_GetMaskedDTypeTransferFunction(
3206                                 (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3207                                 op_dtype[iop]->elsize,
3208                                 op_stride,
3209                                 (strides[maskop] == mask_dtype->elsize) ?
3210                                                 mask_dtype->elsize :
3211                                                 NPY_MAX_INTP,
3212                                 op_dtype[iop],
3213                                 PyArray_DESCR(op[iop]),
3214                                 mask_dtype,
3215                                 move_references,
3216                                 (PyArray_MaskedStridedUnaryOp **)&stransfer,
3217                                 &transferdata,
3218                                 &needs_api) != NPY_SUCCEED) {
3219                         goto fail;
3220                     }
3221                 }
3222                 else {
3223                     if (PyArray_GetDTypeTransferFunction(
3224                                         (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3225                                         op_dtype[iop]->elsize,
3226                                         op_stride,
3227                                         op_dtype[iop],
3228                                         PyArray_DESCR(op[iop]),
3229                                         move_references,
3230                                         &stransfer,
3231                                         &transferdata,
3232                                         &needs_api) != NPY_SUCCEED) {
3233                         goto fail;
3234                     }
3235                 }
3236                 writetransferfn[iop] = stransfer;
3237                 writetransferdata[iop] = transferdata;
3238             }
3239             /* If no write back but there are references make a decref fn */
3240             else if (PyDataType_REFCHK(op_dtype[iop])) {
3241                 /*
3242                  * By passing NULL to dst_type and setting move_references
3243                  * to 1, we get back a function that just decrements the
3244                  * src references.
3245                  */
3246                 if (PyArray_GetDTypeTransferFunction(
3247                                         (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3248                                         op_dtype[iop]->elsize, 0,
3249                                         op_dtype[iop], NULL,
3250                                         1,
3251                                         &stransfer,
3252                                         &transferdata,
3253                                         &needs_api) != NPY_SUCCEED) {
3254                     goto fail;
3255                 }
3256                 writetransferfn[iop] = stransfer;
3257                 writetransferdata[iop] = transferdata;
3258             }
3259             else {
3260                 writetransferfn[iop] = NULL;
3261             }
3262         }
3263         else {
3264             readtransferfn[iop] = NULL;
3265             writetransferfn[iop] = NULL;
3266         }
3267     }
3268 
3269     /* If any of the dtype transfer functions needed the API, flag it */
3270     if (needs_api) {
3271         NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
3272     }
3273 
3274     return 1;
3275 
3276 fail:
3277     for (i = 0; i < iop+1; ++i) {
3278         if (readtransferdata[iop] != NULL) {
3279             NPY_AUXDATA_FREE(readtransferdata[iop]);
3280             readtransferdata[iop] = NULL;
3281         }
3282         if (writetransferdata[iop] != NULL) {
3283             NPY_AUXDATA_FREE(writetransferdata[iop]);
3284             writetransferdata[iop] = NULL;
3285         }
3286     }
3287     return 0;
3288 }
3289 
3290 #undef NPY_ITERATOR_IMPLEMENTATION_CODE
3291