1 /*
2 * This file implements the construction, copying, and destruction
3 * aspects of NumPy's nditer.
4 *
5 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
6 * The University of British Columbia
7 *
8 * Copyright (c) 2011 Enthought, Inc
9 *
10 * See LICENSE.txt for the license.
11 */
12 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
13
14 /* Indicate that this .c file is allowed to include the header */
15 #define NPY_ITERATOR_IMPLEMENTATION_CODE
16 #include "nditer_impl.h"
17
18 #include "arrayobject.h"
19 #include "array_coercion.h"
20 #include "templ_common.h"
21 #include "array_assign.h"
22
23 /* Internal helper functions private to this file */
24 static int
25 npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
26 static int
27 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
28 const npy_intp *itershape);
29 static int
30 npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
31 int oa_ndim);
32 static int
33 npyiter_check_per_op_flags(npy_uint32 flags, npyiter_opitflags *op_itflags);
34 static int
35 npyiter_prepare_one_operand(PyArrayObject **op,
36 char **op_dataptr,
37 PyArray_Descr *op_request_dtype,
38 PyArray_Descr** op_dtype,
39 npy_uint32 flags,
40 npy_uint32 op_flags, npyiter_opitflags *op_itflags);
41 static int
42 npyiter_prepare_operands(int nop,
43 PyArrayObject **op_in,
44 PyArrayObject **op,
45 char **op_dataptr,
46 PyArray_Descr **op_request_dtypes,
47 PyArray_Descr **op_dtype,
48 npy_uint32 flags,
49 npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
50 npy_int8 *out_maskop);
51 static int
52 npyiter_check_casting(int nop, PyArrayObject **op,
53 PyArray_Descr **op_dtype,
54 NPY_CASTING casting,
55 npyiter_opitflags *op_itflags);
56 static int
57 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
58 char **op_dataptr,
59 const npy_uint32 *op_flags, int **op_axes,
60 npy_intp const *itershape);
61 static NPY_INLINE int
62 npyiter_get_op_axis(int axis, npy_bool *reduction_axis);
63 static void
64 npyiter_replace_axisdata(
65 NpyIter *iter, int iop, PyArrayObject *op,
66 int orig_op_ndim, const int *op_axes);
67 static void
68 npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);
69 static void
70 npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order);
71 static void
72 npyiter_flip_negative_strides(NpyIter *iter);
73 static void
74 npyiter_reverse_axis_ordering(NpyIter *iter);
75 static void
76 npyiter_find_best_axis_ordering(NpyIter *iter);
77 static PyArray_Descr *
78 npyiter_get_common_dtype(int nop, PyArrayObject **op,
79 const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
80 PyArray_Descr **op_request_dtypes,
81 int only_inputs);
82 static PyArrayObject *
83 npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
84 npy_uint32 flags, npyiter_opitflags *op_itflags,
85 int op_ndim, npy_intp const *shape,
86 PyArray_Descr *op_dtype, const int *op_axes);
87 static int
88 npyiter_allocate_arrays(NpyIter *iter,
89 npy_uint32 flags,
90 PyArray_Descr **op_dtype, PyTypeObject *subtype,
91 const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
92 int **op_axes);
93 static void
94 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
95 const npyiter_opitflags *op_itflags,
96 double *subtype_priority, PyTypeObject **subtype);
97 static int
98 npyiter_allocate_transfer_functions(NpyIter *iter);
99
100
101 /*NUMPY_API
102  * Allocate a new iterator for multiple array objects, with advanced
103 * options for controlling the broadcasting, shape, and buffer size.
104 */
105 NPY_NO_EXPORT NpyIter *
106 NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
107 NPY_ORDER order, NPY_CASTING casting,
108 npy_uint32 *op_flags,
109 PyArray_Descr **op_request_dtypes,
110 int oa_ndim, int **op_axes, npy_intp *itershape,
111 npy_intp buffersize)
112 {
113 npy_uint32 itflags = NPY_ITFLAG_IDENTPERM;
114 int idim, ndim;
115 int iop;
116
117 /* The iterator being constructed */
118 NpyIter *iter;
119
120 /* Per-operand values */
121 PyArrayObject **op;
122 PyArray_Descr **op_dtype;
123 npyiter_opitflags *op_itflags;
124 char **op_dataptr;
125
126 npy_int8 *perm;
127 NpyIter_BufferData *bufferdata = NULL;
128 int any_allocate = 0, any_missing_dtypes = 0, need_subtype = 0;
129
130 /* The subtype for automatically allocated outputs */
131 double subtype_priority = NPY_PRIORITY;
132 PyTypeObject *subtype = &PyArray_Type;
133
134 #if NPY_IT_CONSTRUCTION_TIMING
135 npy_intp c_temp,
136 c_start,
137 c_check_op_axes,
138 c_check_global_flags,
139 c_calculate_ndim,
140 c_malloc,
141 c_prepare_operands,
142 c_fill_axisdata,
143 c_compute_index_strides,
144 c_apply_forced_iteration_order,
145 c_find_best_axis_ordering,
146 c_get_priority_subtype,
147 c_find_output_common_dtype,
148 c_check_casting,
149 c_allocate_arrays,
150 c_coalesce_axes,
151 c_prepare_buffers;
152 #endif
153
154 NPY_IT_TIME_POINT(c_start);
155
156 if (nop > NPY_MAXARGS) {
157 PyErr_Format(PyExc_ValueError,
158 "Cannot construct an iterator with more than %d operands "
159 "(%d were requested)", NPY_MAXARGS, nop);
160 return NULL;
161 }
162
163 /*
164 * Before 1.8, if `oa_ndim == 0`, this meant `op_axes != NULL` was an error.
165 * With 1.8, `oa_ndim == -1` takes this role, while op_axes in that case
166 * enforces a 0-d iterator. Using `oa_ndim == 0` with `op_axes == NULL`
167 * is thus an error in 1.13 after deprecation.
168 */
169 if ((oa_ndim == 0) && (op_axes == NULL)) {
170 PyErr_Format(PyExc_ValueError,
171 "Using `oa_ndim == 0` when `op_axes` is NULL. "
172 "Use `oa_ndim == -1` or the MultiNew "
173 "iterator for NumPy <1.8 compatibility");
174 return NULL;
175 }
176
177 /* Error check 'oa_ndim' and 'op_axes', which must be used together */
178 if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) {
179 return NULL;
180 }
181
182 NPY_IT_TIME_POINT(c_check_op_axes);
183
184 /* Check the global iterator flags */
185 if (!npyiter_check_global_flags(flags, &itflags)) {
186 return NULL;
187 }
188
189 NPY_IT_TIME_POINT(c_check_global_flags);
190
191 /* Calculate how many dimensions the iterator should have */
192 ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim);
193
194 NPY_IT_TIME_POINT(c_calculate_ndim);
195
196 /* Allocate memory for the iterator */
197 iter = (NpyIter*)
198 PyObject_Malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
199
200 NPY_IT_TIME_POINT(c_malloc);
201
202 /* Fill in the basic data */
203 NIT_ITFLAGS(iter) = itflags;
204 NIT_NDIM(iter) = ndim;
205 NIT_NOP(iter) = nop;
206 NIT_MASKOP(iter) = -1;
207 NIT_ITERINDEX(iter) = 0;
208 memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
209
210 op = NIT_OPERANDS(iter);
211 op_dtype = NIT_DTYPES(iter);
212 op_itflags = NIT_OPITFLAGS(iter);
213 op_dataptr = NIT_RESETDATAPTR(iter);
214
215 /* Prepare all the operands */
216 if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr,
217 op_request_dtypes, op_dtype,
218 flags,
219 op_flags, op_itflags,
220 &NIT_MASKOP(iter))) {
221 PyObject_Free(iter);
222 return NULL;
223 }
224 /* Set resetindex to zero as well (it's just after the resetdataptr) */
225 op_dataptr[nop] = 0;
226
227 NPY_IT_TIME_POINT(c_prepare_operands);
228
229 /*
230 * Initialize buffer data (must set the buffers and transferdata
231 * to NULL before we might deallocate the iterator).
232 */
233 if (itflags & NPY_ITFLAG_BUFFER) {
234 bufferdata = NIT_BUFFERDATA(iter);
235 NBF_SIZE(bufferdata) = 0;
236 memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
237 memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
238 memset(NBF_READTRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
239 memset(NBF_WRITETRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
240 }
241
242 /* Fill in the AXISDATA arrays and set the ITERSIZE field */
243 if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr,
244 op_flags, op_axes, itershape)) {
245 NpyIter_Deallocate(iter);
246 return NULL;
247 }
248
249 NPY_IT_TIME_POINT(c_fill_axisdata);
250
251 if (itflags & NPY_ITFLAG_BUFFER) {
252 /*
253 * If buffering is enabled and no buffersize was given, use a default
254 * chosen to be big enough to get some amortization benefits, but
255 * small enough to be cache-friendly.
256 */
257 if (buffersize <= 0) {
258 buffersize = NPY_BUFSIZE;
259 }
260 /* No point in a buffer bigger than the iteration size */
261 if (buffersize > NIT_ITERSIZE(iter)) {
262 buffersize = NIT_ITERSIZE(iter);
263 }
264 NBF_BUFFERSIZE(bufferdata) = buffersize;
265
266 /*
267 * Initialize for use in FirstVisit, which may be called before
268 * the buffers are filled and the reduce pos is updated.
269 */
270 NBF_REDUCE_POS(bufferdata) = 0;
271 }
272
273 /*
274 * If an index was requested, compute the strides for it.
275 * Note that we must do this before changing the order of the
276 * axes
277 */
278 npyiter_compute_index_strides(iter, flags);
279
280 NPY_IT_TIME_POINT(c_compute_index_strides);
281
282 /* Initialize the perm to the identity */
283 perm = NIT_PERM(iter);
284 for(idim = 0; idim < ndim; ++idim) {
285 perm[idim] = (npy_int8)idim;
286 }
287
288 /*
289 * If an iteration order is being forced, apply it.
290 */
291 npyiter_apply_forced_iteration_order(iter, order);
292 itflags = NIT_ITFLAGS(iter);
293
294 NPY_IT_TIME_POINT(c_apply_forced_iteration_order);
295
296 /* Set some flags for allocated outputs */
297 for (iop = 0; iop < nop; ++iop) {
298 if (op[iop] == NULL) {
299 /* Flag this so later we can avoid flipping axes */
300 any_allocate = 1;
301 /* If a subtype may be used, indicate so */
302 if (!(op_flags[iop] & NPY_ITER_NO_SUBTYPE)) {
303 need_subtype = 1;
304 }
305 /*
306              * If the data type wasn't provided, we will need to
307              * calculate it.
308 */
309 if (op_dtype[iop] == NULL) {
310 any_missing_dtypes = 1;
311 }
312 }
313 }
314
315 /*
316 * If the ordering was not forced, reorder the axes
317 * and flip negative strides to find the best one.
318 */
319 if (!(itflags & NPY_ITFLAG_FORCEDORDER)) {
320 if (ndim > 1) {
321 npyiter_find_best_axis_ordering(iter);
322 }
323 /*
324 * If there's an output being allocated, we must not negate
325 * any strides.
326 */
327 if (!any_allocate && !(flags & NPY_ITER_DONT_NEGATE_STRIDES)) {
328 npyiter_flip_negative_strides(iter);
329 }
330 itflags = NIT_ITFLAGS(iter);
331 }
332
333 NPY_IT_TIME_POINT(c_find_best_axis_ordering);
334
335 if (need_subtype) {
336 npyiter_get_priority_subtype(nop, op, op_itflags,
337 &subtype_priority, &subtype);
338 }
339
340 NPY_IT_TIME_POINT(c_get_priority_subtype);
341
342 /*
343 * If an automatically allocated output didn't have a specified
344 * dtype, we need to figure it out now, before allocating the outputs.
345 */
346 if (any_missing_dtypes || (flags & NPY_ITER_COMMON_DTYPE)) {
347 PyArray_Descr *dtype;
348 int only_inputs = !(flags & NPY_ITER_COMMON_DTYPE);
349
350 op = NIT_OPERANDS(iter);
351 op_dtype = NIT_DTYPES(iter);
352
353 dtype = npyiter_get_common_dtype(nop, op,
354 op_itflags, op_dtype,
355 op_request_dtypes,
356 only_inputs);
357 if (dtype == NULL) {
358 NpyIter_Deallocate(iter);
359 return NULL;
360 }
361 if (flags & NPY_ITER_COMMON_DTYPE) {
362 NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n");
363 /* Replace all the data types */
364 for (iop = 0; iop < nop; ++iop) {
365 if (op_dtype[iop] != dtype) {
366 Py_XDECREF(op_dtype[iop]);
367 Py_INCREF(dtype);
368 op_dtype[iop] = dtype;
369 }
370 }
371 }
372 else {
373 NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n");
374 /* Replace the NULL data types */
375 for (iop = 0; iop < nop; ++iop) {
376 if (op_dtype[iop] == NULL) {
377 Py_INCREF(dtype);
378 op_dtype[iop] = dtype;
379 }
380 }
381 }
382 Py_DECREF(dtype);
383 }
384
385 NPY_IT_TIME_POINT(c_find_output_common_dtype);
386
387 /*
388 * All of the data types have been settled, so it's time
389 * to check that data type conversions are following the
390 * casting rules.
391 */
392 if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) {
393 NpyIter_Deallocate(iter);
394 return NULL;
395 }
396
397 NPY_IT_TIME_POINT(c_check_casting);
398
399 /*
400      * At this point, the iteration order has been finalized, so
401 * any allocation of ops that were NULL, or any temporary
402 * copying due to casting/byte order/alignment can be
403 * done now using a memory layout matching the iterator.
404 */
405 if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags,
406 op_itflags, op_axes)) {
407 NpyIter_Deallocate(iter);
408 return NULL;
409 }
410
411 NPY_IT_TIME_POINT(c_allocate_arrays);
412
413 /*
414 * Finally, if a multi-index wasn't requested,
415 * it may be possible to coalesce some axes together.
416 */
417 if (ndim > 1 && !(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
418 npyiter_coalesce_axes(iter);
419 /*
420 * The operation may have changed the layout, so we have to
421 * get the internal pointers again.
422 */
423 itflags = NIT_ITFLAGS(iter);
424 ndim = NIT_NDIM(iter);
425 op = NIT_OPERANDS(iter);
426 op_dtype = NIT_DTYPES(iter);
427 op_itflags = NIT_OPITFLAGS(iter);
428 op_dataptr = NIT_RESETDATAPTR(iter);
429 }
430
431 NPY_IT_TIME_POINT(c_coalesce_axes);
432
433 /*
434 * Now that the axes are finished, check whether we can apply
435 * the single iteration optimization to the iternext function.
436 */
437 if (!(itflags & NPY_ITFLAG_BUFFER)) {
438 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
439 if (itflags & NPY_ITFLAG_EXLOOP) {
440 if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
441 NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
442 }
443 }
444 else if (NIT_ITERSIZE(iter) == 1) {
445 NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
446 }
447 }
448
449 /*
450 * If REFS_OK was specified, check whether there are any
451 * reference arrays and flag it if so.
452 */
453 if (flags & NPY_ITER_REFS_OK) {
454 for (iop = 0; iop < nop; ++iop) {
455 PyArray_Descr *rdt = op_dtype[iop];
456 if ((rdt->flags & (NPY_ITEM_REFCOUNT |
457 NPY_ITEM_IS_POINTER |
458 NPY_NEEDS_PYAPI)) != 0) {
459 /* Iteration needs API access */
460 NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
461 }
462 }
463 }
464
465     /* If buffering is enabled, set up the transfer functions (and the buffers unless allocation is delayed) */
466 if (itflags & NPY_ITFLAG_BUFFER) {
467 if (!npyiter_allocate_transfer_functions(iter)) {
468 NpyIter_Deallocate(iter);
469 return NULL;
470 }
471 if (!(itflags & NPY_ITFLAG_DELAYBUF)) {
472 /* Allocate the buffers */
473 if (!npyiter_allocate_buffers(iter, NULL)) {
474 NpyIter_Deallocate(iter);
475 return NULL;
476 }
477
478 /* Prepare the next buffers and set iterend/size */
479 if (npyiter_copy_to_buffers(iter, NULL) < 0) {
480 NpyIter_Deallocate(iter);
481 return NULL;
482 }
483 }
484 }
485
486 NPY_IT_TIME_POINT(c_prepare_buffers);
487
488 #if NPY_IT_CONSTRUCTION_TIMING
489 printf("\nIterator construction timing:\n");
490 NPY_IT_PRINT_TIME_START(c_start);
491 NPY_IT_PRINT_TIME_VAR(c_check_op_axes);
492 NPY_IT_PRINT_TIME_VAR(c_check_global_flags);
493 NPY_IT_PRINT_TIME_VAR(c_calculate_ndim);
494 NPY_IT_PRINT_TIME_VAR(c_malloc);
495 NPY_IT_PRINT_TIME_VAR(c_prepare_operands);
496 NPY_IT_PRINT_TIME_VAR(c_fill_axisdata);
497 NPY_IT_PRINT_TIME_VAR(c_compute_index_strides);
498 NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order);
499 NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering);
500 NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype);
501 NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype);
502 NPY_IT_PRINT_TIME_VAR(c_check_casting);
503 NPY_IT_PRINT_TIME_VAR(c_allocate_arrays);
504 NPY_IT_PRINT_TIME_VAR(c_coalesce_axes);
505 NPY_IT_PRINT_TIME_VAR(c_prepare_buffers);
506 printf("\n");
507 #endif
508
509 return iter;
510 }
511
512 /*NUMPY_API
513 * Allocate a new iterator for more than one array object, using
514 * standard NumPy broadcasting rules and the default buffer size.
515 */
516 NPY_NO_EXPORT NpyIter *
517 NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
518 NPY_ORDER order, NPY_CASTING casting,
519 npy_uint32 *op_flags,
520 PyArray_Descr **op_request_dtypes)
521 {
522 return NpyIter_AdvancedNew(nop, op_in, flags, order, casting,
523 op_flags, op_request_dtypes,
524 -1, NULL, NULL, 0);
525 }
526
527 /*NUMPY_API
528 * Allocate a new iterator for one array object.
529 */
530 NPY_NO_EXPORT NpyIter *
531 NpyIter_New(PyArrayObject *op, npy_uint32 flags,
532 NPY_ORDER order, NPY_CASTING casting,
533 PyArray_Descr* dtype)
534 {
535 /* Split the flags into separate global and op flags */
536 npy_uint32 op_flags = flags & NPY_ITER_PER_OP_FLAGS;
537 flags &= NPY_ITER_GLOBAL_FLAGS;
538
539 return NpyIter_AdvancedNew(1, &op, flags, order, casting,
540 &op_flags, &dtype,
541 -1, NULL, NULL, 0);
542 }
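
/*
 * A minimal usage sketch (an assumption for illustration, not part of the
 * original file): summing an array of doubles with the one-operand
 * constructor above and the external-loop iteration API. The caller is
 * assumed to pass an array whose dtype is NPY_DOUBLE.
 */
static int
npyiter_example_sum_doubles(PyArrayObject *arr, double *out)
{
    NpyIter *iter = NpyIter_New(arr,
            NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP,
            NPY_KEEPORDER, NPY_NO_CASTING, NULL);
    if (iter == NULL) {
        return -1;
    }
    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return -1;
    }
    char **dataptr = NpyIter_GetDataPtrArray(iter);
    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
    npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);

    double sum = 0.0;
    do {
        char *data = *dataptr;
        npy_intp stride = *strideptr;
        npy_intp count = *sizeptr;
        /* With EXTERNAL_LOOP, the caller handles the innermost dimension */
        while (count--) {
            sum += *(double *)data;
            data += stride;
        }
    } while (iternext(iter));

    *out = sum;
    return NpyIter_Deallocate(iter) == NPY_SUCCEED ? 0 : -1;
}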
543
544 /*NUMPY_API
545 * Makes a copy of the iterator
546 */
547 NPY_NO_EXPORT NpyIter *
548 NpyIter_Copy(NpyIter *iter)
549 {
550 npy_uint32 itflags = NIT_ITFLAGS(iter);
551 int ndim = NIT_NDIM(iter);
552 int iop, nop = NIT_NOP(iter);
553 int out_of_memory = 0;
554
555 npy_intp size;
556 NpyIter *newiter;
557 PyArrayObject **objects;
558 PyArray_Descr **dtypes;
559
560 /* Allocate memory for the new iterator */
561 size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop);
562 newiter = (NpyIter*)PyObject_Malloc(size);
563
564 /* Copy the raw values to the new iterator */
565 memcpy(newiter, iter, size);
566
567 /* Take ownership of references to the operands and dtypes */
568 objects = NIT_OPERANDS(newiter);
569 dtypes = NIT_DTYPES(newiter);
570 for (iop = 0; iop < nop; ++iop) {
571 Py_INCREF(objects[iop]);
572 Py_INCREF(dtypes[iop]);
573 }
574
575 /* Allocate buffers and make copies of the transfer data if necessary */
576 if (itflags & NPY_ITFLAG_BUFFER) {
577 NpyIter_BufferData *bufferdata;
578 npy_intp buffersize, itemsize;
579 char **buffers;
580 NpyAuxData **readtransferdata, **writetransferdata;
581
582 bufferdata = NIT_BUFFERDATA(newiter);
583 buffers = NBF_BUFFERS(bufferdata);
584 readtransferdata = NBF_READTRANSFERDATA(bufferdata);
585 writetransferdata = NBF_WRITETRANSFERDATA(bufferdata);
586 buffersize = NBF_BUFFERSIZE(bufferdata);
587
588 for (iop = 0; iop < nop; ++iop) {
589 if (buffers[iop] != NULL) {
590 if (out_of_memory) {
591 buffers[iop] = NULL;
592 }
593 else {
594 itemsize = dtypes[iop]->elsize;
595 buffers[iop] = PyArray_malloc(itemsize*buffersize);
596                 if (buffers[iop] == NULL) {
597                     out_of_memory = 1;
598                 }
599                 else if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
600                     memset(buffers[iop], '\0', itemsize*buffersize);
601                 }
602 }
603 }
604
605 if (readtransferdata[iop] != NULL) {
606 if (out_of_memory) {
607 readtransferdata[iop] = NULL;
608 }
609 else {
610 readtransferdata[iop] =
611 NPY_AUXDATA_CLONE(readtransferdata[iop]);
612 if (readtransferdata[iop] == NULL) {
613 out_of_memory = 1;
614 }
615 }
616 }
617
618 if (writetransferdata[iop] != NULL) {
619 if (out_of_memory) {
620 writetransferdata[iop] = NULL;
621 }
622 else {
623 writetransferdata[iop] =
624 NPY_AUXDATA_CLONE(writetransferdata[iop]);
625 if (writetransferdata[iop] == NULL) {
626 out_of_memory = 1;
627 }
628 }
629 }
630 }
631
632 /* Initialize the buffers to the current iterindex */
633 if (!out_of_memory && NBF_SIZE(bufferdata) > 0) {
634 npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter));
635
636 /* Prepare the next buffers and set iterend/size */
637 npyiter_copy_to_buffers(newiter, NULL);
638 }
639 }
640
641 if (out_of_memory) {
642 NpyIter_Deallocate(newiter);
643 PyErr_NoMemory();
644 return NULL;
645 }
646
647 return newiter;
648 }
649
650 /*NUMPY_API
651 * Deallocate an iterator.
652 *
653 * To correctly work when an error is in progress, we have to check
654 * `PyErr_Occurred()`. This is necessary when buffers are not finalized
655 * or WritebackIfCopy is used. We could avoid that check by exposing a new
656 * function which is passed in whether or not a Python error is already set.
657 */
658 NPY_NO_EXPORT int
659 NpyIter_Deallocate(NpyIter *iter)
660 {
661 int success = PyErr_Occurred() == NULL;
662
663 npy_uint32 itflags;
664 /*int ndim = NIT_NDIM(iter);*/
665 int iop, nop;
666 PyArray_Descr **dtype;
667 PyArrayObject **object;
668 npyiter_opitflags *op_itflags;
669
670 if (iter == NULL) {
671 return success;
672 }
673
674 itflags = NIT_ITFLAGS(iter);
675 nop = NIT_NOP(iter);
676 dtype = NIT_DTYPES(iter);
677 object = NIT_OPERANDS(iter);
678 op_itflags = NIT_OPITFLAGS(iter);
679
680 /* Deallocate any buffers and buffering data */
681 if (itflags & NPY_ITFLAG_BUFFER) {
682 /* Ensure no data is held by the buffers before they are cleared */
683 if (success) {
684 if (npyiter_copy_from_buffers(iter) < 0) {
685 success = NPY_FAIL;
686 }
687 }
688 else {
689 npyiter_clear_buffers(iter);
690 }
691
692 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
693 char **buffers;
694 NpyAuxData **transferdata;
695
696 /* buffers */
697 buffers = NBF_BUFFERS(bufferdata);
698 for (iop = 0; iop < nop; ++iop, ++buffers) {
699 PyArray_free(*buffers);
700 }
701 /* read bufferdata */
702 transferdata = NBF_READTRANSFERDATA(bufferdata);
703 for(iop = 0; iop < nop; ++iop, ++transferdata) {
704 if (*transferdata) {
705 NPY_AUXDATA_FREE(*transferdata);
706 }
707 }
708 /* write bufferdata */
709 transferdata = NBF_WRITETRANSFERDATA(bufferdata);
710 for(iop = 0; iop < nop; ++iop, ++transferdata) {
711 if (*transferdata) {
712 NPY_AUXDATA_FREE(*transferdata);
713 }
714 }
715 }
716
717 /*
718 * Deallocate all the dtypes and objects that were iterated and resolve
719 * any writeback buffers created by the iterator.
720 */
721 for (iop = 0; iop < nop; ++iop, ++dtype, ++object) {
722 if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
723 if (success && PyArray_ResolveWritebackIfCopy(*object) < 0) {
724 success = 0;
725 }
726 else {
727 PyArray_DiscardWritebackIfCopy(*object);
728 }
729 }
730 Py_XDECREF(*dtype);
731 Py_XDECREF(*object);
732 }
733
734 /* Deallocate the iterator memory */
735 PyObject_Free(iter);
736 return success;
737 }
738
739
740 /* Checks global 'flags' such as C_INDEX/F_INDEX, MULTI_INDEX, EXTERNAL_LOOP,
741  * RANGED, and BUFFERED, setting the appropriate internal flags in 'itflags'.
742 *
743 * Returns 1 on success, 0 on error.
744 */
745 static int
746 npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
747 {
748 if ((flags & NPY_ITER_PER_OP_FLAGS) != 0) {
749 PyErr_SetString(PyExc_ValueError,
750 "A per-operand flag was passed as a global flag "
751 "to the iterator constructor");
752 return 0;
753 }
754
755 /* Check for an index */
756 if (flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
757 if ((flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) ==
758 (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
759 PyErr_SetString(PyExc_ValueError,
760 "Iterator flags C_INDEX and "
761 "F_INDEX cannot both be specified");
762 return 0;
763 }
764 (*itflags) |= NPY_ITFLAG_HASINDEX;
765 }
766 /* Check if a multi-index was requested */
767 if (flags & NPY_ITER_MULTI_INDEX) {
768 /*
769 * This flag primarily disables dimension manipulations that
770 * would produce an incorrect multi-index.
771 */
772 (*itflags) |= NPY_ITFLAG_HASMULTIINDEX;
773 }
774 /* Check if the caller wants to handle inner iteration */
775 if (flags & NPY_ITER_EXTERNAL_LOOP) {
776 if ((*itflags) & (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_HASMULTIINDEX)) {
777 PyErr_SetString(PyExc_ValueError,
778 "Iterator flag EXTERNAL_LOOP cannot be used "
779 "if an index or multi-index is being tracked");
780 return 0;
781 }
782 (*itflags) |= NPY_ITFLAG_EXLOOP;
783 }
784 /* Ranged */
785 if (flags & NPY_ITER_RANGED) {
786 (*itflags) |= NPY_ITFLAG_RANGE;
787 if ((flags & NPY_ITER_EXTERNAL_LOOP) &&
788 !(flags & NPY_ITER_BUFFERED)) {
789 PyErr_SetString(PyExc_ValueError,
790 "Iterator flag RANGED cannot be used with "
791 "the flag EXTERNAL_LOOP unless "
792 "BUFFERED is also enabled");
793 return 0;
794 }
795 }
796 /* Buffering */
797 if (flags & NPY_ITER_BUFFERED) {
798 (*itflags) |= NPY_ITFLAG_BUFFER;
799 if (flags & NPY_ITER_GROWINNER) {
800 (*itflags) |= NPY_ITFLAG_GROWINNER;
801 }
802 if (flags & NPY_ITER_DELAY_BUFALLOC) {
803 (*itflags) |= NPY_ITFLAG_DELAYBUF;
804 }
805 }
806
807 return 1;
808 }
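
/*
 * A small illustrative sketch (an assumption, not exercised anywhere in this
 * file) of how the translation above behaves for a typical buffered, ranged
 * request. The starting value mirrors the NPY_ITFLAG_IDENTPERM
 * initialization used by NpyIter_AdvancedNew.
 */
static void
npyiter_example_global_flags(void)
{
    npy_uint32 itflags = NPY_ITFLAG_IDENTPERM;
    int ok = npyiter_check_global_flags(
            NPY_ITER_BUFFERED | NPY_ITER_GROWINNER | NPY_ITER_RANGED,
            &itflags);
    /* BUFFERED/GROWINNER/RANGED are ORed in as their internal counterparts */
    assert(ok && (itflags & NPY_ITFLAG_BUFFER) &&
           (itflags & NPY_ITFLAG_GROWINNER) && (itflags & NPY_ITFLAG_RANGE));
    (void)ok;
}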
809
810 static int
811 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
812 const npy_intp *itershape)
813 {
814 char axes_dupcheck[NPY_MAXDIMS];
815 int iop, idim;
816
817 if (oa_ndim < 0) {
818 /*
819 * If `oa_ndim < 0`, `op_axes` and `itershape` are signalled to
820 * be unused and should be NULL. (Before NumPy 1.8 this was
821 * signalled by `oa_ndim == 0`.)
822 */
823 if (op_axes != NULL || itershape != NULL) {
824 PyErr_Format(PyExc_ValueError,
825 "If 'op_axes' or 'itershape' is not NULL in the iterator "
826 "constructor, 'oa_ndim' must be zero or greater");
827 return 0;
828 }
829 return 1;
830 }
831 if (oa_ndim > NPY_MAXDIMS) {
832 PyErr_Format(PyExc_ValueError,
833 "Cannot construct an iterator with more than %d dimensions "
834 "(%d were requested for op_axes)",
835 NPY_MAXDIMS, oa_ndim);
836 return 0;
837 }
838 if (op_axes == NULL) {
839 PyErr_Format(PyExc_ValueError,
840 "If 'oa_ndim' is zero or greater in the iterator "
841 "constructor, then op_axes cannot be NULL");
842 return 0;
843 }
844
845 /* Check that there are no duplicates in op_axes */
846 for (iop = 0; iop < nop; ++iop) {
847 int *axes = op_axes[iop];
848 if (axes != NULL) {
849 memset(axes_dupcheck, 0, NPY_MAXDIMS);
850 for (idim = 0; idim < oa_ndim; ++idim) {
851 int i = npyiter_get_op_axis(axes[idim], NULL);
852
853 if (i >= 0) {
854 if (i >= NPY_MAXDIMS) {
855 PyErr_Format(PyExc_ValueError,
856 "The 'op_axes' provided to the iterator "
857 "constructor for operand %d "
858 "contained invalid "
859 "values %d", iop, i);
860 return 0;
861 }
862 else if (axes_dupcheck[i] == 1) {
863 PyErr_Format(PyExc_ValueError,
864 "The 'op_axes' provided to the iterator "
865 "constructor for operand %d "
866 "contained duplicate "
867 "value %d", iop, i);
868 return 0;
869 }
870 else {
871 axes_dupcheck[i] = 1;
872 }
873 }
874 }
875 }
876 }
877
878 return 1;
879 }
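
/*
 * An illustrative sketch (an assumed example, not part of the original code)
 * of the op_axes convention this function validates: an outer-product style
 * setup with two 1-d inputs and an automatically allocated 2-d output.
 */
static void
npyiter_example_op_axes(void)
{
    int a_axes[2] = {0, -1};   /* op[0] supplies iterator axis 0, broadcast on 1 */
    int b_axes[2] = {-1, 0};   /* op[1] is broadcast on axis 0, supplies axis 1 */
    int *op_axes[3] = {a_axes, b_axes, NULL};   /* NULL: output uses all axes */

    /* nop == 3, oa_ndim == 2; itershape may stay NULL and be deduced later */
    int ok = npyiter_check_op_axes(3, 2, op_axes, NULL);
    assert(ok == 1);
    (void)ok;
}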
880
881 static int
882 npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
883 int oa_ndim)
884 {
885 /* If 'op_axes' is being used, force 'ndim' */
886 if (oa_ndim >= 0 ) {
887 return oa_ndim;
888 }
889 /* Otherwise it's the maximum 'ndim' from the operands */
890 else {
891 int ndim = 0, iop;
892
893 for (iop = 0; iop < nop; ++iop) {
894 if (op_in[iop] != NULL) {
895 int ondim = PyArray_NDIM(op_in[iop]);
896 if (ondim > ndim) {
897 ndim = ondim;
898 }
899 }
900
901 }
902
903 return ndim;
904 }
905 }
906
907 /*
908 * Checks the per-operand input flags, and fills in op_itflags.
909 *
910 * Returns 1 on success, 0 on failure.
911 */
912 static int
913 npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
914 {
915 if ((op_flags & NPY_ITER_GLOBAL_FLAGS) != 0) {
916 PyErr_SetString(PyExc_ValueError,
917 "A global iterator flag was passed as a per-operand flag "
918 "to the iterator constructor");
919 return 0;
920 }
921
922 /* Check the read/write flags */
923 if (op_flags & NPY_ITER_READONLY) {
924 /* The read/write flags are mutually exclusive */
925 if (op_flags & (NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) {
926 PyErr_SetString(PyExc_ValueError,
927 "Only one of the iterator flags READWRITE, "
928 "READONLY, and WRITEONLY may be "
929 "specified for an operand");
930 return 0;
931 }
932
933 *op_itflags = NPY_OP_ITFLAG_READ;
934 }
935 else if (op_flags & NPY_ITER_READWRITE) {
936 /* The read/write flags are mutually exclusive */
937 if (op_flags & NPY_ITER_WRITEONLY) {
938 PyErr_SetString(PyExc_ValueError,
939 "Only one of the iterator flags READWRITE, "
940 "READONLY, and WRITEONLY may be "
941 "specified for an operand");
942 return 0;
943 }
944
945 *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE;
946 }
947 else if(op_flags & NPY_ITER_WRITEONLY) {
948 *op_itflags = NPY_OP_ITFLAG_WRITE;
949 }
950 else {
951 PyErr_SetString(PyExc_ValueError,
952 "None of the iterator flags READWRITE, "
953 "READONLY, or WRITEONLY were "
954 "specified for an operand");
955 return 0;
956 }
957
958 /* Check the flags for temporary copies */
959 if (((*op_itflags) & NPY_OP_ITFLAG_WRITE) &&
960 (op_flags & (NPY_ITER_COPY |
961 NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) {
962 PyErr_SetString(PyExc_ValueError,
963 "If an iterator operand is writeable, must use "
964 "the flag UPDATEIFCOPY instead of "
965 "COPY");
966 return 0;
967 }
968
969     /* Check the flag for write-masked operands */
970 if (op_flags & NPY_ITER_WRITEMASKED) {
971 if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
972 PyErr_SetString(PyExc_ValueError,
973 "The iterator flag WRITEMASKED may only "
974 "be used with READWRITE or WRITEONLY");
975 return 0;
976 }
977 if ((op_flags & NPY_ITER_ARRAYMASK) != 0) {
978 PyErr_SetString(PyExc_ValueError,
979 "The iterator flag WRITEMASKED may not "
980 "be used together with ARRAYMASK");
981 return 0;
982 }
983 *op_itflags |= NPY_OP_ITFLAG_WRITEMASKED;
984 }
985
986 if ((op_flags & NPY_ITER_VIRTUAL) != 0) {
987 if ((op_flags & NPY_ITER_READWRITE) == 0) {
988 PyErr_SetString(PyExc_ValueError,
989 "The iterator flag VIRTUAL should be "
990 "be used together with READWRITE");
991 return 0;
992 }
993 *op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
994 }
995
996 return 1;
997 }
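
/*
 * A brief sketch (an assumption, for illustration only) of the flag
 * translation performed above: READWRITE expands into the internal READ and
 * WRITE bits, and WRITEMASKED is accepted because the operand is writeable.
 */
static void
npyiter_example_per_op_flags(void)
{
    npyiter_opitflags it = 0;
    int ok = npyiter_check_per_op_flags(
            NPY_ITER_READWRITE | NPY_ITER_WRITEMASKED, &it);
    assert(ok && (it & NPY_OP_ITFLAG_READ) && (it & NPY_OP_ITFLAG_WRITE) &&
           (it & NPY_OP_ITFLAG_WRITEMASKED));
    (void)ok;
}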
998
999 /*
1000  * Prepares a constructor operand.  Assumes a reference to 'op'
1001 * is owned, and that 'op' may be replaced. Fills in 'op_dataptr',
1002 * 'op_dtype', and may modify 'op_itflags'.
1003 *
1004 * Returns 1 on success, 0 on failure.
1005 */
1006 static int
1007 npyiter_prepare_one_operand(PyArrayObject **op,
1008 char **op_dataptr,
1009 PyArray_Descr *op_request_dtype,
1010 PyArray_Descr **op_dtype,
1011 npy_uint32 flags,
1012 npy_uint32 op_flags, npyiter_opitflags *op_itflags)
1013 {
1014 /* NULL operands must be automatically allocated outputs */
1015 if (*op == NULL) {
1016 /* ALLOCATE or VIRTUAL should be enabled */
1017 if ((op_flags & (NPY_ITER_ALLOCATE|NPY_ITER_VIRTUAL)) == 0) {
1018 PyErr_SetString(PyExc_ValueError,
1019 "Iterator operand was NULL, but neither the "
1020 "ALLOCATE nor the VIRTUAL flag was specified");
1021 return 0;
1022 }
1023
1024 if (op_flags & NPY_ITER_ALLOCATE) {
1025 /* Writing should be enabled */
1026 if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
1027 PyErr_SetString(PyExc_ValueError,
1028 "Automatic allocation was requested for an iterator "
1029 "operand, but it wasn't flagged for writing");
1030 return 0;
1031 }
1032 /*
1033 * Reading should be disabled if buffering is enabled without
1034 * also enabling NPY_ITER_DELAY_BUFALLOC. In all other cases,
1035 * the caller may initialize the allocated operand to a value
1036 * before beginning iteration.
1037 */
1038 if (((flags & (NPY_ITER_BUFFERED |
1039 NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) &&
1040 ((*op_itflags) & NPY_OP_ITFLAG_READ)) {
1041 PyErr_SetString(PyExc_ValueError,
1042 "Automatic allocation was requested for an iterator "
1043 "operand, and it was flagged as readable, but "
1044 "buffering without delayed allocation was enabled");
1045 return 0;
1046 }
1047
1048 /* If a requested dtype was provided, use it, otherwise NULL */
1049 Py_XINCREF(op_request_dtype);
1050 *op_dtype = op_request_dtype;
1051 }
1052 else {
1053 *op_dtype = NULL;
1054 }
1055
1056 /* Specify bool if no dtype was requested for the mask */
1057 if (op_flags & NPY_ITER_ARRAYMASK) {
1058 if (*op_dtype == NULL) {
1059 *op_dtype = PyArray_DescrFromType(NPY_BOOL);
1060 if (*op_dtype == NULL) {
1061 return 0;
1062 }
1063 }
1064 }
1065
1066 *op_dataptr = NULL;
1067
1068 return 1;
1069 }
1070
1071 /* VIRTUAL operands must be NULL */
1072 if (op_flags & NPY_ITER_VIRTUAL) {
1073 PyErr_SetString(PyExc_ValueError,
1074 "Iterator operand flag VIRTUAL was specified, "
1075 "but the operand was not NULL");
1076 return 0;
1077 }
1078
1079
1080 if (PyArray_Check(*op)) {
1081
1082 if ((*op_itflags) & NPY_OP_ITFLAG_WRITE
1083 && PyArray_FailUnlessWriteable(*op, "operand array with iterator "
1084 "write flag set") < 0) {
1085 return 0;
1086 }
1087 if (!(flags & NPY_ITER_ZEROSIZE_OK) && PyArray_SIZE(*op) == 0) {
1088 PyErr_SetString(PyExc_ValueError,
1089 "Iteration of zero-sized operands is not enabled");
1090 return 0;
1091 }
1092 *op_dataptr = PyArray_BYTES(*op);
1093 /* PyArray_DESCR does not give us a reference */
1094 *op_dtype = PyArray_DESCR(*op);
1095 if (*op_dtype == NULL) {
1096 PyErr_SetString(PyExc_ValueError,
1097 "Iterator input operand has no dtype descr");
1098 return 0;
1099 }
1100 Py_INCREF(*op_dtype);
1101 /*
1102 * If references weren't specifically allowed, make sure there
1103 * are no references in the inputs or requested dtypes.
1104 */
1105 if (!(flags & NPY_ITER_REFS_OK)) {
1106 PyArray_Descr *dt = PyArray_DESCR(*op);
1107 if (((dt->flags & (NPY_ITEM_REFCOUNT |
1108 NPY_ITEM_IS_POINTER)) != 0) ||
1109 (dt != *op_dtype &&
1110 (((*op_dtype)->flags & (NPY_ITEM_REFCOUNT |
1111 NPY_ITEM_IS_POINTER))) != 0)) {
1112 PyErr_SetString(PyExc_TypeError,
1113 "Iterator operand or requested dtype holds "
1114 "references, but the REFS_OK flag was not enabled");
1115 return 0;
1116 }
1117 }
1118 /*
1119 * Checking whether casts are valid is done later, once the
1120 * final data types have been selected. For now, just store the
1121 * requested type.
1122 */
1123 if (op_request_dtype != NULL) {
1124 /* We just have a borrowed reference to op_request_dtype */
1125 Py_SETREF(*op_dtype, PyArray_AdaptDescriptorToArray(
1126 *op, (PyObject *)op_request_dtype));
1127 if (*op_dtype == NULL) {
1128 return 0;
1129 }
1130 }
1131
1132 /* Check if the operand is in the byte order requested */
1133 if (op_flags & NPY_ITER_NBO) {
1134 /* Check byte order */
1135 if (!PyArray_ISNBO((*op_dtype)->byteorder)) {
1136 PyArray_Descr *nbo_dtype;
1137
1138 /* Replace with a new descr which is in native byte order */
1139 nbo_dtype = PyArray_DescrNewByteorder(*op_dtype, NPY_NATIVE);
1140 Py_DECREF(*op_dtype);
1141 *op_dtype = nbo_dtype;
1142
1143 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1144 "because of NPY_ITER_NBO\n");
1145 /* Indicate that byte order or alignment needs fixing */
1146 *op_itflags |= NPY_OP_ITFLAG_CAST;
1147 }
1148 }
1149 /* Check if the operand is aligned */
1150 if (op_flags & NPY_ITER_ALIGNED) {
1151 /* Check alignment */
1152 if (!IsAligned(*op)) {
1153 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1154 "because of NPY_ITER_ALIGNED\n");
1155 *op_itflags |= NPY_OP_ITFLAG_CAST;
1156 }
1157 }
1158 /*
1159 * The check for NPY_ITER_CONTIG can only be done later,
1160 * once the final iteration order is settled.
1161 */
1162 }
1163 else {
1164 PyErr_SetString(PyExc_ValueError,
1165 "Iterator inputs must be ndarrays");
1166 return 0;
1167 }
1168
1169 return 1;
1170 }
1171
1172 /*
1173 * Process all the operands, copying new references so further processing
1174 * can replace the arrays if copying is necessary.
1175 */
1176 static int
1177 npyiter_prepare_operands(int nop, PyArrayObject **op_in,
1178 PyArrayObject **op,
1179 char **op_dataptr,
1180 PyArray_Descr **op_request_dtypes,
1181 PyArray_Descr **op_dtype,
1182 npy_uint32 flags,
1183 npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
1184 npy_int8 *out_maskop)
1185 {
1186 int iop, i;
1187 npy_int8 maskop = -1;
1188 int any_writemasked_ops = 0;
1189
1190 /*
1191 * Here we just prepare the provided operands.
1192 */
1193 for (iop = 0; iop < nop; ++iop) {
1194 op[iop] = op_in[iop];
1195 Py_XINCREF(op[iop]);
1196 op_dtype[iop] = NULL;
1197
1198 /* Check the readonly/writeonly flags, and fill in op_itflags */
1199 if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) {
1200 goto fail_iop;
1201 }
1202
1203 /* Extract the operand which is for masked iteration */
1204 if ((op_flags[iop] & NPY_ITER_ARRAYMASK) != 0) {
1205 if (maskop != -1) {
1206 PyErr_SetString(PyExc_ValueError,
1207 "Only one iterator operand may receive an "
1208 "ARRAYMASK flag");
1209 goto fail_iop;
1210 }
1211
1212 maskop = iop;
1213 *out_maskop = iop;
1214 }
1215
1216 if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
1217 any_writemasked_ops = 1;
1218 }
1219
1220 /*
1221 * Prepare the operand. This produces an op_dtype[iop] reference
1222 * on success.
1223 */
1224 if (!npyiter_prepare_one_operand(&op[iop],
1225 &op_dataptr[iop],
1226 op_request_dtypes ? op_request_dtypes[iop] : NULL,
1227 &op_dtype[iop],
1228 flags,
1229 op_flags[iop], &op_itflags[iop])) {
1230 goto fail_iop;
1231 }
1232 }
1233
1234 /* If all the operands were NULL, it's an error */
1235 if (op[0] == NULL) {
1236 int all_null = 1;
1237 for (iop = 1; iop < nop; ++iop) {
1238 if (op[iop] != NULL) {
1239 all_null = 0;
1240 break;
1241 }
1242 }
1243 if (all_null) {
1244 PyErr_SetString(PyExc_ValueError,
1245 "At least one iterator operand must be non-NULL");
1246 goto fail_nop;
1247 }
1248 }
1249
1250 if (any_writemasked_ops && maskop < 0) {
1251 PyErr_SetString(PyExc_ValueError,
1252 "An iterator operand was flagged as WRITEMASKED, "
1253 "but no ARRAYMASK operand was given to supply "
1254 "the mask");
1255 goto fail_nop;
1256 }
1257 else if (!any_writemasked_ops && maskop >= 0) {
1258 PyErr_SetString(PyExc_ValueError,
1259 "An iterator operand was flagged as the ARRAYMASK, "
1260 "but no WRITEMASKED operands were given to use "
1261 "the mask");
1262 goto fail_nop;
1263 }
1264
1265 return 1;
1266
1267 fail_nop:
1268 iop = nop - 1;
1269 fail_iop:
1270 for (i = 0; i < iop+1; ++i) {
1271 Py_XDECREF(op[i]);
1272 Py_XDECREF(op_dtype[i]);
1273 }
1274 return 0;
1275 }
1276
1277 static const char *
1278 npyiter_casting_to_string(NPY_CASTING casting)
1279 {
1280 switch (casting) {
1281 case NPY_NO_CASTING:
1282 return "'no'";
1283 case NPY_EQUIV_CASTING:
1284 return "'equiv'";
1285 case NPY_SAFE_CASTING:
1286 return "'safe'";
1287 case NPY_SAME_KIND_CASTING:
1288 return "'same_kind'";
1289 case NPY_UNSAFE_CASTING:
1290 return "'unsafe'";
1291 default:
1292 return "<unknown>";
1293 }
1294 }
1295
1296
1297 static int
1298 npyiter_check_casting(int nop, PyArrayObject **op,
1299 PyArray_Descr **op_dtype,
1300 NPY_CASTING casting,
1301 npyiter_opitflags *op_itflags)
1302 {
1303 int iop;
1304
1305 for(iop = 0; iop < nop; ++iop) {
1306 NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n",
1307 (int)iop);
1308 #if NPY_IT_DBG_TRACING
1309 printf("op: ");
1310 if (op[iop] != NULL) {
1311 PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0);
1312 }
1313 else {
1314 printf("<null>");
1315 }
1316 printf(", iter: ");
1317 PyObject_Print((PyObject *)op_dtype[iop], stdout, 0);
1318 printf("\n");
1319 #endif
1320 /* If the types aren't equivalent, a cast is necessary */
1321 if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]),
1322 op_dtype[iop])) {
1323 /* Check read (op -> temp) casting */
1324 if ((op_itflags[iop] & NPY_OP_ITFLAG_READ) &&
1325 !PyArray_CanCastArrayTo(op[iop],
1326 op_dtype[iop],
1327 casting)) {
1328 PyErr_Format(PyExc_TypeError,
1329 "Iterator operand %d dtype could not be cast from "
1330 "%R to %R according to the rule %s",
1331 iop, PyArray_DESCR(op[iop]), op_dtype[iop],
1332 npyiter_casting_to_string(casting));
1333 return 0;
1334 }
1335 /* Check write (temp -> op) casting */
1336 if ((op_itflags[iop] & NPY_OP_ITFLAG_WRITE) &&
1337 !PyArray_CanCastTypeTo(op_dtype[iop],
1338 PyArray_DESCR(op[iop]),
1339 casting)) {
1340 PyErr_Format(PyExc_TypeError,
1341 "Iterator requested dtype could not be cast from "
1342 "%R to %R, the operand %d dtype, "
1343 "according to the rule %s",
1344 op_dtype[iop], PyArray_DESCR(op[iop]), iop,
1345 npyiter_casting_to_string(casting));
1346 return 0;
1347 }
1348
1349 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1350 "because the types aren't equivalent\n");
1351 /* Indicate that this operand needs casting */
1352 op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
1353 }
1354 }
1355
1356 return 1;
1357 }
1358
1359 /*
1360  * Checks that the mask broadcasts to the WRITEMASKED REDUCE
1361 * operand 'iop', but 'iop' never broadcasts to the mask.
1362 * If 'iop' broadcasts to the mask, the result would be more
1363 * than one mask value per reduction element, something which
1364 * is invalid.
1365 *
1366 * This check should only be called after all the operands
1367 * have been filled in.
1368 *
1369 * Returns 1 on success, 0 on error.
1370 */
1371 static int
1372 check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
1373 {
1374 npy_uint32 itflags = NIT_ITFLAGS(iter);
1375 int idim, ndim = NIT_NDIM(iter);
1376 int nop = NIT_NOP(iter);
1377 int maskop = NIT_MASKOP(iter);
1378
1379 NpyIter_AxisData *axisdata;
1380 npy_intp sizeof_axisdata;
1381
1382 axisdata = NIT_AXISDATA(iter);
1383 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1384
1385 for(idim = 0; idim < ndim; ++idim) {
1386 npy_intp maskstride, istride;
1387
1388 istride = NAD_STRIDES(axisdata)[iop];
1389 maskstride = NAD_STRIDES(axisdata)[maskop];
1390
1391 /*
1392 * If 'iop' is being broadcast to 'maskop', we have
1393 * the invalid situation described above.
1394 */
1395 if (maskstride != 0 && istride == 0) {
1396 PyErr_SetString(PyExc_ValueError,
1397 "Iterator reduction operand is WRITEMASKED, "
1398 "but also broadcasts to multiple mask values. "
1399 "There can be only one mask value per WRITEMASKED "
1400 "element.");
1401 return 0;
1402 }
1403
1404 NIT_ADVANCE_AXISDATA(axisdata, 1);
1405 }
1406
1407 return 1;
1408 }
1409
1410 /*
1411 * Check whether a reduction is OK based on the flags and the operand being
1412 * readwrite. This path is deprecated, since usually only specific axes
1413  * should be reduced. If axes are specified explicitly, the flag is
1414 * unnecessary.
1415 */
1416 static int
1417 npyiter_check_reduce_ok_and_set_flags(
1418 NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1419 int dim) {
1420 /* If it's writeable, this means a reduction */
1421 if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
1422 if (!(flags & NPY_ITER_REDUCE_OK)) {
1423 PyErr_Format(PyExc_ValueError,
1424 "output operand requires a reduction along dimension %d, "
1425 "but the reduction is not enabled. The dimension size of 1 "
1426 "does not match the expected output shape.", dim);
1427 return 0;
1428 }
1429 if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
1430 PyErr_SetString(PyExc_ValueError,
1431 "output operand requires a reduction, but is flagged as "
1432 "write-only, not read-write");
1433 return 0;
1434 }
1435         NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is "
1436 "occurring\n");
1437
1438 NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1439 *op_itflags |= NPY_OP_ITFLAG_REDUCE;
1440 }
1441 return 1;
1442 }
1443
1444 /**
1445 * Removes the (additive) NPY_ITER_REDUCTION_AXIS indication and sets
1446  * *reduction_axis to 1 if it was set, otherwise to 0.
1447 *
1448 * @param axis The op_axes[i] to normalize.
1449 * @param reduction_axis Output 1 if a reduction axis, otherwise 0.
1450 * @returns The normalized axis (without reduce axis flag).
1451 */
1452 static NPY_INLINE int
1453 npyiter_get_op_axis(int axis, npy_bool *reduction_axis) {
1454 npy_bool forced_broadcast = axis >= NPY_ITER_REDUCTION_AXIS(-1);
1455
1456 if (reduction_axis != NULL) {
1457 *reduction_axis = forced_broadcast;
1458 }
1459 if (forced_broadcast) {
1460 return axis - NPY_ITER_REDUCTION_AXIS(0);
1461 }
1462 return axis;
1463 }
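
/*
 * A short sketch (an illustrative assumption, not used elsewhere in this
 * file) of the encoding handled above: an axis wrapped in
 * NPY_ITER_REDUCTION_AXIS decodes back to the plain axis with the reduction
 * flag reported, while an unwrapped axis passes through unchanged.
 */
static void
npyiter_example_decode_op_axis(void)
{
    npy_bool is_reduction;
    int axis = npyiter_get_op_axis(NPY_ITER_REDUCTION_AXIS(2), &is_reduction);
    assert(axis == 2 && is_reduction);
    axis = npyiter_get_op_axis(2, &is_reduction);
    assert(axis == 2 && !is_reduction);
    (void)axis;
}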
1464
1465 /*
1466 * Fills in the AXISDATA for the 'nop' operands, broadcasting
1467  * the dimensions as necessary. Also fills
1468 * in the ITERSIZE data member.
1469 *
1470 * If op_axes is not NULL, it should point to an array of ndim-sized
1471 * arrays, one for each op.
1472 *
1473 * Returns 1 on success, 0 on failure.
1474 */
1475 static int
1476 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1477 char **op_dataptr,
1478 const npy_uint32 *op_flags, int **op_axes,
1479 npy_intp const *itershape)
1480 {
1481 npy_uint32 itflags = NIT_ITFLAGS(iter);
1482 int idim, ndim = NIT_NDIM(iter);
1483 int iop, nop = NIT_NOP(iter);
1484 int maskop = NIT_MASKOP(iter);
1485
1486 int ondim;
1487 NpyIter_AxisData *axisdata;
1488 npy_intp sizeof_axisdata;
1489 PyArrayObject **op = NIT_OPERANDS(iter), *op_cur;
1490 npy_intp broadcast_shape[NPY_MAXDIMS];
1491
1492 /* First broadcast the shapes together */
1493 if (itershape == NULL) {
1494 for (idim = 0; idim < ndim; ++idim) {
1495 broadcast_shape[idim] = 1;
1496 }
1497 }
1498 else {
1499 for (idim = 0; idim < ndim; ++idim) {
1500 broadcast_shape[idim] = itershape[idim];
1501 /* Negative shape entries are deduced from the operands */
1502 if (broadcast_shape[idim] < 0) {
1503 broadcast_shape[idim] = 1;
1504 }
1505 }
1506 }
1507 for (iop = 0; iop < nop; ++iop) {
1508 op_cur = op[iop];
1509 if (op_cur != NULL) {
1510 npy_intp *shape = PyArray_DIMS(op_cur);
1511 ondim = PyArray_NDIM(op_cur);
1512
1513 if (op_axes == NULL || op_axes[iop] == NULL) {
1514 /*
1515 * Possible if op_axes are being used, but
1516 * op_axes[iop] is NULL
1517 */
1518 if (ondim > ndim) {
1519 PyErr_SetString(PyExc_ValueError,
1520 "input operand has more dimensions than allowed "
1521 "by the axis remapping");
1522 return 0;
1523 }
1524 for (idim = 0; idim < ondim; ++idim) {
1525 npy_intp bshape = broadcast_shape[idim+ndim-ondim];
1526 npy_intp op_shape = shape[idim];
1527
1528 if (bshape == 1) {
1529 broadcast_shape[idim+ndim-ondim] = op_shape;
1530 }
1531 else if (bshape != op_shape && op_shape != 1) {
1532 goto broadcast_error;
1533 }
1534 }
1535 }
1536 else {
1537 int *axes = op_axes[iop];
1538 for (idim = 0; idim < ndim; ++idim) {
1539 int i = npyiter_get_op_axis(axes[idim], NULL);
1540
1541 if (i >= 0) {
1542 if (i < ondim) {
1543 npy_intp bshape = broadcast_shape[idim];
1544 npy_intp op_shape = shape[i];
1545
1546 if (bshape == 1) {
1547 broadcast_shape[idim] = op_shape;
1548 }
1549 else if (bshape != op_shape && op_shape != 1) {
1550 goto broadcast_error;
1551 }
1552 }
1553 else {
1554 PyErr_Format(PyExc_ValueError,
1555 "Iterator input op_axes[%d][%d] (==%d) "
1556 "is not a valid axis of op[%d], which "
1557 "has %d dimensions ",
1558 iop, (ndim-idim-1), i,
1559 iop, ondim);
1560 return 0;
1561 }
1562 }
1563 }
1564 }
1565 }
1566 }
1567 /*
1568 * If a shape was provided with a 1 entry, make sure that entry didn't
1569 * get expanded by broadcasting.
1570 */
1571 if (itershape != NULL) {
1572 for (idim = 0; idim < ndim; ++idim) {
1573 if (itershape[idim] == 1 && broadcast_shape[idim] != 1) {
1574 goto broadcast_error;
1575 }
1576 }
1577 }
1578
1579 axisdata = NIT_AXISDATA(iter);
1580 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1581
1582 if (ndim == 0) {
1583 /* Need to fill the first axisdata, even if the iterator is 0-d */
1584 NAD_SHAPE(axisdata) = 1;
1585 NAD_INDEX(axisdata) = 0;
1586 memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1587 memset(NAD_STRIDES(axisdata), 0, NPY_SIZEOF_INTP*nop);
1588 }
1589
1590 /* Now process the operands, filling in the axisdata */
1591 for (idim = 0; idim < ndim; ++idim) {
1592 npy_intp bshape = broadcast_shape[ndim-idim-1];
1593 npy_intp *strides = NAD_STRIDES(axisdata);
1594
1595 NAD_SHAPE(axisdata) = bshape;
1596 NAD_INDEX(axisdata) = 0;
1597 memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1598
1599 for (iop = 0; iop < nop; ++iop) {
1600 op_cur = op[iop];
1601
1602 if (op_axes == NULL || op_axes[iop] == NULL) {
1603 if (op_cur == NULL) {
1604 strides[iop] = 0;
1605 }
1606 else {
1607 ondim = PyArray_NDIM(op_cur);
1608 if (bshape == 1) {
1609 strides[iop] = 0;
1610 if (idim >= ondim &&
1611 (op_flags[iop] & NPY_ITER_NO_BROADCAST)) {
1612 goto operand_different_than_broadcast;
1613 }
1614 }
1615 else if (idim >= ondim ||
1616 PyArray_DIM(op_cur, ondim-idim-1) == 1) {
1617 strides[iop] = 0;
1618 if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1619 goto operand_different_than_broadcast;
1620 }
1621 /* If it's writeable, this means a reduction */
1622 if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
1623 if (!(flags & NPY_ITER_REDUCE_OK)) {
1624 PyErr_SetString(PyExc_ValueError,
1625 "output operand requires a "
1626 "reduction, but reduction is "
1627 "not enabled");
1628 return 0;
1629 }
1630 if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
1631 PyErr_SetString(PyExc_ValueError,
1632 "output operand requires a "
1633 "reduction, but is flagged as "
1634 "write-only, not read-write");
1635 return 0;
1636 }
1637 /*
1638 * The ARRAYMASK can't be a reduction, because
1639 * it would be possible to write back to the
1640 * array once when the ARRAYMASK says 'True',
1641 * then have the reduction on the ARRAYMASK
1642 * later flip to 'False', indicating that the
1643 * write back should never have been done,
1644 * and violating the strict masking semantics
1645 */
1646 if (iop == maskop) {
1647 PyErr_SetString(PyExc_ValueError,
1648 "output operand requires a "
1649 "reduction, but is flagged as "
1650 "the ARRAYMASK operand which "
1651 "is not permitted to be the "
1652 "result of a reduction");
1653 return 0;
1654 }
1655
1656 NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1657 op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1658 }
1659 }
1660 else {
1661 strides[iop] = PyArray_STRIDE(op_cur, ondim-idim-1);
1662 }
1663 }
1664 }
1665 else {
1666 int *axes = op_axes[iop];
1667 npy_bool reduction_axis;
1668 int i;
1669 i = npyiter_get_op_axis(axes[ndim - idim - 1], &reduction_axis);
1670
1671 if (reduction_axis) {
1672 /* This is explicitly a reduction axis */
1673 strides[iop] = 0;
1674 NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1675 op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1676
1677 if (NPY_UNLIKELY((i >= 0) && (op_cur != NULL) &&
1678 (PyArray_DIM(op_cur, i) != 1))) {
1679 PyErr_Format(PyExc_ValueError,
1680 "operand was set up as a reduction along axis "
1681 "%d, but the length of the axis is %zd "
1682 "(it has to be 1)",
1683 i, (Py_ssize_t)PyArray_DIM(op_cur, i));
1684 return 0;
1685 }
1686 }
1687 else if (bshape == 1) {
1688 /*
1689 * If the full iterator shape is 1, zero always works.
1690 * NOTE: We thus always allow broadcast dimensions (i = -1)
1691 * if the shape is 1.
1692 */
1693 strides[iop] = 0;
1694 }
1695 else if (i >= 0) {
1696 if (op_cur == NULL) {
1697 /* stride is filled later, shape will match `bshape` */
1698 strides[iop] = 0;
1699 }
1700 else if (PyArray_DIM(op_cur, i) == 1) {
1701 strides[iop] = 0;
1702 if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1703 goto operand_different_than_broadcast;
1704 }
1705 if (!npyiter_check_reduce_ok_and_set_flags(
1706 iter, flags, &op_itflags[iop], i)) {
1707 return 0;
1708 }
1709 }
1710 else {
1711 strides[iop] = PyArray_STRIDE(op_cur, i);
1712 }
1713 }
1714 else {
1715 strides[iop] = 0;
1716 if (!npyiter_check_reduce_ok_and_set_flags(
1717 iter, flags, &op_itflags[iop], i)) {
1718 return 0;
1719 }
1720 }
1721 }
1722 }
1723
1724 NIT_ADVANCE_AXISDATA(axisdata, 1);
1725 }
1726
1727 /* Now fill in the ITERSIZE member */
1728 NIT_ITERSIZE(iter) = 1;
1729 for (idim = 0; idim < ndim; ++idim) {
1730 if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
1731 NIT_ITERSIZE(iter), broadcast_shape[idim])) {
1732 if ((itflags & NPY_ITFLAG_HASMULTIINDEX) &&
1733 !(itflags & NPY_ITFLAG_HASINDEX) &&
1734 !(itflags & NPY_ITFLAG_BUFFER)) {
1735 /*
1736 * If RemoveAxis may be called, the size check is delayed
1737 * until either the multi index is removed, or GetIterNext
1738 * is called.
1739 */
1740 NIT_ITERSIZE(iter) = -1;
1741 break;
1742 }
1743 else {
1744 PyErr_SetString(PyExc_ValueError, "iterator is too large");
1745 return 0;
1746 }
1747 }
1748 }
1749 /* The range defaults to everything */
1750 NIT_ITERSTART(iter) = 0;
1751 NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
1752
1753 return 1;
1754
1755 broadcast_error: {
1756 npy_intp remdims[NPY_MAXDIMS];
1757
1758 if (op_axes == NULL) {
1759 PyObject *shape1 = PyUnicode_FromString("");
1760 if (shape1 == NULL) {
1761 return 0;
1762 }
1763 for (iop = 0; iop < nop; ++iop) {
1764 if (op[iop] != NULL) {
1765 int ndims = PyArray_NDIM(op[iop]);
1766 npy_intp *dims = PyArray_DIMS(op[iop]);
1767 PyObject *tmp = convert_shape_to_string(ndims, dims, " ");
1768 if (tmp == NULL) {
1769 Py_DECREF(shape1);
1770 return 0;
1771 }
1772 Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1773 Py_DECREF(tmp);
1774 if (shape1 == NULL) {
1775 return 0;
1776 }
1777 }
1778 }
1779 if (itershape == NULL) {
1780 PyErr_Format(PyExc_ValueError,
1781 "operands could not be broadcast together with "
1782 "shapes %S", shape1);
1783 Py_DECREF(shape1);
1784 return 0;
1785 }
1786 else {
1787 PyObject *shape2 = convert_shape_to_string(ndim, itershape, "");
1788 if (shape2 == NULL) {
1789 Py_DECREF(shape1);
1790 return 0;
1791 }
1792 PyErr_Format(PyExc_ValueError,
1793 "operands could not be broadcast together with "
1794 "shapes %S and requested shape %S", shape1, shape2);
1795 Py_DECREF(shape1);
1796 Py_DECREF(shape2);
1797 return 0;
1798 }
1799 }
1800 else {
1801 PyObject *shape1 = PyUnicode_FromString("");
1802 if (shape1 == NULL) {
1803 return 0;
1804 }
1805 for (iop = 0; iop < nop; ++iop) {
1806 if (op[iop] != NULL) {
1807 int *axes = op_axes[iop];
1808 int ndims = PyArray_NDIM(op[iop]);
1809 npy_intp *dims = PyArray_DIMS(op[iop]);
1810 char *tmpstr = (axes == NULL) ? " " : "->";
1811
1812 PyObject *tmp = convert_shape_to_string(ndims, dims, tmpstr);
1813 if (tmp == NULL) {
1814 Py_DECREF(shape1);
1815 return 0;
1816 }
1817 Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1818 Py_DECREF(tmp);
1819 if (shape1 == NULL) {
1820 return 0;
1821 }
1822
1823 if (axes != NULL) {
1824 for (idim = 0; idim < ndim; ++idim) {
1825 int i = npyiter_get_op_axis(axes[idim], NULL);
1826
1827 if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1828 remdims[idim] = PyArray_DIM(op[iop], i);
1829 }
1830 else {
1831 remdims[idim] = -1;
1832 }
1833 }
1834 PyObject *tmp = convert_shape_to_string(ndim, remdims, " ");
1835 if (tmp == NULL) {
1836 Py_DECREF(shape1);
1837 return 0;
1838 }
1839 Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp));
1840 Py_DECREF(tmp);
1841 if (shape1 == NULL) {
1842 return 0;
1843 }
1844 }
1845 }
1846 }
1847 if (itershape == NULL) {
1848 PyErr_Format(PyExc_ValueError,
1849 "operands could not be broadcast together with "
1850 "remapped shapes [original->remapped]: %S", shape1);
1851 Py_DECREF(shape1);
1852 return 0;
1853 }
1854 else {
1855 PyObject *shape2 = convert_shape_to_string(ndim, itershape, "");
1856 if (shape2 == NULL) {
1857 Py_DECREF(shape1);
1858 return 0;
1859 }
1860 PyErr_Format(PyExc_ValueError,
1861 "operands could not be broadcast together with "
1862 "remapped shapes [original->remapped]: %S and "
1863 "requested shape %S", shape1, shape2);
1864 Py_DECREF(shape1);
1865 Py_DECREF(shape2);
1866 return 0;
1867 }
1868 }
1869 }
1870
1871 operand_different_than_broadcast: {
1872 /* operand shape */
1873 int ndims = PyArray_NDIM(op[iop]);
1874 npy_intp *dims = PyArray_DIMS(op[iop]);
1875 PyObject *shape1 = convert_shape_to_string(ndims, dims, "");
1876 if (shape1 == NULL) {
1877 return 0;
1878 }
1879
1880 /* Broadcast shape */
1881 PyObject *shape2 = convert_shape_to_string(ndim, broadcast_shape, "");
1882 if (shape2 == NULL) {
1883 Py_DECREF(shape1);
1884 return 0;
1885 }
1886
1887 if (op_axes == NULL || op_axes[iop] == NULL) {
1888 /* operand shape not remapped */
1889
1890 if (op_flags[iop] & NPY_ITER_READONLY) {
1891 PyErr_Format(PyExc_ValueError,
1892 "non-broadcastable operand with shape %S doesn't "
1893 "match the broadcast shape %S", shape1, shape2);
1894 }
1895 else {
1896 PyErr_Format(PyExc_ValueError,
1897 "non-broadcastable output operand with shape %S doesn't "
1898 "match the broadcast shape %S", shape1, shape2);
1899 }
1900 Py_DECREF(shape1);
1901 Py_DECREF(shape2);
1902 return 0;
1903 }
1904 else {
1905 /* operand shape remapped */
1906
1907 npy_intp remdims[NPY_MAXDIMS];
1908 int *axes = op_axes[iop];
1909 for (idim = 0; idim < ndim; ++idim) {
1910 npy_intp i = axes[ndim - idim - 1];
1911 if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1912 remdims[idim] = PyArray_DIM(op[iop], i);
1913 }
1914 else {
1915 remdims[idim] = -1;
1916 }
1917 }
1918
1919 PyObject *shape3 = convert_shape_to_string(ndim, remdims, "");
1920 if (shape3 == NULL) {
1921 Py_DECREF(shape1);
1922 Py_DECREF(shape2);
1923 return 0;
1924 }
1925
1926 if (op_flags[iop] & NPY_ITER_READONLY) {
1927 PyErr_Format(PyExc_ValueError,
1928 "non-broadcastable operand with shape %S "
1929 "[remapped to %S] doesn't match the broadcast shape %S",
1930 shape1, shape3, shape2);
1931 }
1932 else {
1933 PyErr_Format(PyExc_ValueError,
1934 "non-broadcastable output operand with shape %S "
1935 "[remapped to %S] doesn't match the broadcast shape %S",
1936 shape1, shape3, shape2);
1937 }
1938 Py_DECREF(shape1);
1939 Py_DECREF(shape2);
1940 Py_DECREF(shape3);
1941 return 0;
1942 }
1943 }
1944 }
1945
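/*
 * Illustrative sketch (not part of the nditer implementation): the
 * broadcasting rule whose failures the error paths above report.
 * Shapes are aligned at their trailing dimension; a dimension of 1
 * stretches to match, and anything else must agree exactly. The helper
 * name and signature below are made up for illustration only.
 */
static int
broadcast_shapes_sketch(int ndim1, const npy_intp *shape1,
                        int ndim2, const npy_intp *shape2,
                        int *out_ndim, npy_intp *out_shape)
{
    int ndim = (ndim1 > ndim2) ? ndim1 : ndim2;
    int i;

    for (i = 0; i < ndim; ++i) {
        /* Operands with fewer dimensions behave as if padded with 1's */
        npy_intp d1 = (i < ndim - ndim1) ? 1 : shape1[i - (ndim - ndim1)];
        npy_intp d2 = (i < ndim - ndim2) ? 1 : shape2[i - (ndim - ndim2)];

        if (d1 == d2 || d1 == 1 || d2 == 1) {
            out_shape[i] = (d1 == 1) ? d2 : d1;
        }
        else {
            /* The "could not be broadcast together" case */
            return 0;
        }
    }
    *out_ndim = ndim;
    return 1;
}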
1946 /*
1947 * Replaces the AXISDATA for the iop'th operand, broadcasting
1948 * the dimensions as necessary. Assumes the replacement array is
1949 * exactly the same shape as the original array used when
1950 * npyiter_fill_axisdata was called.
1951 *
1952 * If op_axes is not NULL, it should point to an ndim-sized
1953 * array.
1954 */
1955 static void
1956 npyiter_replace_axisdata(
1957 NpyIter *iter, int iop, PyArrayObject *op,
1958 int orig_op_ndim, const int *op_axes)
1959 {
1960 npy_uint32 itflags = NIT_ITFLAGS(iter);
1961 int idim, ndim = NIT_NDIM(iter);
1962 int nop = NIT_NOP(iter);
1963 char *op_dataptr = PyArray_DATA(op);
1964
1965 NpyIter_AxisData *axisdata0, *axisdata;
1966 npy_intp sizeof_axisdata;
1967 npy_int8 *perm;
1968 npy_intp baseoffset = 0;
1969
1970 perm = NIT_PERM(iter);
1971 axisdata0 = NIT_AXISDATA(iter);
1972 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1973
1974 /*
1975 * Replace just the strides which were non-zero, and compute
1976 * the base data address.
1977 */
1978 axisdata = axisdata0;
1979
1980 if (op_axes != NULL) {
1981 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1982 int i;
1983 npy_bool axis_flipped;
1984 npy_intp shape;
1985
1986 /* Apply perm to get the original axis, and check if it's flipped */
1987 i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped);
1988
1989 i = npyiter_get_op_axis(op_axes[i], NULL);
1990 assert(i < orig_op_ndim);
1991 if (i >= 0) {
1992 shape = PyArray_DIM(op, i);
1993 if (shape != 1) {
1994 npy_intp stride = PyArray_STRIDE(op, i);
1995 if (axis_flipped) {
1996 NAD_STRIDES(axisdata)[iop] = -stride;
1997 baseoffset += stride*(shape-1);
1998 }
1999 else {
2000 NAD_STRIDES(axisdata)[iop] = stride;
2001 }
2002 }
2003 }
2004 }
2005 }
2006 else {
2007 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2008 int i;
2009 npy_bool axis_flipped;
2010 npy_intp shape;
2011
2012 i = npyiter_undo_iter_axis_perm(
2013 idim, orig_op_ndim, perm, &axis_flipped);
2014
2015 if (i >= 0) {
2016 shape = PyArray_DIM(op, i);
2017 if (shape != 1) {
2018 npy_intp stride = PyArray_STRIDE(op, i);
2019 if (axis_flipped) {
2020 NAD_STRIDES(axisdata)[iop] = -stride;
2021 baseoffset += stride*(shape-1);
2022 }
2023 else {
2024 NAD_STRIDES(axisdata)[iop] = stride;
2025 }
2026 }
2027 }
2028 }
2029 }
2030
2031 op_dataptr += baseoffset;
2032
2033 /* Now the base data pointer is calculated, set it everywhere it's needed */
2034 NIT_RESETDATAPTR(iter)[iop] = op_dataptr;
2035 NIT_BASEOFFSETS(iter)[iop] = baseoffset;
2036 axisdata = axisdata0;
2037 /* Fill at least one axisdata, for the 0-d case */
2038 NAD_PTRS(axisdata)[iop] = op_dataptr;
2039 NIT_ADVANCE_AXISDATA(axisdata, 1);
2040 for (idim = 1; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2041 NAD_PTRS(axisdata)[iop] = op_dataptr;
2042 }
2043 }
2044
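/*
 * Illustrative sketch (not part of the iterator): how a flipped axis is
 * represented with a negated stride plus a base-pointer offset, the same
 * arithmetic npyiter_replace_axisdata performs above. Iterating
 * i = 0 .. shape-1 from (base + offset) with the negated stride visits
 * the same elements as walking the original axis backwards. The struct
 * and helper names are made up for illustration only.
 */
typedef struct {
    npy_intp stride;   /* stride to use during iteration */
    npy_intp offset;   /* offset added to the base data pointer */
} flipped_axis_sketch;

static flipped_axis_sketch
flip_axis_sketch(npy_intp stride, npy_intp shape)
{
    flipped_axis_sketch ret;

    /* Start at the last element and step backwards through memory */
    ret.offset = stride * (shape - 1);
    ret.stride = -stride;
    return ret;
}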
2045 /*
2046 * Computes the iterator's index strides and initializes the index values
2047 * to zero.
2048 *
2049 * This must be called before the axes (i.e. the AXISDATA array) may
2050 * be reordered.
2051 */
2052 static void
2053 npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags)
2054 {
2055 npy_uint32 itflags = NIT_ITFLAGS(iter);
2056 int idim, ndim = NIT_NDIM(iter);
2057 int nop = NIT_NOP(iter);
2058
2059 npy_intp indexstride;
2060 NpyIter_AxisData *axisdata;
2061 npy_intp sizeof_axisdata;
2062
2063 /*
2064 * If there is only one element being iterated, we just have
2065 * to touch the first AXISDATA because nothing will ever be
2066 * incremented. This also initializes the data for the 0-d case.
2067 */
2068 if (NIT_ITERSIZE(iter) == 1) {
2069 if (itflags & NPY_ITFLAG_HASINDEX) {
2070 axisdata = NIT_AXISDATA(iter);
2071 NAD_PTRS(axisdata)[nop] = 0;
2072 }
2073 return;
2074 }
2075
2076 if (flags & NPY_ITER_C_INDEX) {
2077 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2078 axisdata = NIT_AXISDATA(iter);
2079 indexstride = 1;
2080 for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2081 npy_intp shape = NAD_SHAPE(axisdata);
2082
2083 if (shape == 1) {
2084 NAD_STRIDES(axisdata)[nop] = 0;
2085 }
2086 else {
2087 NAD_STRIDES(axisdata)[nop] = indexstride;
2088 }
2089 NAD_PTRS(axisdata)[nop] = 0;
2090 indexstride *= shape;
2091 }
2092 }
2093 else if (flags & NPY_ITER_F_INDEX) {
2094 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2095 axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
2096 indexstride = 1;
2097 for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) {
2098 npy_intp shape = NAD_SHAPE(axisdata);
2099
2100 if (shape == 1) {
2101 NAD_STRIDES(axisdata)[nop] = 0;
2102 }
2103 else {
2104 NAD_STRIDES(axisdata)[nop] = indexstride;
2105 }
2106 NAD_PTRS(axisdata)[nop] = 0;
2107 indexstride *= shape;
2108 }
2109 }
2110 }
2111
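/*
 * Illustrative sketch (not part of the iterator): the C-order index
 * strides computed above. The dimensions are given here from the last
 * (fastest-varying) axis to the first, matching the reversed AXISDATA
 * layout, so the flat C index of a multi-index is simply the sum of
 * multi_index[axis] * index_strides[axis]. The helper name is made up
 * for illustration only.
 */
static void
c_index_strides_sketch(int ndim, const npy_intp *rev_shape,
                       npy_intp *rev_index_strides)
{
    npy_intp indexstride = 1;
    int idim;

    for (idim = 0; idim < ndim; ++idim) {
        /* Length-1 axes never contribute to the index */
        rev_index_strides[idim] = (rev_shape[idim] == 1) ? 0 : indexstride;
        indexstride *= rev_shape[idim];
    }
}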
2112 /*
2113 * If the order is NPY_KEEPORDER, lets the iterator find the best
2114 * iteration order, otherwise forces it. Indicates in the itflags
2115 * whether the iteration order was forced.
2116 */
2117 static void
2118 npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order)
2119 {
2120 /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
2121 int ndim = NIT_NDIM(iter);
2122 int iop, nop = NIT_NOP(iter);
2123
2124 switch (order) {
2125 case NPY_CORDER:
2126 NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2127 break;
2128 case NPY_FORTRANORDER:
2129 NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2130 /* Only need to actually do something if there is more than 1 dim */
2131 if (ndim > 1) {
2132 npyiter_reverse_axis_ordering(iter);
2133 }
2134 break;
2135 case NPY_ANYORDER:
2136 NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2137 /* Only need to actually do something if there is more than 1 dim */
2138 if (ndim > 1) {
2139 PyArrayObject **op = NIT_OPERANDS(iter);
2140 int forder = 1;
2141
2142 /* Check that all the array inputs are fortran order */
2143 for (iop = 0; iop < nop; ++iop, ++op) {
2144 if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) {
2145 forder = 0;
2146 break;
2147 }
2148 }
2149
2150 if (forder) {
2151 npyiter_reverse_axis_ordering(iter);
2152 }
2153 }
2154 break;
2155 case NPY_KEEPORDER:
2156 /* Don't set the forced order flag here... */
2157 break;
2158 }
2159 }
2160
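/*
 * Illustrative sketch (not part of the iterator): the decision made by
 * npyiter_apply_forced_iteration_order above, reduced to a predicate.
 * The axis order is reversed for NPY_FORTRANORDER, and for NPY_ANYORDER
 * only when every existing operand is Fortran-contiguous (the int array
 * here stands in for the PyArray_CHKFLAGS checks); NPY_CORDER and
 * NPY_KEEPORDER never reverse. The helper name is made up.
 */
static int
should_reverse_axes_sketch(NPY_ORDER order, int nop,
                           const int *op_is_f_contiguous)
{
    int iop;

    switch (order) {
        case NPY_FORTRANORDER:
            return 1;
        case NPY_ANYORDER:
            for (iop = 0; iop < nop; ++iop) {
                if (!op_is_f_contiguous[iop]) {
                    return 0;
                }
            }
            return 1;
        default:
            return 0;
    }
}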
2161 /*
2162 * This function negates any strides in the iterator
2163 * which are negative. When iterating more than one
2164 * object, it only flips strides when they are all
2165 * negative or zero.
2166 */
2167 static void
2168 npyiter_flip_negative_strides(NpyIter *iter)
2169 {
2170 npy_uint32 itflags = NIT_ITFLAGS(iter);
2171 int idim, ndim = NIT_NDIM(iter);
2172 int iop, nop = NIT_NOP(iter);
2173
2174 npy_intp istrides, nstrides = NAD_NSTRIDES();
2175 NpyIter_AxisData *axisdata, *axisdata0;
2176 npy_intp *baseoffsets;
2177 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2178 int any_flipped = 0;
2179
2180 axisdata0 = axisdata = NIT_AXISDATA(iter);
2181 baseoffsets = NIT_BASEOFFSETS(iter);
2182 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2183 npy_intp *strides = NAD_STRIDES(axisdata);
2184 int any_negative = 0;
2185
2186 /*
2187 * Check the signs of all the operand strides.
2188 */
2189 for (iop = 0; iop < nop; ++iop) {
2190 if (strides[iop] < 0) {
2191 any_negative = 1;
2192 }
2193 else if (strides[iop] != 0) {
2194 break;
2195 }
2196 }
2197 /*
2198 * If at least one stride is negative and none are positive,
2199 * flip all the strides for this dimension.
2200 */
2201 if (any_negative && iop == nop) {
2202 npy_intp shapem1 = NAD_SHAPE(axisdata) - 1;
2203
2204 for (istrides = 0; istrides < nstrides; ++istrides) {
2205 npy_intp stride = strides[istrides];
2206
2207 /* Adjust the base pointers to start at the end */
2208 baseoffsets[istrides] += shapem1 * stride;
2209 /* Flip the stride */
2210 strides[istrides] = -stride;
2211 }
2212 /*
2213 * Make the perm entry negative so get_multi_index
2214 * knows it's flipped
2215 */
2216 NIT_PERM(iter)[idim] = -1-NIT_PERM(iter)[idim];
2217
2218 any_flipped = 1;
2219 }
2220 }
2221
2222 /*
2223 * If any strides were flipped, the base pointers were adjusted
2224 * in the first AXISDATA, and need to be copied to all the rest
2225 */
2226 if (any_flipped) {
2227 char **resetdataptr = NIT_RESETDATAPTR(iter);
2228
2229 for (istrides = 0; istrides < nstrides; ++istrides) {
2230 resetdataptr[istrides] += baseoffsets[istrides];
2231 }
2232 axisdata = axisdata0;
2233 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2234 char **ptrs = NAD_PTRS(axisdata);
2235 for (istrides = 0; istrides < nstrides; ++istrides) {
2236 ptrs[istrides] = resetdataptr[istrides];
2237 }
2238 }
2239 /*
2240 * Indicate that some of the perm entries are negative,
2241 * and that it's not (strictly speaking) the identity perm.
2242 */
2243 NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter)|NPY_ITFLAG_NEGPERM) &
2244 ~NPY_ITFLAG_IDENTPERM;
2245 }
2246 }
2247
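/*
 * Illustrative sketch (not part of the iterator): the per-axis test used
 * by npyiter_flip_negative_strides above. An axis is flipped only when
 * at least one operand stride is negative and none is positive; zero
 * strides do not block the flip. The helper name is made up.
 */
static int
axis_should_flip_sketch(int nop, const npy_intp *strides)
{
    int iop, any_negative = 0;

    for (iop = 0; iop < nop; ++iop) {
        if (strides[iop] < 0) {
            any_negative = 1;
        }
        else if (strides[iop] != 0) {
            /* A positive stride blocks the flip */
            return 0;
        }
    }
    return any_negative;
}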
2248 static void
2249 npyiter_reverse_axis_ordering(NpyIter *iter)
2250 {
2251 npy_uint32 itflags = NIT_ITFLAGS(iter);
2252 int ndim = NIT_NDIM(iter);
2253 int nop = NIT_NOP(iter);
2254
2255 npy_intp i, temp, size;
2256 npy_intp *first, *last;
2257 npy_int8 *perm;
2258
2259 size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop)/NPY_SIZEOF_INTP;
2260 first = (npy_intp*)NIT_AXISDATA(iter);
2261 last = first + (ndim-1)*size;
2262
2263 /* This loop reverses the order of the AXISDATA array */
2264 while (first < last) {
2265 for (i = 0; i < size; ++i) {
2266 temp = first[i];
2267 first[i] = last[i];
2268 last[i] = temp;
2269 }
2270 first += size;
2271 last -= size;
2272 }
2273
2274 /* Store the perm we applied */
2275 perm = NIT_PERM(iter);
2276 for(i = ndim-1; i >= 0; --i, ++perm) {
2277 *perm = (npy_int8)i;
2278 }
2279
2280 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
2281 }
2282
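/*
 * Illustrative sketch (not part of the iterator): reversing an array of
 * fixed-size records in place, the pattern npyiter_reverse_axis_ordering
 * uses above, where each record is `size` npy_intp words of AXISDATA.
 * The helper name is made up.
 */
static void
reverse_records_sketch(npy_intp *data, npy_intp nrecords, npy_intp size)
{
    npy_intp *first, *last;
    npy_intp i, temp;

    if (nrecords <= 1) {
        return;
    }
    first = data;
    last = data + (nrecords - 1) * size;
    while (first < last) {
        for (i = 0; i < size; ++i) {
            temp = first[i];
            first[i] = last[i];
            last[i] = temp;
        }
        first += size;
        last -= size;
    }
}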
2283 static NPY_INLINE npy_intp
2284 intp_abs(npy_intp x)
2285 {
2286 return (x < 0) ? -x : x;
2287 }
2288
2289 static void
2290 npyiter_find_best_axis_ordering(NpyIter *iter)
2291 {
2292 npy_uint32 itflags = NIT_ITFLAGS(iter);
2293 int idim, ndim = NIT_NDIM(iter);
2294 int iop, nop = NIT_NOP(iter);
2295
2296 npy_intp ax_i0, ax_i1, ax_ipos;
2297 npy_int8 ax_j0, ax_j1;
2298 npy_int8 *perm;
2299 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
2300 npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2301 int permuted = 0;
2302
2303 perm = NIT_PERM(iter);
2304
2305 /*
2306 * Do a custom stable insertion sort. Note that because
2307 * the AXISDATA has been reversed from C order, this
2308 * is sorting from smallest stride to biggest stride.
2309 */
2310 for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) {
2311 npy_intp *strides0;
2312
2313 /* 'ax_ipos' is where perm[ax_i0] will get inserted */
2314 ax_ipos = ax_i0;
2315 ax_j0 = perm[ax_i0];
2316
2317 strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0));
2318 for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) {
2319 int ambig = 1, shouldswap = 0;
2320 npy_intp *strides1;
2321
2322 ax_j1 = perm[ax_i1];
2323
2324 strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1));
2325
2326 for (iop = 0; iop < nop; ++iop) {
2327 if (strides0[iop] != 0 && strides1[iop] != 0) {
2328 if (intp_abs(strides1[iop]) <=
2329 intp_abs(strides0[iop])) {
2330 /*
2331 * Clear the swap flag even if it's no longer ambiguous,
2332 * because in the case of conflicts between
2333 * different operands, C-order wins.
2334 */
2335 shouldswap = 0;
2336 }
2337 else {
2338 /* Only set swap if it's still ambiguous */
2339 if (ambig) {
2340 shouldswap = 1;
2341 }
2342 }
2343
2344 /*
2345 * A comparison has been done, so it's
2346 * no longer ambiguous
2347 */
2348 ambig = 0;
2349 }
2350 }
2351 /*
2352 * If the comparison was unambiguous, either shift
2353 * 'ax_ipos' to 'ax_i1' or stop looking for an insertion
2354 * point
2355 */
2356 if (!ambig) {
2357 if (shouldswap) {
2358 ax_ipos = ax_i1;
2359 }
2360 else {
2361 break;
2362 }
2363 }
2364 }
2365
2366 /* Insert perm[ax_i0] into the right place */
2367 if (ax_ipos != ax_i0) {
2368 for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) {
2369 perm[ax_i1] = perm[ax_i1-1];
2370 }
2371 perm[ax_ipos] = ax_j0;
2372 permuted = 1;
2373 }
2374 }
2375
2376 /* Apply the computed permutation to the AXISDATA array */
2377 if (permuted == 1) {
2378 npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP;
2379 NpyIter_AxisData *ad_i;
2380
2381 /* Use the index as a flag, set each to 1 */
2382 ad_i = axisdata;
2383 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) {
2384 NAD_INDEX(ad_i) = 1;
2385 }
2386 /* Apply the permutation by following the cycles */
2387 for (idim = 0; idim < ndim; ++idim) {
2388 ad_i = NIT_INDEX_AXISDATA(axisdata, idim);
2389
2390 /* If this axis hasn't been touched yet, process it */
2391 if (NAD_INDEX(ad_i) == 1) {
2392 npy_int8 pidim = perm[idim];
2393 npy_intp tmp;
2394 NpyIter_AxisData *ad_p, *ad_q;
2395
2396 if (pidim != idim) {
2397 /* Follow the cycle, copying the data */
2398 for (i = 0; i < size; ++i) {
2399 pidim = perm[idim];
2400 ad_q = ad_i;
2401 tmp = *((npy_intp*)ad_q + i);
2402 while (pidim != idim) {
2403 ad_p = NIT_INDEX_AXISDATA(axisdata, pidim);
2404 *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i);
2405
2406 ad_q = ad_p;
2407 pidim = perm[(int)pidim];
2408 }
2409 *((npy_intp*)ad_q + i) = tmp;
2410 }
2411 /* Follow the cycle again, marking it as done */
2412 pidim = perm[idim];
2413
2414 while (pidim != idim) {
2415 NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0;
2416 pidim = perm[(int)pidim];
2417 }
2418 }
2419 NAD_INDEX(ad_i) = 0;
2420 }
2421 }
2422 /* Clear the identity perm flag */
2423 NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
2424 }
2425 }
2426
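/*
 * Illustrative sketch (not part of the iterator): applying a permutation
 * in place by following its cycles, as the loop above does for whole
 * AXISDATA records. After the call, data[i] holds the value that was at
 * data[perm[i]]. Assumes n <= NPY_MAXDIMS; the helper name is made up.
 */
static void
apply_perm_by_cycles_sketch(npy_intp *data, const npy_int8 *perm, int n)
{
    int idim;
    npy_bool done[NPY_MAXDIMS];

    for (idim = 0; idim < n; ++idim) {
        done[idim] = 0;
    }
    for (idim = 0; idim < n; ++idim) {
        npy_int8 p, q;
        npy_intp tmp;

        if (done[idim]) {
            continue;
        }
        done[idim] = 1;
        p = perm[idim];
        if (p == idim) {
            continue;
        }
        /* Shift values backwards around the cycle starting at idim */
        tmp = data[idim];
        q = (npy_int8)idim;
        while (p != idim) {
            data[q] = data[p];
            done[p] = 1;
            q = p;
            p = perm[(int)p];
        }
        data[q] = tmp;
    }
}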
2427 /*
2428 * Calculates a dtype that all the types can be promoted to, using the
2429 * ufunc rules. If only_inputs is 1, it leaves any operands that
2430 * are not read from out of the calculation.
2431 */
2432 static PyArray_Descr *
2433 npyiter_get_common_dtype(int nop, PyArrayObject **op,
2434 const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
2435 PyArray_Descr **op_request_dtypes,
2436 int only_inputs)
2437 {
2438 int iop;
2439 npy_intp narrs = 0, ndtypes = 0;
2440 PyArrayObject *arrs[NPY_MAXARGS];
2441 PyArray_Descr *dtypes[NPY_MAXARGS];
2442 PyArray_Descr *ret;
2443
2444 NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n");
2445
2446 for (iop = 0; iop < nop; ++iop) {
2447 if (op_dtype[iop] != NULL &&
2448 (!only_inputs || (op_itflags[iop] & NPY_OP_ITFLAG_READ))) {
2449 /* If no dtype was requested and the op is a scalar, pass the op */
2450 if ((op_request_dtypes == NULL ||
2451 op_request_dtypes[iop] == NULL) &&
2452 PyArray_NDIM(op[iop]) == 0) {
2453 arrs[narrs++] = op[iop];
2454 }
2455 /* Otherwise just pass in the dtype */
2456 else {
2457 dtypes[ndtypes++] = op_dtype[iop];
2458 }
2459 }
2460 }
2461
2462 if (narrs == 0) {
2463 npy_intp i;
2464 ret = dtypes[0];
2465 for (i = 1; i < ndtypes; ++i) {
2466 if (ret != dtypes[i])
2467 break;
2468 }
2469 if (i == ndtypes) {
2470 if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) {
2471 Py_INCREF(ret);
2472 }
2473 else {
2474 ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE);
2475 }
2476 }
2477 else {
2478 ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2479 }
2480 }
2481 else {
2482 ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2483 }
2484
2485 return ret;
2486 }
2487
2488 /*
2489 * Allocates a temporary array which can be used to replace op
2490 * in the iteration. Its dtype will be op_dtype.
2491 *
2492 * The result array has a memory ordering which matches the iterator,
2493 * which may or may not match that of op. The parameter 'shape' may be
2494 * NULL, in which case it is filled in from the iterator's shape.
2495 *
2496 * This function must be called before any axes are coalesced.
2497 */
2498 static PyArrayObject *
2499 npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
2500 npy_uint32 flags, npyiter_opitflags *op_itflags,
2501 int op_ndim, npy_intp const *shape,
2502 PyArray_Descr *op_dtype, const int *op_axes)
2503 {
2504 npy_uint32 itflags = NIT_ITFLAGS(iter);
2505 int idim, ndim = NIT_NDIM(iter);
2506 int used_op_ndim;
2507 int nop = NIT_NOP(iter);
2508
2509 npy_int8 *perm = NIT_PERM(iter);
2510 npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
2511 npy_intp stride = op_dtype->elsize;
2512 NpyIter_AxisData *axisdata;
2513 npy_intp sizeof_axisdata;
2514 int i;
2515
2516 PyArrayObject *ret;
2517
2518 /*
2519 * There is an interaction with array-dtypes here, which
2520 * generally works. Let's say you make an nditer with an
2521 * output dtype of a 2-double array. All-scalar inputs
2522 * will result in a 1-dimensional output with shape (2).
2523 * Everything still works out in the nditer, because the
2524 * new dimension is always added on the end, and it cares
2525 * about what happens at the beginning.
2526 */
2527
2528 /* If it's a scalar, don't need to check the axes */
2529 if (op_ndim == 0) {
2530 Py_INCREF(op_dtype);
2531 ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0,
2532 NULL, NULL, NULL, 0, NULL);
2533
2534 return ret;
2535 }
2536
2537 axisdata = NIT_AXISDATA(iter);
2538 sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2539
2540 /* Initialize the strides to invalid values */
2541 for (i = 0; i < op_ndim; ++i) {
2542 strides[i] = NPY_MAX_INTP;
2543 }
2544
2545 if (op_axes != NULL) {
2546 used_op_ndim = 0;
2547 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2548 npy_bool reduction_axis;
2549
2550 /* Apply the perm to get the original axis */
2551 i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
2552 i = npyiter_get_op_axis(op_axes[i], &reduction_axis);
2553
2554 if (i >= 0) {
2555 NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
2556 "for iterator dimension %d to %d\n", (int)i,
2557 (int)idim, (int)stride);
2558 used_op_ndim += 1;
2559 strides[i] = stride;
2560 if (shape == NULL) {
2561 if (reduction_axis) {
2562 /* reduction axes always have a length of 1 */
2563 new_shape[i] = 1;
2564 }
2565 else {
2566 new_shape[i] = NAD_SHAPE(axisdata);
2567 }
2568 stride *= new_shape[i];
2569 if (i >= ndim) {
2570 PyErr_Format(PyExc_ValueError,
2571 "automatically allocated output array "
2572 "specified with an inconsistent axis mapping; "
2573 "the axis mapping cannot include dimension %d "
2574 "which is too large for the iterator dimension "
2575 "of %d.", i, ndim);
2576 return NULL;
2577 }
2578 }
2579 else {
2580 assert(!reduction_axis || shape[i] == 1);
2581 stride *= shape[i];
2582 }
2583 }
2584 else {
2585 if (shape == NULL) {
2586 /*
2587 * If deleting this axis produces a reduction, but
2588 * reduction wasn't enabled, throw an error.
2589 * NOTE: We currently always allow new-axis if the iteration
2590 * size is 1 (thus allowing broadcasting sometimes).
2591 */
2592 if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
2593 if (!npyiter_check_reduce_ok_and_set_flags(
2594 iter, flags, op_itflags, i)) {
2595 return NULL;
2596 }
2597 }
2598 }
2599 }
2600 }
2601 }
2602 else {
2603 used_op_ndim = ndim;
2604 for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2605 /* Apply the perm to get the original axis */
2606 i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL);
2607
2608 if (i >= 0) {
2609 NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
2610 "for iterator dimension %d to %d\n", (int)i,
2611 (int)idim, (int)stride);
2612 strides[i] = stride;
2613 if (shape == NULL) {
2614 new_shape[i] = NAD_SHAPE(axisdata);
2615 stride *= new_shape[i];
2616 }
2617 else {
2618 stride *= shape[i];
2619 }
2620 }
2621 }
2622 }
2623
2624 if (shape == NULL) {
2625 /* If shape was NULL, use the shape we calculated */
2626 op_ndim = used_op_ndim;
2627 shape = new_shape;
2628 /*
2629 * If there's a gap in the array's dimensions, it's an error.
2630 * For instance, if op_axes [0, 2] is specified, there will be a place
2631 * in the strides array where the value is not set.
2632 */
2633 for (i = 0; i < op_ndim; i++) {
2634 if (strides[i] == NPY_MAX_INTP) {
2635 PyErr_Format(PyExc_ValueError,
2636 "automatically allocated output array "
2637 "specified with an inconsistent axis mapping; "
2638 "the axis mapping is missing an entry for "
2639 "dimension %d.", i);
2640 return NULL;
2641 }
2642 }
2643 }
2644 else if (used_op_ndim < op_ndim) {
2645 /*
2646 * If custom axes were specified, some dimensions may not have
2647 * been used. These are additional axes which are ignored in the
2648 * iterator but need to be handled here.
2649 */
2650 npy_intp factor, itemsize, new_strides[NPY_MAXDIMS];
2651
2652 /* Fill in the missing strides in C order */
2653 factor = 1;
2654 itemsize = op_dtype->elsize;
2655 for (i = op_ndim-1; i >= 0; --i) {
2656 if (strides[i] == NPY_MAX_INTP) {
2657 new_strides[i] = factor * itemsize;
2658 factor *= shape[i];
2659 }
2660 }
2661
2662 /*
2663 * Copy the missing strides, and multiply the existing strides
2664 * by the calculated factor. This way, the missing strides
2665 * are tighter together in memory, which is good for nested
2666 * loops.
2667 */
2668 for (i = 0; i < op_ndim; ++i) {
2669 if (strides[i] == NPY_MAX_INTP) {
2670 strides[i] = new_strides[i];
2671 }
2672 else {
2673 strides[i] *= factor;
2674 }
2675 }
2676 }
2677
2678 /* Allocate the temporary array */
2679 Py_INCREF(op_dtype);
2680 ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim,
2681 shape, strides, NULL, 0, NULL);
2682 if (ret == NULL) {
2683 return NULL;
2684 }
2685
2686 /* Double-check that the subtype didn't mess with the dimensions */
2687 if (subtype != &PyArray_Type) {
2688 /*
2689 * TODO: the dtype could have a subarray, which adds new dimensions
2690 * to `ret`, that should typically be fine, but will break
2691 * in this branch.
2692 */
2693 if (PyArray_NDIM(ret) != op_ndim ||
2694 !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) {
2695 PyErr_SetString(PyExc_RuntimeError,
2696 "Iterator automatic output has an array subtype "
2697 "which changed the dimensions of the output");
2698 Py_DECREF(ret);
2699 return NULL;
2700 }
2701 }
2702
2703 return ret;
2704 }
2705
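/*
 * Illustrative sketch (not part of the iterator): the stride layout that
 * npyiter_new_temp_array builds for an automatically allocated output.
 * Axes are visited in the iterator's order, fastest-varying first, and
 * each one receives the running product of the previous sizes times the
 * itemsize, so the allocated array is contiguous in the iteration order
 * rather than necessarily in C order. The parameter names are made up;
 * axis_in_iter_order stands in for the perm/op_axes lookup above.
 */
static void
iteration_order_strides_sketch(int ndim, const npy_intp *shape_in_iter_order,
                               const int *axis_in_iter_order,
                               npy_intp itemsize, npy_intp *out_strides)
{
    npy_intp stride = itemsize;
    int idim;

    for (idim = 0; idim < ndim; ++idim) {
        /* 'i' is the original array axis this iterator axis maps to */
        int i = axis_in_iter_order[idim];

        out_strides[i] = stride;
        stride *= shape_in_iter_order[idim];
    }
}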
2706 static int
2707 npyiter_allocate_arrays(NpyIter *iter,
2708 npy_uint32 flags,
2709 PyArray_Descr **op_dtype, PyTypeObject *subtype,
2710 const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
2711 int **op_axes)
2712 {
2713 npy_uint32 itflags = NIT_ITFLAGS(iter);
2714 int idim, ndim = NIT_NDIM(iter);
2715 int iop, nop = NIT_NOP(iter);
2716
2717 int check_writemasked_reductions = 0;
2718
2719 NpyIter_BufferData *bufferdata = NULL;
2720 PyArrayObject **op = NIT_OPERANDS(iter);
2721
2722 if (itflags & NPY_ITFLAG_BUFFER) {
2723 bufferdata = NIT_BUFFERDATA(iter);
2724 }
2725
2726 if (flags & NPY_ITER_COPY_IF_OVERLAP) {
2727 /*
2728 * Perform operand memory overlap checks, if requested.
2729 *
2730 * If any write operand has memory overlap with any read operand,
2731 * eliminate all overlap by making temporary copies: enable
2732 * NPY_OP_ITFLAG_FORCECOPY for the write operand to force WRITEBACKIFCOPY.
2733 *
2734 * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not
2735 * considered overlapping if the arrays are exactly the same. In this
2736 * case, the iterator loops through them in the same order element by
2737 * element. (As usual, the user-provided inner loop is assumed to be
2738 * able to deal with this level of simple aliasing.)
2739 */
2740 for (iop = 0; iop < nop; ++iop) {
2741 int may_share_memory = 0;
2742 int iother;
2743
2744 if (op[iop] == NULL) {
2745 /* Iterator will always allocate */
2746 continue;
2747 }
2748
2749 if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) {
2750 /*
2751 * Copy output operands only, not inputs.
2752 * A more sophisticated heuristic could be
2753 * substituted here later.
2754 */
2755 continue;
2756 }
2757
2758 for (iother = 0; iother < nop; ++iother) {
2759 if (iother == iop || op[iother] == NULL) {
2760 continue;
2761 }
2762
2763 if (!(op_itflags[iother] & NPY_OP_ITFLAG_READ)) {
2764 /* No data dependence for arrays not read from */
2765 continue;
2766 }
2767
2768 if (op_itflags[iother] & NPY_OP_ITFLAG_FORCECOPY) {
2769 /* Already copied */
2770 continue;
2771 }
2772
2773 /*
2774 * If the arrays are views to exactly the same data, no need
2775 * to make copies, if the caller (eg ufunc) says it accesses
2776 * data only in the iterator order.
2777 *
2778 * However, if there is internal overlap (e.g. a zero stride on
2779 * a non-unit dimension), a copy cannot be avoided.
2780 */
2781 if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
2782 (op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
2783 PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) &&
2784 PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) &&
2785 PyArray_CompareLists(PyArray_DIMS(op[iop]),
2786 PyArray_DIMS(op[iother]),
2787 PyArray_NDIM(op[iop])) &&
2788 PyArray_CompareLists(PyArray_STRIDES(op[iop]),
2789 PyArray_STRIDES(op[iother]),
2790 PyArray_NDIM(op[iop])) &&
2791 PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) &&
2792 solve_may_have_internal_overlap(op[iop], 1) == 0) {
2793
2794 continue;
2795 }
2796
2797 /*
2798 * Use max work = 1. If the arrays are large, it might
2799 * make sense to go further.
2800 */
2801 may_share_memory = solve_may_share_memory(op[iop],
2802 op[iother],
2803 1);
2804
2805 if (may_share_memory) {
2806 op_itflags[iop] |= NPY_OP_ITFLAG_FORCECOPY;
2807 break;
2808 }
2809 }
2810 }
2811 }
2812
2813 for (iop = 0; iop < nop; ++iop) {
2814 /*
2815 * Check whether there are any WRITEMASKED REDUCE operands
2816 * which should be validated after all the strides are filled
2817 * in.
2818 */
2819 if ((op_itflags[iop] &
2820 (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
2821 (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
2822 check_writemasked_reductions = 1;
2823 }
2824
2825 /* NULL means an output the iterator should allocate */
2826 if (op[iop] == NULL) {
2827 PyArrayObject *out;
2828 PyTypeObject *op_subtype;
2829
2830 /* Check whether the subtype was disabled */
2831 op_subtype = (op_flags[iop] & NPY_ITER_NO_SUBTYPE) ?
2832 &PyArray_Type : subtype;
2833
2834 /*
2835 * Allocate the output array.
2836 *
2837 * Note that here, ndim is always correct if no op_axes was given
2838 * (but the actual dimension of op can be larger). If op_axes
2839 * is given, ndim is not actually used.
2840 */
2841 out = npyiter_new_temp_array(iter, op_subtype,
2842 flags, &op_itflags[iop],
2843 ndim,
2844 NULL,
2845 op_dtype[iop],
2846 op_axes ? op_axes[iop] : NULL);
2847 if (out == NULL) {
2848 return 0;
2849 }
2850
2851 op[iop] = out;
2852
2853 /*
2854 * Now we need to replace the pointers and strides with values
2855 * from the new array.
2856 */
2857 npyiter_replace_axisdata(iter, iop, op[iop], ndim,
2858 op_axes ? op_axes[iop] : NULL);
2859
2860 /*
2861 * New arrays are guaranteed true-aligned, but copy/cast code
2862 * needs uint-alignment in addition.
2863 */
2864 if (IsUintAligned(out)) {
2865 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2866 }
2867 /* New arrays need no cast */
2868 op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2869 }
2870 /*
2871 * If casting is required, the operand is read-only, and
2872 * it's an array scalar, make a copy whether or not the
2873 * copy flag is enabled.
2874 */
2875 else if ((op_itflags[iop] & (NPY_OP_ITFLAG_CAST |
2876 NPY_OP_ITFLAG_READ |
2877 NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST |
2878 NPY_OP_ITFLAG_READ) &&
2879 PyArray_NDIM(op[iop]) == 0) {
2880 PyArrayObject *temp;
2881 Py_INCREF(op_dtype[iop]);
2882 temp = (PyArrayObject *)PyArray_NewFromDescr(
2883 &PyArray_Type, op_dtype[iop],
2884 0, NULL, NULL, NULL, 0, NULL);
2885 if (temp == NULL) {
2886 return 0;
2887 }
2888 if (PyArray_CopyInto(temp, op[iop]) != 0) {
2889 Py_DECREF(temp);
2890 return 0;
2891 }
2892 Py_DECREF(op[iop]);
2893 op[iop] = temp;
2894
2895 /*
2896 * Now we need to replace the pointers and strides with values
2897 * from the temporary array.
2898 */
2899 npyiter_replace_axisdata(iter, iop, op[iop], 0, NULL);
2900
2901 /*
2902 * New arrays are guaranteed true-aligned, but copy/cast code
2903 * needs uint-alignment in addition.
2904 */
2905 if (IsUintAligned(temp)) {
2906 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2907 }
2908 /*
2909 * New arrays need no cast, and in the case
2910 * of scalars, always have stride 0 so never need buffering
2911 */
2912 op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
2913 op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2914 if (itflags & NPY_ITFLAG_BUFFER) {
2915 NBF_STRIDES(bufferdata)[iop] = 0;
2916 }
2917 }
2918 /*
2919 * Make a temporary copy if:
2920 * 1. casting is required and permitted, or
2921 * 2. a forced copy is requested.
2922 */
2923 else if (((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
2924 (op_flags[iop] &
2925 (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) ||
2926 (op_itflags[iop] & NPY_OP_ITFLAG_FORCECOPY)) {
2927 PyArrayObject *temp;
2928 int ondim = PyArray_NDIM(op[iop]);
2929
2930 /* Allocate the temporary array, if possible */
2931 temp = npyiter_new_temp_array(iter, &PyArray_Type,
2932 flags, &op_itflags[iop],
2933 ondim,
2934 PyArray_DIMS(op[iop]),
2935 op_dtype[iop],
2936 op_axes ? op_axes[iop] : NULL);
2937 if (temp == NULL) {
2938 return 0;
2939 }
2940
2941 /*
2942 * If the data will be read, copy it into temp.
2943 * TODO: It might be possible to do a view into
2944 * op[iop]'s mask instead here.
2945 */
2946 if (op_itflags[iop] & NPY_OP_ITFLAG_READ) {
2947 if (PyArray_CopyInto(temp, op[iop]) != 0) {
2948 Py_DECREF(temp);
2949 return 0;
2950 }
2951 }
2952 /* If the data will be written to, set WRITEBACKIFCOPY
2953 and require a context manager */
2954 if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
2955 Py_INCREF(op[iop]);
2956 if (PyArray_SetWritebackIfCopyBase(temp, op[iop]) < 0) {
2957 Py_DECREF(temp);
2958 return 0;
2959 }
2960 op_itflags[iop] |= NPY_OP_ITFLAG_HAS_WRITEBACK;
2961 }
2962
2963 Py_DECREF(op[iop]);
2964 op[iop] = temp;
2965
2966 /*
2967 * Now we need to replace the pointers and strides with values
2968 * from the temporary array.
2969 */
2970 npyiter_replace_axisdata(iter, iop, op[iop], ondim,
2971 op_axes ? op_axes[iop] : NULL);
2972
2973 /*
2974 * New arrays are guaranteed true-aligned, but copy/cast code
2975 * needs uint-alignment in addition.
2976 */
2977 if (IsUintAligned(temp)) {
2978 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2979 }
2980 /* The temporary copy needs no cast */
2981 op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2982 }
2983 else {
2984 /*
2985 * Buffering must be enabled for casting/conversion if copy
2986 * wasn't specified.
2987 */
2988 if ((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
2989 !(itflags & NPY_ITFLAG_BUFFER)) {
2990 PyErr_SetString(PyExc_TypeError,
2991 "Iterator operand required copying or buffering, "
2992 "but neither copying nor buffering was enabled");
2993 return 0;
2994 }
2995
2996 /*
2997 * If the operand is aligned, any buffering can use aligned
2998 * optimizations.
2999 */
3000 if (IsUintAligned(op[iop])) {
3001 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
3002 }
3003 }
3004
3005 /* Here we can finally check for contiguous iteration */
3006 if (op_flags[iop] & NPY_ITER_CONTIG) {
3007 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3008 npy_intp stride = NAD_STRIDES(axisdata)[iop];
3009
3010 if (stride != op_dtype[iop]->elsize) {
3011 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
3012 "because of NPY_ITER_CONTIG\n");
3013 op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
3014 if (!(itflags & NPY_ITFLAG_BUFFER)) {
3015 PyErr_SetString(PyExc_TypeError,
3016 "Iterator operand required buffering, "
3017 "to be contiguous as requested, but "
3018 "buffering is not enabled");
3019 return 0;
3020 }
3021 }
3022 }
3023
3024 /*
3025 * If no alignment, byte swap, or casting is needed, and
3026 * the inner stride of this operand works for the whole
3027 * array, we can set NPY_OP_ITFLAG_BUFNEVER.
3028 */
3029 if ((itflags & NPY_ITFLAG_BUFFER) &&
3030 !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
3031 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3032 if (ndim <= 1) {
3033 op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
3034 NBF_STRIDES(bufferdata)[iop] = NAD_STRIDES(axisdata)[iop];
3035 }
3036 else if (PyArray_NDIM(op[iop]) > 0) {
3037 npy_intp stride, shape, innerstride = 0, innershape;
3038 npy_intp sizeof_axisdata =
3039 NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
3040 /* Find the stride of the first axis whose length is not 1 */
3041 for (idim = 0; idim < ndim; ++idim) {
3042 innershape = NAD_SHAPE(axisdata);
3043 if (innershape != 1) {
3044 innerstride = NAD_STRIDES(axisdata)[iop];
3045 break;
3046 }
3047 NIT_ADVANCE_AXISDATA(axisdata, 1);
3048 }
3049 ++idim;
3050 NIT_ADVANCE_AXISDATA(axisdata, 1);
3051 /* Check that everything could have coalesced together */
3052 for (; idim < ndim; ++idim) {
3053 stride = NAD_STRIDES(axisdata)[iop];
3054 shape = NAD_SHAPE(axisdata);
3055 if (shape != 1) {
3056 /*
3057 * If N times the inner stride doesn't equal this
3058 * stride, the multi-dimensionality is needed.
3059 */
3060 if (innerstride*innershape != stride) {
3061 break;
3062 }
3063 else {
3064 innershape *= shape;
3065 }
3066 }
3067 NIT_ADVANCE_AXISDATA(axisdata, 1);
3068 }
3069 /*
3070 * If we looped all the way to the end, one stride works.
3071 * Set that stride, because it may not belong to the first
3072 * dimension.
3073 */
3074 if (idim == ndim) {
3075 op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
3076 NBF_STRIDES(bufferdata)[iop] = innerstride;
3077 }
3078 }
3079 }
3080 }
3081
3082 if (check_writemasked_reductions) {
3083 for (iop = 0; iop < nop; ++iop) {
3084 /*
3085 * Check whether there are any WRITEMASKED REDUCE operands
3086 * which should be validated now that all the strides are filled
3087 * in.
3088 */
3089 if ((op_itflags[iop] &
3090 (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
3091 (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
3092 /*
3093 * If the ARRAYMASK has 'bigger' dimensions
3094 * than this REDUCE WRITEMASKED operand,
3095 * the result would be more than one mask
3096 * value per reduction element, something which
3097 * is invalid. This function provides validation
3098 * for that.
3099 */
3100 if (!check_mask_for_writemasked_reduction(iter, iop)) {
3101 return 0;
3102 }
3103 }
3104 }
3105 }
3106
3107 return 1;
3108 }
3109
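/*
 * Illustrative sketch (not part of the iterator): the coalescing test used
 * above to decide NPY_OP_ITFLAG_BUFNEVER for multi-dimensional operands.
 * If, after skipping length-1 axes, every further axis starts exactly where
 * the previous run of elements ends (stride == innerstride * innershape),
 * the whole operand can be walked with a single stride and never needs a
 * buffer for layout reasons. Shapes and strides are given in iteration
 * order; the helper name is made up.
 */
static int
single_stride_works_sketch(int ndim, const npy_intp *shape,
                           const npy_intp *strides)
{
    npy_intp innerstride = 0, innershape = 1;
    int idim;

    /* Find the stride of the first axis whose length is not 1 */
    for (idim = 0; idim < ndim; ++idim) {
        if (shape[idim] != 1) {
            innerstride = strides[idim];
            innershape = shape[idim];
            break;
        }
    }
    /* Check that every remaining axis coalesces with that run */
    for (++idim; idim < ndim; ++idim) {
        if (shape[idim] == 1) {
            continue;
        }
        if (strides[idim] != innerstride * innershape) {
            return 0;
        }
        innershape *= shape[idim];
    }
    return 1;
}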
3110 /*
3111 * The __array_priority__ attribute of the inputs determines
3112 * the subtype of any output arrays. This function finds the
3113 * subtype of the input array with highest priority.
3114 */
3115 static void
3116 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
3117 const npyiter_opitflags *op_itflags,
3118 double *subtype_priority,
3119 PyTypeObject **subtype)
3120 {
3121 int iop;
3122
3123 for (iop = 0; iop < nop; ++iop) {
3124 if (op[iop] != NULL && op_itflags[iop] & NPY_OP_ITFLAG_READ) {
3125 double priority = PyArray_GetPriority((PyObject *)op[iop], 0.0);
3126 if (priority > *subtype_priority) {
3127 *subtype_priority = priority;
3128 *subtype = Py_TYPE(op[iop]);
3129 }
3130 }
3131 }
3132 }
3133
3134 static int
3135 npyiter_allocate_transfer_functions(NpyIter *iter)
3136 {
3137 npy_uint32 itflags = NIT_ITFLAGS(iter);
3138 /*int ndim = NIT_NDIM(iter);*/
3139 int iop = 0, nop = NIT_NOP(iter);
3140
3141 npy_intp i;
3142 npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
3143 NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
3144 NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3145 PyArrayObject **op = NIT_OPERANDS(iter);
3146 PyArray_Descr **op_dtype = NIT_DTYPES(iter);
3147 npy_intp *strides = NAD_STRIDES(axisdata), op_stride;
3148 PyArray_StridedUnaryOp **readtransferfn = NBF_READTRANSFERFN(bufferdata),
3149 **writetransferfn = NBF_WRITETRANSFERFN(bufferdata);
3150 NpyAuxData **readtransferdata = NBF_READTRANSFERDATA(bufferdata),
3151 **writetransferdata = NBF_WRITETRANSFERDATA(bufferdata);
3152
3153 PyArray_StridedUnaryOp *stransfer = NULL;
3154 NpyAuxData *transferdata = NULL;
3155 int needs_api = 0;
3156
3157 for (iop = 0; iop < nop; ++iop) {
3158 npyiter_opitflags flags = op_itflags[iop];
3159 /*
3160 * Reduction operands may be buffered with a different stride,
3161 * so we must pass NPY_MAX_INTP to the transfer function factory.
3162 */
3163 op_stride = (flags & NPY_OP_ITFLAG_REDUCE) ? NPY_MAX_INTP :
3164 strides[iop];
3165
3166 /*
3167 * If we have determined that a buffer may be needed,
3168 * allocate the appropriate transfer functions
3169 */
3170 if (!(flags & NPY_OP_ITFLAG_BUFNEVER)) {
3171 if (flags & NPY_OP_ITFLAG_READ) {
3172 int move_references = 0;
3173 if (PyArray_GetDTypeTransferFunction(
3174 (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3175 op_stride,
3176 op_dtype[iop]->elsize,
3177 PyArray_DESCR(op[iop]),
3178 op_dtype[iop],
3179 move_references,
3180 &stransfer,
3181 &transferdata,
3182 &needs_api) != NPY_SUCCEED) {
3183 iop -= 1; /* This one cannot be cleaned up yet. */
3184 goto fail;
3185 }
3186 readtransferfn[iop] = stransfer;
3187 readtransferdata[iop] = transferdata;
3188 }
3189 else {
3190 readtransferfn[iop] = NULL;
3191 }
3192 if (flags & NPY_OP_ITFLAG_WRITE) {
3193 int move_references = 1;
3194
3195 /* If the operand is WRITEMASKED, use a masked transfer fn */
3196 if (flags & NPY_OP_ITFLAG_WRITEMASKED) {
3197 int maskop = NIT_MASKOP(iter);
3198 PyArray_Descr *mask_dtype = PyArray_DESCR(op[maskop]);
3199
3200 /*
3201 * If the mask's stride equals its itemsize (i.e. the mask is
3202 * contiguous), use it; otherwise the mask may or may not be
3203 * buffered, so the stride could be inconsistent.
3204 */
3205 if (PyArray_GetMaskedDTypeTransferFunction(
3206 (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3207 op_dtype[iop]->elsize,
3208 op_stride,
3209 (strides[maskop] == mask_dtype->elsize) ?
3210 mask_dtype->elsize :
3211 NPY_MAX_INTP,
3212 op_dtype[iop],
3213 PyArray_DESCR(op[iop]),
3214 mask_dtype,
3215 move_references,
3216 (PyArray_MaskedStridedUnaryOp **)&stransfer,
3217 &transferdata,
3218 &needs_api) != NPY_SUCCEED) {
3219 goto fail;
3220 }
3221 }
3222 else {
3223 if (PyArray_GetDTypeTransferFunction(
3224 (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3225 op_dtype[iop]->elsize,
3226 op_stride,
3227 op_dtype[iop],
3228 PyArray_DESCR(op[iop]),
3229 move_references,
3230 &stransfer,
3231 &transferdata,
3232 &needs_api) != NPY_SUCCEED) {
3233 goto fail;
3234 }
3235 }
3236 writetransferfn[iop] = stransfer;
3237 writetransferdata[iop] = transferdata;
3238 }
3239 /* If there is no write back but there are references, make a decref fn */
3240 else if (PyDataType_REFCHK(op_dtype[iop])) {
3241 /*
3242 * By passing NULL to dst_type and setting move_references
3243 * to 1, we get back a function that just decrements the
3244 * src references.
3245 */
3246 if (PyArray_GetDTypeTransferFunction(
3247 (flags & NPY_OP_ITFLAG_ALIGNED) != 0,
3248 op_dtype[iop]->elsize, 0,
3249 op_dtype[iop], NULL,
3250 1,
3251 &stransfer,
3252 &transferdata,
3253 &needs_api) != NPY_SUCCEED) {
3254 goto fail;
3255 }
3256 writetransferfn[iop] = stransfer;
3257 writetransferdata[iop] = transferdata;
3258 }
3259 else {
3260 writetransferfn[iop] = NULL;
3261 }
3262 }
3263 else {
3264 readtransferfn[iop] = NULL;
3265 writetransferfn[iop] = NULL;
3266 }
3267 }
3268
3269 /* If any of the dtype transfer functions needed the API, flag it */
3270 if (needs_api) {
3271 NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
3272 }
3273
3274 return 1;
3275
3276 fail:
3277 for (i = 0; i < iop+1; ++i) {
3278 if (readtransferdata[i] != NULL) {
3279 NPY_AUXDATA_FREE(readtransferdata[i]);
3280 readtransferdata[i] = NULL;
3281 }
3282 if (writetransferdata[i] != NULL) {
3283 NPY_AUXDATA_FREE(writetransferdata[i]);
3284 writetransferdata[i] = NULL;
3285 }
3286 }
3287 return 0;
3288 }
3289
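/*
 * Illustrative sketch (not part of the iterator): the per-operand choice
 * of write-side transfer made above when buffering may be needed. WRITE
 * operands get a write transfer (masked if WRITEMASKED), while read-only
 * operands whose dtype holds references still get a function that only
 * decrements the references left behind in the buffer. The enum and
 * helper names are made up.
 */
enum write_transfer_kind_sketch {
    WRITE_TRANSFER_NONE_SKETCH,
    WRITE_TRANSFER_PLAIN_SKETCH,
    WRITE_TRANSFER_MASKED_SKETCH,
    WRITE_TRANSFER_DECREF_ONLY_SKETCH
};

static enum write_transfer_kind_sketch
choose_write_transfer_sketch(npyiter_opitflags flags, int dtype_has_references)
{
    if (flags & NPY_OP_ITFLAG_WRITE) {
        return (flags & NPY_OP_ITFLAG_WRITEMASKED) ?
                WRITE_TRANSFER_MASKED_SKETCH : WRITE_TRANSFER_PLAIN_SKETCH;
    }
    if (dtype_has_references) {
        return WRITE_TRANSFER_DECREF_ONLY_SKETCH;
    }
    return WRITE_TRANSFER_NONE_SKETCH;
}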
3290 #undef NPY_ITERATOR_IMPLEMENTATION_CODE
3291