1#
2# Copyright (c) 2017 Intel Corporation
3# SPDX-License-Identifier: BSD-2-Clause
4#
5
6
7from math import sqrt
8import numbers
9import re
10import sys
11import dis
12import platform
13import types as pytypes
14import warnings
15from functools import reduce
16import numpy as np
17from numpy.random import randn
18import operator
19from collections import defaultdict, namedtuple
20
21import numba.parfors.parfor
22from numba import njit, prange, set_num_threads, get_num_threads
23from numba.core import (types, utils, typing, errors, ir, rewrites,
24                        typed_passes, inline_closurecall, config, compiler, cpu)
25from numba.extending import (overload_method, register_model,
26                             typeof_impl, unbox, NativeValue, models)
27from numba.core.registry import cpu_target
28from numba.core.annotations import type_annotations
29from numba.core.ir_utils import (find_callname, guard, build_definitions,
30                            get_definition, is_getitem, is_setitem,
31                            index_var_of_get_setitem)
32from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty
33from numba.core.bytecode import ByteCodeIter
34from numba.core.compiler import (compile_isolated, Flags, CompilerBase,
35                                 DefaultPassBuilder)
36from numba.core.compiler_machinery import register_pass, AnalysisPass
37from numba.core.typed_passes import IRLegalization
38from numba.tests.support import (TestCase, captured_stdout, MemoryLeakMixin,
39                      override_env_config, linux_only, tag,
40                      skip_parfors_unsupported, _32bit, needs_blas,
41                      needs_lapack, disabled_test, skip_unless_scipy)
42import cmath
43import unittest
44
45
# Skip decorator: skips the decorated test unless platform.machine() reports
# 'i386' or 'x86_64'.
x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test')

# Module-level integers; test_issue4963_globals reads them as globals from
# inside a compiled function to build an array shape.
_GLOBAL_INT_FOR_TESTING1 = 17
_GLOBAL_INT_FOR_TESTING2 = 5

# Two-field namedtuple type ('part0', 'part1') made available to the tests.
TestNamedTuple = namedtuple('TestNamedTuple', ('part0', 'part1'))
52
class TestParforsBase(TestCase):
    """
    Base class for testing parfors.
    Provides functions for compilation and three way comparison between
    python functions, njit'd functions and parfor njit'd functions.
    """

    _numba_parallel_test_ = False

    def __init__(self, *args):
        # flags for njit()
        self.cflags = Flags()
        self.cflags.set('nrt')

        # flags for njit(parallel=True)
        self.pflags = Flags()
        self.pflags.set('auto_parallel', cpu.ParallelOptions(True))
        self.pflags.set('nrt')

        # flags for njit(parallel=True, fastmath=True)
        self.fast_pflags = Flags()
        self.fast_pflags.set('auto_parallel', cpu.ParallelOptions(True))
        self.fast_pflags.set('nrt')
        self.fast_pflags.set('fastmath', cpu.FastMathOptions(True))
        super(TestParforsBase, self).__init__(*args)

    def _compile_this(self, func, sig, flags):
        """ compiles func for signature sig with the given Flags"""
        return compile_isolated(func, sig, flags=flags)

    def compile_parallel(self, func, sig):
        """ compiles func with the parallel flags (self.pflags)"""
        return self._compile_this(func, sig, flags=self.pflags)

    def compile_parallel_fastmath(self, func, sig):
        """ compiles func with the parallel + fastmath flags"""
        return self._compile_this(func, sig, flags=self.fast_pflags)

    def compile_njit(self, func, sig):
        """ compiles func with the plain njit flags (self.cflags)"""
        return self._compile_this(func, sig, flags=self.cflags)

    def compile_all(self, pyfunc, *args, **kwargs):
        """
        Compiles pyfunc twice for the types of `args`.

        Arguments:
            pyfunc - the python function to compile
            args - example arguments, the signature is taken from
                   numba.typeof() of each one
            kwargs - accepted but not used
        Returns:
            (cfunc, cpfunc) - the njit compile result and the parallel
            compile result
        """
        sig = tuple([numba.typeof(x) for x in args])

        # compile the prange injected function
        cpfunc = self.compile_parallel(pyfunc, sig)

        # compile a standard njit of the original function
        cfunc = self.compile_njit(pyfunc, sig)

        return cfunc, cpfunc

    def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs):
        """
        Checks python, njit and parfor impls produce the same result.

        Arguments:
            pyfunc - the python function to test
            cfunc - CompilerResult from njit of pyfunc
            cpfunc - CompilerResult from njit(parallel=True) of pyfunc
            args - arguments for the function being tested
        Keyword Arguments:
            scheduler_type - 'signed', 'unsigned' or None, default is None.
                           Supply in cases where the presence of a specific
                           scheduler is to be asserted.
            fastmath_pcres - a fastmath parallel compile result, if supplied
                             will be run to make sure the result is correct
            check_scheduling - default is True, if falsy the check that the
                               parallel compile result contains a scheduling
                               call is skipped
            check_fastmath - accepted for backwards compatibility, its value
                             is not used
            Remaining kwargs are passed to np.testing.assert_almost_equal
        """
        scheduler_type = kwargs.pop('scheduler_type', None)
        # consumed so it is not forwarded to assert_almost_equal, otherwise
        # unused
        kwargs.pop('check_fastmath', None)
        fastmath_pcres = kwargs.pop('fastmath_pcres', None)
        check_scheduling = kwargs.pop('check_scheduling', True)

        def copy_args(*args):
            # every implementation gets its own copy of mutable arguments so
            # that in-place modification by one run cannot affect the next
            if not args:
                return tuple()
            new_args = []
            for x in args:
                if isinstance(x, np.ndarray):
                    new_args.append(x.copy('k'))
                elif isinstance(x, np.number):
                    new_args.append(x.copy())
                elif isinstance(x, numbers.Number):
                    new_args.append(x)
                elif isinstance(x, tuple):
                    new_args.append(x)
                elif isinstance(x, list):
                    new_args.append(x[:])
                else:
                    raise ValueError('Unsupported argument type encountered')
            return tuple(new_args)

        # python result
        py_expected = pyfunc(*copy_args(*args))

        # njit result
        njit_output = cfunc.entry_point(*copy_args(*args))

        # parfor result
        parfor_output = cpfunc.entry_point(*copy_args(*args))

        np.testing.assert_almost_equal(njit_output, py_expected, **kwargs)
        np.testing.assert_almost_equal(parfor_output, py_expected, **kwargs)

        self.assertEqual(type(njit_output), type(parfor_output))

        if check_scheduling:
            self.check_scheduling(cpfunc, scheduler_type)

        # if requested check fastmath variant
        if fastmath_pcres is not None:
            parfor_fastmath_output = fastmath_pcres.entry_point(*copy_args(*args))
            np.testing.assert_almost_equal(parfor_fastmath_output, py_expected,
                                           **kwargs)


    def check_scheduling(self, cres, scheduler_type):
        """
        Asserts that the LLVM IR of cres contains a scheduling call.

        Arguments:
            cres - the compile result to inspect
            scheduler_type - 'signed', 'unsigned' or None, selects the
                             suffix appended to '@do_scheduling'
        Raises:
            ValueError if scheduler_type is any other value
        """
        # make sure parfor set up scheduling
        scheduler_str = '@do_scheduling'
        if scheduler_type is not None:
            if scheduler_type in ['signed', 'unsigned']:
                scheduler_str += '_' + scheduler_type
            else:
                msg = "Unknown scheduler_type specified: %s"
                raise ValueError(msg % scheduler_type)

        self.assertIn(scheduler_str, cres.library.get_llvm_str())

    def _filter_mod(self, mod, magicstr, checkstr=None):
        """ helper function to filter out modules by name"""
        filt = [x for x in mod if magicstr in x.name]
        if checkstr is not None:
            for x in filt:
                # assertIn rather than a bare assert: still raises
                # AssertionError but is not stripped when running with -O
                self.assertIn(checkstr, str(x))
        return filt

    def _get_gufunc_modules(self, cres, magicstr, checkstr=None):
        """ gets the gufunc LLVM Modules"""
        _modules = list(cres.library._codegen._engine._ee._modules)
        return self._filter_mod(_modules, magicstr, checkstr=checkstr)

    def _get_gufunc_info(self, cres, fn):
        """ helper for gufunc IR/asm generation"""
        # get the gufunc modules
        magicstr = '__numba_parfor_gufunc'
        gufunc_mods = self._get_gufunc_modules(cres, magicstr)
        # map each module name to whatever fn produces for that module
        return {mod.name: fn(mod) for mod in gufunc_mods}

    def _get_gufunc_ir(self, cres):
        """
        Returns the IR of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its IR.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        return self._get_gufunc_info(cres, str)

    def _get_gufunc_asm(self, cres):
        """
        Returns the assembly of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its assembly.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        tm = cres.library._codegen._tm
        def emit_asm(mod):
            return str(tm.emit_assembly(mod))
        return self._get_gufunc_info(cres, emit_asm)

    def assert_fastmath(self, pyfunc, sig):
        """
        Asserts that the fastmath flag has some effect in that suitable
        instructions are now labelled as `fast`. Whether LLVM can actually do
        anything to optimise better now the derestrictions are supplied is
        another matter!

        Arguments:
         pyfunc - a function that contains operations with parallel semantics
         sig - the type signature of pyfunc
        """

        cres = self.compile_parallel_fastmath(pyfunc, sig)
        _ir = self._get_gufunc_ir(cres)

        # \b for wholeword, compiled once for all lines
        fast_word = re.compile(r'\bfast\b')

        def _get_fast_instructions(ir):
            return [x for x in ir.splitlines()
                    if fast_word.search(x) is not None]

        def _assert_fast(instrs):
            ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp')
            fast_ops = tuple(op + ' fast' for op in ops)
            for inst in instrs:
                # every line mentioning `fast` must carry it on one of ops
                self.assertTrue(
                    any(match in inst for match in fast_ops),
                    msg="no fast floating point op in: %s" % inst)

        for guir in _ir.values():
            inst = _get_fast_instructions(guir)
            _assert_fast(inst)
262
263
def blackscholes_impl(sptprice, strike, rate, volatility, timev):
    # blackscholes example
    # Built only from arithmetic operators and np.log/np.sqrt/np.exp applied
    # to the arguments. test_blackscholes types all five arguments as 1D
    # float64 arrays and expects the whole body to become a single parfor.
    logterm = np.log(sptprice / strike)
    powterm = 0.5 * volatility * volatility
    den = volatility * np.sqrt(timev)
    d1 = (((rate + powterm) * timev) + logterm) / den
    d2 = d1 - den
    # NOTE: NofXd1/NofXd2 are plain linear expressions in d1/d2 here
    NofXd1 = 0.5 + 0.5 * 2.0 * d1
    NofXd2 = 0.5 + 0.5 * 2.0 * d2
    futureValue = strike * np.exp(- rate * timev)
    c1 = futureValue * NofXd2
    call = sptprice * NofXd1 - c1
    put = call - futureValue + sptprice
    # only the put value is returned
    return put
278
279
def lr_impl(Y, X, w, iterations):
    # logistic regression example
    # Performs `iterations` update steps; each step subtracts a np.dot based
    # term from w using augmented assignment, then w is returned.
    # test_logistic_regression pins the parfor and allocation counts for
    # this exact body.
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
    return w
285
def example_kmeans_test(A, numCenter, numIter, init_centroids):
    # k-means example used by test_kmeans.
    # A is indexed as a 2D array (N rows, D columns); centroids is indexed
    # with a row per center.
    centroids = init_centroids
    N, D = A.shape

    for l in range(numIter):
        # dist[i, j]: sqrt of the summed squared difference between row i of
        # A and centroid j
        dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2))
                                for j in range(numCenter)] for i in range(N)])
        # label of each row is the index of its smallest distance
        labels = np.array([dist[i,:].argmin() for i in range(N)])

        # new centroid i, column j: sum of column j over the rows labelled i
        # divided by the number of rows labelled i
        centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i)
                                 for j in range(D)] for i in range(numCenter)])

    return centroids
299
def get_optimized_numba_ir(test_func, args, **kws):
    """
    Runs test_func through the front end and then, by hand, through closure
    inlining, the rewrite passes, type inference, PreParforPass and
    ParforPass.

    Arguments:
        test_func - the python function to convert to Numba IR
        args - the argument types handed to type inference
        kws - if any are given they are passed, as a dict, to
              cpu.ParallelOptions; otherwise ParallelOptions(True) is used
    Returns:
        (test_ir, tp) - the IR produced by the front end (after the passes
        have run on it) and the TestPipeline whose `state` carries the
        typemap, return type and calltypes
    """
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx)
    test_ir = compiler.run_frontend(test_func)
    options = cpu.ParallelOptions(kws if kws else True)

    tp = TestPipeline(typingctx, targetctx, args, test_ir)
    # shorthand, the same state object is shared by every pass below
    state = tp.state

    with cpu_target.nested_context(typingctx, targetctx):
        typingctx.refresh()
        targetctx.refresh()

        inline_closurecall.InlineClosureCallPass(
            state.func_ir, options, typed=True).run()

        rewrites.rewrite_registry.apply('before-inference', state)

        inferred = typed_passes.type_inference_stage(
            state.typingctx, state.func_ir, state.args, None)
        state.typemap, state.return_type, state.calltypes = inferred

        # the annotation object is constructed and then discarded
        type_annotations.TypeAnnotation(
            func_ir=state.func_ir,
            typemap=state.typemap,
            calltypes=state.calltypes,
            lifted=(),
            lifted_from=None,
            args=state.args,
            return_type=state.return_type,
            html_output=config.HTML)

        diagnostics = numba.parfors.parfor.ParforDiagnostics()

        numba.parfors.parfor.PreParforPass(
            state.func_ir, state.typemap, state.calltypes,
            state.typingctx, options,
            swapped=diagnostics.replaced_fns).run()

        rewrites.rewrite_registry.apply('after-inference', state)

        flags = compiler.Flags()
        numba.parfors.parfor.ParforPass(
            state.func_ir, state.typemap, state.calltypes,
            state.return_type, state.typingctx, options, flags,
            diagnostics=diagnostics).run()

        # definitions are rebuilt from the blocks once all passes have run
        test_ir._definitions = build_definitions(test_ir.blocks)

    return test_ir, tp
355
def countParfors(test_func, args, **kws):
    """
    Returns the number of Parfor nodes found directly in the bodies of the
    blocks of the optimized IR of test_func (parfors nested inside other
    parfors are not visited).
    """
    func_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    return sum(
        1
        for blk in func_ir.blocks.values()
        for stmt in blk.body
        if isinstance(stmt, numba.parfors.parfor.Parfor)
    )
366
367
def countArrays(test_func, args, **kws):
    """
    Returns the array count computed by _count_arrays_inner over the
    optimized IR of test_func.
    """
    func_ir, pipeline = get_optimized_numba_ir(test_func, args, **kws)
    typemap = pipeline.state.typemap
    return _count_arrays_inner(func_ir.blocks, typemap)
371
def get_init_block_size(test_func, args, **kws):
    """
    Returns the total number of statements in the init blocks of the Parfor
    nodes found directly in the blocks of the optimized IR of test_func.
    """
    func_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    return sum(
        len(stmt.init_block.body)
        for blk in func_ir.blocks.values()
        for stmt in blk.body
        if isinstance(stmt, numba.parfors.parfor.Parfor)
    )
384
385def _count_arrays_inner(blocks, typemap):
386    ret_count = 0
387    arr_set = set()
388
389    for label, block in blocks.items():
390        for i, inst in enumerate(block.body):
391            if isinstance(inst, numba.parfors.parfor.Parfor):
392                parfor_blocks = inst.loop_body.copy()
393                parfor_blocks[0] = inst.init_block
394                ret_count += _count_arrays_inner(parfor_blocks, typemap)
395            if (isinstance(inst, ir.Assign)
396                    and isinstance(typemap[inst.target.name],
397                                    types.ArrayCompatible)):
398                arr_set.add(inst.target.name)
399
400    ret_count += len(arr_set)
401    return ret_count
402
def countArrayAllocs(test_func, args, **kws):
    """
    Returns the number of array allocation calls, as counted by
    _count_array_allocs_inner, summed over every block of the optimized IR
    of test_func.
    """
    func_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    return sum(_count_array_allocs_inner(func_ir, blk)
               for blk in func_ir.blocks.values())
411
412def _count_array_allocs_inner(func_ir, block):
413    ret_count = 0
414    for inst in block.body:
415        if isinstance(inst, numba.parfors.parfor.Parfor):
416            ret_count += _count_array_allocs_inner(func_ir, inst.init_block)
417            for b in inst.loop_body.values():
418                ret_count += _count_array_allocs_inner(func_ir, b)
419
420        if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr)
421                and inst.value.op == 'call'
422                and (guard(find_callname, func_ir, inst.value) == ('empty', 'numpy')
423                or guard(find_callname, func_ir, inst.value)
424                    == ('empty_inferred', 'numba.np.unsafe.ndarray'))):
425            ret_count += 1
426
427    return ret_count
428
def countNonParforArrayAccesses(test_func, args, **kws):
    """
    Returns the number of array getitem/setitem statements in the optimized
    IR of test_func that are not indexed by a parfor index variable.
    """
    func_ir, pipeline = get_optimized_numba_ir(test_func, args, **kws)
    typemap = pipeline.state.typemap
    return _count_non_parfor_array_accesses_inner(func_ir, func_ir.blocks,
                                                  typemap)
433
434def _count_non_parfor_array_accesses_inner(f_ir, blocks, typemap, parfor_indices=None):
435    ret_count = 0
436    if parfor_indices is None:
437        parfor_indices = set()
438
439    for label, block in blocks.items():
440        for stmt in block.body:
441            if isinstance(stmt, numba.parfors.parfor.Parfor):
442                parfor_indices.add(stmt.index_var.name)
443                parfor_blocks = stmt.loop_body.copy()
444                parfor_blocks[0] = stmt.init_block
445                ret_count += _count_non_parfor_array_accesses_inner(
446                    f_ir, parfor_blocks, typemap, parfor_indices)
447
448            # getitem
449            if (is_getitem(stmt) and isinstance(typemap[stmt.value.value.name],
450                        types.ArrayCompatible) and not _uses_indices(
451                        f_ir, index_var_of_get_setitem(stmt), parfor_indices)):
452                ret_count += 1
453
454            # setitem
455            if (is_setitem(stmt) and isinstance(typemap[stmt.target.name],
456                    types.ArrayCompatible) and not _uses_indices(
457                    f_ir, index_var_of_get_setitem(stmt), parfor_indices)):
458                ret_count += 1
459
460    return ret_count
461
462def _uses_indices(f_ir, index, index_set):
463    if index.name in index_set:
464        return True
465
466    ind_def = guard(get_definition, f_ir, index)
467    if isinstance(ind_def, ir.Expr) and ind_def.op == 'build_tuple':
468        varnames = set(v.name for v in ind_def.items)
469        return len(varnames & index_set) != 0
470
471    return False
472
473
class TestPipeline(object):
    """
    Minimal pipeline stand-in: holds only a `state` (compiler.StateDict)
    populated with the contexts, argument types and IR it is given.
    """

    def __init__(self, typingctx, targetctx, args, test_ir):
        state = compiler.StateDict()
        state.typingctx = typingctx
        state.targetctx = targetctx
        state.args = args
        state.func_ir = test_ir
        # typing results start out unset
        state.typemap = None
        state.return_type = None
        state.calltypes = None
        self.state = state
484
485
486class TestParfors(TestParforsBase):
487
488    def __init__(self, *args):
489        TestParforsBase.__init__(self, *args)
490        # these are used in the mass of simple tests
491        m = np.reshape(np.arange(12.), (3, 4))
492        self.simple_args = [np.arange(3.), np.arange(4.), m, m.T]
493
494    def check(self, pyfunc, *args, **kwargs):
495        cfunc, cpfunc = self.compile_all(pyfunc, *args)
496        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)
497
498    @skip_parfors_unsupported
    def test_arraymap(self):
        # element-wise a * x + y on three arrays
        def test_impl(a, x, y):
            return a * x + y

        # three 1D inputs of length 10
        A = np.linspace(0, 1, 10)
        X = np.linspace(2, 1, 10)
        Y = np.linspace(1, 2, 10)

        self.check(test_impl, A, X, Y)
508
509    @skip_parfors_unsupported
510    @needs_blas
    def test_mvdot(self):
        # np.dot of a matrix with a vector
        def test_impl(a, v):
            return np.dot(a, v)

        # A has shape (2, 10), v has length 10
        A = np.linspace(0, 1, 20).reshape(2, 10)
        v = np.linspace(2, 1, 10)

        self.check(test_impl, A, v)
519
520    @skip_parfors_unsupported
521    def test_0d_broadcast(self):
522        def test_impl():
523            X = np.array(1)
524            Y = np.ones((10, 12))
525            return np.sum(X + Y)
526        self.check(test_impl)
527        self.assertTrue(countParfors(test_impl, ()) == 1)
528
529    @skip_parfors_unsupported
530    def test_2d_parfor(self):
531        def test_impl():
532            X = np.ones((10, 12))
533            Y = np.zeros((10, 12))
534            return np.sum(X + Y)
535        self.check(test_impl)
536        self.assertTrue(countParfors(test_impl, ()) == 1)
537
538    @skip_parfors_unsupported
539    def test_pi(self):
540        def test_impl(n):
541            x = 2 * np.random.ranf(n) - 1
542            y = 2 * np.random.ranf(n) - 1
543            return 4 * np.sum(x**2 + y**2 < 1) / n
544
545        self.check(test_impl, 100000, decimal=1)
546        self.assertTrue(countParfors(test_impl, (types.int64, )) == 1)
547        self.assertTrue(countArrays(test_impl, (types.intp,)) == 0)
548
549    @skip_parfors_unsupported
550    def test_fuse_argmin_argmax_max_min(self):
551        for op in [np.argmin, np.argmax, np.min, np.max]:
552            def test_impl(n):
553                A = np.ones(n)
554                C = op(A)
555                B = A.sum()
556                return B + C
557            self.check(test_impl, 256)
558            self.assertTrue(countParfors(test_impl, (types.int64, )) == 1)
559            self.assertTrue(countArrays(test_impl, (types.intp,)) == 0)
560
561    @skip_parfors_unsupported
562    def test_blackscholes(self):
563        # blackscholes takes 5 1D float array args
564        args = (numba.float64[:], ) * 5
565        self.assertTrue(countParfors(blackscholes_impl, args) == 1)
566
567    @skip_parfors_unsupported
568    @needs_blas
569    def test_logistic_regression(self):
570        args = (numba.float64[:], numba.float64[:,:], numba.float64[:],
571                numba.int64)
572        self.assertTrue(countParfors(lr_impl, args) == 2)
573        self.assertTrue(countArrayAllocs(lr_impl, args) == 1)
574
575    @skip_parfors_unsupported
576    def test_kmeans(self):
577        np.random.seed(0)
578        N = 1024
579        D = 10
580        centers = 3
581        A = np.random.ranf((N, D))
582        init_centroids = np.random.ranf((centers, D))
583        self.check(example_kmeans_test, A, centers, 3, init_centroids,
584                                                                    decimal=1)
585        # TODO: count parfors after k-means fusion is working
586        # requires recursive parfor counting
587        arg_typs = (types.Array(types.float64, 2, 'C'), types.intp, types.intp,
588                    types.Array(types.float64, 2, 'C'))
589        self.assertTrue(
590            countNonParforArrayAccesses(example_kmeans_test, arg_typs) == 0)
591
592    @unittest.skipIf(not _32bit, "Only impacts 32 bit hardware")
593    @needs_blas
    def test_unsupported_combination_raises(self):
        """
        This test is in place until issues with the 'parallel'
        target on 32 bit hardware are fixed.
        """
        # defining and calling a parallel=True function must raise
        with self.assertRaises(errors.UnsupportedParforsError) as raised:
            @njit(parallel=True)
            def ddot(a, v):
                return np.dot(a, v)

            A = np.linspace(0, 1, 20).reshape(2, 10)
            v = np.linspace(2, 1, 10)
            ddot(A, v)

        # the error text must contain this message
        msg = ("The 'parallel' target is not currently supported on 32 bit "
               "hardware")
        self.assertIn(msg, str(raised.exception))
611
612    @skip_parfors_unsupported
    def test_simple01(self):
        # np.ones with an empty shape tuple
        def test_impl():
            return np.ones(())
        # check() is expected to fail its scheduling assertion here, so the
        # AssertionError and its message are what is verified
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
619
620    @skip_parfors_unsupported
    def test_simple02(self):
        # np.ones with a 1-tuple shape
        def test_impl():
            return np.ones((1,))
        self.check(test_impl)
625
626    @skip_parfors_unsupported
    def test_simple03(self):
        # np.ones with a 2D shape
        def test_impl():
            return np.ones((1, 2))
        self.check(test_impl)
631
632    @skip_parfors_unsupported
    def test_simple04(self):
        # np.ones with an integer (non-tuple) shape
        def test_impl():
            return np.ones(1)
        self.check(test_impl)
637
638    @skip_parfors_unsupported
    def test_simple07(self):
        # np.ones with an explicit complex128 dtype
        def test_impl():
            return np.ones((1, 2), dtype=np.complex128)
        self.check(test_impl)
643
644    @skip_parfors_unsupported
    def test_simple08(self):
        # sum of two freshly created arrays of the same shape
        def test_impl():
            return np.ones((1, 2)) + np.ones((1, 2))
        self.check(test_impl)
649
650    @skip_parfors_unsupported
    def test_simple09(self):
        # np.ones with shape (1, 1)
        def test_impl():
            return np.ones((1, 1))
        self.check(test_impl)
655
656    @skip_parfors_unsupported
    def test_simple10(self):
        # np.ones with a zero-sized shape (0, 0)
        def test_impl():
            return np.ones((0, 0))
        self.check(test_impl)
661
662    @skip_parfors_unsupported
    def test_simple11(self):
        # array plus a float literal
        def test_impl():
            return np.ones((10, 10)) + 1.
        self.check(test_impl)
667
668    @skip_parfors_unsupported
    def test_simple12(self):
        # array plus a np.complex128 scalar
        def test_impl():
            return np.ones((10, 10)) + np.complex128(1.)
        self.check(test_impl)
673
674    @skip_parfors_unsupported
    def test_simple13(self):
        # returns a scalar only, no array operation is involved
        def test_impl():
            return np.complex128(1.)
        # check() is expected to fail its scheduling assertion here, so the
        # AssertionError and its message are what is verified
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
681
682    @skip_parfors_unsupported
    def test_simple14(self):
        # slices a fresh (10, 10) array with start 0 and step 20
        def test_impl():
            return np.ones((10, 10))[0::20]
        self.check(test_impl)
687
688    @skip_parfors_unsupported
    def test_simple15(self):
        # v1 + v1; the other simple_args are passed but unused
        def test_impl(v1, v2, m1, m2):
            return v1 + v1
        self.check(test_impl, *self.simple_args)
693
694    @skip_parfors_unsupported
    def test_simple16(self):
        # m1 + m1; the other simple_args are passed but unused
        def test_impl(v1, v2, m1, m2):
            return m1 + m1
        self.check(test_impl, *self.simple_args)
699
700    @skip_parfors_unsupported
    def test_simple17(self):
        # adds the vector v1 to the matrix m2
        def test_impl(v1, v2, m1, m2):
            return m2 + v1
        self.check(test_impl, *self.simple_args)
705
706    @skip_parfors_unsupported
707    @needs_lapack
    def test_simple18(self):
        # adds element [1] of the np.linalg.svd(m2) result to m1.T
        def test_impl(v1, v2, m1, m2):
            return m1.T + np.linalg.svd(m2)[1]
        self.check(test_impl, *self.simple_args)
712
713    @skip_parfors_unsupported
714    @needs_blas
    def test_simple19(self):
        # np.dot of the matrix m1 with the vector v2
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, v2)
        self.check(test_impl, *self.simple_args)
719
720    @skip_parfors_unsupported
721    @needs_blas
    def test_simple20(self):
        # np.dot of the matrix m1 with the matrix m2
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, m2)
        # gemm is left to BLAS
        # so check() is expected to fail its scheduling assertion
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, *self.simple_args)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
729
730    @skip_parfors_unsupported
731    @needs_blas
    def test_simple21(self):
        # np.dot of the vector v1 with itself
        def test_impl(v1, v2, m1, m2):
            return np.dot(v1, v1)
        self.check(test_impl, *self.simple_args)
736
737    @skip_parfors_unsupported
    def test_simple22(self):
        # element-wise add followed by a sum reduction
        def test_impl(v1, v2, m1, m2):
            return np.sum(v1 + v1)
        self.check(test_impl, *self.simple_args)
742
743    @skip_parfors_unsupported
    def test_simple23(self):
        # same expression shape as test_pi but on the fixed input v1
        def test_impl(v1, v2, m1, m2):
            x = 2 * v1
            y = 2 * v1
            return 4 * np.sum(x**2 + y**2 < 1) / 10
        self.check(test_impl, *self.simple_args)
750
751    @skip_parfors_unsupported
    def test_simple24(self):
        # indexes the columns of A with an integer array, then sums
        def test_impl():
            n = 20
            A = np.ones((n, n))
            b = np.arange(n)
            return np.sum(A[:, b])
        self.check(test_impl)
759
760    @disabled_test
    def test_simple_operator_15(self):
        """same as corresponding test_simple_<n> case but using operator.add"""
        # operator.add(v1, v1) in place of v1 + v1
        def test_impl(v1, v2, m1, m2):
            return operator.add(v1, v1)

        self.check(test_impl, *self.simple_args)
767
768    @disabled_test
    def test_simple_operator_16(self):
        # operator.add(m1, m1) in place of m1 + m1 (compare test_simple16)
        def test_impl(v1, v2, m1, m2):
            return operator.add(m1, m1)

        self.check(test_impl, *self.simple_args)
774
775    @disabled_test
    def test_simple_operator_17(self):
        # operator.add(m2, v1) in place of m2 + v1 (compare test_simple17)
        def test_impl(v1, v2, m1, m2):
            return operator.add(m2, v1)

        self.check(test_impl, *self.simple_args)
781
782    @skip_parfors_unsupported
    def test_np_func_direct_import(self):
        from numpy import ones  # import here becomes FreeVar
        # `ones` is called by its bare name rather than as np.ones
        def test_impl(n):
            A = ones(n)
            return A[0]
        n = 111
        self.check(test_impl, n)
790
791    @skip_parfors_unsupported
792    def test_np_random_func_direct_import(self):
793        def test_impl(n):
794            A = randn(n)
795            return A[0]
796        self.assertTrue(countParfors(test_impl, (types.int64, )) == 1)
797
798    @skip_parfors_unsupported
799    def test_arange(self):
800        # test with stop only
801        def test_impl1(n):
802            return np.arange(n)
803        # start and stop
804        def test_impl2(s, n):
805            return np.arange(n)
806        # start, step, stop
807        def test_impl3(s, n, t):
808            return np.arange(s, n, t)
809
810        for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]:
811            self.check(test_impl1, arg)
812            self.check(test_impl2, 2, arg)
813            self.check(test_impl3, 2, arg, 2)
814
815    @skip_parfors_unsupported
    def test_linspace(self):
        # without num
        def test_impl1(start, stop):
            return np.linspace(start, stop)
        # with num
        def test_impl2(start, stop, num):
            return np.linspace(start, stop, num)

        # integer, float and complex stop values, start is always 2
        for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]:
            self.check(test_impl1, 2, arg)
            self.check(test_impl2, 2, arg, 30)
827
828    @skip_parfors_unsupported
    def test_size_assertion(self):
        # NOTE: the expected message below names the variables A and B, so
        # the names used inside test_impl matter
        def test_impl(m, n):
            A = np.ones(m)
            B = np.ones(n)
            return np.sum(A + B)

        # matching sizes work
        self.check(test_impl, 10, 10)
        # mismatched sizes must raise an AssertionError at call time
        with self.assertRaises(AssertionError) as raises:
            cfunc = njit(parallel=True)(test_impl)
            cfunc(10, 9)
        msg = "Sizes of A, B do not match"
        self.assertIn(msg, str(raises.exception))
841
842    @skip_parfors_unsupported
843    def test_mean(self):
844        def test_impl(A):
845            return A.mean()
846        N = 100
847        A = np.random.ranf(N)
848        B = np.random.randint(10, size=(N, 3))
849        self.check(test_impl, A)
850        self.check(test_impl, B)
851        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 1)
852        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )) == 1)
853
854    @skip_parfors_unsupported
855    def test_var(self):
856        def test_impl(A):
857            return A.var()
858        N = 100
859        A = np.random.ranf(N)
860        B = np.random.randint(10, size=(N, 3))
861        C = A + 1j * A
862        self.check(test_impl, A)
863        self.check(test_impl, B)
864        self.check(test_impl, C)
865        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 2)
866        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )) == 2)
867
868    @skip_parfors_unsupported
869    def test_std(self):
870        def test_impl(A):
871            return A.std()
872        N = 100
873        A = np.random.ranf(N)
874        B = np.random.randint(10, size=(N, 3))
875        C = A + 1j * A
876        self.check(test_impl, A)
877        self.check(test_impl, B)
878        self.check(test_impl, C)
879        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )) == 2)
880        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )) == 2)
881
882    @skip_parfors_unsupported
    def test_issue4963_globals(self):
        # Issue 4963 (per the test name): the shape tuple given to np.zeros
        # is built from the module-level integers _GLOBAL_INT_FOR_TESTING1
        # and _GLOBAL_INT_FOR_TESTING2.
        def test_impl():
            buf = np.zeros((_GLOBAL_INT_FOR_TESTING1, _GLOBAL_INT_FOR_TESTING2))
            return buf
        self.check(test_impl)
888
889    @skip_parfors_unsupported
    def test_issue4963_freevars(self):
        # Issue 4963 (per the test name): same as the globals variant, but
        # the two shape integers are locals of the enclosing test method and
        # therefore free variables of test_impl.
        _FREEVAR_INT_FOR_TESTING1 = 17
        _FREEVAR_INT_FOR_TESTING2 = 5
        def test_impl():
            buf = np.zeros((_FREEVAR_INT_FOR_TESTING1, _FREEVAR_INT_FOR_TESTING2))
            return buf
        self.check(test_impl)
897
898    @skip_parfors_unsupported
899    def test_random_parfor(self):
900        """
901        Test function with only a random call to make sure a random function
902        like ranf is actually translated to a parfor.
903        """
904        def test_impl(n):
905            A = np.random.ranf((n, n))
906            return A
907        self.assertTrue(countParfors(test_impl, (types.int64, )) == 1)
908
909    @skip_parfors_unsupported
910    def test_randoms(self):
911        def test_impl(n):
912            A = np.random.standard_normal(size=(n, n))
913            B = np.random.randn(n, n)
914            C = np.random.normal(0.0, 1.0, (n, n))
915            D = np.random.chisquare(1.0, (n, n))
916            E = np.random.randint(1, high=3, size=(n, n))
917            F = np.random.triangular(1, 2, 3, (n, n))
918            return np.sum(A+B+C+D+E+F)
919
920        n = 128
921        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
922        parfor_output = cpfunc.entry_point(n)
923        py_output = test_impl(n)
924        # check results within 5% since random numbers generated in parallel
925        np.testing.assert_allclose(parfor_output, py_output, rtol=0.05)
926        self.assertTrue(countParfors(test_impl, (types.int64, )) == 1)
927
928    @skip_parfors_unsupported
929    def test_dead_randoms(self):
930        def test_impl(n):
931            A = np.random.standard_normal(size=(n, n))
932            B = np.random.randn(n, n)
933            C = np.random.normal(0.0, 1.0, (n, n))
934            D = np.random.chisquare(1.0, (n, n))
935            E = np.random.randint(1, high=3, size=(n, n))
936            F = np.random.triangular(1, 2, 3, (n, n))
937            return 3
938
939        n = 128
940        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
941        parfor_output = cpfunc.entry_point(n)
942        py_output = test_impl(n)
943        self.assertEqual(parfor_output, py_output)
944        self.assertTrue(countParfors(test_impl, (types.int64, )) == 0)
945
946    @skip_parfors_unsupported
    def test_cfg(self):
        # from issue #2477
        # The kernel nests a plain range loop containing an if-branch inside
        # a two-iteration prange; outer iteration i covers the index range
        # [i*N//2, (i+1)*N//2), i.e. one half of [0, N) each.
        def test_impl(x, is_positive, N):
            for i in numba.prange(2):
                for j in range( i*N//2, (i+1)*N//2 ):
                    is_positive[j] = 0
                    if x[j] > 0:
                        is_positive[j] = 1

            return is_positive

        N = 100
        x = np.random.rand(N)
        is_positive = np.zeros(N)
        self.check(test_impl, x, is_positive, N)
962
963    @skip_parfors_unsupported
964    def test_reduce(self):
965        def test_impl(A):
966            init_val = 10
967            return reduce(lambda a,b: min(a, b), A, init_val)
968
969        n = 211
970        A = np.random.ranf(n)
971        self.check(test_impl, A)
972        A = np.random.randint(10, size=n).astype(np.int32)
973        self.check(test_impl, A)
974
975        # test checking the number of arguments for the reduce function
976        def test_impl():
977            g = lambda x: x ** 2
978            return reduce(g, np.array([1, 2, 3, 4, 5]), 2)
979        with self.assertTypingError():
980            self.check(test_impl)
981
982        # test checking reduction over bitarray masked arrays
983        n = 160
984        A = np.random.randint(10, size=n).astype(np.int32)
985        def test_impl(A):
986            return np.sum(A[A>=3])
987        self.check(test_impl, A)
988        # TODO: this should fuse
989        # self.assertTrue(countParfors(test_impl, (numba.float64[:],)) == 1)
990
991        def test_impl(A):
992            B = A[:,0]
993            return np.sum(A[B>=3,1])
994        self.check(test_impl, A.reshape((16,10)))
995        # TODO: this should also fuse
996        #self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 1)
997
998        def test_impl(A):
999            B = A[:,0]
1000            return np.sum(A[B>=3,1:2])
1001        self.check(test_impl, A.reshape((16,10)))
1002        # this doesn't fuse due to mixed indices
1003        self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 2)
1004
1005    @skip_parfors_unsupported
1006    def test_min(self):
1007        def test_impl1(A):
1008            return A.min()
1009
1010        def test_impl2(A):
1011            return np.min(A)
1012
1013        n = 211
1014        A = np.random.ranf(n)
1015        B = np.random.randint(10, size=n).astype(np.int32)
1016        C = np.random.ranf((n, n))  # test multi-dimensional array
1017        D = np.array([np.inf, np.inf])
1018        self.check(test_impl1, A)
1019        self.check(test_impl1, B)
1020        self.check(test_impl1, C)
1021        self.check(test_impl1, D)
1022        self.check(test_impl2, A)
1023        self.check(test_impl2, B)
1024        self.check(test_impl2, C)
1025        self.check(test_impl2, D)
1026
1027        # checks that 0d array input raises
1028        msg = ("zero-size array to reduction operation "
1029               "minimum which has no identity")
1030        for impl in (test_impl1, test_impl2):
1031            pcfunc = self.compile_parallel(impl, (types.int64[:],))
1032            with self.assertRaises(ValueError) as e:
1033                pcfunc.entry_point(np.array([], dtype=np.int64))
1034            self.assertIn(msg, str(e.exception))
1035
1036    @skip_parfors_unsupported
1037    def test_max(self):
1038        def test_impl1(A):
1039            return A.max()
1040
1041        def test_impl2(A):
1042            return np.max(A)
1043
1044        n = 211
1045        A = np.random.ranf(n)
1046        B = np.random.randint(10, size=n).astype(np.int32)
1047        C = np.random.ranf((n, n))  # test multi-dimensional array
1048        D = np.array([-np.inf, -np.inf])
1049        self.check(test_impl1, A)
1050        self.check(test_impl1, B)
1051        self.check(test_impl1, C)
1052        self.check(test_impl1, D)
1053        self.check(test_impl2, A)
1054        self.check(test_impl2, B)
1055        self.check(test_impl2, C)
1056        self.check(test_impl2, D)
1057
1058        # checks that 0d array input raises
1059        msg = ("zero-size array to reduction operation "
1060               "maximum which has no identity")
1061        for impl in (test_impl1, test_impl2):
1062            pcfunc = self.compile_parallel(impl, (types.int64[:],))
1063            with self.assertRaises(ValueError) as e:
1064                pcfunc.entry_point(np.array([], dtype=np.int64))
1065            self.assertIn(msg, str(e.exception))
1066
1067    @skip_parfors_unsupported
1068    def test_use_of_reduction_var1(self):
1069        def test_impl():
1070            acc = 0
1071            for i in prange(1):
1072                acc = cmath.sqrt(acc)
1073            return acc
1074
1075        # checks that invalid use of reduction variable is detected
1076        msg = ("Use of reduction variable acc in an unsupported reduction function.")
1077        with self.assertRaises(ValueError) as e:
1078            pcfunc = self.compile_parallel(test_impl, ())
1079        self.assertIn(msg, str(e.exception))
1080
1081    @skip_parfors_unsupported
1082    def test_argmin(self):
1083        def test_impl1(A):
1084            return A.argmin()
1085
1086        def test_impl2(A):
1087            return np.argmin(A)
1088
1089        n = 211
1090        A = np.array([1., 0., 2., 0., 3.])
1091        B = np.random.randint(10, size=n).astype(np.int32)
1092        C = np.random.ranf((n, n))  # test multi-dimensional array
1093        self.check(test_impl1, A)
1094        self.check(test_impl1, B)
1095        self.check(test_impl1, C)
1096        self.check(test_impl2, A)
1097        self.check(test_impl2, B)
1098        self.check(test_impl2, C)
1099
1100        # checks that 0d array input raises
1101        msg = 'attempt to get argmin of an empty sequence'
1102        for impl in (test_impl1, test_impl2):
1103            pcfunc = self.compile_parallel(impl, (types.int64[:],))
1104            with self.assertRaises(ValueError) as e:
1105                pcfunc.entry_point(np.array([], dtype=np.int64))
1106            self.assertIn(msg, str(e.exception))
1107
1108    @skip_parfors_unsupported
1109    def test_argmax(self):
1110        def test_impl1(A):
1111            return A.argmax()
1112
1113        def test_impl2(A):
1114            return np.argmax(A)
1115
1116        n = 211
1117        A = np.array([1., 0., 3., 2., 3.])
1118        B = np.random.randint(10, size=n).astype(np.int32)
1119        C = np.random.ranf((n, n))  # test multi-dimensional array
1120        self.check(test_impl1, A)
1121        self.check(test_impl1, B)
1122        self.check(test_impl1, C)
1123        self.check(test_impl2, A)
1124        self.check(test_impl2, B)
1125        self.check(test_impl2, C)
1126
1127        # checks that 0d array input raises
1128        msg = 'attempt to get argmax of an empty sequence'
1129        for impl in (test_impl1, test_impl2):
1130            pcfunc = self.compile_parallel(impl, (types.int64[:],))
1131            with self.assertRaises(ValueError) as e:
1132                pcfunc.entry_point(np.array([], dtype=np.int64))
1133            self.assertIn(msg, str(e.exception))
1134
1135    @skip_parfors_unsupported
    def test_parfor_array_access1(self):
        # signed index of the prange generated by sum() should be replaced
        # resulting in array A to be eliminated (see issue #2846)
        def test_impl(n):
            A = np.ones(n)
            return A.sum()

        n = 211
        self.check(test_impl, n)
        # countArrays is expected to report no arrays for the intp signature
        self.assertEqual(countArrays(test_impl, (types.intp,)), 0)
1146
1147    @skip_parfors_unsupported
    def test_parfor_array_access2(self):
        # in this test, the prange index has the same name (i) in two loops
        # thus, i has multiple definitions and is harder to replace
        def test_impl(n):
            A = np.ones(n)
            m = 0
            # note: the parameter n is rebound here, after A was sized by it
            n = 0
            for i in numba.prange(len(A)):
                m += A[i]

            for i in numba.prange(len(A)):
                if m == n:  # access in another block
                    n += A[i]

            return m + n

        n = 211
        self.check(test_impl, n)
        # countNonParforArrayAccesses is expected to report zero accesses
        self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0)
1167
1168    @skip_parfors_unsupported
    def test_parfor_array_access3(self):
        # The kernel assigns to the prange index `i` inside the loop body;
        # the test expects this to be rejected with UnsupportedRewriteError.
        def test_impl(n):
            A = np.ones(n, np.int64)
            m = 0
            for i in numba.prange(len(A)):
                m += A[i]
                if m==2:
                    i = m

        n = 211
        with self.assertRaises(errors.UnsupportedRewriteError) as raises:
            self.check(test_impl, n)
        self.assertIn("Overwrite of parallel loop index", str(raises.exception))
1182
1183    @skip_parfors_unsupported
1184    @needs_blas
1185    def test_parfor_array_access4(self):
1186        # in this test, one index of a multi-dim access should be replaced
1187        # np.dot parallel implementation produces this case
1188        def test_impl(A, b):
1189            return np.dot(A, b)
1190
1191        n = 211
1192        d = 4
1193        A = np.random.ranf((n, d))
1194        b = np.random.ranf(d)
1195        self.check(test_impl, A, b)
1196        # make sure the parfor index is replaced in build_tuple of access to A
1197        test_ir, tp = get_optimized_numba_ir(
1198            test_impl, (types.Array(types.float64, 2, 'C'),
1199                        types.Array(types.float64, 1, 'C')))
1200        # this code should have one basic block after optimization
1201        self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks)
1202        block = test_ir.blocks[0]
1203        parfor_found = False
1204        parfor = None
1205        for stmt in block.body:
1206            if isinstance(stmt, numba.parfors.parfor.Parfor):
1207                parfor_found = True
1208                parfor = stmt
1209
1210        self.assertTrue(parfor_found)
1211        build_tuple_found = False
1212        # there should be only one build_tuple
1213        for bl in parfor.loop_body.values():
1214            for stmt in bl.body:
1215                if (isinstance(stmt, ir.Assign)
1216                        and isinstance(stmt.value, ir.Expr)
1217                        and stmt.value.op == 'build_tuple'):
1218                    build_tuple_found = True
1219                    self.assertTrue(parfor.index_var in stmt.value.items)
1220
1221        self.assertTrue(build_tuple_found)
1222
1223    @skip_parfors_unsupported
    def test_parfor_dtype_type(self):
        # test array type replacement creates proper type
        # (every element is overwritten with a.dtype.type(0) inside a prange)
        def test_impl(a):
            for i in numba.prange(len(a)):
                a[i] = a.dtype.type(0)
            return a[4]

        a = np.ones(10)
        self.check(test_impl, a)
1233
1234    @skip_parfors_unsupported
    def test_parfor_array_access5(self):
        # one dim is slice in multi-dim access
        def test_impl(n):
            X = np.ones((n, 3))
            y = 0
            for i in numba.prange(n):
                y += X[i,:].sum()
            return y

        n = 211
        self.check(test_impl, n)
        # countNonParforArrayAccesses is expected to report zero accesses
        self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0)
1247
1248    @skip_parfors_unsupported
1249    @disabled_test # Test itself is problematic, see #3155
    def test_parfor_hoist_setitem(self):
        # Make sure that read of out is not hoisted.
        # Every prange iteration both reads and writes out[0]. The test is
        # currently disabled via the @disabled_test decorator (see #3155).
        def test_impl(out):
            for i in prange(10):
                out[0] = 2 * out[0]
            return out[0]

        out = np.ones(1)
        self.check(test_impl, out)
1259
1260    @skip_parfors_unsupported
1261    @needs_blas
    def test_parfor_generate_fuse(self):
        # issue #2857
        # The array expression using np.dot sits inside a plain range loop.
        def test_impl(N, D):
            w = np.ones(D)
            X = np.ones((N, D))
            Y = np.ones(N)
            for i in range(3):
                B = (-Y * np.dot(X, w))

            return B

        n = 211
        d = 3
        self.check(test_impl, n, d)
        # both the allocation count and the parfor count are expected to be 4
        self.assertEqual(countArrayAllocs(test_impl, (types.intp, types.intp)), 4)
        self.assertEqual(countParfors(test_impl, (types.intp, types.intp)), 4)
1278
1279    @skip_parfors_unsupported
    def test_ufunc_expr(self):
        # issue #2885
        # np.bitwise_and applied to two uint8 arrays
        def test_impl(A, B):
            return np.bitwise_and(A, B)

        A = np.ones(3, np.uint8)
        B = np.ones(3, np.uint8)
        # make the inputs differ in one position
        B[1] = 0
        self.check(test_impl, A, B)
1289
1290    @skip_parfors_unsupported
    def test_find_callname_intrinsic(self):
        # unsafe_empty is the name under which this module imports
        # numba.np.unsafe.ndarray.empty_inferred
        def test_impl(n):
            A = unsafe_empty((n,))
            for i in range(n):
                A[i] = i + 2.0
            return A

        # the unsafe allocation should be found even though it is imported
        # as a different name
        self.assertEqual(countArrayAllocs(test_impl, (types.intp,)), 1)
1301
1302    @skip_parfors_unsupported
    def test_reduction_var_reuse(self):
        # issue #3139
        # the same scalar accumulator is updated by two consecutive pranges
        def test_impl(n):
            acc = 0
            for i in prange(n):
                acc += 1

            for i in prange(n):
                acc += 2

            return acc
        self.check(test_impl, 16)
1315
1316    @skip_parfors_unsupported
    def test_two_d_array_reduction_reuse(self):
        # a 13x17 integer array is accumulated in place (+=) by two
        # consecutive prange loops
        def test_impl(n):
            shp = (13, 17)
            size = shp[0] * shp[1]
            result1 = np.zeros(shp, np.int_)
            tmp = np.arange(size).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)
1333
1334    @skip_parfors_unsupported
    def test_one_d_array_reduction(self):
        # a one-element integer array is accumulated in place (+=) with a
        # freshly built array holding the loop index on every iteration
        def test_impl(n):
            result = np.zeros(1, np.int_)

            for i in numba.prange(n):
                result += np.array([i], np.int_)

            return result

        self.check(test_impl, 100)
1345
1346    @skip_parfors_unsupported
    def test_two_d_array_reduction(self):
        # a 13x17 integer array is accumulated in place (+=) by one prange
        def test_impl(n):
            shp = (13, 17)
            size = shp[0] * shp[1]
            result1 = np.zeros(shp, np.int_)
            tmp = np.arange(size).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)
1360
1361    @skip_parfors_unsupported
    def test_two_d_array_reduction_with_float_sizes(self):
        # result1 is float32 and tmp is float64.
        # Tests reduction with differing dtypes.
        def test_impl(n):
            shp = (2, 3)
            result1 = np.zeros(shp, np.float32)
            # no dtype is given here, so the array is built from float literals
            tmp = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(shp)

            for i in numba.prange(n):
                result1 += tmp

            return result1

        self.check(test_impl, 100)
1376
1377    @skip_parfors_unsupported
    def test_two_d_array_reduction_prod(self):
        # in-place multiplication (*=) of a 13x17 integer array inside a
        # prange; both operands start out filled with 2
        def test_impl(n):
            shp = (13, 17)
            result1 = 2 * np.ones(shp, np.int_)
            tmp = 2 * np.ones_like(result1)

            for i in numba.prange(n):
                result1 *= tmp

            return result1

        self.check(test_impl, 100)
1390
1391    @skip_parfors_unsupported
    def test_three_d_array_reduction(self):
        # a 3x2x7 integer array is accumulated in place (+=) with an array
        # of ones that is allocated inside the prange body
        def test_impl(n):
            shp = (3, 2, 7)
            result1 = np.zeros(shp, np.int_)

            for i in numba.prange(n):
                result1 += np.ones(shp, np.int_)

            return result1

        self.check(test_impl, 100)
1403
1404    @skip_parfors_unsupported
    def test_preparfor_canonicalize_kws(self):
        # test canonicalize_array_math typing for calls with kw args
        # (the kernel calls A.argsort() and adds 1 to the result)
        def test_impl(A):
            return A.argsort() + 1

        n = 211
        A = np.arange(n)
        self.check(test_impl, A)
1413
1414    @skip_parfors_unsupported
    def test_preparfor_datetime64(self):
        # test array.dtype transformation for datetime64
        def test_impl(A):
            return A.dtype

        A = np.empty(1, np.dtype('datetime64[ns]'))
        # compile the parallel version directly and compare the returned
        # dtype with what the pure-Python function returns
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),))
        self.assertEqual(cpfunc.entry_point(A), test_impl(A))
1423
1424    @skip_parfors_unsupported
    def test_no_hoisting_with_member_function_call(self):
        # The set R is created inside the prange body and then mutated via
        # the member call R.add(i), so its contents depend on the iteration.
        def test_impl(X):
            n = X.shape[0]
            acc = 0
            for i in prange(n):
                R = {1, 2, 3}
                R.add(i)
                tmp = 0
                for x in R:
                    tmp += x
                acc += tmp
            return acc

        self.check(test_impl, np.random.ranf(128))
1439
1440    @skip_parfors_unsupported
    def test_array_compare_scalar(self):
        """ issue3671: X != 0 becomes an arrayexpr with operator.ne.
            That is turned into a parfor by devectorizing.  Make sure
            the return type of the devectorized operator.ne
            on integer types works properly.
        """
        def test_impl():
            # integer array compared against an integer scalar
            X = np.zeros(10, dtype=np.int_)
            return X != 0

        self.check(test_impl)
1452
1453    @skip_parfors_unsupported
    def test_reshape_with_neg_one(self):
        # issue3314
        # sub_a is reshaped with -1 as its first dimension
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            # both parameters are rebound to scalars from here on
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                   7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        b = 3

        self.check(test_impl, a, b)
1471
1472    @skip_parfors_unsupported
    def test_reshape_with_large_neg(self):
        # issue3314
        # same kernel as the -1 variant, but the first reshape dimension is
        # a negative value other than -1 (-1307)
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            # both parameters are rebound to scalars from here on
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1307, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                   7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        b = 3

        self.check(test_impl, a, b)
1490
1491    @skip_parfors_unsupported
    def test_reshape_with_too_many_neg_one(self):
        # issue3314
        # Two negative reshape dimensions are expected to be rejected with
        # UnsupportedRewriteError. Definition and call both sit inside the
        # assertRaises block.
        with self.assertRaises(errors.UnsupportedRewriteError) as raised:
            @njit(parallel=True)
            def test_impl(a, b):
                rm = np.zeros((b, b, 1), dtype=np.float64)
                sub_a = a[0:b]
                a = sub_a.size
                b = a / 1
                z = sub_a.reshape(-1, -1)
                result_data = sub_a / z
                rm[:,:,0] = result_data
                return rm

            a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                       7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
            b = 3
            test_impl(a, b)

        msg = ("The reshape API may only include one negative argument.")
        self.assertIn(msg, str(raised.exception))
1513
1514    @skip_parfors_unsupported
1515    def test_ndarray_fill(self):
1516        def test_impl(x):
1517            x.fill(7.0)
1518            return x
1519        x = np.zeros(10)
1520        self.check(test_impl, x)
1521        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'),)) == 1)
1522
1523    @skip_parfors_unsupported
1524    def test_ndarray_fill2d(self):
1525        def test_impl(x):
1526            x.fill(7.0)
1527            return x
1528        x = np.zeros((2,2))
1529        self.check(test_impl, x)
1530        self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'),)) == 1)
1531
1532    @skip_parfors_unsupported
    def test_0d_array(self):
        # several reductions applied to one zero-dimensional input
        def test_impl(n):
            return np.sum(n) + np.prod(n) + np.min(n) + np.max(n) + np.var(n)
        # np.array(7) is a 0-d array; check is told not to check scheduling
        self.check(test_impl, np.array(7), check_scheduling=False)
1537
1538    @skip_parfors_unsupported
    def test_array_analysis_optional_def(self):
        # parr is always defined as a full-length slice and conditionally
        # redefined as a half-length slice before it is reduced
        def test_impl(x, half):
            size = len(x)
            parr = x[0:size]

            if half:
                parr = x[0:size//2]

            return parr.sum()
        x = np.ones(20)
        # exercised with half=True; check is told not to check scheduling
        self.check(test_impl, x, True, check_scheduling=False)
1550
1551    @skip_parfors_unsupported
1552    def test_prange_side_effects(self):
1553        def test_impl(a, b):
1554            data = np.empty(len(a), dtype=np.float64)
1555            size = len(data)
1556            for i in numba.prange(size):
1557                data[i] = a[i]
1558            for i in numba.prange(size):
1559                data[i] = data[i] + b[i]
1560            return data
1561
1562        x = np.arange(10 ** 2, dtype=float)
1563        y = np.arange(10 ** 2, dtype=float)
1564
1565        self.check(test_impl, x, y)
1566        self.assertTrue(countParfors(test_impl,
1567                                    (types.Array(types.float64, 1, 'C'),
1568                                     types.Array(types.float64, 1, 'C'))) == 1)
1569
1570    @skip_parfors_unsupported
    def test_tuple1(self):
        # a tuple of integer literals defined outside the prange is indexed
        # inside the loop body
        def test_impl(a):
            atup = (3, 4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0] + atup[1] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)
1581
1582    @skip_parfors_unsupported
    def test_tuple2(self):
        # the tuple used inside the prange is the array's own shape
        def test_impl(a):
            atup = a.shape
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)
1593
1594    @skip_parfors_unsupported
    def test_tuple3(self):
        # the tuple used inside the prange mixes an array with an integer
        def test_impl(a):
            atup = (np.arange(10), 4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += atup[0][5] + atup[1] + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)
1605
1606    @skip_parfors_unsupported
    def test_namedtuple1(self):
        # uses the module-level TestNamedTuple type; its fields are read by
        # attribute inside the prange body
        def test_impl(a):
            antup = TestNamedTuple(part0=3, part1=4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += antup.part0 + antup.part1 + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)
1617
1618    @skip_parfors_unsupported
    def test_namedtuple2(self):
        # same as test_namedtuple1, but the namedtuple type is created
        # locally in the test method, making it a free variable of test_impl
        TestNamedTuple2 = namedtuple('TestNamedTuple2', ('part0', 'part1'))
        def test_impl(a):
            antup = TestNamedTuple2(part0=3, part1=4)
            b = 7
            for i in numba.prange(len(a)):
                a[i] += antup.part0 + antup.part1 + b
            return a

        x = np.arange(10)
        self.check(test_impl, x)
1630
1631    @skip_parfors_unsupported
1632    def test_inplace_binop(self):
1633        def test_impl(a, b):
1634            b += a
1635            return b
1636
1637        X = np.arange(10) + 10
1638        Y = np.arange(10) + 100
1639        self.check(test_impl, X, Y)
1640        self.assertTrue(countParfors(test_impl,
1641                                    (types.Array(types.float64, 1, 'C'),
1642                                     types.Array(types.float64, 1, 'C'))) == 1)
1643
class TestParforsLeaks(MemoryLeakMixin, TestParforsBase):
    # Parfors tests that run with MemoryLeakMixin mixed in.

    def check(self, pyfunc, *args, **kwargs):
        # Build the compiled variants from the positional arguments, then
        # hand everything to the comparison helper. Keyword arguments only
        # reach the comparison step.
        compiled_fn, compiled_par_fn = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(
            pyfunc, compiled_fn, compiled_par_fn, *args, **kwargs)

    @skip_parfors_unsupported
    def test_reduction(self):
        # issue4299
        @njit(parallel=True)
        def test_impl(arr):
            return arr.sum()

        data = np.arange(10).astype(np.float64)
        self.check(test_impl, data)

    @skip_parfors_unsupported
    def test_multiple_reduction_vars(self):
        # two scalar accumulators are updated in the same prange
        @njit(parallel=True)
        def test_impl(arr):
            a = 0.
            b = 1.
            for i in prange(arr.size):
                a += arr[i]
                b += 1. / (arr[i] + 1)
            return a * b

        data = np.arange(10).astype(np.float64)
        self.check(test_impl, data)
1671
1672
1673class TestPrangeBase(TestParforsBase):
1674
    def __init__(self, *args):
        # Forward all positional arguments unchanged to the
        # TestParforsBase initialiser (called explicitly, not via super()).
        TestParforsBase.__init__(self, *args)
1677
1678    def generate_prange_func(self, pyfunc, patch_instance):
1679        """
1680        This function does the actual code augmentation to enable the explicit
1681        testing of `prange` calls in place of `range`.
1682        """
1683        pyfunc_code = pyfunc.__code__
1684
1685        prange_names = list(pyfunc_code.co_names)
1686
1687        if patch_instance is None:
1688            # patch all instances, cheat by just switching
1689            # range for prange
1690            assert 'range' in pyfunc_code.co_names
1691            prange_names = tuple([x if x != 'range' else 'prange'
1692                                  for x in pyfunc_code.co_names])
1693            new_code = bytes(pyfunc_code.co_code)
1694        else:
1695            # patch specified instances...
1696            # find where 'range' is in co_names
1697            range_idx = pyfunc_code.co_names.index('range')
1698            range_locations = []
1699            # look for LOAD_GLOBALs that point to 'range'
1700            for instr in dis.Bytecode(pyfunc_code):
1701                if instr.opname == 'LOAD_GLOBAL':
1702                    if instr.arg == range_idx:
1703                        range_locations.append(instr.offset + 1)
1704            # add in 'prange' ref
1705            prange_names.append('prange')
1706            prange_names = tuple(prange_names)
1707            prange_idx = len(prange_names) - 1
1708            new_code = bytearray(pyfunc_code.co_code)
1709            assert len(patch_instance) <= len(range_locations)
1710            # patch up the new byte code
1711            for i in patch_instance:
1712                idx = range_locations[i]
1713                new_code[idx] = prange_idx
1714            new_code = bytes(new_code)
1715
1716        # create new code parts
1717        co_args = [pyfunc_code.co_argcount]
1718
1719        if utils.PYVERSION >= (3, 8):
1720            co_args.append(pyfunc_code.co_posonlyargcount)
1721        co_args.append(pyfunc_code.co_kwonlyargcount)
1722        co_args.extend([pyfunc_code.co_nlocals,
1723                        pyfunc_code.co_stacksize,
1724                        pyfunc_code.co_flags,
1725                        new_code,
1726                        pyfunc_code.co_consts,
1727                        prange_names,
1728                        pyfunc_code.co_varnames,
1729                        pyfunc_code.co_filename,
1730                        pyfunc_code.co_name,
1731                        pyfunc_code.co_firstlineno,
1732                        pyfunc_code.co_lnotab,
1733                        pyfunc_code.co_freevars,
1734                        pyfunc_code.co_cellvars
1735                        ])
1736
1737        # create code object with prange mutation
1738        prange_code = pytypes.CodeType(*co_args)
1739
1740        # get function
1741        pfunc = pytypes.FunctionType(prange_code, globals())
1742
1743        return pfunc
1744
1745    def prange_tester(self, pyfunc, *args, **kwargs):
1746        """
1747        The `prange` tester
1748        This is a hack. It basically switches out range calls for prange.
1749        It does this by copying the live code object of a function
1750        containing 'range' then copying the .co_names and mutating it so
1751        that 'range' is replaced with 'prange'. It then creates a new code
1752        object containing the mutation and instantiates a function to contain
1753        it. At this point three results are created:
1754        1. The result of calling the original python function.
1755        2. The result of calling a njit compiled version of the original
1756            python function.
1757        3. The result of calling a njit(parallel=True) version of the mutated
1758           function containing `prange`.
1759        The three results are then compared and the `prange` based function's
1760        llvm_ir is inspected to ensure the scheduler code is present.
1761
1762        Arguments:
1763         pyfunc - the python function to test
1764         args - data arguments to pass to the pyfunc under test
1765
1766        Keyword Arguments:
1767         patch_instance - iterable containing which instances of `range` to
1768                          replace. If not present all instance of `range` are
1769                          replaced.
1770         scheduler_type - 'signed', 'unsigned' or None, default is None.
1771                           Supply in cases where the presence of a specific
1772                           scheduler is to be asserted.
1773         check_fastmath - if True then a check will be performed to ensure the
1774                          IR contains instructions labelled with 'fast'
1775         check_fastmath_result - if True then a check will be performed to
1776                                 ensure the result of running with fastmath
1777                                 on matches that of the pyfunc
1778         Remaining kwargs are passed to np.testing.assert_almost_equal
1779
1780
1781        Example:
1782            def foo():
1783                acc = 0
1784                for x in range(5):
1785                    for y in range(10):
1786                        acc +=1
1787                return acc
1788
1789            # calling as
1790            prange_tester(foo)
1791            # will test code equivalent to
1792            # def foo():
1793            #     acc = 0
1794            #     for x in prange(5): # <- changed
1795            #         for y in prange(10): # <- changed
1796            #             acc +=1
1797            #     return acc
1798
1799            # calling as
1800            prange_tester(foo, patch_instance=[1])
1801            # will test code equivalent to
1802            # def foo():
1803            #     acc = 0
1804            #     for x in range(5): # <- outer loop (0) unchanged
1805            #         for y in prange(10): # <- inner loop (1) changed
1806            #             acc +=1
1807            #     return acc
1808
1809        """
1810        patch_instance = kwargs.pop('patch_instance', None)
1811        check_fastmath = kwargs.pop('check_fastmath', False)
1812        check_fastmath_result = kwargs.pop('check_fastmath_result', False)
1813
1814        pfunc = self.generate_prange_func(pyfunc, patch_instance)
1815
1816        # Compile functions
1817        # compile a standard njit of the original function
1818        sig = tuple([numba.typeof(x) for x in args])
1819        cfunc = self.compile_njit(pyfunc, sig)
1820
1821        # compile the prange injected function
1822        with warnings.catch_warnings(record=True) as raised_warnings:
1823            warnings.simplefilter('always')
1824            cpfunc = self.compile_parallel(pfunc, sig)
1825
1826        # if check_fastmath is True then check fast instructions
1827        if check_fastmath:
1828            self.assert_fastmath(pfunc, sig)
1829
1830        # if check_fastmath_result is True then compile a function
1831        # so that the parfors checker can assert the result is ok.
1832        if check_fastmath_result:
1833            fastcpfunc = self.compile_parallel_fastmath(pfunc, sig)
1834            kwargs = dict({'fastmath_pcres': fastcpfunc}, **kwargs)
1835
1836        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)
1837        return raised_warnings
1838
1839
1840class TestPrange(TestPrangeBase):
1841    """ Tests Prange """
1842
1843    @skip_parfors_unsupported
    def test_prange01(self):
        # Basic array fill: every iteration writes only its own element A[i].
        # All `range` references are patched; the 'unsigned' scheduler and
        # fastmath IR checks are requested.
        def test_impl():
            n = 4
            A = np.zeros(n)
            for i in range(n):
                A[i] = 2.0 * i
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1853
1854    @skip_parfors_unsupported
    def test_prange02(self):
        # Array fill with an explicit non-zero start: range(1, n), writing
        # to A[i - 1].
        def test_impl():
            n = 4
            A = np.zeros(n - 1)
            for i in range(1, n):
                A[i - 1] = 2.0 * i
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1864
1865    @skip_parfors_unsupported
    def test_prange03(self):
        # Scalar accumulation with `+=` into a variable initialised (to 10)
        # before the loop.
        def test_impl():
            s = 10
            for i in range(10):
                s += 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1874
1875    @skip_parfors_unsupported
    def test_prange03mul(self):
        # Scalar accumulation with `*=` into a variable initialised (to 3)
        # before the loop.
        def test_impl():
            s = 3
            for i in range(10):
                s *= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1884
1885    @skip_parfors_unsupported
    def test_prange03sub(self):
        # Scalar accumulation with `-=` into a variable initialised (to 100)
        # before the loop.
        def test_impl():
            s = 100
            for i in range(10):
                s -= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1894
1895    @skip_parfors_unsupported
    def test_prange03div(self):
        # Scalar accumulation with `/=`; `s` starts as an int (10) and is
        # repeatedly true-divided by 2.
        def test_impl():
            s = 10
            for i in range(10):
                s /= 2
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1904
1905    @skip_parfors_unsupported
    def test_prange04(self):
        # Branching inside the loop body. A comes from np.empty, but every
        # element is written by one of the two branches before it is returned.
        def test_impl():
            a = 2
            b = 3
            A = np.empty(4)
            for i in range(4):
                if i == a:
                    A[i] = b
                else:
                    A[i] = 0
            return A
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1919
1920    @skip_parfors_unsupported
    def test_prange05(self):
        # Three argument range with an explicit step of 1:
        # range(1, n - 1, 1), summing A[i] into a scalar.
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(1, n - 1, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1931
1932    @skip_parfors_unsupported
    def test_prange06(self):
        # Empty iteration space: range(1, 1, 1) yields nothing in Python, so
        # the reference result is the untouched initial value s = 0.
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(1, 1, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1943
1944    @skip_parfors_unsupported
    def test_prange07(self):
        # Start greater than stop: range(n, 1) with n = 4 yields nothing in
        # Python, so the reference result is the initial value s = 0.
        def test_impl():
            n = 4
            A = np.ones((n), dtype=np.float64)
            s = 0
            for i in range(n, 1):
                s += A[i]
            return s
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1955
1956    @skip_parfors_unsupported
    def test_prange08(self):
        # Two nested loops bounded by len(A); no patch_instance is given so
        # both `range` references are patched.
        def test_impl():
            n = 4
            A = np.ones((n))
            acc = 0
            for i in range(len(A)):
                for j in range(len(A)):
                    acc += A[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1968
1969    @skip_parfors_unsupported
    def test_prange08_1(self):
        # Two nested loops with literal bounds (4); no patch_instance is
        # given so both `range` references are patched.
        def test_impl():
            n = 4
            A = np.ones((n))
            acc = 0
            for i in range(4):
                for j in range(4):
                    acc += A[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
1981
1982    @skip_parfors_unsupported
    def test_prange09(self):
        # Nested loops where only the inner loop (`range` instance 1) is
        # turned into a `prange`; the outer loop stays a plain `range`.
        def test_impl():
            n = 4
            acc = 0
            for i in range(n):
                for j in range(n):
                    acc += 1
            return acc
        # patch inner loop to 'prange'
        self.prange_tester(test_impl, patch_instance=[1],
                           scheduler_type='unsigned',
                           check_fastmath=True)
1995
1996    @skip_parfors_unsupported
    def test_prange10(self):
        # Nested loops where only the outer loop (`range` instance 0) is
        # turned into a `prange`. The inner accumulator acc1 is reset on each
        # outer iteration and then folded into acc2.
        def test_impl():
            n = 4
            acc2 = 0
            for j in range(n):
                acc1 = 0
                for i in range(n):
                    acc1 += 1
                acc2 += acc1
            return acc2
        # patch outer loop to 'prange'
        self.prange_tester(test_impl, patch_instance=[0],
                           scheduler_type='unsigned',
                           check_fastmath=True)
2011
2012    @skip_parfors_unsupported
2013    @unittest.skip("list append is not thread-safe yet (#2391, #2408)")
    def test_prange11(self):
        # List comprehension whose iterable is a `range`; currently skipped,
        # see the reason given on the `unittest.skip` decorator.
        def test_impl():
            n = 4
            return [np.sin(j) for j in range(n)]
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
2020
2021    @skip_parfors_unsupported
    def test_prange12(self):
        # Negative stop value: range(-len(X)) yields nothing in Python, so
        # the reference result is the initial value acc = 0.
        def test_impl():
            acc = 0
            n = 4
            X = np.ones(n)
            for i in range(-len(X)):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, scheduler_type='unsigned',
                           check_fastmath=True)
2032
2033    @skip_parfors_unsupported
    def test_prange13(self):
        # Loop bound supplied as a function argument; the argument is passed
        # as np.int32(4), so the signature is built from a 32 bit integer.
        def test_impl(n):
            acc = 0
            for i in range(n):
                acc += 1
            return acc
        self.prange_tester(test_impl, np.int32(4), scheduler_type='unsigned',
                           check_fastmath=True)
2042
2043    @skip_parfors_unsupported
    def test_prange14(self):
        # Sum of A[i]*2 over a random input array into a scalar that starts
        # at a non-zero value (3).
        def test_impl(A):
            s = 3
            for i in range(len(A)):
                s += A[i]*2
            return s
        # this tests reduction detection well since the accumulated variable
        # is initialized before the parfor and the value accessed from the array
        # is updated before accumulation
        self.prange_tester(test_impl, np.random.ranf(4),
                           scheduler_type='unsigned',
                           check_fastmath=True)
2056
2057    @skip_parfors_unsupported
    def test_prange15(self):
        # from issue 2587
        # test parfor type inference when there is multi-dimensional indexing
        # (a 2D array is allocated inside the loop body and read with x[0, 0])
        def test_impl(N):
            acc = 0
            for i in range(N):
                x = np.ones((1, 1))
                acc += x[0, 0]
            return acc
        self.prange_tester(test_impl, 1024, scheduler_type='unsigned',
                           check_fastmath=True)
2069
2070    # Tests for negative ranges
2071    @skip_parfors_unsupported
    def test_prange16(self):
        # Range with a negative start: range(-N, N). The 'signed' scheduler
        # check is requested.
        def test_impl(N):
            acc = 0
            for i in range(-N, N):
                acc += 2
            return acc
        self.prange_tester(test_impl, 1024, scheduler_type='signed',
                           check_fastmath=True)
2080
2081    @skip_parfors_unsupported
    def test_prange17(self):
        # Negative loop indices used to index an array: i runs over
        # range(-N, N) and X has N elements, so X[i] is read with both
        # negative and non-negative indices.
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-N, N):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)
2091
2092    @skip_parfors_unsupported
    def test_prange18(self):
        # Nested loops that both start at a negative value and both index X;
        # no patch_instance is given so both `range` references are patched.
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-N, 5):
                acc += X[i]
                for j in range(-4, N):
                    acc += X[j]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)
2104
2105    @skip_parfors_unsupported
    def test_prange19(self):
        # Nested loops with negative starts indexing a 2D (N, M) array in
        # both dimensions via X[i, j].
        def test_impl(N):
            acc = 0
            M = N + 4
            X = np.ones((N, M))
            for i in range(-N, N):
                for j in range(-M, M):
                    acc += X[i, j]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)
2117
2118    @skip_parfors_unsupported
    def test_prange20(self):
        # Range starting at a literal -1: range(-1, N), indexing X[i].
        def test_impl(N):
            acc = 0
            X = np.ones(N)
            for i in range(-1, N):
                acc += X[i]
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)
2128
2129    @skip_parfors_unsupported
    def test_prange21(self):
        # Range with negative literal start and stop: range(-3, -1). The
        # argument N is accepted by test_impl but not used in its body.
        def test_impl(N):
            acc = 0
            for i in range(-3, -1):
                acc += 3
            return acc
        self.prange_tester(test_impl, 9, scheduler_type='signed',
                           check_fastmath=True)
2138
2139    @skip_parfors_unsupported
    def test_prange22(self):
        # if/elif/else inside a loop over range(-2, 2); A (from np.empty(4))
        # is written through both negative and non-negative indices. The
        # result of a fastmath compile is also compared
        # (check_fastmath_result=True).
        def test_impl():
            a = 0
            b = 3
            A = np.empty(4)
            for i in range(-2, 2):
                if i == a:
                    A[i] = b
                elif i < 1:
                    A[i] = -1
                else:
                    A[i] = 7
            return A
        self.prange_tester(test_impl, scheduler_type='signed',
                           check_fastmath=True, check_fastmath_result=True)
2155
2156    @skip_parfors_unsupported
    def test_prange23(self):
        # test non-contig input
        # (the argument is every second element of a 32 element array and is
        # written to in place by the loop)
        def test_impl(A):
            for i in range(len(A)):
                A[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)
2166
2167    @skip_parfors_unsupported
    def test_prange24(self):
        # test non-contig input, signed range
        # (the strided argument is written through negative indices taken
        # from range(-len(A), 0))
        def test_impl(A):
            for i in range(-len(A), 0):
                A[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='signed',
                           check_fastmath=True, check_fastmath_result=True)
2177
2178    @skip_parfors_unsupported
    def test_prange25(self):
        # A list of arrays is built with a comprehension and each entry is
        # then replaced inside a loop. test_impl references `range` twice;
        # patch_instance=[1] selects the second reference (presumably the
        # explicit `for` loop rather than the comprehension -- depends on
        # bytecode order).
        def test_impl(A):
            n = len(A)
            buf = [np.zeros_like(A) for _ in range(n)]
            for i in range(n):
                buf[i] = A + i
            return buf
        A = np.ones((10,))
        self.prange_tester(test_impl, A,  patch_instance=[1],
                           scheduler_type='unsigned', check_fastmath=True,
                           check_fastmath_result=True)

        # Compile the unpatched test_impl in parallel mode and check that the
        # parfor diagnostics report no hoisted allocations.
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),))
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        hoisted_allocs = diagnostics.hoisted_allocations()
        self.assertEqual(len(hoisted_allocs), 0)
2195
2196    # should this work?
2197    @skip_parfors_unsupported
    def test_prange26(self):
        # Writes go through B, a strided slice (A[::3]) of an argument that
        # is itself a strided slice (np.zeros(32)[::2]); the function returns
        # A, not B.
        def test_impl(A):
            B = A[::3]
            for i in range(len(B)):
                B[i] = i
            return A
        A = np.zeros(32)[::2]
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)
2207
2208    @skip_parfors_unsupported
    def test_prange27(self):
        # issue5597: usedef error in parfor
        # The inner loop body computes np.abs on a slice of `a` into `z`,
        # which is never read; the function always returns 0.
        def test_impl(a, b, c):
            for j in range(b[0]-1):
                for k in range(2):
                    z = np.abs(a[c-1:c+1])
            return 0

        # patch inner loop to 'prange'
        self.prange_tester(test_impl,
                           np.arange(20),
                           np.asarray([4,4,4,4,4,4,4,4,4,4]),
                           0,
                           patch_instance=[1],
                           scheduler_type='unsigned',
                           check_fastmath=True)
2225
2226    @skip_parfors_unsupported
    def test_prange_two_instances_same_reduction_var(self):
        # issue4922 - multiple uses of same reduction variable
        # `c` is updated with `+=` twice in the loop body, the second time
        # under a condition; both updates use the same operator.
        def test_impl(n):
            c = 0
            for i in range(n):
                c += 1
                if i > 10:
                    c += 1
            return c
        self.prange_tester(test_impl, 9)
2237
2238    @skip_parfors_unsupported
    def test_prange_conflicting_reduction_ops(self):
        # `c` is updated with two different operators (`+=` and `*=`) inside
        # the same loop; running it through prange_tester is expected to
        # raise UnsupportedError with a message naming the variable.
        def test_impl(n):
            c = 0
            for i in range(n):
                c += 1
                if i > 10:
                    c *= 1
            return c

        with self.assertRaises(errors.UnsupportedError) as raises:
            self.prange_tester(test_impl, 9)
        msg = ('Reduction variable c has multiple conflicting reduction '
               'operators.')
        self.assertIn(msg, str(raises.exception))
2253
2254#    @skip_parfors_unsupported
2255    @disabled_test
    def test_check_error_model(self):
        # Currently disabled via the `disabled_test` decorator.
        # Checks division by zero handling: the pure Python function and the
        # parallel compiled function are expected to raise
        # ZeroDivisionError, whereas the parallel fastmath compiled function
        # is expected to return inf in element 0 instead of raising.
        def test_impl():
            n = 32
            A = np.zeros(n)
            for i in range(n):
                A[i] = 1 / i # div-by-zero when i = 0
            return A

        # reference behaviour of the interpreter
        with self.assertRaises(ZeroDivisionError) as raises:
            test_impl()

        # compile parallel functions
        pfunc = self.generate_prange_func(test_impl, None)
        pcres = self.compile_parallel(pfunc, ())
        pfcres = self.compile_parallel_fastmath(pfunc, ())

        # should raise
        with self.assertRaises(ZeroDivisionError) as raises:
            pcres.entry_point()

        # should not raise
        result = pfcres.entry_point()
        self.assertEqual(result[0], np.inf)
2279
2280
2281    @skip_parfors_unsupported
    def test_check_alias_analysis(self):
        # check alias analysis reports ok
        # Each iteration takes a row view B = A[i] of the 2D argument and
        # writes through it; after the usual prange_tester comparison the
        # gufunc IR of a fastmath compile is inspected for `noalias` markers.
        def test_impl(A):
            for i in range(len(A)):
                B = A[i]
                B[:] = 1
            return A
        A = np.zeros(32).reshape(4, 8)
        self.prange_tester(test_impl, A, scheduler_type='unsigned',
                           check_fastmath=True, check_fastmath_result=True)
        pfunc = self.generate_prange_func(test_impl, None)
        sig = tuple([numba.typeof(A)])
        cres = self.compile_parallel_fastmath(pfunc, sig)
        # mapping of name -> IR text, as iterated below
        _ir = self._get_gufunc_ir(cres)
        for k, v in _ir.items():
            for line in v.splitlines():
                # get the fn definition line
                if 'define' in line and k in line:
                    # there should only be 2x noalias, one on each of the first
                    # 2 args (retptr, excinfo).
                    # Note: used to be 3x no noalias, but env arg is dropped.
                    self.assertEqual(line.count('noalias'), 2)
                    # only the first matching line per entry is checked
                    break
2305
2306    @skip_parfors_unsupported
    def test_prange_raises_invalid_step_size(self):
        # A step of 2 is used in the patched `range`; running it through
        # prange_tester is expected to raise UnsupportedRewriteError with the
        # message checked below.
        def test_impl(N):
            acc = 0
            for i in range(0, N, 2):
                acc += 2
            return acc

        with self.assertRaises(errors.UnsupportedRewriteError) as raises:
            self.prange_tester(test_impl, 1024)
        msg = 'Only constant step size of 1 is supported for prange'
        self.assertIn(msg, str(raises.exception))
2318
2319    @skip_parfors_unsupported
2320    def test_prange_fastmath_check_works(self):
2321        # this function will benefit from `fastmath`, the div will
2322        # get optimised to a multiply by reciprocal and the accumulator
2323        # then becomes an fmadd: A = A + i * 0.5
2324        def test_impl():
2325            n = 128
2326            A = 0
2327            for i in range(n):
2328                A += i / 2.0
2329            return A
2330        self.prange_tester(test_impl, scheduler_type='unsigned',
2331                           check_fastmath=True)
2332        pfunc = self.generate_prange_func(test_impl, None)
2333        cres = self.compile_parallel_fastmath(pfunc, ())
2334        ir = self._get_gufunc_ir(cres)
2335        _id = '%[A-Z_0-9]?(.[0-9]+)+[.]?[i]?'
2336        recipr_str = '\s+%s = fmul fast double %s, 5.000000e-01'
2337        reciprocal_inst = re.compile(recipr_str % (_id, _id))
2338        fadd_inst = re.compile('\s+%s = fadd fast double %s, %s'
2339                               % (_id, _id, _id))
2340        # check there is something like:
2341        #  %.329 = fmul fast double %.325, 5.000000e-01
2342        #  %.337 = fadd fast double %A.07, %.329
2343        for name, kernel in ir.items():
2344            splitted = kernel.splitlines()
2345            for i, x in enumerate(splitted):
2346                if reciprocal_inst.match(x):
2347                    break
2348            self.assertTrue(fadd_inst.match(splitted[i + 1]))
2349
2350    @skip_parfors_unsupported
    def test_kde_example(self):
        # The loop body mixes scalar reads (X[i]), whole array expressions
        # against `points`, and numpy reductions (np.min, np.sum), and
        # accumulates the per-iteration value into the scalar `exps`.
        def test_impl(X):
            # KDE example
            b = 0.5
            points = np.array([-1.0, 2.0, 5.0])
            N = points.shape[0]
            n = X.shape[0]
            exps = 0
            for i in range(n):
                p = X[i]
                d = (-(p - points)**2) / (2 * b**2)
                m = np.min(d)
                exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
            return exps

        n = 128
        X = np.random.ranf(n)
        self.prange_tester(test_impl, X)
2369
2370    @skip_parfors_unsupported
    def test_parfor_alias1(self):
        # `a` is a view of the first row of `b` taken before the loop; the
        # loop writes through `a` and the result is computed from `b`.
        def test_impl(n):
            b = np.zeros((n, n))
            a = b[0]
            for j in range(n):
                a[j] = j + 1
            return b.sum()
        self.prange_tester(test_impl, 4)
2379
2380    @skip_parfors_unsupported
    def test_parfor_alias2(self):
        # `a` is a row view of `b` taken inside the outer loop; the inner
        # loop writes through `a` and the result is computed from `b`.
        def test_impl(n):
            b = np.zeros((n, n))
            for i in range(n):
              a = b[i]
              for j in range(n):
                a[j] = i + j
            return b.sum()
        self.prange_tester(test_impl, 4)
2390
2391    @skip_parfors_unsupported
    def test_parfor_alias3(self):
        # Two levels of views: `a` is taken from `b` and `c` from `a`, each
        # inside a loop; the innermost loop writes through `c` and the result
        # is computed from `b`.
        def test_impl(n):
            b = np.zeros((n, n, n))
            for i in range(n):
              a = b[i]
              for j in range(n):
                c = a[j]
                for k in range(n):
                  c[k] = i + j + k
            return b.sum()
        self.prange_tester(test_impl, 4)
2403
2404    @skip_parfors_unsupported
    def test_parfor_race_1(self):
        # `k` is assigned on every iteration and read after the loop.
        # prange_tester returns the warnings recorded while compiling the
        # patched function; the first of them is expected to carry the
        # message about `k` checked below.
        def test_impl(x, y):
            for j in range(y):
                k = x
            return k
        raised_warnings = self.prange_tester(test_impl, 10, 20)
        warning_obj = raised_warnings[0]
        expected_msg = ("Variable k used in parallel loop may be written to "
                        "simultaneously by multiple workers and may result "
                        "in non-deterministic or unintended results.")
        self.assertIn(expected_msg, str(warning_obj.message))
2416
2417    @skip_parfors_unsupported
    def test_nested_parfor_push_call_vars(self):
        """ issue 3686: if a prange has something inside it that causes
            a nested parfor to be generated and both the inner and outer
            parfor use the same call variable defined outside the parfors
            then ensure that when that call variable is pushed into the
            parfor that the call variable isn't duplicated with the same
            name resulting in a redundant type lock.
        """
        # `f` is the call variable defined outside the loops; each loop body
        # calls it on a scalar and on an array.
        def test_impl():
            B = 0
            f = np.negative
            for i in range(1):
                this_matters = f(1.)
                B += f(np.zeros(1,))[0]
            for i in range(2):
                this_matters = f(1.)
                B += f(np.zeros(1,))[0]

            return B
        # no patch_instance: every `range` reference is patched
        self.prange_tester(test_impl)
2438
2439    @skip_parfors_unsupported
    def test_copy_global_for_parfor(self):
        """ issue4903: a global is copied next to a parfor so that
            it can be inlined into the parfor and thus not have to be
            passed to the parfor (i.e., an unsupported function type).
            This global needs to be renamed in the block into which
            it is copied.
        """
        # Two sibling inner loops sit inside one outer loop and both call
        # np.exp while accumulating into whole arrays (lh, lc).
        def test_impl(zz, tc):
            lh = np.zeros(len(tc))
            lc = np.zeros(len(tc))
            for i in range(1):
                nt = tc[i]
                for t in range(nt):
                    lh += np.exp(zz[i, t])
                for t in range(nt):
                    lc += np.exp(zz[i, t])
            return lh, lc

        m = 2
        zz = np.ones((m, m, m))
        tc = np.ones(m, dtype=np.int_)
        # only `range` instance 0 (the outer loop) is patched
        self.prange_tester(test_impl, zz, tc, patch_instance=[0])
2462
2463    @skip_parfors_unsupported
    def test_multiple_call_getattr_object(self):
        # `f` is bound to the attribute lookup np.negative outside the loop
        # and is called twice inside the loop body.
        def test_impl(n):
            B = 0
            f = np.negative
            for i in range(1):
                this_matters = f(1.0)
                B += f(n)

            return B
        self.prange_tester(test_impl, 1.0)
2474
2475    @skip_parfors_unsupported
2476    def test_argument_alias_recarray_field(self):
2477        # Test for issue4007.
2478        def test_impl(n):
2479            for i in range(len(n)):
2480                n.x[i] = 7.0
2481            return n
2482        X1 = np.zeros(10, dtype=[('x', float), ('y', int), ])
2483        X2 = np.zeros(10, dtype=[('x', float), ('y', int), ])
2484        X3 = np.zeros(10, dtype=[('x', float), ('y', int), ])
2485        v1 = X1.view(np.recarray)
2486        v2 = X2.view(np.recarray)
2487        v3 = X3.view(np.recarray)
2488
2489        # Numpy doesn't seem to support almost equal on recarray.
2490        # So, we convert to list and use assertEqual instead.
2491        python_res = list(test_impl(v1))
2492        njit_res = list(njit(test_impl)(v2))
2493        pa_func = njit(test_impl, parallel=True)
2494        pa_res = list(pa_func(v3))
2495        self.assertEqual(python_res, njit_res)
2496        self.assertEqual(python_res, pa_res)
2497
2498    @skip_parfors_unsupported
    def test_mutable_list_param(self):
        """ issue3699: test that mutable variable to call in loop
            is not hoisted.  The call in test_impl forces a manual
            check here rather than using prange_tester.
        """
        @njit
        def list_check(X):
            """ If the variable X is hoisted in the test_impl prange
                then subsequent list_check calls would return increasing
                values.
            """
            # return the current last element, then grow the list by one
            ret = X[-1]
            a = X[-1] + 1
            X.append(a)
            return ret
        def test_impl(n):
            # a fresh list is created on every iteration, so list_check is
            # expected to see [100] each time
            for i in prange(n):
                X = [100]
                a = list_check(X)
            return a
        # run interpreted, njit compiled and parallel compiled and compare
        python_res = test_impl(10)
        njit_res = njit(test_impl)(10)
        pa_func = njit(test_impl, parallel=True)
        pa_res = pa_func(10)
        self.assertEqual(python_res, njit_res)
        self.assertEqual(python_res, pa_res)
2525
2526    @skip_parfors_unsupported
    def test_list_comprehension_prange(self):
        # issue4569
        # A list comprehension over range(len(x)) is patched; the argument is
        # a Python list holding two integer arrays of different lengths.
        def test_impl(x):
            return np.array([len(x[i]) for i in range(len(x))])
        x = [np.array([1,2,3], dtype=int),np.array([1,2], dtype=int)]
        self.prange_tester(test_impl, x)
2533
2534    @skip_parfors_unsupported
    def test_ssa_false_reduction(self):
        # issue5698
        # SSA for h creates assignments to h that make it look like a
        # reduction variable except that it lacks an associated
        # reduction operator.  Test here that h is excluded as a
        # reduction variable.
        # Note: h is only assigned inside the two `if` blocks; the test
        # passes a = 0 and b = 0 so both assignments execute.
        def test_impl(image, a, b):
            empty = np.zeros(image.shape)
            for i in range(image.shape[0]):
                r = image[i][0] / 255.0
                if a == 0:
                    h = 0
                if b == 0:
                    h = 0
                empty[i] = [h, h, h]
            return empty

        image = np.zeros((3, 3), dtype=np.int32)
        self.prange_tester(test_impl, image, 0, 0)
2554
2555
2556@skip_parfors_unsupported
2557@x86_only
2558class TestParforsVectorizer(TestPrangeBase):
2559
2560    # env mutating test
2561    _numba_parallel_test_ = False
2562
2563    def get_gufunc_asm(self, func, schedule_type, *args, **kwargs):
2564
2565        fastmath = kwargs.pop('fastmath', False)
2566        cpu_name = kwargs.pop('cpu_name', 'skylake-avx512')
2567        assertions = kwargs.pop('assertions', True)
2568        # force LLVM to use zmm registers for vectorization
2569        # https://reviews.llvm.org/D67259
2570        cpu_features = kwargs.pop('cpu_features', '-prefer-256-bit')
2571
2572        env_opts = {'NUMBA_CPU_NAME': cpu_name,
2573                    'NUMBA_CPU_FEATURES': cpu_features,
2574                    }
2575
2576        overrides = []
2577        for k, v in env_opts.items():
2578            overrides.append(override_env_config(k, v))
2579
2580        with overrides[0], overrides[1]:
2581            sig = tuple([numba.typeof(x) for x in args])
2582            pfunc_vectorizable = self.generate_prange_func(func, None)
2583            if fastmath == True:
2584                cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig)
2585            else:
2586                cres = self.compile_parallel(pfunc_vectorizable, sig)
2587
2588            # get the gufunc asm
2589            asm = self._get_gufunc_asm(cres)
2590
2591            if assertions:
2592                schedty = re.compile('call\s+\w+\*\s+@do_scheduling_(\w+)\(')
2593                matches = schedty.findall(cres.library.get_llvm_str())
2594                self.assertGreaterEqual(len(matches), 1) # at least 1 parfor call
2595                self.assertEqual(matches[0], schedule_type)
2596                self.assertTrue(asm != {})
2597
2598            return asm
2599
2600    # this is a common match pattern for something like:
2601    # \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n
2602    # to check vsqrtpd operates on zmm
2603    match_vsqrtpd_on_zmm = re.compile('\n\s+vsqrtpd\s+.*zmm.*\n')
2604
2605    @linux_only
2606    def test_vectorizer_fastmath_asm(self):
2607        """ This checks that if fastmath is set and the underlying hardware
2608        is suitable, and the function supplied is amenable to fastmath based
2609        vectorization, that the vectorizer actually runs.
2610        """
2611
2612        # This function will benefit from `fastmath` if run on a suitable
2613        # target. The vectorizer should unwind the loop and generate
2614        # packed dtype=double add and sqrt instructions.
2615        def will_vectorize(A):
2616            n = len(A)
2617            acc = 0
2618            for i in range(n):
2619                acc += np.sqrt(i)
2620            return acc
2621
2622        arg = np.zeros(10)
2623
2624        fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
2625                                       fastmath=True)
2626        slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
2627                                       fastmath=False)
2628
2629        for v in fast_asm.values():
2630            # should unwind and call vector sqrt then vector add
2631            # all on packed doubles using zmm's
2632            self.assertTrue('vaddpd' in v)
2633            self.assertTrue('vsqrtpd' in v)
2634            self.assertTrue('zmm' in v)
2635            # make sure vsqrtpd operates on zmm
2636            self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)
2637
2638        for v in slow_asm.values():
2639            # vector variants should not be present
2640            self.assertTrue('vaddpd' not in v)
2641            self.assertTrue('vsqrtpd' not in v)
2642            # check scalar variant is present
2643            self.assertTrue('vsqrtsd' in v)
2644            self.assertTrue('vaddsd' in v)
2645            # check no zmm addressing is present
2646            self.assertTrue('zmm' not in v)
2647
2648    @linux_only
2649    def test_unsigned_refusal_to_vectorize(self):
2650        """ This checks that if fastmath is set and the underlying hardware
2651        is suitable, and the function supplied is amenable to fastmath based
2652        vectorization, that the vectorizer actually runs.
2653        """
2654
2655        def will_not_vectorize(A):
2656            n = len(A)
2657            for i in range(-n, 0):
2658                A[i] = np.sqrt(A[i])
2659            return A
2660
2661        def will_vectorize(A):
2662            n = len(A)
2663            for i in range(n):
2664                A[i] = np.sqrt(A[i])
2665            return A
2666
2667        arg = np.zeros(10)
2668
2669        # Boundschecking breaks vectorization
2670        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
2671            novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg,
2672                                            fastmath=True)
2673
2674            vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
2675                                          fastmath=True)
2676
2677        for v in novec_asm.values():
2678            # vector variant should not be present
2679            self.assertTrue('vsqrtpd' not in v)
2680            # check scalar variant is present
2681            self.assertTrue('vsqrtsd' in v)
2682            # check no zmm addressing is present
2683            self.assertTrue('zmm' not in v)
2684
2685        for v in vec_asm.values():
2686            # should unwind and call vector sqrt then vector mov
2687            # all on packed doubles using zmm's
2688            self.assertTrue('vsqrtpd' in v)
2689            self.assertTrue('vmovupd' in v)
2690            self.assertTrue('zmm' in v)
2691            # make sure vsqrtpd operates on zmm
2692            self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)
2693
2694    @linux_only
2695    # needed as 32bit doesn't have equivalent signed/unsigned instruction generation
2696    # for this function
2697    @skip_parfors_unsupported
2698    def test_signed_vs_unsigned_vec_asm(self):
2699        """ This checks vectorization for signed vs unsigned variants of a
2700        trivial accumulator, the only meaningful difference should be the
2701        presence of signed vs. unsigned unpack instructions (for the
2702        induction var).
2703        """
2704        def signed_variant():
2705            n = 4096
2706            A = 0.
2707            for i in range(-n, 0):
2708                A += i
2709            return A
2710
2711        def unsigned_variant():
2712            n = 4096
2713            A = 0.
2714            for i in range(n):
2715                A += i
2716            return A
2717
2718        # Boundschecking breaks the diff check below because of the pickled exception
2719        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
2720            signed_asm = self.get_gufunc_asm(signed_variant, 'signed',
2721                                             fastmath=True)
2722            unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned',
2723                                               fastmath=True)
2724
2725        def strip_instrs(asm):
2726            acc = []
2727            for x in asm.splitlines():
2728                spd = x.strip()
2729                # filter out anything that isn't a trivial instruction
2730                # and anything with the gufunc id as it contains an address
2731                if spd != '' and not (spd.startswith('.')
2732                                     or spd.startswith('_')
2733                                     or spd.startswith('"')
2734                                     or '__numba_parfor_gufunc' in spd):
2735                        acc.append(re.sub('[\t]', '', spd))
2736            return acc
2737
2738        for k, v in signed_asm.items():
2739            signed_instr = strip_instrs(v)
2740            break
2741
2742        for k, v in unsigned_asm.items():
2743            unsigned_instr = strip_instrs(v)
2744            break
2745
2746        from difflib import SequenceMatcher as sm
2747        # make sure that the only difference in instruction (if there is a
2748        # difference) is the char 'u'. For example:
2749        # vcvtsi2sdq vs. vcvtusi2sdq
2750        self.assertEqual(len(signed_instr), len(unsigned_instr))
2751        for a, b in zip(signed_instr, unsigned_instr):
2752            if a == b:
2753                continue
2754            else:
2755                s = sm(lambda x: x == '\t', a, b)
2756                ops = s.get_opcodes()
2757                for op in ops:
2758                    if op[0] == 'insert':
2759                        self.assertEqual(b[op[-2]:op[-1]], 'u')
2760
2761
2762class TestParforsSlice(TestParforsBase):
2763
2764    def check(self, pyfunc, *args, **kwargs):
2765        cfunc, cpfunc = self.compile_all(pyfunc, *args)
2766        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)
2767
2768    @skip_parfors_unsupported
    def test_parfor_slice1(self):
        # Adds two 1D slices, a[0:n-2] and a[1:n-1], whose bounds are both
        # derived from the array's own length n.
        def test_impl(a):
            (n,) = a.shape
            b = a[0:n-2] + a[1:n-1]
            return b

        self.check(test_impl, np.ones(10))
2776
2777    @skip_parfors_unsupported
    def test_parfor_slice2(self):
        # Like test_parfor_slice1, but the upper bound of the second slice
        # is the runtime argument m rather than an expression of n.
        def test_impl(a, m):
            (n,) = a.shape
            b = a[0:n-2] + a[1:m]
            return b

        # runtime assertion should succeed
        self.check(test_impl, np.ones(10), 9)
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones(10),10)
        self.assertIn("do not match", str(raises.exception))
2790
2791    @skip_parfors_unsupported
    def test_parfor_slice3(self):
        # Adds two 2D slices that are offset by one in both dimensions.
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,0:n-1] + a[1:m,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))
2799
2800    @skip_parfors_unsupported
    def test_parfor_slice4(self):
        # Full slice on the first axis, offset-by-one slices on the second.
        def test_impl(a):
            (m,n) = a.shape
            b = a[:,0:n-1] + a[:,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))
2808
2809    @skip_parfors_unsupported
    def test_parfor_slice5(self):
        # Offset-by-one slices on the first axis, full slice on the second.
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,:] + a[1:m,:]
            return b

        self.check(test_impl, np.ones((4,3)))
2817
2818    @skip_parfors_unsupported
    def test_parfor_slice6(self):
        # Adds a row of `a` to a column of its transpose `b`.
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[:,1]
            return c

        self.check(test_impl, np.ones((4,3)))
2826
2827    @skip_parfors_unsupported
    def test_parfor_slice7(self):
        # Adds a row of `a` to a row of its transpose `b`; the (3,3) input is
        # expected to pass and the (3,4) input to raise AssertionError.
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[1,:]
            return c

        # runtime check should succeed
        self.check(test_impl, np.ones((3,3)))
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones((3,4)))
        self.assertIn("do not match", str(raises.exception))
2840
2841#    @skip_parfors_unsupported
2842    @disabled_test
    def test_parfor_slice8(self):
        # Disabled test: copies a[1:m,1:n] into the same slice of b, where b
        # is obtained from a.transpose().
        # NOTE(review): the reason for disabling is not recorded here.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:m,1:n] = a[1:m,1:n]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))
2851
2852#    @skip_parfors_unsupported
2853    @disabled_test
    def test_parfor_slice9(self):
        # Disabled test: assigns a[:,1:m] into b[1:n,1:m], where b is
        # obtained from a.transpose(); input is a non-square (3,4) array.
        # NOTE(review): the reason for disabling is not recorded here.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:n,1:m] = a[:,1:m]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))
2862
2863#    @skip_parfors_unsupported
2864    @disabled_test
    def test_parfor_slice10(self):
        # Disabled test: copies the 1D slice a[2,1:m] into b[2,1:m], where b
        # is obtained from a.transpose().
        # NOTE(review): the reason for disabling is not recorded here.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[2,1:m] = a[2,1:m]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))
2873
2874    @skip_parfors_unsupported
    def test_parfor_slice11(self):
        # 3D case: slice assignment between a copy `b` and the original `a`,
        # mixing a full slice, an integer index and a bounded slice.
        def test_impl(a):
            (m,n,l) = a.shape
            b = a.copy()
            b[:,1,1:l] = a[:,2,1:l]
            return b

        self.check(test_impl, np.arange(27).reshape((3,3,3)))
2883
2884    @skip_parfors_unsupported
    def test_parfor_slice12(self):
        # Slice assignment using negative literal bounds on both sides.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,1:-1] = a[0,:-2]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))
2893
2894    @skip_parfors_unsupported
    def test_parfor_slice13(self):
        # Slice assignment where the negative bounds come from the variable
        # c (= -1), the expression c-1 and -n instead of literals.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            c = -1
            b[1,1:c] = a[0,-n:c-1]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))
2904
2905    @skip_parfors_unsupported
    def test_parfor_slice14(self):
        # Source slice mixes a negative start (-3) with a positive stop (4);
        # target slice uses a negative stop only.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,:-1] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))
2914
2915    @skip_parfors_unsupported
    def test_parfor_slice15(self):
        # Target slice start is the computed negative value -(n-1); source
        # slice mixes a negative start with a positive stop.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,-(n-1):] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))
2924
2925
2926    @disabled_test
    def test_parfor_slice16(self):
        """ This test is disabled because if n is larger than the array size
            then n and n-1 will both be the end of the array and thus the
            slices will in fact be of different sizes and unable to fuse.
        """
        def test_impl(a, b, n):
            assert(a.shape == b.shape)
            a[1:n] = 10
            b[0:(n-1)] = 10
            return a * b

        self.check(test_impl, np.ones(10), np.zeros(10), 8)
        # the test expects countParfors to report exactly two parfors
        args = (numba.float64[:], numba.float64[:], numba.int64)
        self.assertEqual(countParfors(test_impl, args), 2)
2941
2942    @skip_parfors_unsupported
    def test_parfor_slice17(self):
        # Assigns A into the trailing slice B[-n:] where n = len(A); the
        # call below uses m == len(A) == 10.
        def test_impl(m, A):
            B = np.zeros(m)
            n = len(A)
            B[-n:] = A
            return B

        self.check(test_impl, 10, np.ones(10))
2951
2952    @skip_parfors_unsupported
    def test_parfor_slice18(self):
        # issue 3534
        # A slice assignment from np.arange is followed by a scalar read of
        # an element inside the assigned range.
        def test_impl():
            a = np.zeros(10)
            a[1:8] = np.arange(0, 7)
            y = a[3]
            return y

        self.check(test_impl)
2962
2963    @skip_parfors_unsupported
    def test_parfor_slice19(self):
        # issues #3561 and #3554, empty slice binop
        # X[:0] selects no elements, so the in-place add has nothing to update.
        def test_impl(X):
            X[:0] += 1
            return X

        self.check(test_impl, np.ones(10))
2971
2972    @skip_parfors_unsupported
    def test_parfor_slice20(self):
        # issue #4075, slice size
        # Returns len() of a slice; check_scheduling=False is forwarded by
        # self.check to check_parfors_vs_others.
        def test_impl():
            a = np.ones(10)
            c = a[1:]
            s = len(c)
            return s

        self.check(test_impl, check_scheduling=False)
2982
2983    @skip_parfors_unsupported
    def test_parfor_slice21(self):
        # Compares a reshaped column array against a slice of another
        # reshaped column array; inputs have sizes 5 and 6 so x2[:-1, :]
        # has the same number of rows as x1.
        def test_impl(x1, x2):
            x1 = x1.reshape(x1.size, 1)
            x2 = x2.reshape(x2.size, 1)
            return x1 >= x2[:-1, :]

        x1 = np.random.rand(5)
        x2 = np.random.rand(6)
        self.check(test_impl, x1, x2)
2993
2994    @skip_parfors_unsupported
    def test_parfor_slice22(self):
        # Inside a prange loop, indexes the second axis with x2, which is
        # passed as a 0-d array (np.array(4)), and accumulates into b.
        def test_impl(x1, x2):
            b = np.zeros((10,))
            for i in prange(1):
                b += x1[:, x2]
            return b

        x1 = np.zeros((10,7))
        x2 = np.array(4)
        self.check(test_impl, x1, x2)
3005
3006    @skip_parfors_unsupported
    def test_parfor_slice23(self):
        # issue #4630
        # Assigns a scalar to the empty slice x[:0].
        def test_impl(x):
            x[:0] = 2
            return x

        self.check(test_impl, np.ones(10))
3014
3015    @skip_parfors_unsupported
    def test_parfor_slice24(self):
        # C is first bound to B[n:] and then rebound to a slice of A of the
        # same length; B itself is returned. n sweeps range(-15, 15), which
        # includes values beyond the bounds of the 10-element arrays.
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[n:]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)
3025
3026    @skip_parfors_unsupported
    def test_parfor_slice25(self):
        # Same as test_parfor_slice24 but n is the stop (B[:n]) rather than
        # the start of the slice; n sweeps range(-15, 15).
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[:n]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)
3036
3037    @skip_parfors_unsupported
    def test_parfor_slice26(self):
        # 1D slice assignment with a computed negative start on the target
        # and a negative start / positive stop on the source.
        def test_impl(a):
            (n,) = a.shape
            b = a.copy()
            b[-(n-1):] = a[-3:4]
            return b

        self.check(test_impl, np.arange(4))
3046
3047    @skip_parfors_unsupported
    def test_parfor_slice27(self):
        # issue5601: tests array analysis of the slice with
        # n_valid_vals of unknown size.
        def test_impl(a):
            n_valid_vals = 0

            for i in prange(a.shape[0]):
                if a[i] != 0:
                    n_valid_vals += 1

                if n_valid_vals:
                    # the slice is created but never read
                    unused = a[:n_valid_vals]

            # the kernel always returns the constant 0
            return 0

        self.check(test_impl, np.arange(3))
3064
3065    @skip_parfors_unsupported
    def test_issue5942_1(self):
        # issue5942: tests statement reordering of
        # aliased arguments.
        def test_impl(gg, gg_next):
            gs = gg.shape
            d = gs[0]
            for i_gg in prange(d):
                # row copy followed by an in-place update of the same row
                gg_next[i_gg, :]  = gg[i_gg, :]
                gg_next[i_gg, 0] += 1

            return gg_next

        d = 4
        k = 2

        # both inputs are separate zero-filled (d, k) int32 arrays
        gg      = np.zeros((d, k), dtype = np.int32)
        gg_next = np.zeros((d, k), dtype = np.int32)
        self.check(test_impl, gg, gg_next)
3084
3085    @skip_parfors_unsupported
    def test_issue5942_2(self):
        # issue5942: tests statement reordering
        # Unlike test_issue5942_1, both arrays are allocated inside the kernel.
        def test_impl(d, k):
            gg      = np.zeros((d, k), dtype = np.int32)
            gg_next = np.zeros((d, k), dtype = np.int32)

            for i_gg in prange(d):
                # fill row i_gg of gg, copy it to gg_next, then bump column 0
                for n in range(k):
                    gg[i_gg, n] = i_gg
                gg_next[i_gg, :]  = gg[i_gg, :]
                gg_next[i_gg, 0] += 1

            return gg_next

        d = 4
        k = 2

        self.check(test_impl, d, k)
3104
3105    @skip_parfors_unsupported
3106    @skip_unless_scipy
3107    def test_issue6102(self):
3108        # The problem is originally observed on Python3.8 because of the
3109        # changes in how loops are represented in 3.8 bytecode.
3110        @njit(parallel=True)
3111        def f(r):
3112            for ir in prange(r.shape[0]):
3113                dist = np.inf
3114                tr = np.array([0, 0, 0], dtype=np.float32)
3115                for i in [1, 0, -1]:
3116                    dist_t = np.linalg.norm(r[ir, :] + i)
3117                    if dist_t < dist:
3118                        dist = dist_t
3119                        tr = np.array([i, i, i], dtype=np.float32)
3120                r[ir, :] += tr
3121            return r
3122
3123        r = np.array([[0., 0., 0.], [0., 0., 1.]])
3124        self.assertPreciseEqual(f(r), f.py_func(r))
3125
3126
class TestParforsOptions(TestParforsBase):
    """Checks how switching off parallel options changes the parfor count."""

    def check(self, pyfunc, *args, **kwargs):
        # Compile pyfunc through self.compile_all, then let
        # self.check_parfors_vs_others compare the variants.
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    @skip_parfors_unsupported
    def test_parfor_options(self):
        # Each countParfors call below switches off one more option than the
        # previous call and pins the number of parfors expected for it.
        def test_impl(a):
            n = a.shape[0]
            b = np.ones(n)
            c = np.array([ i for i in range(n) ])
            b[:n] = a + b * c
            for i in prange(n):
                c[i] = b[i] * a[i]
            return reduce(lambda x,y:x+y, c, 0)

        self.check(test_impl, np.ones(10))
        args = (numba.float64[:],)
        # everything should fuse with default option
        self.assertEqual(countParfors(test_impl, args), 1)
        # with no fusion
        self.assertEqual(countParfors(test_impl, args, fusion=False), 6)
        # with no fusion, comprehension
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False), 5)
        # with no fusion, comprehension, setitem
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False), 4)
        # with no fusion, comprehension, setitem, prange
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False), 3)
        # with no fusion, comprehension, setitem, prange, reduction
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False), 2)
        # with no fusion, comprehension, setitem, prange, reduction, numpy
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False, numpy=False), 0)
3167
3168
class TestParforsBitMask(TestParforsBase):
    """Tests indexing and assignment through boolean mask arrays."""

    def check(self, pyfunc, *args, **kwargs):
        """Compile ``pyfunc`` with ``self.compile_all`` and compare variants.

        ``args`` are forwarded to both ``compile_all`` and
        ``check_parfors_vs_others``; ``kwargs`` only to the latter.
        """
        compiled_pair = self.compile_all(pyfunc, *args)
        first, second = compiled_pair
        self.check_parfors_vs_others(
            pyfunc, first, second, *args, **kwargs
        )

    @skip_parfors_unsupported
    def test_parfor_bitmask1(self):
        # mask is computed inside the kernel, scalar assigned through it
        def test_impl(a, n):
            b = a > n
            a[b] = 0
            return a

        values = np.arange(10)
        threshold = 5
        self.check(test_impl, values, threshold)

    @skip_parfors_unsupported
    def test_parfor_bitmask2(self):
        # mask is passed in as an argument, scalar assigned through it
        def test_impl(a, b):
            a[b] = 0
            return a

        values = np.arange(10)
        self.check(test_impl, values, values > 5)

    @skip_parfors_unsupported
    def test_parfor_bitmask3(self):
        # masked elements are assigned back onto themselves
        def test_impl(a, b):
            a[b] = a[b]
            return a

        values = np.arange(10)
        self.check(test_impl, values, values > 5)

    @skip_parfors_unsupported
    def test_parfor_bitmask4(self):
        # right-hand side masks a temporary array expression
        def test_impl(a, b):
            a[b] = (2 * a)[b]
            return a

        values = np.arange(10)
        self.check(test_impl, values, values > 5)

    @skip_parfors_unsupported
    def test_parfor_bitmask5(self):
        # right-hand side is a binary operation on two masked reads
        def test_impl(a, b):
            a[b] = a[b] * a[b]
            return a

        values = np.arange(10)
        self.check(test_impl, values, values > 5)

    @skip_parfors_unsupported
    def test_parfor_bitmask6(self):
        # right-hand side is a separate array with one entry per True in
        # the mask
        def test_impl(a, b, c):
            a[b] = c
            return a

        values = np.arange(10)
        mask = values > 5
        replacement = np.zeros(sum(mask))

        # expect failure due to lack of parallelism
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, values, mask, replacement)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
3238
3239class TestParforsMisc(TestParforsBase):
3240    """
3241    Tests miscellaneous parts of ParallelAccelerator use.
3242    """
3243    _numba_parallel_test_ = False
3244
3245    def check(self, pyfunc, *args, **kwargs):
3246        cfunc, cpfunc = self.compile_all(pyfunc, *args)
3247        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)
3248
3249    @skip_parfors_unsupported
    def test_no_warn_if_cache_set(self):
        # Runs a parallel=True, cache=True function while recording warnings
        # and asserts that none were raised.

        def pyfunc():
            arr = np.ones(100)
            for i in prange(arr.size):
                arr[i] += i
            return arr

        cfunc = njit(parallel=True, cache=True)(pyfunc)

        with warnings.catch_warnings(record=True) as raised_warnings:
            warnings.simplefilter('always')
            cfunc()

        self.assertEqual(len(raised_warnings), 0)

        # Make sure the dynamic globals flag is False for the one compiled
        # overload.
        has_dynamic_globals = [cres.library.has_dynamic_globals
                               for cres in cfunc.overloads.values()]
        self.assertEqual(has_dynamic_globals, [False])
3270
3271    @skip_parfors_unsupported
3272    def test_statement_reordering_respects_aliasing(self):
3273        def impl():
3274            a = np.zeros(10)
3275            a[1:8] = np.arange(0, 7)
3276            print('a[3]:', a[3])
3277            print('a[3]:', a[3])
3278            return a
3279
3280        cres = self.compile_parallel(impl, ())
3281        with captured_stdout() as stdout:
3282            cres.entry_point()
3283        for line in stdout.getvalue().splitlines():
3284            self.assertEqual('a[3]: 2.0', line)
3285
3286    @skip_parfors_unsupported
3287    def test_parfor_ufunc_typing(self):
3288        def test_impl(A):
3289            return np.isinf(A)
3290
3291        A = np.array([np.inf, 0.0])
3292        cfunc = njit(parallel=True)(test_impl)
3293        # save global state
3294        old_seq_flag = numba.parfors.parfor.sequential_parfor_lowering
3295        try:
3296            numba.parfors.parfor.sequential_parfor_lowering = True
3297            np.testing.assert_array_equal(test_impl(A), cfunc(A))
3298        finally:
3299            # recover global state
3300            numba.parfors.parfor.sequential_parfor_lowering = old_seq_flag
3301
3302    @skip_parfors_unsupported
3303    def test_init_block_dce(self):
3304        # issue4690
3305        def test_impl():
3306            res = 0
3307            arr = [1,2,3,4,5]
3308            numba.parfors.parfor.init_prange()
3309            dummy = arr
3310            for i in numba.prange(5):
3311                res += arr[i]
3312            return res + dummy[2]
3313
3314        self.assertTrue(get_init_block_size(test_impl, ()) == 0)
3315
3316    @skip_parfors_unsupported
    def test_alias_analysis_for_parfor1(self):
        # The kernel accumulates a counter in a plain range loop and then
        # uses it as the shape passed to np.zeros.
        def test_impl():
            acc = 0
            for _ in range(4):
                acc += 1

            data = np.zeros((acc,))
            return data

        self.check(test_impl)
3327
3328    @skip_parfors_unsupported
3329    def test_no_state_change_in_gufunc_lowering_on_error(self):
3330        # tests #5098, if there's an exception arising in gufunc lowering the
3331        # sequential_parfor_lowering global variable should remain as False on
3332        # stack unwind.
3333
3334        @register_pass(mutates_CFG=True, analysis_only=False)
3335        class BreakParfors(AnalysisPass):
3336            _name = "break_parfors"
3337
3338            def __init__(self):
3339                AnalysisPass.__init__(self)
3340
3341            def run_pass(self, state):
3342                for blk in state.func_ir.blocks.values():
3343                    for stmt in blk.body:
3344                        if isinstance(stmt, numba.parfors.parfor.Parfor):
3345                            # races should be a set(), that list is iterable
3346                            # permits it to get through to the
3347                            # _create_gufunc_for_parfor_body routine at which
3348                            # point it needs to be a set so e.g. set.difference
3349                            # can be computed, this therefore creates an error
3350                            # in the right location.
3351                            stmt.races = []
3352                    return True
3353
3354
3355        class BreakParforsCompiler(CompilerBase):
3356
3357            def define_pipelines(self):
3358                pm = DefaultPassBuilder.define_nopython_pipeline(self.state)
3359                pm.add_pass_after(BreakParfors, IRLegalization)
3360                pm.finalize()
3361                return [pm]
3362
3363
3364        @njit(parallel=True, pipeline_class=BreakParforsCompiler)
3365        def foo():
3366            x = 1
3367            for _ in prange(1):
3368                x += 1
3369            return x
3370
3371        # assert default state for global
3372        self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
3373
3374        with self.assertRaises(errors.LoweringError) as raises:
3375            foo()
3376
3377        self.assertIn("'list' object has no attribute 'difference'",
3378                      str(raises.exception))
3379
3380        # assert state has not changed
3381        self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
3382
3383    @skip_parfors_unsupported
    def test_issue_5098(self):
        # Registers an opaque dummy type with a parallel overloaded method,
        # calls that method from a non-parallel njit function and asserts
        # that sequential_parfor_lowering is False both before and after.
        class DummyType(types.Opaque):
            pass

        dummy_type = DummyType("my_dummy")
        register_model(DummyType)(models.OpaqueModel)

        class Dummy(object):
            pass

        @typeof_impl.register(Dummy)
        def typeof_Dummy(val, c):
            return dummy_type

        @unbox(DummyType)
        def unbox_index(typ, obj, c):
            return NativeValue(c.context.get_dummy_value())

        # the overload itself is compiled with parallel=True
        @overload_method(DummyType, "method1", jit_options={"parallel":True})
        def _get_method1(obj, arr, func):
            def _foo(obj, arr, func):
                def baz(a, f):
                    c = a.copy()
                    c[np.isinf(a)] = np.nan
                    return f(c)

                length = len(arr)
                output_arr = np.empty(length, dtype=np.float64)
                # two prange loops with different trip counts, each calling
                # the nested function baz
                for i in prange(length):
                    output_arr[i] = baz(arr[i], func)
                for i in prange(length - 1):
                    output_arr[i] += baz(arr[i], func)
                return output_arr
            return _foo

        @njit
        def bar(v):
            return v.mean()

        @njit
        def test1(d):
            return d.method1(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), bar)

        save_state = numba.parfors.parfor.sequential_parfor_lowering
        self.assertFalse(save_state)
        try:
            test1(Dummy())
            self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
        finally:
            # always set the sequential_parfor_lowering state back to the
            # original state
            numba.parfors.parfor.sequential_parfor_lowering = save_state
3436
3437    @skip_parfors_unsupported
    def test_oversized_tuple_as_arg_to_kernel(self):
        # With NUMBA_PARFOR_MAX_TUPLE_SIZE overridden to '3', reading a
        # 4-element tuple inside a prange loop is expected to raise
        # UnsupportedParforsError with a message about the tuple.

        @njit(parallel=True)
        def oversize_tuple():
            big_tup = (1,2,3,4)
            z = 0
            for x in prange(10):
                z += big_tup[0]
            return z

        with override_env_config('NUMBA_PARFOR_MAX_TUPLE_SIZE', '3'):
            with self.assertRaises(errors.UnsupportedParforsError) as raises:
                oversize_tuple()

        errstr = str(raises.exception)
        self.assertIn("Use of a tuple", errstr)
        self.assertIn("in a parallel region", errstr)
3455
3456    @skip_parfors_unsupported
    def test_issue5167(self):
        # Nested prange loops over both axes of a 2D array, writing an
        # element-wise ratio into an output array created with np.full.
        # NOTE(review): the test name suggests this reproduces issue 5167;
        # the kernel is kept exactly as written for that reason.

        def ndvi_njit(img_nir, img_red):
            fillvalue = 0
            out_img = np.full(img_nir.shape, fillvalue, dtype=img_nir.dtype)
            dims = img_nir.shape
            for y in prange(dims[0]):
                for x in prange(dims[1]):
                    out_img[y, x] = ((img_nir[y, x] - img_red[y, x]) /
                                     (img_nir[y, x] + img_red[y, x]))
            return out_img

        tile_shape = (4, 4)
        # both inputs are drawn from [1.0, 10000.0), so the denominator in the
        # kernel is always positive
        array1 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
        array2 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
        # NOTE(review): self.check is defined outside this excerpt; presumably
        # it compares the parallel result against the other variants - confirm
        self.check(ndvi_njit, array1, array2)
3473
3474    @skip_parfors_unsupported
    def test_issue5065(self):
        # A prange loop whose body calls a jitted function received as an
        # argument, with extra arguments supplied by unpacking a tuple.

        def reproducer(a, dist, dist_args):
            result = np.zeros((a.shape[0], a.shape[0]), dtype=np.float32)
            for i in prange(a.shape[0]):
                # only pairs with j > i are computed; each value is stored at
                # both [i, j] and [j, i]
                for j in range(i + 1, a.shape[0]):
                    d = dist(a[i], a[j], *dist_args)
                    result[i, j] = d
                    result[j, i] = d
            return result

        @njit
        def euclidean(x, y):
            # square root of the sum of squared element-wise differences
            result = 0.0
            for i in range(x.shape[0]):
                result += (x[i] - y[i]) ** 2
            return np.sqrt(result)

        a = np.random.random(size=(5, 2))

        # `got` comes from the parallel-compiled reproducer, `expected` from
        # calling the undecorated function directly; both use the jitted
        # `euclidean` and an empty tuple of extra arguments
        got = njit(parallel=True)(reproducer)(a.copy(), euclidean,())
        expected = reproducer(a.copy(), euclidean,())

        np.testing.assert_allclose(got, expected)
3499
3500    @skip_parfors_unsupported
    def test_issue5001(self):
        # A prange loop that indexes a tuple of arrays with the loop variable
        # and stores the length of each array into a list.

        def test_numba_parallel(myarray):
            result = [0] * len(myarray)
            for i in prange(len(myarray)):
                result[i] = len(myarray[i])
            return result

        # a tuple of two arrays with different lengths (100 and 50)
        myarray = (np.empty(100),np.empty(50))
        self.check(test_numba_parallel, myarray)
3511
3512    @skip_parfors_unsupported
    def test_issue3169(self):
        # A jitted function is called from inside a prange loop and handed the
        # enclosing function's argument, here a tuple of two lists.

        @njit
        def foo(grids):
            pass

        @njit(parallel=True)
        def bar(grids):
            for x in prange(1):
                foo(grids)

        # returns nothing, just check it compiles
        bar(([1],) * 2)
3526
3527    @disabled_test
    def test_issue4846(self):
        # Passes a namedtuple with string fields to a nogil-jitted function
        # that is called from inside a prange loop. This test is currently
        # switched off by the @disabled_test decorator.

        mytype = namedtuple("mytype", ("a", "b"))

        def outer(mydata):
            for k in prange(3):
                inner(k, mydata)
            return mydata.a

        # defined after `outer`, but before `outer` is ever compiled or called
        @njit(nogil=True)
        def inner(k, mydata):
            # the tuples are built and then discarded
            f = (k, mydata.a)
            g = (k, mydata.b)

        mydata = mytype(a="a", b="b")

        self.check(outer, mydata)
3545
3546    @skip_parfors_unsupported
    def test_issue3748(self):
        # Sums the elements of a tuple by indexing it with the prange loop
        # variable, accumulating into a scalar.

        def test1b():
            x = (1, 2, 3, 4, 5)
            a = 0
            for i in prange(len(x)):
                a += x[i]
            return a

        # test1b takes no arguments
        self.check(test1b,)
3557
3558    @skip_parfors_unsupported
    def test_issue5277(self):
        # Nested prange loops that write through chained indexing
        # (arr[y][x]) rather than a single tuple index.

        def parallel_test(size, arr):
            for x in prange(size[0]):
                for y in prange(size[1]):
                    # a float value is stored into the integer array
                    arr[y][x] = x * 4.5 + y
            return arr

        size = (10, 10)
        arr = np.zeros(size, dtype=int)

        self.check(parallel_test, size, arr)
3571
3572    @skip_parfors_unsupported
3573    def test_issue5570_ssa_races(self):
3574        @njit(parallel=True)
3575        def foo(src, method, out):
3576            for i in prange(1):
3577                for j in range(1):
3578                    out[i, j] = 1
3579            if method:
3580                out += 1
3581            return out
3582
3583        src = np.zeros((5,5))
3584        method = 57
3585        out = np.zeros((2, 2))
3586
3587        self.assertPreciseEqual(
3588            foo(src, method, out),
3589            foo.py_func(src, method, out)
3590        )
3591
3592    @skip_parfors_unsupported
    def test_issue6095_numpy_max(self):
        # np.max applied to a 2D slice that is assigned inside an ordinary
        # range loop within a parallel=True function.
        @njit(parallel=True)
        def find_maxima_3D_jit(args):
            package = args
            # z_stack is rebound on every iteration; after the loop it refers
            # to the slice taken at the last index
            for index in range(0, 10):
                z_stack = package[index, :, :]
            return np.max(z_stack)

        # fixed seed so the random input is the same on every run
        np.random.seed(0)
        args = np.random.random((10, 10, 10))
        # compiled result must match the pure Python function
        self.assertPreciseEqual(
            find_maxima_3D_jit(args),
            find_maxima_3D_jit.py_func(args),
        )
3607
3608
3609@skip_parfors_unsupported
class TestParforsDiagnostics(TestParforsBase):
    """Checks on the diagnostics recorded for parallel-compiled functions.

    Every test compiles a small function with ``compile_parallel`` and
    inspects the object stored under ``'parfor_diagnostics'`` in the compiled
    function's metadata.
    """

    def check(self, pyfunc, *args, **kwargs):
        """Compile ``pyfunc`` with ``compile_all`` and compare the variants.

        ``args`` are used for compilation and are passed on, together with
        ``kwargs``, to ``check_parfors_vs_others``.
        """
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def assert_fusion_equivalence(self, got, expected):
        """Assert ``got`` and ``expected`` are equal once both are re-based.

        Both arguments are fusion adjacency mappings of the form
        ``{id: [id, ...]}``; they are normalised with ``_fusion_equivalent``
        before being compared.
        """
        self.assertEqual(self._fusion_equivalent(got),
                         self._fusion_equivalent(expected))

    def _fusion_equivalent(self, thing):
        """Return ``thing`` shifted so that its smallest key becomes 0.

        The same shift is applied to every key and to every entry of every
        value list. An empty mapping yields an empty dict.
        """
        # parfors indexes the Parfors class instance id's from wherever the
        # internal state happens to be. To assert fusion equivalence we just
        # check that the relative difference between fusion adjacency lists
        # is the same. For example:
        # {3: [2, 1]} is the same as {13: [12, 11]}
        # this function strips the indexing etc out returning something suitable
        # for checking equivalence
        if not thing:
            # nothing to re-base; min() of an empty mapping would raise
            return {}
        base = min(thing)
        return {key - base: [x - base for x in thing[key]] for key in thing}

    def assert_diagnostics(self, diagnostics, parfors_count=None,
                           fusion_info=None, nested_fusion_info=None,
                           replaced_fns=None, hoisted_allocations=None):
        """Check selected properties of a parfor ``diagnostics`` object.

        Any keyword argument left as ``None`` is not checked.

        parfors_count
            Expected value of ``diagnostics.count_parfors()``.
        fusion_info
            Expected fusion adjacency mapping, compared against
            ``diagnostics.fusion_info`` up to a constant offset.
        nested_fusion_info
            As ``fusion_info``, for ``diagnostics.nested_fusion_info``.
        replaced_fns
            Iterable of entries; each must equal the first item of at least
            one of the values of ``diagnostics.replaced_fns``.
        hoisted_allocations
            Expected number of items returned by
            ``diagnostics.hoisted_allocations()``.

        Whatever is checked, ``diagnostics.dump`` is finally called with the
        arguments 1 to 4 while stdout is captured.
        """
        if parfors_count is not None:
            self.assertEqual(parfors_count, diagnostics.count_parfors())
        if fusion_info is not None:
            self.assert_fusion_equivalence(diagnostics.fusion_info,
                                           fusion_info)
        if nested_fusion_info is not None:
            self.assert_fusion_equivalence(diagnostics.nested_fusion_info,
                                           nested_fusion_info)
        if replaced_fns is not None:
            repl = diagnostics.replaced_fns.values()
            for x in replaced_fns:
                if not any(replaced[0] == x for replaced in repl):
                    msg = "Replacement for %s was not found. Had %s" % (x, repl)
                    self.fail(msg)

        if hoisted_allocations is not None:
            hoisted_allocs = diagnostics.hoisted_allocations()
            self.assertEqual(hoisted_allocations, len(hoisted_allocs))

        # just make sure that the dump() function doesn't have an issue!
        with captured_stdout():
            for x in range(1, 5):
                diagnostics.dump(x)

    def test_array_expr(self):
        # an array expression over two freshly allocated arrays
        def test_impl():
            n = 10
            a = np.ones(n)
            b = np.zeros(n)
            return a + b

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1,
                                fusion_info={3: [4, 5]})

    def test_prange(self):
        # a single explicit prange loop
        def test_impl():
            n = 10
            a = np.empty(n)
            for i in prange(n):
                a[i] = i * 10
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1)

    def test_nested_prange(self):
        # a prange loop nested directly inside another prange loop
        def test_impl():
            n = 10
            a = np.empty((n, n))
            for i in prange(n):
                for j in prange(n):
                    a[i, j] = i * 10 + j
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=2,
                                nested_fusion_info={2: [1]})

    def test_function_replacement(self):
        # np.argmin is expected to show up among the replaced functions
        def test_impl():
            n = 10
            a = np.ones(n)
            b = np.argmin(a)
            return b

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1,
                                fusion_info={2: [3]},
                                replaced_fns=[('argmin', 'numpy'),])

    def test_reduction(self):
        # a scalar accumulated across the iterations of a prange loop
        def test_impl():
            n = 10
            a = np.ones(n + 1) # prevent fusion
            acc = 0
            for i in prange(n):
                acc += a[i]
            return acc

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=2)

    def test_setitem(self):
        # assignment of a scalar to a full slice of an array
        def test_impl():
            n = 10
            a = np.ones(n)
            a[:] = 7
            return a

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, parfors_count=1)

    def test_allocation_hoisting(self):
        # an array allocated afresh inside every iteration of a prange loop;
        # exactly one hoisted allocation is expected to be reported
        def test_impl():
            n = 10
            m = 5
            acc = 0
            for i in prange(n):
                temp = np.zeros((m,)) # the np.empty call should get hoisted
                for j in range(m):
                    temp[j] = i
                acc += temp[-1]
            return acc

        self.check(test_impl,)
        cpfunc = self.compile_parallel(test_impl, ())
        diagnostics = cpfunc.metadata['parfor_diagnostics']
        self.assert_diagnostics(diagnostics, hoisted_allocations=1)
3761
3762
# Run the tests through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
3765