1# 2# Copyright (c) 2017 Intel Corporation 3# SPDX-License-Identifier: BSD-2-Clause 4# 5 6 7from math import sqrt 8import numbers 9import re 10import sys 11import dis 12import platform 13import types as pytypes 14import warnings 15from functools import reduce 16import numpy as np 17from numpy.random import randn 18import operator 19from collections import defaultdict, namedtuple 20 21import numba.parfors.parfor 22from numba import njit, prange, set_num_threads, get_num_threads 23from numba.core import (types, utils, typing, errors, ir, rewrites, 24 typed_passes, inline_closurecall, config, compiler, cpu) 25from numba.extending import (overload_method, register_model, 26 typeof_impl, unbox, NativeValue, models) 27from numba.core.registry import cpu_target 28from numba.core.annotations import type_annotations 29from numba.core.ir_utils import (find_callname, guard, build_definitions, 30 get_definition, is_getitem, is_setitem, 31 index_var_of_get_setitem) 32from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty 33from numba.core.bytecode import ByteCodeIter 34from numba.core.compiler import (compile_isolated, Flags, CompilerBase, 35 DefaultPassBuilder) 36from numba.core.compiler_machinery import register_pass, AnalysisPass 37from numba.core.typed_passes import IRLegalization 38from numba.tests.support import (TestCase, captured_stdout, MemoryLeakMixin, 39 override_env_config, linux_only, tag, 40 skip_parfors_unsupported, _32bit, needs_blas, 41 needs_lapack, disabled_test, skip_unless_scipy) 42import cmath 43import unittest 44 45 46x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test') 47 48_GLOBAL_INT_FOR_TESTING1 = 17 49_GLOBAL_INT_FOR_TESTING2 = 5 50 51TestNamedTuple = namedtuple('TestNamedTuple', ('part0', 'part1')) 52 53class TestParforsBase(TestCase): 54 """ 55 Base class for testing parfors. 
def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs):
    """
    Checks python, njit and parfor impls produce the same result.

    Every implementation is called with its own copy of `args`, so an
    implementation that mutates its inputs cannot affect the others.

    Arguments:
        pyfunc - the python function to test
        cfunc - CompilerResult from njit of pyfunc
        cpfunc - CompilerResult from njit(parallel=True) of pyfunc
        args - arguments for the function being tested
    Keyword Arguments:
        scheduler_type - 'signed', 'unsigned' or None, default is None.
                       Supply in cases where the presence of a specific
                       scheduler is to be asserted.
        check_scheduling - default is True. When true,
                       self.check_scheduling(cpfunc, scheduler_type) is
                       called after the results have been compared.
        check_fastmath - accepted and discarded; it is removed from kwargs
                       so that it is not forwarded to the comparison.
        fastmath_pcres - a fastmath parallel compile result, if supplied
                       will be run to make sure the result is correct
        Remaining kwargs are passed to np.testing.assert_almost_equal

    Raises:
        ValueError - if an argument is not an ndarray, NumPy scalar,
                     number, tuple or list.
    """
    scheduler_type = kwargs.pop('scheduler_type', None)
    # Popped only to keep it out of the kwargs handed to
    # np.testing.assert_almost_equal; the value itself is not used here.
    kwargs.pop('check_fastmath', None)
    fastmath_pcres = kwargs.pop('fastmath_pcres', None)
    check_scheduling = kwargs.pop('check_scheduling', True)

    def copy_args(*args):
        # Build a fresh argument tuple; mutable containers are copied,
        # plain numbers and tuples are passed through unchanged.
        new_args = []
        for x in args:
            if isinstance(x, np.ndarray):
                new_args.append(x.copy('k'))
            elif isinstance(x, np.number):
                new_args.append(x.copy())
            elif isinstance(x, (numbers.Number, tuple)):
                new_args.append(x)
            elif isinstance(x, list):
                new_args.append(x[:])
            else:
                raise ValueError('Unsupported argument type encountered')
        return tuple(new_args)

    # python result
    py_expected = pyfunc(*copy_args(*args))

    # njit result
    njit_output = cfunc.entry_point(*copy_args(*args))

    # parfor result
    parfor_output = cpfunc.entry_point(*copy_args(*args))

    np.testing.assert_almost_equal(njit_output, py_expected, **kwargs)
    np.testing.assert_almost_equal(parfor_output, py_expected, **kwargs)

    self.assertEqual(type(njit_output), type(parfor_output))

    if check_scheduling:
        self.check_scheduling(cpfunc, scheduler_type)

    # if requested check fastmath variant
    if fastmath_pcres is not None:
        parfor_fastmath_output = fastmath_pcres.entry_point(*copy_args(*args))
        np.testing.assert_almost_equal(parfor_fastmath_output, py_expected,
                                       **kwargs)
172 scheduler_str += '_' + scheduler_type 173 else: 174 msg = "Unknown scheduler_type specified: %s" 175 raise ValueError(msg % scheduler_type) 176 177 self.assertIn(scheduler_str, cres.library.get_llvm_str()) 178 179 def _filter_mod(self, mod, magicstr, checkstr=None): 180 """ helper function to filter out modules by name""" 181 filt = [x for x in mod if magicstr in x.name] 182 if checkstr is not None: 183 for x in filt: 184 assert checkstr in str(x) 185 return filt 186 187 def _get_gufunc_modules(self, cres, magicstr, checkstr=None): 188 """ gets the gufunc LLVM Modules""" 189 _modules = [x for x in cres.library._codegen._engine._ee._modules] 190 return self._filter_mod(_modules, magicstr, checkstr=checkstr) 191 192 def _get_gufunc_info(self, cres, fn): 193 """ helper for gufunc IR/asm generation""" 194 # get the gufunc modules 195 magicstr = '__numba_parfor_gufunc' 196 gufunc_mods = self._get_gufunc_modules(cres, magicstr) 197 x = dict() 198 for mod in gufunc_mods: 199 x[mod.name] = fn(mod) 200 return x 201 202 def _get_gufunc_ir(self, cres): 203 """ 204 Returns the IR of the gufuncs used as parfor kernels 205 as a dict mapping the gufunc name to its IR. 206 207 Arguments: 208 cres - a CompileResult from `njit(parallel=True, ...)` 209 """ 210 return self._get_gufunc_info(cres, str) 211 212 def _get_gufunc_asm(self, cres): 213 """ 214 Returns the assembly of the gufuncs used as parfor kernels 215 as a dict mapping the gufunc name to its assembly. 216 217 Arguments: 218 cres - a CompileResult from `njit(parallel=True, ...)` 219 """ 220 tm = cres.library._codegen._tm 221 def emit_asm(mod): 222 return str(tm.emit_assembly(mod)) 223 return self._get_gufunc_info(cres, emit_asm) 224 225 def assert_fastmath(self, pyfunc, sig): 226 """ 227 Asserts that the fastmath flag has some effect in that suitable 228 instructions are now labelled as `fast`. Whether LLVM can actually do 229 anything to optimise better now the derestrictions are supplied is 230 another matter! 
def blackscholes_impl(sptprice, strike, rate, volatility, timev):
    """
    Blackscholes example used as a parfor test kernel.

    All five arguments are combined with elementwise arithmetic and the
    NumPy functions log, sqrt and exp, so scalars or arrays may be passed.
    The value returned is the `put` quantity derived from the `call` one.
    """
    # terms shared by d1 and d2
    half_vol_sq = 0.5 * volatility * volatility
    log_ratio = np.log(sptprice / strike)
    vol_root_t = volatility * np.sqrt(timev)

    d1 = (((rate + half_vol_sq) * timev) + log_ratio) / vol_root_t
    d2 = d1 - vol_root_t

    # the N(d) terms are evaluated as the linear expression 0.5 + 0.5*2.0*d
    n_of_d1 = 0.5 + 0.5 * 2.0 * d1
    n_of_d2 = 0.5 + 0.5 * 2.0 * d2

    future_value = strike * np.exp(- rate * timev)
    weighted_future = future_value * n_of_d2
    call_value = sptprice * n_of_d1 - weighted_future
    put_value = call_value - future_value + sptprice
    return put_value
def get_optimized_numba_ir(test_func, args, **kws):
    """
    Lowers `test_func` to Numba IR and runs the inlining, rewrite, type
    inference, pre-parfor and parfor passes over it.

    Arguments:
        test_func - the python function to process
        args - stored on the TestPipeline state as `args` and used by the
               type inference stage
        kws - if any are supplied the dict is handed to
              cpu.ParallelOptions, otherwise ParallelOptions(True) is used

    Returns a tuple (test_ir, tp) of the processed IR and the TestPipeline
    whose state carries the typemap, return type and calltypes.
    """
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx)
    test_ir = compiler.run_frontend(test_func)
    if kws:
        options = cpu.ParallelOptions(kws)
    else:
        options = cpu.ParallelOptions(True)

    # tp.state.func_ir is the same object as test_ir, so the passes below
    # that receive tp.state.func_ir operate on test_ir.
    tp = TestPipeline(typingctx, targetctx, args, test_ir)

    with cpu_target.nested_context(typingctx, targetctx):
        typingctx.refresh()
        targetctx.refresh()

        # closure inlining runs before the 'before-inference' rewrites
        inline_pass = inline_closurecall.InlineClosureCallPass(tp.state.func_ir,
                                                               options,
                                                               typed=True)
        inline_pass.run()

        rewrites.rewrite_registry.apply('before-inference', tp.state)

        # type inference fills in the state fields that were None
        tp.state.typemap, tp.state.return_type, tp.state.calltypes = \
            typed_passes.type_inference_stage(tp.state.typingctx, tp.state.func_ir,
                                              tp.state.args, None)

        # the annotation object is constructed but its result is not kept
        type_annotations.TypeAnnotation(
            func_ir=tp.state.func_ir,
            typemap=tp.state.typemap,
            calltypes=tp.state.calltypes,
            lifted=(),
            lifted_from=None,
            args=tp.state.args,
            return_type=tp.state.return_type,
            html_output=config.HTML)

        # shared by the pre-parfor pass (replaced_fns) and the parfor pass
        diagnostics = numba.parfors.parfor.ParforDiagnostics()

        preparfor_pass = numba.parfors.parfor.PreParforPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.typingctx, options,
            swapped=diagnostics.replaced_fns)
        preparfor_pass.run()

        rewrites.rewrite_registry.apply('after-inference', tp.state)

        flags = compiler.Flags()
        parfor_pass = numba.parfors.parfor.ParforPass(
            tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
            tp.state.return_type, tp.state.typingctx, options, flags,
            diagnostics=diagnostics)
        parfor_pass.run()
        # rebuild the definitions table from the blocks left by the passes
        test_ir._definitions = build_definitions(test_ir.blocks)

    return test_ir, tp
def _count_arrays_inner(blocks, typemap):
    """
    Counts array-typed assignment targets in `blocks`.

    For each Parfor statement the function recurses into a copy of its
    loop body, with the init block stored under key 0, and adds that
    result to the total. Within one call, assignment targets whose
    typemap entry is ArrayCompatible are counted once per distinct name.
    """
    nested_total = 0
    array_names = set()

    for blk in blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, numba.parfors.parfor.Parfor):
                inner_blocks = stmt.loop_body.copy()
                inner_blocks[0] = stmt.init_block
                nested_total += _count_arrays_inner(inner_blocks, typemap)
            if not isinstance(stmt, ir.Assign):
                continue
            target_name = stmt.target.name
            if isinstance(typemap[target_name], types.ArrayCompatible):
                array_names.add(target_name)

    return nested_total + len(array_names)
def _uses_indices(f_ir, index, index_set):
    """
    Reports whether `index` refers to one of the names in `index_set`.

    Returns True when the name of `index` is in `index_set`, or when the
    definition of `index` found in `f_ir` is a 'build_tuple' expression
    with at least one item whose name is in `index_set`. Returns False
    otherwise.
    """
    if index.name in index_set:
        return True

    definition = guard(get_definition, f_ir, index)
    is_tuple_build = (isinstance(definition, ir.Expr)
                      and definition.op == 'build_tuple')
    if not is_tuple_build:
        return False

    item_names = {item.name for item in definition.items}
    return bool(item_names & index_set)
@skip_parfors_unsupported
def test_pi(self):
    """
    Checks a kernel built from two np.random.ranf arrays against the
    python and njit results (to one decimal place), then checks the
    parfor and array counts reported for it.
    """
    def test_impl(n):
        x = 2 * np.random.ranf(n) - 1
        y = 2 * np.random.ranf(n) - 1
        return 4 * np.sum(x**2 + y**2 < 1) / n

    self.check(test_impl, 100000, decimal=1)
    # assertEqual rather than assertTrue(a == b): on failure the message
    # then shows the count that was actually produced.
    self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
    self.assertEqual(countArrays(test_impl, (types.intp,)), 0)
@skip_parfors_unsupported
def test_kmeans(self):
    """
    Runs example_kmeans_test on seeded random data through self.check
    (to one decimal place) and asserts that
    countNonParforArrayAccesses reports zero for it.
    """
    np.random.seed(0)
    num_points, num_dims = 1024, 10
    num_centers = 3
    num_iters = 3

    # A is drawn before the centroids, both from the seeded generator
    A = np.random.ranf((num_points, num_dims))
    init_centroids = np.random.ranf((num_centers, num_dims))
    self.check(example_kmeans_test, A, num_centers, num_iters,
               init_centroids, decimal=1)

    # TODO: count parfors after k-means fusion is working
    # requires recursive parfor counting
    float_2d = types.Array(types.float64, 2, 'C')
    arg_typs = (float_2d, types.intp, types.intp, float_2d)
    stray_accesses = countNonParforArrayAccesses(example_kmeans_test,
                                                 arg_typs)
    self.assertTrue(stray_accesses == 0)
@skip_parfors_unsupported
def test_simple01(self):
    """
    np.ones(()) is expected to make self.check fail with an
    AssertionError reporting that '@do_scheduling' was not found.
    """
    def test_impl():
        return np.ones(())

    expected_text = "\'@do_scheduling\' not found"
    with self.assertRaises(AssertionError) as caught:
        self.check(test_impl)
    failure_text = str(caught.exception)
    self.assertIn(expected_text, failure_text)
@skip_parfors_unsupported
@needs_blas
def test_simple20(self):
    """
    np.dot of two matrices is expected to make self.check fail with an
    AssertionError reporting that '@do_scheduling' was not found.
    """
    def test_impl(v1, v2, m1, m2):
        return np.dot(m1, m2)

    expected_text = "\'@do_scheduling\' not found"
    # gemm is left to BLAS
    with self.assertRaises(AssertionError) as caught:
        self.check(test_impl, *self.simple_args)
    failure_text = str(caught.exception)
    self.assertIn(expected_text, failure_text)
@skip_parfors_unsupported
def test_arange(self):
    """
    Checks np.arange called with one, two and three arguments for
    integer, float and complex stop values.
    """
    # test with stop only
    def test_impl1(n):
        return np.arange(n)
    # start and stop
    def test_impl2(s, n):
        # `s` was previously ignored, which made this a duplicate of the
        # stop-only case instead of exercising the two-argument form.
        return np.arange(s, n)
    # start, stop, step
    def test_impl3(s, n, t):
        return np.arange(s, n, t)

    for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]:
        self.check(test_impl1, arg)
        self.check(test_impl2, 2, arg)
        self.check(test_impl3, 2, arg, 2)
@skip_parfors_unsupported
def test_size_assertion(self):
    """
    Adding two arrays of equal length passes self.check; calling the
    parallel-jitted kernel with lengths 10 and 9 is expected to raise
    an AssertionError mentioning the mismatched sizes of A and B.
    """
    def test_impl(m, n):
        A = np.ones(m)
        B = np.ones(n)
        return np.sum(A + B)

    self.check(test_impl, 10, 10)

    expected_text = "Sizes of A, B do not match"
    with self.assertRaises(AssertionError) as caught:
        parallel_impl = njit(parallel=True)(test_impl)
        parallel_impl(10, 9)
    self.assertIn(expected_text, str(caught.exception))
@skip_parfors_unsupported
def test_randoms(self):
    """
    Sums six arrays drawn from different np.random functions and
    compares the parallel result with the python one within a relative
    tolerance of 0.05, then asserts a parfor count of one.
    """
    def test_impl(n):
        A = np.random.standard_normal(size=(n, n))
        B = np.random.randn(n, n)
        C = np.random.normal(0.0, 1.0, (n, n))
        D = np.random.chisquare(1.0, (n, n))
        E = np.random.randint(1, high=3, size=(n, n))
        F = np.random.triangular(1, 2, 3, (n, n))
        return np.sum(A+B+C+D+E+F)

    size = 128
    signature = (numba.typeof(size),)
    compiled = self.compile_parallel(test_impl, signature)
    parallel_result = compiled.entry_point(size)
    python_result = test_impl(size)
    # check results within 5% since random numbers generated in parallel
    np.testing.assert_allclose(parallel_result, python_result, rtol=0.05)

    parfor_count = countParfors(test_impl, (types.int64, ))
    self.assertTrue(parfor_count == 1)
@skip_parfors_unsupported
def test_cfg(self):
    """
    Checks a prange loop whose body holds an inner range loop with a
    conditional store, comparing python, njit and parfor results.
    """
    # from issue #2477
    def test_impl(x, is_positive, N):
        # each of the two prange iterations handles one half of [0, N)
        for i in numba.prange(2):
            for j in range( i*N//2, (i+1)*N//2 ):
                is_positive[j] = 0
                if x[j] > 0:
                    is_positive[j] = 1

        return is_positive

    N = 100
    x = np.random.rand(N)
    is_positive = np.zeros(N)
    self.check(test_impl, x, is_positive, N)
@skip_parfors_unsupported
def test_min(self):
    """
    Checks A.min() and np.min(A) on float, int32, two-dimensional and
    all-inf inputs, then checks that an empty int64 array makes the
    parallel build raise ValueError with the expected message.
    """
    def test_impl1(A):
        return A.min()

    def test_impl2(A):
        return np.min(A)

    n = 211
    inputs = (
        np.random.ranf(n),
        np.random.randint(10, size=n).astype(np.int32),
        np.random.ranf((n, n)),  # test multi-dimensional array
        np.array([np.inf, np.inf]),
    )
    impls = (test_impl1, test_impl2)
    for impl in impls:
        for arr in inputs:
            self.check(impl, arr)

    # checks that zero-size array input raises
    expected_text = ("zero-size array to reduction operation "
                     "minimum which has no identity")
    for impl in impls:
        compiled = self.compile_parallel(impl, (types.int64[:],))
        with self.assertRaises(ValueError) as caught:
            compiled.entry_point(np.array([], dtype=np.int64))
        self.assertIn(expected_text, str(caught.exception))

@skip_parfors_unsupported
def test_max(self):
    """
    Checks A.max() and np.max(A) on float, int32, two-dimensional and
    all-negative-inf inputs, then checks that an empty int64 array makes
    the parallel build raise ValueError with the expected message.
    """
    def test_impl1(A):
        return A.max()

    def test_impl2(A):
        return np.max(A)

    n = 211
    inputs = (
        np.random.ranf(n),
        np.random.randint(10, size=n).astype(np.int32),
        np.random.ranf((n, n)),  # test multi-dimensional array
        np.array([-np.inf, -np.inf]),
    )
    impls = (test_impl1, test_impl2)
    for impl in impls:
        for arr in inputs:
            self.check(impl, arr)

    # checks that zero-size array input raises
    expected_text = ("zero-size array to reduction operation "
                     "maximum which has no identity")
    for impl in impls:
        compiled = self.compile_parallel(impl, (types.int64[:],))
        with self.assertRaises(ValueError) as caught:
            compiled.entry_point(np.array([], dtype=np.int64))
        self.assertIn(expected_text, str(caught.exception))
@skip_parfors_unsupported 1068 def test_use_of_reduction_var1(self): 1069 def test_impl(): 1070 acc = 0 1071 for i in prange(1): 1072 acc = cmath.sqrt(acc) 1073 return acc 1074 1075 # checks that invalid use of reduction variable is detected 1076 msg = ("Use of reduction variable acc in an unsupported reduction function.") 1077 with self.assertRaises(ValueError) as e: 1078 pcfunc = self.compile_parallel(test_impl, ()) 1079 self.assertIn(msg, str(e.exception)) 1080 1081 @skip_parfors_unsupported 1082 def test_argmin(self): 1083 def test_impl1(A): 1084 return A.argmin() 1085 1086 def test_impl2(A): 1087 return np.argmin(A) 1088 1089 n = 211 1090 A = np.array([1., 0., 2., 0., 3.]) 1091 B = np.random.randint(10, size=n).astype(np.int32) 1092 C = np.random.ranf((n, n)) # test multi-dimensional array 1093 self.check(test_impl1, A) 1094 self.check(test_impl1, B) 1095 self.check(test_impl1, C) 1096 self.check(test_impl2, A) 1097 self.check(test_impl2, B) 1098 self.check(test_impl2, C) 1099 1100 # checks that 0d array input raises 1101 msg = 'attempt to get argmin of an empty sequence' 1102 for impl in (test_impl1, test_impl2): 1103 pcfunc = self.compile_parallel(impl, (types.int64[:],)) 1104 with self.assertRaises(ValueError) as e: 1105 pcfunc.entry_point(np.array([], dtype=np.int64)) 1106 self.assertIn(msg, str(e.exception)) 1107 1108 @skip_parfors_unsupported 1109 def test_argmax(self): 1110 def test_impl1(A): 1111 return A.argmax() 1112 1113 def test_impl2(A): 1114 return np.argmax(A) 1115 1116 n = 211 1117 A = np.array([1., 0., 3., 2., 3.]) 1118 B = np.random.randint(10, size=n).astype(np.int32) 1119 C = np.random.ranf((n, n)) # test multi-dimensional array 1120 self.check(test_impl1, A) 1121 self.check(test_impl1, B) 1122 self.check(test_impl1, C) 1123 self.check(test_impl2, A) 1124 self.check(test_impl2, B) 1125 self.check(test_impl2, C) 1126 1127 # checks that 0d array input raises 1128 msg = 'attempt to get argmax of an empty sequence' 1129 for impl in (test_impl1, 
test_impl2): 1130 pcfunc = self.compile_parallel(impl, (types.int64[:],)) 1131 with self.assertRaises(ValueError) as e: 1132 pcfunc.entry_point(np.array([], dtype=np.int64)) 1133 self.assertIn(msg, str(e.exception)) 1134 1135 @skip_parfors_unsupported 1136 def test_parfor_array_access1(self): 1137 # signed index of the prange generated by sum() should be replaced 1138 # resulting in array A to be eliminated (see issue #2846) 1139 def test_impl(n): 1140 A = np.ones(n) 1141 return A.sum() 1142 1143 n = 211 1144 self.check(test_impl, n) 1145 self.assertEqual(countArrays(test_impl, (types.intp,)), 0) 1146 1147 @skip_parfors_unsupported 1148 def test_parfor_array_access2(self): 1149 # in this test, the prange index has the same name (i) in two loops 1150 # thus, i has multiple definitions and is harder to replace 1151 def test_impl(n): 1152 A = np.ones(n) 1153 m = 0 1154 n = 0 1155 for i in numba.prange(len(A)): 1156 m += A[i] 1157 1158 for i in numba.prange(len(A)): 1159 if m == n: # access in another block 1160 n += A[i] 1161 1162 return m + n 1163 1164 n = 211 1165 self.check(test_impl, n) 1166 self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) 1167 1168 @skip_parfors_unsupported 1169 def test_parfor_array_access3(self): 1170 def test_impl(n): 1171 A = np.ones(n, np.int64) 1172 m = 0 1173 for i in numba.prange(len(A)): 1174 m += A[i] 1175 if m==2: 1176 i = m 1177 1178 n = 211 1179 with self.assertRaises(errors.UnsupportedRewriteError) as raises: 1180 self.check(test_impl, n) 1181 self.assertIn("Overwrite of parallel loop index", str(raises.exception)) 1182 1183 @skip_parfors_unsupported 1184 @needs_blas 1185 def test_parfor_array_access4(self): 1186 # in this test, one index of a multi-dim access should be replaced 1187 # np.dot parallel implementation produces this case 1188 def test_impl(A, b): 1189 return np.dot(A, b) 1190 1191 n = 211 1192 d = 4 1193 A = np.random.ranf((n, d)) 1194 b = np.random.ranf(d) 1195 self.check(test_impl, A, b) 
1196 # make sure the parfor index is replaced in build_tuple of access to A 1197 test_ir, tp = get_optimized_numba_ir( 1198 test_impl, (types.Array(types.float64, 2, 'C'), 1199 types.Array(types.float64, 1, 'C'))) 1200 # this code should have one basic block after optimization 1201 self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks) 1202 block = test_ir.blocks[0] 1203 parfor_found = False 1204 parfor = None 1205 for stmt in block.body: 1206 if isinstance(stmt, numba.parfors.parfor.Parfor): 1207 parfor_found = True 1208 parfor = stmt 1209 1210 self.assertTrue(parfor_found) 1211 build_tuple_found = False 1212 # there should be only one build_tuple 1213 for bl in parfor.loop_body.values(): 1214 for stmt in bl.body: 1215 if (isinstance(stmt, ir.Assign) 1216 and isinstance(stmt.value, ir.Expr) 1217 and stmt.value.op == 'build_tuple'): 1218 build_tuple_found = True 1219 self.assertTrue(parfor.index_var in stmt.value.items) 1220 1221 self.assertTrue(build_tuple_found) 1222 1223 @skip_parfors_unsupported 1224 def test_parfor_dtype_type(self): 1225 # test array type replacement creates proper type 1226 def test_impl(a): 1227 for i in numba.prange(len(a)): 1228 a[i] = a.dtype.type(0) 1229 return a[4] 1230 1231 a = np.ones(10) 1232 self.check(test_impl, a) 1233 1234 @skip_parfors_unsupported 1235 def test_parfor_array_access5(self): 1236 # one dim is slice in multi-dim access 1237 def test_impl(n): 1238 X = np.ones((n, 3)) 1239 y = 0 1240 for i in numba.prange(n): 1241 y += X[i,:].sum() 1242 return y 1243 1244 n = 211 1245 self.check(test_impl, n) 1246 self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) 1247 1248 @skip_parfors_unsupported 1249 @disabled_test # Test itself is problematic, see #3155 1250 def test_parfor_hoist_setitem(self): 1251 # Make sure that read of out is not hoisted. 
1252 def test_impl(out): 1253 for i in prange(10): 1254 out[0] = 2 * out[0] 1255 return out[0] 1256 1257 out = np.ones(1) 1258 self.check(test_impl, out) 1259 1260 @skip_parfors_unsupported 1261 @needs_blas 1262 def test_parfor_generate_fuse(self): 1263 # issue #2857 1264 def test_impl(N, D): 1265 w = np.ones(D) 1266 X = np.ones((N, D)) 1267 Y = np.ones(N) 1268 for i in range(3): 1269 B = (-Y * np.dot(X, w)) 1270 1271 return B 1272 1273 n = 211 1274 d = 3 1275 self.check(test_impl, n, d) 1276 self.assertEqual(countArrayAllocs(test_impl, (types.intp, types.intp)), 4) 1277 self.assertEqual(countParfors(test_impl, (types.intp, types.intp)), 4) 1278 1279 @skip_parfors_unsupported 1280 def test_ufunc_expr(self): 1281 # issue #2885 1282 def test_impl(A, B): 1283 return np.bitwise_and(A, B) 1284 1285 A = np.ones(3, np.uint8) 1286 B = np.ones(3, np.uint8) 1287 B[1] = 0 1288 self.check(test_impl, A, B) 1289 1290 @skip_parfors_unsupported 1291 def test_find_callname_intrinsic(self): 1292 def test_impl(n): 1293 A = unsafe_empty((n,)) 1294 for i in range(n): 1295 A[i] = i + 2.0 1296 return A 1297 1298 # the unsafe allocation should be found even though it is imported 1299 # as a different name 1300 self.assertEqual(countArrayAllocs(test_impl, (types.intp,)), 1) 1301 1302 @skip_parfors_unsupported 1303 def test_reduction_var_reuse(self): 1304 # issue #3139 1305 def test_impl(n): 1306 acc = 0 1307 for i in prange(n): 1308 acc += 1 1309 1310 for i in prange(n): 1311 acc += 2 1312 1313 return acc 1314 self.check(test_impl, 16) 1315 1316 @skip_parfors_unsupported 1317 def test_two_d_array_reduction_reuse(self): 1318 def test_impl(n): 1319 shp = (13, 17) 1320 size = shp[0] * shp[1] 1321 result1 = np.zeros(shp, np.int_) 1322 tmp = np.arange(size).reshape(shp) 1323 1324 for i in numba.prange(n): 1325 result1 += tmp 1326 1327 for i in numba.prange(n): 1328 result1 += tmp 1329 1330 return result1 1331 1332 self.check(test_impl, 100) 1333 1334 @skip_parfors_unsupported 1335 def 
test_one_d_array_reduction(self): 1336 def test_impl(n): 1337 result = np.zeros(1, np.int_) 1338 1339 for i in numba.prange(n): 1340 result += np.array([i], np.int_) 1341 1342 return result 1343 1344 self.check(test_impl, 100) 1345 1346 @skip_parfors_unsupported 1347 def test_two_d_array_reduction(self): 1348 def test_impl(n): 1349 shp = (13, 17) 1350 size = shp[0] * shp[1] 1351 result1 = np.zeros(shp, np.int_) 1352 tmp = np.arange(size).reshape(shp) 1353 1354 for i in numba.prange(n): 1355 result1 += tmp 1356 1357 return result1 1358 1359 self.check(test_impl, 100) 1360 1361 @skip_parfors_unsupported 1362 def test_two_d_array_reduction_with_float_sizes(self): 1363 # result1 is float32 and tmp is float64. 1364 # Tests reduction with differing dtypes. 1365 def test_impl(n): 1366 shp = (2, 3) 1367 result1 = np.zeros(shp, np.float32) 1368 tmp = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(shp) 1369 1370 for i in numba.prange(n): 1371 result1 += tmp 1372 1373 return result1 1374 1375 self.check(test_impl, 100) 1376 1377 @skip_parfors_unsupported 1378 def test_two_d_array_reduction_prod(self): 1379 def test_impl(n): 1380 shp = (13, 17) 1381 result1 = 2 * np.ones(shp, np.int_) 1382 tmp = 2 * np.ones_like(result1) 1383 1384 for i in numba.prange(n): 1385 result1 *= tmp 1386 1387 return result1 1388 1389 self.check(test_impl, 100) 1390 1391 @skip_parfors_unsupported 1392 def test_three_d_array_reduction(self): 1393 def test_impl(n): 1394 shp = (3, 2, 7) 1395 result1 = np.zeros(shp, np.int_) 1396 1397 for i in numba.prange(n): 1398 result1 += np.ones(shp, np.int_) 1399 1400 return result1 1401 1402 self.check(test_impl, 100) 1403 1404 @skip_parfors_unsupported 1405 def test_preparfor_canonicalize_kws(self): 1406 # test canonicalize_array_math typing for calls with kw args 1407 def test_impl(A): 1408 return A.argsort() + 1 1409 1410 n = 211 1411 A = np.arange(n) 1412 self.check(test_impl, A) 1413 1414 @skip_parfors_unsupported 1415 def test_preparfor_datetime64(self): 
1416 # test array.dtype transformation for datetime64 1417 def test_impl(A): 1418 return A.dtype 1419 1420 A = np.empty(1, np.dtype('datetime64[ns]')) 1421 cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),)) 1422 self.assertEqual(cpfunc.entry_point(A), test_impl(A)) 1423 1424 @skip_parfors_unsupported 1425 def test_no_hoisting_with_member_function_call(self): 1426 def test_impl(X): 1427 n = X.shape[0] 1428 acc = 0 1429 for i in prange(n): 1430 R = {1, 2, 3} 1431 R.add(i) 1432 tmp = 0 1433 for x in R: 1434 tmp += x 1435 acc += tmp 1436 return acc 1437 1438 self.check(test_impl, np.random.ranf(128)) 1439 1440 @skip_parfors_unsupported 1441 def test_array_compare_scalar(self): 1442 """ issue3671: X != 0 becomes an arrayexpr with operator.ne. 1443 That is turned into a parfor by devectorizing. Make sure 1444 the return type of the devectorized operator.ne 1445 on integer types works properly. 1446 """ 1447 def test_impl(): 1448 X = np.zeros(10, dtype=np.int_) 1449 return X != 0 1450 1451 self.check(test_impl) 1452 1453 @skip_parfors_unsupported 1454 def test_reshape_with_neg_one(self): 1455 # issue3314 1456 def test_impl(a, b): 1457 result_matrix = np.zeros((b, b, 1), dtype=np.float64) 1458 sub_a = a[0:b] 1459 a = sub_a.size 1460 b = a / 1 1461 z = sub_a.reshape(-1, 1) 1462 result_data = sub_a / z 1463 result_matrix[:,:,0] = result_data 1464 return result_matrix 1465 1466 a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 1467 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) 1468 b = 3 1469 1470 self.check(test_impl, a, b) 1471 1472 @skip_parfors_unsupported 1473 def test_reshape_with_large_neg(self): 1474 # issue3314 1475 def test_impl(a, b): 1476 result_matrix = np.zeros((b, b, 1), dtype=np.float64) 1477 sub_a = a[0:b] 1478 a = sub_a.size 1479 b = a / 1 1480 z = sub_a.reshape(-1307, 1) 1481 result_data = sub_a / z 1482 result_matrix[:,:,0] = result_data 1483 return result_matrix 1484 1485 a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 1486 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) 1487 b = 
3 1488 1489 self.check(test_impl, a, b) 1490 1491 @skip_parfors_unsupported 1492 def test_reshape_with_too_many_neg_one(self): 1493 # issue3314 1494 with self.assertRaises(errors.UnsupportedRewriteError) as raised: 1495 @njit(parallel=True) 1496 def test_impl(a, b): 1497 rm = np.zeros((b, b, 1), dtype=np.float64) 1498 sub_a = a[0:b] 1499 a = sub_a.size 1500 b = a / 1 1501 z = sub_a.reshape(-1, -1) 1502 result_data = sub_a / z 1503 rm[:,:,0] = result_data 1504 return rm 1505 1506 a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 1507 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) 1508 b = 3 1509 test_impl(a, b) 1510 1511 msg = ("The reshape API may only include one negative argument.") 1512 self.assertIn(msg, str(raised.exception)) 1513 1514 @skip_parfors_unsupported 1515 def test_ndarray_fill(self): 1516 def test_impl(x): 1517 x.fill(7.0) 1518 return x 1519 x = np.zeros(10) 1520 self.check(test_impl, x) 1521 self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 1, 'C'),)) == 1) 1522 1523 @skip_parfors_unsupported 1524 def test_ndarray_fill2d(self): 1525 def test_impl(x): 1526 x.fill(7.0) 1527 return x 1528 x = np.zeros((2,2)) 1529 self.check(test_impl, x) 1530 self.assertTrue(countParfors(test_impl, (types.Array(types.float64, 2, 'C'),)) == 1) 1531 1532 @skip_parfors_unsupported 1533 def test_0d_array(self): 1534 def test_impl(n): 1535 return np.sum(n) + np.prod(n) + np.min(n) + np.max(n) + np.var(n) 1536 self.check(test_impl, np.array(7), check_scheduling=False) 1537 1538 @skip_parfors_unsupported 1539 def test_array_analysis_optional_def(self): 1540 def test_impl(x, half): 1541 size = len(x) 1542 parr = x[0:size] 1543 1544 if half: 1545 parr = x[0:size//2] 1546 1547 return parr.sum() 1548 x = np.ones(20) 1549 self.check(test_impl, x, True, check_scheduling=False) 1550 1551 @skip_parfors_unsupported 1552 def test_prange_side_effects(self): 1553 def test_impl(a, b): 1554 data = np.empty(len(a), dtype=np.float64) 1555 size = len(data) 1556 for i in numba.prange(size): 
1557 data[i] = a[i] 1558 for i in numba.prange(size): 1559 data[i] = data[i] + b[i] 1560 return data 1561 1562 x = np.arange(10 ** 2, dtype=float) 1563 y = np.arange(10 ** 2, dtype=float) 1564 1565 self.check(test_impl, x, y) 1566 self.assertTrue(countParfors(test_impl, 1567 (types.Array(types.float64, 1, 'C'), 1568 types.Array(types.float64, 1, 'C'))) == 1) 1569 1570 @skip_parfors_unsupported 1571 def test_tuple1(self): 1572 def test_impl(a): 1573 atup = (3, 4) 1574 b = 7 1575 for i in numba.prange(len(a)): 1576 a[i] += atup[0] + atup[1] + b 1577 return a 1578 1579 x = np.arange(10) 1580 self.check(test_impl, x) 1581 1582 @skip_parfors_unsupported 1583 def test_tuple2(self): 1584 def test_impl(a): 1585 atup = a.shape 1586 b = 7 1587 for i in numba.prange(len(a)): 1588 a[i] += atup[0] + b 1589 return a 1590 1591 x = np.arange(10) 1592 self.check(test_impl, x) 1593 1594 @skip_parfors_unsupported 1595 def test_tuple3(self): 1596 def test_impl(a): 1597 atup = (np.arange(10), 4) 1598 b = 7 1599 for i in numba.prange(len(a)): 1600 a[i] += atup[0][5] + atup[1] + b 1601 return a 1602 1603 x = np.arange(10) 1604 self.check(test_impl, x) 1605 1606 @skip_parfors_unsupported 1607 def test_namedtuple1(self): 1608 def test_impl(a): 1609 antup = TestNamedTuple(part0=3, part1=4) 1610 b = 7 1611 for i in numba.prange(len(a)): 1612 a[i] += antup.part0 + antup.part1 + b 1613 return a 1614 1615 x = np.arange(10) 1616 self.check(test_impl, x) 1617 1618 @skip_parfors_unsupported 1619 def test_namedtuple2(self): 1620 TestNamedTuple2 = namedtuple('TestNamedTuple2', ('part0', 'part1')) 1621 def test_impl(a): 1622 antup = TestNamedTuple2(part0=3, part1=4) 1623 b = 7 1624 for i in numba.prange(len(a)): 1625 a[i] += antup.part0 + antup.part1 + b 1626 return a 1627 1628 x = np.arange(10) 1629 self.check(test_impl, x) 1630 1631 @skip_parfors_unsupported 1632 def test_inplace_binop(self): 1633 def test_impl(a, b): 1634 b += a 1635 return b 1636 1637 X = np.arange(10) + 10 1638 Y = np.arange(10) 
+ 100 1639 self.check(test_impl, X, Y) 1640 self.assertTrue(countParfors(test_impl, 1641 (types.Array(types.float64, 1, 'C'), 1642 types.Array(types.float64, 1, 'C'))) == 1) 1643 1644class TestParforsLeaks(MemoryLeakMixin, TestParforsBase): 1645 def check(self, pyfunc, *args, **kwargs): 1646 cfunc, cpfunc = self.compile_all(pyfunc, *args) 1647 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 1648 1649 @skip_parfors_unsupported 1650 def test_reduction(self): 1651 # issue4299 1652 @njit(parallel=True) 1653 def test_impl(arr): 1654 return arr.sum() 1655 1656 arr = np.arange(10).astype(np.float64) 1657 self.check(test_impl, arr) 1658 1659 @skip_parfors_unsupported 1660 def test_multiple_reduction_vars(self): 1661 @njit(parallel=True) 1662 def test_impl(arr): 1663 a = 0. 1664 b = 1. 1665 for i in prange(arr.size): 1666 a += arr[i] 1667 b += 1. / (arr[i] + 1) 1668 return a * b 1669 arr = np.arange(10).astype(np.float64) 1670 self.check(test_impl, arr) 1671 1672 1673class TestPrangeBase(TestParforsBase): 1674 1675 def __init__(self, *args): 1676 TestParforsBase.__init__(self, *args) 1677 1678 def generate_prange_func(self, pyfunc, patch_instance): 1679 """ 1680 This function does the actual code augmentation to enable the explicit 1681 testing of `prange` calls in place of `range`. 1682 """ 1683 pyfunc_code = pyfunc.__code__ 1684 1685 prange_names = list(pyfunc_code.co_names) 1686 1687 if patch_instance is None: 1688 # patch all instances, cheat by just switching 1689 # range for prange 1690 assert 'range' in pyfunc_code.co_names 1691 prange_names = tuple([x if x != 'range' else 'prange' 1692 for x in pyfunc_code.co_names]) 1693 new_code = bytes(pyfunc_code.co_code) 1694 else: 1695 # patch specified instances... 
1696 # find where 'range' is in co_names 1697 range_idx = pyfunc_code.co_names.index('range') 1698 range_locations = [] 1699 # look for LOAD_GLOBALs that point to 'range' 1700 for instr in dis.Bytecode(pyfunc_code): 1701 if instr.opname == 'LOAD_GLOBAL': 1702 if instr.arg == range_idx: 1703 range_locations.append(instr.offset + 1) 1704 # add in 'prange' ref 1705 prange_names.append('prange') 1706 prange_names = tuple(prange_names) 1707 prange_idx = len(prange_names) - 1 1708 new_code = bytearray(pyfunc_code.co_code) 1709 assert len(patch_instance) <= len(range_locations) 1710 # patch up the new byte code 1711 for i in patch_instance: 1712 idx = range_locations[i] 1713 new_code[idx] = prange_idx 1714 new_code = bytes(new_code) 1715 1716 # create new code parts 1717 co_args = [pyfunc_code.co_argcount] 1718 1719 if utils.PYVERSION >= (3, 8): 1720 co_args.append(pyfunc_code.co_posonlyargcount) 1721 co_args.append(pyfunc_code.co_kwonlyargcount) 1722 co_args.extend([pyfunc_code.co_nlocals, 1723 pyfunc_code.co_stacksize, 1724 pyfunc_code.co_flags, 1725 new_code, 1726 pyfunc_code.co_consts, 1727 prange_names, 1728 pyfunc_code.co_varnames, 1729 pyfunc_code.co_filename, 1730 pyfunc_code.co_name, 1731 pyfunc_code.co_firstlineno, 1732 pyfunc_code.co_lnotab, 1733 pyfunc_code.co_freevars, 1734 pyfunc_code.co_cellvars 1735 ]) 1736 1737 # create code object with prange mutation 1738 prange_code = pytypes.CodeType(*co_args) 1739 1740 # get function 1741 pfunc = pytypes.FunctionType(prange_code, globals()) 1742 1743 return pfunc 1744 1745 def prange_tester(self, pyfunc, *args, **kwargs): 1746 """ 1747 The `prange` tester 1748 This is a hack. It basically switches out range calls for prange. 1749 It does this by copying the live code object of a function 1750 containing 'range' then copying the .co_names and mutating it so 1751 that 'range' is replaced with 'prange'. It then creates a new code 1752 object containing the mutation and instantiates a function to contain 1753 it. 
At this point three results are created: 1754 1. The result of calling the original python function. 1755 2. The result of calling a njit compiled version of the original 1756 python function. 1757 3. The result of calling a njit(parallel=True) version of the mutated 1758 function containing `prange`. 1759 The three results are then compared and the `prange` based function's 1760 llvm_ir is inspected to ensure the scheduler code is present. 1761 1762 Arguments: 1763 pyfunc - the python function to test 1764 args - data arguments to pass to the pyfunc under test 1765 1766 Keyword Arguments: 1767 patch_instance - iterable containing which instances of `range` to 1768 replace. If not present all instance of `range` are 1769 replaced. 1770 scheduler_type - 'signed', 'unsigned' or None, default is None. 1771 Supply in cases where the presence of a specific 1772 scheduler is to be asserted. 1773 check_fastmath - if True then a check will be performed to ensure the 1774 IR contains instructions labelled with 'fast' 1775 check_fastmath_result - if True then a check will be performed to 1776 ensure the result of running with fastmath 1777 on matches that of the pyfunc 1778 Remaining kwargs are passed to np.testing.assert_almost_equal 1779 1780 1781 Example: 1782 def foo(): 1783 acc = 0 1784 for x in range(5): 1785 for y in range(10): 1786 acc +=1 1787 return acc 1788 1789 # calling as 1790 prange_tester(foo) 1791 # will test code equivalent to 1792 # def foo(): 1793 # acc = 0 1794 # for x in prange(5): # <- changed 1795 # for y in prange(10): # <- changed 1796 # acc +=1 1797 # return acc 1798 1799 # calling as 1800 prange_tester(foo, patch_instance=[1]) 1801 # will test code equivalent to 1802 # def foo(): 1803 # acc = 0 1804 # for x in range(5): # <- outer loop (0) unchanged 1805 # for y in prange(10): # <- inner loop (1) changed 1806 # acc +=1 1807 # return acc 1808 1809 """ 1810 patch_instance = kwargs.pop('patch_instance', None) 1811 check_fastmath = 
kwargs.pop('check_fastmath', False) 1812 check_fastmath_result = kwargs.pop('check_fastmath_result', False) 1813 1814 pfunc = self.generate_prange_func(pyfunc, patch_instance) 1815 1816 # Compile functions 1817 # compile a standard njit of the original function 1818 sig = tuple([numba.typeof(x) for x in args]) 1819 cfunc = self.compile_njit(pyfunc, sig) 1820 1821 # compile the prange injected function 1822 with warnings.catch_warnings(record=True) as raised_warnings: 1823 warnings.simplefilter('always') 1824 cpfunc = self.compile_parallel(pfunc, sig) 1825 1826 # if check_fastmath is True then check fast instructions 1827 if check_fastmath: 1828 self.assert_fastmath(pfunc, sig) 1829 1830 # if check_fastmath_result is True then compile a function 1831 # so that the parfors checker can assert the result is ok. 1832 if check_fastmath_result: 1833 fastcpfunc = self.compile_parallel_fastmath(pfunc, sig) 1834 kwargs = dict({'fastmath_pcres': fastcpfunc}, **kwargs) 1835 1836 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 1837 return raised_warnings 1838 1839 1840class TestPrange(TestPrangeBase): 1841 """ Tests Prange """ 1842 1843 @skip_parfors_unsupported 1844 def test_prange01(self): 1845 def test_impl(): 1846 n = 4 1847 A = np.zeros(n) 1848 for i in range(n): 1849 A[i] = 2.0 * i 1850 return A 1851 self.prange_tester(test_impl, scheduler_type='unsigned', 1852 check_fastmath=True) 1853 1854 @skip_parfors_unsupported 1855 def test_prange02(self): 1856 def test_impl(): 1857 n = 4 1858 A = np.zeros(n - 1) 1859 for i in range(1, n): 1860 A[i - 1] = 2.0 * i 1861 return A 1862 self.prange_tester(test_impl, scheduler_type='unsigned', 1863 check_fastmath=True) 1864 1865 @skip_parfors_unsupported 1866 def test_prange03(self): 1867 def test_impl(): 1868 s = 10 1869 for i in range(10): 1870 s += 2 1871 return s 1872 self.prange_tester(test_impl, scheduler_type='unsigned', 1873 check_fastmath=True) 1874 1875 @skip_parfors_unsupported 1876 def 
test_prange03mul(self): 1877 def test_impl(): 1878 s = 3 1879 for i in range(10): 1880 s *= 2 1881 return s 1882 self.prange_tester(test_impl, scheduler_type='unsigned', 1883 check_fastmath=True) 1884 1885 @skip_parfors_unsupported 1886 def test_prange03sub(self): 1887 def test_impl(): 1888 s = 100 1889 for i in range(10): 1890 s -= 2 1891 return s 1892 self.prange_tester(test_impl, scheduler_type='unsigned', 1893 check_fastmath=True) 1894 1895 @skip_parfors_unsupported 1896 def test_prange03div(self): 1897 def test_impl(): 1898 s = 10 1899 for i in range(10): 1900 s /= 2 1901 return s 1902 self.prange_tester(test_impl, scheduler_type='unsigned', 1903 check_fastmath=True) 1904 1905 @skip_parfors_unsupported 1906 def test_prange04(self): 1907 def test_impl(): 1908 a = 2 1909 b = 3 1910 A = np.empty(4) 1911 for i in range(4): 1912 if i == a: 1913 A[i] = b 1914 else: 1915 A[i] = 0 1916 return A 1917 self.prange_tester(test_impl, scheduler_type='unsigned', 1918 check_fastmath=True) 1919 1920 @skip_parfors_unsupported 1921 def test_prange05(self): 1922 def test_impl(): 1923 n = 4 1924 A = np.ones((n), dtype=np.float64) 1925 s = 0 1926 for i in range(1, n - 1, 1): 1927 s += A[i] 1928 return s 1929 self.prange_tester(test_impl, scheduler_type='unsigned', 1930 check_fastmath=True) 1931 1932 @skip_parfors_unsupported 1933 def test_prange06(self): 1934 def test_impl(): 1935 n = 4 1936 A = np.ones((n), dtype=np.float64) 1937 s = 0 1938 for i in range(1, 1, 1): 1939 s += A[i] 1940 return s 1941 self.prange_tester(test_impl, scheduler_type='unsigned', 1942 check_fastmath=True) 1943 1944 @skip_parfors_unsupported 1945 def test_prange07(self): 1946 def test_impl(): 1947 n = 4 1948 A = np.ones((n), dtype=np.float64) 1949 s = 0 1950 for i in range(n, 1): 1951 s += A[i] 1952 return s 1953 self.prange_tester(test_impl, scheduler_type='unsigned', 1954 check_fastmath=True) 1955 1956 @skip_parfors_unsupported 1957 def test_prange08(self): 1958 def test_impl(): 1959 n = 4 1960 A = 
np.ones((n)) 1961 acc = 0 1962 for i in range(len(A)): 1963 for j in range(len(A)): 1964 acc += A[i] 1965 return acc 1966 self.prange_tester(test_impl, scheduler_type='unsigned', 1967 check_fastmath=True) 1968 1969 @skip_parfors_unsupported 1970 def test_prange08_1(self): 1971 def test_impl(): 1972 n = 4 1973 A = np.ones((n)) 1974 acc = 0 1975 for i in range(4): 1976 for j in range(4): 1977 acc += A[i] 1978 return acc 1979 self.prange_tester(test_impl, scheduler_type='unsigned', 1980 check_fastmath=True) 1981 1982 @skip_parfors_unsupported 1983 def test_prange09(self): 1984 def test_impl(): 1985 n = 4 1986 acc = 0 1987 for i in range(n): 1988 for j in range(n): 1989 acc += 1 1990 return acc 1991 # patch inner loop to 'prange' 1992 self.prange_tester(test_impl, patch_instance=[1], 1993 scheduler_type='unsigned', 1994 check_fastmath=True) 1995 1996 @skip_parfors_unsupported 1997 def test_prange10(self): 1998 def test_impl(): 1999 n = 4 2000 acc2 = 0 2001 for j in range(n): 2002 acc1 = 0 2003 for i in range(n): 2004 acc1 += 1 2005 acc2 += acc1 2006 return acc2 2007 # patch outer loop to 'prange' 2008 self.prange_tester(test_impl, patch_instance=[0], 2009 scheduler_type='unsigned', 2010 check_fastmath=True) 2011 2012 @skip_parfors_unsupported 2013 @unittest.skip("list append is not thread-safe yet (#2391, #2408)") 2014 def test_prange11(self): 2015 def test_impl(): 2016 n = 4 2017 return [np.sin(j) for j in range(n)] 2018 self.prange_tester(test_impl, scheduler_type='unsigned', 2019 check_fastmath=True) 2020 2021 @skip_parfors_unsupported 2022 def test_prange12(self): 2023 def test_impl(): 2024 acc = 0 2025 n = 4 2026 X = np.ones(n) 2027 for i in range(-len(X)): 2028 acc += X[i] 2029 return acc 2030 self.prange_tester(test_impl, scheduler_type='unsigned', 2031 check_fastmath=True) 2032 2033 @skip_parfors_unsupported 2034 def test_prange13(self): 2035 def test_impl(n): 2036 acc = 0 2037 for i in range(n): 2038 acc += 1 2039 return acc 2040 self.prange_tester(test_impl, 
np.int32(4), scheduler_type='unsigned', 2041 check_fastmath=True) 2042 2043 @skip_parfors_unsupported 2044 def test_prange14(self): 2045 def test_impl(A): 2046 s = 3 2047 for i in range(len(A)): 2048 s += A[i]*2 2049 return s 2050 # this tests reduction detection well since the accumulated variable 2051 # is initialized before the parfor and the value accessed from the array 2052 # is updated before accumulation 2053 self.prange_tester(test_impl, np.random.ranf(4), 2054 scheduler_type='unsigned', 2055 check_fastmath=True) 2056 2057 @skip_parfors_unsupported 2058 def test_prange15(self): 2059 # from issue 2587 2060 # test parfor type inference when there is multi-dimensional indexing 2061 def test_impl(N): 2062 acc = 0 2063 for i in range(N): 2064 x = np.ones((1, 1)) 2065 acc += x[0, 0] 2066 return acc 2067 self.prange_tester(test_impl, 1024, scheduler_type='unsigned', 2068 check_fastmath=True) 2069 2070 # Tests for negative ranges 2071 @skip_parfors_unsupported 2072 def test_prange16(self): 2073 def test_impl(N): 2074 acc = 0 2075 for i in range(-N, N): 2076 acc += 2 2077 return acc 2078 self.prange_tester(test_impl, 1024, scheduler_type='signed', 2079 check_fastmath=True) 2080 2081 @skip_parfors_unsupported 2082 def test_prange17(self): 2083 def test_impl(N): 2084 acc = 0 2085 X = np.ones(N) 2086 for i in range(-N, N): 2087 acc += X[i] 2088 return acc 2089 self.prange_tester(test_impl, 9, scheduler_type='signed', 2090 check_fastmath=True) 2091 2092 @skip_parfors_unsupported 2093 def test_prange18(self): 2094 def test_impl(N): 2095 acc = 0 2096 X = np.ones(N) 2097 for i in range(-N, 5): 2098 acc += X[i] 2099 for j in range(-4, N): 2100 acc += X[j] 2101 return acc 2102 self.prange_tester(test_impl, 9, scheduler_type='signed', 2103 check_fastmath=True) 2104 2105 @skip_parfors_unsupported 2106 def test_prange19(self): 2107 def test_impl(N): 2108 acc = 0 2109 M = N + 4 2110 X = np.ones((N, M)) 2111 for i in range(-N, N): 2112 for j in range(-M, M): 2113 acc += X[i, j] 
2114 return acc 2115 self.prange_tester(test_impl, 9, scheduler_type='signed', 2116 check_fastmath=True) 2117 2118 @skip_parfors_unsupported 2119 def test_prange20(self): 2120 def test_impl(N): 2121 acc = 0 2122 X = np.ones(N) 2123 for i in range(-1, N): 2124 acc += X[i] 2125 return acc 2126 self.prange_tester(test_impl, 9, scheduler_type='signed', 2127 check_fastmath=True) 2128 2129 @skip_parfors_unsupported 2130 def test_prange21(self): 2131 def test_impl(N): 2132 acc = 0 2133 for i in range(-3, -1): 2134 acc += 3 2135 return acc 2136 self.prange_tester(test_impl, 9, scheduler_type='signed', 2137 check_fastmath=True) 2138 2139 @skip_parfors_unsupported 2140 def test_prange22(self): 2141 def test_impl(): 2142 a = 0 2143 b = 3 2144 A = np.empty(4) 2145 for i in range(-2, 2): 2146 if i == a: 2147 A[i] = b 2148 elif i < 1: 2149 A[i] = -1 2150 else: 2151 A[i] = 7 2152 return A 2153 self.prange_tester(test_impl, scheduler_type='signed', 2154 check_fastmath=True, check_fastmath_result=True) 2155 2156 @skip_parfors_unsupported 2157 def test_prange23(self): 2158 # test non-contig input 2159 def test_impl(A): 2160 for i in range(len(A)): 2161 A[i] = i 2162 return A 2163 A = np.zeros(32)[::2] 2164 self.prange_tester(test_impl, A, scheduler_type='unsigned', 2165 check_fastmath=True, check_fastmath_result=True) 2166 2167 @skip_parfors_unsupported 2168 def test_prange24(self): 2169 # test non-contig input, signed range 2170 def test_impl(A): 2171 for i in range(-len(A), 0): 2172 A[i] = i 2173 return A 2174 A = np.zeros(32)[::2] 2175 self.prange_tester(test_impl, A, scheduler_type='signed', 2176 check_fastmath=True, check_fastmath_result=True) 2177 2178 @skip_parfors_unsupported 2179 def test_prange25(self): 2180 def test_impl(A): 2181 n = len(A) 2182 buf = [np.zeros_like(A) for _ in range(n)] 2183 for i in range(n): 2184 buf[i] = A + i 2185 return buf 2186 A = np.ones((10,)) 2187 self.prange_tester(test_impl, A, patch_instance=[1], 2188 scheduler_type='unsigned', 
check_fastmath=True, 2189 check_fastmath_result=True) 2190 2191 cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),)) 2192 diagnostics = cpfunc.metadata['parfor_diagnostics'] 2193 hoisted_allocs = diagnostics.hoisted_allocations() 2194 self.assertEqual(len(hoisted_allocs), 0) 2195 2196 # should this work? 2197 @skip_parfors_unsupported 2198 def test_prange26(self): 2199 def test_impl(A): 2200 B = A[::3] 2201 for i in range(len(B)): 2202 B[i] = i 2203 return A 2204 A = np.zeros(32)[::2] 2205 self.prange_tester(test_impl, A, scheduler_type='unsigned', 2206 check_fastmath=True, check_fastmath_result=True) 2207 2208 @skip_parfors_unsupported 2209 def test_prange27(self): 2210 # issue5597: usedef error in parfor 2211 def test_impl(a, b, c): 2212 for j in range(b[0]-1): 2213 for k in range(2): 2214 z = np.abs(a[c-1:c+1]) 2215 return 0 2216 2217 # patch inner loop to 'prange' 2218 self.prange_tester(test_impl, 2219 np.arange(20), 2220 np.asarray([4,4,4,4,4,4,4,4,4,4]), 2221 0, 2222 patch_instance=[1], 2223 scheduler_type='unsigned', 2224 check_fastmath=True) 2225 2226 @skip_parfors_unsupported 2227 def test_prange_two_instances_same_reduction_var(self): 2228 # issue4922 - multiple uses of same reduction variable 2229 def test_impl(n): 2230 c = 0 2231 for i in range(n): 2232 c += 1 2233 if i > 10: 2234 c += 1 2235 return c 2236 self.prange_tester(test_impl, 9) 2237 2238 @skip_parfors_unsupported 2239 def test_prange_conflicting_reduction_ops(self): 2240 def test_impl(n): 2241 c = 0 2242 for i in range(n): 2243 c += 1 2244 if i > 10: 2245 c *= 1 2246 return c 2247 2248 with self.assertRaises(errors.UnsupportedError) as raises: 2249 self.prange_tester(test_impl, 9) 2250 msg = ('Reduction variable c has multiple conflicting reduction ' 2251 'operators.') 2252 self.assertIn(msg, str(raises.exception)) 2253 2254# @skip_parfors_unsupported 2255 @disabled_test 2256 def test_check_error_model(self): 2257 def test_impl(): 2258 n = 32 2259 A = np.zeros(n) 2260 for i in 
range(n): 2261 A[i] = 1 / i # div-by-zero when i = 0 2262 return A 2263 2264 with self.assertRaises(ZeroDivisionError) as raises: 2265 test_impl() 2266 2267 # compile parallel functions 2268 pfunc = self.generate_prange_func(test_impl, None) 2269 pcres = self.compile_parallel(pfunc, ()) 2270 pfcres = self.compile_parallel_fastmath(pfunc, ()) 2271 2272 # should raise 2273 with self.assertRaises(ZeroDivisionError) as raises: 2274 pcres.entry_point() 2275 2276 # should not raise 2277 result = pfcres.entry_point() 2278 self.assertEqual(result[0], np.inf) 2279 2280 2281 @skip_parfors_unsupported 2282 def test_check_alias_analysis(self): 2283 # check alias analysis reports ok 2284 def test_impl(A): 2285 for i in range(len(A)): 2286 B = A[i] 2287 B[:] = 1 2288 return A 2289 A = np.zeros(32).reshape(4, 8) 2290 self.prange_tester(test_impl, A, scheduler_type='unsigned', 2291 check_fastmath=True, check_fastmath_result=True) 2292 pfunc = self.generate_prange_func(test_impl, None) 2293 sig = tuple([numba.typeof(A)]) 2294 cres = self.compile_parallel_fastmath(pfunc, sig) 2295 _ir = self._get_gufunc_ir(cres) 2296 for k, v in _ir.items(): 2297 for line in v.splitlines(): 2298 # get the fn definition line 2299 if 'define' in line and k in line: 2300 # there should only be 2x noalias, one on each of the first 2301 # 2 args (retptr, excinfo). 2302 # Note: used to be 3x no noalias, but env arg is dropped. 
2303 self.assertEqual(line.count('noalias'), 2) 2304 break 2305 2306 @skip_parfors_unsupported 2307 def test_prange_raises_invalid_step_size(self): 2308 def test_impl(N): 2309 acc = 0 2310 for i in range(0, N, 2): 2311 acc += 2 2312 return acc 2313 2314 with self.assertRaises(errors.UnsupportedRewriteError) as raises: 2315 self.prange_tester(test_impl, 1024) 2316 msg = 'Only constant step size of 1 is supported for prange' 2317 self.assertIn(msg, str(raises.exception)) 2318 2319 @skip_parfors_unsupported 2320 def test_prange_fastmath_check_works(self): 2321 # this function will benefit from `fastmath`, the div will 2322 # get optimised to a multiply by reciprocal and the accumulator 2323 # then becomes an fmadd: A = A + i * 0.5 2324 def test_impl(): 2325 n = 128 2326 A = 0 2327 for i in range(n): 2328 A += i / 2.0 2329 return A 2330 self.prange_tester(test_impl, scheduler_type='unsigned', 2331 check_fastmath=True) 2332 pfunc = self.generate_prange_func(test_impl, None) 2333 cres = self.compile_parallel_fastmath(pfunc, ()) 2334 ir = self._get_gufunc_ir(cres) 2335 _id = '%[A-Z_0-9]?(.[0-9]+)+[.]?[i]?' 
2336 recipr_str = '\s+%s = fmul fast double %s, 5.000000e-01' 2337 reciprocal_inst = re.compile(recipr_str % (_id, _id)) 2338 fadd_inst = re.compile('\s+%s = fadd fast double %s, %s' 2339 % (_id, _id, _id)) 2340 # check there is something like: 2341 # %.329 = fmul fast double %.325, 5.000000e-01 2342 # %.337 = fadd fast double %A.07, %.329 2343 for name, kernel in ir.items(): 2344 splitted = kernel.splitlines() 2345 for i, x in enumerate(splitted): 2346 if reciprocal_inst.match(x): 2347 break 2348 self.assertTrue(fadd_inst.match(splitted[i + 1])) 2349 2350 @skip_parfors_unsupported 2351 def test_kde_example(self): 2352 def test_impl(X): 2353 # KDE example 2354 b = 0.5 2355 points = np.array([-1.0, 2.0, 5.0]) 2356 N = points.shape[0] 2357 n = X.shape[0] 2358 exps = 0 2359 for i in range(n): 2360 p = X[i] 2361 d = (-(p - points)**2) / (2 * b**2) 2362 m = np.min(d) 2363 exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m))) 2364 return exps 2365 2366 n = 128 2367 X = np.random.ranf(n) 2368 self.prange_tester(test_impl, X) 2369 2370 @skip_parfors_unsupported 2371 def test_parfor_alias1(self): 2372 def test_impl(n): 2373 b = np.zeros((n, n)) 2374 a = b[0] 2375 for j in range(n): 2376 a[j] = j + 1 2377 return b.sum() 2378 self.prange_tester(test_impl, 4) 2379 2380 @skip_parfors_unsupported 2381 def test_parfor_alias2(self): 2382 def test_impl(n): 2383 b = np.zeros((n, n)) 2384 for i in range(n): 2385 a = b[i] 2386 for j in range(n): 2387 a[j] = i + j 2388 return b.sum() 2389 self.prange_tester(test_impl, 4) 2390 2391 @skip_parfors_unsupported 2392 def test_parfor_alias3(self): 2393 def test_impl(n): 2394 b = np.zeros((n, n, n)) 2395 for i in range(n): 2396 a = b[i] 2397 for j in range(n): 2398 c = a[j] 2399 for k in range(n): 2400 c[k] = i + j + k 2401 return b.sum() 2402 self.prange_tester(test_impl, 4) 2403 2404 @skip_parfors_unsupported 2405 def test_parfor_race_1(self): 2406 def test_impl(x, y): 2407 for j in range(y): 2408 k = x 2409 return k 2410 raised_warnings 
= self.prange_tester(test_impl, 10, 20) 2411 warning_obj = raised_warnings[0] 2412 expected_msg = ("Variable k used in parallel loop may be written to " 2413 "simultaneously by multiple workers and may result " 2414 "in non-deterministic or unintended results.") 2415 self.assertIn(expected_msg, str(warning_obj.message)) 2416 2417 @skip_parfors_unsupported 2418 def test_nested_parfor_push_call_vars(self): 2419 """ issue 3686: if a prange has something inside it that causes 2420 a nested parfor to be generated and both the inner and outer 2421 parfor use the same call variable defined outside the parfors 2422 then ensure that when that call variable is pushed into the 2423 parfor that the call variable isn't duplicated with the same 2424 name resulting in a redundant type lock. 2425 """ 2426 def test_impl(): 2427 B = 0 2428 f = np.negative 2429 for i in range(1): 2430 this_matters = f(1.) 2431 B += f(np.zeros(1,))[0] 2432 for i in range(2): 2433 this_matters = f(1.) 2434 B += f(np.zeros(1,))[0] 2435 2436 return B 2437 self.prange_tester(test_impl) 2438 2439 @skip_parfors_unsupported 2440 def test_copy_global_for_parfor(self): 2441 """ issue4903: a global is copied next to a parfor so that 2442 it can be inlined into the parfor and thus not have to be 2443 passed to the parfor (i.e., an unsupported function type). 2444 This global needs to be renamed in the block into which 2445 it is copied. 
2446 """ 2447 def test_impl(zz, tc): 2448 lh = np.zeros(len(tc)) 2449 lc = np.zeros(len(tc)) 2450 for i in range(1): 2451 nt = tc[i] 2452 for t in range(nt): 2453 lh += np.exp(zz[i, t]) 2454 for t in range(nt): 2455 lc += np.exp(zz[i, t]) 2456 return lh, lc 2457 2458 m = 2 2459 zz = np.ones((m, m, m)) 2460 tc = np.ones(m, dtype=np.int_) 2461 self.prange_tester(test_impl, zz, tc, patch_instance=[0]) 2462 2463 @skip_parfors_unsupported 2464 def test_multiple_call_getattr_object(self): 2465 def test_impl(n): 2466 B = 0 2467 f = np.negative 2468 for i in range(1): 2469 this_matters = f(1.0) 2470 B += f(n) 2471 2472 return B 2473 self.prange_tester(test_impl, 1.0) 2474 2475 @skip_parfors_unsupported 2476 def test_argument_alias_recarray_field(self): 2477 # Test for issue4007. 2478 def test_impl(n): 2479 for i in range(len(n)): 2480 n.x[i] = 7.0 2481 return n 2482 X1 = np.zeros(10, dtype=[('x', float), ('y', int), ]) 2483 X2 = np.zeros(10, dtype=[('x', float), ('y', int), ]) 2484 X3 = np.zeros(10, dtype=[('x', float), ('y', int), ]) 2485 v1 = X1.view(np.recarray) 2486 v2 = X2.view(np.recarray) 2487 v3 = X3.view(np.recarray) 2488 2489 # Numpy doesn't seem to support almost equal on recarray. 2490 # So, we convert to list and use assertEqual instead. 2491 python_res = list(test_impl(v1)) 2492 njit_res = list(njit(test_impl)(v2)) 2493 pa_func = njit(test_impl, parallel=True) 2494 pa_res = list(pa_func(v3)) 2495 self.assertEqual(python_res, njit_res) 2496 self.assertEqual(python_res, pa_res) 2497 2498 @skip_parfors_unsupported 2499 def test_mutable_list_param(self): 2500 """ issue3699: test that mutable variable to call in loop 2501 is not hoisted. The call in test_impl forces a manual 2502 check here rather than using prange_tester. 2503 """ 2504 @njit 2505 def list_check(X): 2506 """ If the variable X is hoisted in the test_impl prange 2507 then subsequent list_check calls would return increasing 2508 values. 
2509 """ 2510 ret = X[-1] 2511 a = X[-1] + 1 2512 X.append(a) 2513 return ret 2514 def test_impl(n): 2515 for i in prange(n): 2516 X = [100] 2517 a = list_check(X) 2518 return a 2519 python_res = test_impl(10) 2520 njit_res = njit(test_impl)(10) 2521 pa_func = njit(test_impl, parallel=True) 2522 pa_res = pa_func(10) 2523 self.assertEqual(python_res, njit_res) 2524 self.assertEqual(python_res, pa_res) 2525 2526 @skip_parfors_unsupported 2527 def test_list_comprehension_prange(self): 2528 # issue4569 2529 def test_impl(x): 2530 return np.array([len(x[i]) for i in range(len(x))]) 2531 x = [np.array([1,2,3], dtype=int),np.array([1,2], dtype=int)] 2532 self.prange_tester(test_impl, x) 2533 2534 @skip_parfors_unsupported 2535 def test_ssa_false_reduction(self): 2536 # issue5698 2537 # SSA for h creates assignments to h that make it look like a 2538 # reduction variable except that it lacks an associated 2539 # reduction operator. Test here that h is excluded as a 2540 # reduction variable. 2541 def test_impl(image, a, b): 2542 empty = np.zeros(image.shape) 2543 for i in range(image.shape[0]): 2544 r = image[i][0] / 255.0 2545 if a == 0: 2546 h = 0 2547 if b == 0: 2548 h = 0 2549 empty[i] = [h, h, h] 2550 return empty 2551 2552 image = np.zeros((3, 3), dtype=np.int32) 2553 self.prange_tester(test_impl, image, 0, 0) 2554 2555 2556@skip_parfors_unsupported 2557@x86_only 2558class TestParforsVectorizer(TestPrangeBase): 2559 2560 # env mutating test 2561 _numba_parallel_test_ = False 2562 2563 def get_gufunc_asm(self, func, schedule_type, *args, **kwargs): 2564 2565 fastmath = kwargs.pop('fastmath', False) 2566 cpu_name = kwargs.pop('cpu_name', 'skylake-avx512') 2567 assertions = kwargs.pop('assertions', True) 2568 # force LLVM to use zmm registers for vectorization 2569 # https://reviews.llvm.org/D67259 2570 cpu_features = kwargs.pop('cpu_features', '-prefer-256-bit') 2571 2572 env_opts = {'NUMBA_CPU_NAME': cpu_name, 2573 'NUMBA_CPU_FEATURES': cpu_features, 2574 } 2575 2576 
overrides = [] 2577 for k, v in env_opts.items(): 2578 overrides.append(override_env_config(k, v)) 2579 2580 with overrides[0], overrides[1]: 2581 sig = tuple([numba.typeof(x) for x in args]) 2582 pfunc_vectorizable = self.generate_prange_func(func, None) 2583 if fastmath == True: 2584 cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig) 2585 else: 2586 cres = self.compile_parallel(pfunc_vectorizable, sig) 2587 2588 # get the gufunc asm 2589 asm = self._get_gufunc_asm(cres) 2590 2591 if assertions: 2592 schedty = re.compile('call\s+\w+\*\s+@do_scheduling_(\w+)\(') 2593 matches = schedty.findall(cres.library.get_llvm_str()) 2594 self.assertGreaterEqual(len(matches), 1) # at least 1 parfor call 2595 self.assertEqual(matches[0], schedule_type) 2596 self.assertTrue(asm != {}) 2597 2598 return asm 2599 2600 # this is a common match pattern for something like: 2601 # \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n 2602 # to check vsqrtpd operates on zmm 2603 match_vsqrtpd_on_zmm = re.compile('\n\s+vsqrtpd\s+.*zmm.*\n') 2604 2605 @linux_only 2606 def test_vectorizer_fastmath_asm(self): 2607 """ This checks that if fastmath is set and the underlying hardware 2608 is suitable, and the function supplied is amenable to fastmath based 2609 vectorization, that the vectorizer actually runs. 2610 """ 2611 2612 # This function will benefit from `fastmath` if run on a suitable 2613 # target. The vectorizer should unwind the loop and generate 2614 # packed dtype=double add and sqrt instructions. 
2615 def will_vectorize(A): 2616 n = len(A) 2617 acc = 0 2618 for i in range(n): 2619 acc += np.sqrt(i) 2620 return acc 2621 2622 arg = np.zeros(10) 2623 2624 fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, 2625 fastmath=True) 2626 slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, 2627 fastmath=False) 2628 2629 for v in fast_asm.values(): 2630 # should unwind and call vector sqrt then vector add 2631 # all on packed doubles using zmm's 2632 self.assertTrue('vaddpd' in v) 2633 self.assertTrue('vsqrtpd' in v) 2634 self.assertTrue('zmm' in v) 2635 # make sure vsqrtpd operates on zmm 2636 self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) 2637 2638 for v in slow_asm.values(): 2639 # vector variants should not be present 2640 self.assertTrue('vaddpd' not in v) 2641 self.assertTrue('vsqrtpd' not in v) 2642 # check scalar variant is present 2643 self.assertTrue('vsqrtsd' in v) 2644 self.assertTrue('vaddsd' in v) 2645 # check no zmm addressing is present 2646 self.assertTrue('zmm' not in v) 2647 2648 @linux_only 2649 def test_unsigned_refusal_to_vectorize(self): 2650 """ This checks that if fastmath is set and the underlying hardware 2651 is suitable, and the function supplied is amenable to fastmath based 2652 vectorization, that the vectorizer actually runs. 
2653 """ 2654 2655 def will_not_vectorize(A): 2656 n = len(A) 2657 for i in range(-n, 0): 2658 A[i] = np.sqrt(A[i]) 2659 return A 2660 2661 def will_vectorize(A): 2662 n = len(A) 2663 for i in range(n): 2664 A[i] = np.sqrt(A[i]) 2665 return A 2666 2667 arg = np.zeros(10) 2668 2669 # Boundschecking breaks vectorization 2670 with override_env_config('NUMBA_BOUNDSCHECK', '0'): 2671 novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg, 2672 fastmath=True) 2673 2674 vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, 2675 fastmath=True) 2676 2677 for v in novec_asm.values(): 2678 # vector variant should not be present 2679 self.assertTrue('vsqrtpd' not in v) 2680 # check scalar variant is present 2681 self.assertTrue('vsqrtsd' in v) 2682 # check no zmm addressing is present 2683 self.assertTrue('zmm' not in v) 2684 2685 for v in vec_asm.values(): 2686 # should unwind and call vector sqrt then vector mov 2687 # all on packed doubles using zmm's 2688 self.assertTrue('vsqrtpd' in v) 2689 self.assertTrue('vmovupd' in v) 2690 self.assertTrue('zmm' in v) 2691 # make sure vsqrtpd operates on zmm 2692 self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1) 2693 2694 @linux_only 2695 # needed as 32bit doesn't have equivalent signed/unsigned instruction generation 2696 # for this function 2697 @skip_parfors_unsupported 2698 def test_signed_vs_unsigned_vec_asm(self): 2699 """ This checks vectorization for signed vs unsigned variants of a 2700 trivial accumulator, the only meaningful difference should be the 2701 presence of signed vs. unsigned unpack instructions (for the 2702 induction var). 2703 """ 2704 def signed_variant(): 2705 n = 4096 2706 A = 0. 2707 for i in range(-n, 0): 2708 A += i 2709 return A 2710 2711 def unsigned_variant(): 2712 n = 4096 2713 A = 0. 
2714 for i in range(n): 2715 A += i 2716 return A 2717 2718 # Boundschecking breaks the diff check below because of the pickled exception 2719 with override_env_config('NUMBA_BOUNDSCHECK', '0'): 2720 signed_asm = self.get_gufunc_asm(signed_variant, 'signed', 2721 fastmath=True) 2722 unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned', 2723 fastmath=True) 2724 2725 def strip_instrs(asm): 2726 acc = [] 2727 for x in asm.splitlines(): 2728 spd = x.strip() 2729 # filter out anything that isn't a trivial instruction 2730 # and anything with the gufunc id as it contains an address 2731 if spd != '' and not (spd.startswith('.') 2732 or spd.startswith('_') 2733 or spd.startswith('"') 2734 or '__numba_parfor_gufunc' in spd): 2735 acc.append(re.sub('[\t]', '', spd)) 2736 return acc 2737 2738 for k, v in signed_asm.items(): 2739 signed_instr = strip_instrs(v) 2740 break 2741 2742 for k, v in unsigned_asm.items(): 2743 unsigned_instr = strip_instrs(v) 2744 break 2745 2746 from difflib import SequenceMatcher as sm 2747 # make sure that the only difference in instruction (if there is a 2748 # difference) is the char 'u'. For example: 2749 # vcvtsi2sdq vs. 
vcvtusi2sdq 2750 self.assertEqual(len(signed_instr), len(unsigned_instr)) 2751 for a, b in zip(signed_instr, unsigned_instr): 2752 if a == b: 2753 continue 2754 else: 2755 s = sm(lambda x: x == '\t', a, b) 2756 ops = s.get_opcodes() 2757 for op in ops: 2758 if op[0] == 'insert': 2759 self.assertEqual(b[op[-2]:op[-1]], 'u') 2760 2761 2762class TestParforsSlice(TestParforsBase): 2763 2764 def check(self, pyfunc, *args, **kwargs): 2765 cfunc, cpfunc = self.compile_all(pyfunc, *args) 2766 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 2767 2768 @skip_parfors_unsupported 2769 def test_parfor_slice1(self): 2770 def test_impl(a): 2771 (n,) = a.shape 2772 b = a[0:n-2] + a[1:n-1] 2773 return b 2774 2775 self.check(test_impl, np.ones(10)) 2776 2777 @skip_parfors_unsupported 2778 def test_parfor_slice2(self): 2779 def test_impl(a, m): 2780 (n,) = a.shape 2781 b = a[0:n-2] + a[1:m] 2782 return b 2783 2784 # runtime assertion should succeed 2785 self.check(test_impl, np.ones(10), 9) 2786 # next we expect failure 2787 with self.assertRaises(AssertionError) as raises: 2788 njit(parallel=True)(test_impl)(np.ones(10),10) 2789 self.assertIn("do not match", str(raises.exception)) 2790 2791 @skip_parfors_unsupported 2792 def test_parfor_slice3(self): 2793 def test_impl(a): 2794 (m,n) = a.shape 2795 b = a[0:m-1,0:n-1] + a[1:m,1:n] 2796 return b 2797 2798 self.check(test_impl, np.ones((4,3))) 2799 2800 @skip_parfors_unsupported 2801 def test_parfor_slice4(self): 2802 def test_impl(a): 2803 (m,n) = a.shape 2804 b = a[:,0:n-1] + a[:,1:n] 2805 return b 2806 2807 self.check(test_impl, np.ones((4,3))) 2808 2809 @skip_parfors_unsupported 2810 def test_parfor_slice5(self): 2811 def test_impl(a): 2812 (m,n) = a.shape 2813 b = a[0:m-1,:] + a[1:m,:] 2814 return b 2815 2816 self.check(test_impl, np.ones((4,3))) 2817 2818 @skip_parfors_unsupported 2819 def test_parfor_slice6(self): 2820 def test_impl(a): 2821 b = a.transpose() 2822 c = a[1,:] + b[:,1] 2823 return c 2824 2825 
self.check(test_impl, np.ones((4,3))) 2826 2827 @skip_parfors_unsupported 2828 def test_parfor_slice7(self): 2829 def test_impl(a): 2830 b = a.transpose() 2831 c = a[1,:] + b[1,:] 2832 return c 2833 2834 # runtime check should succeed 2835 self.check(test_impl, np.ones((3,3))) 2836 # next we expect failure 2837 with self.assertRaises(AssertionError) as raises: 2838 njit(parallel=True)(test_impl)(np.ones((3,4))) 2839 self.assertIn("do not match", str(raises.exception)) 2840 2841# @skip_parfors_unsupported 2842 @disabled_test 2843 def test_parfor_slice8(self): 2844 def test_impl(a): 2845 (m,n) = a.shape 2846 b = a.transpose() 2847 b[1:m,1:n] = a[1:m,1:n] 2848 return b 2849 2850 self.check(test_impl, np.arange(9).reshape((3,3))) 2851 2852# @skip_parfors_unsupported 2853 @disabled_test 2854 def test_parfor_slice9(self): 2855 def test_impl(a): 2856 (m,n) = a.shape 2857 b = a.transpose() 2858 b[1:n,1:m] = a[:,1:m] 2859 return b 2860 2861 self.check(test_impl, np.arange(12).reshape((3,4))) 2862 2863# @skip_parfors_unsupported 2864 @disabled_test 2865 def test_parfor_slice10(self): 2866 def test_impl(a): 2867 (m,n) = a.shape 2868 b = a.transpose() 2869 b[2,1:m] = a[2,1:m] 2870 return b 2871 2872 self.check(test_impl, np.arange(9).reshape((3,3))) 2873 2874 @skip_parfors_unsupported 2875 def test_parfor_slice11(self): 2876 def test_impl(a): 2877 (m,n,l) = a.shape 2878 b = a.copy() 2879 b[:,1,1:l] = a[:,2,1:l] 2880 return b 2881 2882 self.check(test_impl, np.arange(27).reshape((3,3,3))) 2883 2884 @skip_parfors_unsupported 2885 def test_parfor_slice12(self): 2886 def test_impl(a): 2887 (m,n) = a.shape 2888 b = a.copy() 2889 b[1,1:-1] = a[0,:-2] 2890 return b 2891 2892 self.check(test_impl, np.arange(12).reshape((3,4))) 2893 2894 @skip_parfors_unsupported 2895 def test_parfor_slice13(self): 2896 def test_impl(a): 2897 (m,n) = a.shape 2898 b = a.copy() 2899 c = -1 2900 b[1,1:c] = a[0,-n:c-1] 2901 return b 2902 2903 self.check(test_impl, np.arange(12).reshape((3,4))) 2904 2905 
@skip_parfors_unsupported 2906 def test_parfor_slice14(self): 2907 def test_impl(a): 2908 (m,n) = a.shape 2909 b = a.copy() 2910 b[1,:-1] = a[0,-3:4] 2911 return b 2912 2913 self.check(test_impl, np.arange(12).reshape((3,4))) 2914 2915 @skip_parfors_unsupported 2916 def test_parfor_slice15(self): 2917 def test_impl(a): 2918 (m,n) = a.shape 2919 b = a.copy() 2920 b[1,-(n-1):] = a[0,-3:4] 2921 return b 2922 2923 self.check(test_impl, np.arange(12).reshape((3,4))) 2924 2925 2926 @disabled_test 2927 def test_parfor_slice16(self): 2928 """ This test is disabled because if n is larger than the array size 2929 then n and n-1 will both be the end of the array and thus the 2930 slices will in fact be of different sizes and unable to fuse. 2931 """ 2932 def test_impl(a, b, n): 2933 assert(a.shape == b.shape) 2934 a[1:n] = 10 2935 b[0:(n-1)] = 10 2936 return a * b 2937 2938 self.check(test_impl, np.ones(10), np.zeros(10), 8) 2939 args = (numba.float64[:], numba.float64[:], numba.int64) 2940 self.assertEqual(countParfors(test_impl, args), 2) 2941 2942 @skip_parfors_unsupported 2943 def test_parfor_slice17(self): 2944 def test_impl(m, A): 2945 B = np.zeros(m) 2946 n = len(A) 2947 B[-n:] = A 2948 return B 2949 2950 self.check(test_impl, 10, np.ones(10)) 2951 2952 @skip_parfors_unsupported 2953 def test_parfor_slice18(self): 2954 # issue 3534 2955 def test_impl(): 2956 a = np.zeros(10) 2957 a[1:8] = np.arange(0, 7) 2958 y = a[3] 2959 return y 2960 2961 self.check(test_impl) 2962 2963 @skip_parfors_unsupported 2964 def test_parfor_slice19(self): 2965 # issues #3561 and #3554, empty slice binop 2966 def test_impl(X): 2967 X[:0] += 1 2968 return X 2969 2970 self.check(test_impl, np.ones(10)) 2971 2972 @skip_parfors_unsupported 2973 def test_parfor_slice20(self): 2974 # issue #4075, slice size 2975 def test_impl(): 2976 a = np.ones(10) 2977 c = a[1:] 2978 s = len(c) 2979 return s 2980 2981 self.check(test_impl, check_scheduling=False) 2982 2983 @skip_parfors_unsupported 2984 def 
test_parfor_slice21(self): 2985 def test_impl(x1, x2): 2986 x1 = x1.reshape(x1.size, 1) 2987 x2 = x2.reshape(x2.size, 1) 2988 return x1 >= x2[:-1, :] 2989 2990 x1 = np.random.rand(5) 2991 x2 = np.random.rand(6) 2992 self.check(test_impl, x1, x2) 2993 2994 @skip_parfors_unsupported 2995 def test_parfor_slice22(self): 2996 def test_impl(x1, x2): 2997 b = np.zeros((10,)) 2998 for i in prange(1): 2999 b += x1[:, x2] 3000 return b 3001 3002 x1 = np.zeros((10,7)) 3003 x2 = np.array(4) 3004 self.check(test_impl, x1, x2) 3005 3006 @skip_parfors_unsupported 3007 def test_parfor_slice23(self): 3008 # issue #4630 3009 def test_impl(x): 3010 x[:0] = 2 3011 return x 3012 3013 self.check(test_impl, np.ones(10)) 3014 3015 @skip_parfors_unsupported 3016 def test_parfor_slice24(self): 3017 def test_impl(m, A, n): 3018 B = np.zeros(m) 3019 C = B[n:] 3020 C = A[:len(C)] 3021 return B 3022 3023 for i in range(-15, 15): 3024 self.check(test_impl, 10, np.ones(10), i) 3025 3026 @skip_parfors_unsupported 3027 def test_parfor_slice25(self): 3028 def test_impl(m, A, n): 3029 B = np.zeros(m) 3030 C = B[:n] 3031 C = A[:len(C)] 3032 return B 3033 3034 for i in range(-15, 15): 3035 self.check(test_impl, 10, np.ones(10), i) 3036 3037 @skip_parfors_unsupported 3038 def test_parfor_slice26(self): 3039 def test_impl(a): 3040 (n,) = a.shape 3041 b = a.copy() 3042 b[-(n-1):] = a[-3:4] 3043 return b 3044 3045 self.check(test_impl, np.arange(4)) 3046 3047 @skip_parfors_unsupported 3048 def test_parfor_slice27(self): 3049 # issue5601: tests array analysis of the slice with 3050 # n_valid_vals of unknown size. 3051 def test_impl(a): 3052 n_valid_vals = 0 3053 3054 for i in prange(a.shape[0]): 3055 if a[i] != 0: 3056 n_valid_vals += 1 3057 3058 if n_valid_vals: 3059 unused = a[:n_valid_vals] 3060 3061 return 0 3062 3063 self.check(test_impl, np.arange(3)) 3064 3065 @skip_parfors_unsupported 3066 def test_issue5942_1(self): 3067 # issue5942: tests statement reordering of 3068 # aliased arguments. 
3069 def test_impl(gg, gg_next): 3070 gs = gg.shape 3071 d = gs[0] 3072 for i_gg in prange(d): 3073 gg_next[i_gg, :] = gg[i_gg, :] 3074 gg_next[i_gg, 0] += 1 3075 3076 return gg_next 3077 3078 d = 4 3079 k = 2 3080 3081 gg = np.zeros((d, k), dtype = np.int32) 3082 gg_next = np.zeros((d, k), dtype = np.int32) 3083 self.check(test_impl, gg, gg_next) 3084 3085 @skip_parfors_unsupported 3086 def test_issue5942_2(self): 3087 # issue5942: tests statement reordering 3088 def test_impl(d, k): 3089 gg = np.zeros((d, k), dtype = np.int32) 3090 gg_next = np.zeros((d, k), dtype = np.int32) 3091 3092 for i_gg in prange(d): 3093 for n in range(k): 3094 gg[i_gg, n] = i_gg 3095 gg_next[i_gg, :] = gg[i_gg, :] 3096 gg_next[i_gg, 0] += 1 3097 3098 return gg_next 3099 3100 d = 4 3101 k = 2 3102 3103 self.check(test_impl, d, k) 3104 3105 @skip_parfors_unsupported 3106 @skip_unless_scipy 3107 def test_issue6102(self): 3108 # The problem is originally observed on Python3.8 because of the 3109 # changes in how loops are represented in 3.8 bytecode. 
3110 @njit(parallel=True) 3111 def f(r): 3112 for ir in prange(r.shape[0]): 3113 dist = np.inf 3114 tr = np.array([0, 0, 0], dtype=np.float32) 3115 for i in [1, 0, -1]: 3116 dist_t = np.linalg.norm(r[ir, :] + i) 3117 if dist_t < dist: 3118 dist = dist_t 3119 tr = np.array([i, i, i], dtype=np.float32) 3120 r[ir, :] += tr 3121 return r 3122 3123 r = np.array([[0., 0., 0.], [0., 0., 1.]]) 3124 self.assertPreciseEqual(f(r), f.py_func(r)) 3125 3126 3127class TestParforsOptions(TestParforsBase): 3128 3129 def check(self, pyfunc, *args, **kwargs): 3130 cfunc, cpfunc = self.compile_all(pyfunc, *args) 3131 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 3132 3133 @skip_parfors_unsupported 3134 def test_parfor_options(self): 3135 def test_impl(a): 3136 n = a.shape[0] 3137 b = np.ones(n) 3138 c = np.array([ i for i in range(n) ]) 3139 b[:n] = a + b * c 3140 for i in prange(n): 3141 c[i] = b[i] * a[i] 3142 return reduce(lambda x,y:x+y, c, 0) 3143 3144 self.check(test_impl, np.ones(10)) 3145 args = (numba.float64[:],) 3146 # everything should fuse with default option 3147 self.assertEqual(countParfors(test_impl, args), 1) 3148 # with no fusion 3149 self.assertEqual(countParfors(test_impl, args, fusion=False), 6) 3150 # with no fusion, comprehension 3151 self.assertEqual(countParfors(test_impl, args, fusion=False, 3152 comprehension=False), 5) 3153 #with no fusion, comprehension, setitem 3154 self.assertEqual(countParfors(test_impl, args, fusion=False, 3155 comprehension=False, setitem=False), 4) 3156 # with no fusion, comprehension, prange 3157 self.assertEqual(countParfors(test_impl, args, fusion=False, 3158 comprehension=False, setitem=False, prange=False), 3) 3159 # with no fusion, comprehension, prange, reduction 3160 self.assertEqual(countParfors(test_impl, args, fusion=False, 3161 comprehension=False, setitem=False, prange=False, 3162 reduction=False), 2) 3163 # with no fusion, comprehension, prange, reduction, numpy 3164 
self.assertEqual(countParfors(test_impl, args, fusion=False, 3165 comprehension=False, setitem=False, prange=False, 3166 reduction=False, numpy=False), 0) 3167 3168 3169class TestParforsBitMask(TestParforsBase): 3170 3171 def check(self, pyfunc, *args, **kwargs): 3172 cfunc, cpfunc = self.compile_all(pyfunc, *args) 3173 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 3174 3175 @skip_parfors_unsupported 3176 def test_parfor_bitmask1(self): 3177 def test_impl(a, n): 3178 b = a > n 3179 a[b] = 0 3180 return a 3181 3182 self.check(test_impl, np.arange(10), 5) 3183 3184 @skip_parfors_unsupported 3185 def test_parfor_bitmask2(self): 3186 def test_impl(a, b): 3187 a[b] = 0 3188 return a 3189 3190 a = np.arange(10) 3191 b = a > 5 3192 self.check(test_impl, a, b) 3193 3194 @skip_parfors_unsupported 3195 def test_parfor_bitmask3(self): 3196 def test_impl(a, b): 3197 a[b] = a[b] 3198 return a 3199 3200 a = np.arange(10) 3201 b = a > 5 3202 self.check(test_impl, a, b) 3203 3204 @skip_parfors_unsupported 3205 def test_parfor_bitmask4(self): 3206 def test_impl(a, b): 3207 a[b] = (2 * a)[b] 3208 return a 3209 3210 a = np.arange(10) 3211 b = a > 5 3212 self.check(test_impl, a, b) 3213 3214 @skip_parfors_unsupported 3215 def test_parfor_bitmask5(self): 3216 def test_impl(a, b): 3217 a[b] = a[b] * a[b] 3218 return a 3219 3220 a = np.arange(10) 3221 b = a > 5 3222 self.check(test_impl, a, b) 3223 3224 @skip_parfors_unsupported 3225 def test_parfor_bitmask6(self): 3226 def test_impl(a, b, c): 3227 a[b] = c 3228 return a 3229 3230 a = np.arange(10) 3231 b = a > 5 3232 c = np.zeros(sum(b)) 3233 3234 # expect failure due to lack of parallelism 3235 with self.assertRaises(AssertionError) as raises: 3236 self.check(test_impl, a, b, c) 3237 self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) 3238 3239class TestParforsMisc(TestParforsBase): 3240 """ 3241 Tests miscellaneous parts of ParallelAccelerator use. 
3242 """ 3243 _numba_parallel_test_ = False 3244 3245 def check(self, pyfunc, *args, **kwargs): 3246 cfunc, cpfunc = self.compile_all(pyfunc, *args) 3247 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 3248 3249 @skip_parfors_unsupported 3250 def test_no_warn_if_cache_set(self): 3251 3252 def pyfunc(): 3253 arr = np.ones(100) 3254 for i in prange(arr.size): 3255 arr[i] += i 3256 return arr 3257 3258 cfunc = njit(parallel=True, cache=True)(pyfunc) 3259 3260 with warnings.catch_warnings(record=True) as raised_warnings: 3261 warnings.simplefilter('always') 3262 cfunc() 3263 3264 self.assertEqual(len(raised_warnings), 0) 3265 3266 # Make sure the dynamic globals flag is set 3267 has_dynamic_globals = [cres.library.has_dynamic_globals 3268 for cres in cfunc.overloads.values()] 3269 self.assertEqual(has_dynamic_globals, [False]) 3270 3271 @skip_parfors_unsupported 3272 def test_statement_reordering_respects_aliasing(self): 3273 def impl(): 3274 a = np.zeros(10) 3275 a[1:8] = np.arange(0, 7) 3276 print('a[3]:', a[3]) 3277 print('a[3]:', a[3]) 3278 return a 3279 3280 cres = self.compile_parallel(impl, ()) 3281 with captured_stdout() as stdout: 3282 cres.entry_point() 3283 for line in stdout.getvalue().splitlines(): 3284 self.assertEqual('a[3]: 2.0', line) 3285 3286 @skip_parfors_unsupported 3287 def test_parfor_ufunc_typing(self): 3288 def test_impl(A): 3289 return np.isinf(A) 3290 3291 A = np.array([np.inf, 0.0]) 3292 cfunc = njit(parallel=True)(test_impl) 3293 # save global state 3294 old_seq_flag = numba.parfors.parfor.sequential_parfor_lowering 3295 try: 3296 numba.parfors.parfor.sequential_parfor_lowering = True 3297 np.testing.assert_array_equal(test_impl(A), cfunc(A)) 3298 finally: 3299 # recover global state 3300 numba.parfors.parfor.sequential_parfor_lowering = old_seq_flag 3301 3302 @skip_parfors_unsupported 3303 def test_init_block_dce(self): 3304 # issue4690 3305 def test_impl(): 3306 res = 0 3307 arr = [1,2,3,4,5] 3308 
numba.parfors.parfor.init_prange() 3309 dummy = arr 3310 for i in numba.prange(5): 3311 res += arr[i] 3312 return res + dummy[2] 3313 3314 self.assertTrue(get_init_block_size(test_impl, ()) == 0) 3315 3316 @skip_parfors_unsupported 3317 def test_alias_analysis_for_parfor1(self): 3318 def test_impl(): 3319 acc = 0 3320 for _ in range(4): 3321 acc += 1 3322 3323 data = np.zeros((acc,)) 3324 return data 3325 3326 self.check(test_impl) 3327 3328 @skip_parfors_unsupported 3329 def test_no_state_change_in_gufunc_lowering_on_error(self): 3330 # tests #5098, if there's an exception arising in gufunc lowering the 3331 # sequential_parfor_lowering global variable should remain as False on 3332 # stack unwind. 3333 3334 @register_pass(mutates_CFG=True, analysis_only=False) 3335 class BreakParfors(AnalysisPass): 3336 _name = "break_parfors" 3337 3338 def __init__(self): 3339 AnalysisPass.__init__(self) 3340 3341 def run_pass(self, state): 3342 for blk in state.func_ir.blocks.values(): 3343 for stmt in blk.body: 3344 if isinstance(stmt, numba.parfors.parfor.Parfor): 3345 # races should be a set(), that list is iterable 3346 # permits it to get through to the 3347 # _create_gufunc_for_parfor_body routine at which 3348 # point it needs to be a set so e.g. set.difference 3349 # can be computed, this therefore creates an error 3350 # in the right location. 
3351 stmt.races = [] 3352 return True 3353 3354 3355 class BreakParforsCompiler(CompilerBase): 3356 3357 def define_pipelines(self): 3358 pm = DefaultPassBuilder.define_nopython_pipeline(self.state) 3359 pm.add_pass_after(BreakParfors, IRLegalization) 3360 pm.finalize() 3361 return [pm] 3362 3363 3364 @njit(parallel=True, pipeline_class=BreakParforsCompiler) 3365 def foo(): 3366 x = 1 3367 for _ in prange(1): 3368 x += 1 3369 return x 3370 3371 # assert default state for global 3372 self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) 3373 3374 with self.assertRaises(errors.LoweringError) as raises: 3375 foo() 3376 3377 self.assertIn("'list' object has no attribute 'difference'", 3378 str(raises.exception)) 3379 3380 # assert state has not changed 3381 self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) 3382 3383 @skip_parfors_unsupported 3384 def test_issue_5098(self): 3385 class DummyType(types.Opaque): 3386 pass 3387 3388 dummy_type = DummyType("my_dummy") 3389 register_model(DummyType)(models.OpaqueModel) 3390 3391 class Dummy(object): 3392 pass 3393 3394 @typeof_impl.register(Dummy) 3395 def typeof_Dummy(val, c): 3396 return dummy_type 3397 3398 @unbox(DummyType) 3399 def unbox_index(typ, obj, c): 3400 return NativeValue(c.context.get_dummy_value()) 3401 3402 @overload_method(DummyType, "method1", jit_options={"parallel":True}) 3403 def _get_method1(obj, arr, func): 3404 def _foo(obj, arr, func): 3405 def baz(a, f): 3406 c = a.copy() 3407 c[np.isinf(a)] = np.nan 3408 return f(c) 3409 3410 length = len(arr) 3411 output_arr = np.empty(length, dtype=np.float64) 3412 for i in prange(length): 3413 output_arr[i] = baz(arr[i], func) 3414 for i in prange(length - 1): 3415 output_arr[i] += baz(arr[i], func) 3416 return output_arr 3417 return _foo 3418 3419 @njit 3420 def bar(v): 3421 return v.mean() 3422 3423 @njit 3424 def test1(d): 3425 return d.method1(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), bar) 3426 3427 save_state = 
numba.parfors.parfor.sequential_parfor_lowering 3428 self.assertFalse(save_state) 3429 try: 3430 test1(Dummy()) 3431 self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) 3432 finally: 3433 # always set the sequential_parfor_lowering state back to the 3434 # original state 3435 numba.parfors.parfor.sequential_parfor_lowering = save_state 3436 3437 @skip_parfors_unsupported 3438 def test_oversized_tuple_as_arg_to_kernel(self): 3439 3440 @njit(parallel=True) 3441 def oversize_tuple(): 3442 big_tup = (1,2,3,4) 3443 z = 0 3444 for x in prange(10): 3445 z += big_tup[0] 3446 return z 3447 3448 with override_env_config('NUMBA_PARFOR_MAX_TUPLE_SIZE', '3'): 3449 with self.assertRaises(errors.UnsupportedParforsError) as raises: 3450 oversize_tuple() 3451 3452 errstr = str(raises.exception) 3453 self.assertIn("Use of a tuple", errstr) 3454 self.assertIn("in a parallel region", errstr) 3455 3456 @skip_parfors_unsupported 3457 def test_issue5167(self): 3458 3459 def ndvi_njit(img_nir, img_red): 3460 fillvalue = 0 3461 out_img = np.full(img_nir.shape, fillvalue, dtype=img_nir.dtype) 3462 dims = img_nir.shape 3463 for y in prange(dims[0]): 3464 for x in prange(dims[1]): 3465 out_img[y, x] = ((img_nir[y, x] - img_red[y, x]) / 3466 (img_nir[y, x] + img_red[y, x])) 3467 return out_img 3468 3469 tile_shape = (4, 4) 3470 array1 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape) 3471 array2 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape) 3472 self.check(ndvi_njit, array1, array2) 3473 3474 @skip_parfors_unsupported 3475 def test_issue5065(self): 3476 3477 def reproducer(a, dist, dist_args): 3478 result = np.zeros((a.shape[0], a.shape[0]), dtype=np.float32) 3479 for i in prange(a.shape[0]): 3480 for j in range(i + 1, a.shape[0]): 3481 d = dist(a[i], a[j], *dist_args) 3482 result[i, j] = d 3483 result[j, i] = d 3484 return result 3485 3486 @njit 3487 def euclidean(x, y): 3488 result = 0.0 3489 for i in range(x.shape[0]): 3490 result += (x[i] - y[i]) ** 2 
3491 return np.sqrt(result) 3492 3493 a = np.random.random(size=(5, 2)) 3494 3495 got = njit(parallel=True)(reproducer)(a.copy(), euclidean,()) 3496 expected = reproducer(a.copy(), euclidean,()) 3497 3498 np.testing.assert_allclose(got, expected) 3499 3500 @skip_parfors_unsupported 3501 def test_issue5001(self): 3502 3503 def test_numba_parallel(myarray): 3504 result = [0] * len(myarray) 3505 for i in prange(len(myarray)): 3506 result[i] = len(myarray[i]) 3507 return result 3508 3509 myarray = (np.empty(100),np.empty(50)) 3510 self.check(test_numba_parallel, myarray) 3511 3512 @skip_parfors_unsupported 3513 def test_issue3169(self): 3514 3515 @njit 3516 def foo(grids): 3517 pass 3518 3519 @njit(parallel=True) 3520 def bar(grids): 3521 for x in prange(1): 3522 foo(grids) 3523 3524 # returns nothing, just check it compiles 3525 bar(([1],) * 2) 3526 3527 @disabled_test 3528 def test_issue4846(self): 3529 3530 mytype = namedtuple("mytype", ("a", "b")) 3531 3532 def outer(mydata): 3533 for k in prange(3): 3534 inner(k, mydata) 3535 return mydata.a 3536 3537 @njit(nogil=True) 3538 def inner(k, mydata): 3539 f = (k, mydata.a) 3540 g = (k, mydata.b) 3541 3542 mydata = mytype(a="a", b="b") 3543 3544 self.check(outer, mydata) 3545 3546 @skip_parfors_unsupported 3547 def test_issue3748(self): 3548 3549 def test1b(): 3550 x = (1, 2, 3, 4, 5) 3551 a = 0 3552 for i in prange(len(x)): 3553 a += x[i] 3554 return a 3555 3556 self.check(test1b,) 3557 3558 @skip_parfors_unsupported 3559 def test_issue5277(self): 3560 3561 def parallel_test(size, arr): 3562 for x in prange(size[0]): 3563 for y in prange(size[1]): 3564 arr[y][x] = x * 4.5 + y 3565 return arr 3566 3567 size = (10, 10) 3568 arr = np.zeros(size, dtype=int) 3569 3570 self.check(parallel_test, size, arr) 3571 3572 @skip_parfors_unsupported 3573 def test_issue5570_ssa_races(self): 3574 @njit(parallel=True) 3575 def foo(src, method, out): 3576 for i in prange(1): 3577 for j in range(1): 3578 out[i, j] = 1 3579 if method: 3580 
out += 1 3581 return out 3582 3583 src = np.zeros((5,5)) 3584 method = 57 3585 out = np.zeros((2, 2)) 3586 3587 self.assertPreciseEqual( 3588 foo(src, method, out), 3589 foo.py_func(src, method, out) 3590 ) 3591 3592 @skip_parfors_unsupported 3593 def test_issue6095_numpy_max(self): 3594 @njit(parallel=True) 3595 def find_maxima_3D_jit(args): 3596 package = args 3597 for index in range(0, 10): 3598 z_stack = package[index, :, :] 3599 return np.max(z_stack) 3600 3601 np.random.seed(0) 3602 args = np.random.random((10, 10, 10)) 3603 self.assertPreciseEqual( 3604 find_maxima_3D_jit(args), 3605 find_maxima_3D_jit.py_func(args), 3606 ) 3607 3608 3609@skip_parfors_unsupported 3610class TestParforsDiagnostics(TestParforsBase): 3611 3612 def check(self, pyfunc, *args, **kwargs): 3613 cfunc, cpfunc = self.compile_all(pyfunc, *args) 3614 self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) 3615 3616 def assert_fusion_equivalence(self, got, expected): 3617 a = self._fusion_equivalent(got) 3618 b = self._fusion_equivalent(expected) 3619 self.assertEqual(a, b) 3620 3621 def _fusion_equivalent(self, thing): 3622 # parfors indexes the Parfors class instance id's from wherever the 3623 # internal state happens to be. To assert fusion equivalence we just 3624 # check that the relative difference between fusion adjacency lists 3625 # is the same. 
For example: 3626 # {3: [2, 1]} is the same as {13: [12, 11]} 3627 # this function strips the indexing etc out returning something suitable 3628 # for checking equivalence 3629 new = defaultdict(list) 3630 min_key = min(thing.keys()) 3631 for k in sorted(thing.keys()): 3632 new[k - min_key] = [x - min_key for x in thing[k]] 3633 return new 3634 3635 def assert_diagnostics(self, diagnostics, parfors_count=None, 3636 fusion_info=None, nested_fusion_info=None, 3637 replaced_fns=None, hoisted_allocations=None): 3638 if parfors_count is not None: 3639 self.assertEqual(parfors_count, diagnostics.count_parfors()) 3640 if fusion_info is not None: 3641 self.assert_fusion_equivalence(fusion_info, diagnostics.fusion_info) 3642 if nested_fusion_info is not None: 3643 self.assert_fusion_equivalence(nested_fusion_info, 3644 diagnostics.nested_fusion_info) 3645 if replaced_fns is not None: 3646 repl = diagnostics.replaced_fns.values() 3647 for x in replaced_fns: 3648 for replaced in repl: 3649 if replaced[0] == x: 3650 break 3651 else: 3652 msg = "Replacement for %s was not found. Had %s" % (x, repl) 3653 raise AssertionError(msg) 3654 3655 if hoisted_allocations is not None: 3656 hoisted_allocs = diagnostics.hoisted_allocations() 3657 self.assertEqual(hoisted_allocations, len(hoisted_allocs)) 3658 3659 # just make sure that the dump() function doesn't have an issue! 
3660 with captured_stdout(): 3661 for x in range(1, 5): 3662 diagnostics.dump(x) 3663 3664 def test_array_expr(self): 3665 def test_impl(): 3666 n = 10 3667 a = np.ones(n) 3668 b = np.zeros(n) 3669 return a + b 3670 3671 self.check(test_impl,) 3672 cpfunc = self.compile_parallel(test_impl, ()) 3673 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3674 self.assert_diagnostics(diagnostics, parfors_count=1, 3675 fusion_info = {3: [4, 5]}) 3676 3677 def test_prange(self): 3678 def test_impl(): 3679 n = 10 3680 a = np.empty(n) 3681 for i in prange(n): 3682 a[i] = i * 10 3683 return a 3684 3685 self.check(test_impl,) 3686 cpfunc = self.compile_parallel(test_impl, ()) 3687 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3688 self.assert_diagnostics(diagnostics, parfors_count=1) 3689 3690 def test_nested_prange(self): 3691 def test_impl(): 3692 n = 10 3693 a = np.empty((n, n)) 3694 for i in prange(n): 3695 for j in prange(n): 3696 a[i, j] = i * 10 + j 3697 return a 3698 3699 self.check(test_impl,) 3700 cpfunc = self.compile_parallel(test_impl, ()) 3701 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3702 self.assert_diagnostics(diagnostics, parfors_count=2, 3703 nested_fusion_info={2: [1]}) 3704 3705 def test_function_replacement(self): 3706 def test_impl(): 3707 n = 10 3708 a = np.ones(n) 3709 b = np.argmin(a) 3710 return b 3711 3712 self.check(test_impl,) 3713 cpfunc = self.compile_parallel(test_impl, ()) 3714 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3715 self.assert_diagnostics(diagnostics, parfors_count=1, 3716 fusion_info={2: [3]}, 3717 replaced_fns = [('argmin', 'numpy'),]) 3718 3719 def test_reduction(self): 3720 def test_impl(): 3721 n = 10 3722 a = np.ones(n + 1) # prevent fusion 3723 acc = 0 3724 for i in prange(n): 3725 acc += a[i] 3726 return acc 3727 3728 self.check(test_impl,) 3729 cpfunc = self.compile_parallel(test_impl, ()) 3730 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3731 self.assert_diagnostics(diagnostics, 
parfors_count=2) 3732 3733 def test_setitem(self): 3734 def test_impl(): 3735 n = 10 3736 a = np.ones(n) 3737 a[:] = 7 3738 return a 3739 3740 self.check(test_impl,) 3741 cpfunc = self.compile_parallel(test_impl, ()) 3742 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3743 self.assert_diagnostics(diagnostics, parfors_count=1) 3744 3745 def test_allocation_hoisting(self): 3746 def test_impl(): 3747 n = 10 3748 m = 5 3749 acc = 0 3750 for i in prange(n): 3751 temp = np.zeros((m,)) # the np.empty call should get hoisted 3752 for j in range(m): 3753 temp[j] = i 3754 acc += temp[-1] 3755 return acc 3756 3757 self.check(test_impl,) 3758 cpfunc = self.compile_parallel(test_impl, ()) 3759 diagnostics = cpfunc.metadata['parfor_diagnostics'] 3760 self.assert_diagnostics(diagnostics, hoisted_allocations=1) 3761 3762 3763if __name__ == "__main__": 3764 unittest.main() 3765