1""" 2Defines Linkers that deal with C implementations. 3 4""" 5 6from __future__ import absolute_import, print_function, division 7 8# Python imports 9from copy import copy 10import os 11import sys 12import logging 13 14import numpy as np 15 16import theano 17from theano import config 18from theano.compat import PY3 19from theano.compat import izip 20from six import string_types, reraise 21from six.moves import StringIO, xrange 22 23# gof imports 24from theano.gof import graph 25from theano.gof import link 26from theano.gof import utils 27from theano.gof import cmodule 28from theano.gof.compilelock import get_lock, release_lock 29from theano.gof.callcache import CallCache 30 31 32_logger = logging.getLogger("theano.gof.cc") 33 34 35run_cthunk = None # Will be imported only when needed. 36 37 38def get_module_cache(init_args=None): 39 """ 40 41 Parameters 42 ---------- 43 init_args 44 If not None, the (k, v) pairs in this dictionary will be forwarded to 45 the ModuleCache constructor as keyword arguments. 46 47 """ 48 return cmodule.get_module_cache(config.compiledir, init_args=init_args) 49 50 51_persistent_module_cache = None 52 53 54def get_persistent_module_cache(): 55 global _persistent_module_cache 56 if _persistent_module_cache is None: 57 _persistent_module_cache = CallCache(os.path.join(config.compiledir, 58 'persistent_cache')) 59 return _persistent_module_cache 60 61 62class CodeBlock: 63 """ 64 Represents a computation unit composed of declare, behavior, and cleanup. 65 66 The constructor initializes a L{CodeBlock} with templatized declare, 67 behavior and cleanup. The sub parameter will be used in the other 68 arguments' templates. sub should contain a key called 'id' that maps to an 69 identifier for this block. The identifier will be used to determine the 70 failure code and a label to jump to. It should also contain a key called 71 'failure_var' that contains the name of the variable that contains the error 72 code. 
class CodeBlock:
    """
    Represents a computation unit composed of declare, behavior, and cleanup.

    The constructor initializes a L{CodeBlock} with templatized declare,
    behavior and cleanup. The sub parameter will be used in the other
    arguments' templates. sub should contain a key called 'id' that maps to an
    identifier for this block. The identifier will be used to determine the
    failure code and a label to jump to. It should also contain a key called
    'failure_var' that contains the name of the variable that contains the
    error code.

    Parameters
    ----------
    declare
        C code that declares variables for use by the computation.
    behavior
        C code that performs the computation.
    cleanup
        C code that cleans up things allocated or incref-ed in behavior.

    """

    def __init__(self, declare, behavior, cleanup, sub):
        self.declare = declare
        self.behavior = behavior
        # gcc rejects a label placed immediately before a closing brace,
        # so a dummy declaration is appended after the label. The label is
        # emitted even when cleanup is empty, because the behavior block
        # jumps to it on failure.
        label = "__label_%(id)i:\n" % sub
        dummy = "\ndouble __DUMMY_%(id)i;\n" % sub
        self.cleanup = label + cleanup + dummy


def failure_code(sub, use_goto=True):
    """
    Code contained in sub['fail'], usually substituted for %(fail)s.

    It sets information about current error, then goto the code
    actually handling the failure, which is defined in struct_gen().

    Parameters
    ----------
    sub: dict
        Contains other code snippets that can be substituted,
        in particular 'failure_var' and 'id'.
    use_goto: bool, True by default
        Include a "goto" statement to the failure label.
        Passing False is sometimes required, in which cases we have to
        be careful to avoid executing incorrect code.

    """
    goto_statement = 'goto __label_%(id)i;' % sub if use_goto else ''
    return '''{
        %(failure_var)s = %(id)i;
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected error in an Op's C code. "
                "No Python exception was set.");
        }
        %(goto_statement)s}''' % dict(sub, goto_statement=goto_statement)


def failure_code_init(sub):
    """
    Code for failure in the struct init.

    Parameters
    ----------
    sub
        Dictionary used to template the struct.
        * id -> identifier of this block; it is returned as the
          failure code so the caller knows which init step failed.

    """
    return '''{
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_RuntimeError,
                "Unexpected error in an Op's C code. "
                "No Python exception was set.");
        }
        return %(id)d;
}''' % sub
139 """ 140 return '''{ 141 if (!PyErr_Occurred()) { 142 PyErr_SetString(PyExc_RuntimeError, 143 "Unexpected error in an Op's C code. " 144 "No Python exception was set."); 145 } 146 return %(id)d; 147}''' % sub 148 149 150def code_gen(blocks): 151 """ 152 From a list of L{CodeBlock} instances, returns a string 153 that executes them all in sequence. 154 155 Eg for C{(decl1, task1, 156 cleanup1)} and C{(decl2, task2, cleanup2)} the returned string 157 will be of the form: 158 159 decl1 160 decl2 161 { 162 task1 163 { 164 task2 165 cleanup2 166 } 167 cleanup1 168 } 169 170 Parameters: 171 ---------- 172 blocks 173 List of CodeBlock instances such that 174 * declarations, behavior and cleanup are in the run() 175 method of the struct 176 """ 177 decl = "" 178 head = "" 179 tail = "" 180 for block in blocks: 181 decl += block.declare 182 head = head + ("\n{\n%s" % block.behavior) 183 tail = ("%s\n}\n" % block.cleanup) + tail 184 return decl + head + tail 185 186 187def struct_gen(args, struct_builders, blocks, sub): 188 """ 189 Generates a struct conforming to the following specifications: 190 191 Parameters 192 ---------- 193 args 194 All of the PyObject* type, stored in the struct 195 they represent the storage and must be length 1 python lists. 196 struct_builders 197 List of L{CodeBlock} instances such that 198 * declarations are in the struct 199 * behavior is in the constructor 200 * cleanup is in the destructor 201 blocks 202 List of CodeBlock instances such that 203 * declarations, behavior and cleanup are in the run() 204 method of the struct 205 sub 206 Dictionary used to template the struct. 207 * failure_var -> must contain a variable name to use for 208 the failure code. 209 210 Returns 211 ------- 212 object 213 In a nutshell, this returns code for a struct that represents 214 a function with state. The state's initialization and destruction 215 are handled by struct_builders and the actual behavior of the 216 function is handled by blocks. 
217 218 """ 219 struct_decl = "" 220 struct_init_head = "" 221 struct_init_tail = "" 222 struct_cleanup = "" 223 224 for block in struct_builders: 225 # decl are declarations that go in the struct 226 # init_head are in the constructor 227 # init_tail and cleanup do the same thing, but the former will 228 # be executed if any step in the constructor fails and the 229 # latter only at destruction time. 230 struct_decl += block.declare 231 struct_init_head = struct_init_head + ("\n%s" % block.behavior) 232 struct_cleanup += block.cleanup 233 234 behavior = code_gen(blocks) 235 236 # declares the storage 237 storage_decl = "\n".join(["PyObject* %s;" % arg for arg in args]) 238 # in the constructor, sets the storage to the arguments 239 storage_set = "\n".join(["this->%s = %s;" % (arg, arg) for arg in args]) 240 # increments the storage's refcount in the constructor 241 storage_incref = "\n".join(["Py_XINCREF(%s);" % arg for arg in args]) 242 # decrements the storage's refcount in the destructor 243 storage_decref = "\n".join(["Py_XDECREF(this->%s);" % arg for arg in args]) 244 245 args_names = ", ".join(args) 246 args_decl = ", ".join(["PyObject* %s" % arg for arg in args]) 247 248 # The following code stores the exception data in __ERROR, which 249 # is a special field of the struct. __ERROR is a list of length 3 250 # that holds the type, the value and the traceback. After storing 251 # the error, we return the failure code so we know which code 252 # block failed. 253 do_return = """ 254 if (%(failure_var)s) { 255 // When there is a failure, this code puts the exception 256 // in __ERROR. 
257 PyObject* err_type = NULL; 258 PyObject* err_msg = NULL; 259 PyObject* err_traceback = NULL; 260 PyErr_Fetch(&err_type, &err_msg, &err_traceback); 261 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} 262 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} 263 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} 264 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); 265 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); 266 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); 267 PyList_SET_ITEM(__ERROR, 0, err_type); 268 PyList_SET_ITEM(__ERROR, 1, err_msg); 269 PyList_SET_ITEM(__ERROR, 2, err_traceback); 270 {Py_XDECREF(old_err_type);} 271 {Py_XDECREF(old_err_msg);} 272 {Py_XDECREF(old_err_traceback);} 273 } 274 // The failure code is returned to index what code block failed. 275 return %(failure_var)s; 276 """ % sub 277 278 sub = dict(sub) 279 sub.update(locals()) 280 281 # TODO: add some error checking to make sure storage_<x> are 282 # 1-element lists and __ERROR is a 3-elements list. 283 284 struct_code = """ 285 namespace { 286 struct %(name)s { 287 PyObject* __ERROR; 288 289 %(storage_decl)s 290 %(struct_decl)s 291 292 %(name)s() { 293 // This is only somewhat safe because we: 294 // 1) Are not a virtual class 295 // 2) Do not use any virtual classes in the members 296 // 3) Deal with mostly POD and pointers 297 298 // If this changes, we would have to revise this, but for 299 // now I am tired of chasing segfaults because 300 // initialization code had an error and some pointer has 301 // a junk value. 
302 #ifndef THEANO_DONT_MEMSET_STRUCT 303 memset(this, 0, sizeof(*this)); 304 #endif 305 } 306 ~%(name)s(void) { 307 cleanup(); 308 } 309 310 int init(PyObject* __ERROR, %(args_decl)s) { 311 %(storage_incref)s 312 %(storage_set)s 313 %(struct_init_head)s 314 this->__ERROR = __ERROR; 315 return 0; 316 } 317 void cleanup(void) { 318 %(struct_cleanup)s 319 %(storage_decref)s 320 } 321 int run(void) { 322 int %(failure_var)s = 0; 323 %(behavior)s 324 %(do_return)s 325 } 326 }; 327 } 328 """ % sub 329 330 return struct_code 331 332 333# The get_<x> functions complete the return value of r.get_<x>() 334# with handling of the py_<name> variable. 335 336def get_nothing(r, name, sub): 337 """ 338 WRITEME 339 340 """ 341 return "" 342 343 344def get_c_declare(r, name, sub): 345 """ 346 Wrapper around c_declare that declares py_name. 347 348 """ 349 # The declaration will be used by the Apply node that 350 # is computing it (`r.owner`), and by each of the clients. 351 # If some of these have `check_input=True` in their `.op`, 352 # it means they need `r`'s dtype to be declared, so 353 # we have to pass `check_input=True` to `c_declare`. 354 if ((any([getattr(c.op, 'check_input', config.check_input) 355 for (c, _) in r.clients 356 if not isinstance(c, string_types)]) or 357 (r.owner and 358 getattr(r.owner.op, 'check_input', config.check_input)))): 359 c_declare = r.type.c_declare(name, sub, True) 360 else: 361 c_declare = r.type.c_declare(name, sub, False) 362 pre = """ 363 PyObject* py_%(name)s; 364 """ % locals() 365 return pre + c_declare 366 367 368def get_c_init(r, name, sub): 369 """ 370 Wrapper around c_init that initializes py_name to Py_None. 371 372 """ 373 pre = "" """ 374 py_%(name)s = Py_None; 375 {Py_XINCREF(py_%(name)s);} 376 """ % locals() 377 return pre + r.type.c_init(name, sub) 378 379 380def get_c_extract(r, name, sub): 381 """ 382 Wrapper around c_extract that initializes py_name from storage. 
383 384 """ 385 # `c_extract` is called when getting the value of an apply node's 386 # input from the compute map, before being used by its clients. 387 # If one of the clients has `check_input=True`, we need to perform 388 # checks on the variable. 389 # However that code is not used by C code of the apply node creating 390 # this variable, so there is no need to check `r.owner.op.check_input`. 391 if any([getattr(c.op, 'check_input', config.check_input) 392 for (c, _) in r.clients 393 if not isinstance(c, string_types)]): 394 # check_broadcast is just an hack to easily remove just the 395 # broadcast check on the old GPU back-end. This check isn't 396 # done in the new GPU back-end or on the CPU. 397 if any([getattr(c.op, 'check_broadcast', True) 398 for (c, _) in r.clients 399 if not isinstance(c, string_types)]): 400 c_extract = r.type.c_extract(name, sub, True) 401 else: 402 try: 403 c_extract = r.type.c_extract( 404 name, sub, True, 405 check_broadcast=False) 406 except TypeError as e: 407 c_extract = r.type.c_extract(name, sub, True) 408 else: 409 c_extract = r.type.c_extract(name, sub, False) 410 411 pre = """ 412 py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0); 413 {Py_XINCREF(py_%(name)s);} 414 """ % locals() 415 return pre + c_extract 416 417 418def get_c_extract_out(r, name, sub): 419 """ 420 Wrapper around c_extract_out that initializes py_name from storage. 421 422 """ 423 # `c_extract_out` is used to extract an output variable from 424 # the compute map, to be used as pre-allocated memory for `r` 425 # before its value gets computed. 426 # If the node producing `r` has `check_input=True`, it may 427 # also perform type checks on the initial value of the output, 428 # so we need to pass `check_input=True` to `c_extract_out`. 429 # However, that code is not used by potential clients of `r`, 430 # so we do not need to check them. 
431 check_input = getattr(r.owner.op, 'check_input', config.check_input) 432 # check_broadcast is just an hack to easily remove just the 433 # broadcast check on the old GPU back-end. This check isn't 434 # done in the new GPU back-end or on the CPU. 435 if getattr(r.owner.op, 'check_broadcast', True): 436 c_extract = r.type.c_extract_out(name, sub, check_input) 437 else: 438 try: 439 c_extract = r.type.c_extract_out(name, sub, check_input, 440 check_broadcast=False) 441 except TypeError as e: 442 c_extract = r.type.c_extract_out(name, sub, check_input) 443 444 pre = """ 445 py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0); 446 {Py_XINCREF(py_%(name)s);} 447 """ % locals() 448 return pre + c_extract 449 450 451def get_c_cleanup(r, name, sub): 452 """ 453 Wrapper around c_cleanup that decrefs py_name. 454 455 """ 456 post = """ 457 {Py_XDECREF(py_%(name)s);} 458 """ % locals() 459 return r.type.c_cleanup(name, sub) + post 460 461 462def get_c_sync(r, name, sub): 463 """ 464 Wrapper around c_sync that syncs py_name with storage. 465 466 """ 467 return """ 468 if (!%(failure_var)s) { 469 %(sync)s 470 PyObject* old = PyList_GET_ITEM(storage_%(name)s, 0); 471 {Py_XINCREF(py_%(name)s);} 472 PyList_SET_ITEM(storage_%(name)s, 0, py_%(name)s); 473 {Py_XDECREF(old);} 474 } 475 """ % dict(sync=r.type.c_sync(name, sub), name=name, **sub) 476 477 478def apply_policy(policy, r, name, sub): 479 """ 480 Apply the list of policies to name.r,sub 481 482 Parameters 483 ---------- 484 policy 485 List of functions that map a L{Variable} to a string, 486 or a single such function. 487 r: L{Variable} 488 489 Returns 490 ------- 491 object 492 C{policy[0](r) + policy[1](r) + ...}. 
493 494 """ 495 if isinstance(policy, (list, tuple)): 496 ret = "" 497 for sub_policy in policy: 498 ret += sub_policy(r, name, sub) 499 return ret 500 return policy(r, name, sub) 501 502 503def struct_variable_codeblocks(variable, policies, id, symbol_table, sub): 504 """ 505 Update "sub" dict and create two codeblocks with different failure modes 506 507 Parameters 508 ---------- 509 variable : a Variable 510 policies : a pair of tuples 511 (declare_policy, behavior_policy, cleanup_policy) -- at construction. 512 (declare_policy, behavior_policy, cleanup_policy)) -- at execution. 513 The first list will produce an element of the 'struct_builders' argument 514 in struct_gen. The second list will produce an element of the 'blocks' 515 argument in struct_gen. 516 id 517 The id assigned to this variable's task in the computation. 518 symbol_table 519 A dict that maps variables to variable names. It is not read by this 520 function but a variable name for the variable is computed and added to 521 the table. 522 sub 523 Dictionary for use by L{CodeBlock}. 
524 525 """ 526 527 name = "V%i" % id 528 if variable not in symbol_table: 529 symbol_table[variable] = name 530 sub = dict(sub) 531# sub['name'] = name 532 sub['id'] = id 533 sub['fail'] = failure_code_init(sub) 534 sub['py_ptr'] = "py_%s" % name 535 sub['stor_ptr'] = "storage_%s" % name 536 # struct_declare, struct_behavior, struct_cleanup, sub) 537 struct_builder = CodeBlock(*[apply_policy(policy, variable, name, sub) 538 for policy in policies[0]] + [sub]) 539 sub['id'] = id + 1 540 sub['fail'] = failure_code(sub) 541 sub['py_ptr'] = "py_%s" % name 542 sub['stor_ptr'] = "storage_%s" % name 543 # run_declare, run_behavior, run_cleanup, sub) 544 block = CodeBlock(*[apply_policy(policy, variable, name, sub) 545 for policy in policies[1]] + [sub]) 546 547 return struct_builder, block 548 549 550class CLinker(link.Linker): 551 """ 552 Creates C code for an fgraph, compiles it and returns callables 553 through make_thunk and make_function that make use of the compiled 554 code. 555 556 no_recycling can contain a list of Variables that belong to the fgraph. 557 If a Variable is in no_recycling, CLinker will clear the output storage 558 associated to it during the computation (to avoid reusing it). 559 560 """ 561 562 def __init__(self, schedule=None): 563 self.fgraph = None 564 if schedule: 565 self.schedule = schedule 566 567 def accept(self, fgraph, no_recycling=None, profile=None): 568 """ 569 Associate linker with fgraph 570 571 """ 572 if no_recycling is None: 573 no_recycling = [] 574 if self.fgraph is not None and self.fgraph is not fgraph: 575 # A linker can be tied to only one FunctionGraph. 576 return type(self)(self.schedule).accept( 577 fgraph, no_recycling, profile) 578 self.fgraph = fgraph 579 self.fetch_variables() 580 self.no_recycling = no_recycling 581 return self 582 583 def fetch_variables(self): 584 """ 585 Fills the inputs, outputs, variables, orphans, temps and node_order 586 fields. 
587 588 """ 589 fgraph = self.fgraph 590 self.inputs = fgraph.inputs 591 self.outputs = fgraph.outputs 592 593 self.node_order = self.schedule(fgraph) 594 595 # list(fgraph.variables) 596 # We need to include the unused inputs in our variables, 597 # otherwise we can't pass them to the module. 598 self.variables = [var for var in self.inputs if not len(var.clients)] 599 self.variables += graph.variables(self.inputs, self.outputs) 600 601 # This adds a hidden input which is the params for each node 602 # that needs it 603 self.node_params = dict() 604 for node in self.node_order: 605 params = node.run_params() 606 if params is not graph.NoParams: 607 # try to avoid creating more than one variable for the 608 # same params. 609 if params in self.node_params: 610 var = self.node_params[params] 611 assert var.type == node.params_type 612 var.clients.append((node, 'params')) 613 else: 614 var = graph.Constant(node.params_type, params) 615 var.clients = [(node, 'params')] 616 self.node_params[params] = var 617 self.variables.append(var) 618 619 # The orphans field is listified to ensure a consistent order. 620 # list(fgraph.orphans.difference(self.outputs)) 621 self.orphans = list(r for r in self.variables 622 if isinstance(r, graph.Constant) and 623 r not in self.inputs) 624 # C type constants (theano.scalar.Scalar). 
They don't request an object 625 self.consts = [] 626 # Move c type from orphans (theano.scalar.Scalar) to self.consts 627 for variable in self.orphans: 628 if isinstance(variable, graph.Constant): 629 try: 630 variable.type.c_literal(variable.data) 631 self.consts.append(variable) 632 self.orphans.remove(variable) 633 except (utils.MethodNotDefined, NotImplementedError): 634 pass 635 636 self.temps = list(set(self.variables).difference( 637 self.inputs).difference(self.outputs).difference(self.orphans)) 638 639 def code_gen(self): 640 """ 641 Generates code for a struct that does the computation of the fgraph and 642 stores it in the struct_code field of the instance. 643 644 If reuse_storage is True, outputs and temporaries will be stored in 645 the struct so they can be reused each time a function returned by 646 make_function is called, which means that the output of a call will 647 be invalidated by the next. If reuse_storage is False, that problem 648 is avoided. 649 650 This method caches its computations. 651 652 """ 653 654 if getattr(self, 'struct_code', False): 655 return self.struct_code 656 657 no_recycling = self.no_recycling 658 659 c_support_code_apply = [] 660 c_init_code_apply = [] 661 662 symbol = {} 663 664 # (init_)tasks contains a list of pairs (Op/Variable, task_name) 665 # e.g. (x, 'get') or (x+y, 'code') 666 init_tasks = [] 667 tasks = [] 668 669 # (init_)blocks contain CodeBlock instances. There is a direct 670 # correspondance with (init_)tasks. 
    def code_gen(self):
        """
        Generates code for a struct that does the computation of the fgraph and
        stores it in the struct_code field of the instance.

        If reuse_storage is True, outputs and temporaries will be stored in
        the struct so they can be reused each time a function returned by
        make_function is called, which means that the output of a call will
        be invalidated by the next. If reuse_storage is False, that problem
        is avoided.

        This method caches its computations.

        """

        # Cached from a previous call: the struct code is fully determined
        # by the fgraph this linker accepted.
        if getattr(self, 'struct_code', False):
            return self.struct_code

        no_recycling = self.no_recycling

        c_support_code_apply = []
        c_init_code_apply = []

        # Maps each Variable to the C symbol (or literal) used for it.
        symbol = {}

        # (init_)tasks contains a list of pairs (Op/Variable, task_name)
        # e.g. (x, 'get') or (x+y, 'code')
        init_tasks = []
        tasks = []

        # (init_)blocks contain CodeBlock instances. There is a direct
        # correspondance with (init_)tasks.
        init_blocks = []
        blocks = []

        failure_var = "__failure"
        id = 1

        for variable in self.variables:
            sub = dict(failure_var=failure_var)

            # it might be possible to inline constant variables as C literals
            # policy = [[what to declare in the struct,
            #            what to do at construction,
            #            what to do at destruction],
            #           [what to declare in each run,
            #            what to do at the beginning of each run,
            #            what to do at the end of each run]]
            if variable in self.consts:
                symbol[variable] = ("(" + variable.type.c_literal(
                    variable.data) + ")")
                continue
            elif variable in self.inputs:
                # We need to extract the new inputs at each run
                # they do not need to be relayed to Python, so we don't sync.
                # If the variable is both an input and an output, there is
                # no need to synchronize either, it is already up-to-date.
                policy = [[get_nothing, get_nothing, get_nothing],
                          [get_c_declare, get_c_extract, get_c_cleanup]]
            elif variable in self.orphans:
                if not isinstance(variable, graph.Constant):
                    raise TypeError("All orphans to CLinker must be Constant"
                                    " instances.", variable)
                # orphans are not inputs so we'll just get fetch them
                # when we initialize the struct and assume they stay
                # the same
                policy = [[get_c_declare, get_c_extract, get_c_cleanup],
                          [get_nothing, get_nothing, get_nothing]]
            elif variable in self.temps:
                # temps don't need to be extracted from Python, so we
                # call c_init rather than c_extract they do not need
                # to be relayed to Python, so we don't sync
                if variable.type.c_is_simple() or variable in no_recycling:
                    policy = [[get_nothing, get_nothing, get_nothing],
                              [get_c_declare, get_c_init, get_c_cleanup]]
                else:
                    # it is useful for complex temps to reuse storage
                    # at each run, so we only clean up in the
                    # destructor
                    policy = [[get_c_declare, get_c_init, get_c_cleanup],
                              [get_nothing, get_nothing, get_nothing]]
            elif variable in self.outputs:
                if variable.type.c_is_simple() or variable in no_recycling:
                    # Do not extract output from Python
                    policy = [[get_nothing, get_nothing, get_nothing],
                              [get_c_declare, get_c_init,
                               (get_c_sync, get_c_cleanup)]]
                else:
                    # We try to use the output that is pre-allocated.
                    # The linker will usually just reuse the storage
                    # from last run, but in the first execution,
                    # it will be None.
                    # We clean-up at each run to enable garbage collection
                    # in the Linker.
                    policy = [[get_nothing, get_nothing, get_nothing],
                              [get_c_declare, get_c_extract_out,
                               (get_c_sync, get_c_cleanup)]]
            else:
                raise Exception("this shouldn't be possible, please report this exception")

            builder, block = struct_variable_codeblocks(variable, policy,
                                                        id, symbol, sub)

            # each Variable generates two CodeBlocks, one to
            # declare/initialize/destroy struct variables and the
            # other to declare/extract/cleanup each time the function
            # is run.
            # Typically, only one of the two actually does anything
            # (see all the possible combinations above)

            init_tasks.append((variable, 'init', id))
            init_blocks.append(builder)

            tasks.append((variable, 'get', id + 1))
            blocks.append(block)

            id += 2

        for node_num, node in enumerate(self.node_order):

            sub = dict(failure_var=failure_var)

            params = node.run_params()
            if params is not graph.NoParams:
                params_var = symbol[self.node_params[params]]

            # The placeholder will be replaced by a hash of the entire
            # code (module + support code) in DynamicModule.code.
            # This ensures that, when defining functions in support code,
            # we cannot have two different functions, in different modules,
            # that have the same name.
            name = "node_<<<<HASH_PLACEHOLDER>>>>_%i" % node_num
            isyms = [symbol[r] for r in node.inputs]
            osyms = [symbol[r] for r in node.outputs]

            # Make the CodeBlock for c_code
            sub['id'] = id
            sub['fail'] = failure_code(sub)
            if params is not graph.NoParams:
                sub['params'] = params_var

            sub_struct = dict()
            sub_struct['id'] = id + 1
            # NOTE(review): `failure_code_init` is passed `sub`, not
            # `sub_struct` -- it only reads the 'id' key, which at this
            # point in `sub` is the run-block id, not id + 1. Looks
            # intentional-by-accident; confirm before changing.
            sub_struct['fail'] = failure_code_init(sub)
            if params is not graph.NoParams:
                # Since params inputs are always constants they are
                # guaranteed to be available in the struct init code.
                sub_struct['params'] = params_var

            struct_support = ""
            struct_init = ""
            struct_cleanup = ""

            op = node.op
            # type-specific support code
            try:
                c_support_code_apply.append(op.c_support_code_apply(node,
                                                                    name))
            except utils.MethodNotDefined:
                pass
            else:
                # The following will be executed if the "try" block succeeds
                assert isinstance(c_support_code_apply[-1], string_types), (
                    str(node.op) +
                    " didn't return a string for c_support_code_apply")

            try:
                c_init_code_apply.append(op.c_init_code_apply(node, name))
            except utils.MethodNotDefined:
                pass
            else:
                assert isinstance(c_init_code_apply[-1], string_types), (
                    str(node.op) +
                    " didn't return a string for c_init_code_apply")

            try:
                struct_init = op.c_init_code_struct(node, name, sub_struct)
                assert isinstance(struct_init, string_types), (
                    str(node.op) +
                    " didn't return a string for c_init_code_struct")
            except utils.MethodNotDefined:
                pass

            try:
                struct_support = op.c_support_code_struct(node, name)
                assert isinstance(struct_support, string_types), (
                    str(node.op) +
                    " didn't return a string for c_support_code_struct")
            except utils.MethodNotDefined:
                pass

            try:
                struct_cleanup = op.c_cleanup_code_struct(node, name)
                assert isinstance(struct_cleanup, string_types), (
                    str(node.op) +
                    " didn't return a string for c_cleanup_code_struct")
            except utils.MethodNotDefined:
                pass

            # emit c_code
            try:
                behavior = op.c_code(node, name, isyms, osyms, sub)
            except utils.MethodNotDefined:
                raise NotImplementedError("%s cannot produce C code" % op)
            assert isinstance(behavior, string_types), (
                str(node.op) + " didn't return a string for c_code")
            # To help understand what is following. It help read the c code.
            # This prevent different op that generate the same c code
            # to be merged, I suppose this won't happen...
            behavior = ("// Op class " + node.op.__class__.__name__ + "\n" +
                        behavior)

            try:
                cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
            except utils.MethodNotDefined:
                cleanup = ""

            _logger.info('compiling un-versioned Apply %s', str(node))

            blocks.append(CodeBlock("", behavior, cleanup, sub))
            tasks.append((node, 'code', id))
            id += 1

            init_blocks.append(CodeBlock(struct_support, struct_init,
                                         struct_cleanup, {'id': id}))
            init_tasks.append((node, 'init', id))
            id += 1

        # List of arg names for use in struct_gen. Note the call to
        # uniq: duplicate inputs must only be passed once because they
        # are mapped to the same name. Duplicates are defined by (a
        # is b), rather than (a==b) since Constant instances can
        # compare equal to equivalent Constant instances.
        args = []
        args += ["storage_%s" % symbol[variable] for variable
                 in utils.uniq(self.inputs + self.outputs + self.orphans)]

        # <<<<HASH_PLACEHOLDER>>>> will be replaced by a hash of the whole
        # code in the file, including support code, in DynamicModule.code.
        struct_name = '__struct_compiled_op_%s' % '<<<<HASH_PLACEHOLDER>>>>'
        struct_code = struct_gen(args, init_blocks, blocks,
                                 dict(failure_var=failure_var,
                                      name=struct_name))

        self.struct_code = struct_code
        self.struct_name = struct_name
        self.args = args
        self.r2symbol = symbol
        self.init_blocks = init_blocks
        self.init_tasks = init_tasks
        self.blocks = blocks
        self.tasks = tasks
        all_info = self.inputs + self.outputs + self.orphans
        self.c_support_code_apply = c_support_code_apply
        self.c_init_code_apply = c_init_code_apply

        # Sanity check: the task lists built here must match the ones
        # recomputed by get_init_tasks(); dump both before asserting so
        # mismatches are debuggable.
        if (self.init_tasks, self.tasks) != self.get_init_tasks():
            print("init_tasks\n", self.init_tasks, file=sys.stderr)
            print(self.get_init_tasks()[0], file=sys.stderr)
            print("tasks\n", self.tasks, file=sys.stderr)
            print(self.get_init_tasks()[1], file=sys.stderr)
            assert (self.init_tasks, self.tasks) == self.get_init_tasks()

        # List of indices that should be ignored when passing the arguments
        # (basically, everything that the previous call to uniq eliminated)
        self.dupidx = [i for i, x in enumerate(all_info)
                       if all_info.count(x) > 1 and all_info.index(x) != i]
        return self.struct_code
913 """ 914 ret = [] 915 if config.cmodule.debug: 916 ret.append(""" 917 #ifndef DEBUG 918 #define DEBUG 919 #endif 920 """) 921 # generic support code 922 for x in [y.type for y in self.variables] + [ 923 y.op for y in self.node_order]: 924 try: 925 support_code = x.c_support_code() 926 if isinstance(support_code, list): 927 ret.extend(support_code) 928 else: 929 ret.append(support_code) 930 except utils.MethodNotDefined: 931 pass 932 return ret 933 934 def compile_args(self): 935 """ 936 Returns a list of compile args that are needed by one 937 or more Variables or Ops. 938 939 This might contain duplicates. 940 941 """ 942 ret = ["-O3"] 943# this is the param the -ffast-math activate. I put the explicitly as 944# FillMissing must disable some of them. Putting -ffast-math would 945# make it disable all other parameter at the same time. 946 ret += ["-fno-math-errno", 947 # "-funsafe-math-optimizations", 948 # "-fno-signaling-nans", 949 # "-fcx-limited-range", 950 # "-fno-rounding-math", 951 # "-ffinite-math-only", 952 953 # the current code generate label event if they are not used. 954 # Could use gcc attribute for those label only 955 "-Wno-unused-label", 956 "-Wno-unused-variable", # idem as the precedent 957 "-Wno-write-strings", # generated by our code generator... 958 ] 959 960 c_compiler = self.c_compiler() 961 962 for x in [y.type for y in self.variables] + [ 963 y.op for y in self.node_order]: 964 try: 965 try: 966 ret += x.c_compile_args(c_compiler) 967 except TypeError: 968 ret += x.c_compile_args() 969 except utils.MethodNotDefined: 970 pass 971 972 ret = utils.uniq(ret) # to remove duplicate 973 # The args set by the compiler include the user flags. 
We do not want 974 # to reorder them 975 ret += c_compiler.compile_args() 976 for x in [y.type for y in self.variables] + [ 977 y.op for y in self.node_order]: 978 try: 979 try: 980 no_comp = x.c_no_compile_args(c_compiler) 981 except TypeError: 982 no_comp = x.c_no_compile_args() 983 for i in no_comp: 984 try: 985 ret.remove(i) 986 except ValueError: 987 pass # in case the value is not there 988 except utils.MethodNotDefined: 989 pass 990 return ret 991 992 def headers(self): 993 """ 994 Returns a list of headers that are needed by one 995 or more Types or Ops. 996 997 The return value will not contain duplicates. 998 999 """ 1000 ret = [] 1001 c_compiler = self.c_compiler() 1002 for x in [y.type for y in self.variables] + [ 1003 y.op for y in self.node_order]: 1004 try: 1005 try: 1006 ret += x.c_headers(c_compiler) 1007 except TypeError: 1008 ret += x.c_headers() 1009 except utils.MethodNotDefined: 1010 pass 1011 return utils.uniq(ret) 1012 1013 def init_code(self): 1014 """ 1015 Return a list of code snippets that have to be inserted 1016 in the module initialization code. 1017 1018 The return value will not contain duplicates. 

        """
        ret = []
        for x in [y.type for y in self.variables] + [
                y.op for y in self.node_order]:
            try:
                ret += x.c_init_code()
            except utils.MethodNotDefined:
                pass
        return utils.uniq(ret)

    def c_compiler(self):
        # Ask every Type and Op which C compiler it requires; they must all
        # agree (or express no preference). Defaults to GCC.
        c_compiler = None
        for x in [y.type for y in self.variables] + [
                y.op for y in self.node_order]:
            if hasattr(x, 'c_compiler'):
                x_compiler = x.c_compiler()
            else:
                continue

            if c_compiler is None:
                c_compiler = x_compiler
            else:
                if x_compiler and (x_compiler != c_compiler):
                    raise Exception('Nodes have requested specific'
                                    ' different compilers',
                                    (c_compiler, x_compiler))
        if (c_compiler is None):
            return cmodule.GCC_compiler
        else:
            return c_compiler

    def header_dirs(self):
        """
        Returns a list of header directories that are needed by one
        or more Types or Ops.

        The return value will not contain duplicates.

        """
        ret = []
        c_compiler = self.c_compiler()
        for x in [y.type for y in self.variables] + [
                y.op for y in self.node_order]:
            try:
                try:
                    ret += x.c_header_dirs(c_compiler)
                except TypeError:
                    # Some implementations do not take the compiler argument.
                    ret += x.c_header_dirs()
            except utils.MethodNotDefined:
                pass
        # filter out empty strings/None
        return [r for r in utils.uniq(ret) if r]

    def libraries(self):
        """
        Returns a list of libraries that are needed by one
        or more Types or Ops.

        The return value will not contain duplicates.

        """
        ret = []
        c_compiler = self.c_compiler()
        for x in [y.type for y in self.variables] + [
                y.op for y in self.node_order]:
            try:
                try:
                    ret += x.c_libraries(c_compiler)
                except TypeError:
                    # Some implementations do not take the compiler argument.
                    ret += x.c_libraries()
            except utils.MethodNotDefined:
                pass
        return utils.uniq(ret)

    def lib_dirs(self):
        """
        Returns a list of lib directories that are needed by one
        or more Types or Ops.

        The return value will not contain duplicates.

        """
        ret = []
        c_compiler = self.c_compiler()
        for x in [y.type for y in self.variables] + [
                y.op for y in self.node_order]:
            try:
                try:
                    ret += x.c_lib_dirs(c_compiler)
                except TypeError:
                    # Some implementations do not take the compiler argument.
                    ret += x.c_lib_dirs()
            except utils.MethodNotDefined:
                pass
        # filter out empty strings/None
        return [r for r in utils.uniq(ret) if r]

    def __compile__(self, input_storage=None, output_storage=None,
                    storage_map=None, keep_lock=False):
        """
        Compiles this linker's fgraph.

        Parameters
        ----------
        input_storage: list or None
            List of lists of length 1. In order to use the thunk returned
            by __compile__, the inputs must be put in that storage.
            If None, storage will be allocated.
        output_storage: list of lists of length 1
            The thunk returned by __compile__ will put the variables of the
            computation in these lists. If None, storage will be allocated.

        Returns
        -------
        object
            Tuple of (thunk, module, input containers, output containers,
            error_storage).

        """
        error_storage = [None, None, None]
        if input_storage is None:
            input_storage = tuple([None] for variable in self.inputs)
        if output_storage is None:
            map = {}
            output_storage = []
            # Initialize the map with the inputs, as some outputs may
            # be inputs as well.
            for i, variable in enumerate(self.inputs):
                map[variable] = input_storage[i]
            for variable in self.outputs:
                if variable not in map:
                    map[variable] = [None]
                output_storage.append(map[variable])
        input_storage = tuple(input_storage)
        output_storage = tuple(output_storage)
        thunk, module = self.cthunk_factory(error_storage,
                                            input_storage,
                                            output_storage,
                                            storage_map,
                                            keep_lock=keep_lock)
        return (thunk,
                module,
                [link.Container(input, storage) for input, storage in
                 izip(self.fgraph.inputs, input_storage)],
                [link.Container(output, storage, True) for output, storage in
                 izip(self.fgraph.outputs, output_storage)],
                error_storage)

    def get_init_tasks(self):
        # Build (obj, task_name, failure_id) triples for variables and nodes.
        # Ids are allocated in pairs (even/odd interleaving of init_tasks and
        # tasks) so that a C failure code can later be mapped back to the
        # responsible task (see _CThunk.find_task).
        init_tasks = []
        tasks = []
        id = 1
        for v in self.variables:
            if v in self.consts:
                continue
            init_tasks.append((v, 'init', id))
            tasks.append((v, 'get', id + 1))
            id += 2
        for node in self.node_order:
            tasks.append((node, 'code', id))
            init_tasks.append((node, 'init', id + 1))
            id += 2
        return init_tasks, tasks

    def make_thunk(self, input_storage=None, output_storage=None,
                   storage_map=None, keep_lock=False):
        """
        Compiles this linker's fgraph and returns a function to perform the
        computations, as well as lists of storage cells for both the inputs
        and outputs.

        Parameters
        ----------
        input_storage: list or None
            List of lists of length 1. In order to use
            the thunk returned by __compile__, the inputs must be put in
            that storage. If None, storage will be allocated.
        output_storage: list of lists of length 1.
            The thunk returned by __compile__ will put the variables
            of the computation in these lists. If None, storage will
            be allocated.
        storage_map: dict that maps variables to storages.
            This is used when you need to customize the storage of
            this thunk.
        keep_lock:
            If True, we won't release the lock on the compiledir
            at the end of this function call.

        Returns: thunk, input_storage, output_storage

        The return values can be used as follows:
          f, istor, ostor = clinker.make_thunk()
          istor[0].data = first_input
          istor[1].data = second_input
          f()
          first_output = ostor[0].data
        """
        init_tasks, tasks = self.get_init_tasks()
        cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
            input_storage, output_storage, storage_map,
            keep_lock=keep_lock)

        res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
        res.nodes = self.node_order
        return res, in_storage, out_storage

    def cmodule_key(self):
        """
        Return a complete hashable signature of the module we compiled.

        This function must have the property that no two programs that
        compute different things yield the same key.

        The key returned by this function is of the form (version, signature)
        The signature has the following form:
        {{{
            'CLinker.cmodule_key', compilation args, libraries,
            header_dirs, numpy ABI version, config hash,
            (op0, input_signature0, output_signature0),
            (op1, input_signature1, output_signature1),
            ...
            (opK, input_signatureK, output_signatureK),
        }}}

        Note that config hash now uses sha256, and not md5.

        The signature is a tuple, some elements of which are sub-tuples.

        The outer tuple has a brief header, containing the compilation options
        passed to the compiler, the libraries to link against, a sha256 hash
        of theano.config (for all config options where "in_c_key" is True).
        It is followed by elements for every node in the topological ordering
        of `self.fgraph`.

        Input Signature
        ---------------

        Each input signature is a tuple with an element for each input
        to the corresponding Apply node. Each element identifies the
        type of the node input, and the nature of that input in the
        graph.

        The nature of a typical variable is encoded by integer pairs
        ``((a, b), c)``:
        ``a`` is the topological position of the input's owner
        (-1 for graph inputs),
        ``b`` is the index of the variable in the owner's output list.
        ``c`` is a flag indicating whether the variable is in the
        no_recycling set.

        If a variable is also a graph output, then its position in the
        outputs list is also bundled with this tuple (after the b).

        The nature of a Constant instance is defined as its signature,
        together with two integers: the topological position of the
        first Apply using that Constant instance, and the lowest index
        into that Apply's inputs that refers to that Constant. (These
        two integers are a surrogate for the id() of the Constant.
        The integers are important because merge-able constants have
        the same signature, but require separate containers in C
        code.) The membership in no_recycling is also included in the
        signature.

        Output Signature
        ----------------

        The outputs of a node are entirely determined by the node's Op
        and the nature of the inputs, but the set of outputs that may
        be re-used by the computation (the elements of
        self.no_recycling) can affect the code that is generated.

        The format of each Op's output signature is a (version, no_recycle)
        pair, where version is incremented if codegen() changes how it
        handles the outputs, and no_recycle is simply a list of
        booleans, indicating whether each output is in the
        no_recycling set. Older versions of compiled modules only have the
        no_recycle list.

        """
        return self.cmodule_key_(self.fgraph, self.no_recycling,
                                 compile_args=self.compile_args(),
                                 libraries=self.libraries(),
                                 header_dirs=self.header_dirs(),
                                 c_compiler=self.c_compiler(),
                                 )

    def cmodule_key_variables(self, inputs, outputs, no_recycling,
                              compile_args=None, libraries=None,
                              header_dirs=None, insert_config_hash=True,
                              c_compiler=None):

        # Assemble a dummy fgraph using the provided inputs and outputs. It is
        # only used to compute the cmodule key so it only needs to expose an
        # `inputs` and an `outputs` attribute as well as a toposort() method
        # which returns a deterministic result.
        class FakeFunctionGraph():
            def __init__(self, inputs, outputs):
                self.inputs = inputs
                self.outputs = outputs

            def toposort(self):
                # Calling io_toposort() here is fine because the results will
                # only be used to compute the cmodule key which requires that
                # the result of the toposort be deterministic. The ordering
                # doesn't need to include information about inplace operations
                # because that information will be included explicitly in
                # cmodule_key_().
                return graph.io_toposort(self.inputs, self.outputs)

        fgraph = FakeFunctionGraph(inputs, outputs)
        return self.cmodule_key_(fgraph, no_recycling, compile_args,
                                 libraries, header_dirs, insert_config_hash,
                                 c_compiler)

    def cmodule_key_(self, fgraph, no_recycling, compile_args=None,
                     libraries=None, header_dirs=None, insert_config_hash=True,
                     c_compiler=None):
        """
        Do the actual computation of cmodule_key in a static method
        to allow it to be reused in scalar.Composite.__eq__.

        """
        if compile_args is None:
            compile_args = []
        if libraries is None:
            libraries = []
        if header_dirs is None:
            header_dirs = []
        order = self.schedule(fgraph)
        # set of variables that have been computed by nodes we have
        # seen 'so far' in the loop below
        fgraph_computed_set = set()
        fgraph_inputs_dict = dict((i, (-1, pos)) for pos, i in
                                  enumerate(fgraph.inputs))
        constant_ids = dict()
        op_pos = {}  # Apply -> topological position

        # First we put the header, compile_args, library names and config hash
        # into the signature.
        sig = ['CLinker.cmodule_key']  # will be cast to tuple on return
        if compile_args is not None:
            # We must sort it as the order from a set is not guaranteed.
            # In particular, 2 sets with the same content can give different
            # order depending on the order you put data in it.
            # Sets are used to remove duplicate elements.
            args = sorted(compile_args)
            args = tuple(args)
            sig.append(args)
        if libraries is not None:
            # see comments for compile_args
            args = sorted(libraries)
            args = tuple(args)
            sig.append(args)

        if header_dirs is not None:
            # see comments for compile_args
            args = sorted(header_dirs)
            args = tuple(args)
            sig.append(args)

        # We must always add the numpy ABI version here as
        # DynamicModule always adds the include <numpy/arrayobject.h>
        if np.lib.NumpyVersion(np.__version__) < '1.16.0a':
            ndarray_c_version = np.core.multiarray._get_ndarray_c_version()
        else:
            ndarray_c_version = np.core._multiarray_umath._get_ndarray_c_version()
        sig.append('NPY_ABI_VERSION=0x%X' %
                   ndarray_c_version)
        if c_compiler:
            sig.append('c_compiler_str=' + c_compiler.version_str())

        # IMPORTANT: The 'md5' prefix is used to isolate the compilation
        # parameters from the rest of the key.
        # Any new key element that can lead to a different compiled file
        # with the same source code must be inserted before this hash.

        # NOTE: the "config md5" is not using an md5 hash, but sha256
        # instead. Strings mentioning md5 will be updated at a later release.
        if insert_config_hash:
            sig.append('md5:' + theano.configparser.get_config_hash())
        else:
            sig.append('md5: <omitted>')

        # Single-element list so the closure below can flag a non-hashable
        # constant signature.
        error_on_play = [False]

        def in_sig(i, topological_pos, i_idx):
            # assert that every input to every node is one of:
            # - an fgraph input
            # - an output from a node in the FunctionGraph
            # - a Constant

            # It is important that a variable (i)
            # yield a 'position' that reflects its role in code_gen()
            if isinstance(i, graph.Constant):  # orphans
                if id(i) not in constant_ids:
                    isig = (i.signature(), topological_pos, i_idx)
                    # If the Theano constant provides a strong hash
                    # (no collision for transpose, 2, 1, 0, -1, -2,
                    # 2 element swapped...) we put this hash in the signature
                    # instead of the value. This makes the key file much
                    # smaller for big constant arrays. Before this, we saw key
                    # files up to 80M.
                    if hasattr(isig[0], "theano_hash"):
                        isig = (isig[0].theano_hash(), topological_pos, i_idx)
                    try:
                        hash(isig)
                    except Exception:
                        # generic constants don't have a hashable signature
                        error_on_play[0] = True
                        return None
                    constant_ids[id(i)] = isig
                else:
                    isig = constant_ids[id(i)]
            elif i in fgraph_inputs_dict:  # inputs
                isig = fgraph_inputs_dict[i]
            else:
                if i.owner is None:
                    assert all(all(out is not None for out in o.outputs)
                               for o in order)
                    assert all(input.owner is None for input in fgraph.inputs)
                    raise Exception('what is this?', (i, type(i), i.clients,
                                                      fgraph))

                if i in fgraph.outputs:
                    isig = (op_pos[i.owner],  # outputs
                            i.owner.outputs.index(i),
                            fgraph.outputs.index(i))
                else:
                    isig = (op_pos[i.owner], i.owner.outputs.index(i))  # temps
            return (isig, i in no_recycling)

        version = []
        for node_pos, node in enumerate(order):
            if hasattr(node.op, 'c_code_cache_version_apply'):
                version.append(node.op.c_code_cache_version_apply(node))
            if hasattr(node.op, '__props__'):
                version.append(node.op.__props__)
            for i in node.inputs:
                version.append(i.type.c_code_cache_version())
            for o in node.outputs:
                version.append(o.type.c_code_cache_version())

            # add the signature for this node
            sig.append((
                node.op,
                tuple((i.type, in_sig(i, node_pos, ipos))
                      for ipos, i in enumerate(node.inputs)),
                (1,  # Increment if cmodule changes its handling of outputs
                 tuple(o in no_recycling for o in node.outputs))))

            if error_on_play[0]:
                # if one of the signatures is not hashable
                # then bypass the cache mechanism and
                # compile fresh every time
                return None

            op_pos[node] = node_pos
            fgraph_computed_set.update(node.outputs)

        # Add unused inputs to the key.
        # If inputs don't define a 'clients' attribute (as is the case if
        # fgraph is not a real FunctionGraph but a FakeFunctionGraph, a
        # lightweight class designed to imitate FunctionGraph), pretend they
        # have none. This is fine because the goal is only to have all of the
        # graph's information used to compute the key. If we mistakenly
        # pretend that inputs with clients don't have any, we are only using
        # those inputs more than once to compute the key.
        for ipos, var in [(i, var) for i, var in enumerate(fgraph.inputs)
                          if not len(getattr(var, 'clients', []))]:
            sig.append((var.type, in_sig(var, -1, ipos)))

        # crystalize the signature and version
        sig = tuple(sig)
        version = tuple(version)
        for v in version:
            if not v:
                # one of the ops or types here is unversioned,
                # so this fgraph is entirely unversioned
                return ((), sig)
        return version, sig

    def get_src_code(self):
        # Convenience accessor: the full C source of the dynamic module.
        mod = self.get_dynamic_module()
        return mod.code()

    def compile_cmodule(self, location=None):
        """
        This compiles the source code for this linker and returns a
        loaded module.

        """
        if location is None:
            location = cmodule.dlimport_workdir(config.compiledir)
        mod = self.get_dynamic_module()
        c_compiler = self.c_compiler()
        libs = self.libraries()
        preargs = self.compile_args()
        # We want to compute the code without holding the lock.
        src_code = mod.code()
        get_lock()
        try:
            _logger.debug("LOCATION %s", str(location))
            module = c_compiler.compile_str(
                module_name=mod.code_hash,
                src_code=src_code,
                location=location,
                include_dirs=self.header_dirs(),
                lib_dirs=self.lib_dirs(),
                libs=libs,
                preargs=preargs)
        except Exception as e:
            # Attach the graph to the exception to help debugging.
            e.args += (str(self.fgraph),)
            raise
        finally:
            release_lock()
        return module

    def get_dynamic_module(self):
        """
        Return a cmodule.DynamicModule instance full of the code for our fgraph.

        This method is cached on the first call so it can be called
        multiple times without penalty.

        """
        if not hasattr(self, '_mod'):
            self.code_gen()

            mod = cmodule.DynamicModule()

            # The code of instantiate
            # the 1 is for error_storage
            code = self.instantiate_code(1 + len(self.args))
            instantiate = cmodule.ExtFunction('instantiate', code,
                                              method=cmodule.METH_VARARGS)

            # Static methods that can run and destroy the struct built by
            # instantiate.
            if PY3:
                static = """
        static int {struct_name}_executor({struct_name} *self) {{
            return self->run();
        }}

        static void {struct_name}_destructor(PyObject *capsule) {{
            {struct_name} *self = ({struct_name} *)PyCapsule_GetContext(capsule);
            delete self;
        }}
        """.format(struct_name=self.struct_name)
            else:
                static = """
        static int %(struct_name)s_executor(%(struct_name)s* self) {
            return self->run();
        }

        static void %(struct_name)s_destructor(void* executor, void* self) {
            delete ((%(struct_name)s*)self);
        }
        """ % dict(struct_name=self.struct_name)

            # We add all the support code, compile args, headers and libs we
            # need.
            for support_code in self.support_code() + self.c_support_code_apply:
                mod.add_support_code(support_code)
            mod.add_support_code(self.struct_code)
            mod.add_support_code(static)
            mod.add_function(instantiate)
            for header in self.headers():
                mod.add_include(header)
            for init_code_block in self.init_code() + self.c_init_code_apply:
                mod.add_init_code(init_code_block)
            self._mod = mod
        return self._mod

    def cthunk_factory(self, error_storage, in_storage, out_storage,
                       storage_map=None, keep_lock=False):
        """
        Returns a thunk that points to an instance of a C struct that
        can carry on the computation of this linker's fgraph.

        Parameters
        ----------
        error_storage -> list of length 3
        in_storage -> list of lists of length 1, one per input
        out_storage -> list of lists of length 1, one per output

        Returns a thunk that points to an instance of a C struct that
        can carry on the computation of this linker's fgraph. That thunk,
        when executed, will fetch its inputs from in_storage, put its
        outputs in out_storage and if an error occurs will put the
        type, value and traceback of the exception in error_storage.

        """
        try:
            key = self.cmodule_key()
        except KeyError:
            key = None
        if key is None:
            # If we can't get a key, then forget the cache mechanism.
            module = self.compile_cmodule()
        else:
            # Set compute_map to None as clinker does not support lazy
            # evaluation.
            for node in self.node_order:
                node.op.prepare_node(node, storage_map, None, 'c')
            module = get_module_cache().module_from_key(
                key=key, lnk=self, keep_lock=keep_lock)

        vars = self.inputs + self.outputs + self.orphans
        # List of indices that should be ignored when passing the arguments
        # (basically, everything that the previous call to uniq eliminated)
        dupidx = [i for i, x in enumerate(vars)
                  if vars.count(x) > 1 and vars.index(x) != i]

        out_storage = [x for i, x in enumerate(out_storage)
                       if (i + len(in_storage)) not in dupidx]
        in_storage = [x for i, x in enumerate(in_storage) if i not in dupidx]
        if storage_map is None:
            orphd = [[orphan.data] for orphan in self.orphans]
        else:
            orphd = [storage_map[orphan] for orphan in self.orphans]

        ret = module.instantiate(error_storage,
                                 *(in_storage + out_storage + orphd))
        return ret, module

    def instantiate_code(self, n_args):
        # Generate the C source of the module-level ``instantiate``
        # function: it unpacks the argument tuple, allocates the struct,
        # initializes it, and wraps it in a thunk object.
        code = StringIO()
        struct_name = self.struct_name
        print("static PyObject * instantiate(PyObject * self, PyObject *argtuple) {", file=code)
        print('  assert(PyTuple_Check(argtuple));', file=code)
        print('  if (%(n_args)i != PyTuple_Size(argtuple)){ ' % locals(), file=code)
        print('     PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected %(n_args)i, got %%i", (int)PyTuple_Size(argtuple));' % locals(), file=code)
        print('     return NULL;', file=code)
        print('  }', file=code)
        print('  %(struct_name)s* struct_ptr = new %(struct_name)s();' % locals(), file=code)
        print('  if ( struct_ptr->init(', ','.join('PyTuple_GET_ITEM(argtuple, %i)' % n for n in xrange(n_args)), ') != 0) {', file=code)
        print('     delete struct_ptr;', file=code)
        print('     return NULL;', file=code)
        print('  }', file=code)
        if PY3:
            print("""\
    PyObject* thunk = PyCapsule_New((void*)(&{struct_name}_executor), NULL, {struct_name}_destructor);
    if (thunk != NULL && PyCapsule_SetContext(thunk, struct_ptr) != 0) {{
        PyErr_Clear();
        Py_DECREF(thunk);
        thunk = NULL;
    }}
""".format(**locals()), file=code)
        else:
            print('  PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), struct_ptr, %(struct_name)s_destructor);' % locals(), file=code)
        print("  return thunk; }", file=code)
        return code.getvalue()


class _CThunk(object):
    """
    A thunk with a C implementation.

    Parameters
    ----------
    cthunk
        The CObject pointer used by run_cthunk.
    init_tasks
        The (obj, 'init', id) tasks produced by CLinker.get_init_tasks(),
        used to map a C failure code back to its source.
    tasks
        The (obj, 'get'/'code', id) tasks produced by
        CLinker.get_init_tasks().
    error_storage
        List of length 3 in which the C code stores the exception
        type, value and traceback on failure.
    module
        The module that was used to compile this cthunk.
        Mostly only useful for tests.

    """

    def __init__(self, cthunk, init_tasks, tasks, error_storage, module):
        global run_cthunk
        if run_cthunk is None:
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk  # noqa
        self.cthunk = cthunk
        self.init_tasks = init_tasks
        self.tasks = tasks
        self.error_storage = error_storage
        self.module = module

    def find_task(self, failure_code):
        """
        Maps a failure code to the task that is associated to it.

        """
        failure_code -= 1
        n = len(self.init_tasks)
        # Note that the failure code is distributed in two lists
        # (init_tasks and tasks are interleaved; see get_init_tasks).
        if failure_code < 2 * n:
            return [self.init_tasks, self.tasks][
                failure_code % 2][failure_code // 2]
        else:
            return self.tasks[failure_code - n]

    def __call__(self):
        failure = run_cthunk(self.cthunk)
        if failure:
            task, taskname, id = self.find_task(failure)
            try:
                trace = task.trace
            except AttributeError:
                trace = ()
            try:
                exc_type, _exc_value, exc_trace = self.error_storage
                if task in self.nodes:
                    self.position_of_error = self.nodes.index(task)
                # this can be used to retrieve the location the Op was declared
                exc_value = exc_type(_exc_value)
                exc_value.__thunk_trace__ = trace
            except Exception:
                print(('ERROR retrieving error_storage.'
                       'Was the error set in the c code?'),
                      end=' ', file=sys.stderr)
                print(self.error_storage, file=sys.stderr)
                raise
            reraise(exc_type, exc_value, exc_trace)


class OpWiseCLinker(link.LocalLinker):
    """
    Uses CLinker on the individual Ops that comprise an fgraph and loops
    over them in Python. The variable is slower than a compiled version of
    the whole fgraph, but saves on compilation time because small changes
    in the computation graph won't necessarily trigger any recompilation,
    only local changes in the Variables or Ops that are used.

    If fallback_on_perform is True, OpWiseCLinker will use an op's
    perform method if no C version can be generated.

    no_recycling can contain a list of Variables that belong to the fgraph.
    If a Variable is in no_recycling, CLinker will clear the output storage
    associated to it prior to computation (to avoid reusing it).

    Notes
    -----
    This is in a sense the 'default' linker for Theano.
    The overhead of using the OpWiseCLinker as compared with the CLinker
    is only noticeable for graphs of very small tensors (such as 20
    elements or less).

    """

    __cache__ = {}

    def __init__(self,
                 fallback_on_perform=True,
                 allow_gc=None,
                 nice_errors=True,
                 schedule=None):
        if allow_gc is None:
            allow_gc = config.allow_gc
        self.fgraph = None
        self.fallback_on_perform = fallback_on_perform
        self.nice_errors = nice_errors
        self.allow_gc = allow_gc
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None, profile=None):
        """
        Associate linker with fgraph.
        """
        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            # A linker can be tied to only one FunctionGraph.
            return type(self)(
                fallback_on_perform=self.fallback_on_perform,
                allow_gc=self.allow_gc,
                nice_errors=self.nice_errors,
                schedule=self.schedule,
            ).accept(fgraph, no_recycling, profile)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def make_all(self, profiler=None, input_storage=None, output_storage=None,
                 storage_map=None):

        # The lock will be acquired when we compile the first
        # C code. We will keep the lock until all the function
        # compilation is finished. This way we do not need the
        # lock at all when all C code is already compiled!
        orig_n_lock = getattr(get_lock, "n_lock", 0)
        try:

            fgraph = self.fgraph
            order = self.schedule(fgraph)
            no_recycling = self.no_recycling

            input_storage, output_storage, storage_map = link.map_storage(
                fgraph, order, input_storage, output_storage, storage_map)
            if self.allow_gc:
                computed, last_user = link.gc_helper(order)
                post_thunk_old_storage = []
            else:
                post_thunk_old_storage = None

            compute_map = {}
            for k in storage_map:
                compute_map[k] = [k.owner is None]

            thunks = []
            for node in order:
                # make_thunk will try C code by default, otherwise
                # it falls back to Python.
                thunks += [node.op.make_thunk(node,
                                              storage_map,
                                              compute_map,
                                              no_recycling)]
                thunks[-1].inputs = [storage_map[v] for v in node.inputs]
                thunks[-1].outputs = [storage_map[v] for v in node.outputs]

            for node in order:
                if self.allow_gc:
                    # Storage cells that can be freed right after this node
                    # runs: inputs computed in-graph whose last user is this
                    # node and that are not graph outputs.
                    post_thunk_old_storage.append(
                        [storage_map[input] for input in node.inputs
                         if ((input in computed) and
                             (input not in fgraph.outputs) and
                             node == last_user[input])])

            if no_recycling is True:
                no_recycling = list(storage_map.values())
                no_recycling = utils.difference(no_recycling, input_storage)
            else:
                no_recycling = [storage_map[r]
                                for r in no_recycling if r not in fgraph.inputs]

            f = link.streamline(fgraph, thunks, order,
                                post_thunk_old_storage,
                                no_recycling=no_recycling,
                                nice_errors=self.nice_errors)

            f.allow_gc = self.allow_gc

        finally:
            # Release lock on compilation directory.
            if getattr(get_lock, "n_lock", 0) > orig_n_lock:
                release_lock()
                assert get_lock.n_lock == orig_n_lock

        return (f,
                [link.Container(input, storage)
                 for input, storage in izip(fgraph.inputs, input_storage)],
                [link.Container(output, storage, True)
                 for output, storage in izip(fgraph.outputs, output_storage)],
                thunks,
                order)


def _default_checker(x, y):
    """
    Default checker for DualLinker. This checks that the
    variables contain the same data using ==.

    Parameters
    ----------
    x, y
        The variables to compare data.
    """
    if x[0] != y[0]:
        raise Exception("Output mismatch.",
                        {'performlinker': x[0], 'clinker': y[0]})


class DualLinker(link.Linker):
    """
    Runs the fgraph in parallel using PerformLinker and CLinker.

    The thunk/function produced by DualLinker uses PerformLinker as the
    "main" implementation: the inputs and outputs are fed to/taken from
    the Ops' perform. However, DualLinker also instantiates a copy of
    the fgraph on which it runs OpWiseCLinker. At each step, the variables
    of perform and of the C implementation are verified using a checker
    function.

    """

    def __init__(self, checker=_default_checker, schedule=None):
        """
        Initialize a DualLinker.

        The checker argument must be a function that takes two lists
        of length 1. The first one passed will contain the output
        computed by PerformLinker and the second one the output
        computed by OpWiseCLinker. The checker should compare the data
        fields of the two variables to see if they match. By default,
        DualLinker uses ==. A custom checker can be provided to
        compare up to a certain error tolerance.

        If a mismatch occurs, the checker should raise an exception to
        halt the computation. If it does not, the computation will
        carry on and errors will snowball.
        The checker can sidestep the problem by fiddling with the data,
        but it should be careful not to share data between the two
        outputs (or inplace operations that use them will interfere).

        no_recycling can contain a list of Variables that belong to the fgraph.
        If a Variable is in no_recycling, CLinker will clear the output storage
        associated to it during the computation (to avoid reusing it).

        """
        self.fgraph = None
        self.checker = checker
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None, profile=None):
        """
        Update/tie self with fgraph.
        """
        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            # A linker can be tied to only one FunctionGraph.
            return type(self)(self.checker, self.schedule).accept(
                fgraph, no_recycling, profile)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def make_thunk(self, **kwargs):
        """
        Compiles this linker's fgraph and returns a function to perform the
        computations.
        """
        fgraph = self.fgraph
        no_recycling = self.no_recycling

        _f, i1, o1, thunks1, order1 = (
            link.PerformLinker(schedule=self.schedule).accept(
                fgraph, no_recycling=no_recycling).make_all(**kwargs))
        kwargs.pop('input_storage', None)
        _f, i2, o2, thunks2, order2 = (
            OpWiseCLinker(schedule=self.schedule).accept(
                fgraph, no_recycling=no_recycling).make_all(**kwargs))

        def f():
            # Run both branches node by node, checking outputs after each
            # step with self.checker.
            for input1, input2 in izip(i1, i2):
                # Set the inputs to be the same in both branches.
                # The copy is necessary in order for inplace ops not to
                # interfere.
                input2.storage[0] = copy(input1.storage[0])
            for thunk1, thunk2, node1, node2 in izip(thunks1, thunks2,
                                                     order1, order2):
                for output, storage in izip(node1.outputs, thunk1.outputs):
                    if output in no_recycling:
                        storage[0] = None
                for output, storage in izip(node2.outputs, thunk2.outputs):
                    if output in no_recycling:
                        storage[0] = None
                try:
                    thunk1()
                    thunk2()
                    for output1, output2 in izip(thunk1.outputs,
                                                 thunk2.outputs):
                        self.checker(output1, output2)
                except Exception:
                    link.raise_with_op(node1)

        return f, i1, o1


class HideC(object):
    # Mixin that hides every C-implementation hook by raising
    # MethodNotDefined, which makes the linkers fall back to the Python
    # implementation of the Op.
    def __hide(*args):
        raise utils.MethodNotDefined()

    c_code = __hide
    c_code_cleanup = __hide

    c_headers = __hide
    c_header_dirs = __hide
    c_libraries = __hide
    c_lib_dirs = __hide

    c_support_code = __hide
    c_support_code_apply = __hide

    c_compile_args = __hide
    c_no_compile_args = __hide
    c_init_code = __hide
    c_init_code_apply = __hide

    c_init_code_struct = __hide
    c_support_code_struct = __hide
    c_cleanup_code_struct = __hide

    def c_code_cache_version(self):
        # Empty version tuple marks the C code as unversioned.
        return ()

    def c_code_cache_version_apply(self, node):
        return self.c_code_cache_version()