1"""
2Defines Linkers that deal with C implementations.
3
4"""
5
6from __future__ import absolute_import, print_function, division
7
8# Python imports
9from copy import copy
10import os
11import sys
12import logging
13
14import numpy as np
15
16import theano
17from theano import config
18from theano.compat import PY3
19from theano.compat import izip
20from six import string_types, reraise
21from six.moves import StringIO, xrange
22
23# gof imports
24from theano.gof import graph
25from theano.gof import link
26from theano.gof import utils
27from theano.gof import cmodule
28from theano.gof.compilelock import get_lock, release_lock
29from theano.gof.callcache import CallCache
30
31
32_logger = logging.getLogger("theano.gof.cc")
33
34
35run_cthunk = None  # Will be imported only when needed.
36
37
38def get_module_cache(init_args=None):
39    """
    Return the module cache for config.compiledir.

41    Parameters
42    ----------
43    init_args
44        If not None, the (k, v) pairs in this dictionary will be forwarded to
45        the ModuleCache constructor as keyword arguments.
46
47    """
48    return cmodule.get_module_cache(config.compiledir, init_args=init_args)
49
50
51_persistent_module_cache = None
52
53
54def get_persistent_module_cache():
55    global _persistent_module_cache
56    if _persistent_module_cache is None:
57        _persistent_module_cache = CallCache(os.path.join(config.compiledir,
58                                                          'persistent_cache'))
59    return _persistent_module_cache
60
61
62class CodeBlock:
63    """
64    Represents a computation unit composed of declare, behavior, and cleanup.
65
66    The constructor initializes a L{CodeBlock} with templatized declare,
67    behavior and cleanup. The sub parameter will be used in the other
68    arguments' templates. sub should contain a key called 'id' that maps to an
69    identifier for this block. The identifier will be used to determine the
70    failure code and a label to jump to. It should also contain a key called
71    'failure_var' that contains the name of the variable that contains the error
72    code.
73
74    Parameters
75    ----------
76    declare
77        C code that declares variables for use by the computation.
78    behavior
79        C code that performs the computation.
80    cleanup
81        C code that cleans up things allocated or incref-ed in behavior.
82
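    Examples
    --------
    A minimal, hypothetical instantiation (``sub`` only needs an ``id``
    here; real callers also provide ``failure_var`` and more)::

        block = CodeBlock("int x;", "x = 0;", "", {'id': 4})

    ``block.cleanup`` then starts with the ``__label_4`` label that the
    failure code generated by failure_code() can jump to.
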
83    """
84
85    def __init__(self, declare, behavior, cleanup, sub):
86        self.declare = declare
87        self.behavior = behavior
        # The dummy declaration is needed because gcc throws an error when
        # a label is right next to a closing brace (maybe there's an ignore
        # flag for that...).
        # We need the label even if cleanup is empty because the
        # behavior block jumps there on failure.
        self.cleanup = ("__label_%(id)i:\n" % sub + cleanup +
                        "\ndouble __DUMMY_%(id)i;\n" % sub)
95
96
97def failure_code(sub, use_goto=True):
98    """
99    Code contained in sub['fail'], usually substituted for %(fail)s.
100
    It sets information about the current error, then jumps (goto) to the
    code that actually handles the failure, which is defined in struct_gen().
103
104    Parameters
105    ----------
106    sub: dict
107        Contains other code snippets that can be substituted,
108        in particular 'failure_var' and 'id'.
109    use_goto: bool, True by default
110        Include a "goto" statement to the failure label.
        Passing False is sometimes required, in which case we have to
        be careful to avoid executing incorrect code.
113
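    Examples
    --------
    A rough sketch with hypothetical values::

        snippet = failure_code({'id': 3, 'failure_var': '__failure'})

    ``snippet`` is a C block that sets ``__failure = 3``, sets a
    RuntimeError if no Python exception is already pending, and jumps to
    ``__label_3``.
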
114    """
115    if use_goto:
116        goto_statement = 'goto __label_%(id)i;' % sub
117    else:
118        goto_statement = ''
119    return '''{
120        %(failure_var)s = %(id)i;
121        if (!PyErr_Occurred()) {
122            PyErr_SetString(PyExc_RuntimeError,
123                "Unexpected error in an Op's C code. "
124                "No Python exception was set.");
125        }
126        %(goto_statement)s}''' % dict(sub, goto_statement=goto_statement)
127
128
129def failure_code_init(sub):
130    """
131    Code for failure in the struct init.
132
    Parameters
    ----------
    sub
        Dictionary used to template the code.
        * id -> must contain the integer that will be returned as the
          failure code.
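
    For illustration (hypothetical id)::

        init_fail = failure_code_init({'id': 2})

    ``init_fail`` is a C block that sets a RuntimeError (if no exception
    is already pending) and then executes ``return 2;``.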
139    """
140    return '''{
141        if (!PyErr_Occurred()) {
142            PyErr_SetString(PyExc_RuntimeError,
143                "Unexpected error in an Op's C code. "
144                "No Python exception was set.");
145            }
146        return %(id)d;
147}''' % sub
148
149
150def code_gen(blocks):
151    """
152    From a list of L{CodeBlock} instances, returns a string
153    that executes them all in sequence.
154
    E.g. for C{(decl1, task1,
156    cleanup1)} and C{(decl2, task2, cleanup2)} the returned string
157    will be of the form:
158
159        decl1
160        decl2
161        {
162         task1
163         {
164          task2
165          cleanup2
166         }
167         cleanup1
168        }
169
    Parameters
171    ----------
172    blocks
173         List of CodeBlock instances such that
174         * declarations, behavior and cleanup are in the run()
175         method of the struct
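
    Examples
    --------
    Illustrative sketch (the ``sub`` dicts are minimal and hypothetical;
    CLinker builds much richer ones)::

        b1 = CodeBlock("int a;", "a = 1;", "", {'id': 1})
        b2 = CodeBlock("int b;", "b = a + 1;", "", {'id': 2})
        print(code_gen([b1, b2]))

    This prints both declarations followed by the nested behavior/cleanup
    structure shown above.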
176    """
177    decl = ""
178    head = ""
179    tail = ""
180    for block in blocks:
181        decl += block.declare
182        head = head + ("\n{\n%s" % block.behavior)
183        tail = ("%s\n}\n" % block.cleanup) + tail
184    return decl + head + tail
185
186
187def struct_gen(args, struct_builders, blocks, sub):
188    """
189    Generates a struct conforming to the following specifications:
190
191    Parameters
192    ----------
193     args
        All of type PyObject*, stored in the struct.
        They represent the storage and must be length-1 Python lists.
196     struct_builders
197        List of L{CodeBlock} instances such that
198        * declarations are in the struct
199        * behavior is in the constructor
200        * cleanup is in the destructor
201     blocks
202        List of CodeBlock instances such that
203        * declarations, behavior and cleanup are in the run()
204        method of the struct
205     sub
206        Dictionary used to template the struct.
207        * failure_var -> must contain a variable name to use for
208        the failure code.
209
210    Returns
211    -------
212    object
213        In a nutshell, this returns code for a struct that represents
214        a function with state. The state's initialization and destruction
215        are handled by struct_builders and the actual behavior of the
216        function is handled by blocks.
217
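    Examples
    --------
    A minimal, hypothetical call (CLinker.code_gen() is the real user of
    this function)::

        sub = {'failure_var': '__failure', 'name': 'my_struct', 'id': 1}
        block = CodeBlock("", "/* do something */", "", sub)
        code = struct_gen(['storage_V1'], [], [block], sub)

    ``code`` is the source of a C++ struct named ``my_struct`` with
    ``init()``, ``cleanup()`` and ``run()`` methods.
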
218    """
219    struct_decl = ""
220    struct_init_head = ""
221    struct_init_tail = ""
222    struct_cleanup = ""
223
224    for block in struct_builders:
225        # decl are declarations that go in the struct
226        # init_head are in the constructor
227        # init_tail and cleanup do the same thing, but the former will
228        #     be executed if any step in the constructor fails and the
229        #     latter only at destruction time.
230        struct_decl += block.declare
231        struct_init_head = struct_init_head + ("\n%s" % block.behavior)
232        struct_cleanup += block.cleanup
233
234    behavior = code_gen(blocks)
235
236    # declares the storage
237    storage_decl = "\n".join(["PyObject* %s;" % arg for arg in args])
238    # in the constructor, sets the storage to the arguments
239    storage_set = "\n".join(["this->%s = %s;" % (arg, arg) for arg in args])
240    # increments the storage's refcount in the constructor
241    storage_incref = "\n".join(["Py_XINCREF(%s);" % arg for arg in args])
242    # decrements the storage's refcount in the destructor
243    storage_decref = "\n".join(["Py_XDECREF(this->%s);" % arg for arg in args])
244
245    args_names = ", ".join(args)
246    args_decl = ", ".join(["PyObject* %s" % arg for arg in args])
247
248    # The following code stores the exception data in __ERROR, which
249    # is a special field of the struct. __ERROR is a list of length 3
250    # that holds the type, the value and the traceback. After storing
251    # the error, we return the failure code so we know which code
252    # block failed.
253    do_return = """
254        if (%(failure_var)s) {
255            // When there is a failure, this code puts the exception
256            // in __ERROR.
257            PyObject* err_type = NULL;
258            PyObject* err_msg = NULL;
259            PyObject* err_traceback = NULL;
260            PyErr_Fetch(&err_type, &err_msg, &err_traceback);
261            if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
262            if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
263            if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
264            PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
265            PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
266            PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
267            PyList_SET_ITEM(__ERROR, 0, err_type);
268            PyList_SET_ITEM(__ERROR, 1, err_msg);
269            PyList_SET_ITEM(__ERROR, 2, err_traceback);
270            {Py_XDECREF(old_err_type);}
271            {Py_XDECREF(old_err_msg);}
272            {Py_XDECREF(old_err_traceback);}
273        }
274        // The failure code is returned to index what code block failed.
275        return %(failure_var)s;
276        """ % sub
277
278    sub = dict(sub)
279    sub.update(locals())
280
281    # TODO: add some error checking to make sure storage_<x> are
282    # 1-element lists and __ERROR is a 3-elements list.
283
284    struct_code = """
285    namespace {
286    struct %(name)s {
287        PyObject* __ERROR;
288
289        %(storage_decl)s
290        %(struct_decl)s
291
292        %(name)s() {
293            // This is only somewhat safe because we:
294            //  1) Are not a virtual class
295            //  2) Do not use any virtual classes in the members
296            //  3) Deal with mostly POD and pointers
297
298            // If this changes, we would have to revise this, but for
299            // now I am tired of chasing segfaults because
300            // initialization code had an error and some pointer has
301            // a junk value.
302            #ifndef THEANO_DONT_MEMSET_STRUCT
303            memset(this, 0, sizeof(*this));
304            #endif
305        }
306        ~%(name)s(void) {
307            cleanup();
308        }
309
310        int init(PyObject* __ERROR, %(args_decl)s) {
311            %(storage_incref)s
312            %(storage_set)s
313            %(struct_init_head)s
314            this->__ERROR = __ERROR;
315            return 0;
316        }
317        void cleanup(void) {
318            %(struct_cleanup)s
319            %(storage_decref)s
320        }
321        int run(void) {
322            int %(failure_var)s = 0;
323            %(behavior)s
324            %(do_return)s
325        }
326    };
327    }
328    """ % sub
329
330    return struct_code
331
332
# The get_<x> functions wrap the code returned by r.type.c_<x>()
# with handling of the py_<name> variable.
335
336def get_nothing(r, name, sub):
337    """
    Return an empty string; used as a "do nothing" policy.
339
340    """
341    return ""
342
343
344def get_c_declare(r, name, sub):
345    """
346    Wrapper around c_declare that declares py_name.
347
348    """
349    # The declaration will be used by the Apply node that
350    # is computing it (`r.owner`), and by each of the clients.
351    # If some of these have `check_input=True` in their `.op`,
352    # it means they need `r`'s dtype to be declared, so
353    # we have to pass `check_input=True` to `c_declare`.
354    if ((any([getattr(c.op, 'check_input', config.check_input)
355              for (c, _) in r.clients
356              if not isinstance(c, string_types)]) or
357         (r.owner and
358          getattr(r.owner.op, 'check_input', config.check_input)))):
359        c_declare = r.type.c_declare(name, sub, True)
360    else:
361        c_declare = r.type.c_declare(name, sub, False)
362    pre = """
363    PyObject* py_%(name)s;
364    """ % locals()
365    return pre + c_declare
366
367
368def get_c_init(r, name, sub):
369    """
370    Wrapper around c_init that initializes py_name to Py_None.
371
372    """
    pre = """
374    py_%(name)s = Py_None;
375    {Py_XINCREF(py_%(name)s);}
376    """ % locals()
377    return pre + r.type.c_init(name, sub)
378
379
380def get_c_extract(r, name, sub):
381    """
382    Wrapper around c_extract that initializes py_name from storage.
383
384    """
385    # `c_extract` is called when getting the value of an apply node's
386    # input from the compute map, before being used by its clients.
387    # If one of the clients has `check_input=True`, we need to perform
388    # checks on the variable.
389    # However that code is not used by C code of the apply node creating
390    # this variable, so there is no need to check `r.owner.op.check_input`.
391    if any([getattr(c.op, 'check_input', config.check_input)
392            for (c, _) in r.clients
393            if not isinstance(c, string_types)]):
        # check_broadcast is just a hack to easily remove the broadcast
        # check on the old GPU back-end. This check isn't done in the
        # new GPU back-end or on the CPU.
397        if any([getattr(c.op, 'check_broadcast', True)
398                for (c, _) in r.clients
399                if not isinstance(c, string_types)]):
400            c_extract = r.type.c_extract(name, sub, True)
401        else:
402            try:
403                c_extract = r.type.c_extract(
404                    name, sub, True,
405                    check_broadcast=False)
            except TypeError:
407                c_extract = r.type.c_extract(name, sub, True)
408    else:
409        c_extract = r.type.c_extract(name, sub, False)
410
411    pre = """
412    py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
413    {Py_XINCREF(py_%(name)s);}
414    """ % locals()
415    return pre + c_extract
416
417
418def get_c_extract_out(r, name, sub):
419    """
420    Wrapper around c_extract_out that initializes py_name from storage.
421
422    """
423    # `c_extract_out` is used to extract an output variable from
424    # the compute map, to be used as pre-allocated memory for `r`
425    # before its value gets computed.
426    # If the node producing `r` has `check_input=True`, it may
427    # also perform type checks on the initial value of the output,
428    # so we need to pass `check_input=True` to `c_extract_out`.
429    # However, that code is not used by potential clients of `r`,
430    # so we do not need to check them.
431    check_input = getattr(r.owner.op, 'check_input', config.check_input)
    # check_broadcast is just a hack to easily remove the broadcast
    # check on the old GPU back-end. This check isn't done in the
    # new GPU back-end or on the CPU.
435    if getattr(r.owner.op, 'check_broadcast', True):
436        c_extract = r.type.c_extract_out(name, sub, check_input)
437    else:
438        try:
439            c_extract = r.type.c_extract_out(name, sub, check_input,
440                                             check_broadcast=False)
        except TypeError:
442            c_extract = r.type.c_extract_out(name, sub, check_input)
443
444    pre = """
445    py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
446    {Py_XINCREF(py_%(name)s);}
447    """ % locals()
448    return pre + c_extract
449
450
451def get_c_cleanup(r, name, sub):
452    """
453    Wrapper around c_cleanup that decrefs py_name.
454
455    """
456    post = """
457    {Py_XDECREF(py_%(name)s);}
458    """ % locals()
459    return r.type.c_cleanup(name, sub) + post
460
461
462def get_c_sync(r, name, sub):
463    """
464    Wrapper around c_sync that syncs py_name with storage.
465
466    """
467    return """
468    if (!%(failure_var)s) {
469      %(sync)s
470      PyObject* old = PyList_GET_ITEM(storage_%(name)s, 0);
471      {Py_XINCREF(py_%(name)s);}
472      PyList_SET_ITEM(storage_%(name)s, 0, py_%(name)s);
473      {Py_XDECREF(old);}
474    }
475    """ % dict(sync=r.type.c_sync(name, sub), name=name, **sub)
476
477
478def apply_policy(policy, r, name, sub):
479    """
    Apply a policy (or list of policies) to (r, name, sub).
481
482    Parameters
483    ----------
484    policy
485        List of functions that map a L{Variable} to a string,
486        or a single such function.
487    r: L{Variable}
488
489    Returns
490    -------
491    object
        C{policy[0](r, name, sub) + policy[1](r, name, sub) + ...}.
493
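    Examples
    --------
    Sketch with toy policies (real policies are the get_c_* functions
    defined in this module)::

        p1 = lambda r, name, sub: "declare %s; " % name
        p2 = lambda r, name, sub: "init %s; " % name
        apply_policy([p1, p2], None, "V0", {})
        # -> "declare V0; init V0; "
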
494    """
495    if isinstance(policy, (list, tuple)):
496        ret = ""
497        for sub_policy in policy:
498            ret += sub_policy(r, name, sub)
499        return ret
500    return policy(r, name, sub)
501
502
503def struct_variable_codeblocks(variable, policies, id, symbol_table, sub):
504    """
    Update "sub" dict and create two codeblocks with different failure modes.
506
507    Parameters
508    ----------
509    variable : a Variable
510    policies : a pair of tuples
        (declare_policy, behavior_policy, cleanup_policy) -- at construction.
        (declare_policy, behavior_policy, cleanup_policy) -- at execution.
        The first tuple will produce an element of the 'struct_builders'
        argument in struct_gen. The second tuple will produce an element of
        the 'blocks' argument in struct_gen.
516    id
517        The id assigned to this variable's task in the computation.
518    symbol_table
519        A dict that maps variables to variable names. It is not read by this
520        function but a variable name for the variable is computed and added to
521        the table.
522    sub
523        Dictionary for use by L{CodeBlock}.
524
525    """
526
527    name = "V%i" % id
528    if variable not in symbol_table:
529        symbol_table[variable] = name
530    sub = dict(sub)
531#    sub['name'] = name
532    sub['id'] = id
533    sub['fail'] = failure_code_init(sub)
534    sub['py_ptr'] = "py_%s" % name
535    sub['stor_ptr'] = "storage_%s" % name
536    # struct_declare, struct_behavior, struct_cleanup, sub)
537    struct_builder = CodeBlock(*[apply_policy(policy, variable, name, sub)
538                                 for policy in policies[0]] + [sub])
539    sub['id'] = id + 1
540    sub['fail'] = failure_code(sub)
541    sub['py_ptr'] = "py_%s" % name
542    sub['stor_ptr'] = "storage_%s" % name
543    # run_declare, run_behavior, run_cleanup, sub)
544    block = CodeBlock(*[apply_policy(policy, variable, name, sub)
545                        for policy in policies[1]] + [sub])
546
547    return struct_builder, block
548
549
550class CLinker(link.Linker):
551    """
552    Creates C code for an fgraph, compiles it and returns callables
553    through make_thunk and make_function that make use of the compiled
554    code.
555
556    no_recycling can contain a list of Variables that belong to the fgraph.
557    If a Variable is in no_recycling, CLinker will clear the output storage
558    associated to it during the computation (to avoid reusing it).
559
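    Example (a rough sketch; it assumes a working C compiler and the usual
    theano.tensor / FunctionGraph API)::

        import theano.tensor as T
        from theano.gof import FunctionGraph

        x, y = T.dscalars('x', 'y')
        fgraph = FunctionGraph([x, y], [x + y])
        thunk, in_storage, out_storage = CLinker().accept(fgraph).make_thunk()
        in_storage[0].data, in_storage[1].data = 2.0, 3.0
        thunk()
        # out_storage[0].data now holds the result, 5.0
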
560    """
561
562    def __init__(self, schedule=None):
563        self.fgraph = None
564        if schedule:
565            self.schedule = schedule
566
567    def accept(self, fgraph, no_recycling=None, profile=None):
568        """
569        Associate linker with fgraph
570
571        """
572        if no_recycling is None:
573            no_recycling = []
574        if self.fgraph is not None and self.fgraph is not fgraph:
575            # A linker can be tied to only one FunctionGraph.
576            return type(self)(self.schedule).accept(
577                fgraph, no_recycling, profile)
578        self.fgraph = fgraph
579        self.fetch_variables()
580        self.no_recycling = no_recycling
581        return self
582
583    def fetch_variables(self):
584        """
585        Fills the inputs, outputs, variables, orphans, temps and node_order
586        fields.
587
588        """
589        fgraph = self.fgraph
590        self.inputs = fgraph.inputs
591        self.outputs = fgraph.outputs
592
593        self.node_order = self.schedule(fgraph)
594
595        # list(fgraph.variables)
596        # We need to include the unused inputs in our variables,
597        # otherwise we can't pass them to the module.
598        self.variables = [var for var in self.inputs if not len(var.clients)]
599        self.variables += graph.variables(self.inputs, self.outputs)
600
601        # This adds a hidden input which is the params for each node
602        # that needs it
603        self.node_params = dict()
604        for node in self.node_order:
605            params = node.run_params()
606            if params is not graph.NoParams:
607                # try to avoid creating more than one variable for the
608                # same params.
609                if params in self.node_params:
610                    var = self.node_params[params]
611                    assert var.type == node.params_type
612                    var.clients.append((node, 'params'))
613                else:
614                    var = graph.Constant(node.params_type, params)
615                    var.clients = [(node, 'params')]
616                    self.node_params[params] = var
617                    self.variables.append(var)
618
619        # The orphans field is listified to ensure a consistent order.
620        # list(fgraph.orphans.difference(self.outputs))
621        self.orphans = list(r for r in self.variables
622                            if isinstance(r, graph.Constant) and
623                            r not in self.inputs)
624        # C type constants (theano.scalar.Scalar). They don't request an object
625        self.consts = []
626        # Move c type from orphans (theano.scalar.Scalar) to self.consts
627        for variable in self.orphans:
628            if isinstance(variable, graph.Constant):
629                try:
630                    variable.type.c_literal(variable.data)
631                    self.consts.append(variable)
632                    self.orphans.remove(variable)
633                except (utils.MethodNotDefined, NotImplementedError):
634                    pass
635
636        self.temps = list(set(self.variables).difference(
637            self.inputs).difference(self.outputs).difference(self.orphans))
638
639    def code_gen(self):
640        """
641        Generates code for a struct that does the computation of the fgraph and
642        stores it in the struct_code field of the instance.
643
644        If reuse_storage is True, outputs and temporaries will be stored in
645        the struct so they can be reused each time a function returned by
646        make_function is called, which means that the output of a call will
647        be invalidated by the next. If reuse_storage is False, that problem
648        is avoided.
649
650        This method caches its computations.
651
652        """
653
654        if getattr(self, 'struct_code', False):
655            return self.struct_code
656
657        no_recycling = self.no_recycling
658
659        c_support_code_apply = []
660        c_init_code_apply = []
661
662        symbol = {}
663
        # (init_)tasks contains a list of triples (Op/Variable, task_name, id)
        # e.g. (x, 'get', 1) or (x + y, 'code', 2)
666        init_tasks = []
667        tasks = []
668
669        # (init_)blocks contain CodeBlock instances. There is a direct
        # correspondence with (init_)tasks.
671        init_blocks = []
672        blocks = []
673
674        failure_var = "__failure"
675        id = 1
676
677        for variable in self.variables:
678            sub = dict(failure_var=failure_var)
679
680            # it might be possible to inline constant variables as C literals
681            # policy = [[what to declare in the struct,
682            #            what to do at construction,
683            #            what to do at destruction],
684            #           [what to declare in each run,
685            #            what to do at the beginning of each run,
686            #            what to do at the end of each run]]
687            if variable in self.consts:
688                symbol[variable] = ("(" + variable.type.c_literal(
689                    variable.data) + ")")
690                continue
691            elif variable in self.inputs:
692                # We need to extract the new inputs at each run
693                # they do not need to be relayed to Python, so we don't sync.
694                # If the variable is both an input and an output, there is
695                # no need to synchronize either, it is already up-to-date.
696                policy = [[get_nothing, get_nothing, get_nothing],
697                          [get_c_declare, get_c_extract, get_c_cleanup]]
698            elif variable in self.orphans:
699                if not isinstance(variable, graph.Constant):
700                    raise TypeError("All orphans to CLinker must be Constant"
701                                    " instances.", variable)
                # orphans are not inputs so we'll just fetch them
703                # when we initialize the struct and assume they stay
704                # the same
705                policy = [[get_c_declare, get_c_extract, get_c_cleanup],
706                          [get_nothing, get_nothing, get_nothing]]
707            elif variable in self.temps:
708                # temps don't need to be extracted from Python, so we
                # call c_init rather than c_extract; they do not need
                # to be relayed to Python, so we don't sync.
711                if variable.type.c_is_simple() or variable in no_recycling:
712                    policy = [[get_nothing, get_nothing, get_nothing],
713                              [get_c_declare, get_c_init, get_c_cleanup]]
714                else:
715                    # it is useful for complex temps to reuse storage
716                    # at each run, so we only clean up in the
717                    # destructor
718                    policy = [[get_c_declare, get_c_init, get_c_cleanup],
719                              [get_nothing, get_nothing, get_nothing]]
720            elif variable in self.outputs:
721                if variable.type.c_is_simple() or variable in no_recycling:
722                    # Do not extract output from Python
723                    policy = [[get_nothing, get_nothing, get_nothing],
724                              [get_c_declare, get_c_init,
725                                  (get_c_sync, get_c_cleanup)]]
726                else:
727                    # We try to use the output that is pre-allocated.
728                    # The linker will usually just reuse the storage
729                    # from last run, but in the first execution,
730                    # it will be None.
731                    # We clean-up at each run to enable garbage collection
732                    # in the Linker.
733                    policy = [[get_nothing, get_nothing, get_nothing],
734                              [get_c_declare, get_c_extract_out,
735                                  (get_c_sync, get_c_cleanup)]]
736            else:
737                raise Exception("this shouldn't be possible, please report this exception")
738
739            builder, block = struct_variable_codeblocks(variable, policy,
740                                                        id, symbol, sub)
741
742            # each Variable generates two CodeBlocks, one to
743            # declare/initialize/destroy struct variables and the
744            # other to declare/extract/cleanup each time the function
745            # is run.
746            # Typically, only one of the two actually does anything
747            # (see all the possible combinations above)
748
749            init_tasks.append((variable, 'init', id))
750            init_blocks.append(builder)
751
752            tasks.append((variable, 'get', id + 1))
753            blocks.append(block)
754
755            id += 2
756
757        for node_num, node in enumerate(self.node_order):
758
759            sub = dict(failure_var=failure_var)
760
761            params = node.run_params()
762            if params is not graph.NoParams:
763                params_var = symbol[self.node_params[params]]
764
765            # The placeholder will be replaced by a hash of the entire
766            # code (module + support code) in DynamicModule.code.
767            # This ensures that, when defining functions in support code,
768            # we cannot have two different functions, in different modules,
769            # that have the same name.
770            name = "node_<<<<HASH_PLACEHOLDER>>>>_%i" % node_num
771            isyms = [symbol[r] for r in node.inputs]
772            osyms = [symbol[r] for r in node.outputs]
773
774            # Make the CodeBlock for c_code
775            sub['id'] = id
776            sub['fail'] = failure_code(sub)
777            if params is not graph.NoParams:
778                sub['params'] = params_var
779
780            sub_struct = dict()
781            sub_struct['id'] = id + 1
782            sub_struct['fail'] = failure_code_init(sub)
783            if params is not graph.NoParams:
784                # Since params inputs are always constants they are
785                # guaranteed to be available in the struct init code.
786                sub_struct['params'] = params_var
787
788            struct_support = ""
789            struct_init = ""
790            struct_cleanup = ""
791
792            op = node.op
793            # type-specific support code
794            try:
795                c_support_code_apply.append(op.c_support_code_apply(node,
796                                                                    name))
797            except utils.MethodNotDefined:
798                pass
799            else:
800                # The following will be executed if the "try" block succeeds
801                assert isinstance(c_support_code_apply[-1], string_types), (
802                    str(node.op) +
803                    " didn't return a string for c_support_code_apply")
804
805            try:
806                c_init_code_apply.append(op.c_init_code_apply(node, name))
807            except utils.MethodNotDefined:
808                pass
809            else:
810                assert isinstance(c_init_code_apply[-1], string_types), (
811                    str(node.op) +
812                    " didn't return a string for c_init_code_apply")
813
814            try:
815                struct_init = op.c_init_code_struct(node, name, sub_struct)
816                assert isinstance(struct_init, string_types), (
817                    str(node.op) +
818                    " didn't return a string for c_init_code_struct")
819            except utils.MethodNotDefined:
820                pass
821
822            try:
823                struct_support = op.c_support_code_struct(node, name)
824                assert isinstance(struct_support, string_types), (
825                    str(node.op) +
826                    " didn't return a string for c_support_code_struct")
827            except utils.MethodNotDefined:
828                pass
829
830            try:
831                struct_cleanup = op.c_cleanup_code_struct(node, name)
832                assert isinstance(struct_cleanup, string_types), (
833                    str(node.op) +
834                    " didn't return a string for c_cleanup_code_struct")
835            except utils.MethodNotDefined:
836                pass
837
838            # emit c_code
839            try:
840                behavior = op.c_code(node, name, isyms, osyms, sub)
841            except utils.MethodNotDefined:
842                raise NotImplementedError("%s cannot produce C code" % op)
843            assert isinstance(behavior, string_types), (
844                str(node.op) + " didn't return a string for c_code")
            # Prefix the Op class name to make the generated C code easier
            # to read. This also prevents different Ops that generate the
            # same C code from being merged; we suppose this won't happen...
848            behavior = ("// Op class " + node.op.__class__.__name__ + "\n" +
849                        behavior)
850
851            try:
852                cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
853            except utils.MethodNotDefined:
854                cleanup = ""
855
856            _logger.info('compiling un-versioned Apply %s', str(node))
857
858            blocks.append(CodeBlock("", behavior, cleanup, sub))
859            tasks.append((node, 'code', id))
860            id += 1
861
862            init_blocks.append(CodeBlock(struct_support, struct_init,
863                                         struct_cleanup, {'id': id}))
864            init_tasks.append((node, 'init', id))
865            id += 1
866
867        # List of arg names for use in struct_gen. Note the call to
868        # uniq: duplicate inputs must only be passed once because they
869        # are mapped to the same name.  Duplicates are defined by (a
870        # is b), rather than (a==b) since Constant instances can
871        # compare equal to equivalent Constant instances.
872        args = []
873        args += ["storage_%s" % symbol[variable] for variable
874                 in utils.uniq(self.inputs + self.outputs + self.orphans)]
875
876        # <<<<HASH_PLACEHOLDER>>>> will be replaced by a hash of the whole
877        # code in the file, including support code, in DynamicModule.code.
878        struct_name = '__struct_compiled_op_%s' % '<<<<HASH_PLACEHOLDER>>>>'
879        struct_code = struct_gen(args, init_blocks, blocks,
880                                 dict(failure_var=failure_var,
881                                      name=struct_name))
882
883        self.struct_code = struct_code
884        self.struct_name = struct_name
885        self.args = args
886        self.r2symbol = symbol
887        self.init_blocks = init_blocks
888        self.init_tasks = init_tasks
889        self.blocks = blocks
890        self.tasks = tasks
891        all_info = self.inputs + self.outputs + self.orphans
892        self.c_support_code_apply = c_support_code_apply
893        self.c_init_code_apply = c_init_code_apply
894
895        if (self.init_tasks, self.tasks) != self.get_init_tasks():
896            print("init_tasks\n", self.init_tasks, file=sys.stderr)
897            print(self.get_init_tasks()[0], file=sys.stderr)
898            print("tasks\n", self.tasks, file=sys.stderr)
899            print(self.get_init_tasks()[1], file=sys.stderr)
900            assert (self.init_tasks, self.tasks) == self.get_init_tasks()
901
902        # List of indices that should be ignored when passing the arguments
903        # (basically, everything that the previous call to uniq eliminated)
904        self.dupidx = [i for i, x in enumerate(all_info)
905                       if all_info.count(x) > 1 and all_info.index(x) != i]
906        return self.struct_code
907
908    def support_code(self):
909        """
        Returns a list of support code strings that are needed by
        one or more Variables or Ops.

        The support code from Variables is added before the support code
        from Ops. This might contain duplicates.

913        """
914        ret = []
915        if config.cmodule.debug:
916            ret.append("""
917            #ifndef DEBUG
918            #define DEBUG
919            #endif
920            """)
921        # generic support code
922        for x in [y.type for y in self.variables] + [
923                y.op for y in self.node_order]:
924            try:
925                support_code = x.c_support_code()
926                if isinstance(support_code, list):
927                    ret.extend(support_code)
928                else:
929                    ret.append(support_code)
930            except utils.MethodNotDefined:
931                pass
932        return ret
933
934    def compile_args(self):
935        """
936        Returns a list of compile args that are needed by one
937        or more Variables or Ops.
938
939        This might contain duplicates.
940
941        """
942        ret = ["-O3"]
# These are the parameters that -ffast-math activates. We list them
# explicitly because FillMissing must disable some of them; passing
# -ffast-math itself would disable all of them at once.
946        ret += ["-fno-math-errno",
947                # "-funsafe-math-optimizations",
948                # "-fno-signaling-nans",
949                # "-fcx-limited-range",
950                # "-fno-rounding-math",
951                # "-ffinite-math-only",
952
                # The generated code declares labels even when they are
                # not used. We could use a gcc attribute on those labels only.
                "-Wno-unused-label",
                "-Wno-unused-variable",  # same rationale as the flag above
957                "-Wno-write-strings",  # generated by our code generator...
958                ]
959
960        c_compiler = self.c_compiler()
961
962        for x in [y.type for y in self.variables] + [
963                y.op for y in self.node_order]:
964            try:
965                try:
966                    ret += x.c_compile_args(c_compiler)
967                except TypeError:
968                    ret += x.c_compile_args()
969            except utils.MethodNotDefined:
970                pass
971
        ret = utils.uniq(ret)  # to remove duplicates
        # The args set by the compiler include the user flags. We do not want
        # to reorder them.
975        ret += c_compiler.compile_args()
976        for x in [y.type for y in self.variables] + [
977                y.op for y in self.node_order]:
978            try:
979                try:
980                    no_comp = x.c_no_compile_args(c_compiler)
981                except TypeError:
982                    no_comp = x.c_no_compile_args()
983                for i in no_comp:
984                    try:
985                        ret.remove(i)
986                    except ValueError:
987                        pass  # in case the value is not there
988            except utils.MethodNotDefined:
989                pass
990        return ret
991
992    def headers(self):
993        """
994        Returns a list of headers that are needed by one
995        or more Types or Ops.
996
997        The return value will not contain duplicates.
998
999        """
1000        ret = []
1001        c_compiler = self.c_compiler()
1002        for x in [y.type for y in self.variables] + [
1003                y.op for y in self.node_order]:
1004            try:
1005                try:
1006                    ret += x.c_headers(c_compiler)
1007                except TypeError:
1008                    ret += x.c_headers()
1009            except utils.MethodNotDefined:
1010                pass
1011        return utils.uniq(ret)
1012
1013    def init_code(self):
1014        """
1015        Return a list of code snippets that have to be inserted
1016        in the module initialization code.
1017
1018        The return value will not contain duplicates.
1019
1020        """
1021        ret = []
1022        for x in [y.type for y in self.variables] + [
1023                y.op for y in self.node_order]:
1024            try:
1025                ret += x.c_init_code()
1026            except utils.MethodNotDefined:
1027                pass
1028        return utils.uniq(ret)
1029
1030    def c_compiler(self):
1031        c_compiler = None
1032        for x in [y.type for y in self.variables] + [
1033                y.op for y in self.node_order]:
1034            if hasattr(x, 'c_compiler'):
1035                x_compiler = x.c_compiler()
1036            else:
1037                continue
1038
1039            if c_compiler is None:
1040                c_compiler = x_compiler
1041            else:
1042                if x_compiler and (x_compiler != c_compiler):
1043                    raise Exception('Nodes have requested specific'
1044                                    ' different compilers',
1045                                    (c_compiler, x_compiler))
1046        if (c_compiler is None):
1047            return cmodule.GCC_compiler
1048        else:
1049            return c_compiler
1050
1051    def header_dirs(self):
1052        """
        Returns a list of header (include) directories that are needed by
        one or more Types or Ops.
1055
1056        The return value will not contain duplicates.
1057
1058        """
1059        ret = []
1060        c_compiler = self.c_compiler()
1061        for x in [y.type for y in self.variables] + [
1062                y.op for y in self.node_order]:
1063            try:
1064                try:
1065                    ret += x.c_header_dirs(c_compiler)
1066                except TypeError:
1067                    ret += x.c_header_dirs()
1068            except utils.MethodNotDefined:
1069                pass
1070        # filter out empty strings/None
1071        return [r for r in utils.uniq(ret) if r]
1072
1073    def libraries(self):
1074        """
1075        Returns a list of libraries that are needed by one
1076        or more Types or Ops.
1077
1078        The return value will not contain duplicates.
1079
1080        """
1081        ret = []
1082        c_compiler = self.c_compiler()
1083        for x in [y.type for y in self.variables] + [
1084                y.op for y in self.node_order]:
1085            try:
1086                try:
1087                    ret += x.c_libraries(c_compiler)
1088                except TypeError:
1089                    ret += x.c_libraries()
1090            except utils.MethodNotDefined:
1091                pass
1092        return utils.uniq(ret)
1093
1094    def lib_dirs(self):
1095        """
1096        Returns a list of lib directories that are needed by one
1097        or more Types or Ops.
1098
1099        The return value will not contain duplicates.
1100
1101        """
1102        ret = []
1103        c_compiler = self.c_compiler()
1104        for x in [y.type for y in self.variables] + [
1105                y.op for y in self.node_order]:
1106            try:
1107                try:
1108                    ret += x.c_lib_dirs(c_compiler)
1109                except TypeError:
1110                    ret += x.c_lib_dirs()
1111            except utils.MethodNotDefined:
1112                pass
1113        # filter out empty strings/None
1114        return [r for r in utils.uniq(ret) if r]
1115
1116    def __compile__(self, input_storage=None, output_storage=None,
1117                    storage_map=None, keep_lock=False):
1118        """
1119        Compiles this linker's fgraph.
1120
1121        Parameters
1122        ----------
1123        input_storage: list or None
1124            List of lists of length 1. In order to use the thunk returned
1125            by __compile__, the inputs must be put in that storage.
1126            If None, storage will be allocated.
1127        output_storage: list of lists of length 1
1128            The thunk returned by __compile__ will put the variables of the
1129            computation in these lists. If None, storage will be allocated.
1130
1131        Returns
1132        -------
1133        object
            Thunk, module, input_storage, output_storage, error_storage.
1135
1136        """
1137        error_storage = [None, None, None]
1138        if input_storage is None:
1139            input_storage = tuple([None] for variable in self.inputs)
1140        if output_storage is None:
1141            map = {}
1142            output_storage = []
1143            # Initialize the map with the inputs, as some outputs may
1144            # be inputs as well.
1145            for i, variable in enumerate(self.inputs):
1146                map[variable] = input_storage[i]
1147            for variable in self.outputs:
1148                if variable not in map:
1149                    map[variable] = [None]
1150                output_storage.append(map[variable])
1151        input_storage = tuple(input_storage)
1152        output_storage = tuple(output_storage)
1153        thunk, module = self.cthunk_factory(error_storage,
1154                                            input_storage,
1155                                            output_storage,
1156                                            storage_map,
1157                                            keep_lock=keep_lock)
1158        return (thunk,
1159                module,
1160                [link.Container(input, storage) for input, storage in
1161                 izip(self.fgraph.inputs, input_storage)],
1162                [link.Container(output, storage, True) for output, storage in
1163                 izip(self.fgraph.outputs, output_storage)],
1164                error_storage)
1165
1166    def get_init_tasks(self):
1167        init_tasks = []
1168        tasks = []
1169        id = 1
1170        for v in self.variables:
1171            if v in self.consts:
1172                continue
1173            init_tasks.append((v, 'init', id))
1174            tasks.append((v, 'get', id + 1))
1175            id += 2
1176        for node in self.node_order:
1177            tasks.append((node, 'code', id))
1178            init_tasks.append((node, 'init', id + 1))
1179            id += 2
1180        return init_tasks, tasks
1181
1182    def make_thunk(self, input_storage=None, output_storage=None,
1183                   storage_map=None, keep_lock=False):
1184        """
1185        Compiles this linker's fgraph and returns a function to perform the
1186        computations, as well as lists of storage cells for both the inputs
1187        and outputs.
1188
1189        Parameters
1190        ----------
1191        input_storage: list or None
1192            List of lists of length 1. In order to use
1193            the thunk returned by __compile__, the inputs must be put in
1194            that storage. If None, storage will be allocated.
1195        output_storage: list of lists of length 1.
1196            The thunk returned by __compile__ will put the variables
1197            of the computation in these lists. If None, storage will
1198            be allocated.
        storage_map: dict that maps variables to storage cells.
            This is used when you need to customize the storage of
            this thunk.
1202        keep_lock:
1203            If True, we won't release the lock on the compiledir
1204            at the end of this function call.
1205        Returns: thunk, input_storage, output_storage
1206
1207        The return values can be used as follows:
1208          f, istor, ostor = clinker.make_thunk()
1209          istor[0].data = first_input
1210          istor[1].data = second_input
1211          f()
1212          first_output = ostor[0].data
1213        """
1214        init_tasks, tasks = self.get_init_tasks()
1215        cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
1216            input_storage, output_storage, storage_map,
1217            keep_lock=keep_lock)
1218
1219        res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
1220        res.nodes = self.node_order
1221        return res, in_storage, out_storage
1222
1223    def cmodule_key(self):
1224        """
1225        Return a complete hashable signature of the module we compiled.
1226
1227        This function must have the property that no two programs that
1228        compute different things yield the same key.
1229
1230        The key returned by this function is of the form (version, signature)
1231        The signature has the following form:
1232        {{{
1233            'CLinker.cmodule_key', compilation args, libraries,
1234            header_dirs, numpy ABI version, config hash,
1235            (op0, input_signature0, output_signature0),
1236            (op1, input_signature1, output_signature1),
1237            ...
1238            (opK, input_signatureK, output_signatureK),
1239        }}}
1240
1241        Note that config hash now uses sha256, and not md5.
1242
1243        The signature is a tuple, some elements of which are sub-tuples.
1244
        The outer tuple has a brief header, containing the compilation options
        passed to the compiler, the libraries to link against, and a sha256
        hash of theano.config (for all config options where "in_c_key" is True).
1248        It is followed by elements for every node in the topological ordering
1249        of `self.fgraph`.
1250
1251        Input Signature
1252        ---------------
1253
1254        Each input signature is a tuple with an element for each input
1255        to the corresponding Apply node. Each element identifies the
1256        type of the node input, and the nature of that input in the
1257        graph.
1258
1259        The nature of a typical variable is encoded by integer pairs
1260        ``((a,b),c)``:
1261        ``a`` is the topological position of the input's owner
1262              (-1 for graph inputs),
1263        ``b`` is the index of the variable in the owner's output list.
1264        ``c`` is a flag indicating whether the variable is in the
1265              no_recycling set.
1266
1267        If a variable is also a graph output, then its position in the
1268        outputs list is also bundled with this tuple (after the b).
1269
1270        The nature of a Constant instance is defined as its signature,
1271        together with two integers: the topological position of the
1272        first Apply using that Constant instance, and the lowest index
1273        into that Apply's inputs that refers to that Constant.  (These
1274        two integers are a surrogate for the id() of the Constant.
1275        The integers are important because merge-able constants have
1276        the same signature, but require separate containers in C
1277        code.)  The membership in no_recycling is also included in the
1278        signature.
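
        For illustration (hypothetical positions): a graph input at input
        position 0 that is not in no_recycling is encoded as
        ``((-1, 0), False)``; the first output of the node at topological
        position 2, used only as a temporary, is encoded as
        ``((2, 0), False)``.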
1279
1280        Output Signature
1281        ----------------
1282
1283        The outputs of a node are entirely determined by the node's Op
1284        and the nature of the inputs, but the set of outputs that may
1285        be re-used by the computation (the elements of
1286        self.no_recycling) can affect the code that is generated.
1287
1288        The format of each Op's output signature is a (version, no_recycle)
1289        pair, where version is incremented if codegen() changes how it
1290        handles the outputs, and no_recycle is simply a list of
1291        booleans, indicating whether each output is in the
1292        no_recycling set. Older versions of compiled modules only have the
1293        no_recycle list.
1294
1295        """
1296        return self.cmodule_key_(self.fgraph, self.no_recycling,
1297                                 compile_args=self.compile_args(),
1298                                 libraries=self.libraries(),
1299                                 header_dirs=self.header_dirs(),
1300                                 c_compiler=self.c_compiler(),
1301                                 )
1302
1303    def cmodule_key_variables(self, inputs, outputs, no_recycling,
1304                              compile_args=None, libraries=None,
1305                              header_dirs=None, insert_config_hash=True,
1306                              c_compiler=None):
1307
1308        # Assemble a dummy fgraph using the provided inputs and outputs. It is
        # only used to compute the cmodule key, so it only needs to expose
1310        # `inputs` and an `outputs` attribute as well as a toposort() method
1311        # which returns a deterministic result.
1312        class FakeFunctionGraph():
1313            def __init__(self, inputs, outputs):
1314                self.inputs = inputs
1315                self.outputs = outputs
1316
1317            def toposort(self):
1318                # Calling io_toposort() here is fine because the results will
1319                # only be used to compute the cmodule key which requires that
1320                # the result of the toposort be deterministic. The ordering
1321                # doesn't need to include information about inplace operations
1322                # because that information will be included explicitly in
1323                # cmodule_key_().
1324                return graph.io_toposort(self.inputs, self.outputs)
1325
1326        fgraph = FakeFunctionGraph(inputs, outputs)
1327        return self.cmodule_key_(fgraph, no_recycling, compile_args,
1328                                 libraries, header_dirs, insert_config_hash,
1329                                 c_compiler)
1330
1331    def cmodule_key_(self, fgraph, no_recycling, compile_args=None,
1332                     libraries=None, header_dirs=None, insert_config_hash=True,
1333                     c_compiler=None):
1334        """
1335        Do the actual computation of cmodule_key in a static method
1336        to allow it to be reused in scalar.Composite.__eq__.
1337
1338        """
1339        if compile_args is None:
1340            compile_args = []
1341        if libraries is None:
1342            libraries = []
1343        if header_dirs is None:
1344            header_dirs = []
1345        order = self.schedule(fgraph)
1346        # set of variables that have been computed by nodes we have
1347        # seen 'so far' in the loop below
1348        fgraph_computed_set = set()
1349        fgraph_inputs_dict = dict((i, (-1, pos)) for pos, i in
1350                                  enumerate(fgraph.inputs))
1351        constant_ids = dict()
1352        op_pos = {}  # Apply -> topological position
1353
1354        # First we put the header, compile_args, library names and config hash
1355        # into the signature.
1356        sig = ['CLinker.cmodule_key']  # will be cast to tuple on return
1357        if compile_args is not None:
1358            # We must sort it as the order from a set is not guaranteed.
1359            # In  particular, 2 sets with the same content can give different
1360            # order depending on the order you put data in it.
1361            # Sets are used to remove duplicate elements.
1362            args = sorted(compile_args)
1363            args = tuple(args)
1364            sig.append(args)
1365        if libraries is not None:
1366            # see comments for compile_args
1367            args = sorted(libraries)
1368            args = tuple(args)
1369            sig.append(args)
1370
1371        if header_dirs is not None:
1372            args = sorted(header_dirs)
1373            args = tuple(args)
1374            sig.append(args)
1375
1376        # We must always add the numpy ABI version here as
        # DynamicModule always adds the include <numpy/arrayobject.h>.
1378        if np.lib.NumpyVersion(np.__version__) < '1.16.0a':
1379            ndarray_c_version = np.core.multiarray._get_ndarray_c_version()
1380        else:
1381            ndarray_c_version = np.core._multiarray_umath._get_ndarray_c_version()
1382        sig.append('NPY_ABI_VERSION=0x%X' %
1383                   ndarray_c_version)
1384        if c_compiler:
1385            sig.append('c_compiler_str=' + c_compiler.version_str())
1386
1387        # IMPORTANT: The 'md5' prefix is used to isolate the compilation
1388        # parameters from the rest of the key. If you want to add more key
1389        # elements, they should be before this md5 hash if and only if they
1390        # can lead to a different compiled file with the same source code.
1391
        # NOTE: the config 'md5' prefix does not use an md5 hash, but sha256
        # instead. The 'md5' prefix strings will be updated in a later release.
1394        if insert_config_hash:
1395            sig.append('md5:' + theano.configparser.get_config_hash())
1396        else:
1397            sig.append('md5: <omitted>')
1398
1399        error_on_play = [False]
1400
1401        def in_sig(i, topological_pos, i_idx):
            # assert that every input to every node is one of:
1403            # - an fgraph input
1404            # - an output from a node in the FunctionGraph
1405            # - a Constant
1406
1407            # It is important that a variable (i)
1408            # yield a 'position' that reflects its role in code_gen()
1409            if isinstance(i, graph.Constant):  # orphans
1410                if id(i) not in constant_ids:
1411                    isig = (i.signature(), topological_pos, i_idx)
1412                    # If the Theano constant provides a strong hash
1413                    # (no collision for transpose, 2, 1, 0, -1, -2,
1414                    # 2 element swapped...) we put this hash in the signature
1415                    # instead of the value. This makes the key file much
1416                    # smaller for big constant arrays. Before this, we saw key
1417                    # files up to 80M.
                    if hasattr(isig[0], "theano_hash"):
                        isig = (isig[0].theano_hash(), topological_pos, i_idx)
                    try:
                        hash(isig)
                    except Exception:
                        # generic constants don't have a hashable signature
                        error_on_play[0] = True
                        return None
                    constant_ids[id(i)] = isig
                else:
                    isig = constant_ids[id(i)]
                # print 'SIGNATURE', i.signature()
                # return i.signature()
            elif i in fgraph_inputs_dict:  # inputs
                isig = fgraph_inputs_dict[i]
            else:
                if i.owner is None:
                    assert all(all(out is not None for out in o.outputs)
                               for o in order)
                    assert all(input.owner is None for input in fgraph.inputs)
                    raise Exception('what is this?', (i, type(i), i.clients,
                                                      fgraph))

                if i in fgraph.outputs:
                    isig = (op_pos[i.owner],  # outputs
                            i.owner.outputs.index(i),
                            fgraph.outputs.index(i))
                else:
                    isig = (op_pos[i.owner], i.owner.outputs.index(i))  # temps
            return (isig, i in no_recycling)

        version = []
        for node_pos, node in enumerate(order):
            if hasattr(node.op, 'c_code_cache_version_apply'):
                version.append(node.op.c_code_cache_version_apply(node))
            if hasattr(node.op, '__props__'):
                version.append(node.op.__props__)
            for i in node.inputs:
                version.append(i.type.c_code_cache_version())
            for o in node.outputs:
                version.append(o.type.c_code_cache_version())

            # add the signature for this node
            sig.append((
                node.op,
                tuple((i.type, in_sig(i, node_pos, ipos))
                      for ipos, i in enumerate(node.inputs)),
                (1,  # Increment if cmodule changes its handling of outputs
                    tuple(o in no_recycling for o in node.outputs))))

            if error_on_play[0]:
                # if one of the signatures is not hashable
                # then bypass the cache mechanism and
                # compile fresh every time
                return None

            op_pos[node] = node_pos
            fgraph_computed_set.update(node.outputs)

        # Add the unused inputs to the key.
        # If inputs don't define a 'clients' attribute (as is the case if
        # fgraph is not a real FunctionGraph but a FakeFunctionGraph, a
        # lightweight class designed to imitate FunctionGraph), pretend they
        # have none. This is fine because the goal is only to have all of the
        # graph's information used to compute the key. If we mistakenly
        # pretend that inputs with clients don't have any, we are only using
        # those inputs more than once to compute the key.
        for ipos, var in [(i, var) for i, var in enumerate(fgraph.inputs)
                          if not len(getattr(var, 'clients', []))]:
            sig.append((var.type, in_sig(var, -1, ipos)))

        # crystallize the signature and version
        sig = tuple(sig)
        version = tuple(version)
        for v in version:
            if not v:
                # one of the ops or types here is unversioned,
                # so this fgraph is entirely unversioned
                return ((), sig)
        return version, sig

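    # The key returned by cmodule_key_ above is, roughly, a pair
    # (version, signature): `version` collects the c_code_cache_version
    # tuples of the ops and types involved, while `signature` is a tuple that
    # looks something like this (values are illustrative only):
    #
    #     ('CLinker.cmodule_key',
    #      ('-O3', ...),                     # compile args
    #      ('blas', ...),                    # libraries
    #      (...,),                           # header dirs
    #      'NPY_ABI_VERSION=0x...',
    #      'c_compiler_str=...',
    #      'md5:...',                        # config hash
    #      (op, ((type, (isig, False)), ...), (1, (False, ...))),  # per node
    #      ...)
    #
    # Two graphs that produce the same key can reuse the same compiled module.
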
    def get_src_code(self):
        mod = self.get_dynamic_module()
        return mod.code()

    def compile_cmodule(self, location=None):
        """
        This compiles the source code for this linker and returns a
        loaded module.

        """
        if location is None:
            location = cmodule.dlimport_workdir(config.compiledir)
        mod = self.get_dynamic_module()
        c_compiler = self.c_compiler()
        libs = self.libraries()
        preargs = self.compile_args()
        # We want to compute the code without the lock
        src_code = mod.code()
        get_lock()
        try:
            _logger.debug("LOCATION %s", str(location))
            module = c_compiler.compile_str(
                module_name=mod.code_hash,
                src_code=src_code,
                location=location,
                include_dirs=self.header_dirs(),
                lib_dirs=self.lib_dirs(),
                libs=libs,
                preargs=preargs)
        except Exception as e:
            e.args += (str(self.fgraph),)
            raise
        finally:
            release_lock()
        return module

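    # Illustrative use of compile_cmodule (a sketch; `fgraph` stands for a
    # hypothetical FunctionGraph, not something defined in this module):
    #
    #     lnk = CLinker().accept(fgraph)
    #     module = lnk.compile_cmodule()
    #
    # `module` is the loaded extension module; its `instantiate` function
    # builds the C struct that runs the graph (see cthunk_factory below).
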
    def get_dynamic_module(self):
        """
        Return a cmodule.DynamicModule instance full of the code for our fgraph.

        This method is cached on the first call so it can be called
        multiple times without penalty.

        """
        if not hasattr(self, '_mod'):
            self.code_gen()

            mod = cmodule.DynamicModule()

            # The code of instantiate
            # the 1 is for error_storage
            code = self.instantiate_code(1 + len(self.args))
            instantiate = cmodule.ExtFunction('instantiate', code,
                                              method=cmodule.METH_VARARGS)
            # ['error_storage'] + argnames,
            # local_dict = d,
            # global_dict = {})

            # Static methods that can run and destroy the struct built by
            # instantiate.
            if PY3:
                static = """
        static int {struct_name}_executor({struct_name} *self) {{
            return self->run();
        }}

        static void {struct_name}_destructor(PyObject *capsule) {{
            {struct_name} *self = ({struct_name} *)PyCapsule_GetContext(capsule);
            delete self;
        }}
        """.format(struct_name=self.struct_name)
            else:
                static = """
        static int %(struct_name)s_executor(%(struct_name)s* self) {
            return self->run();
        }

        static void %(struct_name)s_destructor(void* executor, void* self) {
            delete ((%(struct_name)s*)self);
        }
        """ % dict(struct_name=self.struct_name)

            # We add all the support code, compile args, headers and libs we need.
            for support_code in self.support_code() + self.c_support_code_apply:
                mod.add_support_code(support_code)
            mod.add_support_code(self.struct_code)
            mod.add_support_code(static)
            mod.add_function(instantiate)
            for header in self.headers():
                mod.add_include(header)
            for init_code_block in self.init_code() + self.c_init_code_apply:
                mod.add_init_code(init_code_block)
            self._mod = mod
        return self._mod

    def cthunk_factory(self, error_storage, in_storage, out_storage,
                       storage_map=None, keep_lock=False):
        """
        Returns a thunk that points to an instance of a C struct that
        can carry on the computation of this linker's fgraph.

        That thunk, when executed, will fetch its inputs from in_storage,
        put its outputs in out_storage and, if an error occurs, will put
        the type, value and traceback of the exception in error_storage.

        Parameters
        ----------
        error_storage
            List of length 3.
        in_storage
            List of lists of length 1, one per input.
        out_storage
            List of lists of length 1, one per output.

        """
        try:
            key = self.cmodule_key()
        except KeyError:
            key = None
        if key is None:
            # If we can't get a key, then forget the cache mechanism.
            module = self.compile_cmodule()
        else:
            # Pass compute_map as None, as the C linker does not support
            # lazy evaluation.
            for node in self.node_order:
                node.op.prepare_node(node, storage_map, None, 'c')
            module = get_module_cache().module_from_key(
                key=key, lnk=self, keep_lock=keep_lock)

        vars = self.inputs + self.outputs + self.orphans
        # List of indices that should be ignored when passing the arguments
        # (basically, everything that the previous call to uniq eliminated)
        dupidx = [i for i, x in enumerate(vars)
                  if vars.count(x) > 1 and vars.index(x) != i]

        out_storage = [x for i, x in enumerate(out_storage)
                       if (i + len(in_storage)) not in dupidx]
        in_storage = [x for i, x in enumerate(in_storage) if i not in dupidx]
        if storage_map is None:
            orphd = [[orphan.data] for orphan in self.orphans]
        else:
            orphd = [storage_map[orphan] for orphan in self.orphans]

        ret = module.instantiate(error_storage,
                                 *(in_storage + out_storage + orphd))
        return ret, module

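    # Sketch of a typical call (the storage lists are hypothetical, built by
    # the caller as described in the docstring above):
    #
    #     error_storage = [None, None, None]
    #     in_storage = [[None] for _ in lnk.inputs]
    #     out_storage = [[None] for _ in lnk.outputs]
    #     cthunk, module = lnk.cthunk_factory(error_storage,
    #                                         in_storage, out_storage)
    #
    # The returned `cthunk` is then typically wrapped in a _CThunk (defined
    # below) so it can be called from Python.
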
    def instantiate_code(self, n_args):
        code = StringIO()
        struct_name = self.struct_name
        print("static PyObject * instantiate(PyObject * self, PyObject *argtuple) {", file=code)
        print('  assert(PyTuple_Check(argtuple));', file=code)
        print('  if (%(n_args)i != PyTuple_Size(argtuple)){ ' % locals(), file=code)
        print('     PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected %(n_args)i, got %%i", (int)PyTuple_Size(argtuple));' % locals(), file=code)
        print('     return NULL;', file=code)
        print('  }', file=code)
        print('  %(struct_name)s* struct_ptr = new %(struct_name)s();' % locals(), file=code)
        print('  if (struct_ptr->init(', ','.join('PyTuple_GET_ITEM(argtuple, %i)' % n for n in xrange(n_args)), ') != 0) {', file=code)
        print('    delete struct_ptr;', file=code)
        print('    return NULL;', file=code)
        print('  }', file=code)
        if PY3:
            print("""\
    PyObject* thunk = PyCapsule_New((void*)(&{struct_name}_executor), NULL, {struct_name}_destructor);
    if (thunk != NULL && PyCapsule_SetContext(thunk, struct_ptr) != 0) {{
        PyErr_Clear();
        Py_DECREF(thunk);
        thunk = NULL;
    }}
""".format(**locals()), file=code)
        else:
            print('  PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), struct_ptr, %(struct_name)s_destructor);' % locals(), file=code)
        print("  return thunk; }", file=code)
        return code.getvalue()
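
    # For n_args == 2, the generated source looks roughly like this (PY3
    # branch; <struct_name> stands for the actual struct name):
    #
    #     static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
    #       assert(PyTuple_Check(argtuple));
    #       if (2 != PyTuple_Size(argtuple)){
    #          PyErr_Format(PyExc_TypeError, "Wrong number of arguments, ...");
    #          return NULL;
    #       }
    #       <struct_name>* struct_ptr = new <struct_name>();
    #       if (struct_ptr->init(PyTuple_GET_ITEM(argtuple, 0),
    #                            PyTuple_GET_ITEM(argtuple, 1)) != 0) {
    #         delete struct_ptr;
    #         return NULL;
    #       }
    #       PyObject* thunk = PyCapsule_New((void*)(&<struct_name>_executor),
    #                                       NULL, <struct_name>_destructor);
    #       ...
    #       return thunk; }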


class _CThunk(object):
    """
    A thunk with a C implementation.

    Parameters
    ----------
    cthunk
        The CObject pointer used by run_cthunk.
    init_tasks
        Task descriptions for the initialization code; used by find_task to
        map a failure code back to its source.
    tasks
        Task descriptions for the code of each Apply node; used by find_task
        to map a failure code back to its source.
    error_storage
        List of length 3 in which the type, value and traceback of an
        exception raised by the C code are stored.
    module
        The module that was used to compile this cthunk.
        Mostly only useful for tests.

    """

    def __init__(self, cthunk, init_tasks, tasks, error_storage, module):
        global run_cthunk
        if run_cthunk is None:
            # Lazy import to avoid compilation when importing theano.
            from theano.gof.cutils import run_cthunk  # noqa
        self.cthunk = cthunk
        self.init_tasks = init_tasks
        self.tasks = tasks
        self.error_storage = error_storage
        self.module = module

    def find_task(self, failure_code):
        """
        Maps a failure code to the task that is associated to it.

        """
        failure_code -= 1
        n = len(self.init_tasks)
        # note that the failure code is distributed in two lists
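        # For example, with n == len(init_tasks) == 2, the (decremented)
        # failure codes map as follows:
        #   0 -> init_tasks[0], 1 -> tasks[0],
        #   2 -> init_tasks[1], 3 -> tasks[1],
        # and any code >= 2 * n indexes tasks[code - n] directly.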
        if failure_code < 2 * n:
            return [self.init_tasks, self.tasks][
                failure_code % 2][failure_code // 2]
        else:
            return self.tasks[failure_code - n]

    def __call__(self):
        failure = run_cthunk(self.cthunk)
        if failure:
            task, taskname, id = self.find_task(failure)
            try:
                trace = task.trace
            except AttributeError:
                trace = ()
            try:
                exc_type, _exc_value, exc_trace = self.error_storage
                if task in self.nodes:
                    self.position_of_error = self.nodes.index(task)
                # this can be used to retrieve the location where the Op
                # was declared
                exc_value = exc_type(_exc_value)
                exc_value.__thunk_trace__ = trace
            except Exception:
                print(('ERROR retrieving error_storage. '
                       'Was the error set in the c code?'),
                      end=' ', file=sys.stderr)
                print(self.error_storage, file=sys.stderr)
                raise
            reraise(exc_type, exc_value, exc_trace)


class OpWiseCLinker(link.LocalLinker):
    """
    Uses CLinker on the individual Ops that comprise an fgraph and loops
    over them in Python. This is slower than a compiled version of
    the whole fgraph, but saves on compilation time because small changes
    in the computation graph won't necessarily trigger any recompilation,
    only local changes in the Variables or Ops that are used.

    If fallback_on_perform is True, OpWiseCLinker will use an op's
    perform method if no C version can be generated.

    no_recycling can contain a list of Variables that belong to the fgraph.
    If a Variable is in no_recycling, CLinker will clear the output storage
    associated to it prior to computation (to avoid reusing it).

    Notes
    -----
    This is in a sense the 'default' linker for Theano. The
    overhead of using the OpWiseCLinker as compared with the CLinker
    is only noticeable for graphs of very small tensors (such as 20
    elements or less).

    """

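    # A minimal usage sketch (illustrative only; `x` and `fg` are
    # hypothetical names, not defined in this module):
    #
    #     import theano.tensor as T
    #     from theano.gof import FunctionGraph
    #     x = T.dvector('x')
    #     fg = FunctionGraph([x], [x * 2])
    #     fn = OpWiseCLinker().accept(fg).make_function()
    #     fn([1.0, 2.0])  # -> array([2., 4.])
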
    __cache__ = {}

    def __init__(self,
                 fallback_on_perform=True,
                 allow_gc=None,
                 nice_errors=True,
                 schedule=None):
        if allow_gc is None:
            allow_gc = config.allow_gc
        self.fgraph = None
        self.fallback_on_perform = fallback_on_perform
        self.nice_errors = nice_errors
        self.allow_gc = allow_gc
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None, profile=None):
        """
        Associate linker with fgraph
        """
        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            # A linker can be tied to only one FunctionGraph.
            return type(self)(
                fallback_on_perform=self.fallback_on_perform,
                allow_gc=self.allow_gc,
                nice_errors=self.nice_errors,
                schedule=self.schedule,
            ).accept(fgraph, no_recycling, profile)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def make_all(self, profiler=None, input_storage=None, output_storage=None,
                 storage_map=None):

        # The lock will be acquired when we compile the first
        # C code. We keep it until all the thunks have been
        # compiled. This way, no lock is needed when all the C code
        # is already compiled!
        orig_n_lock = getattr(get_lock, "n_lock", 0)
        try:

            fgraph = self.fgraph
            order = self.schedule(fgraph)
            no_recycling = self.no_recycling

            input_storage, output_storage, storage_map = link.map_storage(
                fgraph, order, input_storage, output_storage, storage_map)
            if self.allow_gc:
                computed, last_user = link.gc_helper(order)
                post_thunk_old_storage = []
            else:
                post_thunk_old_storage = None

            compute_map = {}
            for k in storage_map:
                compute_map[k] = [k.owner is None]

            thunks = []
            for node in order:
                # make_thunk will try C code by default; otherwise
                # it falls back to python.
                thunks += [node.op.make_thunk(node,
                                              storage_map,
                                              compute_map,
                                              no_recycling)]
                thunks[-1].inputs = [storage_map[v] for v in node.inputs]
                thunks[-1].outputs = [storage_map[v] for v in node.outputs]

            for node in order:
                if self.allow_gc:
                    post_thunk_old_storage.append(
                        [storage_map[input] for input in node.inputs
                         if ((input in computed) and
                             (input not in fgraph.outputs) and
                             node == last_user[input])])

            if no_recycling is True:
                no_recycling = list(storage_map.values())
                no_recycling = utils.difference(no_recycling, input_storage)
            else:
                no_recycling = [storage_map[r]
                                for r in no_recycling if r not in fgraph.inputs]

            f = link.streamline(fgraph, thunks, order,
                                post_thunk_old_storage,
                                no_recycling=no_recycling,
                                nice_errors=self.nice_errors)

            f.allow_gc = self.allow_gc

        finally:
            # Release lock on compilation directory.
            if getattr(get_lock, "n_lock", 0) > orig_n_lock:
                release_lock()
                assert get_lock.n_lock == orig_n_lock

        return (f,
                [link.Container(input, storage)
                 for input, storage in izip(fgraph.inputs, input_storage)],
                [link.Container(output, storage, True)
                 for output, storage in izip(fgraph.outputs, output_storage)],
                thunks,
                order)


def _default_checker(x, y):
    """
    Default checker for DualLinker. This checks that the
    variables contain the same data using ==.

    Parameters
    ----------
    x, y
        The lists of length 1 whose data fields are compared.

    """
    if x[0] != y[0]:
        raise Exception("Output mismatch.",
                        {'performlinker': x[0], 'clinker': y[0]})

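# A tolerance-based checker could look like the following (an illustrative
# sketch, not used anywhere in this module):
#
#     def allclose_checker(x, y):
#         if not np.allclose(x[0], y[0], rtol=1e-5, atol=1e-8):
#             raise Exception("Output mismatch.",
#                             {'performlinker': x[0], 'clinker': y[0]})
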

class DualLinker(link.Linker):
    """
    Runs the fgraph in parallel using PerformLinker and CLinker.

    The thunk/function produced by DualLinker uses PerformLinker as the
    "main" implementation: the inputs and outputs are fed to/taken from
    the Ops' perform. However, DualLinker also instantiates a copy of
    the fgraph on which it runs OpWiseCLinker. At each step, the variables
    of perform and of the C implementation are verified using a checker
    function.

    """

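    # Typical use (a sketch; `fg` stands for a hypothetical FunctionGraph):
    #
    #     f, inputs, outputs = DualLinker().accept(fg).make_thunk()
    #     inputs[0].storage[0] = np.asarray([1.0, 2.0])
    #     f()  # runs both implementations; the checker raises on mismatch
    #     result = outputs[0].storage[0]
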
    def __init__(self, checker=_default_checker, schedule=None):
        """
        Initialize a DualLinker.

        The checker argument must be a function that takes two lists
        of length 1. The first one passed will contain the output
        computed by PerformLinker and the second one the output
        computed by OpWiseCLinker. The checker should compare the data
        fields of the two variables to see if they match. By default,
        DualLinker uses ==. A custom checker can be provided to
        compare up to a certain error tolerance.

        If a mismatch occurs, the checker should raise an exception to
        halt the computation. If it does not, the computation will
        carry on and errors will snowball. The checker can sidestep
        the problem by fiddling with the data, but it should be
        careful not to share data between the two outputs (or inplace
        operations that use them will interfere).

        no_recycling can contain a list of Variables that belong to the fgraph.
        If a Variable is in no_recycling, CLinker will clear the output storage
        associated to it during the computation (to avoid reusing it).

        """
        self.fgraph = None
        self.checker = checker
        if schedule:
            self.schedule = schedule

    def accept(self, fgraph, no_recycling=None, profile=None):
        """
        Update/tie self with fgraph
        """
        if no_recycling is None:
            no_recycling = []
        if self.fgraph is not None and self.fgraph is not fgraph:
            return type(self)(self.checker, self.schedule).accept(
                fgraph, no_recycling, profile)
        self.fgraph = fgraph
        self.no_recycling = no_recycling
        return self

    def make_thunk(self, **kwargs):
        """
        Compiles this linker's fgraph and returns a function to perform the
        computations
        """
        fgraph = self.fgraph
        no_recycling = self.no_recycling

        _f, i1, o1, thunks1, order1 = (
            link.PerformLinker(schedule=self.schedule).accept(
                fgraph, no_recycling=no_recycling).make_all(**kwargs))
        kwargs.pop('input_storage', None)
        _f, i2, o2, thunks2, order2 = (
            OpWiseCLinker(schedule=self.schedule).accept(
                fgraph, no_recycling=no_recycling).make_all(**kwargs))

        def f():
            for input1, input2 in izip(i1, i2):
                # Set the inputs to be the same in both branches.
                # The copy is necessary in order for inplace ops not to
                # interfere.
                input2.storage[0] = copy(input1.storage[0])
            for thunk1, thunk2, node1, node2 in izip(thunks1, thunks2,
                                                     order1, order2):
                for output, storage in izip(node1.outputs, thunk1.outputs):
                    if output in no_recycling:
                        storage[0] = None
                for output, storage in izip(node2.outputs, thunk2.outputs):
                    if output in no_recycling:
                        storage[0] = None
                try:
                    thunk1()
                    thunk2()
                    for output1, output2 in izip(thunk1.outputs,
                                                 thunk2.outputs):
                        self.checker(output1, output2)
                except Exception:
                    link.raise_with_op(node1)

        return f, i1, o1


class HideC(object):
    def __hide(*args):
        raise utils.MethodNotDefined()

    c_code = __hide
    c_code_cleanup = __hide

    c_headers = __hide
    c_header_dirs = __hide
    c_libraries = __hide
    c_lib_dirs = __hide

    c_support_code = __hide
    c_support_code_apply = __hide

    c_compile_args = __hide
    c_no_compile_args = __hide
    c_init_code = __hide
    c_init_code_apply = __hide

    c_init_code_struct = __hide
    c_support_code_struct = __hide
    c_cleanup_code_struct = __hide

    def c_code_cache_version(self):
        return ()

    def c_code_cache_version_apply(self, node):
        return self.c_code_cache_version()

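# HideC is meant to be used as a mixin: listing it before a parent Op class in
# the bases masks every C-related method the parent defines, so the Op falls
# back to its perform() implementation. Illustrative sketch (SomePythonOnlyOp
# and SomeOpWithC are hypothetical names):
#
#     class SomePythonOnlyOp(HideC, SomeOpWithC):
#         def perform(self, node, inputs, output_storage):
#             ...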