from __future__ import absolute_import, print_function, division

import os
import shutil
import sys
from tempfile import mkdtemp
import time
import unittest
import copy
from collections import OrderedDict

import six.moves.cPickle as pickle
from six.moves import xrange
import numpy as np
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
from nose.tools import raises
from numpy.testing import dec

import theano
import theano.sandbox.rng_mrg
from theano import tensor
from theano.compile.pfunc import rebuild_collect_shared
from theano.tests import unittest_tools as utt
import theano.scalar.sharedvar
from theano.scan_module.scan_op import Scan
from theano.compat import PY3
from theano.tests.unittest_tools import attr


'''
  Questions and notes about scan that should be answered:

   * Scan seems to make copies of every input variable. Is that needed?
   answer: probably not, but it does not hurt either (what we copy are
   theano variables, which just carry information about the type/dimension
   of the data)


   * Some of scan's functionality is not well documented.
'''

if theano.config.mode == 'FAST_COMPILE':
    mode_with_opt = theano.compile.mode.get_mode('FAST_RUN')
else:
    mode_with_opt = theano.compile.mode.get_default_mode()
if theano.config.mode in ('DEBUG_MODE', 'DebugMode'):
    mode_nodebug = theano.compile.mode.get_mode('FAST_RUN')
else:
    mode_nodebug = mode_with_opt


type_eps = {'float64': 1e-7,
            'float32': 3e-3}


class multiple_outputs_numeric_grad:
    """One-sided finite-difference gradient estimator for functions with
    several (possibly non-ndarray) inputs."""
    def __init__(self, f, pt, ndarray_mask=None, eps=None):
        """
        Return the gradient of f at pt.

        This function computes the gradient by one-sided finite differences
        with a fixed step size (eps).

        It is assumed that f(...) will return a scalar.
        :param eps: the step size for the finite differencing. None means
        input dtype-dependent. See `type_eps`.
        """

        def prod(inputs):
            rval = 1
            for i in inputs:
                rval *= i
            return rval
        packed_pt = False
        if not isinstance(pt, (list, tuple)):
            pt = [pt]
            packed_pt = True

        # This mask tells us if we are dealing with an ndarray input or
        # something else (e.g. a random state) that we should not perturb
        if not ndarray_mask:
            ndarray_mask = [True for x in pt]

        dtype_eps = type_eps['float64']

        for i, p in enumerate(pt):
            if ndarray_mask[i]:
                pt[i] = np.array(p)
                _eps = type_eps[str(pt[i].dtype)]
                if _eps > dtype_eps:
                    dtype_eps = _eps

        self.ndarray_mask = ndarray_mask
        # Compute the clean output:
        f_x = f(*pt)
        gx = []
        # now iterate over the elements of x and call f on those + delta x
        for i in xrange(len(pt)):
            if ndarray_mask[i]:
                # It is an ndarray that we can tweak
                if eps:
                    _eps = eps
                else:
                    _eps = dtype_eps
                if pt[i].ndim:
                    _g = []
                    # it has several dimensions:
                    for pos in xrange(prod(pt[i].shape)):
                        t = pt[i].copy()
                        t = t.flatten()
                        t[pos] += _eps
                        t = t.reshape(pt[i].shape)
                        f_eps = f(*(pt[:i] + [t] + pt[i + 1:]))
                        _g.append(np.asarray((f_eps - f_x) / _eps))
                    gx.append(np.asarray(_g).reshape(pt[i].shape))
                else:
                    t = np.array(pt[i] + _eps)
                    f_eps = f(*(pt[:i] + [t] + pt[i + 1:]))
                    gx.append(np.asarray((f_eps - f_x) / _eps))
        self.gx = gx

    @staticmethod
    def abs_rel_err(a, b, eps=1.0e-10):
        """
        Return a small number when a and b are close, relative to how big they are
        """
        return abs(a - b) / (abs(a) + abs(b) + eps)

    def max_err(self, _g_pt):
        """
        Return the biggest relative error between g_pt and self.gx
        """
        g_pt = []
        for i in xrange(len(_g_pt)):
            if self.ndarray_mask[i]:
                g_pt.append(_g_pt[i])
            elif isinstance(_g_pt[i], np.ndarray):
                assert np.all(_g_pt[i] == 0)
        if len(g_pt) != len(self.gx):
            raise ValueError('argument has wrong number of elements',
                             len(g_pt))
        errs = []

        for i, (a, b) in enumerate(zip(g_pt, self.gx)):
            if a.shape != b.shape:
                raise ValueError('argument element %i has wrong shape %s' %
                                 (i, str((a.shape, b.shape))))
            vv = multiple_outputs_numeric_grad.abs_rel_err(a, b)
            errs.append(np.max(vv))
        if np.all(np.isfinite(errs)):
            return np.max(errs), np.argmax(errs)
        else:
            return np.inf, 0
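

# A minimal usage sketch (not exercised by the test suite) of
# multiple_outputs_numeric_grad: compare a symbolic gradient against the
# finite-difference estimate.  The quadratic cost below is made up purely
# for illustration.
def _example_multiple_outputs_numeric_grad():
    x = tensor.dvector('x')
    cost = (x ** 2).sum() + (3 * x).sum()
    f = theano.function([x], cost)
    g = theano.function([x], theano.grad(cost, x))
    pt = [np.random.uniform(size=(3,))]
    num_grad = multiple_outputs_numeric_grad(f, pt)
    # the analytic gradient is 2 * x + 3; its relative error against the
    # numeric estimate should be tiny
    err, err_pos = num_grad.max_err([g(pt[0])])
    assert err < 1e-2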


# TODO: Test this function, and if it works,
# use it with the normal verify_grad rather than the
# copy-and-pasted one above.
# Also - add a reference to this technique in the
# verify_grad method so that other ops with multiple outputs can be tested.
# DONE - rp
def scan_project_sum(*args, **kwargs):
    rng = theano.tensor.shared_randomstreams.RandomStreams(123)
    scan_outputs, updates = theano.scan(*args, **kwargs)
    if not isinstance(scan_outputs, (list, tuple)):
        scan_outputs = [scan_outputs]
    # we should ignore the random-state updates so that
    # the uniform numbers are the same on every evaluation and every call
    rng.add_default_updates = False
    # project each output through random factors so that the result is a
    # single scalar whose gradient exercises every output
    factors = [rng.uniform(size=s.shape, low=0.1, high=0.9) for s
               in scan_outputs]
    return (sum([(s * f).sum() for s, f in zip(scan_outputs, factors)]),
            updates)
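

# A minimal sketch (not run as part of the suite, using a made-up doubling
# recurrence) of the intended use of scan_project_sum: it turns the outputs
# of a scan into one scalar cost, which verify_grad can then check by
# finite differences.
def _example_scan_project_sum():
    def cost_from_init(x0):
        return scan_project_sum(lambda x_tm1: x_tm1 * 2,
                                outputs_info=x0,
                                n_steps=3)[0]
    utt.verify_grad(cost_from_init, [np.asarray(0.5)])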


def asarrayX(value):
    return theano._asarray(value, dtype=theano.config.floatX)


def clone_optimized_graph(f):
    maker_ins = [x for x in f.maker.fgraph.inputs
                 if not isinstance(x, theano.tensor.sharedvar.SharedVariable)]
    inps, outs, _ = rebuild_collect_shared(f.maker.fgraph.outputs,
                                           maker_ins,
                                           copy_inputs_over=False)
    ins = [x for x in inps
           if not isinstance(x, theano.tensor.sharedvar.SharedVariable)]
    return (ins, outs)


def grab_scan_node(output):
    if output.owner is None:
        return None
    if output.owner.op.__class__.__name__ == 'Scan':
        return [output.owner]
    rval = []
    for i in output.owner.inputs:
        ri = grab_scan_node(i)
        if ri is not None:
            rval += ri
    if not rval:
        return None
    else:
        return rval


def scan_nodes_from_fct(fct):
    nodes = fct.maker.fgraph.toposort()
    scan_nodes = [n for n in nodes if isinstance(n.op, Scan)]
    return scan_nodes
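

# A small sketch (not part of the suite) of how the helper above is used:
# compile a function containing a scan, then locate its Scan node.  The
# increment recurrence is made up for illustration.
def _example_scan_node_helpers():
    x0 = tensor.dscalar('x0')
    out, updates = theano.scan(lambda x_tm1: x_tm1 + 1,
                               outputs_info=x0,
                               n_steps=4)
    f = theano.function([x0], out, updates=updates)
    assert len(scan_nodes_from_fct(f)) == 1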


class T_Scan(unittest.TestCase):

    def setUp(self):
        utt.seed_rng()
        super(T_Scan, self).setUp()

    # generator network (only one output, type scalar; no sequence or
    # non-sequence arguments); tests that the compiled function is picklable
    @dec.skipif(
        isinstance(theano.compile.mode.get_default_mode(),
                   theano.compile.debugmode.DebugMode),
        ("This test fails in DebugMode, because it is not yet picklable."))
    def test_pickling(self):
        def f_pow2(x_tm1):
            return 2 * x_tm1

        state = theano.tensor.scalar('state')
        n_steps = theano.tensor.iscalar('nsteps')
        output, updates = theano.scan(f_pow2,
                                      [],
                                      state,
                                      [],
                                      n_steps=n_steps,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        _my_f = theano.function([state, n_steps],
                                output,
                                updates=updates,
                                allow_input_downcast=True)

        # Test pickling this function
        origdir = os.getcwd()
        tmpdir = None
        try:
            tmpdir = mkdtemp()
            os.chdir(tmpdir)

            with open('tmp_scan_test_pickle.pkl', 'wb') as f_out:
                pickle.dump(_my_f, f_out, protocol=-1)
            with open('tmp_scan_test_pickle.pkl', 'rb') as f_in:
                my_f = pickle.load(f_in)
        finally:
            # Get back to the original dir, and delete the temporary one.
            os.chdir(origdir)
            if tmpdir is not None:
                shutil.rmtree(tmpdir)

        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

        numpy_values = np.array([state * (2 ** (k + 1)) for k
                                 in xrange(steps)])
        theano_values = my_f(state, steps)
        utt.assert_allclose(numpy_values, theano_values)

    # Test that the inner input_storage and output_storage are
    # properly cleared
    def test_inner_storage_leak(self):
        def f_pow2(x_tm1):
            return 2 * x_tm1

        state = theano.tensor.scalar('state')
        n_steps = theano.tensor.iscalar('nsteps')
        output, updates = theano.scan(f_pow2,
                                      [],
                                      state,
                                      [],
                                      n_steps=n_steps)

        f = theano.function([state, n_steps],
                            output,
                            updates=updates,
                            allow_input_downcast=True)

        scan_node = [node for node in f.maker.fgraph.toposort()
                     if isinstance(node.op, Scan)]

        assert len(scan_node) == 1
        scan_node = scan_node[0]

        # Make sure they start out as None
        assert all(i.value is None for i in scan_node.op.fn.input_storage)
        assert all(o.value is None for o in scan_node.op.fn.output_storage)

        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

        f(state, steps)

        # And that they stay that way
        assert all(i.value is None for i in scan_node.op.fn.input_storage)
        assert all(o.value is None for o in scan_node.op.fn.output_storage)

    # generator network, only one output, type scalar; no sequence or
    # non-sequence arguments
    def test_generator_one_output_scalar(self):
        def f_pow2(x_tm1):
            return 2 * x_tm1

        state = theano.tensor.scalar('state')
        n_steps = theano.tensor.iscalar('nsteps')
        # Test return_list at the same time.
        output, updates = theano.scan(f_pow2,
                                      [],
                                      state,
                                      [],
                                      n_steps=n_steps,
                                      truncate_gradient=-1,
                                      return_list=True,
                                      go_backwards=False)
        my_f = theano.function([state, n_steps],
                               output,
                               updates=updates,
                               allow_input_downcast=True)

        rng = np.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5

        numpy_values = np.array([state * (2 ** (k + 1)) for k
                                 in xrange(steps)])
        theano_values = my_f(state, steps)
        utt.assert_allclose(numpy_values, theano_values[0])

    def test_subtensor_multiple_slices(self):
        # This addresses a bug reported by Matthias Zoehrer.
        # The bug happened with multiple subtensors on the output of
        # scan (it required the reshape to be present, and had something
        # to do with how the subtensors overlap).
        def f_pow2(x_tm1):
            return 2 * x_tm1

        state = theano.tensor.vector('state')
        n_steps = theano.tensor.iscalar('nsteps')
        output, updates = theano.scan(f_pow2,
                                      [],
                                      state,
                                      [],
                                      n_steps=n_steps,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        nw_shape = tensor.ivector('nw_shape')
        # Note that the output is reshaped to a 3 dimensional tensor
        my_f = theano.function([state, n_steps, nw_shape],
                               [tensor.reshape(output, nw_shape, ndim=3)[:-2],
                                output[:-4]],
                               updates=updates,
                               allow_input_downcast=True)
        nodes = [x for x in my_f.maker.fgraph.toposort()
                 if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        # This assertion fails if the savemem optimization failed on scan
        if theano.config.mode != "FAST_COMPILE":
            assert nodes[0].op._scan_savemem_visited
        rng = np.random.RandomState(utt.fetch_seed())
        my_f(rng.uniform(size=(3,)),
             4,
             np.int64([2, 2, 3]))

    @attr('slow')
    def test_only_nonseq_inputs(self):
        # Compile the Theano function
        n_steps = 2
        inp = tensor.matrix()
        broadcasted_inp, _ = theano.scan(lambda x: x,
                                         non_sequences=[inp],
                                         n_steps=n_steps)
        out = broadcasted_inp.sum()
        gr = tensor.grad(out, inp)
        fun = theano.function([inp], [broadcasted_inp, gr])

        # Execute the Theano function and compare outputs to the expected outputs
        inputs = np.array([[1, 2], [3, 4]], dtype=theano.config.floatX)
        expected_out1 = np.repeat(inputs[None], n_steps, axis=0)
        expected_out2 = np.ones(inputs.shape, dtype="int8") * n_steps

        out1, out2 = fun(inputs)
        utt.assert_allclose(out1, expected_out1)
        utt.assert_allclose(out2, expected_out2)

    # simple rnn, one input, one state, weights for each; input/state
    # are vectors, weights are scalars
    def test_one_sequence_one_output_weights(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('win')
        W = theano.tensor.scalar('w')

        output, updates = theano.scan(f_rnn,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True)
        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

    # simple rnn, one input, one state, weights for each; input/state
    # are vectors, weights are scalars; using shared variables
    def test_one_sequence_one_output_weights_shared(self):
        rng = np.random.RandomState(utt.fetch_seed())
        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.shared(asarrayX(rng.uniform()), name='w_in')
        W = theano.shared(asarrayX(rng.uniform()), name='w')

        def f_rnn_shared(u_t, x_tm1, tmp_W_in, tmp_W):
            return u_t * tmp_W_in + x_tm1 * tmp_W

        output, updates = theano.scan(f_rnn_shared,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        f3 = theano.function([u, x0],
                             output,
                             updates=updates,
                             allow_input_downcast=True)
        # get random initial values

        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in.get_value() + v_x0 * W.get_value()
        for step in xrange(1, 4):
            v_out[step] = (v_u[step] * W_in.get_value() +
                           v_out[step - 1] * W.get_value())

        theano_values = f3(v_u, v_x0)
        utt.assert_allclose(theano_values, v_out)

    # an rnn with multiple outputs and multiple inputs; higher-dimensional
    # inputs instead of scalars/vectors
    def test_multiple_inputs_multiple_outputs(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        v_u1 = asarrayX(rng.uniform(size=(3, 2), low=-5., high=5.))
        v_u2 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        v_y0 = asarrayX(rng.uniform())

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.scalar('y0')

        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [theano.dot(u1_t, W_in1) + u2_t * W_in2 +
                    theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]

        outputs, updates = theano.scan(f_rnn_cmpl,
                                       [u1, u2],
                                       [x0, y0],
                                       W_in1,
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False)

        f4 = theano.function([u1, u2, x0, y0, W_in1],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)

        # compute the values in numpy
        v_x = np.zeros((3, 2), dtype=theano.config.floatX)
        v_y = np.zeros((3,), dtype=theano.config.floatX)
        v_x[0] = (np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 +
                  np.dot(v_x0, vW))
        v_y[0] = np.dot(v_x0, vWout)
        for i in xrange(1, 3):
            v_x[i] = (np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 +
                      np.dot(v_x[i - 1], vW))
            v_y[i] = np.dot(v_x[i - 1], vWout)

        (theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)
        utt.assert_allclose(theano_x, v_x)
        utt.assert_allclose(theano_y, v_y)

    def test_multiple_outs_taps(self):
        l = 5
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        v_u1 = asarrayX(rng.uniform(size=(l, 2), low=-.2, high=.2))
        v_u2 = asarrayX(rng.uniform(size=(l + 2, 2), low=-.2, high=.2))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.matrix('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        def f_rnn_cmpl(u1_t,
                       u2_tm1,
                       u2_t,
                       u2_tp1,
                       x_tm1,
                       y_tm1,
                       y_tm3,
                       W_in1):
            return [theano.dot(u1_t, W_in1) +
                    (u2_t + u2_tm1 * u2_tp1) * W_in2 +
                    theano.dot(x_tm1, W),
                    (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
                    theano.dot(u1_t, W_in1)]

        outputs, updates = theano.scan(f_rnn_cmpl,
                                       [u1, dict(input=u2, taps=[-1, 0, 1])],
                                       [x0, dict(initial=y0, taps=[-1, -3]),
                                        None],
                                       W_in1,
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False)

        f = theano.function([u1, u2, x0, y0, W_in1],
                            outputs,
                            updates=updates,
                            allow_input_downcast=True)
        theano_out = f(v_u1,
                       v_u2,
                       v_x0,
                       v_y0,
                       vW_in1)

        ny0 = np.zeros((5, 2))
        ny1 = np.zeros((5,))
        ny2 = np.zeros((5, 2))
        ny0[0] = np.dot(v_u1[0], vW_in1) + \
            (v_u2[1] + v_u2[0] * v_u2[2]) * vW_in2 + np.dot(v_x0, vW)

        ny1[0] = (v_y0[2] + v_y0[0]) * np.dot(v_x0, vWout)
        ny2[0] = np.dot(v_u1[0], vW_in1)

        ny0[1] = np.dot(v_u1[1], vW_in1) + \
            (v_u2[2] + v_u2[1] * v_u2[3]) * vW_in2 + np.dot(ny0[0], vW)

        ny1[1] = (ny1[0] + v_y0[1]) * np.dot(ny0[0], vWout)
        ny2[1] = np.dot(v_u1[1], vW_in1)

        ny0[2] = np.dot(v_u1[2], vW_in1) + \
            (v_u2[3] + v_u2[2] * v_u2[4]) * vW_in2 + \
            np.dot(ny0[1], vW)
        ny1[2] = (ny1[1] + v_y0[2]) * np.dot(ny0[1], vWout)
        ny2[2] = np.dot(v_u1[2], vW_in1)

        ny0[3] = np.dot(v_u1[3], vW_in1) + \
            (v_u2[4] + v_u2[3] * v_u2[5]) * vW_in2 + \
            np.dot(ny0[2], vW)

        ny1[3] = (ny1[2] + ny1[0]) * np.dot(ny0[2], vWout)
        ny2[3] = np.dot(v_u1[3], vW_in1)

        ny0[4] = np.dot(v_u1[4], vW_in1) + \
            (v_u2[5] + v_u2[4] * v_u2[6]) * vW_in2 + \
            np.dot(ny0[3], vW)

        ny1[4] = (ny1[3] + ny1[1]) * np.dot(ny0[3], vWout)
        ny2[4] = np.dot(v_u1[4], vW_in1)

        # check the scan outputs against the reference values
        utt.assert_allclose(theano_out[0], ny0)
        utt.assert_allclose(theano_out[1], ny1)
        utt.assert_allclose(theano_out[2], ny2)

    def test_using_taps_sequence(self):
        # this test refers to a bug reported by Nicolas
        # Boulanger-Lewandowski on June 6th
        x = theano.tensor.dvector()
        y, updates = theano.scan(lambda x: [x],
                                 sequences=dict(input=x, taps=[-1]),
                                 outputs_info=[None])
        inp = np.arange(5).astype('float64')
        rval = theano.function([x], y, updates=updates)(inp)
        assert np.all(rval == inp[:-1])

    def test_using_negative_taps_sequence(self):
        # This test refers to a bug reported on github on May 22 2015 by
        # user june-qijun
        def lp(x, x2):
            return x
        x = tensor.fvector('x')
        res, upd = theano.scan(lp,
                               sequences=dict(input=x, taps=[-2, -1]))
        f = theano.function([x], res, updates=upd)

        output = f([1, 2, 3, 4, 5])
        expected_output = np.array([1, 2, 3], dtype="float32")
        utt.assert_allclose(output, expected_output)

    def test_connection_pattern(self):
        # Test connection_pattern() in the presence of recurrent outputs
        # with multiple taps.
        #
        # This test refers to a bug signaled on the theano-users mailing list
        # on March 10 2015 by David Schneider-Joseph.

        def fn(a_m2, a_m1, b_m2, b_m1):
            return a_m1, b_m1

        a0 = theano.shared(np.arange(2))
        b0 = theano.shared(np.arange(2))

        (a, b), _ = theano.scan(
            fn,
            outputs_info=[{'initial': a0, 'taps': [-2, -1]},
                          {'initial': b0, 'taps': [-2, -1]}],
            n_steps=2)

        tensor.grad(a[-1], a0)

        # Also validate that the mappings outer_inp_from_outer_out and
        # outer_inp_from_inner_inp produce the correct results
        scan_node = a.owner.inputs[0].owner

        result = scan_node.op.var_mappings['outer_inp_from_outer_out']
        expected_result = {0: 1, 1: 2}
        assert result == expected_result

        result = scan_node.op.var_mappings['outer_inp_from_inner_inp']
        expected_result = {0: 1, 1: 1, 2: 2, 3: 2}
        assert result == expected_result

    def test_connection_pattern2(self):
        # This tests for a crash in connection_pattern() when a scan node
        # has more than one mitmot (multiple input taps as well as
        # multiple output taps) output

        x = tensor.matrix()
        seq = tensor.vector()

        def inner_fct(seq, state_old, state_current):
            state_next = state_old * 2 + state_current + seq
            return state_next

        out, _ = theano.scan(inner_fct, sequences=seq,
                             outputs_info={'initial': x, 'taps': [-2, -1]})

        g_out = theano.grad(out.sum(), [seq, x])

        scan_node = g_out[0].owner.inputs[1].owner.inputs[1].owner.inputs[0].owner
        connection_pattern = scan_node.op.connection_pattern(scan_node)

        # Also validate that the mappings outer_inp_from_outer_out and
        # outer_inp_from_inner_inp produce the correct results
        scan_node = out.owner.inputs[0].owner

        result = scan_node.op.var_mappings['outer_inp_from_outer_out']
        expected_result = {0: 2}
        assert result == expected_result

        result = scan_node.op.var_mappings['outer_inp_from_inner_inp']
        expected_result = {0: 1, 1: 2, 2: 2}
        assert result == expected_result

    def test_grad_grad_mitsot_sitsot(self):
        # Test for an index error when taking the second derivative
        # through a Scan node with one sitsot and one mitsot.

        def inner_fct(mitsot_m2, mitsot_m1, sitsot):
            total = mitsot_m2 + mitsot_m1 + sitsot
            output = total ** 1.05
            return output, output

        inputs = [tensor.matrix(), tensor.vector()]
        outputs_info = [dict(initial=inputs[0], taps=[-2, -1]), inputs[1]]

        scan_outputs, updates = theano.scan(fn=inner_fct,
                                            outputs_info=outputs_info,
                                            n_steps=5)

        # Take the gradient of each output wrt its corresponding initial state
        gradients = [theano.grad(scan_outputs[0].sum(), inputs[0]),
                     theano.grad(scan_outputs[1].sum(), inputs[1])]

        # Take the gradient of the sum of gradients wrt the inputs
        sum_of_grads = sum([g.sum() for g in gradients])
        second_gradients = theano.grad(sum_of_grads, inputs[0])

    def test_verify_second_grad_sitsot(self):

        def get_sum_of_grad(inp):

            scan_outputs, updates = theano.scan(fn=lambda x: x * 2,
                                                outputs_info=[inp],
                                                n_steps=5)

            # Take the gradient of each output wrt its corresponding initial
            # state
            return theano.grad(scan_outputs.sum(), inp).sum()

        # Call verify_grad to ensure the correctness of the second gradients
        floatX = theano.config.floatX
        inputs_test_values = [np.random.random((3)).astype(floatX)]
        theano.tests.unittest_tools.verify_grad(get_sum_of_grad,
                                                inputs_test_values)

    def test_verify_second_grad_mitsot1(self):

        def inner_fct(mitsot_m2, sitsot):
            total = mitsot_m2 + sitsot
            output = total ** 1.02
            return output, output

        def get_sum_of_grad(input0, input1):
            outputs_info = [dict(initial=input0, taps=[-2]), input1]

            scan_outputs, updates = theano.scan(fn=inner_fct,
                                                outputs_info=outputs_info,
                                                n_steps=3)

            # Take the gradient of each output wrt its corresponding initial
            # state
            gradients = [theano.grad(scan_outputs[0].sum(), input0),
                         theano.grad(scan_outputs[1].sum(), input1)]

            return gradients[0].sum() + gradients[1].sum()

        # Call verify_grad to ensure the correctness of the second gradients
        floatX = theano.config.floatX
        inputs_test_values = [np.random.random((2, 3)).astype(floatX),
                              np.random.random((3)).astype(floatX)]
        theano.tests.unittest_tools.verify_grad(get_sum_of_grad,
                                                inputs_test_values)

    def test_grad_two_scans(self):

        # data input & output
        x = tensor.tensor3('x')
        t = tensor.imatrix('t')

        # forward pass
        W = theano.shared(
            np.random.randn(2, 2).astype('float32'),
            name="W", borrow=True)

        def forward_scanner(x_t):
            a2_t = tensor.dot(x_t, W)
            y_t = tensor.nnet.softmax_graph(a2_t)
            return y_t

        y, _ = theano.scan(fn=forward_scanner, sequences=x,
                           outputs_info=[None])

        # loss function
        def error_scanner(y_t, t_t):
            return tensor.mean(tensor.nnet.categorical_crossentropy(y_t, t_t))

        L, _ = theano.scan(fn=error_scanner, sequences=[y, t],
                           outputs_info=[None])
        L = tensor.mean(L)

        # backward pass
        gW = tensor.grad(L, [W])

    # simple rnn, one input, one state, weights for each; input/state are
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs)
    def test_using_taps_input_output(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(4,), low=-5., high=5.))
        vx0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))

        u = theano.tensor.vector('u')
        x0 = theano.tensor.vector('x0')
        W_in = theano.shared(vW_in, name='w_in')
        W = theano.shared(vW, name='w')

        def f_rnn_shared(u_tm2, x_tm1, x_tm2):
            return u_tm2 * W_in + x_tm1 * W + x_tm2

        outputs, updates = theano.scan(f_rnn_shared,
                                       dict(input=u, taps=-2),
                                       dict(initial=x0, taps=[-1, -2]),
                                       [],
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False)

        f7 = theano.function([u, x0],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)
        theano_out = f7(vu, vx0)

        # compute output in numpy
        # a bit of explaining:
        # due to the definition of sequence taps in scan, vu[0] is
        # actually vu[-2], and vu[1] is vu[-1]. The values vu[2]
        # and vu[3] do not get used (because you never use vu[t]
        # in scan), which might seem strange, but then again why not use
        # vu[t] instead of vu[t-2] in a real application?
        # also vx0[0] corresponds to vx0[-2], vx0[1] to vx0[-1]
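        # concretely, for the two steps of this test:
        #   step 0: u_tm2 = vu[0], x_tm1 = vx0[1], x_tm2 = vx0[0]
        #   step 1: u_tm2 = vu[1], x_tm1 = numpy_out[0], x_tm2 = vx0[1]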
        numpy_out = np.zeros((2,))
        numpy_out[0] = vu[0] * vW_in + vx0[1] * vW + vx0[0]
        numpy_out[1] = vu[1] * vW_in + numpy_out[0] * vW + vx0[1]
        utt.assert_allclose(numpy_out, theano_out)

    # simple rnn, one input, one state, weights for each; input/state are
    # vectors, weights are scalars; using shared variables and past
    # taps (sequences and outputs) and future taps for sequences
    def test_past_future_taps_shared(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(6,), low=-5., high=5.))
        vx0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))

        u = theano.tensor.vector('u')
        x0 = theano.tensor.vector('x0')
        W_in = theano.shared(vW_in, name='w_in')
        W = theano.shared(vW, name='w')

        def f_rnn_shared(u_tm2, u_tp2, x_tm1, x_tm2):
            return (u_tm2 + u_tp2) * W_in + x_tm1 * W + x_tm2

        output, updates = theano.scan(f_rnn_shared,
                                      dict(input=u, taps=[-2, 2]),
                                      dict(initial=x0, taps=[-1, -2]),
                                      [],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False)

        f8 = theano.function([u, x0],
                             output,
                             updates=updates,
                             allow_input_downcast=True)
        theano_out = f8(vu, vx0)
        # compute output in numpy
        numpy_out = np.zeros(2)
        # think of vu[0] as vu[-2], vu[4] as vu[2]
        # and vx0[0] as vx0[-2], vx0[1] as vx0[-1]
        numpy_out[0] = (vu[0] + vu[4]) * vW_in + vx0[1] * vW + vx0[0]
        numpy_out[1] = (vu[1] + vu[5]) * vW_in + numpy_out[0] * vW + vx0[1]
        utt.assert_allclose(numpy_out, theano_out)

    # simple rnn; compute inplace version 1
    @utt.assertFailure_fast
    def test_inplace1(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(np.random.uniform())
        vW_in = asarrayX(np.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu1 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu2 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())

        u0 = theano.tensor.vector('u0')
        u1 = theano.tensor.vector('u1')
        u2 = theano.tensor.vector('u2')
        mu0 = theano.In(u0, mutable=False)
        mu1 = theano.In(u1, mutable=True)
        mu2 = theano.In(u2, mutable=True)
        x0 = theano.tensor.scalar('x0')
        x1 = theano.tensor.scalar('y0')
        W_in = theano.shared(vW_in, 'Win')
        W = theano.shared(vW, 'W')
        mode = theano.compile.mode.get_mode(None).including('inplace')

        def f_rnn_shared(u0_t, u1_t, u2_t, x0_tm1, x1_tm1):
            return [u0_t * W_in + x0_tm1 * W + u1_t * u2_t,
                    u0_t * W_in + x1_tm1 * W + u1_t + u2_t]

        outputs, updates = theano.scan(f_rnn_shared,
                                       [u0, u1, u2],
                                       [dict(initial=x0, inplace=u2),
                                        dict(initial=x1, inplace=u1)],
                                       [],
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False,
                                       mode=mode)

        f9 = theano.function([mu0, mu1, mu2, x0, x1],
                             outputs,
                             updates=updates,
                             mode=mode,
                             allow_input_downcast=True)
        scan_node = [x for x in f9.maker.fgraph.toposort()
                     if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert 0 in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()
        # compute output in numpy
        numpy_x0 = np.zeros((3,))
        numpy_x1 = np.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu2[0]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu1[0] + vu2[0]
        for i in xrange(1, 3):
            numpy_x0[i] = (vu0[i] * vW_in + numpy_x0[i - 1] * vW +
                           vu1[i] * vu2[i])
            numpy_x1[i] = (vu0[i] * vW_in + numpy_x1[i - 1] * vW +
                           vu1[i] + vu2[i])

        # note theano computes inplace, so call the function only after the
        # numpy equivalent has been computed
        (theano_x0, theano_x1) = f9(vu0, vu1, vu2, vx0, vx1)
        # assert that theano does what it should
        utt.assert_allclose(theano_x0, numpy_x0)
        utt.assert_allclose(theano_x1, numpy_x1)

    # simple rnn; compute inplace version 2
    @utt.assertFailure_fast
    def test_inplace2(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW = asarrayX(np.random.uniform())
        vW_in = asarrayX(np.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        vu1 = asarrayX(rng.uniform(size=(4,), low=-5., high=5.))
        vu2 = asarrayX(rng.uniform(size=(5,), low=-5., high=5.))
        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())

        u0 = theano.tensor.vector('u0')
        u1 = theano.tensor.vector('u1')
        u2 = theano.tensor.vector('u2')
        mu0 = theano.In(u0, mutable=True)
        mu1 = theano.In(u1, mutable=True)
        mu2 = theano.In(u2, mutable=True)
        x0 = theano.tensor.scalar('x0')
        x1 = theano.tensor.scalar('y0')
        W_in = theano.shared(vW_in, 'Win')
        W = theano.shared(vW, 'W')
        mode = theano.compile.mode.get_mode(None).including('inplace')

        def f_rnn_shared(u0_t,
                         u1_t,
                         u1_tp1,
                         u2_tm1,
                         u2_t,
                         u2_tp1,
                         x0_tm1,
                         x1_tm1):
            return [u0_t * W_in + x0_tm1 * W + u1_t * u1_tp1,
                    u0_t * W_in + x1_tm1 * W + u2_tm1 + u2_t + u2_tp1]

        outputs, updates = theano.scan(f_rnn_shared,
                                       [u0,
                                        dict(input=u1, taps=[0, 1]),
                                        dict(input=u2, taps=[-1, 0, +1])],
                                       [dict(initial=x0), dict(initial=x1)],
                                       [],
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False,
                                       mode=mode)
        f9 = theano.function([mu0, mu1, mu2, x0, x1],
                             outputs,
                             updates=updates,
                             mode=mode,
                             allow_input_downcast=True)

        scan_node = [x for x in f9.maker.fgraph.toposort()
                     if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert 0 in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()
        # compute output in numpy
        numpy_x0 = np.zeros((3,))
        numpy_x1 = np.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0] * vu1[1]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0] + vu2[1] + vu2[2]
        for i in xrange(1, 3):
            numpy_x0[i] = (vu0[i] * vW_in + numpy_x0[i - 1] * vW +
                           vu1[i] * vu1[i + 1])
            numpy_x1[i] = (vu0[i] * vW_in + numpy_x1[i - 1] * vW +
                           vu2[i] + vu2[i + 1] + vu2[i + 2])

        # note theano computes inplace, so call the function only after the
        # numpy equivalent has been computed
        (theano_x0, theano_x1) = f9(vu0, vu1, vu2, vx0, vx1)
        # assert that theano does what it should
        utt.assert_allclose(theano_x0, numpy_x0)
        utt.assert_allclose(theano_x1, numpy_x1)

    @utt.assertFailure_fast
    def test_inplace3(self):
        rng = np.random.RandomState(utt.fetch_seed())

        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())
        x0 = theano.shared(vx0)
        x1 = theano.shared(vx1)
        outputs, updates = theano.scan(lambda x, y: (x + asarrayX(1),
                                                     y + asarrayX(1)),
                                       [],
                                       [x0, x1],
                                       n_steps=3)
        x0 = asarrayX(np.zeros((3,)))
        x0[0] = vx0
        x0 = theano.tensor.constant(x0)
        to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
        outputs = theano.clone(outputs,
                               replace=[(to_replace, x0)])
        mode = theano.compile.mode.get_mode(None).including('inplace')
        f9 = theano.function([],
                             outputs,
                             updates=updates,
                             mode=mode)
        scan_node = [x for x in f9.maker.fgraph.toposort()
                     if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        assert 0 not in scan_node[0].op.destroy_map.keys()
        assert 1 in scan_node[0].op.destroy_map.keys()

    # Shared variable with updates
    def test_shared_arguments_with_updates(self):
        rng = np.random.RandomState(utt.fetch_seed())

        vW1 = asarrayX(rng.rand(2, 3))
        vW2 = asarrayX(rng.rand(3, 2))
        vu1 = asarrayX(rng.rand(3, 2))
        vu2 = asarrayX(rng.rand(3, 3))
        vy0 = asarrayX(rng.rand(3, 2))
        vy1 = asarrayX(rng.rand(2))
        vy2 = asarrayX(rng.rand(3))

        # There is a bug when floatX=float32 if we remove this line.
        # The traceback is:
# Traceback (most recent call last):
#  File "/u/bastienf/repos/Theano/theano/tests/test_scan.py", line 434, in test_shared_arguments_with_updates
#    theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
#  File "/u/bastienf/repos/theano/compile/function_module.py", line 480, in __call__
#    self.fn()
#  File "/u/bastienf/repos/theano/compile/profilemode.py", line 59, in profile_f
#    raise_with_op(node)
#  File "/u/bastienf/repos/theano/compile/profilemode.py", line 52, in profile_f
#    th()
#  File "/u/bastienf/repos/theano/gof/cc.py", line 1141, in <lambda>
#    thunk = lambda p = p, i = node_input_storage, o = node_output_storage, n = node: p(n, [x[0] for x in i], o)
#  File "/u/bastienf/repos/theano/scan.py", line 922, in perform
#    inplace_map)
#  File "/u/bastienf/repos/theano/scan.py", line 1054, in scan
#    something = fn(*fn_args)
#  File "/u/bastienf/repos/theano/compile/function_module.py", line 458, in __call__
#    s.storage[0] = s.type.filter(arg, strict=s.strict)
#  File "/u/bastienf/repos/theano/tensor/basic.py", line 415, in filter
#    data = theano._asarray(data, dtype = self.dtype) #TODO - consider to pad shape with ones
#  File "/u/bastienf/repos/theano/misc/safe_asarray.py", line 30, in _asarray
#    rval = numpy.asarray(a, dtype=dtype, order=order)
#  File "/u/lisa/local/byhost/ceylon.iro.umontreal.ca//lib64/python2.5/site-packages/numpy/core/numeric.py", line 230, in asarray
#    return array(a, dtype, copy=False, order=order)
# TypeError: ('__array__() takes no arguments (1 given)', <theano.scan.Scan object at 0x3dbbf90>(?_steps, u1, u2, y0, y1, 0.0, W1, W2), 'Sequence id of Apply node=0')
#
#  This doesn't seem to be a Theano-related bug...
        vu1 = asarrayX(rng.rand(3, 2))

        W1 = theano.shared(vW1, 'W1')
        W2 = theano.shared(vW2, 'W2')
        u1 = theano.shared(vu1, 'u1')
        y1 = theano.shared(vy1, 'y1')

        def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
            y0_t = (theano.dot(theano.dot(u1_t, W1), W2) + 0.1 * y0_tm1 +
                    0.33 * y0_tm2 + 0.17 * y0_tm3)
            y1_t = theano.dot(u2_t, W2) + y1_tm1
            y2_t = theano.dot(u1_t, W1)
            nwW1 = W1 + .1
            nwW2 = W2 + .05
            # return outputs followed by a list of updates
            return ([y0_t, y1_t, y2_t], [(W1, nwW1), (W2, nwW2)])

        u2 = theano.tensor.matrix('u2')
        y0 = theano.tensor.matrix('y0')
        outputs, updates = theano.scan(f,
                                       [u1, u2],
                                       [dict(initial=y0, taps=[-3, -2, -1]),
                                        y1,
                                        None],
                                       [],
                                       n_steps=None,
                                       go_backwards=False,
                                       truncate_gradient=-1)

        f10 = theano.function([u2, y0],
                              outputs,
                              updates=updates,
                              allow_input_downcast=True)
        allstuff = f10(vu2, vy0)
        theano_y0, theano_y1, theano_y2 = allstuff

        # do things in numpy
        numpy_y0 = np.zeros((6, 2))
        numpy_y1 = np.zeros((4, 2))
        numpy_y2 = np.zeros((3, 3))
        numpy_y0[:3] = vy0
        numpy_y1[0] = vy1
        numpy_W1 = vW1.copy()
        numpy_W2 = vW2.copy()
        for idx in xrange(3):
            numpy_y0[idx + 3] = (np.dot(np.dot(vu1[idx, :], numpy_W1),
                                        numpy_W2) +
                                 0.1 * numpy_y0[idx + 2] +
                                 0.33 * numpy_y0[idx + 1] +
                                 0.17 * numpy_y0[idx])
            numpy_y1[idx + 1] = (np.dot(vu2[idx, :], numpy_W2) +
                                 numpy_y1[idx])
            numpy_y2[idx] = np.dot(vu1[idx, :], numpy_W1)
            numpy_W1 = numpy_W1 + .1
            numpy_W2 = numpy_W2 + .05

        utt.assert_allclose(theano_y0, numpy_y0[3:])
        utt.assert_allclose(theano_y1, numpy_y1[1:])
        utt.assert_allclose(theano_y2, numpy_y2)
        utt.assert_allclose(W1.get_value(), numpy_W1)
        utt.assert_allclose(W2.get_value(), numpy_W2)

    def test_grad_dtype_change(self):
        x = tensor.fscalar('x')
        y = tensor.fscalar('y')
        c = tensor.iscalar('c')

        def inner_fn(cond, x, y):
            new_cond = tensor.cast(tensor.switch(cond, x, y), 'int32')
            new_x = tensor.switch(cond, tensor.nnet.sigmoid(y * x), x)
            new_y = tensor.switch(cond, y, tensor.nnet.sigmoid(x))
            return new_cond, new_x, new_y

        values, _ = theano.scan(
            inner_fn,
            outputs_info=[c, x, y],
            n_steps=10,
            truncate_gradient=-1,
            go_backwards=False)
        gX, gY = tensor.grad(values[1].sum(), [x, y])
        f = theano.function([c, x, y], [gX, gY],
                            allow_input_downcast=True)
        # Check for runtime errors
        f(np.int32(0), np.float32(1.), np.float32(.5))

    def test_simple_shared_mrg_random(self):
        theano_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(utt.fetch_seed())

        values, updates = theano.scan(lambda: theano_rng.uniform((2,), -1, 1),
                                      [],
                                      [],
                                      [],
                                      n_steps=5,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        my_f = theano.function([],
                               values,
                               updates=updates,
                               allow_input_downcast=True)

        # Just check for run-time errors
        theano_v = my_f()
        theano_v = my_f()

    def test_simple_shared_random(self):
        theano_rng = theano.tensor.shared_randomstreams.RandomStreams(
            utt.fetch_seed())

        values, updates = theano.scan(lambda: theano_rng.uniform((2,), -1, 1),
                                      [],
                                      [],
                                      [],
                                      n_steps=5,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        my_f = theano.function([],
                               values,
                               updates=updates,
                               allow_input_downcast=True)

        rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2 ** 30)
        rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        numpy_v = np.zeros((10, 2))
        for i in xrange(10):
            numpy_v[i] = rng.uniform(-1, 1, size=(2,))

        theano_v = my_f()
        utt.assert_allclose(theano_v, numpy_v[:5, :])
        theano_v = my_f()
        utt.assert_allclose(theano_v, numpy_v[5:, :])

    def test_gibbs_chain(self):
        rng = np.random.RandomState(utt.fetch_seed())
        v_W = np.array(rng.rand(20, 30) - .5, dtype='float32')
        v_vsample = np.array(rng.binomial(1, .5, size=(3, 20),),
                             dtype='float32')
        v_bvis = np.array(rng.rand(20) - .5, dtype='float32')
        v_bhid = np.array(rng.rand(30) - .5, dtype='float32')
        W = theano.shared(v_W, 'vW')
        bhid = theano.shared(v_bhid, 'vbhid')
        bvis = theano.shared(v_bvis, 'vbvis')
        vsample = theano.tensor.matrix(dtype='float32')
        trng = theano.tensor.shared_randomstreams.RandomStreams(
            utt.fetch_seed())

        def f(vsample_tm1):
            hmean_t = theano.tensor.nnet.sigmoid(
                theano.dot(vsample_tm1, W) + bhid)
            hsample_t = theano.tensor.cast(
                trng.binomial(hmean_t.shape, 1, hmean_t),
                dtype='float32')
            vmean_t = theano.tensor.nnet.sigmoid(
                theano.dot(hsample_t, W.T) + bvis)
            return theano.tensor.cast(
                trng.binomial(vmean_t.shape, 1, vmean_t),
                dtype='float32')

        theano_vsamples, updates = theano.scan(f,
                                               [],
                                               vsample,
                                               [],
                                               n_steps=10,
                                               truncate_gradient=-1,
                                               go_backwards=False)

        my_f = theano.function([vsample], theano_vsamples[-1],
                               updates=updates,
                               allow_input_downcast=True)

        _rng = np.random.RandomState(utt.fetch_seed())
        rng_seed = _rng.randint(2 ** 30)
        nrng1 = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        rng_seed = _rng.randint(2 ** 30)
        nrng2 = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        def numpy_implementation(vsample):
            for idx in range(10):
                hmean = 1. / (1. + np.exp(-(np.dot(vsample, v_W) +
                                            v_bhid)))
                hsample = np.array(nrng1.binomial(1,
                                                  hmean,
                                                  size=hmean.shape),
                                   dtype='float32')
                vmean = 1. / (1. + np.exp(-(np.dot(hsample, v_W.T) +
                                            v_bvis)))
                vsample = np.array(nrng2.binomial(1,
                                                  vmean,
                                                  size=vmean.shape),
                                   dtype='float32')

            return vsample

        t_result = my_f(v_vsample)
        n_result = numpy_implementation(v_vsample)
        utt.assert_allclose(t_result, n_result)

    def test_only_shared_no_input_no_output(self):
        rng = np.random.RandomState(utt.fetch_seed())
        v_state = asarrayX(rng.uniform())
        state = theano.shared(v_state, 'vstate')

        def f_2():
            return OrderedDict([(state, 2 * state)])
        n_steps = theano.tensor.iscalar('nstep')
        output, updates = theano.scan(f_2,
                                      [],
                                      [],
                                      [],
                                      n_steps=n_steps,
                                      truncate_gradient=-1,
                                      go_backwards=False)
        this_f = theano.function([n_steps],
                                 output,
                                 updates=updates,
                                 allow_input_downcast=True)
        v_nsteps = 3
        this_f(v_nsteps)
        numpy_state = v_state * (2 ** v_nsteps)
        utt.assert_allclose(state.get_value(), numpy_state)

    def test_map_functionality(self):
        def f_rnn(u_t):
            return u_t + 3

        u = theano.tensor.vector('u')

        outputs, updates = theano.scan(f_rnn,
                                       u,
                                       [],
                                       [],
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False)

        f2 = theano.function([u],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)
        rng = np.random.RandomState(utt.fetch_seed())

        v_u = rng.uniform(size=(5,), low=-5., high=5.)
        numpy_result = v_u + 3
        theano_result = f2(v_u)
        utt.assert_allclose(theano_result, numpy_result)

    def test_map(self):
        v = theano.tensor.vector('v')
        abs_expr, abs_updates = theano.map(
            lambda x: abs(x),
            v,
            [],
            truncate_gradient=-1,
            go_backwards=False)

        f = theano.function([v],
                            abs_expr,
                            updates=abs_updates,
                            allow_input_downcast=True)

        rng = np.random.RandomState(utt.fetch_seed())
        vals = rng.uniform(size=(10,), low=-5., high=5.)
        abs_vals = abs(vals)
        theano_vals = f(vals)
        utt.assert_allclose(abs_vals, theano_vals)
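
    # An assumed illustrative variant (names here are ours): theano.map
    # is scan without any recurrence, so several sequences are simply
    # zipped element-wise into the inner function.
    def _sketch_map_two_sequences(self):
        a = theano.tensor.vector('a')
        b = theano.tensor.vector('b')
        out, updates = theano.map(lambda x, y: x * y, [a, b])
        f = theano.function([a, b], out, updates=updates,
                            allow_input_downcast=True)
        utt.assert_allclose(f([1, 2, 3], [4, 5, 6]), [4., 10., 18.])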

    def test_backwards(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('win')
        W = theano.tensor.scalar('w')

        output, updates = theano.scan(f_rnn,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=True)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True)
        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[3] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[3 - step] * W_in + v_out[step - 1] * W

        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

    def test_reduce(self):
        v = theano.tensor.vector('v')
        s = theano.tensor.scalar('s')
        result, updates = theano.reduce(lambda x, y: x + y, v, s)

        f = theano.function([v, s],
                            result,
                            updates=updates,
                            allow_input_downcast=True)
        rng = np.random.RandomState(utt.fetch_seed())
        v_v = rng.uniform(size=(5,), low=-5., high=5.)
        assert abs(np.sum(v_v) - f(v_v, 0.)) < 1e-3
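
    # A brief companion sketch (ours, not from the original suite):
    # theano.reduce keeps only the final state of the recurrence, so a
    # running product needs nothing beyond a different inner lambda.
    def _sketch_reduce_product(self):
        v = theano.tensor.vector('v')
        s = theano.tensor.scalar('s')
        result, updates = theano.reduce(lambda x, y: x * y, v, s)
        f = theano.function([v, s],
                            result,
                            updates=updates,
                            allow_input_downcast=True)
        assert abs(f([1., 2., 3., 4.], 1.) - 24.) < 1e-6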

    def test_grad_one_output(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('W_in')
        W = theano.tensor.scalar('W')

        cost, updates = scan_project_sum(f_rnn,
                                         u,
                                         x0,
                                         [W_in, W],
                                         n_steps=None,
                                         truncate_gradient=-1,
                                         go_backwards=False)
        gu, gx0, gW_in, gW = theano.tensor.grad(cost,
                                                [u, x0, W_in, W])
        grad_fn = theano.function(
            [u, x0, W_in, W],
            [gu, gx0, gW_in, gW],
            updates=updates,
            no_default_updates=True,
            allow_input_downcast=True)
        cost_fn = theano.function(
            [u, x0, W_in, W],
            cost,
            updates=updates,
            no_default_updates=True,
            allow_input_downcast=True)

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = np.array(rng.uniform(size=(10,), low=-.5, high=.5),
                       dtype=theano.config.floatX)
        v_x0 = np.array(rng.uniform(), dtype=theano.config.floatX)
        W = np.array(rng.uniform(), dtype=theano.config.floatX)
        W_in = np.array(rng.uniform(), dtype=theano.config.floatX)
        analytic_grad = grad_fn(v_u, v_x0, W_in, W)

        num_grad = multiple_outputs_numeric_grad(
            cost_fn, [v_u, v_x0, W_in, W])
        max_err, max_err_pos = num_grad.max_err(analytic_grad)

        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))
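
    # For reference, the numeric check above boils down to one-sided
    # finite differences; a plain-numpy sketch of the same idea (the
    # helper name and test function are ours):
    def _sketch_one_sided_finite_difference(self):
        def f(x):
            return (x ** 2).sum()
        pt = np.array([1., 2., 3.])
        eps = 1e-7
        num_g = np.array([(f(pt + eps * np.eye(3)[i]) - f(pt)) / eps
                          for i in range(3)])
        # The analytic gradient of sum(x**2) is 2*x.
        assert np.allclose(num_g, 2 * pt, rtol=1e-4, atol=1e-4)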

    def test_grad_multiple_outs(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.1, high=.1))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-.1, high=.1))
        v_u1 = asarrayX(rng.uniform(size=(7, 2), low=-.1, high=.1))
        v_u2 = asarrayX(rng.uniform(size=(7,), low=-.1, high=.1))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))
        v_y0 = asarrayX(rng.uniform())

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.scalar('y0')

        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [theano.dot(u1_t, W_in1) + u2_t * W_in2 + \
                    theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]

        cost, updates = scan_project_sum(f_rnn_cmpl,
                                         [u1, u2],
                                         [x0, y0],
                                         W_in1,
                                         n_steps=None,
                                         truncate_gradient=-1,
                                         go_backwards=False)
        vparams = [v_u1, v_u2, v_x0, v_y0, vW_in1]
        # y0 is actually not used in the computation of the cost
        params = [u1, u2, x0, y0, W_in1]
        gparams = theano.grad(cost, params,
                              disconnected_inputs='ignore')

        grad_fn = theano.function([u1, u2, x0, y0, W_in1],
                                  gparams,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)
        cost_fn = theano.function([u1, u2, x0, y0, W_in1],
                                  cost,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)

        num_grad = multiple_outputs_numeric_grad(cost_fn,
                                                 [v_u1,
                                                  v_u2,
                                                  v_x0,
                                                  v_y0,
                                                  vW_in1])
        analytic_grad = grad_fn(v_u1, v_u2, v_x0, v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)

        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))

    @attr('slow')
    def test_grad_multiple_outs_taps(self):
        l = 5
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        v_u1 = asarrayX(rng.uniform(size=(l, 2), low=-.2, high=.2))
        v_u2 = asarrayX(rng.uniform(size=(l + 2, 2), low=-.2, high=.2))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.matrix('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        W_in1.tag.test_value = vW_in1
        u1.tag.test_value = v_u1
        u2.tag.test_value = v_u2
        x0.tag.test_value = v_x0
        y0.tag.test_value = v_y0

        def f_rnn_cmpl(u1_t,
                       u2_tm1,
                       u2_t,
                       u2_tp1,
                       x_tm1,
                       y_tm1,
                       y_tm3,
                       W_in1):
            return [theano.dot(u1_t, W_in1) +
                    (u2_t + u2_tm1 * u2_tp1) * W_in2 +
                    theano.dot(x_tm1, W),
                    (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out),
                    theano.dot(u1_t, W_in1)]

        # We change the compute_test_value[_opt] flags to exercise the
        # assert in Scan.grad() on the new scan input sequences related
        # to outer_mitsot_outs, outer_sitsot_outs and
        # outer_nitsot_outs. This allows testing an old Scan bug.
        old1 = theano.config.compute_test_value
        old2 = theano.config.compute_test_value_opt
        theano.config.compute_test_value = 'raise'
        theano.config.compute_test_value_opt = 'raise'
        try:
            cost, updates = scan_project_sum(
                f_rnn_cmpl,
                [u1, dict(input=u2, taps=[-1, 0, 1])],
                [x0, dict(initial=y0, taps=[-1, -3]), None],
                W_in1,
                n_steps=None,
                truncate_gradient=-1,
                go_backwards=False)
            vparams = [v_u1, v_u2, v_x0, v_y0, vW_in1]
            params = [u1, u2, x0, y0, W_in1]
            gparams = theano.tensor.grad(cost, params)
            print(".", file=sys.stderr)
            # Test the output including names
            output_str = theano.printing.debugprint(cost, file='str')
            lines = output_str.split('\n')
            expected_output = """Elemwise{add,no_inplace} [id A] ''
 |Elemwise{add,no_inplace} [id B] ''
 | |Elemwise{add,no_inplace} [id C] ''
 | | |TensorConstant{0} [id D]
 | | |Sum{acc_dtype=float64} [id E] ''
 | |   |Elemwise{mul,no_inplace} [id F] ''
 | |     |Subtensor{int64::} [id G] ''
 | |     | |for{cpu,scan_fn}.1 [id H] ''
 | |     | | |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | | |Elemwise{minimum,no_inplace} [id J] ''
 | |     | | | | |Elemwise{minimum,no_inplace} [id K] ''
 | |     | | | | | |Subtensor{int64} [id L] ''
 | |     | | | | | | |Shape [id M] ''
 | |     | | | | | | | |Subtensor{int64::} [id N] 'u1[0:]'
 | |     | | | | | | |   |u1 [id O]
 | |     | | | | | | |   |Constant{0} [id P]
 | |     | | | | | | |Constant{0} [id Q]
 | |     | | | | | |Subtensor{int64} [id R] ''
 | |     | | | | |   |Shape [id S] ''
 | |     | | | | |   | |Subtensor{int64:int64:} [id T] 'u2[0:-2]'
 | |     | | | | |   |   |u2 [id U]
 | |     | | | | |   |   |Constant{0} [id V]
 | |     | | | | |   |   |Constant{-2} [id W]
 | |     | | | | |   |Constant{0} [id X]
 | |     | | | | |Subtensor{int64} [id Y] ''
 | |     | | | |   |Shape [id Z] ''
 | |     | | | |   | |Subtensor{int64:int64:} [id BA] 'u2[1:-1]'
 | |     | | | |   |   |u2 [id U]
 | |     | | | |   |   |Constant{1} [id BB]
 | |     | | | |   |   |Constant{-1} [id BC]
 | |     | | | |   |Constant{0} [id BD]
 | |     | | | |Subtensor{int64} [id BE] ''
 | |     | | |   |Shape [id BF] ''
 | |     | | |   | |Subtensor{int64::} [id BG] 'u2[2:]'
 | |     | | |   |   |u2 [id U]
 | |     | | |   |   |Constant{2} [id BH]
 | |     | | |   |Constant{0} [id BI]
 | |     | | |Subtensor{:int64:} [id BJ] ''
 | |     | | | |Subtensor{int64::} [id N] 'u1[0:]'
 | |     | | | |ScalarFromTensor [id BK] ''
 | |     | | |   |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | |Subtensor{:int64:} [id BL] ''
 | |     | | | |Subtensor{int64:int64:} [id T] 'u2[0:-2]'
 | |     | | | |ScalarFromTensor [id BM] ''
 | |     | | |   |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | |Subtensor{:int64:} [id BN] ''
 | |     | | | |Subtensor{int64:int64:} [id BA] 'u2[1:-1]'
 | |     | | | |ScalarFromTensor [id BO] ''
 | |     | | |   |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | |Subtensor{:int64:} [id BP] ''
 | |     | | | |Subtensor{int64::} [id BG] 'u2[2:]'
 | |     | | | |ScalarFromTensor [id BQ] ''
 | |     | | |   |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | |IncSubtensor{Set;:int64:} [id BR] ''
 | |     | | | |AllocEmpty{dtype='%(float)s'} [id BS] ''
 | |     | | | | |Elemwise{add,no_inplace} [id BT] ''
 | |     | | | |   |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | | |   |Subtensor{int64} [id BU] ''
 | |     | | | |     |Shape [id BV] ''
 | |     | | | |     | |Subtensor{:int64:} [id BW] ''
 | |     | | | |     |   |y0 [id BX]
 | |     | | | |     |   |Constant{3} [id BY]
 | |     | | | |     |Constant{0} [id BZ]
 | |     | | | |Subtensor{:int64:} [id BW] ''
 | |     | | | |ScalarFromTensor [id CA] ''
 | |     | | |   |Subtensor{int64} [id BU] ''
 | |     | | |IncSubtensor{Set;:int64:} [id CB] ''
 | |     | | | |AllocEmpty{dtype='%(float)s'} [id CC] ''
 | |     | | | | |Elemwise{add,no_inplace} [id CD] ''
 | |     | | | | | |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | | | | |Subtensor{int64} [id CE] ''
 | |     | | | | |   |Shape [id CF] ''
 | |     | | | | |   | |Rebroadcast{0} [id CG] ''
 | |     | | | | |   |   |InplaceDimShuffle{x,0} [id CH] ''
 | |     | | | | |   |     |x0 [id CI]
 | |     | | | | |   |Constant{0} [id CJ]
 | |     | | | | |Subtensor{int64} [id CK] ''
 | |     | | | |   |Shape [id CL] ''
 | |     | | | |   | |Rebroadcast{0} [id CG] ''
 | |     | | | |   |Constant{1} [id CM]
 | |     | | | |Rebroadcast{0} [id CG] ''
 | |     | | | |ScalarFromTensor [id CN] ''
 | |     | | |   |Subtensor{int64} [id CE] ''
 | |     | | |Elemwise{minimum,no_inplace} [id I] ''
 | |     | | |win2 [id CO]
 | |     | | |w [id CP]
 | |     | | |wout [id CQ]
 | |     | | |win [id CR]
 | |     | |Constant{1} [id CS]
 | |     |RandomFunction{uniform}.1 [id CT] ''
 | |       |<RandomStateType> [id CU]
 | |       |Shape [id CV] ''
 | |       | |Subtensor{int64::} [id G] ''
 | |       |TensorConstant{0.1} [id CW]
 | |       |TensorConstant{0.9} [id CX]
 | |Sum{acc_dtype=float64} [id CY] ''
 |   |Elemwise{mul,no_inplace} [id CZ] ''
 |     |Subtensor{int64::} [id DA] ''
 |     | |for{cpu,scan_fn}.0 [id H] ''
 |     | |Constant{3} [id DB]
 |     |RandomFunction{uniform}.1 [id DC] ''
 |       |<RandomStateType> [id DD]
 |       |Shape [id DE] ''
 |       | |Subtensor{int64::} [id DA] ''
 |       |TensorConstant{0.1} [id CW]
 |       |TensorConstant{0.9} [id CX]
 |Sum{acc_dtype=float64} [id DF] ''
   |Elemwise{mul,no_inplace} [id DG] ''
     |for{cpu,scan_fn}.2 [id H] ''
     |RandomFunction{uniform}.1 [id DH] ''
       |<RandomStateType> [id DI]
       |Shape [id DJ] ''
       | |for{cpu,scan_fn}.2 [id H] ''
       |TensorConstant{0.1} [id CW]
       |TensorConstant{0.9} [id CX]

Inner graphs of the scan ops:

for{cpu,scan_fn}.1 [id H] ''
 >Elemwise{Composite{((i0 + i1) * i2)}} [id DK] ''
 > |y0[t-1] [id DL] -> [id BR]
 > |y0[t-3] [id DM] -> [id BR]
 > |InplaceDimShuffle{} [id DN] ''
 >   |CGemv{inplace} [id DO] ''
 >     |AllocEmpty{dtype='%(float)s'} [id DP] ''
 >     | |TensorConstant{1} [id DQ]
 >     |TensorConstant{1.0} [id DR]
 >     |InplaceDimShuffle{x,0} [id DS] ''
 >     | |wout_copy [id DT] -> [id CQ]
 >     |x0[t-1] [id DU] -> [id CB]
 >     |TensorConstant{0.0} [id DV]
 >Elemwise{Composite{(i0 + ((i1 + (i2 * i3)) * i4) + i5)}} [id DW] ''
 > |CGemv{no_inplace} [id DX] ''
 > | |AllocEmpty{dtype='%(float)s'} [id DY] ''
 > | | |Shape_i{1} [id DZ] ''
 > | |   |win_copy [id EA] -> [id CR]
 > | |TensorConstant{1.0} [id DR]
 > | |InplaceDimShuffle{1,0} [id EB] 'win_copy.T'
 > | | |win_copy [id EA] -> [id CR]
 > | |u1[t] [id EC] -> [id BJ]
 > | |TensorConstant{0.0} [id DV]
 > |u2[t] [id ED] -> [id BN]
 > |u2[t-1] [id EE] -> [id BL]
 > |u2[t+1] [id EF] -> [id BP]
 > |win2_copy [id EG] -> [id CO]
 > |CGemv{inplace} [id EH] ''
 >   |AllocEmpty{dtype='%(float)s'} [id EI] ''
 >   | |Shape_i{1} [id EJ] ''
 >   |   |w_copy [id EK] -> [id CP]
 >   |TensorConstant{1.0} [id DR]
 >   |InplaceDimShuffle{1,0} [id EL] 'w_copy.T'
 >   | |w_copy [id EK] -> [id CP]
 >   |x0[t-1] [id DU] -> [id CB]
 >   |TensorConstant{0.0} [id DV]
 >CGemv{no_inplace} [id DX] ''

for{cpu,scan_fn}.0 [id H] ''
 >Elemwise{Composite{((i0 + i1) * i2)}} [id DK] ''
 >Elemwise{Composite{(i0 + ((i1 + (i2 * i3)) * i4) + i5)}} [id DW] ''
 >CGemv{no_inplace} [id DX] ''

for{cpu,scan_fn}.2 [id H] ''
 >Elemwise{Composite{((i0 + i1) * i2)}} [id DK] ''
 >Elemwise{Composite{(i0 + ((i1 + (i2 * i3)) * i4) + i5)}} [id DW] ''
 >CGemv{no_inplace} [id DX] ''

for{cpu,scan_fn}.2 [id H] ''
 >Elemwise{Composite{((i0 + i1) * i2)}} [id DK] ''
 >Elemwise{Composite{(i0 + ((i1 + (i2 * i3)) * i4) + i5)}} [id DW] ''
 >CGemv{no_inplace} [id DX] ''
""" % {"float": theano.config.floatX}
            if (theano.config.mode != 'FAST_COMPILE' and
                    theano.config.floatX == 'float64'):
                for truth, out in zip(expected_output.split("\n"), lines):
                    assert truth.strip() == out.strip(), (truth, out)

            cost_fn = theano.function([u1, u2, x0, y0, W_in1],
                                      cost,
                                      updates=updates,
                                      no_default_updates=True,
                                      allow_input_downcast=True)
            print(".", file=sys.stderr)
            grad_fn = theano.function([u1, u2, x0, y0, W_in1],
                                      gparams,
                                      updates=updates,
                                      no_default_updates=True,
                                      allow_input_downcast=True)
            print(".", file=sys.stderr)
        finally:
            theano.config.compute_test_value = old1
            theano.config.compute_test_value_opt = old2

        num_grad = multiple_outputs_numeric_grad(cost_fn,
                                                 [v_u1,
                                                  v_u2,
                                                  v_x0,
                                                  v_y0,
                                                  vW_in1])

        analytic_grad = grad_fn(v_u1, v_u2, v_x0, v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))

    @attr('slow')
    def test_grad_multiple_outs_taps_backwards(self):
        l = 5
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        vWout = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-.2, high=.2))
        v_u1 = asarrayX(rng.uniform(size=(l, 2), low=-.2, high=.2))
        v_u2 = asarrayX(rng.uniform(size=(l + 2, 2), low=-.2, high=.2))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.2, high=.2))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.matrix('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        def f_rnn_cmpl(u1_t,
                       u2_tm1,
                       u2_t,
                       u2_tp1,
                       x_tm1,
                       y_tm1,
                       y_tm3,
                       W_in1):
            return [theano.dot(u1_t, W_in1) + \
                        (u2_t + u2_tm1 * u2_tp1) * W_in2 + \
                        theano.dot(x_tm1, W),
                    (y_tm1 + y_tm3) * theano.dot(x_tm1, W_out)]
        cost, updates = scan_project_sum(f_rnn_cmpl,
                                         [u1, dict(input=u2, taps=[-1, 0, 1])],
                                         [x0, dict(initial=y0, taps=[-1, -3])],
                                         W_in1,
                                         n_steps=None,
                                         truncate_gradient=-1,
                                         go_backwards=True)
        vparams = [v_u1, v_u2, v_x0, v_y0, vW_in1]
        params = [u1, u2, x0, y0, W_in1]
        gparams = theano.tensor.grad(cost, params)
        grad_fn = theano.function([u1, u2, x0, y0, W_in1],
                                  gparams,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)
        cost_fn = theano.function([u1, u2, x0, y0, W_in1],
                                  cost,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)

        num_grad = multiple_outputs_numeric_grad(cost_fn, [v_u1,
                                                           v_u2,
                                                           v_x0,
                                                           v_y0,
                                                           vW_in1])

        analytic_grad = grad_fn(v_u1, v_u2, v_x0, v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))

    def test_grad_multiple_outs_some_uncomputable(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in = asarrayX(rng.uniform(size=(2, 2), low=-3., high=3.))
        v_u = asarrayX(rng.uniform(size=(5, 2), low=-3., high=3.))
        v_u2 = np.array([1, 3, 4, 6, 8], dtype='int32')
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-3., high=3.))

        W_in = theano.tensor.matrix('win')
        u = theano.tensor.matrix('u1')
        u2 = theano.tensor.ivector('u2')
        x0 = theano.tensor.vector('x0', dtype=theano.config.floatX)
        # trng  = theano.tensor.shared_randomstreams.RandomStreams(
        #                                               utt.fetch_seed())

        def f_rnn_cmpl(u_t, u2_t, x_tm1, W_in):
            trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
            x_t = theano.tensor.cast(u2_t, theano.config.floatX) +\
                    theano.dot(u_t, W_in) + x_tm1 + \
                    trng1.uniform(low=-1.1, high=1.1,
                                  dtype=theano.config.floatX)
            return x_t, 2 * u2_t

        cost, updates = scan_project_sum(f_rnn_cmpl,
                                         [u, u2],
                                         [x0, None],
                                         W_in,
                                         n_steps=None,
                                         truncate_gradient=-1,
                                         go_backwards=False)
        vparams = [v_u, v_u2, v_x0, vW_in]
        params = [u, u2, x0, W_in]
        gparams = theano.tensor.grad(cost, params)
        grad_fn = theano.function([u, u2, x0, W_in],
                                  gparams,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)
        cost_fn = theano.function([u, u2, x0, W_in],
                                  cost,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)

        def reset_rng_fn(fn, *args):
            # Re-seed every RandomState input of the compiled function so
            # that repeated calls produce identical random draws.
            for idx, arg in enumerate(fn.maker.expanded_inputs):
                if (arg.value and
                        isinstance(arg.value.data, np.random.RandomState)):
                    obj = fn.maker.expanded_inputs[idx].value
                    obj.data = np.random.RandomState(123)
                    fn.maker.expanded_inputs[idx].value = obj
            return fn(*args)

        reset_rng_cost_fn = lambda *args: reset_rng_fn(cost_fn, *args)
        reset_rng_grad_fn = lambda *args: reset_rng_fn(grad_fn, *args)
        num_grad = multiple_outputs_numeric_grad(
            reset_rng_cost_fn,
            [v_u, v_u2, v_x0, vW_in],
            ndarray_mask=[True, False, True, True])
        analytic_grad = reset_rng_grad_fn(v_u, v_u2, v_x0, vW_in)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)

        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))

        # Also validate that the mappings outer_inp_from_outer_out and
        # outer_inp_from_inner_inp produce the correct results
        scan_node = list(updates.values())[0].owner

        result = scan_node.op.var_mappings['outer_inp_from_outer_out']
        expected_result = {0: 3, 1: 5, 2: 4}
        assert(result == expected_result)

        result = scan_node.op.var_mappings['outer_inp_from_inner_inp']
        expected_result = {0: 1, 1: 2, 2: 3, 3: 4, 4: 6}
        assert(result == expected_result)
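
        # Reading note (our interpretation of Scan.var_mappings): these
        # dictionaries translate between scan's index spaces; e.g. in
        # 'outer_inp_from_outer_out', the entry {0: 3} says that outer
        # output 0 is fed by outer input 3 of the scan node.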

    def test_grad_multiple_outs_some_truncate(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in = asarrayX(rng.uniform(size=(2, 2), low=-.1, high=.1))
        v_u = asarrayX(rng.uniform(size=(5, 2), low=-.1, high=.1))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-.1, high=.1))

        W_in = theano.tensor.matrix('win')
        u = theano.tensor.matrix('u1')
        x0 = theano.tensor.vector('x0')
        # trng  = theano.tensor.shared_randomstreams.RandomStreams(
        #                                               utt.fetch_seed())

        def f_rnn_cmpl(u_t, x_tm1, W_in):
            trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
            rnd_nb = trng1.uniform(low=-.1, high=.1)
            x_t = theano.dot(u_t, W_in) + x_tm1 + rnd_nb
            x_t = theano.tensor.cast(x_t, dtype=theano.config.floatX)
            return x_t

        cost, updates = scan_project_sum(f_rnn_cmpl,
                                         u,
                                         x0,
                                         W_in,
                                         n_steps=None,
                                         truncate_gradient=3,
                                         go_backwards=False)
        vparams = [v_u, v_x0, vW_in]
        params = [u, x0, W_in]
        gparams = theano.tensor.grad(cost, params)

        grad_fn = theano.function([u, x0, W_in],
                                  gparams,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)
        cost_fn = theano.function([u, x0, W_in],
                                  cost,
                                  updates=updates,
                                  no_default_updates=True,
                                  allow_input_downcast=True)

        def reset_rng_fn(fn, *args):
            # Same re-seeding trick as above: give every RandomState input
            # a fixed seed before each call.
            for idx, arg in enumerate(fn.maker.expanded_inputs):
                if (arg.value and
                        isinstance(arg.value.data, np.random.RandomState)):
                    obj = fn.maker.expanded_inputs[idx].value
                    obj.data = np.random.RandomState(123)
                    fn.maker.expanded_inputs[idx].value = obj
            out = fn(*args)
            return out

        reset_rng_cost_fn = lambda *args: reset_rng_fn(cost_fn, *args)
        reset_rng_grad_fn = lambda *args: reset_rng_fn(grad_fn, *args)
        num_grad = multiple_outputs_numeric_grad(
            reset_rng_cost_fn, [v_u, v_x0, vW_in])
        analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
        # With truncate_gradient=3 over a 5-step sequence, backprop stops
        # before reaching the first two steps, so their gradient wrt u
        # must be exactly zero.
        utt.assert_allclose(analytic_grad[0][:2], np.zeros((2, 2)))

    def test_grad_multiple_outs_some_disconnected(self):
        final_cost = self._grad_mout_helper(100, mode_nodebug)
        assert final_cost < 0.02, final_cost

    def test_grad_multiple_outs_some_disconnected_2(self):
        # This is to try the network in DEBUG_MODE, but not fully
        # train it since that would take 3 hours
        self._grad_mout_helper(1, None)

    def _grad_mout_helper(self, n_iters, mode):
        # Created on Tue Oct 07 13:28:51 2014
        # @author: vaneetke
        rng = np.random.RandomState(utt.fetch_seed())
        n_hid = 3
        n_in = 1
        n_out = 1

        W_hh_v = asarrayX(rng.uniform(size=(n_hid, n_hid), low=-1, high=1))
        h0_v = asarrayX(rng.uniform(size=(2, n_hid), low=-1, high=1))
        b_h_v = asarrayX(rng.uniform(size=(n_hid,), low=-.01, high=.01))
        W_ih_v = asarrayX(rng.uniform(size=(n_in, n_hid), low=-1, high=1))
        W_ho_v = asarrayX(rng.uniform(size=(n_hid, n_out), low=-1, high=1))
        b_o_v = asarrayX(rng.uniform(size=(n_out,), low=-.01, high=.01))

        # parameters of the rnn
        b_h = theano.shared(b_h_v, name='b_h')
        h0 = theano.shared(h0_v, name='h0')
        W_ih = theano.shared(W_ih_v, name='W_ih')
        W_hh = theano.shared(W_hh_v, name='W_hh')
        W_ho = theano.shared(W_ho_v, name='W_ho')
        b_o = theano.shared(b_o_v, name='b_o')
        params = [W_ih, W_hh, b_h, W_ho, b_o, h0]

        # first dimension is time
        x = tensor.matrix()

        # sequences: x_t
        # prior results: h_tm2, h_tm1 (taps=[-2, -1], which is why the
        #   initial state h0 must carry two time steps: shape (2, n_hid))
        # non-sequences: W_ih, W_hh, b_h, W_ho, b_o
        def one_step(x_t, h_tm2, h_tm1, W_ih, W_hh, b_h, W_ho, b_o):
            h_t = tensor.tanh(theano.dot(x_t, W_ih)
                              + theano.dot(h_tm2, W_hh) + b_h)
            y_t = theano.dot(h_t, W_ho) + b_o
            return [h_t, y_t]

        # hidden and outputs of the entire sequence
        [h, y], _ = theano.scan(
            fn=one_step,
            sequences=dict(input=x),
            # corresponds to the return type of one_step
            outputs_info=[dict(initial=h0, taps=[-2, -1]), None],
            non_sequences=[W_ih, W_hh, b_h, W_ho, b_o],
            mode=mode)

        # target values
        t = tensor.matrix()

        # learning rate
        lr = asarrayX(0.1)
        learning_rate = theano.shared(lr)

        cost = ((0.5 * ((y - t) ** 2.0).mean())
                + (0.5 * (y.std() - t.std()) ** 2.0))

        gparams = theano.grad(cost, params)
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(params, gparams)]
        learn_rnn_fn = theano.function(inputs=[x, t],
                                       outputs=cost,
                                       updates=updates,
                                       mode=mode)
        eval_rnn_fn = theano.function(inputs=[x],
                                      outputs=y,
                                      mode=mode)

        # artificial data
        x_v = np.arange(0., 10.49, 0.21, dtype=theano.config.floatX)
        x_v = x_v.reshape(len(x_v), 1)
        s_v = np.sin(x_v)
        t_v = np.roll(s_v, -1)[:-1]
        s_v = s_v[:-1]
        for i in xrange(n_iters):
            cost = learn_rnn_fn(s_v, t_v)
        pred = eval_rnn_fn(s_v)
        return cost

    def test_draw_as_input_to_scan(self):
        trng = theano.tensor.shared_randomstreams.RandomStreams(123)

        x = theano.tensor.matrix('x')
        y = trng.binomial(size=x.shape, p=x)
        z, updates = theano.scan(lambda a: a, non_sequences=y, n_steps=2)

        f = theano.function([x],
                            [y, z],
                            updates=updates,
                            allow_input_downcast=True)

        rng = np.random.RandomState(utt.fetch_seed())
        nx = rng.uniform(size=(10, 10))
        ny1, nz1 = f(nx)
        ny2, nz2 = f(nx)

        utt.assert_allclose([ny1, ny1], nz1)
        utt.assert_allclose([ny2, ny2], nz2)
        assert not np.allclose(ny1, ny2)
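
        # y is used as a non-sequence, so it is drawn only once per call
        # to f and stays constant across the two scan steps; z therefore
        # stacks two identical copies of y, while separate calls to f
        # produce different draws.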

    def test_grad_of_shared(self):
        x1 = theano.shared(3.)
        x1.name = 'x1'
        x2 = theano.tensor.vector('x2')
        y, updates = theano.scan(
            lambda v: theano.tensor.cast(v * x1, theano.config.floatX),
            sequences=x2)
        m = theano.tensor.grad(y.sum(), x1)

        f = theano.function([x2], m, allow_input_downcast=True)
        utt.assert_allclose(f([2, 3]), 5)

    def test_computing_gradient(self):
        x1 = theano.tensor.scalar('x1')
        x2 = theano.shared(np.array([1, 2, 3, 4, 5]), name='x2')
        K = x2 * x1

        out, updates = theano.scan(lambda i, v: theano.tensor.grad(K[i], v),
                                   sequences=theano.tensor.arange(K.shape[0]),
                                   non_sequences=x1)
        f = theano.function([x1], out, allow_input_downcast=True)

        assert np.all(f(3.) != 0.)
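
        # Since K = x2 * x1 with scalar x1, grad(K[i], x1) is simply
        # x2[i], so scanning over i returns [1, 2, 3, 4, 5]; the entries
        # are non-zero regardless of the value of x1.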

    def test_shared_updates(self):
        X = theano.shared(np.array(1))

        out, updates = theano.scan(
            lambda: OrderedDict([(X, (X + 1))]),
            outputs_info=[],
            non_sequences=[],
            sequences=[],
            n_steps=10)

        f = theano.function([], [], updates=updates)
        f()
        assert X.get_value() == 11
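
        # Each of the 10 steps applies X -> X + 1 to the value left by
        # the previous step, so the composed update in `updates` takes X
        # from 1 to 11 in a single call to f.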

    def test_memory_aliasing_updates(self):
        x = theano.shared(np.array(1))
        y = theano.shared(np.array(1))

        out, updates = theano.scan(
            lambda: OrderedDict([(x, x + 1), (y, x)]),
            outputs_info=[],
            non_sequences=[],
            sequences=[],
            n_steps=10)

        f = theano.function([], [], updates=updates)
        f()
        assert not np.may_share_memory(x.container.storage[0],
                                       y.container.storage[0])

        assert x.get_value() != y.get_value()

    def test_scan_output_padding(self):
        # Scan outputs are usually lists, whose entries correspond to the
        # intermediate results. When n_steps=1, some extra machinery is
        # required in order to mimic this interface. Scan thus calls
        # tensor.shape_padleft on the inner function outputs.
        #
        # However, this is not the proper behavior for shared variables:
        # they should not be padded in any way.
        #
        # This unit test addresses the bug fix of changeset ba7157e95cb1.

        a = theano.tensor.vector()
        init_a = theano.tensor.vector()
        b = theano.shared(np.random.rand(5, 4))

        def inner_func(a):
            return a + 1, OrderedDict([(b, 2 * b)])

        out, updates = theano.scan(
            inner_func,
            outputs_info=[OrderedDict([('initial', init_a)])],
            n_steps=1)
        out = out[-1]
        assert out.type.ndim == a.type.ndim
        assert updates[b].type.ndim == b.type.ndim

        out, updates = theano.scan(inner_func,
                                   outputs_info=[init_a],
                                   n_steps=1)
        assert out.type.ndim == a.type.ndim + 1
        assert updates[b].type.ndim == b.type.ndim
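
    # A minimal standalone sketch (ours) of the padding discussed above:
    # tensor.shape_padleft turns a per-step output of shape (d,) into
    # shape (1, d), mimicking a length-1 list of steps.
    def _sketch_shape_padleft(self):
        v = theano.tensor.vector('v')
        f = theano.function([v], tensor.shape_padleft(v),
                            allow_input_downcast=True)
        assert f([1., 2., 3.]).shape == (1, 3)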

    def test_scan_extra_inputs_hessian(self):
        x = theano.tensor.vector('x')
        A = theano.tensor.matrix('A')
        fc1 = theano.shared(0.5, name='fc1')
        fc2 = theano.shared(0.9, name='fc2')
        y = fc1 * theano.dot(x * x, theano.dot(A, x))
        y.name = 'y'
        gy = theano.tensor.grad(y, x)
        gy.name = 'gy'
        hy, updates = theano.scan(
            lambda i, gy, x: theano.tensor.grad(gy[i] * fc2, x),
            sequences=theano.tensor.arange(gy.shape[0]),
            non_sequences=[gy, x])

        f = theano.function([x, A], hy, allow_input_downcast=True)
        vx = np.array([1., 1.], dtype=theano.config.floatX)
        vA = np.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
        # Expected value: fc2 * Hessian(y) at vx, i.e. 0.9 * [[4., 2.],
        # [2., 1.]] (fc1 = 0.5 is already folded into the Hessian).
        vR = np.array([[3.6, 1.8], [1.8, 0.9]], dtype=theano.config.floatX)
        out = f(vx, vA)

        utt.assert_allclose(out, vR)

    def test_cloning_no_replace_strict_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=None,
                          strict=True,
                          share_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])

        assert z in f2_inp
        assert x in f2_inp
        assert y in f2_inp

    def test_cloning_no_replace_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=None,
                          strict=True,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])

        assert z not in f2_inp
        assert x not in f2_inp
        assert y not in f2_inp

    def test_cloning_replace_strict_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        y2 = theano.tensor.vector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=OrderedDict([(y, y2)]),
                          strict=True,
                          share_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z in f2_inp
        assert x in f2_inp
        assert y2 in f2_inp

    def test_cloning_replace_not_strict_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.fvector('y')
        y2 = theano.tensor.dvector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=OrderedDict([(y, y2)]),
                          strict=False,
                          share_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z in f2_inp
        assert x in f2_inp
        assert y2 in f2_inp

    def test_cloning_replace_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.vector('y')
        y2 = theano.tensor.vector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=[(y, y2)],
                          strict=True,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z not in f2_inp
        assert x not in f2_inp
        assert y2 not in f2_inp

    def test_cloning_replace_not_strict_not_copy_inputs(self):
        # This has nothing to do with scan, but it exercises the clone
        # function that scan uses internally, which pfunc now uses as well
        # and which users might want to call directly
        x = theano.tensor.vector('x')
        y = theano.tensor.fvector('y')
        y2 = theano.tensor.dvector('y2')
        z = theano.shared(0.25)

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
                          replace=[(y, y2)],
                          strict=False,
                          share_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
        assert z not in f2_inp
        assert x not in f2_inp
        assert y2 not in f2_inp
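
    # A compact summary of the six cloning tests above (our gloss):
    # share_inputs=True makes the clone reuse the original input
    # variables, so they appear in gof.graph.inputs of the result, while
    # share_inputs=False copies every input. strict=True requires each
    # replacement to match the type of the variable it replaces, which
    # is why the fvector -> dvector substitutions pass strict=False.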

    # Test the re-ordering of inputs: an RNN with multiple outputs and
    # multiple inputs, using other dimensions instead of scalars/vectors
    def test_reordering(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        v_u1 = asarrayX(rng.uniform(size=(3, 2), low=-5., high=5.))
        v_u2 = asarrayX(rng.uniform(size=(3,), low=-5., high=5.))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [y_tm3 + 1,
                    y_tm3 + 2,
                    theano.dot(u1_t, W_in1) + u2_t * W_in2 + \
                        theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]

        outputs, updates = theano.scan(f_rnn_cmpl,
                                       [u1, u2],
                                       [None,
                                        None,
                                        x0,
                                        dict(initial=y0, taps=[-1, -3])],
                                       W_in1,
                                       n_steps=None,
                                       truncate_gradient=-1,
                                       go_backwards=False)

        f4 = theano.function([u1, u2, x0, y0, W_in1],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)

        # compute the values in numpy
        v_x = np.zeros((3, 2), dtype=theano.config.floatX)
        v_y = np.zeros((3,), dtype=theano.config.floatX)
        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
                    np.dot(v_x0, vW)
        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]
        for i in xrange(1, 3):
            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
                        np.dot(v_x[i - 1], vW)
            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump1, theano_dump2, theano_x, theano_y) = f4(v_u1,
                                                              v_u2,
                                                              v_x0,
                                                              v_y0,
                                                              vW_in1)

        utt.assert_allclose(theano_x, v_x)
        utt.assert_allclose(theano_y, v_y)

    def test_scan_as_tensor_on_gradients(self):
        # Bug reported by cityhall on scan when computing the gradients

        to_scan = theano.tensor.dvector('to_scan')
        seq = theano.tensor.dmatrix('seq')
        f1 = theano.tensor.dscalar('f1')

        def scanStep(prev, seq, f1):
            return prev + f1 * seq

        scanned, _ = theano.scan(fn=scanStep,
                                 sequences=[seq],
                                 outputs_info=[to_scan],
                                 non_sequences=[f1])

        f_scan = theano.function(inputs=[to_scan, seq, f1],
                                 outputs=scanned,
                                 allow_input_downcast=True)

        t_grad = theano.tensor.grad(scanned.sum(),
                                    wrt=[to_scan, f1],
                                    consider_constant=[seq])
        f_grad = theano.function(inputs=[to_scan, seq, f1],
                                 outputs=t_grad,
                                 allow_input_downcast=True)

    def test_save_mem(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        v_u1 = asarrayX(rng.uniform(size=(8, 2), low=-5., high=5.))
        v_u2 = asarrayX(rng.uniform(size=(8,), low=-5., high=5.))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [y_tm3 + 1,
                    theano.dot(u1_t, W_in1) + u2_t * W_in2 + \
                        theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]

        _outputs, updates = theano.scan(f_rnn_cmpl,
                                        [u1, u2],
                                        [None,
                                         dict(initial=x0),
                                         dict(initial=y0, taps=[-1, -3])],
                                        W_in1,
                                        n_steps=None,
                                        truncate_gradient=-1,
                                        go_backwards=False)
        outputs = [_outputs[0][-1], _outputs[1][-1], _outputs[2][-1]]
        f4 = theano.function([u1, u2, x0, y0, W_in1],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)

        # compute the values in numpy
        v_x = np.zeros((8, 2), dtype=theano.config.floatX)
        v_y = np.zeros((8,), dtype=theano.config.floatX)
        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
                        np.dot(v_x0, vW)
        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]

        for i in xrange(1, 8):
            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
                        np.dot(v_x[i - 1], vW)
            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump, theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)

        utt.assert_allclose(theano_x, v_x[-1:])
        utt.assert_allclose(theano_y, v_y[-1:])
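
        # Only the last element of each output is requested above; that
        # is what lets scan's save_mem machinery avoid storing the full
        # 8-step history (see also the save_mem tests further below).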

    # Note: this method has no `test_` prefix, so the test runner does
    # not collect it automatically.
    def caching_nsteps_by_scan_op(self):
        W = tensor.matrix('weights')
        initial = tensor.vector('initial')
        inpt = tensor.matrix('inpt')

        def one_step(x_t, h_tm1, W):
            expr = tensor.dot(h_tm1, W) + x_t
            return expr

        expr, _ = theano.scan(
          fn=one_step,
          sequences=[inpt],
          outputs_info=[initial],
          non_sequences=[W])

        sh = expr.shape[0]

        v1 = theano.shared(np.ones(5, dtype=theano.config.floatX))
        v2 = theano.shared(np.ones((5, 5), dtype=theano.config.floatX))
        shapef = theano.function([W],
                                 expr,
                                 givens=OrderedDict([(initial, v1),
                                                     (inpt, v2)]))
        # First execution to cache n_steps
        shapef(np.ones((5, 5), dtype=theano.config.floatX))

        cost = expr.sum()
        d_cost_wrt_W = tensor.grad(cost, [W])
        f = theano.function(
            [W, inpt], d_cost_wrt_W,
            givens=OrderedDict([(initial, theano.shared(np.zeros(5)))]))

        rval = np.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
        arg1 = np.ones((5, 5), dtype=theano.config.floatX)
        arg2 = np.ones((10, 5), dtype=theano.config.floatX)
        utt.assert_allclose(f(arg1, arg2), rval)

    def test_save_mem_reduced_number_of_steps(self):
        def f_rnn(u_t):
            return (u_t + 1.,
                    u_t + 2.,
                    u_t + 3.,
                    u_t + 4.,
                    u_t + 5.,
                    u_t + 6.,
                    u_t + 7.)

        u = theano.tensor.vector('u')
        idx = theano.tensor.iscalar('idx')
        jdx = theano.tensor.iscalar('jdx')
        [x1, x2, x3, x4, x5, x6, x7], updates = \
                theano.scan(f_rnn,
                            u,
                            n_steps=None,
                            truncate_gradient=-1,
                            go_backwards=False)

        f2 = theano.function([u, idx, jdx],
                             [x1[:2],
                              x2[4],
                              x3[idx],
                              x4[:idx],
                              x5[-10],
                              x6[-jdx],
                              x7[:-jdx]],
                             updates=updates,
                             allow_input_downcast=True)
2554        # get random initial values
2555        rng = np.random.RandomState(utt.fetch_seed())
2556        v_u = rng.uniform(size=(20,), low=-5., high=5.)
2557
        # compute the outputs using the compiled theano function
2559        tx1, tx2, tx3, tx4, tx5, tx6, tx7 = f2(v_u, 3, 15)
2560
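        # In this call idx == 3 and jdx == 15, so e.g. x3[idx] == u[3] + 3.
        # and x6[-jdx] == u[-15] + 6.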
2561        utt.assert_allclose(tx1, v_u[:2] + 1.)
2562        utt.assert_allclose(tx2, v_u[4] + 2.)
2563        utt.assert_allclose(tx3, v_u[3] + 3.)
2564        utt.assert_allclose(tx4, v_u[:3] + 4.)
2565        utt.assert_allclose(tx5, v_u[-10] + 5.)
2566        utt.assert_allclose(tx6, v_u[-15] + 6.)
2567        utt.assert_allclose(tx7, v_u[:-15] + 7.)
2568        scan_node = f2.maker.fgraph.outputs[0].owner.inputs[0]
2569
        # Maybe an ugly way to check whether the optimization has
        # been applied
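        # A minimal sketch of such a check (an assumption about the
        # optimized graph, not asserted by the original test):
        # assert isinstance(scan_node.owner.op,
        #                   theano.scan_module.scan_op.Scan)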
2572
2573    def test_save_mem_store_steps(self):
2574
        def f_rnn(u_t, x1_tm1, x1_tm3, x2_tm1, x3_tm2, x3_tm1, x4_tm1):
2576            return (u_t + 1.,
2577                    u_t + 2.,
2578                    u_t + 3.,
2579                    u_t + 4.,
2580                    u_t + 5.,
2581                    u_t + 6.,
2582                    u_t + 7.)
2583
2584        u = theano.tensor.vector('u')
2585        idx = theano.tensor.iscalar('idx')
2586        jdx = theano.tensor.iscalar('jdx')
2587        x10 = theano.tensor.vector('x10')
2588        x20 = theano.tensor.scalar('x20')
2589        x30 = theano.tensor.vector('x30')
2590        x40 = theano.tensor.scalar('x40')
2591        [x1, x2, x3, x4, x5, x6, x7], updates = \
2592                theano.scan(f_rnn,
2593                            u,
2594                            [None,
2595                             None,
2596                             None,
2597                             dict(initial=x10, taps=[-1, -2]),
2598                             x20,
2599                             dict(initial=x30, taps=[-1, -2]),
2600                             x40],
2601                            n_steps=None,
2602                            truncate_gradient=-1,
2603                            go_backwards=False)
2604
2605        f2 = theano.function([u, x10, x20, x30, x40],
                             [x1[-7], x2[-3:-1], x3[-6:], x4[-1], x5[-1]],
2607                             updates=updates,
2608                             allow_input_downcast=True)
2609
2610        # get random initial values
2611        rng = np.random.RandomState(utt.fetch_seed())
2612        v_u = rng.uniform(size=(20,), low=-5., high=5.)
2613
        # compute the outputs using the compiled theano function
2615        tx1, tx2, tx3, tx4, tx5 = f2(v_u, [0, 0], 0, [0, 0], 0)
2616
2617        utt.assert_allclose(tx1, v_u[-7] + 1.)
2618        utt.assert_allclose(tx2, v_u[-3:-1] + 2.)
2619        utt.assert_allclose(tx3, v_u[-6:] + 3.)
2620        utt.assert_allclose(tx4, v_u[-1] + 4.)
2621        utt.assert_allclose(tx5, v_u[-1] + 5.)
2622
2623    def test_use_scan_direct_output(self):
2624        # This test looks for a crash that happened when directly using the
2625        # recurrent output of a scan node instead of taking the result
2626        # returned by the scan() function
2627
2628        # Obtain a compilation mode that will cause the test to fail if an
2629        # exception occurs in the optimization process
2630        on_opt_error = theano.config.on_opt_error
2631        theano.config.on_opt_error = "raise"
2632        mode = theano.compile.get_default_mode()
2633        theano.config.on_opt_error = on_opt_error
2634
2635        x = tensor.scalar()
2636        seq = tensor.vector()
        outputs_info = [x, tensor.zeros_like(x)]
        (out1, out2), updates = theano.scan(lambda a, b, c: (a + b, b + c),
2639                                            sequences=seq,
2640                                            outputs_info=outputs_info,
2641                                            mode=mode)
2642
2643        # Obtain a reference to the scan outputs before the subtensor and
2644        # compile a function with them as outputs
2645        assert isinstance(out1.owner.op, tensor.subtensor.Subtensor)
2646        assert isinstance(out2.owner.op, tensor.subtensor.Subtensor)
2647        out1_direct = out1.owner.inputs[0]
2648        out2_direct = out2.owner.inputs[0]
2649        fct = theano.function([x, seq],
2650                              [out1_direct[:-1], out2_direct[:-1]],
2651                              mode=mode)
2652
2653        # Test the function to ensure valid outputs
2654        floatX = theano.config.floatX
2655
2656        init_value = 5.0
2657        seq_value = np.arange(4, dtype=floatX)
2658        output1, output2 = fct(init_value, seq_value)
2659
2660        expected_output1 = [init_value]
2661        expected_output2 = [0]
2662        for i in seq_value[:-1]:
2663            expected_output2.append(expected_output1[-1] +
2664                                    expected_output2[-1])
2665            expected_output1.append(expected_output1[-1] + i)
2666
2667        utt.assert_allclose(output1, expected_output1)
2668        utt.assert_allclose(output2, expected_output2)
2669
2670    def test_use_scan_direct_output2(self):
        # This test looks for a crash that happened when directly using the
        # recurrent output of a scan node associated with a state with
        # broadcastable dimensions
2674
2675        x = tensor.dcol()
2676        seq = tensor.dcol()
        outputs_info = [x, tensor.zeros_like(x)]
        (out1, out2), updates = theano.scan(lambda a, b, c: (a + b, a + c),
2679                                            sequences=seq,
2680                                            outputs_info=outputs_info)
2681
2682        # Obtain a reference to the scan outputs before the subtensor and
2683        # compile a function with them as outputs
2684        assert isinstance(out1.owner.op, tensor.subtensor.Subtensor)
2685        assert isinstance(out2.owner.op, tensor.subtensor.Subtensor)
2686        out1_direct = out1.owner.inputs[0]
2687        out2_direct = out2.owner.inputs[0]
2688        fct = theano.function([x, seq],
2689                              [out1_direct, out2_direct])
2690
2691        # Test that the function returns valid outputs
2692        x_val = np.arange(0, 4)[:, None]
2693        seq_val = np.arange(4, 8)[:, None]
2694
2695        out1, out2 = fct(x_val, seq_val)
2696
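        # The direct (pre-subtensor) outputs include the initial state, so
        # they hold 5 entries of shape (4, 1) for 4 scan steps.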
2697        expected_out1 = np.zeros((5, 4, 1))
2698        expected_out2 = np.zeros((5, 4, 1))
2699        for i in range(4):
2700            expected_out2[i + 1] = expected_out2[i] + seq_val[i]
2701        for i in range(5):
2702            expected_out1[i] = expected_out2[i] + x_val
2703
2704        utt.assert_allclose(out1, expected_out1)
2705        utt.assert_allclose(out2, expected_out2)
2706
2707    def test_infer_shape(self):
2708        # Test for a crash in scan.infer_shape when using both
2709        # an until condition and random sampling in the inner function.
2710
2711        x = tensor.scalar()
2712        srng = theano.tensor.shared_randomstreams.RandomStreams(0)
2713
2714        def inner_fct(previous_val):
2715            new_val = previous_val + srng.uniform()
2716            condition = theano.scan_module.until(previous_val > 5)
2717            return new_val, condition
2718
2719        out, updates = theano.scan(inner_fct,
2720                                   outputs_info=x,
2721                                   n_steps=10)
2722
2723        g_out = tensor.grad(out.sum(), x)
2724        fct = theano.function([x], [out, g_out])
2725
2726        for i in xrange(-5, 5):
2727            output, g_output = fct(i)
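            # Each step adds noise to the previous value, so the gradient of
            # out.sum() w.r.t. x is 1 per executed step, i.e. the number of
            # outputs.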
2728            assert len(output) == g_output
2729
2730    def test_infer_shape2(self):
2731        # Ensure that the shape inference can remove the Scan node in the
2732        # case of a complicated inner graph involving sequences and recurrent
2733        # states
2734
2735        seq = tensor.lvector()
2736        sitsot_init = tensor.lscalar()
2737        mitsot_init = tensor.lvector()
2738
2739        def step(seq1, sitsot_m1, mitsot_m2, mitsot_m1):
2740            # Every iteration, the sitsot state decreases and the mitsot state
2741            # increases such that their total value remains identical. This
2742            # is because this value will be used as the shape of a nitsot
2743            # output and the outputs of every iteration need to have the same
2744            # shape
2745            diff = mitsot_m1 + seq1
2746            next_mitsot_val = mitsot_m2 + diff
2747            next_sitsot_val = sitsot_m1 - diff
2748            nitsot_out = tensor.alloc(np.asarray(0., 'float32'),
2749                                      next_mitsot_val +
2750                                      next_sitsot_val)
2751            return next_sitsot_val, next_mitsot_val, nitsot_out
2752
2753        out, updates = theano.scan(fn=step,
2754                                   sequences=seq,
2755                                   outputs_info=[sitsot_init,
2756                                                 {'initial': mitsot_init,
2757                                                  'taps': [-2, -1]},
2758                                                 None],
2759                                   n_steps=5)
2760
2761        f = theano.function([seq, sitsot_init, mitsot_init], out[2].shape,
2762                            mode='FAST_RUN')
        # When Scan.infer_shape covers more cases, there will be no scan
        # node left.
        assert len(scan_nodes_from_fct(f)) == 1
2765
        # This generates a scan crash during execution.
2767        # output_shape = f(np.arange(5), 5, [1, 2])
2768        # assert(all(output_shape == (5, 6)))
2769
2770    # The following test will fail in DebugMode if there are
2771    # some problems in Scan.infer_shape
2772    def test_remove_stuff(self):
2773        x = theano.tensor.vector('x')
2774
2775        def lm(m):
2776            trng = theano.tensor.shared_randomstreams.RandomStreams(
2777                                                     utt.fetch_seed())
2778            return [2 * m + trng.uniform(low=-1.1, high=1.1,
2779                                      dtype=theano.config.floatX),
2780                    m + trng.uniform(size=[3])]
2781
2782        [o1, o2], updates = theano.scan(lm,
2783                                        sequences=x,
2784                                        n_steps=None,
2785                                        truncate_gradient=-1,
2786                                        name='forward',
2787                                        go_backwards=False)
2788        go1 = theano.tensor.grad(o1.mean(), wrt=x)
2789        f = theano.function([x], go1, updates=updates,
2790                            allow_input_downcast=True, mode=mode_with_opt)
2791        self.assertTrue(np.allclose(f([1, 2, 3]), 2. / 3))
2792
2793        topo = f.maker.fgraph.toposort()
        # This assert is here to test that scan merging works.
2795        nb_scan = len([n for n in topo
2796            if isinstance(n.op, theano.scan_module.scan_op.Scan)])
2797        self.assertTrue(nb_scan == 1)
2798        nb_shape_i = len([n for n in topo
2799            if isinstance(n.op, theano.tensor.opt.Shape_i)])
2800        if theano.config.mode != 'FAST_COMPILE':
2801            self.assertTrue(nb_shape_i == 1)
2802
2803    def test_merge(self):
2804        x = theano.tensor.vector()
2805        y = theano.tensor.vector()
2806
2807        def sum(s):
2808            return s + 1
2809
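        # Without an explicit n_steps, scans over different sequences may
        # iterate a different number of times, so they cannot be merged.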
2810        sx, upx = theano.scan(sum, sequences=[x])
2811        sy, upy = theano.scan(sum, sequences=[y])
2812
2813        f = theano.function([x, y], [sx, sy],
2814                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2815        topo = f.maker.fgraph.toposort()
2816        scans = [n for n in topo if isinstance(
2817            n.op, theano.scan_module.scan_op.Scan)]
2818        self.assertTrue(len(scans) == 2)
2819
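        # Scans with different explicit n_steps cannot be merged either.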
2820        sx, upx = theano.scan(sum, sequences=[x], n_steps=2)
2821        sy, upy = theano.scan(sum, sequences=[y], n_steps=3)
2822
2823        f = theano.function([x, y], [sx, sy],
2824                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2825        topo = f.maker.fgraph.toposort()
2826        scans = [n for n in topo if isinstance(
2827            n.op, theano.scan_module.scan_op.Scan)]
2828        self.assertTrue(len(scans) == 2)
2829
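        # With identical n_steps, the scans can be merged even though they
        # run over different sequences.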
2830        sx, upx = theano.scan(sum, sequences=[x], n_steps=4)
2831        sy, upy = theano.scan(sum, sequences=[y], n_steps=4)
2832
2833        f = theano.function([x, y], [sx, sy],
2834                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2835        topo = f.maker.fgraph.toposort()
2836        scans = [n for n in topo if isinstance(
2837            n.op, theano.scan_module.scan_op.Scan)]
2838        self.assertTrue(len(scans) == 1)
2839
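        # Identical scans over the same sequence are merged as well.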
2840        sx, upx = theano.scan(sum, sequences=[x])
2841        sy, upy = theano.scan(sum, sequences=[x])
2842
2843        f = theano.function([x], [sx, sy],
2844                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2845        topo = f.maker.fgraph.toposort()
2846        scans = [n for n in topo if isinstance(
2847            n.op, theano.scan_module.scan_op.Scan)]
2848        self.assertTrue(len(scans) == 1)
2849
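        # Requesting a different mode for one of the scans does not prevent
        # merging.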
2850        sx, upx = theano.scan(sum, sequences=[x])
2851        sy, upy = theano.scan(sum, sequences=[x], mode='FAST_COMPILE')
2852
2853        f = theano.function([x], [sx, sy],
2854                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2855        topo = f.maker.fgraph.toposort()
2856        scans = [n for n in topo if isinstance(
2857            n.op, theano.scan_module.scan_op.Scan)]
2858        self.assertTrue(len(scans) == 1)
2859
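        # Different truncate_gradient values prevent merging.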
2860        sx, upx = theano.scan(sum, sequences=[x])
2861        sy, upy = theano.scan(sum, sequences=[x], truncate_gradient=1)
2862
2863        f = theano.function([x], [sx, sy],
2864                            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2865        topo = f.maker.fgraph.toposort()
2866        scans = [n for n in topo if isinstance(
2867            n.op, theano.scan_module.scan_op.Scan)]
2868        self.assertTrue(len(scans) == 2)
2869
2870    def test_merge_3scans(self):
2871        # This test checks a case where we have 3 scans, two of them
2872        # cannot be merged together, but the third one can be merged with
2873        # either.
2874        x = theano.tensor.vector()
2875        y = theano.tensor.vector()
2876
2877        def sum(s):
2878            return s + 1
2879
2880        sx, upx = theano.scan(sum, sequences=[x], n_steps=4, name='X')
2881        # We need to use an expression of y rather than y so the toposort
2882        # comes up with the 'Y' scan last.
2883        sy, upy = theano.scan(sum, sequences=[2 * y + 2], n_steps=4, name='Y')
2884        sz, upz = theano.scan(sum, sequences=[sx], n_steps=4, name='Z')
2885
2886        f = theano.function(
2887            [x, y], [sy, sz],
2888            mode=mode_with_opt.excluding('scanOp_pushout_seqs_ops'))
2889        topo = f.maker.fgraph.toposort()
2890        scans = [n for n in topo if isinstance(
2891            n.op, theano.scan_module.scan_op.Scan)]
2892        self.assertTrue(len(scans) == 2)
2893
2894        rng = np.random.RandomState(utt.fetch_seed())
2895        x_val = rng.uniform(size=(4,)).astype(theano.config.floatX)
2896        y_val = rng.uniform(size=(4,)).astype(theano.config.floatX)
2897        # Run it so DebugMode can detect optimization problems.
2898        f(x_val, y_val)
2899
2900    def test_pushout_seqs(self):
2901
        def init_predictive_output(inputs, targets, hyp, x_star, s_star):
            E = hyp.shape[0]

            def init_K(i, X, Y):
                XX = X.sum(1).reshape((X.shape[0], 1))
                K = (XX + XX.T)
                return K.sum()

            beta, K_updts = theano.scan(init_K, sequences=tensor.arange(E),
                                        non_sequences=[inputs, targets])

            # mean
            def predict_mean_i(i, x_star, s_star, X, beta, h):
                n, D = tensor.shape(X)
                # rescale every dimension by the corresponding inverse
                # lengthscale
                iL = tensor.diag(h[i, :D])
                inp = (X - x_star).dot(iL)

                # compute the mean
                B = iL.dot(s_star).dot(iL)
                t = inp.dot(B)

                lb = (inp * t).sum() + beta.sum()

                Mi = tensor.sum(lb) * h[i, D]
                return Mi

            M, M_updts = theano.scan(predict_mean_i,
                                     sequences=tensor.arange(E),
                                     non_sequences=[x_star, s_star, inputs,
                                                    beta, hyp])
            return M
2933
2934        # some initializations
        hypx = np.log(np.tile([1, 1, 1, 1, 1, 1, 0.01], (3, 1)))
2936
2937        # variables used in the following expressions
2938        hyp = theano.shared(hypx)
2939        inputs = tensor.dmatrix('X')
2940        targets = tensor.dmatrix('Y')
2941        x_star = tensor.dvector('x_star')
2942        s_star = tensor.dmatrix('s_star')
2943
        M = init_predictive_output(inputs, targets, hyp, x_star, s_star)
2945
        X = np.random.random((10, 4))
        Y = np.random.random((10, 3))
        test_m = np.random.random((4,))
2949        test_s = np.eye(4)
2950
2951        # Compute expected outputs (jacobian of M wrt x_star)
        dfdm = theano.function([inputs, targets, x_star, s_star],
                               [tensor.grad(M[0], x_star),
                                tensor.grad(M[1], x_star),
                                tensor.grad(M[2], x_star)])
        expected_output = dfdm(X, Y, test_m, test_s)
2957
2958        # equivalent code for the jacobian using scan
        dMdm, dMdm_updts = theano.scan(lambda i, M, x: tensor.grad(M[i], x),
                                       sequences=tensor.arange(M.shape[0]),
                                       non_sequences=[M, x_star])
        dfdm = theano.function([inputs, targets, x_star, s_star],
                               [dMdm[0], dMdm[1], dMdm[2]])
        scan_output = dfdm(X, Y, test_m, test_s)
2965
2966        # equivalent code for the jacobian using tensor.jacobian
        dMdm_j = tensor.jacobian(M, x_star)
        dfdm_j = theano.function([inputs, targets, x_star, s_star],
                                 [dMdm_j[0], dMdm_j[1], dMdm_j[2]])
        jacobian_outputs = dfdm_j(X, Y, test_m, test_s)
2971
2972        utt.assert_allclose(expected_output, scan_output)
2973        utt.assert_allclose(expected_output, jacobian_outputs)
2974
2975    @theano.change_flags(on_opt_error='raise')
2976    def test_pushout_seqs2(self):
2977        # This test for a bug with PushOutSeqScan that was reported on the
2978        # theano-user mailing list where the optimization raised an exception
2979        # when applied on this graph.
2980        x = tensor.matrix()
2981        outputs, updates = theano.scan(
2982            lambda x: [x*x, tensor.constant(0).copy().copy()],
2983            n_steps=2,
2984            sequences=[],
2985            non_sequences=[],
2986            outputs_info=[x, None])
2987
2988        # Compile a theano function where any optimization error will lead to
2989        # an exception being raised
2990        theano.function([x], outputs, updates=updates)
2991
2992    @theano.change_flags(on_opt_error='raise')
2993    def test_pushout_nonseq(self):
        # Test case originally reported by Daniel Renshaw. The crash occurred
        # during the optimization PushOutNonSeqScan when it attempted to
        # replace a scan node with two outputs while providing a replacement
        # for only one of those outputs. This led the optimization to raise
        # an exception.
2999
3000        outputs, _ = theano.scan(lambda x: (x * x, x),
3001                                 non_sequences=[2], n_steps=2)
3002        f = theano.function(inputs=[], outputs=outputs)
3003
3004        outs = f()
3005        expected_outs = [[4, 4], [2, 2]]
3006        utt.assert_allclose(outs, expected_outs)
3007
3008    def test_sequence_dict(self):
3009        # Test that we can specify sequences as a dictionary with
3010        # only the 'input' key
3011        def incr(s):
3012            return s + 1
3013
3014        x = theano.tensor.vector()
3015        sx, upx = theano.scan(
3016            fn=incr,
3017            sequences=[{'input': x}])
3018        f = theano.function([x], sx)
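        # A small sanity check (an addition, not part of the original test):
        # each step should simply add 1 to the corresponding element.
        utt.assert_allclose(
            f(np.asarray([0, 1, 2], dtype=theano.config.floatX)),
            [1, 2, 3])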
3019
3020    def test_hash(self):
3021        x = theano.tensor.vector()
3022        y = theano.tensor.vector()
3023        scan1, updates = theano.scan(lambda _x: _x + 1, x)
3024        scan2, updates = theano.scan(lambda _x: _x + 1, y)
3025        assert scan1.owner.op == scan2.owner.op
3026        assert hash(scan1.owner.op) == hash(scan2.owner.op)
3027
3028    def test_same(self):
        # This test checks a bug discovered by Arnaud; it is based
        # on his code
3031
3032        x = theano.tensor.fmatrix('x')
3033
3034        mem_val = np.zeros((2,), dtype='float32')
3035        memory = theano.shared(mem_val)
3036        W = theano.shared(np.random.random((5, 2)).astype('float32'))
3037
3038        def f(inp, mem):
3039            i = theano.tensor.join(0, inp, mem)
3040            d = theano.tensor.dot(i, W)
3041            return d, d
3042
3043        outs, updts = theano.scan(f, sequences=[x],
3044                                  non_sequences=[],
3045                                  outputs_info=[None, memory])
3046
3047        f = theano.function([x], outs[0])
3048        f2 = theano.function([x], outs[1])
3049
3050        x_val = np.random.random((4, 3)).astype('float32')
3051
3052        f_vals = f(x_val)
3053        memory.set_value(mem_val)
3054        f2_vals = f2(x_val)
3055        utt.assert_allclose(f_vals, f2_vals)
3056
3057    def test_reduce_memory_consumption(self):
3058
3059        x = theano.shared(np.asarray(
3060            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
3061        o, _ = theano.reduce(lambda v, acc: acc + v,
3062                             x,
3063                             theano.tensor.constant(
3064                                 np.asarray(0.,
3065                                               dtype=theano.config.floatX)))
3066        mode = theano.compile.mode.FAST_RUN
3067        mode = mode.excluding('inplace')
3068        f1 = theano.function([], o, mode=mode)
3069        inputs, outputs = clone_optimized_graph(f1)
3070
3071        scan_nodes = grab_scan_node(outputs[0])
3072        assert scan_nodes is not None
3073        scan_node = scan_nodes[0]
3074        f1 = theano.function(inputs, scan_node.inputs[2])
3075
3076        # Originally, the shape would have been 1 due to the SaveMem
3077        # optimization reducing the size to the number of taps (in this case
3078        # 1) provided to the inner function. Now, because of the memory-reuse
3079        # feature in Scan it can be 2 because SaveMem needs to keep a
3080        # larger buffer to avoid aliasing between the inputs and the outputs.
3081        if theano.config.scan.allow_output_prealloc:
3082            assert f1().shape[0] == 2
3083        else:
3084            assert f1().shape[0] == 1
3085
3086        gx = theano.tensor.grad(o, x)
3087        f2 = theano.function([], gx)
3088        utt.assert_allclose(f2(), np.ones((10,)))
3089
3090    def test_foldl_memory_consumption(self):
3091        x = theano.shared(np.asarray(
3092            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
3093        o, _ = theano.foldl(lambda v, acc: acc + v,
3094                            x,
3095                            theano.tensor.constant(
3096                                np.asarray(0.,
3097                                              dtype=theano.config.floatX)))
3098
3099        mode = theano.compile.mode.FAST_RUN
3100        mode = mode.excluding('inplace')
3101        f0 = theano.function([], o, mode=mode)
3102        inputs, outputs = clone_optimized_graph(f0)
3103
3104        scan_nodes = grab_scan_node(outputs[0])
3105        assert scan_nodes is not None
3106        scan_node = scan_nodes[0]
3107        f1 = theano.function(inputs, scan_node.inputs[2])
3108
3109        # Originally, the shape would have been 1 due to the SaveMem
3110        # optimization reducing the size to the number of taps (in this case
3111        # 1) provided to the inner function. Now, because of the memory-reuse
3112        # feature in Scan it can be 2 because SaveMem needs to keep a
3113        # larger buffer to avoid aliasing between the inputs and the outputs.
3114        if theano.config.scan.allow_output_prealloc:
3115            assert f1().shape[0] == 2
3116        else:
3117            assert f1().shape[0] == 1
3118
3119        gx = theano.tensor.grad(o, x)
3120        f2 = theano.function([], gx)
3121        utt.assert_allclose(f2(), np.ones((10,)))
3122
3123    def test_foldr_memory_consumption(self):
3124
3125        x = theano.shared(np.asarray(
3126            np.random.uniform(size=(10,)), dtype=theano.config.floatX))
3127        o, _ = theano.foldr(lambda v, acc: acc + v,
3128                            x,
3129                            theano.tensor.constant(
3130                                np.asarray(0.,
3131                                              dtype=theano.config.floatX)))
3132
3133        mode = theano.compile.mode.FAST_RUN
3134        mode = mode.excluding('inplace')
3135        f1 = theano.function([], o, mode=mode)
3136        inputs, outputs = clone_optimized_graph(f1)
3137
3138        scan_nodes = grab_scan_node(outputs[0])
3139        assert scan_nodes is not None
3140        scan_node = scan_nodes[0]
3141        f1 = theano.function(inputs, scan_node.inputs[2])
3142
3143        # Originally, the shape would have been 1 due to the SaveMem
3144        # optimization reducing the size to the number of taps (in this case
3145        # 1) provided to the inner function. Now, because of the memory-reuse
3146        # feature in Scan it can be 2 because SaveMem needs to keep a
3147        # larger buffer to avoid aliasing between the inputs and the outputs.
3148        if theano.config.scan.allow_output_prealloc:
3149            assert f1().shape[0] == 2
3150        else:
3151            assert f1().shape[0] == 1
3152
3153        gx = theano.tensor.grad(o, x)
3154        f2 = theano.function([], gx)
3155        utt.assert_allclose(f2(), np.ones((10,)))
3156
3157    @attr('slow')
3158    def test_rop2(self):
3159        seed = utt.fetch_seed()
3160        rng = np.random.RandomState(seed)
3161        floatX = theano.config.floatX
3162        v_u = np.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
3163        v_W = np.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
3164        v_h0 = np.array(rng.uniform(size=(5,)) - .5, dtype=floatX)
3165
3166        v_eu = np.array(rng.uniform(size=(3, 5)) - .5, dtype=floatX)
3167        v_eW = np.array(rng.uniform(size=(5, 5)) - .5, dtype=floatX)
3168        v_eh0 = np.array(rng.uniform(size=(5,)) - .5, dtype=floatX)
3169
3170        def rnn_fn(_u, _y, _W):
3171
3172            srng = theano.tensor.shared_randomstreams.RandomStreams(seed)
3173            tmp_val = _u + _y + srng.uniform(size=v_h0.shape) *\
3174                        np.asarray(1e-6, dtype=floatX)
3175            sl_o = theano.tensor.tanh(theano.tensor.dot(_W, tmp_val))
3176            return sl_o, tmp_val
3177
3178        u = theano.tensor.matrix('U')
3179        h0 = theano.tensor.vector('h0')
3180        W = theano.tensor.matrix('W')
3181
3182        _u = theano.tensor.specify_shape(u, v_u.shape)
3183        _u.name = '_U'
3184        _h0 = theano.tensor.specify_shape(h0, v_h0.shape)
3185        _h0.name = '_h0'
3186        _W = theano.tensor.specify_shape(W, v_W.shape)
3187        _W.name = '_W'
3188
3189        [o, _], _ = theano.scan(rnn_fn,
3190                           sequences=_u,
3191                           outputs_info=[_h0, None],
3192                           non_sequences=_W,
3193                           name='rnn_fn')
3194        o = o[-1]
3195        eu = theano.tensor.matrix('eu')
3196        eh0 = theano.tensor.vector('eh0')
3197        eW = theano.tensor.matrix('eW')
3198
3199        nwo_u = theano.tensor.Rop(o, _u, eu)
3200        nwo_h0 = theano.tensor.Rop(o, _h0, eh0)
3201        nwo_W = theano.tensor.Rop(o, _W, eW)
3202        fn_rop = theano.function([u, h0, W, eu, eh0, eW],
3203                                 [nwo_u, nwo_h0, nwo_W, o],
3204                                 on_unused_input='ignore')
3205        vnu, vnh0, vnW, vno = fn_rop(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
3206
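        # Validate the Rop results against Jacobian-vector products computed
        # explicitly with scan, one element of the output at a time.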
3207        n2o_u, _ = theano.scan(lambda i, o, u, h0, W, eu: \
3208                                (theano.tensor.grad(o[i], u) * eu).sum(),
3209                              sequences=tensor.arange(o.shape[0]),
3210                              non_sequences=[o, u, h0, W, eu],
3211                              name='jacobU')
3212
3213        n2o_h0, _ = theano.scan(lambda i, o, u, h0, W, eh0: \
3214                                  (theano.tensor.grad(o[i], h0) * eh0).sum(),
3215                              sequences=tensor.arange(o.shape[0]),
3216                              non_sequences=[o, u, h0, W, eh0],
3217                              name='jacobh')
3218
3219        n2o_W, _ = theano.scan(lambda i, o, u, h0, W, eW: \
3220                                  (theano.tensor.grad(o[i], W) * eW).sum(),
3221                              sequences=tensor.arange(o.shape[0]),
3222                              non_sequences=[o, u, h0, W, eW],
3223                             name='jacobW')
3224
3225        fn_test = theano.function([u, h0, W, eu, eh0, eW],
3226                                  [n2o_u, n2o_h0, n2o_W, o],
3227                                  on_unused_input='ignore')
3228
3229        tnu, tnh0, tnW, tno = fn_test(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
3230        utt.assert_allclose(vnu, tnu, atol=1e-6)
3231        utt.assert_allclose(vnh0, tnh0, atol=1e-6)
3232        utt.assert_allclose(vnW, tnW, atol=2e-6)
3233
3234    def test_rop(self):
3235        seed = utt.fetch_seed()
3236        rng = np.random.RandomState(seed)
3237        floatX = theano.config.floatX
3238        v_u = np.array(rng.uniform(size=(20, 5)), dtype=floatX)
3239        v_W = np.array(rng.uniform(size=(5, 5)), dtype=floatX)
3240        v_h0 = np.array(rng.uniform(size=(5,)), dtype=floatX)
3241
3242        v_eu = np.array(rng.uniform(size=(20, 5)), dtype=floatX)
3243        v_eW = np.array(rng.uniform(size=(5, 5)), dtype=floatX)
3244        v_eh0 = np.array(rng.uniform(size=(5,)), dtype=floatX)
3245
3246        def rnn_fn(_u, _y, _W):
3247            sl_o = theano.tensor.tanh(theano.tensor.dot(_W, (_u + _y)))
3248            return sl_o
3249
3250        u = theano.tensor.matrix('U')
3251        h0 = theano.tensor.vector('h0')
3252        W = theano.tensor.matrix('W')
3253
3254        _u = theano.tensor.specify_shape(u, v_u.shape)
3255        _u.name = '_U'
3256        _h0 = theano.tensor.specify_shape(h0, v_h0.shape)
3257        _h0.name = '_h0'
3258        _W = theano.tensor.specify_shape(W, v_W.shape)
3259        _W.name = '_W'
3260
3261        o, _ = theano.scan(rnn_fn,
3262                           sequences=_u,
3263                           outputs_info=_h0,
3264                           non_sequences=_W,
3265                           name='rnn_fn')
3266        o = o[-1]
3267        eu = theano.tensor.matrix('eu')
3268        eh0 = theano.tensor.vector('eh0')
3269        eW = theano.tensor.matrix('eW')
3270
3271        nwo_u = theano.tensor.Rop(o, _u, eu)
3272        nwo_h0 = theano.tensor.Rop(o, _h0, eh0)
3273        nwo_W = theano.tensor.Rop(o, _W, eW)
3274        fn_rop = theano.function([u, h0, W, eu, eh0, eW],
3275                                 [nwo_u, nwo_h0, nwo_W],
3276                                 on_unused_input='ignore')
3277
3278        n2o_u, _ = theano.scan(lambda i, o, u, h0, W, eu: \
3279                                (theano.tensor.grad(o[i], u) * eu).sum(),
3280                              sequences=tensor.arange(o.shape[0]),
3281                              non_sequences=[o, u, h0, W, eu],
3282                              name='jacobU')
3283
3284        n2o_h0, _ = theano.scan(lambda i, o, u, h0, W, eh0: \
3285                                  (theano.tensor.grad(o[i], h0) * eh0).sum(),
3286                              sequences=tensor.arange(o.shape[0]),
3287                              non_sequences=[o, u, h0, W, eh0],
3288                              name='jacobh')
3289
3290        n2o_W, _ = theano.scan(lambda i, o, u, h0, W, eW: \
3291                                  (theano.tensor.grad(o[i], W) * eW).sum(),
3292                              sequences=tensor.arange(o.shape[0]),
3293                              non_sequences=[o, u, h0, W, eW],
3294                             name='jacobW')
3295
3296        fn_test = theano.function([u, h0, W, eu, eh0, eW],
3297                                  [n2o_u, n2o_h0, n2o_W],
3298                                  on_unused_input='ignore')
3299
3300        vnu, vnh0, vnW = fn_rop(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
3301        tnu, tnh0, tnW = fn_test(v_u, v_h0, v_W, v_eu, v_eh0, v_eW)
3302
3303        utt.assert_allclose(vnu, tnu, atol=1e-6)
3304        utt.assert_allclose(vnh0, tnh0, atol=1e-6)
3305        utt.assert_allclose(vnW, tnW, atol=1e-6)
3306
3307    def test_pushout_dot(self):
3308        W = tensor.matrix('W')
3309        h = tensor.matrix('h')
3310
3311        o, _ = theano.scan(lambda hi, him1, W: (hi, tensor.dot(hi+him1, W)),
3312                           outputs_info=[tensor.zeros([h.shape[1]]), None],
3313                           sequences=[h],
3314                           non_sequences=[W])
3315
3316        f = theano.function([W, h], o, mode=mode_with_opt)
3317
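        # The pushout optimization should move the dot outside of the scan,
        # leaving no Dot node in the inner graph.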
3318        scan_nodes = [x for x in f.maker.fgraph.toposort()
3319                     if isinstance(x.op,
3320                                   theano.scan_module.scan_op.Scan)]
3321        assert len(scan_nodes) == 1
3322        scan_op = scan_nodes[0].op
3323        assert not any(isinstance(n.op, tensor.Dot) for n in
3324                       scan_op.fn.maker.fgraph.apply_nodes)
3325
3326    def test_pushout_all(self):
3327        W1 = tensor.matrix('W1')
3328        W2 = tensor.matrix('W2')
3329        h0 = tensor.vector('h0')
3330
3331        def lambda_fn(h, W1, W2):
3332            return tensor.dot(h, W1 + W2)
3333
3334        o, _ = theano.scan(lambda_fn,
3335                           non_sequences=[h0, W1, W2],
3336                           n_steps=5)
3337
3338        f = theano.function([h0, W1, W2], o, mode=mode_with_opt)
3339
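        # The inner function depends only on non-sequences, so the whole
        # computation can be pushed out of the scan and no Scan node should
        # remain after optimization.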
3340        scan_nodes = [x for x in f.maker.fgraph.toposort()
3341                     if isinstance(x.op,
3342                                   theano.scan_module.scan_op.Scan)]
3343        assert len(scan_nodes) == 0
3344
3345        seed = utt.fetch_seed()
3346        rng = np.random.RandomState(seed)
3347        floatX = theano.config.floatX
3348        v_h = np.array(rng.uniform(size=(2,)), dtype=floatX)
3349        v_W1 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)
3350        v_W2 = np.array(rng.uniform(size=(2, 2)), dtype=floatX)
3351
3352        v_out = np.dot(v_h, v_W1 + v_W2)
3353        sol = np.zeros((5, 2))
3354        # This line is here to make sol have the same shape as the output of
        # theano. Note that what we ask theano to do is to repeat the
        # 2-element vector v_out 5 times
3357        sol[:, :] = v_out
3358        utt.assert_allclose(sol, f(v_h, v_W1, v_W2))
3359
3360    def test_pushout_while(self):
3361        # Ensure that the optimizations for Scan that push computation out of
3362        # the Scan don't alter the result for 'as_while' scans.
3363
3364        W1 = tensor.matrix('W1')
3365        W2 = tensor.matrix('W2')
3366        step_indices = tensor.vector('step_indices')
3367
3368        def lambda_fn(step_idx, W1, W2):
3369            until_condition = theano.scan_module.until(step_idx > 2)
3370            return tensor.dot(W1, W2), until_condition
3371
3372        # Compile a function with the optimization
3373        o, _ = theano.scan(lambda_fn,
3374                           sequences=[step_indices, W1],
3375                           non_sequences=[W2],
3376                           n_steps=5)
3377
3378        f = theano.function([W1, W2, step_indices], o, mode=mode_with_opt)
3379
        # Compile a theano function without the optimization
3381        o, _ = theano.scan(lambda_fn,
3382                           sequences=[step_indices, W1],
3383                           non_sequences=[W2],
3384                           n_steps=5, mode='FAST_COMPILE')
3385
3386        f_ref = theano.function([W1, W2, step_indices], o, mode='FAST_COMPILE')
3387
3388        # Compare the results of the two implementations
3389        input_values = [np.random.random((5, 5)).astype("float32"),
3390                        np.random.random((5, 5)).astype("float32"),
3391                        np.arange(5).astype("float32")]
3392
3393        out = f(*input_values)
3394        out_ref = f_ref(*input_values)
3395        utt.assert_allclose(out, out_ref)
3396
3397    def test_pushout(self):
3398        W1 = tensor.matrix('W1')
3399        W2 = tensor.matrix('W2')
3400        h0 = tensor.vector('h0')
3401
3402        def lambda_fn(h, W1, W2):
3403            return tensor.dot(h, W1 + W2)
3404
3405        o, _ = theano.scan(lambda_fn,
3406                           outputs_info=h0,
3407                           non_sequences=[W1, W2],
3408                           n_steps=5)
3409
3410        f = theano.function([h0, W1, W2], o, mode=mode_with_opt)
3411
3412        scan_node = [x for x in f.maker.fgraph.toposort()
3413                     if isinstance(x.op,
3414                                   theano.scan_module.scan_op.Scan)][0]
3415        assert len([x for x in scan_node.op.fn.maker.fgraph.toposort()
3416                    if isinstance(x.op, theano.tensor.Elemwise)]) == 0
3417
3418    def test_pushout_nomodif(self):
3419        inp = tensor.matrix('inp')
3420
3421        def fn(i, i_tm1):
3422            return i + 10, i_tm1
3423
3424        ([i_t, i_tm1], _) = theano.scan(
3425            fn, sequences=[inp],
3426            outputs_info=[np.asarray([0.0, 0.0], theano.config.floatX),
3427                          None])
3428        f = theano.function([inp], [i_t, i_tm1])
3429        val = np.arange(10).reshape(5, 2).astype(theano.config.floatX)
3430        ret = f(val)
3431        utt.assert_allclose(ret[0], val + 10)
3432        utt.assert_allclose(ret[1], [[0.,  0.],
3433                                     [10., 11.],
3434                                     [12., 13.],
3435                                     [14., 15.],
3436                                     [16., 17.]])
3437
3438    def test_alloc_inputs1(self):
3439        W1 = tensor.matrix('W1')
3440        W2 = tensor.matrix('W2')
3441        h0 = tensor.vector('h0')
3442
3443        def lambda_fn(h, W1, W2):
3444            return tensor.dot(h, W1 * W2)
3445        o, _ = theano.scan(lambda_fn,
3446                           outputs_info=h0,
3447                           non_sequences=[W1, tensor.zeros_like(W2)],
3448                           n_steps=5)
3449
3450        f = theano.function([h0, W1, W2], o, mode=mode_with_opt)
3451        scan_node = [x for x in f.maker.fgraph.toposort()
3452                     if isinstance(x.op,
3453                                   theano.scan_module.scan_op.Scan)][0]
3454        assert len([x for x in scan_node.op.fn.maker.fgraph.toposort()
3455                    if isinstance(x.op, theano.tensor.Elemwise)]) == 0
3456
3457    def test_alloc_inputs2(self):
        raise SkipTest("This test depends on an optimization for "
3459                       "scan that has not been implemented yet.")
3460        W1 = tensor.matrix()
3461        W2 = tensor.matrix()
3462        h0 = tensor.vector()
3463
3464        def lambda_fn(W1, h, W2):
3465            return W1 * tensor.dot(h, W2)
3466
3467        o, _ = theano.scan(lambda_fn,
3468                           sequences=tensor.zeros_like(W1),
3469                           outputs_info=h0,
3470                           non_sequences=[tensor.zeros_like(W2)],
3471                           n_steps=5)
3472
3473        f = theano.function([h0, W1, W2], o, mode=mode_with_opt)
3474        scan_node = [x for x in f.maker.fgraph.toposort()
3475                     if isinstance(x.op,
3476                                   theano.scan_module.scan_op.Scan)][0]
3477
3478        assert len([x for x in scan_node.op.fn.maker.fgraph.toposort()
3479                    if isinstance(x.op, theano.tensor.Elemwise)]) == 0
3480
3481    def test_alloc_inputs3(self):
3482        _W1 = tensor.matrix()
3483        _W2 = tensor.matrix()
3484        _h0 = tensor.vector()
3485
3486        W1 = tensor.specify_shape(_W1, (3, 3))
3487        W2 = tensor.specify_shape(_W2, (3, 3))
3488        h0 = tensor.specify_shape(_h0, (3,))
3489
3490        def lambda_fn(W1, h, W2):
3491            return W1 * tensor.dot(h, W2)
3492
3493        o, _ = theano.scan(lambda_fn,
3494                           sequences=tensor.zeros_like(W1),
3495                           outputs_info=h0,
3496                           non_sequences=[tensor.zeros_like(W2)],
3497                           n_steps=5)
3498
3499        f = theano.function([_h0, _W1, _W2], o, mode=mode_with_opt)
3500        scan_node = [x for x in f.maker.fgraph.toposort()
3501                     if isinstance(x.op,
3502                                   theano.scan_module.scan_op.Scan)][0]
3503
3504        assert len(scan_node.op.inputs) == 1
3505
3506    def test_while0(self):
3507        x = tensor.vector('x')
3508
3509        def lambda_fn(x_t):
3510            return x_t + 1, theano.scan_module.until(x_t > 3)
3511        o, _ = theano.scan(lambda_fn, x)
3512        f = theano.function([x], o)
3513        vx = np.zeros((50,), dtype=theano.config.floatX)
3514        vx[23] = 4
3515        out = f(vx)
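        # The until condition (x_t > 3) first fires at index 23 and the
        # triggering step is included in the output, hence 24 steps.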
3516        assert len(out) == 24
3517
3518    def test_while1(self):
3519        x = tensor.vector('x')
3520
3521        def lambda_fn(x_t):
3522            return x_t + 1, theano.scan_module.until(x_t > 3)
3523        o, _ = theano.scan(lambda_fn, x)
3524        o2, _ = theano.scan(lambda x_t: x_t + 2, x)
3525
3526        f = theano.function([x], [o, o2], mode=mode_with_opt)
3527        vx = np.zeros((50,), dtype=theano.config.floatX)
3528        vx[23] = 4
3529        out, out2 = f(vx)
3530        assert len(out) == 24
3531        assert np.all(out2 == vx + 2)
3532        lssc = [x for x in f.maker.fgraph.toposort()
3533                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
        # One scan node gets optimized out
3535        assert len(lssc) == 1
3536
    @dec.skipif(True,
                ("This test fails because untyped outputs_info "
                 "are always given the smallest dtype. There is "
                 "no upcast of outputs_info in scan for now."))
3541    def test_outputs_info_not_typed(self):
3542        # This was ticket 766
3543
3544        coefficients = theano.tensor.vector("coefficients")
3545        x = tensor.scalar("x")
3546        max_coefficients_supported = 10000
3547
3548        # Generate the components of the polynomial
3549        full_range = theano.tensor.arange(max_coefficients_supported)
3550        components, updates = theano.scan(
3551            fn=lambda coeff, power, free_var: coeff * (free_var ** power),
3552            sequences=[coefficients, full_range],
3553            non_sequences=x)
3554        polynomial1 = components.sum()
3555        polynomial2, updates = theano.scan(
3556            fn=lambda coeff, power, prev, free_var: \
3557                            prev + coeff * (free_var ** power),
3558            outputs_info=theano.tensor.constant(0, dtype='floatX'),
3559            sequences=[coefficients, full_range],
3560            non_sequences=x)
3561
3562        # python int
3563        polynomial3, updates = theano.scan(
3564            fn=lambda coeff, power, prev, free_var: \
3565                            prev + coeff * (free_var ** power),
3566            outputs_info=0,
3567            sequences=[coefficients, full_range],
3568            non_sequences=x)
3569
3570        # python float
3571        polynomial4, updates = theano.scan(
3572            fn=lambda coeff, power, prev, free_var: \
3573                            prev + coeff * (free_var ** power),
3574            outputs_info=0.,
3575            sequences=[coefficients, full_range],
3576            non_sequences=x)
3577
3578        calculate_polynomial = theano.function(
3579            inputs=[coefficients, x],
3580            outputs=[polynomial1,
3581                     polynomial2[-1],
3582                     polynomial3[-1],
3583                     polynomial4[-1]])
3584
3585        test_coeff = np.asarray([1, 0, 2], dtype=theano.config.floatX)
3586        # This will be tested by DEBUG_MODE
3587        out = calculate_polynomial(test_coeff, 3)
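        # poly([1, 0, 2], x=3) == 1 + 0 * 3 + 2 * 3 ** 2 == 19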
        assert out[0] == 19
        assert out[1] == 19
        assert out[2] == 19
        assert out[3] == 19
3593
3594    def test_crash_nonseq_grad(self):
3595        # Test case was originally reported by Bitton Tenessi. It crashed
        # during the grad operation and this test validates that it now
3597        # raises a NullTypeGradError instead because the gradient relies on
3598        # the intermediary states of the random number generators used in the
3599        # test. The test case was modified from the original for simplicity
3600
3601        rand_stream = tensor.shared_randomstreams.RandomStreams()
3602        inp = tensor.matrix()
3603        norm_inp = inp / tensor.sum(inp, axis=0)
3604
3605        def unit_dropout(out_idx):
3606            def stochastic_pooling(in_idx):
3607                # sample the input matrix for each column according to the
3608                # column values
3609                pvals = norm_inp.T
3610                sample = rand_stream.multinomial(n=1, pvals=pvals)
3611                return inp + sample
3612
3613            pooled, updates_inner = theano.scan(fn=stochastic_pooling,
3614                                        sequences=tensor.arange(inp.shape[0]))
3615
3616            # randomly add stuff to units
3617            rand_nums = rand_stream.binomial(size=pooled.shape)
3618            return pooled + rand_nums, updates_inner
3619
3620        out, updates_outer = theano.scan(unit_dropout,
3621                                     sequences=[tensor.arange(inp.shape[0])])
3622
3623        assert_raises(theano.gradient.NullTypeGradError,
3624                      tensor.grad, out.sum(), inp)
3625
    def test_bugFunctionProvidesIntermediateNodesAsInputs(self):
3627        # This is a bug recently reported by Ilya
3628        # made it CPU friendly
3629        V = tensor.ftensor3('INPUT')
3630        orig = tensor.fmatrix('PARAM')
3631        # = gpu_from_host(orig)  # <-- this doesn't work
3632        W = orig + 2  # <-- has same effect but it works on CPU as well
3633        # W = T.fmatrix('PARAM') # <-- this line works
3634
3635        def one_step(v, W):
3636            o = v + 1 + W.sum()  # <-- this doesn't work
3637            # o = v + 1  # <-- this line works
3638            return o
3639
3640        OS, updates = theano.scan(
3641            fn=one_step,
3642            sequences=V,
3643            outputs_info=[None],
3644            non_sequences=[W])
3645
3646        O = OS.sum() + W.sum()
3647
        # This bug manifests itself by not allowing the function to compile,
        # so if it compiles it means the test passes
3650        f = theano.function([V, W], O)
3651
3652    def test_while2(self):
3653        x = tensor.vector('x')
3654
3655        def lambda_fn(x_t):
3656            return x_t + 1, theano.scan_module.until(x_t > 3)
3657        o, _ = theano.scan(lambda_fn, x)
3658        o2, _ = theano.scan(lambda x_t: (x_t + 2,
3659                                         theano.scan_module.until(x_t > 3)),
3660                            x)
3661
3662        f = theano.function([x], [o, o2], mode=mode_with_opt)
3663        vx = np.zeros((50,), dtype=theano.config.floatX)
3664        vx[23] = 4
3665        out, out2 = f(vx)
3666        assert len(out) == 24
3667        assert len(out2) == 24
3668        lssc = [x for x in f.maker.fgraph.toposort()
3669                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
3670        assert len(lssc) == 1
3671
3672    def test_while_infershape(self):
3673        x = tensor.vector('x')
3674
3675        def lambda_fn(x_t):
3676            return x_t + 1, theano.scan_module.until(x_t > 3)
3677        o, _ = theano.scan(lambda_fn, x)
3678
3679        f = theano.function([x], o.shape[0], mode=mode_with_opt)
3680        vx = np.zeros((50,), dtype=theano.config.floatX)
3681        vx[23] = 4
3682        out = f(vx)
3683        assert out == 24
3684
3685    def test_infershape_seq_shorter_nsteps(self):
3686        raise SkipTest("This is a generic problem with "
3687                       "infershape that has to be discussed "
3688                       "and figured out")
3689        x = tensor.vector('x')
3690        [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
3691                         sequences=x,
3692                         outputs_info=[None, x[0]],
3693                         n_steps=20)
3694
3695        f = theano.function([x],
3696                            [o1.shape[0], o2.shape[0]],
3697                            mode=mode_with_opt)
3698
3699        vx = np.ones((10,), dtype=theano.config.floatX)
3700        out1, out2 = f(vx)
3701        assert out1 == 10
3702        assert out2 == 10
3703        lssc = [x for x in f.maker.fgraph.toposort()
3704                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
3705        assert len(lssc) == 0
3706
3707    def test_infershape_nsteps_smaller_seq_length(self):
3708        x = tensor.vector('x')
3709        [o1, o2], _ = theano.scan(lambda x, y: (x + 1, y + x),
3710                         sequences=x,
3711                         outputs_info=[None, x[0]],
3712                         n_steps=20)
3713
3714        f = theano.function([x],
3715                            [o1.shape[0], o2.shape[0]],
3716                            mode=mode_with_opt)
3717
3718        vx = np.ones((30,), dtype=theano.config.floatX)
3719        o1, o2 = f(vx)
3720        assert o1 == 20
3721        assert o2 == 20
3722        lssc = [x for x in f.maker.fgraph.toposort()
3723                if isinstance(x.op, theano.scan_module.scan_op.Scan)]
3724        assert len(lssc) == 0
3725
3726    def test_oinp_iinp_iout_oout_mappings(self):
        # Test the mappings produced by
3728        # ScanOp.get_oinp_iinp_iout_oout_mappings()
3729
3730        rng = theano.tensor.shared_randomstreams.RandomStreams(123)
3731
3732        def inner_fct(seq, mitsot, sitsot, nitsot, nseq):
3733            random_scalar = rng.uniform((1,))[0]
3734            total = seq + mitsot + sitsot + nitsot + nseq + random_scalar
3735            return total, total, total
3736
3737        # Assemble a scan with one sequence, one mitsot, one sitsot, one nitsot
3738        # a non-sequence and a random state to test the mappings.
3739        seq = [tensor.vector()]
3740        non_seq = [tensor.scalar()]
3741        outputs_info = [dict(initial=tensor.vector(), taps=[-3, -1]),
3742                        tensor.scalar(), None]
3743
3744        scan_outputs, _ = theano.scan(fn=inner_fct, sequences=seq,
3745                                      outputs_info=outputs_info,
3746                                      non_sequences=non_seq)
3747
3748        # Compare the mappings with the expected values
3749        scan_node = scan_outputs[0].owner.inputs[0].owner
3750        mappings = scan_node.op.var_mappings
3751
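        # The outer inputs of the scan node are ordered as: n_steps (0),
        # the sequence (1), the mitsot initial state (2), the sitsot initial
        # state (3), the shared random state (4), the nitsot buffer length
        # (5) and the non-sequence (6).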
3752        assert mappings['inner_inp_from_outer_inp'] == {0 : [], 1 : [0],
3753                                                        2 : [1, 2], 3 : [3],
3754                                                        4 : [4], 5 : [],
3755                                                        6 : [5]}
3756        assert mappings['inner_out_from_outer_inp'] == {0 : [], 1 : [],
3757                                                        2 : [0], 3 : [1],
3758                                                        4 : [3], 5 : [2],
3759                                                        6 : []}
3760        assert mappings['outer_out_from_outer_inp'] == {0 : -1, 1 : -1,
3761                                                        2 : 0, 3 : 1,
3762                                                        4 : 3, 5 : 2,
3763                                                        6 : -1}
3764
3765        assert mappings['outer_inp_from_inner_inp'] == {0 : 1, 1 : 2,
3766                                                        2 : 2, 3 : 3,
3767                                                        4 : 4, 5 : 6}
3768        assert mappings['inner_out_from_inner_inp'] == {0 : [], 1 : [0],
3769                                                        2 : [0], 3 : [1],
3770                                                        4 : [3], 5 : []}
3771        assert mappings['outer_out_from_inner_inp'] == {0 : -1, 1 : 0,
3772                                                        2 : 0, 3 : 1,
3773                                                        4 : 3, 5 : -1}
3774
3775        assert mappings['outer_inp_from_inner_out'] == {0 : 2, 1 : 3,
3776                                                        2 : 5, 3 : 4}
3777        assert mappings['inner_inp_from_inner_out'] == {0 : [1, 2], 1 : [3],
3778                                                        2 : [], 3 : [4]}
3779        assert mappings['outer_out_from_inner_out'] == {0 : 0, 1 : 1,
3780                                                        2 : 2, 3 : 3}
3781
3782        assert mappings['outer_inp_from_outer_out'] == {0 : 2, 1 : 3,
3783                                                        2 : 5, 3 : 4}
3784        assert mappings['inner_inp_from_outer_out'] == {0 : [1, 2], 1 : [3],
3785                                                        2 : [], 3 : [4]}
3786        assert mappings['inner_out_from_outer_out'] == {0 : [0], 1 : [1],
3787                                                        2 : [2], 3 : [3]}
3788
    def test_grad_duplicate_outputs(self):
        # This test validates that taking the gradient of a scan, in which
        # multiple outputs are the same theano variable, works.

        def inner_fct(inp1, inp2, inp3):
            total = inp1 + inp2 + inp3
            return total, total

        # Assemble the scan
        seq = tensor.matrix()
        out_init = tensor.matrix()
        non_seq = tensor.vector()

        outputs_info = ([None, dict(initial=out_init, taps=[-3])])

        scan_outputs, _ = theano.scan(fn=inner_fct, sequences=seq,
                                      outputs_info=outputs_info,
                                      non_sequences=non_seq)

        # Attempt to take various gradients
        g_output0 = theano.grad(scan_outputs[0].sum(), [seq, out_init, non_seq])
        g_output1 = theano.grad(scan_outputs[1].sum(), [seq, out_init, non_seq])

        # Compile the function
        fct = theano.function([seq, out_init, non_seq],
                              g_output0 + g_output1)

        # Run the function and validate the outputs
        dtype = theano.config.floatX
        seq_value = np.random.random((10, 3)).astype(dtype)
        out_init_value = np.random.random((3, 3)).astype(dtype)
        non_seq_value = np.random.random((3)).astype(dtype)

        outputs = fct(seq_value, out_init_value, non_seq_value)

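        # The recurrence is total[t] = seq[t] + total[t - 3] + non_seq, so
        # seq[t] contributes to total[t], total[t + 3], total[t + 6], ...
        # Its gradient is the number of those steps falling within the 10
        # iterations (4 for t == 0, then 3, 3, 3, 2, 2, 2, 1, 1, 1). The
        # initial taps behave like three extra sequence elements in front,
        # and non_seq, used at every step, gets 4 + 3 + ... + 1 == 22.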
        expected_g_seq = np.array([[4, 4, 4],
                                   [3, 3, 3],
                                   [3, 3, 3],
                                   [3, 3, 3],
                                   [2, 2, 2],
                                   [2, 2, 2],
                                   [2, 2, 2],
                                   [1, 1, 1],
                                   [1, 1, 1],
                                   [1, 1, 1]])
        expected_g_out_init = expected_g_seq[:3]
        expected_g_non_seq = np.array([22, 22, 22])

        utt.assert_allclose(outputs[0], expected_g_seq)
        utt.assert_allclose(outputs[1], expected_g_out_init)
        utt.assert_allclose(outputs[2], expected_g_non_seq)
        utt.assert_allclose(outputs[3], expected_g_seq)
        utt.assert_allclose(outputs[4], expected_g_out_init)
        utt.assert_allclose(outputs[5], expected_g_non_seq)

    def test_grad_duplicate_outputs_connection_pattern(self):
        # This test checks for a crash in scan.connection_pattern when taking
        # the grad of a scan with certain combinations of outputs.

        def inner_fct(inp1, inp2, inp3, inp4, inp5, inp6):
            total = inp1 + inp2 + inp3 + inp4 + inp5 + inp6
            return total, total, total, total, total, total

        # Assemble the scan
        out_init = [tensor.vector(), tensor.vector(),
                    tensor.matrix(), tensor.matrix()]

        outputs_info = ([None, None, out_init[0], out_init[1],
                        dict(initial=out_init[2], taps=[-2, -1]),
                        dict(initial=out_init[3], taps=[-2, -1])])

        scan_outputs, _ = theano.scan(fn=inner_fct, outputs_info=outputs_info,
                                      n_steps=10)

        g_output0 = theano.grad(scan_outputs[0].sum(), out_init[1])

        # Validate that the connection pattern is as it should be
        node = scan_outputs[0].owner
        connection_pattern = node.op.connection_pattern(node)
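        # The scan node has 7 outer inputs (n_steps being input 0) and 6
        # outputs; only the four initial states (inputs 1 to 4) should be
        # connected to the outputs.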
        expected_connection_pattern = [[(j in [1, 2, 3, 4]) for i in range(6)]
                                       for j in range(7)]

        assert connection_pattern == expected_connection_pattern

    def test_grad_multiple_seqs_different_nsteps(self):
        # Example provided by Michael Forbes.
        # This test ensures that we clip the sequences to n_steps before
        # computing the gradient (so that when we reverse them we actually
        # get the right values in the gradient).
        c = theano.tensor.vector('c')
        x = theano.tensor.scalar('x')
        _max_coefficients_supported = 1000
        full_range = theano.tensor.arange(_max_coefficients_supported)
        components, updates = theano.scan(
            fn=lambda coeff, power, free_var: coeff * (free_var ** power),
            outputs_info=None,
            sequences=[c, full_range],
            non_sequences=x)
        P = components.sum()
        dP = theano.tensor.grad(P, x)
        tf = theano.function([c, x], dP)
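        # P(x) = 1 + 2x - 3x**2 + 4x**3, so dP/dx = 2 - 6x + 12x**2,
        # which at x == 2 gives 2 - 12 + 48 == 38.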
        assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 38

    def test_grad_of_grad_of_state(self):
        # Example provided by Michael Forbes.
        # This test ensures that we can compute gradients through a cost
        # defined in terms of gradients of scan.
        c = theano.tensor.vector('c')
        x = theano.tensor.scalar('x')
        _max_coefficients_supported = 1000
        full_range = theano.tensor.arange(_max_coefficients_supported)
        components, updates = theano.scan(
            fn=lambda coeff, power, free_var: coeff * (free_var ** power),
            outputs_info=None,
            sequences=[c, full_range],
            non_sequences=x)
        P = components.sum()
        dP = theano.tensor.grad(P, x).sum()
        ddP = theano.tensor.grad(dP, x)
        tf = theano.function([c, x], ddP)
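        # With the same polynomial as above, d2P/dx2 = -6 + 24x,
        # which at x == 2 gives -6 + 48 == 42.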
        assert tf([1.0, 2.0, -3.0, 4.0], 2.0) == 42

    def test_return_steps(self):
        rng = np.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        vWout = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        vW_in1 = asarrayX(rng.uniform(size=(2, 2), low=-5., high=5.))
        v_u1 = asarrayX(rng.uniform(size=(8, 2), low=-5., high=5.))
        v_u2 = asarrayX(rng.uniform(size=(8,), low=-5., high=5.))
        v_x0 = asarrayX(rng.uniform(size=(2,), low=-5., high=5.))
        v_y0 = asarrayX(rng.uniform(size=(3,)))

        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name='wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')

        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [y_tm3 + 1,
                    theano.dot(u1_t, W_in1) + u2_t * W_in2 + \
                        theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]

        rval, updates = theano.scan(f_rnn_cmpl,
                                    [u1, u2],
                                    [None,
                                     dict(initial=x0),
                                     dict(initial=y0, taps=[-1, -3])],
                                    W_in1,
                                    n_steps=None,
                                    truncate_gradient=-1,
                                    go_backwards=False)

        outputs = []
        outputs += [rval[0][-3:]]
        outputs += [rval[1][-2:]]
        outputs += [rval[2][-4:]]
        f4 = theano.function([u1, u2, x0, y0, W_in1],
                             outputs,
                             updates=updates,
                             allow_input_downcast=True)

        # compute the values in numpy
        v_x = np.zeros((8, 2), dtype=theano.config.floatX)
        v_y = np.zeros((8,), dtype=theano.config.floatX)
        v_x[0] = np.dot(v_u1[0], vW_in1) + v_u2[0] * vW_in2 + \
                    np.dot(v_x0, vW)
        v_y[0] = np.dot(v_x0, vWout) + v_y0[2]

        for i in xrange(1, 8):
            v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + \
                        np.dot(v_x[i - 1], vW)
            v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]

        (theano_dump, theano_x, theano_y) = f4(v_u1, v_u2, v_x0, v_y0, vW_in1)

        utt.assert_allclose(theano_x, v_x[-2:])
        utt.assert_allclose(theano_y, v_y[-4:])

    def test_opt_order(self):
        # Verify that scan optimizations are applied before blas
        # optimizations.
        # This is needed as otherwise, the dot won't become a dot22
        # so it will be slower and won't get transferred to the gpu.

        x = theano.tensor.matrix('x')
        A = theano.tensor.matrix('A')

        z, updates = theano.scan(
            theano.dot,
            sequences=[],
            non_sequences=[x, A],
            n_steps=2)
        f = theano.function([x, A], z)
        topo = f.maker.fgraph.toposort()
        if theano.config.mode != "FAST_COMPILE":
            assert any([isinstance(node.op, tensor.blas.Dot22)
                        for node in topo])

        vx = np.array([[1., 1.], [2., 2.]], dtype=theano.config.floatX)
        vA = np.array([[1., 1.], [1., 0.]], dtype=theano.config.floatX)
        vR = np.array([[[2, 1], [4, 2]], [[2, 1], [4, 2]]],
                      dtype=theano.config.floatX)
        utt.assert_allclose(f(vx, vA), vR)

    def test_savemem_opt(self):
        y0 = theano.shared(np.ones((2, 10)))
        [y1, y2], updates = theano.scan(lambda y: [y, y],
                                        outputs_info=[dict(initial=y0,
                                                           taps=[-2]), None],
                                        n_steps=5)
        rval = theano.function([], y2.sum())()

    def test_savemem_opt_0_step(self):
        # Test a case where the savemem optimization has the opportunity to
        # lower the number of steps of a Scan to 0. It tests that the
        # optimization doesn't do so since Scan nodes with 0
        # steps are not currently supported and doing so would result in a
        # crash during the function execution.

        def inner_scan_step(x_t_t, h_tm1, w):
            return tensor.dot(h_tm1, w) + x_t_t

        def outer_scan_step(x_t, w):
            h, _ = theano.scan(inner_scan_step,
                               sequences=[x_t[1:]],
                               outputs_info=[x_t[0]],
                               non_sequences=[w],
                               strict=True,
                               name="the_inner_scan")
            return h

        def get_outputs(x, w):
            features, _ = theano.scan(outer_scan_step,
                                      sequences=[x],
                                      non_sequences=[w],
                                      strict=True,
                                      name="the_outer_scan")

            return_val = tensor.grad(features.sum(), w)
            return return_val

        # Compile the theano function
        x = tensor.tensor3('x')
        w = tensor.matrix('w')
        f = theano.function(inputs=[x, w], outputs=get_outputs(x, w))

        # Test the function to ensure it returns valid results
        x_value = np.random.random((2, 2, 3)).astype(theano.config.floatX)
        w_value = np.random.random((3, 3)).astype(theano.config.floatX)
        expected_output = np.tile(x_value[:, 0].sum(0), (3, 1)).transpose()

        output = f(x_value, w_value)
        utt.assert_allclose(output, expected_output)

    def test_grad_multiple_taps_state(self):
        # The test is based on the code provided by Timothy Lillicrap.

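        # With taps=[-4, -1], onestep receives the state from four steps
        # back (xdl, deliberately unused) and from the previous step
        # (xprev); the unused older tap exercises the gradient code for
        # multiple-tap states.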
        def onestep(xdl, xprev, w):
            xnew = w + xprev
            return xnew

        xinit = tensor.tensor3('xinit')
        w = tensor.matrix('w')
        (xseq, updates) = theano.scan(
            n_steps=10,
            fn=onestep,
            outputs_info=[dict(initial=xinit, taps=[-4, -1])],
            non_sequences=w)
        loss = (xseq[-1] ** 2).sum()
        cost_fn = theano.function([xinit, w],
                                  loss,
                                  no_default_updates=True,
                                  allow_input_downcast=True)

        gw, gx = tensor.grad(loss, [w, xinit])
        grad_fn = theano.function([xinit, w], [gx, gw],
                                  allow_input_downcast=True)
        rng = np.random.RandomState(utt.fetch_seed())
        # If the numbers are small, the gradients with respect to x are
        # small and the numeric differentiation becomes unstable. To avoid
        # this, we sample numbers larger than 1 in absolute value.
        v_x = np.array(rng.uniform(size=(5, 2, 2), low=1., high=3.),
                       dtype=theano.config.floatX)
        # Make some entries negative.
        pos = rng.uniform(size=(5, 2, 2), low=0., high=1) < .5
        v_x[pos] = -1 * v_x[pos]
        v_w = np.array(rng.uniform(size=(2, 2), low=1., high=3.),
                       dtype=theano.config.floatX)
        pos = rng.uniform(size=(2, 2), low=0., high=1.) < .5
        v_w[pos] = -1 * v_w[pos]
        analytic_grad = grad_fn(v_x, v_w)
        num_grad = multiple_outputs_numeric_grad(cost_fn,
                                                 [v_x, v_w])
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos,
                             analytic_grad[max_err_pos],
                             num_grad.gx[max_err_pos]))

    def test_grad_numeric_shared(self):
        shared_var = theano.shared(np.float32(1.))

        def inner_fn():
            return [], OrderedDict(
                [(shared_var, shared_var + np.float32(1.))])
        _, updates = theano.scan(inner_fn,
                                 n_steps=10,
                                 truncate_gradient=-1,
                                 go_backwards=False)
        cost = list(updates.values())[0]
        g_sh = tensor.grad(cost, shared_var)
        fgrad = theano.function([], g_sh)
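        # After 10 updates the cost is shared_var + 10, so its derivative
        # with respect to shared_var is exactly 1.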
        assert fgrad() == 1

    def test_rop_mitmot(self):
        # This test is a copy-paste of the script given by Justin Bayer to
        # reproduce this bug.
        # We have 2 parameter groups with the following shapes.
        W1shape = (1, 3)
        W2shape = (3, 3)

        n_pars = 1 * 3 + 3 * 3

        # Allocate big parameter array.
        pars = theano.shared(np.empty(n_pars))

        # Assign slices.
        W1 = pars[:3].reshape(W1shape)
        W2 = pars[3:].reshape(W2shape)

        # Define the recurrent model. We are using a model where each input
        # is a tensor of shape (T, B, D), where T is the number of timesteps,
        # B is the number of sequences iterated over in parallel and D is
        # the dimensionality of each item at a timestep.

        inpt = tensor.tensor3('inpt')
        target = tensor.tensor3('target')

        # Make these flat in order to be able to use dot products instead of
        # tensordot, which is slower.
        inpt_flat = inpt.reshape((inpt.shape[0] * inpt.shape[1],
                                  inpt.shape[2]))
        hidden_flat = tensor.dot(inpt_flat, W1)
        hidden = hidden_flat.reshape((inpt.shape[0], inpt.shape[1], 3))

        transfer = tensor.nnet.sigmoid

        hidden_rec, _ = theano.scan(
                lambda x, h_tm1: transfer(tensor.dot(h_tm1, W2) + x),
                sequences=hidden,
                outputs_info=[tensor.zeros_like(hidden[0])])

        hidden_rec_flat = hidden_rec.reshape(
                    (hidden_rec.shape[0] * hidden_rec.shape[1],
                     hidden_rec.shape[2]))

        cost = ((hidden_rec - target) ** 2).mean()
        d_cost_wrt_pars = tensor.grad(cost, pars)

        p = tensor.dvector()
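        # The R-op is taken through the gradient of the scan, whose grad
        # scan has mit-mot (multiple input taps, multiple output taps)
        # states; building this expression used to crash.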
        Hp = tensor.Rop(d_cost_wrt_pars, pars, p)

    def test_seq_tap_bug_jeremiah(self):
        inp = np.arange(10).reshape(-1, 1).astype(theano.config.floatX)
        exp_out = np.zeros((10, 1)).astype(theano.config.floatX)
        exp_out[4:] = inp[:-4]
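        # The second output of onestep returns the -4 tap of the first
        # (recurrent) output, which simply forwards the sequence, so the
        # second output is the input delayed by four steps, with zeros
        # (from the initial state) for the first four entries.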

        def onestep(x, x_tm4):
            return x, x_tm4

        seq = tensor.matrix()
        initial_value = theano.shared(np.zeros((4, 1),
                                               dtype=theano.config.floatX))
        outputs_info = [OrderedDict(
            [('initial', initial_value), ('taps', [-4])]), None]
        results, updates = theano.scan(fn=onestep,
                                       sequences=seq,
                                       outputs_info=outputs_info)

        f = theano.function([seq], results[1])
        assert np.all(exp_out == f(inp))

    def test_borrow_bug_jeremiah(self):
        # This tests two things. The first is a bug occurring when scan
        # wrongly used the borrow flag. The second is that Scan's
        # infer_shape() method is able to remove the Scan node from the
        # graph in this case.

        inp = np.arange(10).reshape(-1, 1).astype(theano.config.floatX)
        exp_out = np.zeros((10, 1)).astype(theano.config.floatX)
        exp_out[4:] = inp[:-4]

        def onestep(x, x_tm4):
            return x, x_tm4

        seq = tensor.matrix()
        initial_value = theano.shared(np.zeros((4, 1),
                                               dtype=theano.config.floatX))
        outputs_info = [OrderedDict([('initial', initial_value),
                                     ('taps', [-4])]), None]
        results, _ = theano.scan(fn=onestep,
                                 sequences=seq,
                                 outputs_info=outputs_info)
        sharedvar = theano.shared(np.zeros((1, 1),
                                           dtype=theano.config.floatX))
        updates = OrderedDict([(sharedvar, results[0][-1:])])

        f = theano.function([seq], results[1], updates=updates)

        # This fails if scan wrongly uses the borrow flag
        assert np.all(exp_out == f(inp))

        # This fails if Scan's infer_shape() is unable to remove the Scan
        # node from the graph.
        f_infershape = theano.function([seq], results[1].shape,
                                       mode='FAST_RUN')
        scan_nodes_infershape = scan_nodes_from_fct(f_infershape)
        assert len(scan_nodes_infershape) == 0

    def test_memory_reuse_with_outputs_as_inputs(self):
        # Test the memory pre-allocation feature in scan for the following
        # cases:
        #  - An output of the inner graph is also an input of the inner graph
        #  - An output of the inner graph is not an input in the unoptimized
        #    graph but it could become the case in the optimized graph due to
        #    the optimizations.
        #  - An output of the inner graph is obtained through a view op on an
        #    input of the inner graph and the view op is removed by the
        #    optimization process
        #  - An output of the inner graph is obtained through a view op on an
        #    input of the inner graph and the view op is NOT removed by the
        #    optimization process
        #  - An output of the inner graph is not obtained through any of the
        #    previously mentioned cases (standard case)

        def inner_fn(tap_m3, tap_m2, tap_m1):
            return (tap_m2, (tap_m1 * 1),
                    theano.gradient.disconnected_grad(tap_m2),
                    theano.tensor.opt.assert_(tap_m2, 1),
                    tap_m3 + tap_m2 + tap_m1)

        init = theano.tensor.matrix()
        outputs_info = [None, None, None, None,
                        dict(initial=init, taps=[-3, -2, -1])]

        out, _ = theano.scan(inner_fn, outputs_info=outputs_info, n_steps=3)
        fct = theano.function([init], out)

        # Compare obtained outputs with expected outputs
        floatX = theano.config.floatX
        outputs = fct(np.arange(9, dtype=floatX).reshape(3, 3))

        states = np.array([[0, 1, 2],
                           [3, 4, 5],
                           [6, 7, 8],
                           [9, 12, 15],
                           [18, 23, 28],
                           [33, 42, 51]], dtype=floatX)
        expected_outputs = [states[1:4], states[2:5], states[1:4],
                            states[1:4], states[3:6]]

        utt.assert_allclose(outputs, expected_outputs)

    def test_grad_connectivity_matrix(self):
        def inner_fn(x_tm1, y_tm1, z_tm1):
            x_tm1.name = 'x'
            y_tm1.name = 'y'
            z_tm1.name = 'z'
            return x_tm1 ** 2, y_tm1, x_tm1 + 1
        x0 = tensor.vector('X')
        y0 = tensor.vector('y0')
        z0 = tensor.vector('Z')
        [x, y, z], _ = theano.scan(inner_fn,
                                   outputs_info=[x0, y0, z0],
                                   n_steps=10)
        cost = (x + y + z).sum()

        gx0 = tensor.grad(cost, x0)  # defined
        gy0 = tensor.grad(cost, y0)  # defined
        self.assertRaises(ValueError, tensor.grad, cost, z0)
        cost = x.sum()
        self.assertRaises(ValueError, tensor.grad, cost, y0)

    def test_disconnected_gradient(self):
        v = tensor.vector('v')
        m = tensor.matrix('m')
        u0 = tensor.zeros((7,))

        [u, m2], _ = theano.scan(lambda _, u: [u, v],
                                 sequences=m,
                                 outputs_info=[u0, None])
        # This used to raise an exception with older versions because, for a
        # disconnected gradient, a non-disconnected type was returned
        tensor.grad((m * m2).sum(), v)

    def test_disconnected_gradient2(self):
        v = tensor.vector('v')
        m = tensor.matrix('m')
        u0 = tensor.zeros((7,))

        [u, m2], _ = theano.scan(lambda x, u: [x + u, u + v],
                                 sequences=m,
                                 outputs_info=[u0, None])
        # This used to raise an exception with older versions because
        # scan could not detect the connection between `m2` and `x`
        tensor.grad(m2.sum(), m)

    def test_disconnected_gradient3(self):
        # This tests for a crash that would occur sometimes when taking the
        # gradient through a scan with a non-recurrent output which would
        # receive a disconnected gradient

        v = tensor.dvector('v')

        def step(seq):
            out1 = seq + 1
            out2 = out1 + 1
            return out1, out2

        [out1, out2], _ = theano.scan(step, sequences=v)
        gv = tensor.grad(out2.sum(), [v])
        f = theano.function([v], gv)

        # Ensure the output of the function is valid
        output = f(np.random.random(5))
        utt.assert_allclose(output, np.ones(5))

    def test_dot_optimization(self):
        A = tensor.matrix('A')
        B = tensor.matrix('B')
        S, _ = theano.scan(lambda x1, x2, u: u + tensor.dot(x1, x2),
                           sequences=[A.dimshuffle(0, 1, 'x'),
                                      B.dimshuffle(0, 'x', 1)],
                           outputs_info=[tensor.zeros_like(A)])
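        # Each step adds the outer product of a row of A with the matching
        # row of B, so the final state is sum_i outer(A[i], B[i]), which
        # equals dot(A.T, B). S.owner.inputs[0] is the scan node's full
        # output buffer (including the initial state); its last entry is
        # that final sum.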
        f = theano.function([A, B], S.owner.inputs[0][-1])
        rng = np.random.RandomState(utt.fetch_seed())
        vA = rng.uniform(size=(5, 5)).astype(theano.config.floatX)
        vB = rng.uniform(size=(5, 5)).astype(theano.config.floatX)
        utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB))

    def test_pregreedy_optimizer(self):
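        # Presumably a regression test for the pre-greedy inner-graph
        # optimizations (judging by the test name): the second scan closes
        # over the outputs of the first, and compiling this used to crash.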
        W = tensor.zeros((5, 4))
        bv = tensor.zeros((5,))
        bh = tensor.zeros((4,))
        v = tensor.matrix('v')
        (bv_t, bh_t), _ = theano.scan(lambda _: [bv, bh], sequences=v,
                                      outputs_info=[None, None])
        chain, _ = theano.scan(
            lambda x: tensor.dot(tensor.dot(x, W) + bh_t, W.T) + bv_t,
            outputs_info=v,
            n_steps=2)
        theano.function([v], chain)(np.zeros((3, 5),
                                             dtype=theano.config.floatX))

    def test_savemem_does_not_duplicate_number_of_scan_nodes(self):
        var = tensor.ones(())
        values, _ = theano.scan(lambda x: ([x], (),
                                           theano.scan_module.until(x)),
                                outputs_info=[var], n_steps=2)

        tmp_fn = theano.function([var], values)
        scan_nodes = [x for x in tmp_fn.maker.fgraph.toposort()
                      if isinstance(x.op,
                                    theano.scan_module.scan_op.Scan)]
        assert len(scan_nodes) == 1

    def test_eliminate_seqs(self):
        U = tensor.vector('U')
        sh = theano.shared(asarrayX(2.))
        x1 = tensor.vector('x1')
        x2 = tensor.scalar('x2')

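        # In scan's internal taxonomy: mit-sot = state with multiple input
        # taps and a single output tap, sit-sot = single input tap, single
        # output tap, nit-sot = output with no input taps; shared variables
        # are handled through the updates dictionary.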
        def rec_fn(*args):
            u_t = args[0]
            return [(u_t + 1,  # mitsot
                     u_t + 2,  # sitsot
                     u_t + 3),  # nitsot
                    {sh: u_t + 4}]  # shared

        [X1, X2, X3], updates = theano.scan(
            rec_fn,
            U,
            [dict(initial=x1, taps=[-1, -3]), x2, None],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False)
        f = theano.function([U, x1, x2], [X1, X2, X3],
                            updates=updates,
                            mode=theano.Mode(linker='py'),
                            allow_input_downcast=True)
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = asarrayX(rng.uniform(size=(5,)))
        outs = f(v_u, [0, 0, 0], 0)
        utt.assert_allclose(outs[0], v_u + 1)
        utt.assert_allclose(outs[1], v_u + 2)
        utt.assert_allclose(outs[2], v_u + 3)
        utt.assert_allclose(sh.get_value(), v_u[-1] + 4)

    def test_eliminate_nonseqs(self):
        W = tensor.scalar('W')
        sh = theano.shared(asarrayX(2.))
        x1 = tensor.vector('x1')
        x2 = tensor.scalar('x2')

        def rec_fn(*args):
            w = args[-1]
            return [(w + 1.,  # mitsot
                     w + 2.,  # sitsot
                     w + 3.),  # nitsot
                    {sh: w + 4.}]  # shared

        [X1, X2, X3], updates = theano.scan(
            rec_fn,
            [],
            [dict(initial=x1, taps=[-1, -3]), x2, None],
            W,
            n_steps=5,
            truncate_gradient=-1,
            go_backwards=False)
        f = theano.function([W, x1, x2], [X1, X2, X3],
                            updates=updates,
                            mode=theano.Mode(linker='py'),
                            allow_input_downcast=True)
        rng = np.random.RandomState(utt.fetch_seed())
        v_w = asarrayX(rng.uniform())
        outs = f(v_w, [0, 0, 0], 0)
        utt.assert_allclose(outs[0], v_w + 1)
        utt.assert_allclose(outs[1], v_w + 2)
        utt.assert_allclose(outs[2], v_w + 3)
        utt.assert_allclose(sh.get_value(), v_w + 4)

    def test_grad_bug_disconnected_input(self):
        W = theano.shared(np.zeros((3, 3)), name='W')
        v = theano.tensor.ivector(name='v')
        y, _ = theano.scan(lambda i, W: W[i], sequences=v, outputs_info=None, non_sequences=W)

        # This used to raise an exception
        f = theano.function([v], theano.tensor.grad(y.sum(), W))
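        # y.sum() adds up rows 1 and 2 of W, so the gradient is one for
        # those rows and zero for row 0.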
        utt.assert_allclose(f([1, 2]), [[0, 0, 0], [1, 1, 1], [1, 1, 1]])

    def test_clone(self):
        def test(x, y, mention_y):
            if mention_y:
                d = 0.1 + 0 * y
            else:
                d = 0.1
            out = theano.clone(y, replace={x: x + d})
            # theano.printing.debugprint(out)
            return theano.function([], out)()

        x = theano.shared(np.asarray(0., dtype=theano.config.floatX))
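        # With x == 0, replacing x by x + 0.1 in sum((x + 1) ** 2) gives
        # 1.1 ** 2 == 1.21 (up to float32 rounding).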
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=False),
                            1.21000003815)
        utt.assert_allclose(test(x, tensor.sum((x+1)**2), mention_y=True),
                            1.21000003815)

    def test_grad_find_input(self):
        w = theano.shared(np.array(0, dtype='float32'), name='w')
        init = tensor.fscalar('init')

        out, _ = theano.scan(
                fn=lambda prev: w,
                outputs_info=init,
                n_steps=2,
        )
        tensor.grad(out[-1], w)

    def test_scan_merge_nodes(self):
        inps = tensor.vector()
        state = tensor.scalar()
        y1, _ = theano.scan(lambda x, y: x * y,
                            sequences=inps,
                            outputs_info=state,
                            n_steps=5)

        y2, _ = theano.scan(lambda x, y: (x + y, theano.scan_module.until(x > 0)),
                            sequences=inps,
                            outputs_info=state,
                            n_steps=5)
        scan_node1 = y1.owner.inputs[0].owner
        assert isinstance(scan_node1.op, theano.scan_module.scan_op.Scan)
        scan_node2 = y2.owner.inputs[0].owner
        assert isinstance(scan_node2.op, theano.scan_module.scan_op.Scan)
        opt_obj = theano.scan_module.scan_opt.ScanMerge()
        # Test the belongs_to_set method of this class. Specifically, see if
        # it detects the two scan nodes as not being similar
        assert not opt_obj.belongs_to_set(scan_node1, [scan_node2])
        assert not opt_obj.belongs_to_set(scan_node2, [scan_node1])

    def test_remove_constants_and_unused_inputs_scan_non_seqs(self):
        # Test the opt remove_constants_and_unused_inputs_scan for
        # non sequences.
        W = theano.tensor.matrix(name='W')
        v = theano.tensor.ivector(name='v')
        y1, _ = theano.scan(lambda i, W: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W])
        y2, _ = theano.scan(lambda i, _, W: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W[0], W])
        y3, _ = theano.scan(lambda i, W, _: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W, W[0]])
        y4, _ = theano.scan(lambda i, _, _2, W: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W[0], W[0], W])
        y5, _ = theano.scan(lambda i, _, W, _2: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W[0], W, W[0]])
        y6, _ = theano.scan(lambda i, W, _, _2: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W, W[0], W[0]])
        # TODO: y7 has a problem at run time. It should probably raise an
        # error during the scan construction instead.
        # y7, _ = theano.scan(lambda i, W, _, _2: W[i], sequences=v,
        #                    outputs_info=None, non_sequences=[v, W[0], W])
        for out in [y1, y2, y3, y4, y5, y6]:
            # This used to raise an exception
            f = theano.function([W, v], out, mode=mode_with_opt)
            f(np.zeros((3, 3), dtype=theano.config.floatX), [1, 2])

            scan_nodes = scan_nodes_from_fct(f)
            assert len(scan_nodes) == 1
            scan_node = scan_nodes[0]

            # The first input is the number of iterations.
            assert (len(scan_node.inputs[1:]) ==
                    len(set(scan_node.inputs[1:])))
            inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
            assert len(inp) == 1
            assert (len(inp) == len(set(inp)))
            inp = scan_node.op.outer_non_seqs(scan_node)
            assert len(inp) == 1
            assert (len(inp) == len(set(inp)))

    def test_remove_constants_and_unused_inputs_scan_seqs(self):
        # Test the opt remove_constants_and_unused_inputs_scan for sequences.
        W = theano.tensor.matrix(name='W')
        v = theano.tensor.ivector(name='v')
        vv = theano.tensor.matrix(name='vv')
        y1, _ = theano.scan(lambda i, W: W[i], sequences=v,
                            outputs_info=None, non_sequences=[W])
        y2, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, v],
                            outputs_info=None, non_sequences=W)
        y3, _ = theano.scan(lambda i, _, W: W[i], sequences=[v, vv[0]],
                            outputs_info=None, non_sequences=W)
        y4, _ = theano.scan(lambda _, i, W: W[i], sequences=[vv[0], v],
                            outputs_info=None, non_sequences=W)
        y5, _ = theano.scan(lambda _, i, _2, W: W[i], sequences=[vv, v, vv[0]],
                            outputs_info=None, non_sequences=W)
        y6, _ = theano.scan(lambda _, _2, i, W: W[i], sequences=[vv[0], vv, v],
                            outputs_info=None, non_sequences=W)
        y7, _ = theano.scan(lambda i, _, _2, W: W[i],
                            sequences=[v, vv[0], vv[0]],
                            outputs_info=None, non_sequences=W)
        y8, _ = theano.scan(lambda _, i, W, _2, _3: W[i], sequences=[vv[0], v],
                            outputs_info=None, non_sequences=[W, W[0], W[0]])
        for out in [y1, y2, y3, y4, y5, y6, y7, y8]:
            # This used to raise an exception
            f = theano.function([W, v, vv], out, on_unused_input='ignore',
                                mode=mode_with_opt)
            f(np.zeros((3, 3), theano.config.floatX),
              [1, 2],
              np.zeros((3, 3), theano.config.floatX))

            scan_nodes = scan_nodes_from_fct(f)
            assert len(scan_nodes) == 1
            scan_node = scan_nodes[0]

            # The first input is the number of iterations.
            assert (len(scan_node.inputs[1:]) ==
                    len(set(scan_node.inputs[1:])))
            inp = scan_node.op.inner_seqs(scan_node.op.inputs)
            assert len(inp) == 1
            inp = scan_node.op.outer_seqs(scan_node)
            assert len(inp) == 1
            inp = scan_node.op.inner_non_seqs(scan_node.op.inputs)
            assert len(inp) == 1
            inp = scan_node.op.outer_non_seqs(scan_node)
            assert len(inp) == 1

    @attr('slow')
    def test_hessian_bug_grad_grad_two_scans(self):
        # Bug reported by Bitton Tenessi.
        # NOTE: the test to reproduce the bug reported by Bitton Tenessi
        # was modified from its original version to run faster.

        W = tensor.fvector(name='W')
        n_steps = tensor.iscalar(name='Nb_steps')

        def loss_outer(sum_outer, W):

            def loss_inner(sum_inner, W):

                return sum_inner + (W**2).sum()

            result_inner, _ = theano.scan(
                fn=loss_inner,
                outputs_info=tensor.as_tensor_variable(
                    np.asarray(0, dtype=np.float32)),
                non_sequences=[W],
                n_steps=1,
            )
            return sum_outer + result_inner[-1]
        # Also test return_list for that case.
        result_outer, _ = theano.scan(
            fn=loss_outer,
            outputs_info=tensor.as_tensor_variable(
                np.asarray(0, dtype=np.float32)),
            non_sequences=[W],
            n_steps=n_steps,
            return_list=True,
        )

        cost = result_outer[0][-1]
        H = theano.gradient.hessian(cost, W)
        print(".", file=sys.stderr)
        f = theano.function([W, n_steps], H)
        f(np.ones((8,), dtype='float32'), 1)

    def test_strict_mode(self):
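        # With strict=True, every shared variable used by the step function
        # must be passed explicitly through non_sequences; the loose variant
        # may instead use the shared variable w_ implicitly.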
        n = 10

        w = np.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
        w_ = theano.shared(w)
        x0 = np.array([1, 2]).astype(theano.config.floatX)
        x0_ = tensor.vector(name='x0', dtype=theano.config.floatX)

        def _scan_loose(x):
            return tensor.dot(x, w_)

        def _scan_strict(x, w_ns):
            return tensor.dot(x, w_ns)

        ret_loose = theano.scan(_scan_loose,
                                sequences=[],
                                outputs_info=[x0_],
                                n_steps=n,
                                strict=False)
        f_loose = theano.function([x0_], ret_loose[0][-1])

        ret_strict = theano.scan(_scan_strict,
                                 sequences=[],
                                 outputs_info=[x0_],
                                 non_sequences=[w_],
                                 n_steps=n,
                                 strict=True)
        f_strict = theano.function([x0_], ret_strict[0][-1])

        result_loose = f_loose(x0)
        result_strict = f_strict(x0)

        diff = (abs(result_loose - result_strict)).mean()

        assert diff <= type_eps[theano.config.floatX]

    @raises(theano.gof.fg.MissingInputError)
    def test_strict_mode_ex(self):
        n = 10

        w = np.array([[-1, 2], [3, -4]]).astype(theano.config.floatX)
        w_ = theano.shared(w)
        x0 = np.array([1, 2]).astype(theano.config.floatX)
        x0_ = tensor.vector(name='x0', dtype=theano.config.floatX)

        def _scan_loose(x):
            return tensor.dot(x, w_)

        ret_strict = theano.scan(_scan_loose,
                                 sequences=[],
                                 outputs_info=[x0_],
                                 n_steps=n,
                                 strict=True)

    def test_monitor_mode(self):
        # Test that it is possible to pass an instance of MonitorMode
        # to the inner function
        k = tensor.iscalar("k")
        A = tensor.vector("A")

        # Build a MonitorMode that counts how many values are greater than 10
        def detect_large_outputs(i, node, fn):
            for output in fn.outputs:
                if isinstance(output[0], np.ndarray):
                    detect_large_outputs.large_count += (output[0] > 10).sum()
        detect_large_outputs.large_count = 0

        mode = theano.compile.MonitorMode(post_func=detect_large_outputs)

        # Symbolic description of the result
        result, updates = theano.scan(
            fn=lambda prior_result, A: prior_result * A,
            outputs_info=tensor.ones_like(A),
            non_sequences=A,
            n_steps=k,
            mode=mode)

        final_result = result[-1]

        f = theano.function(inputs=[A, k],
                            outputs=final_result,
                            updates=updates)
        f(np.asarray([2, 3, .1, 0, 1], dtype=theano.config.floatX), 4)

        # There should be 3 outputs greater than 10: prior_result[0] at step 3,
        # and prior_result[1] at steps 2 and 3.
        if theano.config.mode in ["DEBUG_MODE", "DebugMode"]:
            # DebugMode will run all the intermediate nodes, so we
            # should expect a multiple of 3, not exactly 3.
            assert detect_large_outputs.large_count % 3 == 0
        else:
            assert detect_large_outputs.large_count == 3


class ScanGpuTests:
    """
    This class defines a number of tests for Scan on GPU as well as a few
    helper functions for these tests. The GPU tests defined in this class are
    independent of the GPU backend used. Because of this, a class inheriting
    from ScanGpuTests should define the following attributes and methods to
    make the tests run on a specific backend:
    - self.gpu_backend : Reference to the backend module
    - self.mode_with_gpu : Compilation mode to force usage of the GPU backend
    - self.is_scan_on_gpu(node) : Method to determine if a scan node has been
                                  moved to run on a gpu under the specific
                                  backend. Returns a boolean.
    """
    def test_one_sequence_one_output_weights_gpu1(self):

        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.fvector('u')
        x0 = theano.tensor.fscalar('x0')
        W_in = theano.tensor.fscalar('win')
        W = theano.tensor.fscalar('w')

        # The following line is needed to have the first case used;
        # otherwise, it is the second case that is tested.
        mode = self.mode_with_gpu.excluding('InputToGpuOptimizer')
        output, updates = theano.scan(f_rnn,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False,
                                      mode=mode)

        output = self.gpu_backend.gpu_from_host(output)
        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True,
                             mode=self.mode_with_gpu)

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype='float32')
        v_x0 = np.asarray(v_x0, dtype='float32')
        W = np.asarray(W, dtype='float32')
        W_in = np.asarray(W_in, dtype='float32')

        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, self.gpu_backend.HostFromGpu)
                    for node in topo]) == 0
        assert sum([isinstance(node.op, self.gpu_backend.GpuFromHost)
                    for node in topo]) == 4

        scan_node = [node for node in topo
                     if isinstance(node.op, theano.scan_module.scan_op.Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any([isinstance(node.op, self.gpu_backend.GpuElemwise)
                    for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.HostFromGpu)
                        for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.GpuFromHost)
                        for node in scan_node_topo])

    # This second version tests the second case in the optimizer that moves
    # computation to the gpu.
    def test_one_sequence_one_output_weights_gpu2(self):

        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.fvector('u')
        x0 = theano.tensor.fscalar('x0')
        W_in = theano.tensor.fscalar('win')
        W = theano.tensor.fscalar('w')
        output, updates = theano.scan(f_rnn,
                                      u,
                                      x0,
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False,
                                      mode=self.mode_with_gpu)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True,
                             mode=self.mode_with_gpu)

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in xrange(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, self.gpu_backend.HostFromGpu)
                    for node in topo]) == 1
        assert sum([isinstance(node.op, self.gpu_backend.GpuFromHost)
                    for node in topo]) == 4

        scan_node = [node for node in topo
                     if isinstance(node.op, theano.scan_module.scan_op.Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any([isinstance(node.op, self.gpu_backend.GpuElemwise)
                    for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.HostFromGpu)
                        for node in scan_node_topo])
        assert not any([isinstance(node.op, self.gpu_backend.GpuFromHost)
                        for node in scan_node_topo])

    # This third test checks that scan can deal with a mixture of dtypes as
    # outputs when running on the GPU
    def test_gpu3_mixture_dtype_outputs(self):

        def f_rnn(u_t, x_tm1, W_in, W):
            return (u_t * W_in + x_tm1 * W,
                    tensor.cast(u_t + x_tm1, 'int64'))

        u = theano.tensor.fvector('u')
        x0 = theano.tensor.fscalar('x0')
        W_in = theano.tensor.fscalar('win')
        W = theano.tensor.fscalar('w')
        output, updates = theano.scan(f_rnn,
                                      u,
                                      [x0, None],
                                      [W_in, W],
                                      n_steps=None,
                                      truncate_gradient=-1,
                                      go_backwards=False,
                                      mode=self.mode_with_gpu)

        f2 = theano.function([u, x0, W_in, W],
                             output,
                             updates=updates,
                             allow_input_downcast=True,
                             mode=self.mode_with_gpu)

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5., high=5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out1 = np.zeros((4,))
        v_out2 = np.zeros((4,), dtype='int64')
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in xrange(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

        theano_out1, theano_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_out1, v_out1)
        utt.assert_allclose(theano_out2, v_out2)

        topo = f2.maker.fgraph.toposort()
        scan_node = [node for node in topo
                     if isinstance(node.op, theano.scan_module.scan_op.Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        assert self.is_scan_on_gpu(scan_node)

    def test_gibbs_chain(self):
        rng = np.random.RandomState(utt.fetch_seed())
        v_vsample = np.array(rng.binomial(1, .5, size=(3, 20),),
                             dtype='float32')
        vsample = theano.shared(v_vsample)
        trng = theano.sandbox.rng_mrg.MRG_RandomStreams(
            utt.fetch_seed())

        def f(vsample_tm1):
            return trng.binomial(vsample_tm1.shape, n=1, p=0.3,
                                 dtype='float32') * vsample_tm1

        theano_vsamples, updates = theano.scan(f,
                                               [],
                                               vsample,
                                               [],
                                               n_steps=10,
                                               truncate_gradient=-1,
                                               go_backwards=False,
                                               mode=self.mode_with_gpu)
        my_f = theano.function([],
                               theano_vsamples[-1],
                               updates=updates,
                               allow_input_downcast=True,
                               mode=self.mode_with_gpu)

        # I leave this to be tested by DebugMode; this test was anyway more
        # of a "does the graph compile" kind of test.
        t_result = my_f()

    def test_gpu_memory_usage(self):
        # This test validates that the memory usage of the defined theano
        # function is reasonable when executed on the GPU. It checks for
        # a bug in which one of scan's optimizations was not applied, which
        # made the scan node compute large and unnecessary outputs and
        # brought memory usage on the GPU to ~12G.

        # Dimensionality of input and output data (not one-hot coded)
        n_in = 100
        n_out = 100
        # Number of neurons in hidden layer
        n_hid = 4000

        # Number of minibatches
        mb_size = 2
        # Time steps in minibatch
        mb_length = 200

        # Define input variables
        xin = tensor.ftensor3(name='xin')
        yout = tensor.ftensor3(name='yout')

        # Initialize the network parameters
        floatX = theano.config.floatX
        U = theano.shared(np.zeros((n_in, n_hid), dtype="float32"),
                          name='W_xin_to_l1')
        V = theano.shared(np.zeros((n_hid, n_hid), dtype="float32"),
                          name='W_l1_to_l1')
        W = theano.shared(np.zeros((n_hid, n_out), dtype="float32"),
                          name='W_l1_to_l2')
        nparams = [U, V, W]

        # Build the forward pass
        l1_base = tensor.dot(xin, U)

        def scan_l(baseline, last_step):
            return baseline + tensor.dot(last_step, V)

        zero_output = tensor.alloc(np.asarray(0., dtype="float32"),
                                   mb_size, n_hid)

        l1_out, _ = theano.scan(scan_l, sequences=[l1_base],
                                outputs_info=[zero_output],
                                mode=self.mode_with_gpu_nodebug)

        l2_out = tensor.dot(l1_out, W)

        # Compute the cost and take the gradient wrt params
        cost = tensor.sum((l2_out - yout) ** 2)
        grads = tensor.grad(cost, nparams)
        updates = list(zip(nparams, (n - g for n, g in zip(nparams, grads))))

        # Compile the theano function
        feval_backprop = theano.function([xin, yout], cost, updates=updates,
                                         mode=self.mode_with_gpu_nodebug)

        # Validate that the PushOutScanOutput optimization has been applied
        # by checking the number of outputs of the grad Scan node in the
        # compiled function.
        nodes = feval_backprop.maker.fgraph.toposort()
        scan_nodes = [n for n in nodes if isinstance(
                      n.op, theano.scan_module.scan_op.Scan)]

        # The grad scan is always the 2nd one according to toposort. If the
        # optimization has been applied, it has 2 outputs, otherwise 3.
        grad_scan_node = scan_nodes[1]
        assert len(grad_scan_node.outputs) == 2, len(grad_scan_node.outputs)

        # Call the theano function to ensure the absence of a memory error
        feval_backprop(np.zeros((mb_length, mb_size, n_in),
                                dtype="float32"),
                       np.zeros((mb_length, mb_size, n_out),
                                dtype="float32"))

    def test_memory_reuse_gpudimshuffle(self):
        # Test the memory pre-allocation feature in scan when one output is
        # the result of a GpuDimshuffle (because an optimization in
        # GpuDimshuffle can cause issues with the memory pre-allocation
        # where it falsely thinks that a pre-allocated memory region has
        # been used when it hasn't).
        def inner_fn(seq1, recurrent_out):
            temp = seq1 + recurrent_out.sum()
            output1 = temp.dimshuffle(1, 0)
            output2 = temp.sum() + recurrent_out
            return output1, output2

        input1 = theano.tensor.ftensor3()
        init = theano.tensor.ftensor3()
        outputs_info = [None, init]

        out, _ = theano.scan(inner_fn, sequences=[input1],
                             outputs_info=outputs_info,
                             mode=self.mode_with_gpu)

        out1 = out[0].flatten()
        out2 = out[1].flatten()

        fct = theano.function([input1, init], [out1, out2],
                              mode=self.mode_with_gpu)

        output = fct(np.ones((2, 1, 1), dtype="float32"),
                     np.ones((1, 1, 1), dtype="float32"))

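        # With all-ones inputs: step 1 gives temp == 1 + 1 == 2, so
        # output1 == 2 and output2 == 2 + 1 == 3; step 2 gives
        # temp == 1 + 3 == 4, so output1 == 4 and output2 == 4 + 3 == 7.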
        expected_output = (np.array([2, 4], dtype="float32"),
                           np.array([3, 7], dtype="float32"))
        utt.assert_allclose(output, expected_output)


class T_Scan_Gpuarray(unittest.TestCase, ScanGpuTests):
    """
    This class takes the gpu tests for scan that are defined in
    class ScanGpuTests and runs them using the gpuarray backend.
    """

    def __init__(self, *args, **kwargs):
        from theano import gpuarray
        self.gpu_backend = gpuarray

        # This is unfortunate, but required
        def gpu_from_host(v):
            return gpuarray.GpuFromHost(None)(v)
        self.gpu_backend.gpu_from_host = gpu_from_host

        self.mode_with_gpu = mode_with_opt.including('gpuarray', 'scan')
        self.mode_with_gpu_nodebug = mode_nodebug.including('gpuarray', 'scan')
        super(T_Scan_Gpuarray, self).__init__(*args, **kwargs)

    def setUp(self):
        # Make sure to activate the new backend, if possible; otherwise
        # testing this class directly will always skip.
        import theano.gpuarray.tests.config
        # Skip the test if pygpu is not available
        if not self.gpu_backend.pygpu_activated:
            raise SkipTest('Optional package pygpu disabled')

        utt.seed_rng()
        super(T_Scan_Gpuarray, self).setUp()

    def is_scan_on_gpu(self, node):
        return node.op.info.get('gpua', False)


def test_speed():
    #
    # This function prints out the speed of very simple recurrent
    # calculations implemented in various ways.  In DebugMode this will
    # test the correctness of the optimizations applied, but generally
    # correctness-testing is not the goal of this test.
    #
    # To be honest, it isn't really a unit test so much as a tool for testing
    # approaches to scan.
    #
    # The computation being tested here is a recurrent addition.
    #
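    # i.e. r[i] <- r[i] + r[i - 1], applied row by row over a (1000, 10)
    # array, so each variant below performs 999 vector additions.
    #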
    # We need the CVM for this speed test
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")

    r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)

    t0 = time.time()
    for i in xrange(1, 1000):
        r[i] += r[i - 1]
    t1 = time.time()
    print('python', t1 - t0)

    r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
    t0 = time.time()
    r_i = iter(r[1:])
    r_ii = iter(r[:-1])
    while True:
        try:
            tmp = next(r_i)
            tmp += next(r_ii)
        except StopIteration:
            break
    t1 = time.time()
    print('python with builtin iterator', t1 - t0)
    if 1:
        r = np.arange(10000).astype(theano.config.floatX).reshape(1000, 10)
        s_r = tensor.matrix()
        s_y, updates = theano.scan(fn=lambda ri, rii: ri + rii,
                                   sequences=[s_r[1:]],
                                   outputs_info=tensor.constant(r[0]),
                                   mode=theano.Mode(linker='cvm'))
        assert not updates
        f = theano.function([s_r], s_y)

        t2 = time.time()
        f(r)
        t3 = time.time()
        print('theano (scan, cvm)', t3 - t2)

    if 1:
        r = np.arange(10000).astype(theano.config.floatX).reshape(-1, 10)
        shared_r = theano.shared(r)
        s_i = theano.shared(np.array(1))
        s_rinc = tensor.inc_subtensor(shared_r[s_i], shared_r[s_i - 1],
                                      tolerate_inplace_aliasing=True)
        # theano.printing.debugprint(s_rinc)
        f = theano.function([],
                            [],
                            updates=OrderedDict([
                                (s_i, s_i + 1),
                                (shared_r, s_rinc)]),
                            mode=theano.Mode(linker='cvm'))
        f._check_for_aliased_inputs = False
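        # Each call to f advances the recurrence by one step: it writes
        # shared_r[s_i] += shared_r[s_i - 1] in place and increments s_i.
        # Reading and writing overlapping rows of the same underlying array
        # is why tolerate_inplace_aliasing is set above and the aliasing
        # check is disabled here.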
        t2 = time.time()
        f_fn = f.fn
        for i in xrange(998):
            f_fn()
        f()  # the 999th call, through f(), to update the profiling timers
        t3 = time.time()
        print('theano (updates, cvm)', t3 - t2)
        # print(shared_r.get_value())


def test_speed_rnn():
    #
    # This function prints out the speed of recurrent neural network
    # calculations implemented in various ways.  In DebugMode this will
    # test the correctness of the optimizations applied, but generally
    # correctness-testing is not the goal of this test.
    #
    # To be honest, it isn't really a unit test so much as a tool for testing
    # approaches to scan.
    #
    # The computation being tested here is a repeated tanh of a matrix-vector
    # multiplication - the heart of an ESN or RNN.
    #
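    # i.e. r[i] = tanh(dot(r[i - 1], w)) for an (L, N) state array r and an
    # (N, N) weight matrix w.
    #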

    # We need the CVM for this speed test
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")

    L = 10000
    N = 50

    np.random.seed(2523452)
    r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
    w = np.random.randn(N, N).astype(theano.config.floatX)

    t0 = time.time()
    for i in xrange(1, L):
        r[i] = np.tanh(np.dot(r[i - 1], w))
    t1 = time.time()
    print('python', t1 - t0)

    if 1:
        r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
        s_r = tensor.matrix()
        s_y, updates = theano.scan(
            fn=lambda ri, rii: tensor.tanh(tensor.dot(rii, w)),
            sequences=[s_r[1:]],
            outputs_info=tensor.constant(r[0]),
            mode=theano.Mode(linker='cvm'))
        assert not updates
        f = theano.function([s_r], s_y, mode=theano.Mode(linker='cvm'))

        t2 = time.time()
        f(r)
        t3 = time.time()
        print('theano (scan, cvm)', t3 - t2)

    if 1:
        r = np.arange(L * N).astype(theano.config.floatX).reshape(L, N)
        s_w = theano.shared(w)
        shared_r = theano.shared(r)
        s_i = theano.scalar.sharedvar.shared(1)
        s_rinc = tensor.inc_subtensor(
            shared_r[s_i],
            theano.tensor.tanh(
                theano.tensor.dot(
                    shared_r[s_i - 1],
                    w)),
            tolerate_inplace_aliasing=True)
        f = theano.function([], [],
                            updates=OrderedDict([
                                (s_i, s_i + 1),
                                (shared_r, s_rinc)]),
                            mode=theano.Mode(linker='cvm'))
        # theano.printing.debugprint(f)
        f_fn = f.fn
        # print(f_fn)
        t2 = time.time()
        f_fn(n_calls=L - 2)
        f()  # final call, through f(), to update the profiling timers
        t3 = time.time()
        print('theano (updates, cvm)', t3 - t2)
        # print(shared_r.get_value())


def test_speed_batchrnn():
    #
    # This function prints out the speed of recurrent neural network
    # calculations implemented in various ways.
    #
    # We force the mode to theano.Mode(linker='cvm'). If you manually
    # change this code to use DebugMode it will test the correctness
    # of the optimizations applied, but generally correctness-testing
    # is not the goal of this test.
    #
    # To be honest, it isn't really a unit test so much as a tool for testing
    # approaches to scan.
    #
    # The computation being tested here is a repeated tanh of a matrix-vector
    # multiplication - the heart of an ESN or RNN.
    #
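    # i.e. r[i] = tanh(dot(r[i - 1], w)) with r of shape (L, B, N), so each
    # step updates a whole (B, N) minibatch at once.
    #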

    # We need the CVM for this speed test
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    L = 100
    B = 50
    N = 400

    np.random.seed(2523452)
    r = np.arange(B * L * N).astype(theano.config.floatX).reshape(L, B, N)
    w = np.random.randn(N, N).astype(theano.config.floatX)

    t0 = time.time()
    for i in xrange(1, L):
        r[i] = np.tanh(np.dot(r[i - 1], w))
    t1 = time.time()
    print('python', t1 - t0)

    if 1:
        r = np.arange(B * L * N).astype(
            theano.config.floatX).reshape(L, B, N)
        s_w = theano.shared(w)
        shared_r = theano.shared(r)
        s_i = theano.scalar.sharedvar.shared(1)
        s_rinc = tensor.inc_subtensor(
            shared_r[s_i],
            theano.tensor.tanh(
                theano.tensor.dot(
                    shared_r[s_i - 1],
                    w)),
            tolerate_inplace_aliasing=True)
        f = theano.function([],
                            [],
                            updates=[
                                (s_i, s_i + 1),
                                (shared_r, s_rinc)],
                            mode=theano.Mode(linker='cvm'))
        # theano.printing.debugprint(f)
        f_fn = f.fn
        # print(f_fn)
        t2 = time.time()
        f_fn(n_calls=L - 2)
        f()  # final call, through f(), to update the profiling timers
        t3 = time.time()
        print('theano (updates, cvm)', t3 - t2)


if __name__ == '__main__':
    #'''
    print(' Use nosetests to run these tests ')
    '''
    scan_tst = T_Scan()
    #''
    print(1)
    scan_tst.test_generator_one_output_scalar()
    #''
    print(2)
    scan_tst.test_one_sequence_one_output_weights()

    #''
    print(3)
    scan_tst.test_one_sequence_one_output_weights_shared()

    #''
    print(4)
    scan_tst.test_multiple_inputs_multiple_outputs()
    #''
    print(5)
    scan_tst.test_using_taps_input_output()

    #''
    print(6)
    scan_tst.test_past_future_taps_shared()
    #''
    print(7)
    scan_tst.test_inplace1()
    #''
    print(8)
    scan_tst.test_inplace2()
    #''
    print(9)
    scan_tst.test_shared_arguments_with_updates()

    print(10)
    scan_tst.test_simple_shared_random()

    print(11)
    scan_tst.test_only_shared_no_input_no_output()

    print(12)
    scan_tst.test_map_functionality()

    print(13)
    scan_tst.test_map()
    #''
    print(14)
    scan_tst.test_backwards()
    #''

    print(15)
    scan_tst.test_reduce()

    print(15.5)
    scan_tst.test_save_mem()
    #''
    print(16)
    scan_tst.test_grad_one_output()
    #''
    print(17)
    scan_tst.test_grad_multiple_outs()
    #''
    print(17.5)
    scan_tst.test_multiple_outs_taps()
    #''
    print(18)
    scan_tst.test_grad_multiple_outs_taps()
    #''
    print(19)
    scan_tst.test_grad_multiple_outs_taps_backwards()
    #''
    print(20)
    scan_tst.test_grad_multiple_outs_some_uncomputable()
    #''
    print(21)
    scan_tst.test_grad_multiple_outs_some_truncate()
    #''
    print(22)
    scan_tst.test_grad_of_shared()
    #''
    print(23)
    scan_tst.test_computing_gradient()
    #''
    print(24)
    scan_tst.test_scan_output_padding()

    print(25)
    scan_tst.test_scan_extra_inputs_hessian()
    #''
    print(26)
    scan_tst.test_cloning_no_replace_strict_copy_inputs()

    print(27)
    scan_tst.test_cloning_no_replace_strict_not_copy_inputs()

    print(28)
    scan_tst.test_cloning_replace_strict_copy_inputs()

    print(29)
    scan_tst.test_cloning_replace_not_strict_copy_inputs()

    print(30)
    scan_tst.test_cloning_replace_strict_not_copy_inputs()

    print(31)
    scan_tst.test_cloning_replace_not_strict_not_copy_inputs()
    #''
    print(32)
    scan_tst.test_draw_as_input_to_scan()
    #''
    print(33)
    scan_tst.test_reordering()
    #''
    print(34)
    scan_tst.test_return_steps()
    #''
    print(35)
    scan_tst.test_scan_as_tensor_on_gradients()
    #''
    print(36)
    scan_tst.test_save_mem_reduced_number_of_steps()
    #''
    print(37)
    scan_tst.test_save_mem_store_steps()
    #'''


def test_compute_test_value():
    # Verify that test values can be used with scan.
    backup = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        x = tensor.vector('x')
        xv = np.ones(3, dtype=theano.config.floatX)
        x.tag.test_value = xv
        y = theano.shared(np.arange(3, dtype=theano.config.floatX),
                          name='y')
        z, updates = theano.scan(
            fn=lambda u, v: u + v,
            sequences=[x, y])
        assert not updates
        z.name = 'z'
        # The gradient computation used to crash before 6af465e.
        g = tensor.grad(z.sum(), x)
        # f = theano.function([x], g)
        # print(f(xv))
    finally:
        theano.config.compute_test_value = backup


def test_compute_test_value_nonseq():
    # Verify that test values can be used for non_sequences with scan.
    backup = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        x = tensor.vector('x')
        xv = np.ones(3, dtype=theano.config.floatX)
        x.tag.test_value = xv
        y = theano.shared(
            np.arange(9, dtype=theano.config.floatX).reshape(3, 3),
            name='y')
        z, updates = theano.scan(
            fn=lambda u, v: u + v,
            sequences=[x],
            non_sequences=[y])
        assert not updates
        z.name = 'z'
        # The gradient computation used to crash before 6af465e.
        g = tensor.grad(z.sum(), x)
        # f = theano.function([x], g)
        # print(f(xv))
    finally:
        theano.config.compute_test_value = backup


def test_compute_test_value_grad():
    # Test case originally reported by Bitton Tenessi
    # https://groups.google.com/d/msg/theano-users/fAP3i2CbskQ/3OgBf4yjqiQJ
    WEIGHT = np.array([1, 2, 1, 3, 4, 1, 5, 6, 1, 7, 8, 1],
                      dtype='float32')

    old_compute_test_val = theano.config.compute_test_value
    old_exception_verbosity = theano.config.exception_verbosity
    try:
        theano.config.compute_test_value = 'raise'
        theano.config.exception_verbosity = 'high'

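        # The graph below nests one scan inside another: an outer loop over
        # W.shape[0] whose step runs an inner loop over W.shape[1]. The test
        # checks that the gradient can be computed through both loops while
        # test values are enabled.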
        W_flat = tensor.fvector(name='W')
        W_flat.tag.test_value = WEIGHT
        W = W_flat.reshape((2, 2, 3))

        outputs_mi = tensor.as_tensor_variable(
            np.asarray(0, dtype='float32'))
        outputs_mi.tag.test_value = np.asarray(0, dtype='float32')

        def loss_mi(mi, sum_mi, W):
            outputs_ti = tensor.as_tensor_variable(
                np.asarray(0, dtype='float32'))
            outputs_ti.tag.test_value = np.asarray(0, dtype='float32')

            def loss_ti(ti, sum_ti, mi, W):
                return W.sum().sum().sum() + sum_ti

            result_ti, _ = theano.scan(
                fn=loss_ti,
                outputs_info=outputs_ti,
                sequences=tensor.arange(W.shape[1], dtype='int32'),
                non_sequences=[mi, W],
                )
            lossmi = result_ti[-1]
            return sum_mi + lossmi

        result_mi, _ = theano.scan(
            fn=loss_mi,
            outputs_info=outputs_mi,
            sequences=tensor.arange(W.shape[0], dtype='int32'),
            non_sequences=[W],
            )

        loss = result_mi[-1]
        tensor.grad(loss, W_flat)
    finally:
        theano.config.compute_test_value = old_compute_test_val
        theano.config.exception_verbosity = old_exception_verbosity


def test_compute_test_value_grad_cast():
    # Test for test values when variables have to be casted
    # Reported by Daniel Renshaw at
    # https://groups.google.com/d/topic/theano-users/o4jK9xDe5WI/discussion
    floatX = theano.config.floatX
    backup = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        h = tensor.matrix('h')
        h.tag.test_value = np.array([[1, 2, 3, 4], [5, 6, 7, 8]],
                                    dtype=floatX)

        w = theano.shared(np.random.randn(4, 3).astype(floatX), name='w')

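        # The second output is an integer counter initialized with the
        # Python int 0 and incremented at each step, which forces scan to
        # insert a cast; the test checks that test values survive it.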
        outputs, _ = theano.scan(lambda i, h, w: (theano.dot(h[i], w), i + 1),
                                 outputs_info=[None, 0], non_sequences=[h, w],
                                 n_steps=3)

        theano.grad(outputs[0].sum(), w)
    finally:
        theano.config.compute_test_value = backup


def test_constant_folding_n_steps():
    # The following code used to crash at revision 2060b8f, in the constant
    # folding optimization step.
    res, _ = theano.scan(lambda x: x * 2,
                         outputs_info=tensor.ones(()),
                         # The constant `n_steps` was causing the crash.
                         n_steps=10)
    on_opt_error = theano.config.on_opt_error
    theano.config.on_opt_error = 'raise'
    try:
        theano.function([], res)()
    finally:
        theano.config.on_opt_error = on_opt_error


def test_outputs_taps_check():
    # Checks that errors are raised with bad outputs_info taps.
    x = tensor.fvector('x')
    y = tensor.fvector('y')
    f = lambda x, y: [x]
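    # Output taps must be strictly negative (an output can only feed back
    # values from earlier steps) and may not repeat, so both calls below
    # should raise ValueError.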
    outputs_info = {'initial': y, 'taps': [0]}
    assert_raises(ValueError, theano.scan, f, x, outputs_info)
    outputs_info = {'initial': y, 'taps': [-1, -1]}
    assert_raises(ValueError, theano.scan, f, x, outputs_info)
    print('done')


def test_default_value_broadcasted():
    def floatx(X):
        return np.asarray(X, dtype=theano.config.floatX)

    def init_weights(shape, name):
        return theano.shared(floatx(np.random.randn(*shape) * 0.1), name)

    X = theano.tensor.matrix('X')
    in_size = 2
    out_size = 4
    W_x = init_weights((in_size, out_size), "W_x")

    def _active(x, pre_h):
        x = theano.tensor.reshape(x, (1, in_size))
        pre_h = theano.tensor.dot(x, W_x)
        return pre_h

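    # The initial state built with tensor.alloc has shape (1, out_size) and
    # is therefore broadcastable along its first dimension; the test checks
    # that scan, the gradient and the updates all handle such a broadcasted
    # default value.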
    value, scan_updates = theano.scan(
        _active, sequences=X,
        outputs_info=[theano.tensor.alloc(floatx(0.), 1, out_size)])
    cost = theano.tensor.mean(value)
    gW_x = theano.tensor.grad(cost, W_x)
    updates = [(W_x, W_x - 0.1 * gW_x)]
    f = theano.function([X], outputs=cost, updates=updates)
    f(np.random.rand(10, in_size).astype(X.dtype))


class TestInconsistentBroadcast(unittest.TestCase):

    def test_raise_error(self):
        x = tensor.tensor3()
        initial_x = tensor.constant(np.zeros((1, 10)))
        y, updates = theano.scan(fn=lambda x, prev_x: x + prev_x,
                                 sequences=x,
                                 outputs_info=[dict(initial=initial_x)])
        # Error, because the broadcast patterns are inconsistent.
        with self.assertRaises(TypeError):
            gs = tensor.grad(y.sum(), x)

        # No error here, because the broadcast patterns are consistent.
        initial_x = tensor.unbroadcast(initial_x, 0, 1)
        y, updates = theano.scan(fn=lambda x, prev_x: x + prev_x,
                                 sequences=x,
                                 outputs_info=[dict(initial=initial_x)])
        gs = tensor.grad(y.sum(), x)


class TestMissingInputError(unittest.TestCase):

    @raises(theano.gof.fg.MissingInputError)
    def test_raise_error(self):
        c = theano.shared(0.)
        inc = tensor.scalar('inc')

        def count_up():
            return tensor.zeros(()), {c: c + inc}

        _, updates = theano.scan(count_up, n_steps=20)
        func = theano.function(inputs=[inc], outputs=[], updates=updates)


class TestGradUntil(unittest.TestCase):

    def setUp(self):
        self.x = tensor.vector(name='x')
        self.threshold = tensor.scalar(name='threshold', dtype='int64')
        self.seq = np.arange(15, dtype=theano.config.floatX)
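        # until(x > threshold) with threshold=5 still emits the triggering
        # step, so the first 7 elements (x = 0..6) are squared and later
        # entries receive zero gradient.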
        self.numpy_output = self.seq[:7] ** 2
        z = np.zeros(8, dtype=theano.config.floatX)
        self.numpy_gradient = 2 * np.concatenate([self.seq[:7], z], axis=0)

    def test_grad_until(self):
        r, _ = theano.scan(lambda x, u: (x * x,
                                         theano.scan_module.until(x > u)),
                           sequences=self.x,
                           non_sequences=[self.threshold])
        g = theano.grad(r.sum(), self.x)
        f = theano.function([self.x, self.threshold], [r, g])
        theano_output, theano_gradient = f(self.seq, 5)

        utt.assert_allclose(theano_output, self.numpy_output)
        utt.assert_allclose(theano_gradient, self.numpy_gradient)

    def test_grad_until_ndim_greater_one(self):
        def tile_array(inp):
            n_cols = 5
            return np.tile(inp.reshape((-1, 1)), (1, n_cols))

        X = tensor.matrix(name='x')
        arr = tile_array(self.seq)
        r, _ = theano.scan(lambda x, u: (x * x,
                                         theano.scan_module.until(
                                             tensor.all(x > u))),
                           sequences=X,
                           non_sequences=[self.threshold])
        g = theano.grad(r.sum(), X)
        f = theano.function([X, self.threshold], [r, g])
        theano_output, theano_gradient = f(arr, 5)

        utt.assert_allclose(theano_output, tile_array(self.numpy_output))
        utt.assert_allclose(theano_gradient, tile_array(self.numpy_gradient))

    def test_grad_until_and_truncate(self):
        n = 3
        r, _ = theano.scan(lambda x, u: (x * x,
                                         theano.scan_module.until(x > u)),
                           sequences=self.x,
                           non_sequences=[self.threshold],
                           truncate_gradient=n)
        g = theano.grad(r.sum(), self.x)
        f = theano.function([self.x, self.threshold], [r, g])
        theano_output, theano_gradient = f(self.seq, 5)

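        # With truncate_gradient=n, only the last n of the 7 executed steps
        # receive gradient, so the earlier entries are zeroed out.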
        self.numpy_gradient[:7 - n] = 0
        utt.assert_allclose(theano_output, self.numpy_output)
        utt.assert_allclose(theano_gradient, self.numpy_gradient)

    def test_grad_until_and_truncate_sequence_taps(self):
        n = 3
        r, _ = theano.scan(lambda x, y, u: (x * y,
                                            theano.scan_module.until(y > u)),
                           sequences=dict(input=self.x, taps=[-2, 0]),
                           non_sequences=[self.threshold],
                           truncate_gradient=n)
        g = theano.grad(r.sum(), self.x)
        f = theano.function([self.x, self.threshold], [r, g])
        theano_output, theano_gradient = f(self.seq, 6)

        # Gradient computed by hand:
        numpy_grad = np.array([0, 0, 0, 5, 6, 10, 4, 5, 0, 0, 0, 0, 0, 0, 0])
        numpy_grad = numpy_grad.astype(theano.config.floatX)
        utt.assert_allclose(theano_gradient, numpy_grad)


def test_condition_hidden_inp():
    max_value = theano.tensor.scalar("max_value")
    n_steps = theano.tensor.iscalar("n_steps")
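
    # max_value is used only inside the until() condition, never in the
    # outputs, so it reaches the scan op as a "hidden" input; the test
    # checks that such a graph still compiles and runs.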
    def accum(prev_value, step):
        new_value = prev_value + step
        new_step = step + 1
        condition = theano.scan_module.until(new_value > max_value)
        return [new_value, new_step], condition

    rs, updates = theano.scan(
        fn=accum,
        outputs_info=[0, 0],
        n_steps=n_steps)

    f = theano.function(
        inputs=[max_value, n_steps],
        outputs=rs)

    _sum, total_steps = f(100, 100)


def test_mintap_onestep():
    seq = theano.tensor.ivector("seq")
    seq_info = dict(input=seq, taps=[2])

    def accum(seq_t, prev_sum):
        new_sum = prev_sum + seq_t
        return new_sum

    rs, updates = theano.scan(fn=accum,
                              sequences=seq_info,
                              outputs_info=0,
                              n_steps=1)

    f = theano.function(inputs=[seq],
                        outputs=rs)
    _seq = np.arange(20).astype("int32")
    _sum = f(_seq)
    print("sum %f" % _sum)
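    # With a (positive) sequence tap of 2 and a single step, scan reads
    # seq[2] == 2 and adds it to the initial sum of 0, hence the result 2.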
    assert _sum == 2