from __future__ import absolute_import, print_function, division

import copy
import logging
import time
import unittest

import numpy as np
from six.moves import xrange
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises, assert_true

import theano
import theano.scalar as scal
from six import StringIO
from theano import compile
from theano.compile import deep_copy_op, DeepCopyOp
from theano.compile import get_mode
from theano import config
from theano import function
from theano import gof
from theano import pprint
from theano import shared
from theano.gof import FunctionGraph
import theano.tensor.opt as opt
from theano.tensor.opt import (
    local_add_specialize,
    local_dimshuffle_lift,
    local_useless_dimshuffle_in_reshape,
    local_useless_alloc,
    local_merge_alloc,
    local_greedy_distributor,
    local_useless_reshape,
    local_reshape_to_dimshuffle,
    mul_canonizer,
    Shape_i,
    Assert,
    MakeVector,
    make_vector,
    local_canonicalize_alloc
    )
from theano import tensor
from theano import tensor as T
from theano.tensor import scalar, iscalar, lscalar, fscalar, dscalar
from theano.tensor import vector, lvector, fvector, dvector
from theano.tensor import matrix, fmatrix, dmatrix, tensor3
from theano.tensor import vectors, matrices, fmatrices, dmatrices
from theano.tensor import (
    AdvancedSubtensor,
    AdvancedSubtensor1,
    as_tensor_variable,
    IncSubtensor,
    AdvancedIncSubtensor,
    AdvancedIncSubtensor1,
    inplace,
    Join,
    join,
    Subtensor,
    TensorType,
    tile
    )
from theano.tensor.elemwise import DimShuffle
from theano.tensor.type import values_eq_approx_remove_nan
from theano.tests import unittest_tools as utt
from theano.gof.opt import check_stack_trace, out2in
from theano import change_flags
from nose.plugins.attrib import attr

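# FAST_COMPILE skips most optimizations, so the tests below substitute
# FAST_RUN to make sure the optimizations under test actually run.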
mode_opt = theano.config.mode
if mode_opt == 'FAST_COMPILE':
    mode_opt = 'FAST_RUN'
mode_opt = theano.compile.mode.get_mode(mode_opt)

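# out2in wraps the local optimizer into a graph-wide optimizer that is
# applied from the outputs towards the inputs of a FunctionGraph.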
dimshuffle_lift = out2in(local_dimshuffle_lift)

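# Pre-built optimizers queried from the optimization database at three
# levels: position_cutoff restricts a Query to the optimizations registered
# below that position, so 'stabilize' and 'specialize' are subsets of the
# full 'fast_run' set.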
_optimizer_stabilize = gof.Query(include=['fast_run'])
_optimizer_stabilize.position_cutoff = 1.51
_optimizer_stabilize = compile.optdb.query(_optimizer_stabilize)

_optimizer_specialize = gof.Query(include=['fast_run'])
_optimizer_specialize.position_cutoff = 2.01
_optimizer_specialize = compile.optdb.query(_optimizer_specialize)

_optimizer_fast_run = gof.Query(include=['fast_run'])
_optimizer_fast_run = compile.optdb.query(_optimizer_fast_run)


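# Helper: apply a DimShuffle with the given new dimension order to `x`.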
def ds(x, y):
    return DimShuffle(x.type.broadcastable, y)(x)


def optimize(g, level='fast_run'):
    if level == 'fast_run':
        _optimizer_fast_run.optimize(g)
    elif level == 'specialize':
        _optimizer_specialize.optimize(g)
    elif level == 'stabilize':
        _optimizer_stabilize.optimize(g)
    else:
        raise ValueError(level)
    return g


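# Helper: build three float64 TensorType variables with the given
# broadcastable patterns.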
def inputs(xbc=(0, 0), ybc=(0, 0), zbc=(0, 0)):
    x = TensorType(broadcastable=xbc, dtype='float64')('x')
    y = TensorType(broadcastable=ybc, dtype='float64')('y')
    z = TensorType(broadcastable=zbc, dtype='float64')('z')
    return x, y, z


class test_dimshuffle_lift(unittest.TestCase):
    def test_double_transpose(self):
        x, y, z = inputs()
        e = ds(ds(x, (1, 0)), (1, 0))
        g = FunctionGraph([x], [e])
        self.assertTrue(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x))]")
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) == "[x]")
        # no need to check_stack_trace as graph is supposed to be empty

    def test_merge2(self):
        x, y, z = inputs()
        e = ds(ds(x, (1, 'x', 0)), (2, 0, 'x', 1))
        g = FunctionGraph([x], [e])
        self.assertTrue(str(g) == "[InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{1,x,0}(x))]",
                        str(g))
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) == "[InplaceDimShuffle{0,1,x,x}(x)]", str(g))
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))

    def test_elim3(self):
        x, y, z = inputs()
        e = ds(ds(ds(x, (0, 'x', 1)), (2, 0, 'x', 1)), (1, 0))
        g = FunctionGraph([x], [e])
        self.assertTrue(str(g) == ("[InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}"
                                   "(InplaceDimShuffle{0,x,1}(x)))]"),
                        str(g))
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) == "[x]", str(g))
        # no need to check_stack_trace as graph is supposed to be empty

    def test_lift(self):
        x, y, z = inputs([False] * 1, [False] * 2, [False] * 3)
        e = x + y + z
        g = FunctionGraph([x, y, z], [e])

        # It does not really matter if the DimShuffles are inplace
        # or not.
        init_str_g_inplace = (
            "[Elemwise{add,no_inplace}(InplaceDimShuffle{x,0,1}"
            "(Elemwise{add,no_inplace}(InplaceDimShuffle{x,0}(x), y)), z)]")
        init_str_g_noinplace = (
            "[Elemwise{add,no_inplace}(DimShuffle{x,0,1}"
            "(Elemwise{add,no_inplace}(DimShuffle{x,0}(x), y)), z)]")
        self.assertTrue(str(g) in (init_str_g_inplace, init_str_g_noinplace),
                        str(g))

        opt_str_g_inplace = (
            "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
            "(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]")
        opt_str_g_noinplace = (
            "[Elemwise{add,no_inplace}(Elemwise{add,no_inplace}"
            "(DimShuffle{x,x,0}(x), DimShuffle{x,0,1}(y)), z)]")
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) in (opt_str_g_inplace, opt_str_g_noinplace),
                        str(g))
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))

    def test_recursive_lift(self):
        v = T.vector(dtype="float64")
        m = T.matrix(dtype="float64")
        out = ((v + 42) * (m + 84)).T
        g = FunctionGraph([v, m], [out])
        init_str_g = ("[InplaceDimShuffle{1,0}(Elemwise{mul,no_inplace}"
                      "(InplaceDimShuffle{x,0}(Elemwise{add,no_inplace}"
                      "(<TensorType(float64, vector)>, "
                      "InplaceDimShuffle{x}(TensorConstant{42}))), "
                      "Elemwise{add,no_inplace}"
                      "(<TensorType(float64, matrix)>, "
                      "InplaceDimShuffle{x,x}(TensorConstant{84}))))]")
        self.assertTrue(str(g) == init_str_g)
        new_out = local_dimshuffle_lift.transform(g.outputs[0].owner)[0]
        new_g = FunctionGraph(g.inputs, [new_out])
        opt_str_g = ("[Elemwise{mul,no_inplace}(Elemwise{add,no_inplace}"
                     "(InplaceDimShuffle{0,x}(<TensorType(float64, vector)>), "
                     "InplaceDimShuffle{x,x}(TensorConstant{42})), "
                     "Elemwise{add,no_inplace}(InplaceDimShuffle{1,0}"
                     "(<TensorType(float64, matrix)>), "
                     "InplaceDimShuffle{x,x}(TensorConstant{84})))]")
        self.assertTrue(str(new_g) == opt_str_g)
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(new_g, ops_to_check='all'))

    def test_useless_dimshuffle(self):
        x, _, _ = inputs()
        e = ds(x, (0, 1))
        g = FunctionGraph([x], [e])
        self.assertTrue(str(g) == "[InplaceDimShuffle{0,1}(x)]")
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) == "[x]")
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(hasattr(g.outputs[0].tag, 'trace'))

    def test_dimshuffle_on_broadcastable(self):
        x, y, z = inputs([False, True], [True, False, True], [False, False, True])
        u = tensor.constant(1)
        ds_x = ds(x, (0, 'x'))   # useless
        ds_y = ds(y, (2, 1, 0))  # useless
        ds_z = ds(z, (2, 1, 0))  # useful
        ds_u = ds(u, ('x',))  # useful
        g = FunctionGraph([x, y, z, u], [ds_x, ds_y, ds_z, ds_u])
        self.assertTrue(str(g) == "[InplaceDimShuffle{0,x}(x), InplaceDimShuffle{2,1,0}(y), InplaceDimShuffle{2,1,0}(z), InplaceDimShuffle{x}(TensorConstant{1})]")
        dimshuffle_lift.optimize(g)
        self.assertTrue(str(g) == "[x, y, InplaceDimShuffle{2,1,0}(z), InplaceDimShuffle{x}(TensorConstant{1})]")
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(hasattr(g.outputs[0].tag, 'trace'))


def test_local_useless_dimshuffle_in_reshape():
    vector = TensorType(broadcastable=(False,), dtype='float64')('vector')
    mat = TensorType(broadcastable=(False, False), dtype='float64')('mat')
    row = TensorType(broadcastable=(True, False), dtype='float64')('row')
    col = TensorType(broadcastable=(False, True), dtype='float64')('col')

    reshape_dimshuffle_vector = tensor.reshape(vector.dimshuffle('x', 0), vector.shape)
    reshape_dimshuffle_mat = tensor.reshape(mat.dimshuffle('x', 0, 'x', 1), mat.shape)
    reshape_dimshuffle_row = tensor.reshape(row.dimshuffle(1, 'x'), row.shape)
    reshape_dimshuffle_col = tensor.reshape(col.dimshuffle(0), col.shape)

    g = FunctionGraph([vector, mat, row, col],
                      [reshape_dimshuffle_vector, reshape_dimshuffle_mat,
                       reshape_dimshuffle_row, reshape_dimshuffle_col])

    print(str(g))
    assert_true(str(g) == "[Reshape{1}(InplaceDimShuffle{x,0}(vector), Shape(vector)), "
                          "Reshape{2}(InplaceDimShuffle{x,0,x,1}(mat), Shape(mat)), "
                          "Reshape{2}(InplaceDimShuffle{1,x}(row), Shape(row)), "
                          "Reshape{2}(InplaceDimShuffle{0}(col), Shape(col))]")
    useless_dimshuffle_in_reshape = out2in(local_useless_dimshuffle_in_reshape)
    useless_dimshuffle_in_reshape.optimize(g)
    assert_true(str(g) == "[Reshape{1}(vector, Shape(vector)), "
                          "Reshape{2}(mat, Shape(mat)), "
                          "Reshape{2}(row, Shape(row)), "
                          "Reshape{2}(col, Shape(col))]")

    # Check stacktrace was copied over correctly after opt was applied
    assert_true(check_stack_trace(g, ops_to_check='all'))

    # Check that the optimization does not get applied when the order
    # of dimensions has changed.
    reshape_dimshuffle_mat2 = tensor.reshape(mat.dimshuffle('x', 1, 'x', 0), mat.shape)
    h = FunctionGraph([mat], [reshape_dimshuffle_mat2])
    str_h = str(h)
    useless_dimshuffle_in_reshape.optimize(h)
    assert_true(str(h) == str_h)


def test_add_canonizer_problem0():
    n_segments = 10
    label = lscalar('label')
    segment_labels = label + theano._asarray([0] * n_segments, dtype='int64')

    r = segment_labels * 5
    f = function([label], r)
    f(3)

    # This was crashing in the past.
    c0 = theano.tensor.constant([True])
    c1 = theano.tensor.constant([True])
    theano.function([], c0 + c1)


class test_greedy_distribute(unittest.TestCase):
    def test_main(self):
        a, b, c, d, x, y, z = matrices('abcdxyz')

        # 1. ((a / z + b / x) * x * z) --> a * x + b * z
        e = (a / z + b / x) * x * z
        g = FunctionGraph([a, b, c, d, x, y, z], [e])
        # print pprint(g.outputs[0])
        mul_canonizer.optimize(g)
        gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor),
                          order='out_to_in').optimize(g)
        # print pprint(g.outputs[0])
        assert str(pprint(g.outputs[0])) == "((a * x) + (b * z))"

        # 2. ((a/x + b) * x) --> a + b*x
        e = (a / x + b) * x
        g = FunctionGraph([a, b, x], [e])
        # print pprint(g.outputs[0])
        mul_canonizer.optimize(g)
        gof.TopoOptimizer(gof.LocalOptGroup(local_greedy_distributor),
                          order='out_to_in').optimize(g)
        # print pprint(g.outputs[0])
        assert str(pprint(g.outputs[0])) == "(a + (b * x))"

    def test_kording_bug(self):
        x, y = vectors('xy')
        eps = scalar('eps')
        s = scalar('s')

        # r = theano.tensor.mul(theano.tensor.fill(x, 2.*a), x/a , (y+z) , a)
        # r = theano.tensor.mul((x/a+y) , a, z)
        r = tensor.mul(s - 1,
                       eps + x / s,
                       eps + y / s,
                       s)

        f = function([s, eps, x, y], r ** 2)

        s_val = np.asarray(4, dtype=config.floatX)
        eps_val = np.asarray(1.e-6, dtype=config.floatX)
        x_val = np.asarray([1.5, 2], dtype=config.floatX)
        y_val = np.asarray([2.3, 3.1], dtype=config.floatX)

        r0 = f(s_val, eps_val, x_val, y_val)
        r1 = f(s_val, eps_val, x_val, y_val)
        r2 = f(s_val, eps_val, x_val, y_val)

        assert np.all(r0 == r1)
        assert np.all(r0 == r2)


class test_canonize(unittest.TestCase):
    def test_muldiv(self):
        x, y, z = matrices('xyz')
        a, b, c, d = matrices('abcd')
        # e = (2.0 * x) / (2.0 * y)
        # e = (2.0 * x) / (4.0 * y)
        # e = x / (y / z)
        # e = (x * y) / x
        # e = (x / y) * (y / z) * (z / x)
        # e = (a / b) * (b / c) * (c / d)
        # e = (a * b) / (b * c) / (c * d)
        # e = 2 * x / 2
        # e = x / y / x
        # e = (x / x) * (y / y)
        e = (-1 * x) / y / (-2 * z)
        g = FunctionGraph([x, y, z, a, b, c, d], [e])
        print(pprint(g.outputs[0]))
        mul_canonizer.optimize(g)
        print(pprint(g.outputs[0]))

    def test_elemwise_multiple_inputs_optimisation(self):
        # Verify that the Canonizer merges sequential Elemwise({mul,add}), part 1.
        #
        # This part covers the cases that are already handled; it does not
        # include cases that should be handled but are not implemented yet.
        #
        # Test with and without DimShuffle.

        shp = (5, 5)
        fx, fy, fz = fmatrices('xyz')
        dx, dy, dz = dmatrices('xyz')
        # fv = fvector('r').dimshuffle('x', 0)
        # dv = dvector('s').dimshuffle('x', 0)
        fxv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fyv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fzv = theano._asarray(np.random.rand(*shp), dtype='float32')
        # fvv = theano._asarray(np.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
        # dxv = theano._asarray(np.random.rand(*shp), dtype='float64')
        # dyv = theano._asarray(np.random.rand(*shp), dtype='float64')
        # dzv = theano._asarray(np.random.rand(*shp), dtype='float64')
        # dvv = theano._asarray(np.random.rand(shp[0]), dtype='float64').reshape(1, shp[0])
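        # Each case below is (expression, symbolic inputs, value inputs,
        # expected number of Elemwise nodes after canonicalization,
        # expected output dtype); a dict dtype is resolved through
        # config.cast_policy in the loop below.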
        cases = [
            (fx + fy, (fx, fy), (fxv, fyv), 1, 'float32'),
            (fx * fy, (fx, fy), (fxv, fyv), 1, 'float32'),
            # (fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
            # (dx+dy+dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
            # (fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
            # (dx*dy*dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
            # (fx*fy*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
            # (dx*dy*(dx+dy+dz),(dx,dy,dz),(dxv,dyv,dzv),2,'float64'),
            # (fx*fy*(fx+fy+dz),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'),  # check mixed type add
            # (dz*fy*(fx+fy),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'),  # check mixed type mul
            # check with dimshuffle of constant
            (fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
                {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            (fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
                {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            # (2+fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
            # (2*fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
            (2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
                {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            (2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
                {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            # (fx*fy*2*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
            # (fx*fy*(2+fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
            (fx * fy * 2 * (fx + fy + fz + 2), (fx, fy, fz), (fxv, fyv, fzv), 2,
                {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),

            # check with broadcast of row
            # (fx+fy+fz+fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
            # (fx*fy*fz*fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
            # (fv+fx+fy+fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
            # (fv*fx*fy*fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
            # (fx*fy*fv*(fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
            # (fx*fy*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
            # (fx*fy*fv*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
            # (dx+dy+dz+dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
            # (dx*dy*dz*dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
            # (dv+dx+dy+dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
            # (dv*dx*dy*dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
            # (dx*dy*dv*(dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
            # (dx*dy*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
            # (dx*dy*dv*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
            ]  # [10:11]
        # print cases

        # We must be sure that the Canonizer is working, and that no other
        # optimisation, such as local_elemwise_fusion, hides a bug in the Canonizer.
        mode = compile.mode.get_default_mode()
        opt = gof.Query(["canonicalize"])
        opt = opt.excluding('local_elemwise_fusion')
        mode = mode.__class__(linker=mode.linker, optimizer=opt)
        for id, [g, sym_inputs, val_inputs,
                 nb_elemwise, out_dtype] in enumerate(cases):
            if isinstance(out_dtype, dict):
                out_dtype = out_dtype[config.cast_policy]
            f = compile.function(list(sym_inputs), g,
                                 # we need the optimisation enabled; DebugMode does this.
                                 mode=mode)

            out = f(*val_inputs)
            assert(len(f.maker.fgraph.toposort()) == nb_elemwise)
            assert(out_dtype == out.dtype)

    def test_elemwise_multiple_inputs_optimisation2(self):
        # Verify that the Canonizer merges sequential Elemwise({mul,add}), part 2.
        # This part covers cases that should be handled but are not implemented yet.
        # Test with and without DimShuffle.

        raise SkipTest("Current implementation of Canonizer does not "
                       "implement all cases. Skip the corresponding test.")

        shp = (5, 5)
        fx, fy, fz = fmatrices('xyz')
        dx, dy, dz = dmatrices('xyz')
        fv = fvector('r').dimshuffle('x', 0)
        dv = dvector('s').dimshuffle('x', 0)
        fxv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fyv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fzv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fvv = theano._asarray(np.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
        dxv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dyv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dzv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dvv = theano._asarray(np.random.rand(shp[0]), dtype='float64').reshape(1, shp[0])
        cases = [
            (fx + fy, (fx, fy), (fxv, fyv), 1, 'float32'),
            (fx * fy, (fx, fy), (fxv, fyv), 1, 'float32'),
            (fx + fy + fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (dx + dy + dz, (dx, dy, dz), (dxv, dyv, dzv), 1, 'float64'),
            (fx * fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (dx * dy * dz, (dx, dy, dz), (dxv, dyv, dzv), 1, 'float64'),
            (fx * fy * (fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
            (dx * dy * (dx + dy + dz), (dx, dy, dz), (dxv, dyv, dzv), 2, 'float64'),
            (fx * fy * (fx + fy + dz), (fx, fy, dz), (dxv, dyv, dzv), 2, 'float64'),  # check mixed type add
            (dz * fy * (fx + fy), (fx, fy, dz), (dxv, dyv, dzv), 2, 'float64'),  # check mixed type mul
            # check with dimshuffle of constant
            (fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (2 + fx + fy + fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (2 * fx * fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
            (fx * fy * 2 * (fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
            (fx * fy * (2 + fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
            (fx * fy * 2 * (fx + fy + fz + 2), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),

            # check with broadcast of row
            (fx + fy + fz + fv, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
            (fx * fy * fz * fv, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
            (fv + fx + fy + fz, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
            (fv * fx * fy * fz, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
            (fx * fy * fv * (fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
            (fx * fy * (fv + fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
            (fx * fy * fv * (fv + fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
            (dx + dy + dz + dv, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
            (dx * dy * dz * dv, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
            (dv + dx + dy + dz, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
            (dv * dx * dy * dz, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
            (dx * dy * dv * (dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
            (dx * dy * (dv + dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
            (dx * dy * dv * (dv + dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
            ]  # [10:11]
        # print cases

        # We must be sure that the Canonizer is working, and that no other
        # optimisation, such as local_elemwise_fusion, hides a bug in the Canonizer.
        mode = compile.mode.get_default_mode()
        mode._optimizer = gof.Query(["canonicalize"])
        mode._optimizer = mode._optimizer.excluding('local_elemwise_fusion')
        for id, [g, sym_inputs, val_inputs, nb_elemwise, out_dtype] in enumerate(cases):
            f = compile.function(list(sym_inputs), g,
                                 # we need the optimisation enabled; DebugMode does this.
                                 mode=mode)

            out = f(*val_inputs)
            assert(len(f.maker.fgraph.toposort()) == nb_elemwise)
            assert(out_dtype == out.dtype)

    @attr('slow')
    def test_multiple_case(self):
        # Test the cases taken from the comments in Canonizer:
        # x / x -> 1
        # (x * y) / x -> y
        # x / y / x -> 1 / y
        # x / y / z -> x / (y * z)
        # x / (y / z) -> (x * z) / y
        # (a / b) * (b / c) * (c / d) -> a / d
        # (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
        # 2 * x / 2 -> x
        # with and without DimShuffle
        # TODO: with DimShuffle

        shp = (3, 3)
        fx, fy, fz, fw = fmatrices('xyzw')
        dx, dy, dz, dw = dmatrices('xyzw')
        fv = fvector('r').dimshuffle('x', 0)
        dv = dvector('s').dimshuffle('x', 0)
        fxv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fyv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fzv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fwv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fvv = theano._asarray(np.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
        dxv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dyv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dzv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dwv = theano._asarray(np.random.rand(*shp), dtype='float64')
        dvv = theano._asarray(np.random.rand(shp[0]), dtype='float64').reshape(1, shp[0])

        # We must be sure that the Canonizer is working, and that no other
        # optimisation, such as local_elemwise_fusion, hides a bug in the Canonizer.
        mode = compile.mode.get_default_mode()

        opt = gof.Query(["canonicalize"])
        opt = opt.including('ShapeOpt', 'local_fill_to_alloc')
        opt = opt.excluding(
            'local_elemwise_fusion')
        mode = mode.__class__(linker=mode.linker, optimizer=opt)
        # test x / x -> 1
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                (fx / fx, [fx], [fxv], 'float32'),
                (dx / dx, [dx], [dxv], 'float64'),
                (fv / fv, [fv], [fvv], 'float32'),
                (dv / dv, [dv], [dvv], 'float64')]):
            f = compile.function(list(sym_inputs), g,
                                 mode=mode)
            out = f(*val_inputs)
            assert (out == np.ones(shp, dtype=out_dtype)).all()
            topo = f.maker.fgraph.toposort()
            if sym_inputs[0].broadcastable[0]:
                assert len(topo) == 2
                assert isinstance(topo[0].op, Shape_i)
                assert isinstance(topo[1].op, tensor.Alloc)
            else:
                assert len(topo) == 3
                assert isinstance(topo[0].op, Shape_i)
                assert isinstance(topo[1].op, Shape_i)
                assert isinstance(topo[2].op, tensor.Alloc)
            assert(out_dtype == out.dtype)

        # test (x * y) / x -> y
        for id, (g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
                ((dx * dy) / dx, [dx, dy], [dxv, dyv], 0, 'float64'),
                ((fx * fy) / fx, [fx, fy], [fxv, fyv], 0, 'float32'),
                ((dv * dy) / dv, [dv, dy], [dvv, dyv], 0, 'float64'),
                ((fv * fy) / fv, [fv, fy], [fvv, fyv], 0, 'float32'),
                # must broadcast as there is a dimshuffle in the computation
                ((dx * dv) / dx, [dx, dv], [dxv, dvv], 1, 'float64'),
                # topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
                ((fx * fv) / fx, [fx, fv], [fxv, fvv], 1, 'float32')
                # topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
                ]):
            f = compile.function(list(sym_inputs), g,
                                 mode=mode)
            out = f(*val_inputs)
            assert(out_dtype == out.dtype)
            utt.assert_allclose(out, val_inputs[1])
            topo = f.maker.fgraph.toposort()
            if topo and not(len(topo) == 1 and topo[0].op == deep_copy_op):
                for node in topo[:-1]:
                    assert isinstance(node.op, Shape_i)
                assert isinstance(topo[-1].op, tensor.Alloc)

        # test x / y / x -> 1 / y
        for id, (g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
                ((dx / dy) / dx, [dx, dy], [dxv, dyv], 1, 'float64'),
                ((fx / fy) / fx, [fx, fy], [fxv, fyv], 1, 'float32'),
                ((dv / dy) / dv, [dv, dy], [dvv, dyv], 1, 'float64'),
                ((fv / fy) / fv, [fv, fy], [fvv, fyv], 1, 'float32'),
                # must broadcast as there is a dimshuffle in the computation
                ((dx / dv) / dx, [dx, dv], [dxv, dvv], 1, 'float64'),
                # topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc]
                ((fx / fv) / fx, [fx, fv], [fxv, fvv], 1, 'float32'),
                # topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc]
                ]):
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, (1 / val_inputs[1]))
            topo = f.maker.fgraph.toposort()
            elem = [t for t in topo if isinstance(t.op, T.Elemwise)]
            assert len(elem) == nb_elemwise
            assert isinstance(elem[0].op, (T.Elemwise, ))
            assert isinstance(elem[0].op.scalar_op, (
                theano.scalar.basic.Inv, theano.scalar.basic.TrueDiv))
            assert(out_dtype == out.dtype)

        # test (a / b) * (b / c) * (c / d) -> a / d
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                ((dx / dy) * (dy / dz) * (dz / dw), [dx, dy, dz, dw], [dxv, dyv, dzv, dwv], 'float64'),
                ((fx / fy) * (fy / fz) * (fz / fw), [fx, fy, fz, fw], [fxv, fyv, fzv, fwv], 'float32'),
                ((dv / dy) * (dy / dz) * (dz / dw), [dv, dy, dz, dw], [dvv, dyv, dzv, dwv], 'float64'),
                ((fv / fy) * (fy / fz) * (fz / fw), [fv, fy, fz, fw], [fvv, fyv, fzv, fwv], 'float32'),
                ((dx / dv) * (dv / dz) * (dz / dw), [dx, dv, dz, dw], [dxv, dvv, dzv, dwv], 'float64'),
                ((fx / fv) * (fv / fz) * (fz / fw), [fx, fv, fz, fw], [fxv, fvv, fzv, fwv], 'float32'),
                ((dx / dy) * (dy / dv) * (dv / dw), [dx, dy, dv, dw], [dxv, dyv, dvv, dwv], 'float64'),
                ((fx / fy) * (fy / fv) * (fv / fw), [fx, fy, fv, fw], [fxv, fyv, fvv, fwv], 'float32'),
                ((dx / dy) * (dy / dz) * (dz / dv), [dx, dy, dz, dv], [dxv, dyv, dzv, dvv], 'float64'),
                ((fx / fy) * (fy / fz) * (fz / fv), [fx, fy, fz, fv], [fxv, fyv, fzv, fvv], 'float32'),
                ]):
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, (val_inputs[0] / val_inputs[3]))
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert isinstance(topo[0].op, (T.Elemwise, ))
            assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.TrueDiv)
            assert len(topo[0].inputs) == 2
            assert(out_dtype == out.dtype)

        # test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                (((2.0 * dx) / (4.0 * dy)), [dx, dy], [dxv, dyv], 'float64'),
                (((2.0 * fx) / (4.0 * fy)), [fx, fy], [fxv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                (((2.0 * dv) / (4.0 * dy)), [dv, dy], [dvv, dyv], 'float64'),
                (((2.0 * fv) / (4.0 * fy)), [fv, fy], [fvv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                (((2.0 * dx) / (4.0 * dv)), [dx, dv], [dxv, dvv], 'float64'),
                (((2.0 * fx) / (4.0 * fv)), [fx, fv], [fxv, fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ]):
            if isinstance(out_dtype, dict):
                out_dtype = out_dtype[config.cast_policy]
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, (0.5 * val_inputs[0] / val_inputs[1]))
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, (T.Elemwise, ))
            assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Mul)
            assert len(topo[0].inputs) == 2
            assert isinstance(topo[1].op, (T.Elemwise, ))
            assert isinstance(topo[1].op.scalar_op, theano.scalar.basic.TrueDiv)
            assert len(topo[1].inputs) == 2
            assert(out_dtype == out.dtype)

        # test 2 * x / 2 -> x
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                ((2 * dx) / 2, [dx], [dxv], 'float64'),
                ((2 * fx) / 2, [fx], [fxv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ((2 * dv) / 2, [dv], [dvv], 'float64'),
                ((2 * fv) / 2, [fv], [fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ]):
            if isinstance(out_dtype, dict):
                out_dtype = out_dtype[config.cast_policy]
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, val_inputs[0])
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 1
            assert topo[0].op == deep_copy_op
            assert(out_dtype == out.dtype)

        # test x / abs(x) -> sign(x)
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                (dx / abs(dx), [dx], [0.5 - dxv], 'float64'),
                (fx / abs(fx), [fx], [0.5 - fxv], 'float32'),
                (dx / abs(dx), [dx], [0.1 * dxv], 'float64'),
                (fx / abs(fx), [fx], [0.1 * fxv], 'float32'),
                (dv / abs(dv), [dv], [0.5 - dvv], 'float64'),
                (fv / abs(fv), [fv], [0.5 - fvv], 'float32'),
                ]):
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            assert np.all(np.isfinite(out))
            utt.assert_allclose(out, np.sign(val_inputs[0]))
            assert(out_dtype == out.dtype)
            assert len(f.maker.fgraph.toposort()) == 1

        # test (2*x) / (3*abs(x)) -> sign(x)
        for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
                ((2 * dx) / (3 * abs(dx)), [dx], [0.5 - dxv], 'float64'),
                ((2 * fx) / (3 * abs(fx)), [fx], [0.5 - fxv],
                    {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ((2 * dx) / (3 * abs(dx)), [dx], [0.1 * dxv], 'float64'),
                ((2 * fx) / (3 * abs(fx)), [fx], [0.1 * fxv],
                    {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ((2 * dv) / (3 * abs(dv)), [dv], [0.5 - dvv], 'float64'),
                ((2 * fv) / (3 * abs(fv)), [fv], [0.5 - fvv],
                    {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
                ]):

            if isinstance(out_dtype, dict):
                out_dtype = out_dtype[config.cast_policy]
            f = compile.function(list(sym_inputs), g,
                                 mode=mode)
            topo = f.maker.fgraph.toposort()
            out = f(*val_inputs)
            assert np.all(np.isfinite(out))
            utt.assert_allclose(out, np.sign(val_inputs[0]) * 2 / 3)
            assert(out_dtype == out.dtype)

    def test_abs_mul_div(self):
        # Test that an expression such as
        # 4 * x / abs(2 * x) gets simplified during canonicalization.

        x = T.dscalar()
        # a = T.abs_(x)

        if theano.config.mode == 'FAST_COMPILE':
            mode = theano.compile.mode.get_mode('FAST_RUN').excluding(
                "local_elemwise_fusion")
        else:
            mode = theano.compile.mode.get_default_mode().excluding(
                "local_elemwise_fusion")

        f = theano.function([x], [(4 * x) / abs(2 * x)], mode=mode)
        print(f.maker.fgraph.toposort())
        print()
        f(.1)
        f(-1)
        # Some stabilization optimizations make the output finite instead of NaN.
        # DebugMode will raise an error when it sees NaN.
        if not isinstance(mode, theano.compile.debugmode.DebugMode):
            assert np.isfinite(f(0))

        assert len(f.maker.fgraph.toposort()) == 2
        assert f.maker.fgraph.toposort()[0].op == T.sgn

        f = theano.function([x], [(4 * x) / abs(x / 2)], mode=mode)
        print(f.maker.fgraph.toposort())
        print()
        f(.1)
        f(-1)
        # Some stabilization optimizations make the output finite instead of NaN.
        # DebugMode will raise an error when it sees NaN.
        if not isinstance(mode, theano.compile.debugmode.DebugMode):
            assert np.isfinite(f(0))

        assert len(f.maker.fgraph.toposort()) == 2
        assert f.maker.fgraph.toposort()[0].op == T.sgn

    def test_multiple_case_that_fail(self):
        raise SkipTest("Current implementation of Canonizer does not "
                       "implement all cases. Skip the corresponding test.")

        shp = (4, 4)
        fx, fy, fz = fmatrices('xyz')
        dx, dy, dz = dmatrices('xyz')
        fxv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fyv = theano._asarray(np.random.rand(*shp), dtype='float32')
        fzv = theano._asarray(np.random.rand(*shp), dtype='float32')
        dxv = theano._asarray(np.random.rand(*shp), dtype='float32')
        dyv = theano._asarray(np.random.rand(*shp), dtype='float32')
        dzv = theano._asarray(np.random.rand(*shp), dtype='float32')
        # fvv = theano._asarray(np.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
        # We must be sure that the Canonizer is working, and that no other
        # optimisation, such as local_elemwise_fusion, hides a bug in the Canonizer.
        mode = compile.mode.get_default_mode()

        opt = gof.Query(["canonicalize"])
        opt = opt.excluding('local_elemwise_fusion')
        mode = mode.__class__(linker=mode.linker, optimizer=opt)
        # test fail!
        # test x / y / z -> x / (y * z)
        for (g, sym_inputs, val_inputs, out_dtype) in [
                ((dx / dy) / dz, [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
                ((fx / fy) / fz, [fx, fy, fz], [fxv, fyv, fzv], 'float32')
                ]:
            f = compile.function(list(sym_inputs), g, mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, val_inputs[0] / val_inputs[1] / val_inputs[2])
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, (T.Elemwise, ))
            assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
            assert len(topo[0].inputs) == 1
            assert(out_dtype == out.dtype)

        # test x / (y / z) -> (x * z) / y
        for (g, sym_inputs, val_inputs, out_dtype) in [
                (dx / (dy / dz), [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
                (fx / (fy / fz), [fx, fy, fz], [fxv, fyv, fzv], 'float32')
                ]:
            f = compile.function(list(sym_inputs), g,
                                 mode=mode)
            out = f(*val_inputs)
            utt.assert_allclose(out, val_inputs[0] / (val_inputs[1] / val_inputs[2]))
            topo = f.maker.fgraph.toposort()
            assert len(topo) == 2
            assert isinstance(topo[0].op, (T.Elemwise, ))
            assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
            assert len(topo[0].inputs) == 1
            assert(out_dtype == out.dtype)

    def test_dont_merge_if_multiple_client(self):
        # Test cases taken from the comments in Canonizer.
        raise SkipTest("Not implemented")

    def test_canonicalize_nan(self):
        # Regression test for bug in canonicalization of NaN values.
        # This bug caused an infinite loop which was caught by the equilibrium
        # optimizer, resulting in an error log message.

        sio = StringIO()
        handler = logging.StreamHandler(sio)
        handler.setLevel(logging.ERROR)
        logging.getLogger('theano.gof.opt').addHandler(handler)
        try:
            x = vector()
            theano.function([x], x + np.nan)
        finally:
            logging.getLogger('theano.gof.opt').removeHandler(handler)
        # Ideally this test would only catch the maxed out equilibrium
        # optimizer error message, but to be safe in case this message
        # is modified in the future, we assert that there is no error
        # at all.
        assert not sio.getvalue()


def test_local_merge_abs():
    x, y, z = T.matrices('xyz')
    x_val = np.random.rand(5, 5).astype(config.floatX)
    y_val = np.random.rand(5, 5).astype(config.floatX)
    z_val = np.random.rand(5, 5).astype(config.floatX)
    mode = theano.config.mode
    if mode == "FAST_COMPILE":
        mode = "FAST_RUN"
    mode = theano.compile.mode.get_mode(mode).excluding(
        "local_elemwise_fusion")

    f = theano.function([y, z], (abs(y * z * -2)), mode=mode)
    f(y_val, z_val)
    assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, scal.Abs)
    assert len(f.maker.fgraph.toposort()) == 2

    f = theano.function([x, y], abs(x / y), mode=mode)
    f(x_val, y_val)
    assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, scal.Abs)
    assert len(f.maker.fgraph.toposort()) == 2


def test_merge_abs_bugfix():
    # Test crash in optimization reported by Jeremiah Lowin at
    # https://groups.google.com/d/topic/theano-users/TaXfqXP2Mj0/discussion
    input = T.matrix()
    # normalize on cols
    step1 = input / input.sum(0)
    # normalize on rows
    step2 = step1 / step1.sum(1)
    # get l1 norm
    l1_norm = T.abs_(step2).sum()
    theano.function([input], T.grad(l1_norm, input))


def test_mixeddiv():
    # Test that int division is preserved
    i = iscalar()
    d = dscalar()
    assert 0 == function([i, d], d * (i // (i + 1)))(3, 1.0)


def test_const_type_in_mul_canonizer():
    input = dmatrix()
    w = dmatrix()
    visb = dvector()
    hidb = dvector()
    betas = dvector()
    a = dvector()

    def sigm(x):
        return 1. / (1 + tensor.exp(-x))

    hid = sigm((tensor.dot(w, input) + hidb) * betas)

    vis_gauss1 = (tensor.dot(w.T, hid) + visb) * betas / (2 * a * a)
    vis_gauss2 = (tensor.dot(w.T, hid) + visb) * betas / (2. * a * a)

    f1 = function([input, w, visb, hidb, betas, a], vis_gauss1)
    f2 = function([input, w, visb, hidb, betas, a], vis_gauss2)

    ival = np.random.rand(5, 5)
    wval = np.random.rand(5, 5)
    visbval = np.random.rand(5)
    hidbval = np.random.rand(5)
    betaval = np.random.rand(5)
    aval = np.random.rand(5)

    utt.assert_allclose(
        f2(ival, wval, visbval, hidbval, betaval, aval),
        f1(ival, wval, visbval, hidbval, betaval, aval))


def test_cast_in_mul_canonizer():
    x, y = tensor.vectors('xy')
    m = tensor.minimum(x, y)
    o = m.sum()
    go = tensor.fill(o, 1)
    e = tensor.eq(go, x)
    o1 = (1 - e) * go
    o2 = e * go
    mode = theano.compile.get_default_mode().excluding('fusion').including('fast_run')
    f = theano.function([x, y], [o1, o2], mode=mode)
    theano.printing.debugprint(f, print_type=True)
    nodes = f.maker.fgraph.apply_nodes
    assert len([n for n in nodes if isinstance(getattr(n.op, 'scalar_op', None),
                                               theano.scalar.Identity)]) == 0
    assert len([n for n in nodes if isinstance(getattr(n.op, 'scalar_op', None),
                                               theano.scalar.Cast)]) == 1
    f([1], [1])


class test_fusion(unittest.TestCase):
    mode = copy.copy(compile.mode.get_default_mode())
    _shared = staticmethod(shared)
    topo_exclude = ()
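    # Op classes listed in `topo_exclude` are ignored when counting the nodes
    # of the compiled graph in `do()`; subclasses can override it to skip
    # extra ops they introduce.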

    def do(self, mode, shared_fn, shp, nb_repeat=1, assert_len_topo=True, slice=None):
        """
        param shared_fn: if None, will use compile.function

        Verify that elemwise fusion works, with and without DimShuffle.
        """
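        # Hypothetical usage (subclasses drive this with their own mode and
        # shared constructor), e.g.:
        #     self.do(self.mode, self._shared, shp=(5, 5))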
        # TODO: disable the canonizer?
        def my_init(shp, dtype='float64', num=0):
            ret = np.zeros(shp, dtype=dtype) + num
            return ret
        fw, fx, fy, fz = [theano.tensor.tensor(dtype='float32',
                                               broadcastable=[False] * len(shp),
                                               name=n) for n in 'wxyz']
        dw, dx, dy, dz = [theano.tensor.tensor(dtype='float64',
                                               broadcastable=[False] * len(shp),
                                               name=n) for n in 'wxyz']
        ix, iy, iz = [theano.tensor.tensor(dtype='int32',
                                           broadcastable=[False] * len(shp),
                                           name=n) for n in 'xyz']
        fv = fvector('v')
        fs = fscalar('s')

        fwv = my_init(shp, 'float32', 1)
        fxv = my_init(shp, 'float32', 2)
        fyv = my_init(shp, 'float32', 3)
        fzv = my_init(shp, 'float32', 4)
        fvv = theano._asarray(np.random.rand(shp[0]), dtype='float32')
        fsv = np.asarray(np.random.rand(), dtype='float32')
        dwv = my_init(shp, 'float64', 5)
        ixv = theano._asarray(my_init(shp, num=60), dtype='int32')
        iyv = theano._asarray(my_init(shp, num=70), dtype='int32')
        izv = theano._asarray(my_init(shp, num=70), dtype='int32')
        fwx = fw + fx
        ftanx = theano.tensor.tan(fx)
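        # Each case below is (expression, symbolic inputs, value inputs,
        # expected number of Elemwise nodes after fusion, expected answer,
        # expected output dtype); a dict dtype is resolved through
        # config.cast_policy in the loop below.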
        cases = [
            (fx + fy + fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv +
                fyv + fzv, 'float32'),  # 0
            (fx * fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv *
                fyv * fzv, 'float32'),  # 1
            (fx + fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv +
                fyv * fzv, 'float32'),  # 2
            (fx * fy + fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv *
                fyv + fzv, 'float32'),  # 3
            (fw + fx + fy + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),
            ((fw + fx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),  # 5
            (((fw + fx) + fy) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),
            ((fw + (fx + fy)) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),
            ((fw + (fx + fy) + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),
            (fw + (fx + (fy + fz)), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),
            ((fw + fx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv + fyv + fzv, 'float32'),  # 10
            (fw * fx * fy * fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv * fxv * fyv * fzv, 'float32'),
            (fw + fx * fy * fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fwv + fxv * fyv * fzv, 'float32'),
            (fx + fy * fz * fx, (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv + fyv * fzv * fxv, 'float32'),
            (fx * fy + fz + fy, (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv * fyv + fzv + fyv, 'float32'),
            (fx * fy * fz * fw + fx + fy + fz + fw, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
                fxv * fyv * fzv * fwv + fxv + fyv + fzv + fwv, 'float32'),  # 15
            # test with constant
            ((fw + fx) + (fy + fz) + 2., (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),
            (((fw + fx) + 2. + fy) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),
            ((fw + (fx + 2. + fy)) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),
            ((fw + (fx + fy) + 2 + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),
            (fw + (fx + (fy + fz) + 2.), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),  # 20
            (2 + (fw + fx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                1, fwv + fxv + fyv + fzv + 2, 'float32'),
            # mix float32 and float64
            (2 + (dw + fx) + (fy + fz), (dw, fx, fy, fz), (dwv, fxv, fyv, fzv),
                1, dwv + fxv + fyv + fzv + 2, 'float64'),
            (2 + (fw + dw) + (fy + fz), (fw, dw, fy, fz), (fwv, dwv, fyv, fzv),
                1, fwv + dwv + fyv + fzv + 2, 'float64'),
            (2 + (fw + fx) + (dw + fz), (fw, fx, dw, fz), (fwv, fxv, dwv, fzv),
                1, fwv + fxv + dwv + fzv + 2, 'float64'),
            (2 + (fw + fx) + (fy + dw), (fw, fx, fy, dw), (fwv, fxv, fyv, dwv),
                1, fwv + fxv + fyv + dwv + 2, 'float64'),  # 25
            # test when there are ops other than Elemwise.
            ((fwx.sum()) + (fwx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
                4, (fwv + fxv).sum() + fwv + fxv + fyv + fzv, 'float32'),
            # test other elemwise op
            (fx + fy + tensor.cos(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv + fyv + np.cos(fzv), 'float32'),
            (fx + fy + tensor.cosh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv + fyv + np.cosh(fzv), 'float32'),
            (fx + fy + abs(fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv + fyv +
                np.absolute(fzv), 'float32'),
            (ix + iy + abs(iz), (ix, iy, iz), (ixv, iyv, izv), 1, ixv + iyv +
                np.absolute(izv), 'int32'),  # 30
            (fx + fy + theano.tensor.log(fz), (fx, fy, fz), (fxv, fyv, fzv),
                1, fxv + fyv + np.log(fzv), 'float32'),
            (fx + fy + theano.tensor.log2(fz), (fx, fy, fz), (fxv, fyv, fzv),
                1, fxv + fyv + np.log2(fzv), 'float32'),
            (fx + fy + theano.tensor.log10(fz), (fx, fy, fz), (fxv, fyv, fzv),
                1, fxv + fyv + np.log10(fzv), 'float32'),
            (fx + fy ** fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv + fyv ** fzv,
                'float32'),  # pow
            (fx + fy + theano.tensor.exp(fz), (fx, fy, fz), (fxv, fyv, fzv),
                1, fxv + fyv + np.exp(fzv), 'float32'),  # 35
            (fx - fy - fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - fyv - fzv, 'float32'),
            (fx - (fy / fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv / fzv), 'float32'),
            (fx - theano.tensor.true_div(fy, 2), (fx, fy), (fxv, fyv),
                1, fxv - (fyv / 2), 'float32'),
            (fx - theano.tensor.true_div(fy, fz), (fx, fy, fz), (fxv, fyv, fzv),
                1, fxv - (fyv / fzv), 'float32'),
            (fx - theano.tensor.int_div(ix * 100, iy * 1000), (fx, ix, iy), (fxv, ixv, iyv),
                1, fxv - ((ixv * 100) // (iyv * 1000)), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),  # 40
            (fx - (fy / 2), (fx, fy), (fxv, fyv), 1, fxv - (fyv / 2), 'float32'),
            (fx - (fy % fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv % fzv), 'float32'),
            (fx - (fy > fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv > fzv), 'float32'),
            (fx - (fy >= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv >= fzv), 'float32'),
            (fx - (fy < fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv < fzv), 'float32'),  # 45
            (fx - (fy <= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv <= fzv), 'float32'),
            (fx - T.eq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - (fyv == fzv), 'float32'),
            (fx - T.neq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (
                fyv != fzv), 'float32'),
            (fx - fy + tensor.tan(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.tan(fzv), 'float32'),
            (fx - fy + tensor.tanh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.tanh(fzv), 'float32'),  # 50
            (fx - fy + tensor.sin(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.sin(fzv), 'float32'),
            (fx - fy + tensor.sinh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.sinh(fzv), 'float32'),
            (fx - fy + theano.tensor.sqr(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + (fzv * fzv), 'float32'),
            (fx - fy + theano.tensor.sqrt(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.sqrt(fzv), 'float32'),
            (fx - fy + theano.tensor.inv(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + (1 / fzv), 'float32'),  # 55
            (fx - fy + theano.tensor.neg(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + (-fzv), 'float32'),
            (fx - fy + theano.tensor.round(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
                fxv - fyv + np.round(fzv), 'float32'),
            (ix - iy + theano.tensor.iround(fz), (ix, iy, fz), (ixv, iyv, fzv), 1,
                ixv - iyv + np.round(fzv), 'int64'),
            # Bit op
            (fx - theano.tensor.or_(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
                fxv - (iyv | izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            (fx - theano.tensor.xor(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
                fxv - (iyv ^ izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),  # 60
            (fx - theano.tensor.and_(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
                fxv - (iyv & izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
            (fx - theano.tensor.invert(iy), (fx, iy), (fxv, iyv), 1,
                fxv - (~iyv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
1088
1089            (fx - theano.tensor.cast(fy, dtype='float64'), (fx, fy), (fxv, fyv), 1,
1090                fxv - np.asarray(fyv, 'float64'), 'float64'),
1091            (theano.tensor.pow(fx * fy + fz, fx * fy), (fx, fy, fz), (fxv, fyv, fzv), 1,
1092                np.power(fxv * fyv + fzv, fxv * fyv), 'float32'),
1093            (fv + fy ** fz, (fv, fy, fz), (fvv, fyv, fzv), 2, fvv + fyv ** fzv, 'float32'),  # fused with a dimshuffle #65
1094            (fv - fy + tensor.tanh(fz), (fv, fy, fz), (fvv, fyv, fzv), 2,
1095                fvv - fyv + np.tanh(fzv), 'float32'),  # fused with a dimshuffle
1096
1097            # Cases where the same input is reused many times.
1098            (theano.tensor.mul(fx, fx, fx, fx), (fx,), (fxv,), 1, fxv *
1099                fxv * fxv * fxv, 'float32'),
1100            (theano.tensor.mul(fx, ftanx, ftanx), (fx,), (fxv,), 1,
1101                fxv * np.tan(fxv) * np.tan(fxv), 'float32'),
1102            (theano.tensor.mul(fx, ftanx, ftanx, fx), (fx,), (fxv,),
1103                1, fxv * np.tan(fxv) * np.tan(fxv) * fxv, 'float32'),
1104            (theano.tensor.mul(ftanx, ftanx, fx + fy), (fx, fy), (fxv, fyv),
1105                1, np.tan(fxv) * np.tan(fxv) * (fxv + fyv), 'float32'),  # 70
1106
            # Cases with different broadcast patterns. They should not
            # be merged, as this would duplicate computation.
            # The graph should have 2 Elemwise nodes and 1 DimShuffle.
1110            (fx * theano.tensor.sin(fs), (fx, fs), (fxv, fsv), 3,
1111                fxv * np.sin(fsv), 'float32'),
1112            ]
1113        if slice:
1114            cases = cases[slice]
1115        times = np.zeros(len(cases))
1116        fail1 = []
1117        fail2 = []
1118        fail3 = []
1119        fail4 = []
1120        for id, [g, sym_inputs, val_inputs,
1121                 nb_elemwise, answer, out_dtype] in enumerate(cases):
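            # When out_dtype is a dict, it maps each possible
            # config.cast_policy value to the dtype expected under that policy.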
1122            if isinstance(out_dtype, dict):
1123                out_dtype = out_dtype[config.cast_policy]
1124
1125            if shared_fn is None:
                f = compile.function(list(sym_inputs), g, mode=mode)
                # t0 must be set in this branch too, otherwise
                # `times[id] = t1 - t0` below would use an undefined name.
                t0 = time.time()
                for x in xrange(nb_repeat):
                    out = f(*val_inputs)
                t1 = time.time()
1130            else:
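                # The compiled function writes g into the shared variable
                # `out` through `updates`; its value is read back with
                # get_value() after the timed loop.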
1131                out = shared_fn(np.zeros(shp, dtype=out_dtype), 'out')
1132                assert out.dtype == g.dtype
1133                f = function(sym_inputs, [], updates=[(out, g)], mode=mode)
1134                t0 = time.time()
1135                for x in xrange(nb_repeat):
1136                    f(*val_inputs)
1137                t1 = time.time()
1138                out = out.get_value()
1139
1140            times[id] = t1 - t0
1141            atol = 1e-8
1142            if out_dtype == 'float32':
1143                atol = 1e-6
1144            if not np.allclose(out, answer * nb_repeat, atol=atol):
1145                fail1.append(id)
1146                print("cases", id)
1147                print(val_inputs)
1148                print(out)
1149                print(answer * nb_repeat)
1150            topo = f.maker.fgraph.toposort()
1151            topo_ = [n for n in topo
1152                     if not isinstance(n.op, self.topo_exclude)]
1153            if assert_len_topo:
1154                if not len(topo_) == nb_elemwise:
1155                    fail3.append((id, topo_, nb_elemwise))
1156                if nb_elemwise == 1:
                    # If no variable appears multiple times in the
                    # inputs of g, check that the number of inputs to
                    # the Composite Elemwise is correct.
1161                    if len(set(g.owner.inputs)) == len(g.owner.inputs):
1162                        expected_len_sym_inputs = np.sum(
1163                            [not isinstance(x, theano.gof.Constant)
1164                             for x in topo_[0].inputs])
1165                        assert expected_len_sym_inputs == len(sym_inputs)
1166
1167            if not out_dtype == out.dtype:
1168                fail4.append((id, out_dtype, out.dtype))
1169
1170        failed = len(fail1 + fail2 + fail3 + fail4)
1171        if failed > 0:
1172            print("Executed", len(cases), "cases", "failed", failed)
1173            raise Exception("Failed %d cases" % failed, fail1,
1174                            fail2, fail3, fail4)
1175
1176        return times
1177
1178    def test_elemwise_fusion(self):
1179        shp = (5, 5)
1180        mode = copy.copy(self.mode)
        # We need the fusion optimizations enabled, plus canonicalize.
        # Canonicalize is needed to merge multiplications/additions by constants.
1183        mode._optimizer = mode._optimizer.including(
1184            'local_elemwise_fusion', 'composite_elemwise_fusion',
1185            'canonicalize')
1186        self.do(mode, self._shared, shp)
1187
1188    @attr('slow')
1189    def test_elemwise_fusion_4d(self):
1190        shp = (3, 3, 3, 3)
1191        mode = copy.copy(self.mode)
        # We need the fusion optimizations enabled, plus canonicalize.
        # Canonicalize is needed to merge multiplications/additions by constants.
1194        mode._optimizer = mode._optimizer.including(
1195            'local_elemwise_fusion', 'composite_elemwise_fusion',
1196            'canonicalize')
1197        self.do(mode, self._shared, shp, slice=slice(0, 1))
1198
1199    def test_fusion_35inputs(self):
1200        # Make sure a fused graph with more than 35 inputs does not segfault
1201        # or error.
1202        inpts = vectors(['i%i' % i for i in xrange(35)])
1203        # Make an elemwise graph looking like:
1204        # sin(i34 + sin(i33 + sin(... i1 + sin(i0) ...)))
1205        out = tensor.sin(inpts[0])
1206        for idx in xrange(1, 35):
1207            out = tensor.sin(inpts[idx] + out)
1208
1209        f = function(inpts, out, mode=self.mode)
1210        # Test it on some dummy values
1211        f(*[list(range(i, 4 + i)) for i in xrange(35)])
1212
1213    def test_pickle_big_fusion(self):
        # In the past, pickling the Composite generated in this case
        # crashed with a maximum recursion limit error, so we were not
        # able to generate C code for it.
1217
1218        if not theano.config.cxx:
1219            raise SkipTest("no c compiler, so can't use big elemwise!")
1220        factors = []
1221        sd = tensor.dscalar()
1222        means = tensor.dvector()
1223
1224        cst_05 = theano.tensor.constant(.5)
1225        cst_m05 = theano.tensor.constant(-.5)
1226        cst_2 = theano.tensor.constant(2)
1227        cst_m2 = theano.tensor.constant(-2)
1228        ones = theano.tensor.constant(np.ones(10))
1229        n = 85
1230        if theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
1231            n = 10
1232
1233        for i in xrange(n):
1234            f = (cst_m05 * sd ** cst_m2 * (ones - means[i]) ** cst_2 +
1235                 cst_05 * tensor.log(cst_05 * (sd ** cst_m2) / np.pi))
1236            factors.append(tensor.sum(f))
1237
1238        logp = tensor.add(*factors)
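        # Adding all the factors produces one very large expression; after
        # fusion it becomes a single big Composite, which is the case that
        # used to hit the recursion limit when pickled.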
1239
1240        vars = [sd, means]
1241        dlogp = function(vars, [theano.grad(logp, v) for v in vars])
1242        dlogp(2, np.random.rand(n))
1243
1244    def speed_fusion(self, s=None):
1245        """
        :type s: slice
        :param s: a slice to apply to the cases to execute. If None, execute all cases.
1248        """
1249
        # shp = (3000, 3000)  # larger alternative, overridden just below
        shp = (1000, 1000)
1252        nb_repeat = 50
1253        # linker=gof.CLinker
1254        # linker=gof.OpWiseCLinker
1255
1256        mode1 = copy.copy(self.mode)
1257        mode1._optimizer = mode1._optimizer.including('local_elemwise_fusion')
        # TODO: CLinker is much faster... but uses too much memory.
        # Possible cause: there is no deletion of intermediate values when we don't keep the function.
        # More plausible cause: we keep a link to the output data?
        # Follow-up: CLinker does the same... so the second cause?
1262        mode2 = copy.copy(self.mode)
1263        mode2._optimizer = mode2._optimizer.excluding('local_elemwise_fusion')
1264        print("test with linker", str(mode1.linker))
1265        times1 = self.do(mode1, self._shared, shp, nb_repeat=nb_repeat,
1266                         assert_len_topo=False, slice=s)
1267        times2 = self.do(mode2, self._shared, shp, nb_repeat=nb_repeat,
1268                         assert_len_topo=False, slice=s)
1269        print("times1 with local_elemwise_fusion")
1270        print(times1, times1.min(), times1.max(), times1.sum())
1271        print("times2 without local_elemwise_fusion")
1272        print(times2, times2.min(), times2.max(), times2.sum())
1273        d = times2 / times1
1274
1275        print("times2/times1")
1276        print(d)
1277        print("min", d.min(), "argmin", d.argmin(), "max", d.max(),
1278              "mean", d.mean(), "std", d.std())
1279
1280    def test_fusion_inplace(self):
1281        mode = copy.copy(self.mode)
        # We need the fusion optimizations enabled, plus canonicalize.
        # Canonicalize is needed to merge multiplications/additions by constants.
1284        mode._optimizer = mode._optimizer.including(
1285            'local_elemwise_fusion', 'composite_elemwise_fusion',
1286            'canonicalize', 'inplace')
1287
1288        x, y, z = dmatrices('xyz')
1289        f = theano.function([x, y, z], tensor.dot(x, y) + x + y + z, mode=mode)
1290        topo = [n for n in f.maker.fgraph.toposort()
1291                if not isinstance(n.op, self.topo_exclude)]
1292        assert len(topo) == 2
1293        assert topo[-1].op.inplace_pattern
1294        f(np.random.random((5, 5)), np.random.random((5, 5)),
1295            np.random.random((5, 5)))
1296
1297    def speed_log_exp(self):
1298        s = slice(31, 36)
1299        print("time", self.do(self.mode, self._shared, shp=(1000, 1000),
1300                              assert_len_topo=False, slice=s, nb_repeat=100))
1301
1302
1303class TimesN(theano.scalar.basic.UnaryScalarOp):
1304    """
    Used in the TestCompositeCodegen tests.

    Must be defined outside of the test class, otherwise the C cache code
    can't pickle this class, which causes extra output during the tests.
1309    """
1310    def __eq__(self, other):
1311        return super(TimesN, self).__eq__(other) and self.n == other.n
1312
1313    def __hash__(self):
1314        return super(TimesN, self).__hash__() ^ hash(self.n)
1315
1316    def __init__(self, n, *args, **kwargs):
1317        self.n = n
1318        theano.scalar.basic.UnaryScalarOp.__init__(self, *args, **kwargs)
1319
1320    def impl(self, x):
1321        return x * self.n
1322
1323    def c_support_code_apply(self, node, nodename):
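        # The helper function's name includes `nodename`, so different
        # apply nodes in the same graph get distinct C symbols.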
1324        n = str(self.n)
1325        return """
1326        float %(nodename)s_timesn(float x) { return x * %(n)s; }
1327        """ % locals()
1328
1329    def c_code(self, node, name, inputs, outputs, sub):
1330        (x,) = inputs
1331        (z,) = outputs
1332        return "%(z)s = %(name)s_timesn(%(x)s);" % locals()
1333
1334
1335class TestCompositeCodegen(unittest.TestCase):
1336    """
    Test the Composite Op's code generation in a case where there are
    multiple scalar ops with support code.
1339    """
1340    def setUp(self):
1341        upgrade_to_float = theano.scalar.basic.upgrade_to_float
1342
1343        self.scal_times_2 = TimesN(2, upgrade_to_float, name='times_2')
1344        self.times_2 = theano.tensor.elemwise.Elemwise(
1345            self.scal_times_2,
1346            name='times_2')
1347
1348        self.scal_times_3 = TimesN(3, upgrade_to_float, name='times_3')
1349        self.times_3 = theano.tensor.elemwise.Elemwise(
1350            self.scal_times_3,
1351            name='times_3')
1352
1353        self.x = fvector()
1354
1355    def test_nested_composite(self):
1356        y = self.times_2(self.x)
1357        z = self.times_3(y)
1358        f = function([self.x], z)
1359        if config.mode != "FAST_COMPILE":
1360            assert len(f.maker.fgraph.toposort()) == 1
1361        fval = f([1, 2, 3])
1362        assert np.all(fval == [6, 12, 18])
1363
1364    def test_local_useless_composite(self):
1365        x = theano.scalar.float32()
1366        c = theano.scalar.Composite([x], [x + 1, x - 1])
1367        X = theano.tensor.matrix()
1368        o = theano.tensor.Elemwise(scalar_op=c)(X)
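        # The Composite has two outputs; local_useless_composite should
        # remove the output that the compiled function does not use.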
1369        mode = theano.compile.mode.get_default_mode().including(
1370            'local_useless_composite')
1371
1372        f = theano.function([X], o[0], mode=mode)
1373        topo = f.maker.fgraph.toposort()
1374        assert len(topo) == 1
1375        assert len(topo[0].outputs) == 1
1376        utt.assert_allclose(f([[1.]]), [[2.]])
1377
1378        f = theano.function([X], o[1], mode=mode)
1379        topo = f.maker.fgraph.toposort()
1380        assert len(topo) == 1
1381        assert len(topo[0].outputs) == 1
1382        utt.assert_allclose(f([[1.]]), [[0.]])
1383
1384
1385@utt.assertFailure_fast
1386def test_log1p():
1387    m = theano.config.mode
1388    if m == 'FAST_COMPILE':
1389        m = 'FAST_RUN'
1390    m = compile.mode.get_mode(m)
1391    m = m.excluding('fusion')
1392    # check some basic cases
1393    x = dvector()
1394    f = function([x], T.log(1 + (x)), mode=m)
1395    assert [node.op for node in f.maker.fgraph.toposort()] == [T.log1p]
1396    f = function([x], T.log(1 + (-x)), mode=m)
1397    assert [node.op for node in f.maker.fgraph.toposort()] == [
1398        T.neg, inplace.log1p_inplace]
1399    f = function([x], -T.log(1 + (-x)), mode=m)
1400    assert [node.op for node in f.maker.fgraph.toposort()] == [
1401        T.neg, inplace.log1p_inplace, inplace.neg_inplace]
1402
1403    # check trickier cases (and use different dtype)
1404    y = fmatrix()
1405    f = function([x, y], T.log(tensor.fill(y, 1) + (x)), mode=m)
1406    # the first three ops are Shape_i, Shape_i, and Dimshuffle
1407    topo = f.maker.fgraph.toposort()
1408    assert topo[-1].op == tensor.alloc
1409    assert T.log1p in [node.op for node in topo]
1410
1411    f = function([x, y], T.log(0 + (x) + tensor.fill(y, 1.0)), mode=m)
1412    topo = f.maker.fgraph.toposort()
1413    assert topo[-1].op == tensor.alloc
1414    assert T.log1p in [node.op for node in topo]
1415
1416    f = function([x, y], T.log(2 + (x) - tensor.fill(y, 1.0)), mode=m)
1417    topo = f.maker.fgraph.toposort()
1418    assert topo[-1].op == tensor.alloc
1419    assert T.log1p in [node.op for node in topo]
1420
1421    f([1e-7, 10], [[0, 0], [0, 0]])  # debugmode will verify values
1422
1423    # should work for int
1424    z = tensor.imatrix()
1425    f = function([z], T.log(1 + (z)), mode=m)
1426    assert [node.op for node in f.maker.fgraph.toposort()] == [T.log1p]
1427
1428
1429def test_log_add():
1430    m = theano.config.mode
1431    if m == 'FAST_COMPILE':
1432        m = 'FAST_RUN'
1433    m = compile.mode.get_mode(m)
1434    m = m.excluding('fusion')
1435    m = copy.copy(m)
1436    # No need to put them back as we have a new object
1437    m.check_isfinite = False
1438
1439    # check some basic cases
1440    x = dvector()
1441    y = dvector()
1442    f = function([x, y], T.log(T.exp(x) + T.exp(y)), mode=m)
1443
1444    f([10000], [10000])  # causes overflow if handled incorrectly
1445    assert np.isfinite(f([10000], [10000]))
1446    utt.assert_allclose(f([10000], [10000]), 10000 + np.log1p(1))
1447
    # test that it gives the same result when it doesn't overflow
    f([10], [10])  # doesn't cause overflow
1450    utt.assert_allclose(f([10], [10]), 10 + np.log1p(1))
1451
    # test that it also works with more than two args (this currently fails)
1453    x = dvector()
1454    y = dvector()
1455    f = function([x, y], T.log(T.exp(x) + T.exp(y) + T.exp(x - y) + T.exp(
1456        x + y)), mode=m)
1457
1458    try:
1459        f([10000], [10000])  # causes overflow if handled incorrectly
1460        utt.assert_allclose(f([10000], [10000]), 20000)
1461    except utt.WrongValue:
1462        raise SkipTest("log(add(exp)) is not stabilized when adding "
1463                       "more than 2 elements, see #623")
1464
1465    # TODO: test that the optimization works in the presence of broadcasting.
1466
1467    # TODO: (write and) test that the optimization works with Sum in addition to working with Add.
1468
1469
1470def test_local_useless_slice():
1471    # test a simple matrix
1472    x = tensor.matrix('x')
1473    mode_unopt = compile.get_default_mode().excluding("local_useless_slice", "local_mul_canonizer")
1474    mode_opt = compile.get_default_mode().including("local_useless_slice").excluding("local_mul_canonizer")
1475
1476    # test with and without the useless slice
1477    o = 2 * x[0, :]
1478    f_unopt = theano.function([x], o, mode=mode_unopt)
1479    f_opt = theano.function([x], o, mode=mode_opt)
1480    test_inp = np.random.randint(-10, 10, (4, 4)).astype('float32')
1481    assert all(f_opt(test_inp) == f_unopt(test_inp)),\
1482        "The optimization caused a mismatch in the result"
1483    # test to see if the slice is truly gone
1484    apply_node = f_opt.maker.fgraph.toposort()[0]
1485    subtens = apply_node.op
1486    assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone"
1487
    # Now test that the stack trace is copied over properly,
    # before and after optimization.
1490    assert check_stack_trace(f_unopt, ops_to_check='all')
1491    assert check_stack_trace(f_opt, ops_to_check='all')
1492
1493    # test a 4d tensor
1494    z = tensor.tensor4('z')
1495    o2 = z[1, :, :, 1]
1496    o3 = z[0, :, :, :]
1497    f_opt_check = theano.function([z], o2, mode=mode_opt)
1498    f_opt_check_apply = theano.function([z], o3, mode=mode_opt)
1499
1500    # The optimization shouldn't apply here
1501    apply_node = f_opt_check.maker.fgraph.toposort()[0]
1502    subtens = apply_node.op
1503    assert [isinstance(idx, slice) for idx in subtens.idx_list].count(True) == 2
1504    # But it should here
1505    apply_node = f_opt_check_apply.maker.fgraph.toposort()[0]
1506    subtens = apply_node.op
1507    assert not any(isinstance(idx, slice) for idx in subtens.idx_list)
1508
    # Finally, test that the stack trace is copied over properly,
    # before and after optimization.
1511    assert check_stack_trace(f_opt_check, ops_to_check=Subtensor)
1512    assert check_stack_trace(f_opt_check_apply, ops_to_check=Subtensor)
1513
1514
1515def test_local_useless_inc_subtensor():
1516    x = tensor.matrix('x')
1517    y = tensor.matrix('y')
1518    mode = compile.get_default_mode().including("local_useless_inc_subtensor")
1519    for sub in [slice(None), slice(None, None, -1)]:
1520        o = tensor.set_subtensor(x[::, sub], y)
1521        f = theano.function([x, y], o, mode=mode)
1522        o_shape = tensor.set_subtensor(x[::, sub],
1523                                       tensor.specify_shape(y, x.shape))
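        # specify_shape asserts that y has the same shape as x, giving the
        # optimizer the shape information it needs to drop the full-range
        # set_subtensor and keep y directly.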
1524        f_shape = theano.function([x, y], o_shape, mode=mode)
1525
1526        # Test with shape info
1527        topo = f_shape.maker.fgraph.toposort()
1528        assert not any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
1529        out = f_shape([[2, 3]], [[3, 4]])
1530        assert (out == np.asarray([[3, 4]])[::, sub]).all()
1531
1532        # Test that without shape info, we don't apply the opt.
1533        topo = f.maker.fgraph.toposort()
1534        assert len(topo) == 1
1535        assert isinstance(topo[0].op, tensor.IncSubtensor)
1536        out = f([[2, 3]], [[3, 4]])
1537        assert (out == np.asarray([[3, 4]])[::, sub]).all()
1538
1539        # Test that we don't remove shape error
1540        try:
1541            f([[2, 3]], [[3, 4], [4, 5]])
1542            assert False
1543        except (ValueError, AssertionError):
1544            pass
1545
1546        # Test that we don't remove broadcastability
1547        out = f([[2, 3], [3, 4]], [[5, 6]])
1548        assert (out == np.asarray([[5, 6], [5, 6]])[::, sub]).all()
1549
    # Test that we do not optimize other strides even when sub and y
    # have the same shape
1552    sub = x[::, ::2]
1553    o_shape = tensor.set_subtensor(sub,
1554                                   tensor.specify_shape(y, sub.shape))
1555    f_shape = theano.function([x, y], o_shape)
1556    topo = f_shape.maker.fgraph.toposort()
1557    # theano.printing.debugprint(f_shape)
1558    assert any(isinstance(n.op, tensor.IncSubtensor) for n in topo)
1559    out = f_shape([[2, 3, 6, 7]], [[8, 9]])
1560    assert (out == np.asarray([[8, 3, 9, 7]])).all()
1561
1562
1563def test_local_useless_subtensor():
1564    x = tensor.matrix('x')
1565
1566    # Test default
1567    for dims in [(slice(0, None), ),
1568                 (slice(0, None), slice(0, None)),
1569                 ]:
1570        f = function([x], tensor.exp(x).__getitem__(dims), mode=mode_opt)
1571        # theano.printing.debugprint(f)
1572        prog = f.maker.fgraph.toposort()
1573        assert prog[0].op == tensor.exp
1574        assert len(prog) == 1
1575        f([[0, 1, 2], [3, 4, 5]])  # let debugmode test something
1576
1577    x_c = tensor.specify_shape(x, (2, 3))
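    # With the constant shape (2, 3) attached via SpecifyShape, the
    # optimization can prove that slices covering a full dimension
    # (e.g. 0:2 on axis 0) are useless.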
1578    # Test constant
1579    for dims, res in [((slice(0, 2), ), True),
1580                      ((slice(0, 2), slice(0, None)), True),
1581                      ((slice(0, 2), slice(0, 3)), True),
1582                      ((slice(0, None), slice(0, 3)), True),
1583                      ((slice(0, 3), slice(0, 13)), True),
1584                      ((slice(0, 3), slice(0, 2)), False),
1585                      ((slice(0, 1), slice(0, None)), False),
1586                      ((slice(0, 1), 1), False)]:
1587        f = function([x], tensor.exp(x_c).__getitem__(dims), mode=mode_opt)
1588        # theano.printing.debugprint(f)
1589        prog = f.maker.fgraph.toposort()
1590        if res:
1591            assert isinstance(prog[0].op, theano.tensor.SpecifyShape), dims
1592            assert prog[1].op == tensor.exp, (dims, prog)
1593            assert len(prog) == 2, dims
1594        else:
1595            assert any([isinstance(node.op, Subtensor) for node in prog])
1596        f([[0, 1, 2], [3, 4, 5]])  # let debugmode test something
1597
1598    # Test Variable
1599    for idx, (dims, res) in enumerate([
1600            ((slice(0, x.shape[0]), ), True),
1601            ((slice(0, x.shape[1]), ), False),
1602            ((slice(0, x.shape[0]), slice(0, x.shape[1]), ), True),
1603            ((slice(0, x.shape[0]), slice(0, x.shape[0]), ), False),
1604            ((slice(0, x.shape[1]), slice(0, x.shape[0]), ), False),
1605            ((slice(0, x.shape[1]), slice(0, x.shape[1]), ), False),
1606            ((slice(0, x.shape[1]), 2), False),
1607            ((slice(0, x.shape[1]), slice(x.shape[0] - x.shape[0],
1608                                          x.shape[1]),), False),
1609            ((slice(0, T.scalar_from_tensor(x.shape[0])), ), True),
1610            ]):
1611        f = function([x], tensor.exp(x).__getitem__(dims), mode=mode_opt)
1612        # theano.printing.debugprint(f)
1613        prog = f.maker.fgraph.toposort()
1614        if res:
1615            assert prog[0].op == tensor.exp, dims
1616            assert len(prog) == 1, dims
1617        else:
1618            assert any([isinstance(node.op, Subtensor) for node in prog])
1619        f([[0, 1, 2], [3, 4, 5]])  # let debugmode test something
1620    # Test mix Variable and Constant
1621    # Currently not supported
1622    for idx, (dims, res) in enumerate([
1623            ((slice(0, x.shape[0]), slice(0, 3)), False),
1624            ((slice(0, 3), slice(0, x.shape[1])), False),
1625            ]):
1626        f = function([x], tensor.exp(x_c).__getitem__(dims), mode=mode_opt)
1627        # theano.printing.debugprint(f)
1628        prog = f.maker.fgraph.toposort()
1629        if res:
1630            assert prog[0].op == tensor.exp, dims
1631            assert len(prog) == 1, dims
1632        else:
1633            assert any([isinstance(node.op, Subtensor) for node in prog])
1634        f([[0, 1, 2], [3, 4, 5]])  # let debugmode test something
1635
1636    # Test scalar variable
1637    s = scal.int32('s')
1638    for idx, (dims, res) in enumerate([
1639            ((slice(0, s), ), False),
1640            ]):
1641        f = function([x, s], tensor.exp(x).__getitem__(dims), mode=mode_opt)
1642        # theano.printing.debugprint(f)
1643        prog = f.maker.fgraph.toposort()
1644        if res:
1645            assert prog[0].op == tensor.exp, dims
1646            assert len(prog) == 1, dims
1647        else:
1648            assert any([isinstance(node.op, Subtensor) for node in prog])
1649        f([[1, 2, 3], [4, 5, 6]], 1)
1650        f([[1, 2, 3], [4, 5, 6]], 3)
1651
1652    # Test AdvancedSubtensor1 case when all rows are selected by a list/vector
1653    # or ARange op
1654    for dims, res in (([0, 1], True),
1655                      ([1, 0], False),
1656                      ([0, 0], False),
1657                      ([0, 0, 1], False),
1658                      (T.arange(2), True),
1659                      (T.arange(0, 2), True),
1660                      (T.arange(0, 2, 2), False),
1661                      (T.arange(0, 2, -1), False),
1662                      (T.arange(1, 2), False)):
1663        f = function([x], tensor.exp(x_c).__getitem__(dims), mode=mode_opt)
1664        # theano.printing.debugprint(f)
1665        prog = f.maker.fgraph.toposort()
1666        if res:
1667            assert isinstance(prog[0].op, theano.tensor.SpecifyShape), dims
1668            assert prog[1].op == tensor.exp, dims
1669            assert len(prog) == 2, dims
1670        else:
1671            assert any([isinstance(node.op, AdvancedSubtensor1)
1672                        for node in prog])
1673        f([[0, 1, 2], [3, 4, 5]])  # let debugmode test something
1674
1675
1676def test_local_subtensor_remove_broadcastable_index():
    # testing the local_subtensor_remove_broadcastable_index optimization
    #
    # tests removing broadcastable dimensions with index 0 or -1;
    # otherwise the optimization should not be applied
1681
1682    mode = theano.compile.mode.get_default_mode()
1683    mode = mode.including("local_subtensor_remove_broadcastable_index")
1684    x = T.dmatrix('x')
1685    y1 = x.dimshuffle(0, 'x', 1)
1686    y2 = x.dimshuffle('x', 1, 0, 'x')
1687    y3 = x.dimshuffle('x', 1, 'x', 0, 'x')
1688
    # testing for cases where the optimization should be applied
1690    z1 = y1[:, 0, :]
1691    z2 = y1[:, -1, :]
1692    z3 = y2[0, :, :, -1]
1693    z4 = y2[0, :, :, 0]
1694    z5 = y2[-1, :, :, -1]
1695    z6 = y3[-1, :, 0, :, -1]
1696    z7 = y3[-1, :, -1, :, -1]
1697    z8 = y3[0, :, 0, :, 0]
1698    f = theano.function([x], [z1, z2, z3, z4, z5, z6, z7, z8], mode=mode)
1699    for elem in f.maker.fgraph.toposort():
1700        assert type(elem.op) not in [Subtensor, AdvancedSubtensor,
1701                                     AdvancedSubtensor1, IncSubtensor,
1702                                     AdvancedIncSubtensor,
1703                                     AdvancedIncSubtensor1]
1704
1705    rng = np.random.RandomState(seed=utt.fetch_seed())
1706    xn = rng.rand(5, 5)
1707    f(xn)
1708
    # testing for cases where the optimization should not be applied,
    # to verify that other subtensor usages still work without errors
1711    w1 = y1[3, 0, :]
1712    w2 = y1[2:4, -1, :]
1713    w3 = y2[0, :, 4:, -1]
1714    w4 = y2[:, :, 0, -1]
1715    w5 = y2[0, 2:4, :, 0]
1716    w6 = y2[0, -1, :, -1]
1717    w7 = y2[-1, 4:, :, -1]
1718    w8 = y2[-1, :, :3, -1]
1719    w9 = y2[-1, :, -1, -1]
1720    w10 = y3[-1, 2, 0, :, -1]
1721    w11 = y3[-1, 0, -1, :, -1]
1722    w12 = y3[-1, :, -1, -1, -1]
1723    w13 = y3[0, 0, 0, :, 0]
1724    w14 = y3[-1, 2:4, 0, 1:5, -1]
1725    w15 = y3[-1, 0, -1, 0, -1]
1726    w16 = y3[0, 2, 0, 4, 0]
1727    w17 = y3[:, 0, :, 1]
1728    w18 = y3[0, :, :, 2]
1729    w19 = y3[:, 2, 0]
1730    w20 = y3[:, 3]
1731    f2 = theano.function([x], [w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11,
1732                               w12, w13, w14, w15, w16, w17, w18, w19, w20],
1733                         mode=mode)
1734    f2(xn)
1735
1736
1737class Test_subtensor_inc_subtensor(unittest.TestCase):
1738    @classmethod
1739    def setUpClass(cls):
1740        cls.mode = theano.compile.mode.get_default_mode().including('local_subtensor_inc_subtensor')
1741
1742    def test_basic(self):
1743        # basic test
1744        x = tensor.matrix('x')
1745        i = tensor.iscalar('i')
1746        v = tensor.vector('v')
1747        y = tensor.set_subtensor(x[i], v)
1748        z = y[i]
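        # Reading back exactly the slice that was just set should simplify
        # to v itself, leaving only a DeepCopyOp in the graph.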
1749        f = theano.function([x, i, v], z, mode=self.mode)
1750        prog = f.maker.fgraph.toposort()
1751        assert len(prog) == 1
1752        assert isinstance(prog[0].op, DeepCopyOp)
1753        # basic test, numerical check
1754        x_ = np.random.uniform(size=[3, 4]).astype(config.floatX)
1755        v_ = np.random.uniform(size=[4, ]).astype(config.floatX)
1756        i_ = 1
1757        assert np.array_equal(f(x_, i_, v_), v_)
1758
1759    def test_multiple_idx(self):
1760        # complicated test
1761        x = tensor.tensor4('x')
1762        i1 = tensor.iscalar('i1')
1763        i2 = tensor.iscalar('i2')
1764        i3 = tensor.iscalar('i3')
1765        i4 = tensor.iscalar('i4')
1766        v = tensor.tensor3('v')
1767        y = tensor.set_subtensor(x[i1, :i2, i3:, ::i4], v)
1768        z = y[i1, :i2, i3:, ::i4]
1769        f = theano.function([x, i1, i2, i3, i4, v], z, mode=self.mode)
1770        prog = f.maker.fgraph.toposort()
1771        assert len(prog) == 1
1772        assert isinstance(prog[0].op, DeepCopyOp)
1773        # complicated test, numerical check
1774        x_ = np.random.uniform(size=[3, 4, 5, 6]).astype(config.floatX)
1775        v_ = np.random.uniform(size=[2, 2, 2]).astype(config.floatX)
1776        i1_, i2_, i3_, i4_ = 1, 2, 3, 4
1777        assert np.array_equal(f(x_, i1_, i2_, i3_, i4_, v_), v_)
1778
1779    def test_not_applied(self):
        # case that does not use this optimization
1781        x = tensor.tensor4('x')
1782        i1 = tensor.iscalar('i1')
1783        i2 = tensor.iscalar('i2')
1784        i3 = tensor.iscalar('i3')
1785        i4 = tensor.iscalar('i4')
1786        v = tensor.tensor3('v')
1787        y = tensor.set_subtensor(x[i1, :i2, i3:, ::i4], v)
1788        z = y[i1, :i3, i2:, ::i4]
1789        f = theano.function([x, i1, i2, i3, i4, v], z, mode=self.mode)
1790        prog = f.maker.fgraph.toposort()
1791        assert len(prog) != 1
1792        assert any(isinstance(x.op, tensor.IncSubtensor) for x in prog)
1793        assert any(isinstance(x.op, tensor.Subtensor) for x in prog)
        # case that does not use this optimization, numerical check
1795        x_ = np.random.uniform(size=[3, 4, 5, 6]).astype(config.floatX)
1796        v_ = np.random.uniform(size=[2, 2, 2]).astype(config.floatX)
1797        i1_, i2_, i3_, i4_ = 1, 2, 3, 4
1798        x_[i1_, :i2_, i3_:, ::i4_] = v_
1799        assert np.array_equal(f(x_, i1_, i2_, i3_, i4_, v_), x_[i1_, :i3_, i2_:, ::i4_])
1800
1801    def test_fewer_dims(self):
1802        # case when v has fewer dimensions
1803        x = tensor.matrix('x')
1804        i1 = tensor.iscalar('i')
1805        i2 = tensor.iscalar('i')
1806        v = tensor.vector('v')
1807        y = tensor.set_subtensor(x[:i1, :i2], v)
1808        z = y[:i1, :i2]
1809        f = theano.function([x, i1, i2, v], z, mode=self.mode)
1810        prog = f.maker.fgraph.toposort()
1811        assert any(isinstance(x.op, tensor.Alloc) for x in prog)
        # case when v has fewer dimensions, numerical check
1813        x_ = np.random.uniform(size=[3, 4]).astype(config.floatX)
1814        v_ = np.random.uniform(size=[2, ]).astype(config.floatX)
1815        i1_, i2_ = 2, 2
1816        x_[:i1_, :i2_] = v_
1817        assert np.array_equal(f(x_, i1_, i2_, v_), x_[:i1_, :i2_])
1818
1819    def test_broadcasted(self):
1820        # case when v has the same number of dimensions, some broadcastable
1821        x = tensor.matrix('x')
1822        i1 = tensor.iscalar('i')
1823        i2 = tensor.iscalar('i')
1824        v = tensor.col('v')
1825        y = tensor.set_subtensor(x[:i1, :i2], v)
1826        z = y[:i1, :i2]
1827        f = theano.function([x, i1, i2, v], z, mode=self.mode)
1828        prog = f.maker.fgraph.toposort()
1829        assert any(isinstance(x.op, tensor.Alloc) for x in prog)
1830        # case when v is broadcastable, numerical check
1831        x_ = np.random.uniform(size=[3, 4]).astype(config.floatX)
1832        v_ = np.random.uniform(size=[2, 1]).astype(config.floatX)
1833        i1_, i2_ = 2, 2
1834        x_[:i1_, :i2_] = v_
1835        assert np.array_equal(f(x_, i1_, i2_, v_), x_[:i1_, :i2_])
1836
1837    def test_different_dtypes(self):
1838        # Case when the dtype differs
1839        x = tensor.bmatrix('x')
1840        i = tensor.iscalar('i')
1841        v = tensor.vector('v')
1842        y = tensor.set_subtensor(x[i], v)
1843        z = y[i]
1844        f = theano.function([x, i, v], z, mode=self.mode)
1845        prog = f.maker.fgraph.toposort()
1846        assert len(prog) == 1
1847        assert prog[0].op == tensor.basic._convert_to_int8
1848        # basic test, numerical check
1849        x_ = np.random.randint(12, size=[3, 4]).astype('int8')
1850        v_ = np.random.uniform(12, size=[4, ]).astype(config.floatX)
1851        i_ = 1
1852        assert np.array_equal(f(x_, i_, v_), v_.astype('int8'))
1853
1854
1855class test_local_subtensor_make_vector(unittest.TestCase):
1856    def test_scalar_idx(self):
1857        x, y, z = tensor.lscalars('xyz')
1858        v = make_vector(x, y, z)
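        # Indexing a MakeVector with a constant scalar index should reduce
        # to the corresponding scalar input (here x).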
1859        f = function([x, y, z], v[0], mode=mode_opt)
1860
1861        prog = f.maker.fgraph.toposort()
1862        assert len(prog) == 1
1863        assert isinstance(prog[0].op, theano.compile.ops.DeepCopyOp)
1864        assert f(0, 1, 2) == 0
1865
1866    def test_slice_idx_stop(self):
1867        x, y, z = tensor.lscalars('xyz')
1868        v = make_vector(x, y, z)
1869        f = function([x, y, z], v[:2], mode=mode_opt)
1870
1871        prog = f.maker.fgraph.toposort()
1872        assert len(prog) == 1
1873        assert isinstance(prog[0].op, MakeVector)
1874        assert len(prog[0].inputs) == 2
1875        r = f(0, 1, 2)
1876        assert r[0] == 0 and r[1] == 1
1877
1878    def test_slice_idx_step(self):
1879        x, y, z = tensor.lscalars('xyz')
1880        v = make_vector(x, y, z)
1881        f = function([x, y, z], v[::2], mode=mode_opt)
1882
1883        prog = f.maker.fgraph.toposort()
1884        assert len(prog) == 1
1885        assert isinstance(prog[0].op, MakeVector)
1886        assert len(prog[0].inputs) == 2
1887        r = f(0, 1, 2)
1888        assert r[0] == 0 and r[1] == 2
1889
1890    def test_AdvancedSubtensor1_idx(self):
1891        x, y, z = tensor.lscalars('xyz')
1892        v = make_vector(x, y, z)
1893        f = function([x, y, z], v[[0, 2]], mode=mode_opt)
1894
1895        prog = f.maker.fgraph.toposort()
1896        assert len(prog) == 1
1897        assert isinstance(prog[0].op, MakeVector)
1898        assert len(prog[0].inputs) == 2
1899        r = f(0, 1, 2)
1900        assert r[0] == 0 and r[1] == 2
1901
1902    def test_stack_trace(self):
1903        x, y, z = tensor.lscalars('xyz')
1904        v = make_vector(x, y, z)
1905
1906        mode = theano.compile.mode.get_default_mode().including(
1907            "local_subtensor_make_vector")
1908
1909        # list of subtensor cases, where local_subtensor_make_vector
1910        # inserts a new MakeVector node
1911        v_subtensors = [v[:2], v[::2], v[[0, 2]]]
1912
1913        for v_subtensor in v_subtensors:
1914            f = function([x, y, z], v_subtensor, mode=mode)
1915            self.assertTrue(check_stack_trace(f, ops_to_check='all'))
1916
1917
1918class test_local_subtensor_lift(unittest.TestCase):
1919    def test0(self):
1920        # basic test that the Op works
1921        x = tensor.matrix('x')
1922        f = function([x], tensor.exp(x)[0], mode=mode_opt)
1923
1924        # Check stacktrace was copied over correctly after opt was applied
1925        self.assertTrue(check_stack_trace(f, ops_to_check='all'))
1926
1927        prog = f.maker.fgraph.toposort()
1928        assert isinstance(prog[0].op, tensor.Subtensor)  # first subtensor
1929        assert prog[1].op == tensor.exp
1930        assert len(prog) == 2
1931        f([[0, 1], [2, 3]])  # let debugmode test something
1932
1933    def test0b(self):
1934        # as test0, but we reuse the output of the elemwise
1935        # So we should not lift the subtensor
1936        x = tensor.matrix('x')
1937        f = function([x], [tensor.exp(x)[0], tensor.exp(x)], mode=mode_opt)
1938
1939        # Check stacktrace was copied over correctly after opt was applied
1940        self.assertTrue(check_stack_trace(f, ops_to_check=[
1941            Subtensor, tensor.Elemwise]))
1942
1943        prog = f.maker.fgraph.toposort()
1944        assert prog[0].op == tensor.exp
1945        assert isinstance(prog[1].op, tensor.Subtensor)  # first subtensor
1946        assert isinstance(prog[2].op, DeepCopyOp)
1947        assert len(prog) == 3
1948        f([[0, 1], [2, 3]])  # let debugmode test something
1949
1950    def test1(self):
        # basic test that the optimization works with a broadcasted scalar
1952        x = tensor.matrix('x')
1953        y = tensor.scalar('y')
1954        z = tensor.matrix('z')
1955        f = function([x, y, z], tensor.exp(x + y + z)[0], mode=mode_opt)
1956
1957        # Check stacktrace was copied over correctly after opt was applied
1958        self.assertTrue(check_stack_trace(f, ops_to_check=[
1959            Subtensor, tensor.DimShuffle]))
1960
1961        prog = f.maker.fgraph.toposort()
1962        assert isinstance(prog[0].op, tensor.Subtensor)
1963        assert isinstance(prog[1].op, tensor.DimShuffle)
1964        assert isinstance(prog[2].op, tensor.Subtensor)
1965        assert isinstance(prog[3].op.scalar_op, theano.scalar.
1966                          Composite)  # Composite{add,add}
1967        assert len(prog) == 4
1968        # let debugmode test something
1969        f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
1970
1971    def test2(self):
1972        # as 1, but take a slice
1973        x = tensor.matrix('x')
1974        y = tensor.scalar('y')
1975        z = tensor.matrix('z')
1976        f = function([x, y, z], tensor.exp(x + y + z)[0:2], mode=mode_opt)
1977
1978        # Check stacktrace was copied over correctly after opt was applied
1979        self.assertTrue(check_stack_trace(f, ops_to_check=[
1980            Subtensor, tensor.DimShuffle]))
1981
1982        prog = f.maker.fgraph.toposort()
1983        assert isinstance(prog[0].op, tensor.Subtensor)
1984        assert isinstance(prog[1].op, tensor.DimShuffle)
1985        assert isinstance(prog[2].op, tensor.Subtensor)
1986        assert isinstance(prog[3].op.scalar_op, theano.scalar.
1987                          Composite)  # Composite{add,add}
1988        assert len(prog) == 4
1989        # let debugmode test something
1990        f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
1991
1992    def test3(self):
1993        # basic test that the optimization does work with broadcasting
1994        # for unary elemwise.
1995        y = tensor.vector('y')
1996        f = function([y], tensor.exp(y.dimshuffle(0, 'x'))[0], mode=mode_opt)
1997
1998        # Check stacktrace was copied over correctly after opt was applied
1999        self.assertTrue(check_stack_trace(f, ops_to_check='all'))
2000
2001        prog = f.maker.fgraph.toposort()
2002        assert isinstance(prog[0].op, tensor.DimShuffle)
2003        assert isinstance(prog[1].op, tensor.Subtensor)
2004        assert prog[2].op == tensor.exp
2005        assert len(prog) == 3
2006        f([4, 5])  # let debugmode test something
2007
2008    @utt.assertFailure_fast
2009    def test4(self):
2010        # basic test that the optimization doesn't work with broadcasting
2011        # ... It *could* be extended to,
2012        # ... but right now it doesn't, so it shouldn't try.
2013        x = tensor.matrix('x')
2014        y = tensor.vector('y')
2015        f = function([x, y], tensor.exp(x + y)[0], mode=mode_opt)
2016
2017        # Opt doesn't apply, so no need for check_stack_trace
2018        # self.assertTrue(check_stack_trace(f, ops_to_check='all'))
2019
2020        prog = f.maker.fgraph.toposort()
2021        assert isinstance(prog[0].op, tensor.DimShuffle)
2022        assert prog[1].op == tensor.add
2023        assert isinstance(prog[2].op, tensor.Subtensor)  # first subtensor
2024        assert prog[3].op == inplace.exp_inplace
2025        assert len(prog) == 4
2026        f([[0, 1], [2, 3]], [4, 5])  # let debugmode test something
2027
2028    def test5(self):
2029        # test that we don't lift when we reuse the output of the
2030        # elemwise for other computation.
2031        x = tensor.matrix('x')
2032        y = tensor.vector('y')
2033        f = function([x, y], [tensor.exp(x + y)[0], tensor.exp(x + y) + x],
2034                     mode=mode_opt)
2035
2036        # Opt doesn't apply, so no need for check_stack_trace
2037        # self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2038
2039        prog = f.maker.fgraph.toposort()
2040        assert isinstance(prog[0].op, tensor.DimShuffle)
2041        assert isinstance(prog[1].op.scalar_op, theano.scalar.
2042                          Composite)  # Composite{add,exp}
2043        assert prog[2].op == tensor.add or prog[3].op == tensor.add
2044        # first subtensor
2045        assert isinstance(prog[2].op, tensor.Subtensor) or isinstance(prog[3].op, tensor.Subtensor)
2046        assert len(prog) == 4
2047        f([[0, 1], [2, 3]], [4, 5])  # let debugmode test something
2048
2049    def test6(self):
2050        # basic test that the optimization works with a scalar as input,
2051        # and a scalar as output (no broadcasting of the scalar needed).
2052        # The optimization used to fail and display an ERROR message.
2053
2054        x = tensor.vector('x')
2055        y = tensor.scalar('y')
2056        f = function([x, y], tensor.exp(x + y)[0], mode=mode_opt)
2057
2058        # Check stacktrace was copied over correctly after opt was applied
2059        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2060
2061        prog = f.maker.fgraph.toposort()
2062        assert isinstance(prog[0].op, tensor.Subtensor)
2063        # Composite{add,exp}
2064        assert isinstance(prog[1].op.scalar_op, theano.scalar.Composite)
2065        assert len(prog) == 2
2066        f([1, 2, 3], 4)  # let debugmode test something
2067
2068    def test7(self):
2069        # Test that Subtensor(Rebroadcast(x)) gets optimized into
2070        # Rebroadcast(Subtensor(x)).
2071
2072        # test basic case
2073        x = tensor.matrix('x')
2074        xval = np.random.rand(1, 10).astype(config.floatX)
2075        assert x.broadcastable == (False, False)
2076        newx = tensor.Rebroadcast((0, True), (1, False))(x)
2077        assert newx.broadcastable == (True, False)
2078
2079        f1 = function([x], newx[:2, :5], mode=mode_opt)
2080        # Check stacktrace was copied over correctly after opt was applied
2081        self.assertTrue(check_stack_trace(f1, ops_to_check=[
2082                        Subtensor, tensor.Rebroadcast]))
2083        prog = f1.maker.fgraph.toposort()
2084        assert isinstance(prog[0].op, tensor.Subtensor)
2085        assert isinstance(prog[1].op, tensor.Rebroadcast)
2086        assert (f1(xval) == xval[:2, :5]).all()
2087
2088        # corner case 1: rebroadcast changes dims which are dropped through subtensor
        y = tensor.tensor4('y')
2090        yval = np.random.rand(1, 10, 1, 3).astype(config.floatX)
2091        assert y.broadcastable == (False, False, False, False)
2092        newy = tensor.Rebroadcast((0, True), (2, True))(y)
2093        assert newy.broadcastable == (True, False, True, False)
2094
2095        f2 = function([y], newy[:, 3, 0, :], mode=mode_opt)
2096        # Check stacktrace was copied over correctly after opt was applied
2097        self.assertTrue(check_stack_trace(f2, ops_to_check=[
2098                        Subtensor, tensor.Rebroadcast]))
2099        prog = f2.maker.fgraph.toposort()
2100        assert isinstance(prog[0].op, tensor.Subtensor)
2101        assert isinstance(prog[1].op, tensor.Rebroadcast)
2102        assert (f2(yval) == yval[:, 3, 0, :]).all()
2103
2104        # corner case 2: subtensor idx_list is shorter than resulting broadcast pattern
2105        f3 = function([y], newy[:, 3, 0], mode=mode_opt)
2106        # Check stacktrace was copied over correctly after opt was applied
2107        self.assertTrue(check_stack_trace(f3, ops_to_check=[
2108            Subtensor, tensor.Rebroadcast]))
2109        prog = f3.maker.fgraph.toposort()
2110        assert isinstance(prog[0].op, tensor.Subtensor)
2111        assert isinstance(prog[1].op, tensor.Rebroadcast)
2112        assert (f3(yval) == yval[:, 3, 0]).all()
2113
2114        # corner case 3: subtensor idx_list is shorter than rebroadcast.axis
        z = tensor.tensor4('z')
2116        zval = np.random.rand(4, 10, 3, 1).astype(config.floatX)
2117        assert z.broadcastable == (False, False, False, False)
2118        newz = tensor.Rebroadcast((3, True))(z)
2119        assert newz.broadcastable == (False, False, False, True)
2120
2121        f4 = function([z], newz[:, 3, 0], mode=mode_opt)
2122        # Check stacktrace was copied over correctly after opt was applied
2123        self.assertTrue(check_stack_trace(f4, ops_to_check=[
2124            Subtensor, tensor.Rebroadcast]))
2125        prog = f4.maker.fgraph.toposort()
2126        assert isinstance(prog[0].op, tensor.Subtensor)
2127        assert isinstance(prog[1].op, tensor.Rebroadcast)
2128        assert (f4(zval) == zval[:, 3, 0]).all()
2129
2130
2131class test_local_subtensor_merge(unittest.TestCase):
2132    def setUp(self):
2133        utt.seed_rng()
2134        self.x_shapes = [(2, 2), (5, 3), (4, 1), (1, 2),
2135                         (0, 2), (2, 0), (1, 0), (0, 0)]
2136        self.rng = np.random.RandomState(seed=utt.fetch_seed())
2137
2138    def test_const(self):
2139        # var[const::][-1] -> var[-1]
2140        x = tensor.matrix('x')
2141        for idx in xrange(-7, 6):
2142            f = function([x], x[idx::][-1], mode=mode_opt)
2143            g = function([x], x[idx::][-1], mode=mode_opt.excluding(
2144                'local_subtensor_merge'))
2145
2146            # Check stacktrace was copied over correctly after opt was applied
2147            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2148
2149            topo = f.maker.fgraph.toposort()
2150            assert len([t for t in topo
2151                        if isinstance(t.op, tensor.Subtensor)]) == 1
2152            assert isinstance(topo[-1].op, DeepCopyOp)
2153
2154            for x_s in self.x_shapes:
2155                x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2156
2157                if idx < x_s[0] and x_s[0] > 0:
2158                    # The first subtensor is non-empty, so it makes sense
2159                    f(x_val)  # let debugmode test something
2160                else:
                    # Taking an element of an empty subtensor should
                    # raise an IndexError
2163                    self.assertRaises(IndexError, f, x_val)
2164                    self.assertRaises(IndexError, g, x_val)
2165
2166    def test_scalar(self):
2167        # var[int::][-1] -> var[-1]
2168        x = tensor.matrix('x')
2169        y = tensor.iscalar('y')
2170        f = function([x, y], x[y::][-1], mode=mode_opt)
2171        g = function([x, y], x[y::][-1],
2172                     mode=mode_opt.excluding('local_subtensor_merge'))
2173        # theano.printing.debugprint(f, print_type=True)
2174
2175        # Check stacktrace was copied over correctly after opt was applied
2176        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2177
2178        topo = f.maker.fgraph.toposort()
2179        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2180        assert len([t for t in topo
2181                    if isinstance(t.op, tensor.Subtensor)]) == 1
2182        # print topo[-1].op
2183        assert isinstance(topo[-1].op, DeepCopyOp)
2184
2185        for x_s in self.x_shapes:
2186            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2187
2188            for idx in xrange(-9, 8):
2189                if (idx < x_s[0]) and (x_s[0] > 0):
2190                    # The first subtensor is non-empty
2191                    f(x_val, idx)  # let debugmode test something
2192                else:
2193                    self.assertRaises(IndexError, f, x_val, idx)
2194                    self.assertRaises(IndexError, g, x_val, idx)
2195
2196    @attr('slow')
2197    def test_const2(self):
2198        # var[::-1][const] -> var[-1]
2199        x = tensor.matrix('x')
2200        for idx in xrange(-8, 7):
2201            f = function([x], x[::-1][idx], mode=mode_opt)
2202            g = function([x], x[::-1][idx],
2203                         mode=mode_opt.excluding('local_subtensor_merge'))
2204
2205            # Check stacktrace was copied over correctly after opt was applied
2206            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2207
2208            # theano.printing.debugprint(f, print_type=True)
2209            topo = f.maker.fgraph.toposort()
2210            # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2211            assert len([t for t in topo
2212                        if isinstance(t.op, tensor.Subtensor)]) == 1
2213            # print topo[-1].op
2214            assert isinstance(topo[-1].op, DeepCopyOp)
2215
2216            for x_s in self.x_shapes:
2217                x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2218                if (idx < x_s[0]) and (idx >= -x_s[0]):
2219                    # The first subtensor is non-empty, so it makes sense
2220                    f(x_val)  # let debugmode test something
2221                else:
                    # Taking an element of an empty subtensor should
                    # raise an IndexError
2224                    self.assertRaises(IndexError, f, x_val)
2225                    self.assertRaises(IndexError, g, x_val)
2226
2227    def test_scalar2(self):
2228        # var[::-1][int] -> var[-1]
2229        x = tensor.matrix('x')
2230        y = tensor.iscalar('y')
2231        f = function([x, y], x[::-1][y], mode=mode_opt)
2232        g = function([x, y], x[::-1][y],
2233                     mode=mode_opt.excluding('local_subtensor_merge'))
2234        # theano.printing.debugprint(f, print_type=True)
2235
2236        # Check stacktrace was copied over correctly after opt was applied
2237        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2238
2239        topo = f.maker.fgraph.toposort()
2240        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2241        assert len([t for t in topo
2242                    if isinstance(t.op, tensor.Subtensor)]) == 1
2243        # print topo[-1].op
2244        assert isinstance(topo[-1].op, DeepCopyOp)
2245
2246        for x_s in self.x_shapes:
2247            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2248
2249            for idx in xrange(-x_s[0], x_s[0]):
2250                f(x_val, idx)  # let debugmode test something
2251            for idx in (list(range(x_s[0], 9)) + list(range(-9, -x_s[0]))):
2252                self.assertRaises(IndexError, f, x_val, idx)
2253                self.assertRaises(IndexError, g, x_val, idx)
2254
2255    def test_const3(self):
2256        # var[::-1][:const] -> var[-1]
2257        x = tensor.matrix('x')
2258        for idx in xrange(-9, 8):
2259            f = function([x], x[::-1][:idx], mode=mode_opt)
2260
2261            # Check stacktrace was copied over correctly after opt was applied
2262            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2263
2264            # theano.printing.debugprint(f, print_type=True)
2265            topo = f.maker.fgraph.toposort()
2266            # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2267            assert len([t for t in topo
2268                        if isinstance(t.op, tensor.Subtensor)]) == 1
2269            # print topo[-1].op
2270            assert isinstance(topo[-1].op, DeepCopyOp)
2271
2272            for x_s in self.x_shapes:
2273                x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2274                f(x_val)  # let debugmode test something
2275
2276    def test_scalar3(self):
2277        # var[::-1][:int] -> var[-1]
2278        x = tensor.matrix('x')
2279        y = tensor.iscalar('y')
2280        f = function([x, y], x[::-1][:y], mode=mode_opt)
2281
2282        # Check stacktrace was copied over correctly after opt was applied
2283        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2284
2285        # theano.printing.debugprint(f, print_type=True)
2286
2287        topo = f.maker.fgraph.toposort()
2288        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2289        assert len([t for t in topo
2290                    if isinstance(t.op, tensor.Subtensor)]) == 1
2291        # print topo[-1].op
2292        assert isinstance(topo[-1].op, DeepCopyOp)
2293
2294        for x_s in self.x_shapes:
2295            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2296            for idx in xrange(-7, 7):
2297                f(x_val, idx)  # let debugmode test something
2298
2299    def test_const4(self):
2300        # var[const1::][:const2]
2301        x = tensor.matrix('x')
2302        for idx1 in xrange(-7, 7):
2303            for idx2 in xrange(-7, 7):
2304                f = function([x], x[idx1:][:idx2], mode=mode_opt)
2305
2306                # Check stacktrace was copied over correctly after opt was applied
2307                self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2308
2309                # theano.printing.debugprint(f, print_type=True)
2310                topo = f.maker.fgraph.toposort()
2311                # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2312                assert len([t for t in topo
2313                            if isinstance(t.op, tensor.Subtensor)]) == 1
2314                # print topo[-1].op
2315                assert isinstance(topo[-1].op, DeepCopyOp)
2316
2317                for x_s in self.x_shapes:
2318                    x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2319                    f(x_val)  # let debugmode test something
2320
2321    def test_scalar4(self):
2322        # var[int1:][:int2]
2323        x = tensor.matrix('x')
2324        y = tensor.iscalar('y')
        z = tensor.iscalar('z')
2326        f = function([x, y, z], x[y:][:z], mode=mode_opt)
2327
2328        # Check stacktrace was copied over correctly after opt was applied
2329        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2330
2331        # theano.printing.debugprint(f, print_type=True)
2332
2333        topo = f.maker.fgraph.toposort()
2334        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2335        assert len([t for t in topo
2336                    if isinstance(t.op, tensor.Subtensor)]) == 1
2337        # print topo[-1].op
2338        assert isinstance(topo[-1].op, DeepCopyOp)
2339
2340        for x_s in self.x_shapes:
2341            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2342            for idx1 in xrange(-11, 11):
2343                for idx2 in xrange(-11, 11):
2344                    f(x_val, idx1, idx2)  # let debugmode test something
2345
2346    def test_const_general(self):
2347        # Some cases of merge: shape, (start, stop, step) of first,
2348        # (start, stop, step) of second subtensor
2349        cases = [
2350            ((2, 3), (None, None, None), (None, None, -1)),
2351            ((12, 1), (None, None, -4), (None, None, 1)),
2352            ((5, 3), (1, 4, 2), (None, None, -1)),
2353        ]
2354        x = tensor.matrix('x')
2355
2356        for shape, sl1, sl2 in cases:
2357            z = x[slice(*sl1)][slice(*sl2)]
2358            f = function([x], z, mode=mode_opt)
2359
2360            # Check stacktrace was copied over correctly after opt was applied
2361            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2362
2363            x_val = self.rng.uniform(size=shape).astype(config.floatX)
2364            f(x_val)
2365
2366    def test_scalar5(self):
2367        # General case with two real slices
2368        # var[b1:e1:s1][b2:e2:s2]
2369        x = tensor.matrix('x')
2370        b1 = tensor.iscalar('b1')
2371        e1 = tensor.iscalar('e1')
2372        s1 = tensor.iscalar('s1')
2373        b2 = tensor.iscalar('b2')
2374        e2 = tensor.iscalar('e2')
2375        s2 = tensor.iscalar('s2')
2376        f = function([x, b1, e1, s1, b2, e2, s2], x[b1:e1:s1][b2:e2:s2],
2377                     mode=mode_opt)
2378
2379        # Check stacktrace was copied over correctly after opt was applied
2380        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2381
2382        # theano.printing.debugprint(f, print_type=True)
2383
2384        topo = f.maker.fgraph.toposort()
2385        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2386        assert len([t for t in topo
2387                    if isinstance(t.op, tensor.Subtensor)]) == 1
2388        # print topo[-1].op
2389        assert isinstance(topo[-1].op, DeepCopyOp)
2390
2391        b1r = self.rng.permutation(list(range(-8, 8)))[:2]
2392        e1r = self.rng.permutation(list(range(-8, 8)))[:2]
2393        b2r = self.rng.permutation(list(range(-8, 8)))[:2]
2394        e2r = self.rng.permutation(list(range(-8, 8)))[:2]
2395
2396        s1r = self.rng.permutation([-7, -6, -5, -4, -3, -2, -1, 1,
2397                                    2, 3, 4, 5, 6, 7])[:2]
2398        s2r = self.rng.permutation([-7, -6, -5, -4, -3, -2, -1, 1,
2399                                    2, 3, 4, 5, 6, 7])[:2]
2400
2401        for x_s in self.x_shapes:
2402            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2403            for b1 in b1r:
2404                for e1 in e1r:
2405                    for s1 in s1r:
2406                        for b2 in b2r:
2407                            for e2 in e2r:
2408                                for s2 in s2r:
2409                                    f(x_val, b1, e1, s1, b2, e2, s2)
2410
2411    def test_const5(self):
2412        # Bug reported by Razvan
2413        data = np.asarray(np.arange(8),
2414                          dtype=theano.config.floatX)
2415        x = theano.tensor.vector('x')
2416        y = x[7:1:-1]
2417        t = theano.shared(np.int64(0))
2418
2419        fun = theano.function([x], y[t])
2420
2421        val = fun(data)
2422        assert val == data[7:1:-1][0]
2423
2424    def test_const6(self):
2425        # Bug reported by Graham
2426        data = self.rng.uniform(size=(8, 8, 8)).astype(theano.config.floatX)
2427        x = theano.tensor.tensor3('x')
2428
2429        nops = 1
2430        if theano.config.mode == "FAST_COMPILE":
2431            nops = 2
2432
2433        # test 1)
2434        y = x[3:6, 2:6, 1:7][1]
2435        fun = theano.function([x], y)
2436        val = fun(data)
2437        assert np.all(val == data[3:6, 2:6, 1:7][1])
2438        assert len([n for n in fun.maker.fgraph.toposort()
2439                    if isinstance(n.op, Subtensor)]) == nops
2440
2441        # test 2)
2442        y = x[2, 3][1]
2443        fun = theano.function([x], y)
2444        val = fun(data)
2445        assert np.all(val == data[2, 3][1])
2446        assert len([n for n in fun.maker.fgraph.toposort()
2447                    if isinstance(n.op, Subtensor)]) == nops
2448
2449        # test 3)
2450        y = x[3:6, 2, 1:7][1]
2451        fun = theano.function([x], y)
2452        val = fun(data)
2453        assert np.all(val == data[3:6, 2, 1:7][1])
2454        assert len([n for n in fun.maker.fgraph.toposort()
2455                    if isinstance(n.op, Subtensor)]) == nops
2456
2457    def test_scalar6(self):
2458        # General case with one slice and one index
2459        # var[b:e:s][i]
2460        x = tensor.matrix('x')
2461        b = tensor.iscalar('b')
2462        e = tensor.iscalar('e')
2463        s = tensor.iscalar('s')
2464        i = tensor.iscalar('i')
2465        f = function([x, b, e, s, i], x[b:e:s][i], mode=mode_opt)
2466
2467        # Check stacktrace was copied over correctly after opt was applied
2468        self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2469
2470        # theano.printing.debugprint(f, print_type=True)
2471
2472        topo = f.maker.fgraph.toposort()
2473        # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2474        assert len([t for t in topo
2475                    if isinstance(t.op, tensor.Subtensor)]) == 1
2476        # print topo[-1].op
2477        assert isinstance(topo[-1].op, DeepCopyOp)
2478
2479        b_r = self.rng.permutation(list(range(-4, 4)))[:3]
2480        e_r = self.rng.permutation(list(range(-4, 4)))[:3]
2481        i_r = self.rng.permutation(list(range(-4, 4)))[:3]
2482
2483        s_r = self.rng.permutation([-3, -2, -1, 1, 2, 3])[:3]
2484
2485        for x_s in self.x_shapes:
2486            n_index_err = 0
2487            n_ok = 0
2488            x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2489            for b_v in b_r:
2490                for e_v in e_r:
2491                    for s_v in s_r:
2492                        for i_v in i_r:
2493                            # The index could be out of bounds
2494                            # In that case, an Exception should be raised,
2495                            # otherwise, we let DebugMode check f
2496                            try:
2497                                x_val[b_v:e_v:s_v][i_v]
2498                            except IndexError:
2499                                n_index_err += 1
2500                                self.assertRaises(IndexError,
2501                                                  f, x_val, b_v, e_v, s_v, i_v)
2502                            else:
2503                                # Executed if the "try" clause did not raise
2504                                # any exception
2505                                n_ok += 1
2506                                f(x_val, b_v, e_v, s_v, i_v)
2507
2508            # print 'shape: %s' % (x_s,)
2509            # print '%% OK: %f' % (float(n_ok) * 100 / (n_ok + n_index_err))
2510
2511    @attr('slow')
2512    def test_none_slice(self):
2513        # Test case of two slices, var[b1:e1:s1][b2:e2:s2]
2514        # where any of the b, e, and s can be None
2515        x = tensor.matrix('x')
2516        b1 = tensor.iscalar('b1')
2517        e1 = tensor.iscalar('e1')
2518        s1 = tensor.iscalar('s1')
2519        b2 = tensor.iscalar('b2')
2520        e2 = tensor.iscalar('e2')
2521        s2 = tensor.iscalar('s2')
2522
2523        # Generate all possible lists of positions for None in those 6 slots
2524        # A 1 indicates None is present, 0 that there is a Theano scalar.
2525        none_positions = np.ndindex(2, 2, 2, 2, 2, 2)
2526
2527        # Ranges to be used when not None
2528        b1r = self.rng.permutation(list(range(-4, 4)))[:]
2529        e1r = self.rng.permutation(list(range(-4, 4)))[:]
2530        b2r = self.rng.permutation(list(range(-4, 4)))[:]
2531        e2r = self.rng.permutation(list(range(-4, 4)))[:]
2532        s1r = self.rng.permutation([-4, -3, -2, -1, 1, 2, 3, 4])[:]
2533        s2r = self.rng.permutation([-4, -3, -2, -1, 1, 2, 3, 4])[:]
2534
2535        scalar_vars = [b1, e1, s1, b2, e2, s2]
2536        scalar_ranges = [b1r, e1r, s1r, b2r, e2r, s2r]
2537
2538        # For each case, we will build a graph, function, and list of values
2539        # Then, we test it on each input shape.
2540        for none_pos in none_positions:
2541            slice_inputs = []
2542            input_vars = []
2543            values = []
2544            if sum(none_pos) == 0:
2545                # Those cases are already covered by test_scalar5
2546                continue
2547
2548            for i, none_i in enumerate(none_pos):
2549                if none_i:
2550                    slice_inputs.append(None)
2551                else:
2552                    slice_inputs.append(scalar_vars[i])
2553                    input_vars.append(scalar_vars[i])
2554                    values.append(scalar_ranges[i])
2555
2556            slice1 = slice(*slice_inputs[:3])
2557            slice2 = slice(*slice_inputs[3:])
2558            sub_x = x[slice1][slice2]
2559            f = theano.function([x] + input_vars, sub_x, mode=mode_opt)
2560
2561            # Check stacktrace was copied over correctly after opt was applied
2562            # for some cases, the optimization may remove all Subtensors,
2563            # which is why we pass "bug_print='ignore'".
2564            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor,
2565                                              bug_print='ignore'))
2566
2567            topo = f.maker.fgraph.toposort()
2568            # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2569            assert len([t for t in topo if isinstance(t.op,
2570                                                      tensor.Subtensor)]) <= 1
2571            assert isinstance(topo[-1].op, DeepCopyOp)
2572
2573            for x_s in self.x_shapes:
2574                x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2575                for i_val in zip(*values):
2576                    f(x_val, *i_val)
2577
2578    def test_none_index(self):
2579        # Test the general case of indexing into a subvector,
2580        # like x[b:e:s][i], where any of b, e, and s can be None
2581        x = tensor.matrix('x')
2582        b = tensor.iscalar('b')
2583        e = tensor.iscalar('e')
2584        s = tensor.iscalar('s')
2585        i = tensor.iscalar('i')
2586
2587        # Generate all possible lists of positions for None in those 4 slots
2588        # A 1 indicates None is present, 0 that there is a Theano scalar.
2589        # The last index (i) is never None
2590        none_positions = np.ndindex(2, 2, 2, 1)
2591
2592        # Ranges to be used when not None
2593        b_r = self.rng.permutation(list(range(-4, 4)))[:]
2594        e_r = self.rng.permutation(list(range(-4, 4)))[:]
2595        i_r = self.rng.permutation(list(range(-4, 4)))[:]
2596        s_r = self.rng.permutation([-4, -3, -2, -1, 1, 2, 3, 4])[:]
2597
2598        scalar_vars = [b, e, s, i]
2599        scalar_ranges = [b_r, e_r, s_r, i_r]
2600
2601        # For each case, we will build a graph, function, and list of values
2602        # Then, we test it on each input shape.
2603        for none_pos in none_positions:
2604            slice_inputs = []
2605            input_vars = []
2606            values = []
2607            if sum(none_pos) == 0:
2608                # Those cases are already tested in test_scalar6
2609                continue
2610
2611            for j, none_j in enumerate(none_pos):
2612                if none_j:
2613                    slice_inputs.append(None)
2614
2615                else:
2616                    slice_inputs.append(scalar_vars[j])
2617                    input_vars.append(scalar_vars[j])
2618                    values.append(scalar_ranges[j])
2619
2620            symbol_slice = slice(*slice_inputs[:3])
2621            sub_x = x[symbol_slice][i]
2622            f = theano.function([x] + input_vars, sub_x, mode=mode_opt)
2623
2624            # Check stacktrace was copied over correctly after opt was applied
2625            self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
2626
2627            topo = f.maker.fgraph.toposort()
2628            # print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
2629            assert len([t for t in topo if isinstance(t.op,
2630                                                      tensor.Subtensor)]) <= 1
2631            assert isinstance(topo[-1].op, DeepCopyOp)
2632
2633            for x_s in self.x_shapes:
2634                x_val = self.rng.uniform(size=x_s).astype(config.floatX)
2635                for i_val in zip(*values):
2636                    # The index could be out of bounds
2637                    # In that case, an Exception should be raised,
2638                    # otherwise, we let DebugMode check f
2639                    # For that, we need to create a numerical slice.
2640                    i_val_idx = 0
2641                    num_slice_inputs = []
2642                    for none_j in none_pos:
2643                        if none_j:
2644                            num_slice_inputs.append(None)
2645                        else:
2646                            num_slice_inputs.append(i_val[i_val_idx])
2647                            i_val_idx += 1
2648                    num_slice = slice(*num_slice_inputs[:3])
2649                    num_i = num_slice_inputs[3]
2650
2651                    try:
2652                        x_val[num_slice][num_i]
2653                    except IndexError:
2654                        self.assertRaises(IndexError, f, x_val, *i_val)
2655                    else:
2656                        # Executed if the "try" clause did not raise
2657                        # any exception
2658                        f(x_val, *i_val)
2659
2660
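# Illustrative sketch, not part of the original suite: a minimal, self-contained
# check of the slice-merging behaviour exercised by the tests above.  The helper
# name `_demo_merge_two_constant_slices` is made up for illustration and is
# never called at import time.
def _demo_merge_two_constant_slices():
    x = tensor.matrix('x')
    # Two chained constant slices should be fused into a single Subtensor.
    f = function([x], x[1:][:3], mode=mode_opt)
    n_subtensor = len([n for n in f.maker.fgraph.toposort()
                       if isinstance(n.op, tensor.Subtensor)])
    assert n_subtensor == 1
    # The compiled function still matches plain NumPy slicing.
    xv = np.arange(30).reshape(6, 5).astype(config.floatX)
    assert np.allclose(f(xv), xv[1:][:3])
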
2661class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
2662    def setUp(self):
2663        utt.seed_rng()
2664        mode = theano.compile.mode.get_default_mode()
2665        self.mode = mode.including("local_adv_sub1_adv_inc_sub1").excluding("fusion")
2666        self.mode_no_assert = self.mode.including("local_remove_all_assert")
2667
2668    def test0(self):
2669        for dtype1, dtype2 in [("float32", "float32"),
2670                               ("float32", "float64"),
2671                               ("float64", "float32"),
2672                               ("float64", "float64")]:
2673            x = tensor.matrix(dtype=dtype1)
2674            y = tensor.matrix(dtype=dtype2)
2675            idx = tensor.ivector()
2676
2677            dx = np.random.rand(4, 5).astype(dtype1)
2678            dy = np.random.rand(2, 5).astype(dtype2)
2679            # Duplicate the last row of dy
2680            dy = np.vstack([dy, dy[-1:]])
2681            # Use the same index twice, with the same corresponding value.
2682            # That makes set_subtensor well-defined, and tests
2683            # duplication for inc_subtensor.
2684            didx = np.asarray([1, 3, 3], "int32")
2685
2686            # set_subtensor
2687            inc = tensor.set_subtensor(x[idx], y)
2688            o = inc[idx]
2689            f = theano.function([x, y, idx], o, self.mode_no_assert)
2690
2691            res = f(dx, dy, didx)
2692            utt.assert_allclose(dy, res)
2693            topo = f.maker.fgraph.toposort()
2694            assert len(topo) == 1
2695            assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
2696
2697            # inc_subtensor(data[idx], y)
2698            inc = tensor.inc_subtensor(x[idx], y)
2699            o = inc[idx]
2700            f = theano.function([x, y, idx], o, self.mode_no_assert)
2701
2702            res = f(dx, dy, didx)
2703            _dx = dx.copy()
2704            np.add.at(_dx, didx, dy)
2705            utt.assert_allclose(_dx[didx], res)
2706            topo = f.maker.fgraph.toposort()
2707            assert not any(isinstance(n.op, tensor.AdvancedIncSubtensor1) for n in topo)
2708
2709            # inc_subtensor(zeros_like(x)[idx], y)
2710            inc = tensor.inc_subtensor(x.zeros_like()[idx], y)
2711            o = inc[idx]
2712            f = theano.function([x, y, idx], o, self.mode_no_assert)
2713
2714            res = f(dx, dy, didx)
2715            utt.assert_allclose(np.vstack([dy[0], 2 * dy[1], 2 * dy[2]]), res)
2716
2717    def test_assert(self):
2718        x = tensor.matrix("x")
2719        y = tensor.matrix("y")
2720        idx = tensor.ivector()
2721
2722        dx = np.random.rand(4, 5).astype(config.floatX)
2723        dy = np.random.rand(2, 5).astype(config.floatX)
2724
2725        # set_subtensor
2726        inc = tensor.set_subtensor(x[idx], y)
2727        o = inc[idx]
2728        f = theano.function([x, y, idx], o, self.mode)
2729        # test wrong index
2730        for i in [dx.shape[0], -dx.shape[0] - 1]:
2731            self.assertRaises((AssertionError, IndexError),
2732                              f, dx, dy, [i, i])
2733        # test wrong shape
2734        self.assertRaises((AssertionError, ValueError),
2735                          f, dx, dy, [1])
2736
2737    def test_stack_trace(self):
2738        x = tensor.matrix("x")
2739        # test cases with y.dtype
2740        # - equal to x.dtype
2741        # - different from x.dtype (to trigger the cast in
2742        #   local_adv_sub1_adv_inc_sub1)
2743        ys = [tensor.matrix("y"), tensor.dmatrix("y")]
2744        idx = tensor.ivector()
2745
2746        # set_subtensor and then subtensor with both ys
2747        incs = [tensor.set_subtensor(x[idx], y) for y in ys]
2748        outs = [inc[idx] for inc in incs]
2749
2750        for y, out in zip(ys, outs):
2751            f = theano.function([x, y, idx], out, self.mode)
2752            self.assertTrue(check_stack_trace(
2753                f, ops_to_check=(Assert, scal.Cast)))
2754
2755
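# Illustrative sketch (hypothetical helper, never called): the identity behind
# local_adv_sub1_adv_inc_sub1, spelled out in plain NumPy.  Reading back the
# rows that were just set returns the new values, so the set/read pair can be
# collapsed.
def _demo_set_then_read_back():
    dx = np.random.rand(4, 5)
    dy = np.random.rand(3, 5)
    didx = np.asarray([1, 3, 0])
    out = dx.copy()
    out[didx] = dy  # NumPy analogue of tensor.set_subtensor(x[idx], y)
    assert np.allclose(out[didx], dy)
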
2756class Test_alloc_zero(unittest.TestCase):
2757    def setUp(self):
2758        mode = theano.compile.mode.get_default_mode()
2759        self.mode = mode.including("local_incsubtensor_of_zeros",
2760                                   "local_setsubtensor_of_constants",
2761                                   "local_0_dot_x")
2762
2763    def test_setsubtensor_allocs0(self):
2764        x = tensor.matrix()
2765        y = tensor.matrix()
2766        x0 = tensor.zeros_like(x)
2767        y0 = tensor.zeros_like(y)
2768        z = tensor.set_subtensor(x0[:4], y0)
2769        f = theano.function([x, y], z, mode=self.mode)
2770        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2771                       for n in f.maker.fgraph.toposort()])
2772
2773    def test_setsubtensor_allocs1(self):
2774        y = tensor.matrix()
2775        x0 = tensor.constant(np.asarray(np.zeros((4, 4)),
2776                                        dtype=config.floatX))
2777        y0 = tensor.zeros_like(y)
2778        z = tensor.set_subtensor(x0[:4], y0)
2779        f = theano.function([y], z, mode=self.mode)
2780        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2781                       for n in f.maker.fgraph.toposort()])
2782
2783    def test_setsubtensor_allocs1t(self):
2784        y = tensor.matrix()
2785        x0 = tensor.constant(np.asarray(np.zeros((4, 4)),
2786                                        dtype=config.floatX))
2787        y0 = tensor.zeros_like(y)
2788        z = tensor.set_subtensor(x0[:4], y0.T)
2789        f = theano.function([y], z, mode=mode_opt)
2790        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2791                      for n in f.maker.fgraph.toposort()])
2792
2793    def test_setsubtensor_allocs2(self):
2794        x = tensor.matrix()
2795        y0 = tensor.constant(np.asarray(np.zeros((4, 4)),
2796                                        dtype=config.floatX))
2797        x0 = tensor.zeros_like(x)
2798        z = tensor.set_subtensor(x0[:4], y0)
2799        f = theano.function([x], z, mode=self.mode)
2800        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2801                       for n in f.maker.fgraph.toposort()])
2802
2803    def test_incsubtensor_allocs0(self):
2804        x = tensor.matrix()
2805        y = tensor.matrix()
2806        y0 = tensor.zeros_like(y)
2807        z = tensor.inc_subtensor(x[:4], y0)
2808        f = theano.function([x, y], z, mode=self.mode)
2809        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2810                       for n in f.maker.fgraph.toposort()])
2811
2812    def test_incsubtensor_allocs0t(self):
2813        x = tensor.matrix()
2814        y = tensor.matrix()
2815        y0 = tensor.zeros_like(y)
2816        z = tensor.inc_subtensor(x[:4], y0.T)
2817        f = theano.function([x, y], z, mode=mode_opt)
2818        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2819                       for n in f.maker.fgraph.toposort()])
2820
2821    def test_incsubtensor_allocs1(self):
2822        x = tensor.matrix()
2823        y0 = tensor.constant(np.asarray(np.zeros((4, 4)),
2824                                        dtype=config.floatX))
2825        z = tensor.inc_subtensor(x[:4], y0)
2826        f = theano.function([x], z, mode=self.mode)
2827        assert np.all([not isinstance(n.op, tensor.IncSubtensor)
2828                       for n in f.maker.fgraph.toposort()])
2829
2830    def test_incsubtensor_x_zeros(self):
2831        x = tensor.constant(np.asarray(np.zeros((4, 4)),
2832                                       dtype=config.floatX))
2833        y = tensor.matrix()
2834        z = tensor.inc_subtensor(x[:4], y)
2835        f = theano.function([y], z)
2836        inc_nodes = [n for n in f.maker.fgraph.toposort()
2837                     if isinstance(n.op, tensor.IncSubtensor)]
2838
2839        assert(len(inc_nodes) == 1)
2840        node_is_set_instead_of_inc = inc_nodes[0].op.set_instead_of_inc
2841        mode = theano.config.mode
2842        assert((mode != "FAST_COMPILE" and node_is_set_instead_of_inc) or
2843               (mode == "FAST_COMPILE" and not node_is_set_instead_of_inc))
2844        test_X = np.random.random((4, 4)).astype(config.floatX)
2845        utt.assert_allclose(f(test_X), test_X)
2846
2847        # also check the flag doesn't get set if first input is not zeros:
2848        not_all_zeros = np.zeros((4, 4))
2849        not_all_zeros[1, 0] = 0.001
2850        x = tensor.constant(np.asarray(not_all_zeros, dtype=config.floatX))
2851        y = tensor.matrix()
2852        z = tensor.inc_subtensor(x[:4], y)
2853        f = theano.function([y], z)
2854        inc_nodes = [n for n in f.maker.fgraph.toposort()
2855                     if isinstance(n.op, tensor.IncSubtensor)]
2856        assert(len(inc_nodes) == 1)
2857        assert(inc_nodes[0].op.set_instead_of_inc is False)
2858        test_X = np.random.random((4, 4)).astype(config.floatX)
2859        utt.assert_allclose(f(test_X), test_X + not_all_zeros)
2860
2861    def test_advancedincsubtensor1_allocs0(self):
2862        x = tensor.matrix()
2863        y = tensor.matrix()
2864        y0 = tensor.zeros_like(y)
2865        z = tensor.inc_subtensor(x[[0, 1, 2, 3]], y0)
2866        f = theano.function([x, y], z, mode=self.mode)
2867        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor1)
2868                       for n in f.maker.fgraph.toposort()])
2869
2870    def test_advancedincsubtensor1_allocs0t(self):
2871        x = tensor.matrix()
2872        y = tensor.matrix()
2873        y0 = tensor.zeros_like(y)
2874        z = tensor.inc_subtensor(x[[0, 1, 2, 3]], y0.T)
2875        f = theano.function([x, y], z, mode=mode_opt)
2876        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor1)
2877                       for n in f.maker.fgraph.toposort()])
2878
2879    def test_advancedincsubtensor1_allocs1(self):
2880        x = tensor.matrix()
2881        y0 = tensor.constant(np.asarray(np.zeros((4, 4)),
2882                                        dtype=config.floatX))
2883        z = tensor.inc_subtensor(x[[0, 1, 2, 3]], y0)
2884        f = theano.function([x], z, mode=self.mode)
2885        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor1)
2886                       for n in f.maker.fgraph.toposort()])
2887
2888    def test_advancedincsubtensor_allocs0(self):
2889        x = tensor.matrix()
2890        y = tensor.matrix()
2891        y0 = tensor.zeros_like(y)
2892        z = tensor.inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0)
2893        f = theano.function([x, y], z, mode=self.mode)
2894        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor)
2895                       for n in f.maker.fgraph.toposort()])
2896
2897    def test_advancedincsubtensor_allocs0t(self):
2898        x = tensor.matrix()
2899        y = tensor.matrix()
2900        y0 = tensor.zeros_like(y)
2901        z = tensor.inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0.T)
2902        f = theano.function([x, y], z, mode=mode_opt)
2903        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor)
2904                       for n in f.maker.fgraph.toposort()])
2905
2906    def test_advancedincsubtensor_allocs1(self):
2907        x = tensor.matrix()
2908        y0 = tensor.constant(np.asarray(np.zeros((2, 2)),
2909                                        dtype=config.floatX))
2910        z = tensor.inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0)
2911        f = theano.function([x], z, mode=self.mode)
2912        assert np.all([not isinstance(n.op, tensor.AdvancedIncSubtensor)
2913                       for n in f.maker.fgraph.toposort()])
2914
2915    def test_dot_allocs_0(self):
2916        v1 = tensor.vector('v1')
2917        v2 = tensor.vector('v2')
2918        m1 = tensor.matrix('m1')
2919        m2 = tensor.matrix('m2')
2920        vv2 = np.asarray([0, 1], dtype=theano.config.floatX)
2921        vm2 = np.asarray([[1, 2], [4, 5]],
2922                         dtype=theano.config.floatX)
2923        vv3 = np.asarray([0, 1, 2], dtype=theano.config.floatX)
2924        vm3 = np.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
2925                         dtype=theano.config.floatX)
2926        for _e1 in [(v1, vv2, vv3), (m1, vm2, vm3)]:
2927            for _e2 in [(v2, vv2, vv3), (m2, vm2, vm3)]:
2928                for p in [0, 1]:
2929                    if p == 0:
2930                        e1 = tensor.zeros_like(_e1[0])
2931                        e2 = _e2[0]
2932                    else:
2933                        e1 = _e1[0]
2934                        e2 = tensor.zeros_like(_e2[0])
2935                    o = tensor.dot(e1, e2)
2936                    f = theano.function([_e1[0], _e2[0]], o, mode=self.mode)
2937                    f(_e1[1], _e2[1])
2938                    f(_e1[2], _e2[2])
2939                    assert np.all([not isinstance(n.op, tensor.Dot) for n in
2940                                   f.maker.fgraph.toposort()])
2941
2942                    # test that we don't remove shape errors
2943                    self.assertRaises((ValueError, AssertionError), f,
2944                                      _e1[1], _e2[2])
2945                    self.assertRaises((ValueError, AssertionError), f,
2946                                      _e1[2], _e2[1])
2947
2948
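# Illustrative sketch (made-up helper, not executed at import): the rewrites in
# Test_alloc_zero rely on the fact that incrementing any subtensor by zeros is
# a no-op, so the IncSubtensor node can be dropped from the graph.
def _demo_inc_by_zeros_is_noop():
    xv = np.random.rand(4, 4)
    out = xv.copy()
    out[:4] += np.zeros((4, 4))  # increment a slice by zeros
    assert np.allclose(out, xv)
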
2949def test_local_IncSubtensor_serialize():
2950    d = np.random.normal(0, 0.01, size=(100, 100))
2951    d = d.astype(theano.config.floatX)
2952
2953    W = theano.shared(d, name='W')
2954    i = T.vector('i', dtype='int64')
2955    j = T.vector('j', dtype='int64')
2956    t = T.scalar('t')
2957    y = (W[i] + W[j] + W[1] + W[i, j]).sum()
2958    cost = T.sqr(t - y)
2959    dW = theano.grad(cost, W)
2960    mode = theano.compile.mode.get_default_mode().excluding('fusion')
2961    mode = mode.including("local_IncSubtensor_serialize")
2962    f = theano.function([i, j, t], updates=[(W, W - 0.01 * dW)], mode=mode)
2963    topo = f.maker.fgraph.toposort()
2964    adds = [n for n in topo if isinstance(n.op, T.Elemwise) and
2965            isinstance(n.op.scalar_op, theano.scalar.Add)]
2966    for a in adds:
2967        assert not any([inp.owner and
2968                        isinstance(inp.owner.op,
2969                                   (tensor.IncSubtensor,
2970                                    tensor.AdvancedIncSubtensor,
2971                                    tensor.AdvancedIncSubtensor1))
2972                        for inp in a.inputs])
2973
2974    # Now test that the stack trace is copied over properly,
2975    # if we return the gradients. We need to use the same mode as before.
2976    f = theano.function([i, j, t], dW, mode=mode)
2977    assert check_stack_trace(f, ops_to_check=[
2978        tensor.IncSubtensor, tensor.AdvancedIncSubtensor,
2979        tensor.AdvancedIncSubtensor1])
2980
2981
2982def test_local_set_to_inc_subtensor():
2983    v = theano.tensor.fmatrix()
2984    s = v[[2, 1]]
2985    g = s + 3
2986    r = theano.tensor.set_subtensor(s, g)
2987    moder = compile.get_default_mode().excluding('local_set_to_inc_subtensor')
2988    modet = compile.get_default_mode().including('local_set_to_inc_subtensor')
2989    f1 = theano.function([v], r, mode=moder)
2990    f2 = theano.function([v], r, mode=modet)
2991
2992    advi1 = [n for n in f1.maker.fgraph.toposort()
2993             if isinstance(n.op, tensor.AdvancedIncSubtensor1)]
2994
2995    advi2 = [n for n in f2.maker.fgraph.toposort()
2996             if isinstance(n.op, tensor.AdvancedIncSubtensor1)]
2997
2998    # We only have SetSubtensor in f1
2999    assert all(n.op.set_instead_of_inc for n in advi1)
3000    # We don't have any SetSubtensor in f2
3001    assert all(not n.op.set_instead_of_inc for n in advi2)
3002
3003    val = np.random.randn(3, 2).astype('float32')
3004
3005    r1 = f1(val)
3006    r2 = f2(val)
3007
3008    utt.assert_allclose(r1, r2)
3009
3010    # Finally, test that the stack trace is copied over properly,
3011    # before and after optimization.
3012    assert check_stack_trace(f1, ops_to_check=tensor.AdvancedIncSubtensor1)
3013    assert check_stack_trace(f2, ops_to_check='all')
3014
3015
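# Illustrative sketch (hypothetical helper): local_set_to_inc_subtensor rests on
# set_subtensor(v[idx], v[idx] + g) being equivalent to inc_subtensor(v[idx], g)
# when the indices are unique, as they are in the test above.
def _demo_set_to_inc_identity():
    v = np.arange(6.).reshape(3, 2)
    idx = [2, 1]
    set_version = v.copy()
    set_version[idx] = v[idx] + 3  # "set" formulation
    inc_version = v.copy()
    inc_version[idx] += 3          # "inc" formulation
    assert np.allclose(set_version, inc_version)
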
3016def test_local_subtensor_of_dot():
3017    m1 = theano.tensor.matrix()
3018    m2 = theano.tensor.matrix()
3019    d1 = np.arange(6).reshape((3, 2)).astype(config.floatX)
3020    d2 = np.arange(8).reshape((2, 4)).astype(config.floatX) + 10
3021    mode = compile.get_default_mode().including("local_subtensor_of_dot")
3022
3023    def test_equality(a, b):
3024        return a.shape == b.shape and np.allclose(a, b)
3025
3026    # [cst]
3027    f = theano.function([m1, m2], theano.dot(m1, m2)[1], mode=mode)
3028    topo = f.maker.fgraph.toposort()
3029    assert test_equality(f(d1, d2), np.dot(d1, d2)[1])
3030    # In FAST_COMPILE, a DimShuffle can be the last node instead of a Gemv
3031    assert isinstance(topo[-1].op, (T.blas_c.CGemv, T.blas.Gemv, T.DimShuffle))
3032
3033    # slice
3034    f = theano.function([m1, m2], theano.dot(m1, m2)[1:2], mode=mode)
3035    topo = f.maker.fgraph.toposort()
3036    assert test_equality(f(d1, d2), np.dot(d1, d2)[1:2])
3037    assert isinstance(topo[-1].op, T.blas.Dot22)
3038
3039    m1 = theano.tensor.tensor3()
3040    m2 = theano.tensor.tensor3()
3041    idx = theano.tensor.iscalar()
3042    d1 = np.arange(30).reshape(2, 5, 3).astype(config.floatX)
3043    d2 = np.arange(72).reshape(4, 3, 6).astype(config.floatX) + 100
3044
3045    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
3046    assert test_equality(f(d1, d2, 1), np.dot(d1, d2)[1, 1:4, :, 1:])
3047    # Check that the stack trace is copied over properly.
3048    assert check_stack_trace(f, ops_to_check='last')
3049
3050    f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
3051    assert test_equality(f(d1, d2, 1), np.dot(d1, d2)[1:4, :, 1:, 1])
3052
3053    # Now test that the stack trace is copied over properly.
3054    # We need to use the same mode as before.
3055    assert check_stack_trace(f, ops_to_check='last')
3056
3057
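# Illustrative sketch (made-up helper): local_subtensor_of_dot uses the identity
# dot(A, B)[i] == dot(A[i], B), so the indexing can be moved before the
# (smaller) dot product.
def _demo_subtensor_of_dot_identity():
    a = np.arange(6.).reshape(3, 2)
    b = np.arange(8.).reshape(2, 4)
    assert np.allclose(np.dot(a, b)[1], np.dot(a[1], b))
    assert np.allclose(np.dot(a, b)[1:2], np.dot(a[1:2], b))
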
3058class Test_local_elemwise_alloc(unittest.TestCase):
3059    dtype = config.floatX
3060
3061    def setUp(self):
3062        self.fast_compile_mode = get_mode('FAST_COMPILE')
3063        self.fast_run_mode = get_mode('FAST_RUN')
3064
3065        self.vec = T.vector('vec', dtype=self.dtype)
3066        self.mat = T.matrix('mat', dtype=self.dtype)
3067        self.tens = T.tensor3('tens', dtype=self.dtype)
3068
3069        self.alloc_wo_dep = T.alloc(self.vec, 2, 2)
3070        self.alloc_wo_dep_broad = T.alloc(self.vec, 1, 2)
3071        self.alloc_w_dep = T.alloc(self.vec, *self.mat.shape)
3072        self.alloc_w_dep_broad = T.alloc(self.vec, 1, *self.mat.shape)
3073        self.alloc_w_dep_broad2 = T.alloc(self.vec, self.mat.shape[0],
3074                                          self.mat.shape[1], 1)
3075        self.alloc_w_dep_tens = T.alloc(
3076            self.vec,
3077            self.tens.shape[0],
3078            self.tens.shape[1]
3079        )
3080        self.tv_wo_dep = T.alloc(self.vec, 5, 5)
3081        self.tm_wo_dep = T.alloc(self.mat, 5, 5, 5)
3082        self.s = T.iscalar('s')
3083        self.tv_w_dep = T.alloc(self.vec, self.s, self.s)
3084        self.tm_w_dep = T.alloc(self.mat, 5, 5, 5)
3085        self.row = theano.tensor.row(dtype=self.dtype)
3086        self.o = T.alloc(self.row, 5, 5)
3087
3088    def _verify_alloc_count(self, f, count):
3089        assert(
3090            sum([isinstance(elem.op, T.Alloc)
3091                 for elem in f.maker.fgraph.toposort()
3092                 if elem.op is not None]) == count
3093        )
3094
3095    def _verify_assert_count(self, f, count):
3096        assert(
3097            sum([isinstance(elem.op, T.opt.Assert)
3098                 for elem in f.maker.fgraph.toposort()
3099                 if elem.op is not None]) == count
3100        )
3101
3102    def test_remove_alloc_wo_dimshuffle(self):
3103        # Exclude local_useless_alloc, since it does not introduce
3104        # assert in all the same cases.
3105        self.fast_run_mode = self.fast_run_mode.excluding(
3106            'local_useless_alloc', 'local_canonicalize_alloc')
3107        # No optimization on alloc
3108        func = function(
3109            [self.vec, self.mat],
3110            self.alloc_wo_dep + self.mat,
3111            mode=self.fast_compile_mode
3112        )
3113        self._verify_alloc_count(func, 1)
3114        self._verify_assert_count(func, 0)
3115        # Check stacktrace was copied over correctly after opt was applied
3116        self.assertTrue(check_stack_trace(func, ops_to_check='all'))
3117
3118        # Optimization on alloc with assert
3119        func = function(
3120            [self.vec, self.mat],
3121            self.alloc_wo_dep + self.mat,
3122            mode=self.fast_run_mode
3123        )
3124        self._verify_alloc_count(func, 0)
3125        self._verify_assert_count(func, 1)
3126
3127        # Optimization on alloc with assert and broadcast
3128        func = function(
3129            [self.vec, self.mat],
3130            self.alloc_wo_dep_broad + self.mat,
3131            mode=self.fast_run_mode
3132        )
3133        self._verify_alloc_count(func, 0)
3134        self._verify_assert_count(func, 1)
3135
3136        # No optimization on alloc without assert
3137        func = function(
3138            [self.vec, self.mat],
3139            self.alloc_w_dep + self.mat,
3140            mode=self.fast_compile_mode
3141        )
3142        self._verify_alloc_count(func, 1)
3143        self._verify_assert_count(func, 0)
3144
3145        # Optimization on alloc without assert
3146        func = function(
3147            [self.vec, self.mat],
3148            self.alloc_w_dep + self.mat,
3149            mode=self.fast_run_mode
3150        )
3151        self._verify_alloc_count(func, 0)
3152        self._verify_assert_count(func, 0)
3153
3154        # Optimization on alloc without assert and with broadcast
3155        func = function(
3156            [self.vec, self.mat],
3157            self.alloc_w_dep_broad + self.mat,
3158            mode=self.fast_run_mode
3159        )
3160        self._verify_alloc_count(func, 0)
3161        self._verify_assert_count(func, 0)
3162
3163        # Case that is not optimized: alloc with broadcast
3164        func = function(
3165            [self.vec, self.mat],
3166            self.alloc_w_dep_broad2 + self.mat,
3167            mode=self.fast_run_mode
3168        )
3169        self._verify_alloc_count(func, 1)
3170        self._verify_assert_count(func, 0)
3171
3172    def test_remove_alloc_w_dimshuffle(self):
3173        # No optimization on dimshuffle with assert
3174        func = function(
3175            [self.vec, self.tens],
3176            self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
3177            mode=self.fast_compile_mode
3178        )
3179        self._verify_alloc_count(func, 1)
3180        self._verify_assert_count(func, 0)
3181
3182        # Optimization on dimshuffle with assert
3183        func = function(
3184            [self.vec, self.tens],
3185            self.alloc_wo_dep.dimshuffle(0, 1, 'x') + self.tens,
3186            mode=self.fast_run_mode
3187        )
3188        self._verify_alloc_count(func, 0)
3189        self._verify_assert_count(func, 1)
3190
3191        # No optimization on dimshuffle without assert
3192        func = function(
3193            [self.vec, self.tens],
3194            self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
3195            mode=self.fast_compile_mode
3196        )
3197        self._verify_alloc_count(func, 1)
3198        self._verify_assert_count(func, 0)
3199
3200        # Optimization on dimshuffle without assert
3201        func = function(
3202            [self.vec, self.tens],
3203            self.alloc_w_dep_tens.dimshuffle(0, 1, 'x') + self.tens,
3204            mode=self.fast_run_mode
3205        )
3206        self._verify_alloc_count(func, 0)
3207        self._verify_assert_count(func, 0)
3208
3209    def test_multi_input_single_alloc(self):
3210        # No optimization on dimshuffle with assert
3211        func = function(
3212            [self.vec, self.mat],
3213            self.tv_wo_dep + self.tm_wo_dep,
3214            mode=self.fast_compile_mode
3215        )
3216        self._verify_alloc_count(func, 2)
3217        self._verify_assert_count(func, 0)
3218
3219        # Optimization on dimshuffle with assert
3220        func = function(
3221            [self.vec, self.mat],
3222            self.tv_wo_dep + self.tm_wo_dep,
3223            mode=self.fast_run_mode
3224        )
3225        self._verify_alloc_count(func, 1)
3226        self._verify_assert_count(func, 0)
3227
3228        # No optimization on dimshuffle without assert
3229        func = function(
3230            [self.vec, self.mat, self.s],
3231            self.tv_w_dep + self.tm_w_dep,
3232            mode=self.fast_compile_mode
3233        )
3234        self._verify_alloc_count(func, 2)
3235        self._verify_assert_count(func, 0)
3236
3237        # Optimization on dimshuffle without assert
3238        func = function(
3239            [self.vec, self.mat, self.s],
3240            self.tv_w_dep + self.tm_w_dep,
3241            mode=self.fast_run_mode
3242        )
3243        self._verify_alloc_count(func, 1)
3244        self._verify_assert_count(func, 1)
3245
3246    def test_error(self):
3247        t3fft = theano.tensor.tensor(dtype=self.dtype,
3248                                     broadcastable=(False, False, True))
3249        o = self.o.dimshuffle(0, 1, 'x') + t3fft
3250        func = function(
3251            [t3fft, self.row],
3252            o,
3253            mode=self.fast_run_mode
3254        )
3255        self._verify_alloc_count(func, 0)
3256        self._verify_assert_count(func, 1)
3257        d = np.random.rand(5, 5, 1).astype(self.dtype)
3258        r = np.random.rand(1, 5).astype(self.dtype)
3259        func(d, r)
3260
3261
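# Illustrative sketch (hypothetical helper): the Elemwise/Alloc rewrites above
# work because explicitly tiling an operand to the other operand's shape gives
# the same result as relying on broadcasting, so the Alloc is redundant.
def _demo_elemwise_alloc_broadcast():
    v = np.arange(3.)
    m = np.ones((4, 3))
    assert np.allclose(np.tile(v, (4, 1)) + m, v + m)
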
3262def test_local_subtensor_of_alloc():
3263
3264    # DebugMode should detect if something goes wrong.
3265    # Test shape combinations of odd and even shapes.
3266    for shape in [(3, 5), (4, 6), (3, 8), (4, 7),
3267                  (1, 5), (5, 1)]:
3268        x = tensor.tensor(dtype=theano.config.floatX,
3269                          broadcastable=(shape[0] == 1, shape[1] == 1))
3270
3271        xval = np.zeros(shape, dtype=config.floatX)
3272        yval = np.arange(shape[1], dtype=config.floatX)
3273
3274        for y in [theano.shared(yval), tensor.constant([1.])]:
3275
3276            # The rows of yx are copies of y
3277            yx = tensor.alloc(y, x.shape[0], x.shape[1])
3278
3279            # Slice of each row
3280            z_mat = yx[:, 3:]
3281            assert z_mat.ndim == 2
3282
3283            # Only one column
3284            z_vec = yx[:, 3]
3285            assert z_vec.ndim == 1
3286            # The following slices give vector results
3287            slicess = []
3288            if shape[0] != 1:
3289                slicess.append((2, slice(None)))
3290            if shape[1] != 1:
3291                slicess.append((slice(None), 3))
3292
3293            # The following slices give matrix results
3294            slicess += [
3295                (slice(None), slice(3, None)),
3296                (slice(3, None), ),
3297                (slice(3, None), slice(3, None)),
3298                (slice(1, 3), slice(None, -1)),
3299                (slice(None, None, 2)),
3300                (slice(1, None, 2)),
3301            ]
3302            for slices in slicess:
3303                z = yx.__getitem__(slices)
3304                f = theano.function([x], z)
3305                if theano.config.mode != 'FAST_COMPILE':
3306                    # A Subtensor may still appear among the inputs of the Alloc
3307                    assert not isinstance(f.maker.fgraph.toposort()[-1].op,
3308                                          Subtensor)
3309                val = f(xval)
3310                assert xval.__getitem__(slices).shape == val.shape
3311
3312
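# Illustrative sketch (made-up helper): test_local_subtensor_of_alloc relies on
# slicing a tiled value being the same as tiling the sliced value, so the
# Subtensor can be pushed inside the Alloc.
def _demo_subtensor_of_alloc_identity():
    y = np.arange(5.)
    tiled = np.tile(y, (3, 1))  # NumPy analogue of tensor.alloc(y, 3, 5)
    assert np.allclose(tiled[:, 3:], np.tile(y[3:], (3, 1)))
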
3313def test_local_fill_useless():
3314    # Test opt local_fill_useless
3315    x = dvector()
3316    y = dvector()
3317    z = lvector()
3318    m = dmatrix()
3319
3320    x_ = np.random.rand(5,)
3321    y_ = np.random.rand(5,)
3322    z_ = (np.random.rand(5,) * 5).astype("int64")
3323    m_ = np.random.rand(5, 5)
3324
3325    # basic case
3326    f = function([x], T.fill(x, x) * 2, mode=mode_opt)
3327    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3328    f(x_)
3329
3330    # basic case
3331    f = function([x, y], T.second(y, x) * 2, mode=mode_opt)
3332    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3333    f(x_, y_)
3334
3335    # basic case
3336    f = function([x, y], T.fill(x, y) * 2, mode=mode_opt)
3337    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3338    f(x_, y_)
3339
3340    # now with different type(cast)
3341    f = function([x, z], T.fill(z, x) * 2, mode=mode_opt)
3342    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3343    f(x_, z_)
3344
3345    # now with different type(cast)
3346    f = function([x, z], T.fill(x, z) * 2, mode=mode_opt)
3347    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3348    f(x_, z_)
3349
3350    # Same case again, to check the rewrite is applied consistently
3351    f = function([x, y], T.fill(x, y) * 2, mode=mode_opt)
3352    assert [node.op for node in f.maker.fgraph.toposort()] == [T.mul]
3353    f(x_, y_)
3354
3355    # Test with different number of dimensions
3356    # The fill is not useless, so it should stay
3357    f = function([m, x], T.fill(m, x) * 2, mode=mode_opt)
3358    ops = [node.op.__class__ for node in f.maker.fgraph.toposort()]
3359    assert T.Alloc in ops
3360    f(m_, x_)
3361
3362
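# Illustrative sketch (hypothetical helper): fill(x, y) broadcasts y to the
# shape of x, so when x and y already have the same shape the fill is just y
# and can be removed, which is what the fill tests above check.
def _demo_fill_useless_identity():
    x = np.random.rand(5)
    y = np.random.rand(5)
    # Adding a zeros array of x's shape is a NumPy analogue of fill(x, y).
    assert np.array_equal(y + np.zeros_like(x), y)
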
3363def test_local_elemwise_sub_zeros():
3364    # Test opt local_elemwise_sub_zeros
3365    # We test separately for scalars, vectors and matrices
3366    scalar = T.scalar()
3367    vect = T.vector()
3368    mat = T.matrix()
3369
3370    rng = np.random.RandomState(seed=utt.fetch_seed())
3371    scalar_val = rng.rand(1).astype(config.floatX)[0]
3372    vect_val = rng.rand(5).astype(config.floatX)
3373    mat_val = rng.rand(3, 2).astype(config.floatX)
3374
3375    mode = theano.compile.get_default_mode()\
3376        .excluding('canonicalize', 'uncanonicalize',
3377                   'ShapeOpt', 'local_fill_to_alloc',
3378                   'local_elemwise_alloc')\
3379        .including('local_elemwise_sub_zeros')
3380
3381    # Test scalar minus scalar
3382    f = function([scalar], scalar - scalar, mode=mode)
3383    # Check optimized graph is correct
3384    assert isinstance(f.maker.fgraph.toposort()[0].op, T.Elemwise)
3385    assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op,
3386                      theano.scalar.Second)
3387    assert isinstance(f.maker.fgraph.toposort()[0].inputs[1],
3388                      T.TensorConstant)
3391    utt.assert_allclose(f(scalar_val), 0.0)
3392    # Check stack trace is copied over
3393    assert check_stack_trace(f, ops_to_check='all')
3394
3395    # Test vector minus vector
3396    f = function([vect], vect - vect, mode=mode)
3397    # Check optimized graph is correct
3398    assert isinstance(f.maker.fgraph.toposort()[0].op, T.Elemwise)
3399    assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op,
3400                      theano.scalar.Second)
3401    assert isinstance(f.maker.fgraph.toposort()[0].inputs[1],
3402                      T.TensorConstant)
3405    utt.assert_allclose(f(vect_val), np.zeros(vect_val.shape))
3406    # Check stack trace is copied over
3407    assert check_stack_trace(f, ops_to_check='all')
3408
3409    # Test matrix minus matrix
3410    f = function([mat], mat - mat, mode=mode)
3411    # Check optimized graph is correct
3412    assert isinstance(f.maker.fgraph.toposort()[0].op, T.Elemwise)
3413    assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op,
3414                      theano.scalar.Second)
3415    assert isinstance(f.maker.fgraph.toposort()[0].inputs[1],
3416                      T.TensorConstant)
3419    utt.assert_allclose(f(mat_val), np.zeros(mat_val.shape))
3420    # Check stack trace is copied over
3421    assert check_stack_trace(f, ops_to_check='all')
3422
3423
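# Illustrative sketch (made-up helper): x - x is identically zero, which is why
# the graphs above collapse to a broadcasted zero constant instead of keeping a
# real subtraction.
def _demo_sub_zeros_identity():
    v = np.random.rand(3, 2)
    assert np.array_equal(v - v, np.zeros_like(v))
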
3424class Test_local_useless_elemwise_comparison(unittest.TestCase):
3425    def setUp(self):
3426        self.rng = np.random.RandomState(utt.fetch_seed())
3427
3428    def test_local_useless_elemwise_comparison(self):
3429        # TODO: test each case individually.
3430        # The following case is what made me discover those cases.
3431        X = T.matrix('X')
3432        Y = T.vector('Y')
3433        X_sum, updates = theano.scan(fn=lambda x: x.sum(),
3434                                     outputs_info=None,
3435                                     sequences=[X],
3436                                     non_sequences=None)
3437        Z = X_sum + Y
3438        # theano.printing.debugprint(Z)
3439        # here is the output for the debug print:
3440        """
3441        Elemwise{add,no_inplace} [id A] ''
3442         |for{cpu,scan_fn} [id B] ''
3443         | |Subtensor{int64} [id C] ''
3444         | | |Shape [id D] ''
3445         | | | |Subtensor{int64::} [id E] 'X[0:]'
3446         | | |   |X [id F]
3447         | | |   |Constant{0} [id G]
3448         | | |Constant{0} [id H]
3449         | |Subtensor{:int64:} [id I] ''
3450         | | |Subtensor{int64::} [id E] 'X[0:]'
3451         | | |ScalarFromTensor [id J] ''
3452         | |   |Subtensor{int64} [id C] ''
3453         | |Subtensor{int64} [id C] ''
3454         |Y [id K]
3455
3456        Inner graphs of the scan ops:
3457
3458        for{cpu,scan_fn} [id B] ''
3459         >Sum{acc_dtype=float64} [id L] ''
3460         > |X[t] [id M] -> [id I]
3461        """
3462
3463        mode = theano.compile.get_default_mode().excluding('fusion')
3464        f = theano.function([X, Y], Z, mode=mode)
3465        f(self.rng.rand(2, 3).astype(config.floatX),
3466          self.rng.rand(2).astype(config.floatX))
3467        # theano.printing.debugprint(f, print_type=True)
3468        # here is the output for the debug print:
3469        """
3470        Elemwise{Add}[(0, 0)] [id A] <TensorType(float64, vector)> ''   7
3471         |for{cpu,scan_fn} [id B] <TensorType(float64, vector)> ''   6
3472         | |Shape_i{0} [id C] <TensorType(int64, scalar)> ''   0
3473         | | |X [id D] <TensorType(float64, matrix)>
3474         | |Subtensor{int64:int64:int8} [id E] <TensorType(float64, matrix)> ''   5
3475         | | |X [id D] <TensorType(float64, matrix)>
3476         | | |ScalarFromTensor [id F] <int64> ''   4
3477         | | | |Elemwise{switch,no_inplace} [id G] <TensorType(int64, scalar)> ''   3
3478         | | |   |Elemwise{le,no_inplace} [id H] <TensorType(int8, scalar)> ''   2
3479         | | |   | |Shape_i{0} [id C] <TensorType(int64, scalar)> ''   0
3480         | | |   | |TensorConstant{0} [id I] <TensorType(int8, scalar)>
3481         | | |   |TensorConstant{0} [id I] <TensorType(int8, scalar)>
3482         | | |   |TensorConstant{0} [id J] <TensorType(int64, scalar)>
3483         | | |ScalarFromTensor [id K] <int64> ''   1
3484         | | | |Shape_i{0} [id C] <TensorType(int64, scalar)> ''   0
3485         | | |Constant{1} [id L] <int8>
3486         | |Shape_i{0} [id C] <TensorType(int64, scalar)> ''   0
3487         |Y [id M] <TensorType(float64, vector)>
3488
3489        Inner graphs of the scan ops:
3490
3491        for{cpu,scan_fn} [id B] <TensorType(float64, vector)> ''
3492         >Sum{acc_dtype=float64} [id N] <TensorType(float64, scalar)> ''
3493         > |X[t] [id O] <TensorType(float64, vector)> -> [id E]
3494        """
3495
3496    def assert_eqs_const(self, f, val, op=deep_copy_op):
3497        topo = f.maker.fgraph.toposort()
3498        elem = topo[0]
3499        assert len(topo) == 1, topo
3500        assert elem.op == op, elem.op
3501        if op == deep_copy_op:
3502            assert len(elem.inputs) == 1, elem.inputs
3503            assert isinstance(elem.inputs[0], T.TensorConstant), elem
3504            assert T.extract_constant(elem.inputs[0]) == val, val
3505        else:
3506            assert len(elem.inputs) == 2, elem.inputs
3507            assert isinstance(elem.inputs[0], T.TensorConstant), elem
3508            assert T.extract_constant(elem.inputs[0]) == val, val
3509
3510    def assert_identity(self, f):
3511        topo = f.maker.fgraph.toposort()
3512        assert len(topo) == 1
3513        assert topo[0].op == deep_copy_op
3514        if f.outputs[0].variable.dtype == 'bool':
3515            x_vals = [0, 1]
3516        else:
3517            x_vals = [0, 1, 10]
3518        for x_val in x_vals:
3519            assert f(x_val) == x_val
3520
3521    def test_inequality_with_self(self):
3522        x = T.scalar('x', dtype=config.floatX)
3523        mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison')
3524
3525        f = theano.function([x], T.lt(x, x), mode=mode)
3526        self.assert_eqs_const(f, 0)
3527
3528        f = theano.function([x], T.le(x, x), mode=mode)
3529        self.assert_eqs_const(f, 1)
3530
3531        f = theano.function([x], T.gt(x, x), mode=mode)
3532        self.assert_eqs_const(f, 0)
3533
3534        f = theano.function([x], T.ge(x, x), mode=mode)
3535        self.assert_eqs_const(f, 1)
3536
3537        f = theano.function([x], T.minimum(x, x), mode=mode)
3538        self.assert_identity(f)
3539
3540        f = theano.function([x], T.maximum(x, x), mode=mode)
3541        self.assert_identity(f)
3542
3543    def test_shape_inequality_with_self(self):
3544        x = T.vector('x', dtype=config.floatX)
3545        mode = theano.compile.get_default_mode().including(
3546            'local_useless_elemwise_comparison',
3547            'local_shape_to_shape_i',
3548            'local_track_shape_i',
3549            'local_subtensor_make_vector')
3550        f = theano.function([x], T.lt(x.shape[0], 0), mode=mode)
3551        self.assert_eqs_const(f, 0)
3552
3553        f = theano.function([x], T.ge(x.shape[0], 0), mode=mode)
3554        self.assert_eqs_const(f, 1)
3555
3556        f = theano.function([x], T.maximum(x.shape[0], 0), mode=mode)
3557        topo = f.maker.fgraph.toposort()
3558        assert len(topo) == 1
3559        assert isinstance(topo[0].op, Shape_i), topo[0].op
3560        x_val = np.ones(100, dtype=config.floatX)
3561        assert f(x_val) == x_val.shape[0]
3562
3563        f = theano.function([x], T.maximum(0, x.shape[0]), mode=mode)
3564        topo = f.maker.fgraph.toposort()
3565        assert len(topo) == 1
3566        assert isinstance(topo[0].op, Shape_i), topo[0].op
3567        x_val = np.ones(100, dtype=config.floatX)
3568        assert f(x_val) == x_val.shape[0]
3569
3570        f = theano.function([x], T.minimum(x.shape[0], 0), mode=mode)
3571        self.assert_eqs_const(f, 0)
3572        assert f(x_val) == 0
3573
3574        f = theano.function([x], T.minimum(0, x.shape[0]), mode=mode)
3575        self.assert_eqs_const(f, 0)
3576        assert f(x_val) == 0
3577        f = theano.function([x], T.minimum([0, 0], x.shape[0]), mode=mode)
3578        # This case isn't optimized.
3579        # self.assert_eqs_const(f, 0)
3580        utt.assert_allclose(f(x_val), [0, 0])
3581
3582    def test_shape_add_inequality(self):
3583        x = T.vector('x', dtype=config.floatX)
3584        mode = theano.compile.get_default_mode().including(
3585            'local_useless_elemwise_comparison',
3586            'local_shape_to_shape_i',
3587            'local_track_shape_i',
3588            'local_subtensor_make_vector')
3589
3590        y = T.vector('y', dtype=config.floatX)
3591
3592        f = theano.function([x, y], T.lt(x.shape[0] + y.shape[0], 0), mode=mode)
3593        self.assert_eqs_const(f, 0)
3594
3595        f = theano.function([x, y], T.ge(x.shape[0] + y.shape[0], 0), mode=mode)
3596        self.assert_eqs_const(f, 1)
3597
3598    def test_equality_shapes(self):
3599        # Test equality where one side contains only shape-related
3600        # stuff.
3601        if theano.config.mode == "FAST_COMPILE":
3602            raise SkipTest("Skip opt test as the opt is disabled")
3603        x = T.vector('x', dtype=config.floatX)
3604        for g in [x.shape[0],
3605                  Shape_i(0)(x)]:
3606            f = theano.function([x], T.eq(g, 0))
3607            assert f([3, 3]) == 0
3608            assert f([]) == 1
3609
3610            f = theano.function([x], T.eq(g, -1))
3611            self.assert_eqs_const(f, 0)
3612            assert f([3, 3]) == 0
3613
3614        g = join(0,
3615                 x.shape[0:],  # todo test reshape, dimshuffle
3616                 x.shape[0:1])
3617        f = theano.function([x], T.eq(g, 0))
3618        assert (f([3, 3]) == 0).all()
3619        assert (f([]) == 1).all()
3620
3621        f = theano.function([x], T.eq(g, -1))
3622        self.assert_eqs_const(f, 0, op=T.alloc)
3623        assert (f([3, 3]) == 0).all()
3624
3625    def test_and(self):
3626        # bitwise "and" with 0 should give 0 for both bool and int
3627        # bitwise "and" with 1 should only simplify for bool
3628        mode = theano.compile.get_default_mode().including('canonicalize')
3629        for dtype, zero, one in [('bool', np.array(False), np.array(True)),
3630                                 ('int8', np.int8(0), np.int8(1)),
3631                                 ('int8', 0, 1)]:
3632            x = T.scalar('x', dtype=dtype)
3633
3634            f = theano.function([x], T.and_(x, zero), mode=mode)
3635            self.assert_eqs_const(f, 0)
3636
3637            f = theano.function([x], T.and_(zero, x), mode=mode)
3638            self.assert_eqs_const(f, 0)
3639
3640            f = theano.function([x], T.and_(x, one), mode=mode)
3641            if dtype == 'bool':
3642                self.assert_identity(f)
3643
3644            f = theano.function([x], T.and_(one, x), mode=mode)
3645            if dtype == 'bool':
3646                self.assert_identity(f)
3647
3648    def test_and_int(self):
3649        # Test that bitwise "and" is correctly computed on int constants.
3650        f = theano.function([], T.and_(5, 6))
3651        assert f() == 4
3652
3653    def test_or(self):
3654        # bitwise "or" with 0 should simplify for both bool and int
3655        # bitwise "or" with 1 should only give 1 for bool
3656        mode = theano.compile.get_default_mode().including('canonicalize')
3657        for dtype, zero, one in [('bool', np.array(False), np.array(True)),
3658                                 ('int8', np.int8(0), np.int8(1)),
3659                                 ('int8', 0, 1)]:
3660            x = T.scalar('x', dtype=dtype)
3661
3662            f = theano.function([x], T.or_(x, one), mode=mode)
3663            if dtype == 'bool':
3664                self.assert_eqs_const(f, 1)
3665
3666            f = theano.function([x], T.or_(one, x), mode=mode)
3667            if dtype == 'bool':
3668                self.assert_eqs_const(f, 1)
3669
3670            f = theano.function([x], T.or_(x, zero), mode=mode)
3671            self.assert_identity(f)
3672
3673            f = theano.function([x], T.or_(zero, x), mode=mode)
3674            self.assert_identity(f)
3675
3676    def test_or_int(self):
3677        # Test that bitwise "or" is correctly computed on int constants.
3678        f = theano.function([], T.or_(5, 6))
3679        assert f() == 7
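        # 5 is 0b101 and 6 is 0b110, so 5 | 6 == 0b111 == 7.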
3680
3681    def test_xor(self):
3682        # bitwise "xor" with itself should always give 0 for both bool and int.
3683        mode = theano.compile.get_default_mode().including('canonicalize')
3684        for dtype in ('bool', 'int8'):
3685            x = T.scalar('x', dtype=dtype)
3686
3687            f = theano.function([x], T.xor(x, x), mode=mode)
3688            self.assert_eqs_const(f, 0)
3689
3690    def test_stacktrace(self):
3691        mode = theano.compile.get_default_mode().including(
3692            'local_useless_elemwise_comparison')
3693
3694        x = T.vector('x', dtype=config.floatX)
3695        f = theano.function([x], T.gt(x, x), mode=mode)
3696        self.assertTrue(check_stack_trace(f, ops_to_check='last'))
3697
3698        f = theano.function([x], T.le(x, x), mode=mode)
3699        self.assertTrue(check_stack_trace(f, ops_to_check='last'))
3700
3701
3702class Test_local_canonicalize_alloc(unittest.TestCase):
3703    def setUp(self):
3704        self.rng = np.random.RandomState(utt.fetch_seed())
3705
3706    @change_flags(compute_test_value='off')
3707    def test0(self):
3708        x = shared(self.rng.randn(3, 7))
3709        a = tensor.alloc(x, 6, 7)
3710
3711        # It is a bad idea to have tensor.alloc return x directly,
3712        # because the shape mismatch cannot be caught.
3713        assert a.owner and isinstance(a.owner.op, tensor.Alloc)
3714
3715        f = function([], a, mode=mode_opt)
3716        # The optimization should then be applied, and remove Alloc
3717        assert ([node.op for node in f.maker.fgraph.toposort()] ==
3718                [deep_copy_op])
3719
3720        # In DebugMode, the shape mismatch should be detected
3721        if isinstance(mode_opt, compile.DebugMode):
3722            self.assertRaises(ValueError, f)
3723
3724        # No need to check_stack_trace as the optimization
3725        # local_canonicalize_alloc only removes nodes.
3726
3727    def test1(self):
3728        # Test that alloc never gets instantiated during optimization
3729        mode = mode_opt.excluding('local_canonicalize_alloc')
3730
3731        x = tensor.matrix('x')
3732        xx = tensor.fill(x, x)
3733
        # The optimization 'local_fill_to_alloc' should call tensor.alloc,
        # which should return x and not alloc(x, ...)
3736        f = function([x], [xx], mode=mode)
3737        op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
3738        assert tensor.Alloc not in op_classes
3739
3740        # No need to check_stack_trace as the optimization
3741        # local_canonicalize_alloc only removes nodes.
3742
3743    def test2(self):
3744        # Test that alloc never gets instantiated during optimization
3745        mode = mode_opt.excluding('local_canonicalize_alloc')
3746
3747        x = tensor.matrix('x')
3748        y = tensor.tile(x, (1,) * 2)
3749
3750        f = function([x], [y], mode=mode)
3751        op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
3752        print(op_classes)
3753
        # We are supposed to test that tensor.Alloc is not in op_classes,
        # but since the proper optimization is not currently implemented,
        # it will fail. Once the correct optimization is in place, we
        # should no longer see tensor.Alloc in op_classes, and the assert
        # below will have to be changed accordingly.
3759        assert tensor.Alloc in op_classes
3760        # The correct opt removes nodes, no need for check_stack_trace
3761
3762    def test_useless_alloc_with_shape_one(self):
3763        alloc_lift = out2in(local_canonicalize_alloc)
3764        x = shared(self.rng.randn(2,))
3765        y = shared(self.rng.randn())
3766        z = shared(self.rng.randn(1, 1))
3767        w = shared(self.rng.randn(1, 1))
3768        alloc_x = tensor.alloc(x, 1, 3, 2)
3769        alloc_y = tensor.alloc(y, 1, 1)
3770        alloc_z = tensor.alloc(z, 1, 1, 2)
3771        alloc_w = tensor.alloc(w, 1, 2)
3772
3773        g = FunctionGraph([x, y, z, w], [alloc_x, alloc_y, alloc_z, alloc_w])
3774        self.assertTrue(str(g) == ("[Alloc(<TensorType(float64, vector)>, "
3775                                   "TensorConstant{1}, "
3776                                   "TensorConstant{3}, "
3777                                   "TensorConstant{2}), "
3778
3779                                   "Alloc(<TensorType(float64, scalar)>, "
3780                                   "TensorConstant{1}, "
3781                                   "TensorConstant{1}), "
3782
3783                                   "Alloc(<TensorType(float64, matrix)>, "
3784                                   "TensorConstant{1}, "
3785                                   "TensorConstant{1}, "
3786                                   "TensorConstant{2}), "
3787
3788                                   "Alloc(<TensorType(float64, matrix)>, "
3789                                   "TensorConstant{1}, "
3790                                   "TensorConstant{2})]"))
3791
3792        alloc_lift.optimize(g)
3793        self.assertTrue(str(g) == "[InplaceDimShuffle{x,0,1}"
3794                                  "(Alloc(<TensorType(float64, vector)>, "
3795                                  "TensorConstant{3}, "
3796                                  "TensorConstant{2})), "
3797
3798                                  "InplaceDimShuffle{x,x}"
3799                                  "(<TensorType(float64, scalar)>), "
3800
3801                                  "InplaceDimShuffle{x,0,1}"
3802                                  "(Alloc(<TensorType(float64, matrix)>, "
3803                                  "TensorConstant{1}, "
3804                                  "TensorConstant{2})), "
3805
3806                                  "Alloc(<TensorType(float64, matrix)>, "
3807                                  "TensorConstant{1}, "
3808                                  "TensorConstant{2})]")
3809
3810        # Check stacktrace was copied over correctly after opt was applied
3811        self.assertTrue(check_stack_trace(g, ops_to_check='all'))
3812
3813
3814class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
3815    opt_name = 'local_useless_inc_subtensor_alloc'
3816
3817    def setUp(self):
3818        # The optimization requires the shape feature so we need to compile in
3819        # FAST_RUN mode.
3820        mode = theano.config.mode
3821        if mode == 'FAST_COMPILE':
3822            mode = 'FAST_RUN'
3823        self.mode = compile.mode.get_mode(mode)
3824
3825    def test_advanced_inc_subtensor(self):
3826        x = tensor.vector('x')
3827        y = tensor.scalar('y')
3828        i = tensor.matrix('i', dtype='int64')
3829        z = tensor.advanced_inc_subtensor(x, T.alloc(y, *i.shape), i)
3830        mode1 = self.mode.excluding(self.opt_name)
3831        mode2 = self.mode.including(self.opt_name)
3832        f1 = theano.function([x, i, y], z, mode=mode1)
3833        f2 = theano.function([x, i, y], z, mode=mode2)
3834
3835        # the alloc op should still be there
3836        assert (len([n for n in f1.maker.fgraph.toposort()
3837                     if isinstance(n.op, tensor.Alloc)]) == 1)
3838        # the alloc op should have been removed
3839        assert (len([n for n in f2.maker.fgraph.toposort()
3840                     if isinstance(n.op, tensor.Alloc)]) == 0)
3841
3842        x_value = np.random.randn(5).astype(config.floatX)
3843        y_value = np.random.randn()
3844        i_value = np.random.randint(0, 3, size=(2, 3))
3845
3846        r1 = f1(x_value, i_value, y_value)
3847        r2 = f2(x_value, i_value, y_value)
3848
3849        utt.assert_allclose(r1, r2)
3850
3851        # Check stacktrace was copied over correctly after opt was applied
3852        self.assertTrue(check_stack_trace(f1, ops_to_check=tensor.AdvancedIncSubtensor))
3853        self.assertTrue(check_stack_trace(f2, ops_to_check=tensor.AdvancedIncSubtensor))
3854
3855    def test_advanced_inc_subtensor1(self):
3856        x = tensor.vector('x')
3857        y = tensor.scalar('y')
3858        i = tensor.vector('i', dtype='int64')
3859        z = tensor.advanced_inc_subtensor1(x, T.alloc(y, *i.shape), i)
3860        mode1 = self.mode.excluding(self.opt_name)
3861        mode2 = self.mode.including(self.opt_name)
3862        f1 = theano.function([x, i, y], z, mode=mode1)
3863        f2 = theano.function([x, i, y], z, mode=mode2)
3864
3865        # the alloc op should still be there
3866        assert (len([n for n in f1.maker.fgraph.toposort()
3867                     if isinstance(n.op, tensor.Alloc)]) == 1)
3868        # the alloc op should have been removed
3869        assert (len([n for n in f2.maker.fgraph.toposort()
3870                     if isinstance(n.op, tensor.Alloc)]) == 0)
3871
3872        x_value = np.random.randn(5).astype(config.floatX)
3873        y_value = np.random.randn()
3874        i_value = np.random.randint(0, 3, size=2)
3875
3876        r1 = f1(x_value, i_value, y_value)
3877        r2 = f2(x_value, i_value, y_value)
3878
3879        utt.assert_allclose(r1, r2)
3880
3881        # Check stacktrace was copied over correctly after opt was applied
3882        self.assertTrue(check_stack_trace(
3883            f1, ops_to_check=tensor.AdvancedIncSubtensor1))
3884        self.assertTrue(check_stack_trace(f2, ops_to_check='all'))
3885
3886    def test_incsubtensor(self):
3887        x = tensor.vector('x')
3888        y = tensor.scalar('y')
3889        i = tensor.scalar('i', dtype='int64')
3890        z = tensor.inc_subtensor(x[:i], T.alloc(y, i))
3891        mode1 = self.mode.excluding(self.opt_name)
3892        mode2 = self.mode.including(self.opt_name)
3893        f1 = theano.function([x, i, y], z, mode=mode1)
3894        f2 = theano.function([x, i, y], z, mode=mode2)
3895
3896        # the alloc op should still be there
3897        assert (len([n for n in f1.maker.fgraph.toposort()
3898                     if isinstance(n.op, tensor.Alloc)]) == 1)
3899        # the alloc op should have been removed
3900        assert (len([n for n in f2.maker.fgraph.toposort()
3901                     if isinstance(n.op, tensor.Alloc)]) == 0)
3902
3903        x_value = np.random.randn(5).astype(config.floatX)
3904        y_value = np.random.randn()
3905        i_value = 3
3906
3907        r1 = f1(x_value, i_value, y_value)
3908        r2 = f2(x_value, i_value, y_value)
3909
3910        utt.assert_allclose(r1, r2)
3911
3912        # Check stacktrace was copied over correctly after opt was applied
3913        self.assertTrue(check_stack_trace(f1, ops_to_check='last'))
3914        self.assertTrue(check_stack_trace(f2, ops_to_check='last'))
3915
3916
3917class test_shapeoptimizer(unittest.TestCase):
3918    def setUp(self):
3919        utt.seed_rng()
3920
3921    def test0(self):
3922        mode = theano.config.mode
3923        if mode == 'FAST_COMPILE':
3924            mode = 'FAST_RUN'
3925        v = T.vector()
3926        m = T.matrix()
3927        f = function([v, m], (v + m).shape, mode=mode)
3928        for node in f.maker.fgraph.toposort():
3929            assert node.op != T.add
3930
3931    def test_constant(self):
3932        mode = theano.config.mode
3933        if mode == 'FAST_COMPILE':
3934            mode = 'FAST_RUN'
3935
3936        v = T.vector()
3937        f = function([v], v.dimshuffle('x', 'x', 0).shape[1], mode=mode)
3938        topo = f.maker.fgraph.toposort()
3939        assert len(topo) == 1
3940        assert topo[0].op == deep_copy_op
3941
3942    @staticmethod
3943    def max_pool_c01b(c01b, pool_shp, pool_stride, img_shp):
3944        """
3945        Like max_pool but with input using axes ('c', 0, 1, 'b')
3946          (Alex Krizhevsky format)
3947
        pool_shp, pool_stride and img_shp are ints that represent the
        same shape in x and y.
3950        """
3951        mx = None
3952
3953        # Compute index in pooled space of last needed pool
3954        # (needed = each input pixel must appear in at least one pool)
3955        def last_pool(im_shp, p_shp, p_strd):
3956            rval = int(np.ceil(float(im_shp - p_shp) / p_strd))
3957            assert p_strd * rval + p_shp >= im_shp
3958            assert p_strd * (rval - 1) + p_shp < im_shp
3959            return rval
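        # For example, with im_shp=5, p_shp=3 and p_strd=2:
        # last_pool = ceil((5 - 3) / 2) = 1, so the last pool starts at
        # row 1 * 2 = 2 and the image needs 2 + 3 = 5 rows in total.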
3960        # Compute starting row of the last pool
3961        last_pool_r = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
3962        # Compute number of rows needed in img for all indexes to work out
3963        required_r = last_pool_r + pool_shp
3964
3965        last_pool_c = last_pool(img_shp, pool_shp, pool_stride) * pool_stride
3966        required_c = last_pool_c + pool_shp
3967
3968        wide_infinity = T.alloc(-np.inf, c01b.shape[0],
3969                                required_r, required_c, c01b.shape[3])
3970
3971        c01b = T.set_subtensor(wide_infinity[:, 0:img_shp, 0:img_shp, :], c01b)
3972
3973        for row_within_pool in xrange(pool_shp):
3974            row_stop = last_pool_r + row_within_pool + 1
3975            for col_within_pool in xrange(pool_shp):
3976                col_stop = last_pool_c + col_within_pool + 1
3977                cur = c01b[:, row_within_pool:row_stop:pool_stride,
3978                           col_within_pool:col_stop:pool_stride, :]
3979                if mx is None:
3980                    mx = cur
3981                else:
3982                    mx = T.maximum(mx, cur)
3983        return mx
3984
3985    def test_broadcasted_dims(self):
        # This tests a case that caused a crash during optimization
3987        shp = (1, 1, 1, 1)
3988        rng = np.random.RandomState(utt.fetch_seed())
3989        a = shared(rng.rand(*shp).astype(config.floatX))
3990        out = self.max_pool_c01b(a, 1, 1, 1)
3991
        # max_pool_c01b uses -inf, which would trigger a DebugMode error.
3993        mode = copy.copy(theano.compile.get_default_mode())
3994        mode.check_isfinite = False
3995        f = theano.function([], out, mode=mode)
3996        f()
3997
3998    def test_constant_merge(self):
        # This tests the error in gh-1122 that is caused by the
        # combination of the merge optimizer and ShapeFeature.
4001
4002        x = tensor.constant([0, 0])
4003        y = x[1:]
4004        x1 = x - tensor.join(0, y, y)
4005        x1.eval()
4006
4007    def test_local_track_shape_i(self):
4008        class IdentityNoShape(gof.Op):
4009            '''Op that does not infer the output shape from the input one'''
4010            def make_node(self, x):
4011                x = as_tensor_variable(x)
4012                return gof.Apply(self, [x], [x.type()])
4013
4014            def perform(self, node, inp, out_):
4015                x, = inp
4016                out, = out_
4017                out[0] = x.copy()
4018            # def infer_shape(self, node, (xshp,)):
4019                # return [tuple([self.shape_i(i)(r) for i in xrange(r.ndim)])]
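            # A sketch, assuming modern signature syntax, of what enabling
            # infer_shape here could look like (kept disabled on purpose so
            # that this Op really carries no shape information):
            # def infer_shape(self, node, shapes):
            #     (xshp,) = shapes
            #     return [xshp]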
4020        identity_noshape = IdentityNoShape()
4021
4022        class IdentityShape(gof.Op):
4023            '''Op that does infer the output shape from the input one'''
4024            def make_node(self, x):
4025                x = as_tensor_variable(x)
4026                return gof.Apply(self, [x], [x.type()])
4027
4028            def perform(self, node, inp, out_):
4029                x, = inp
4030                out, = out_
4031                out[0] = x.copy()
4032
4033            def infer_shape(self, node, xshp_):
4034                # Could also just return.
4035                xshp, = xshp_
4036                return (xshp,)
4037        identity_shape = IdentityShape()
4038
4039        @gof.local_optimizer([IdentityNoShape])
4040        def local_identity_noshape_to_identity_shape(node):
4041            '''Optimization transforming the first Op into the second'''
4042            if isinstance(node.op, IdentityNoShape):
4043                return [identity_shape(node.inputs[0])]
4044
4045        mode = theano.compile.get_default_mode().including(
4046            'ShapeOpt', 'specialize')
4047        rng = np.random.RandomState(utt.fetch_seed())
4048        x = T.tensor3('x')
4049        ins_x = identity_noshape(x)
4050
4051        # Without the optimization
4052        f = theano.function([x], ins_x.shape, mode=mode)
4053        xval = rng.randn(3, 4, 7).astype(config.floatX)
4054        assert np.all(f(xval) == [3, 4, 7])
4055        f_ops = [node.op for node in f.maker.fgraph.toposort()]
4056        assert len(f_ops) == 5
4057        assert identity_noshape in f_ops
4058        assert identity_shape not in f_ops
4059
4060        # Register the optimization
4061        opt.register_specialize(local_identity_noshape_to_identity_shape)
4062
4063        mode = theano.compile.get_default_mode().including(
4064            'ShapeOpt', 'specialize')
4065        # With the optimization
4066        # The identity_shape op should not be needed anymore to compute
4067        # the shape
4068        g = theano.function([x], ins_x.shape, mode=mode)
4069        xval = rng.randn(6, 1, 2).astype(config.floatX)
4070        assert np.all(g(xval) == [6, 1, 2])
4071        g_ops = [node.op for node in g.maker.fgraph.toposort()]
4072        assert len(g_ops) == 4
4073        assert identity_noshape not in g_ops
4074        assert identity_shape not in g_ops
4075
4076        # test multiple level of op without infer_shape
4077        ins_x3 = identity_noshape(identity_noshape(identity_noshape(x)))
4078        h = theano.function([x], ins_x3.shape, mode=mode)
4079        xval = rng.randn(6, 1, 2).astype(config.floatX)
4080        assert np.all(h(xval) == [6, 1, 2])
4081        h_ops = [node.op for node in h.maker.fgraph.toposort()]
4082        assert len(h_ops) == 4
4083        assert identity_noshape not in h_ops
4084        assert identity_shape not in h_ops
4085
4086    def test_no_shapeopt(self):
4087        # Test that a basic example works even when ShapeOpt is excluded
4088        X = T.matrix()
4089        expr = X.shape[0]
4090
4091        mode = theano.compile.get_default_mode().excluding('ShapeOpt')
4092        f = theano.function([X], expr, mode=mode)
4093        print(f([[1, 2], [2, 3]]))
4094
4095
4096class test_assert(utt.InferShapeTester):
4097
4098    def setUp(self):
4099        super(test_assert, self).setUp()
4100
4101    def test0(self):
4102        x = T.scalar()
4103        y = T.scalar()
4104        f = theano.function([x, y], theano.tensor.opt.assert_op(x, T.eq(x, y)))
4105        f(1, 1)
4106        self.assertRaises(AssertionError, f, 1, 0)
4107
4108    def test_local_remove_useless_assert1(self):
        # remove asserts that are always true
4110        mode = theano.config.mode
4111        if mode == 'FAST_COMPILE':
4112            mode = 'FAST_RUN'
4113        mode = compile.mode.get_mode(mode)
4114
4115        x = T.scalar()
4116        f = theano.function([x], theano.tensor.opt.assert_op(x, 1), mode=mode)
4117        assert f(1) == 1
4118        assert f(5) == 5
4119        topo = f.maker.fgraph.toposort()
4120        assert len(topo) == 1
4121        assert topo[0].op == deep_copy_op
4122
4123    def test_test_local_remove_useless_assert2(self):
        # remove assert conditions that are always true
4125        mode = theano.config.mode
4126        if mode == 'FAST_COMPILE':
4127            mode = 'FAST_RUN'
4128        mode = compile.mode.get_mode(mode)
4129
4130        x = T.scalar()
4131        y = T.scalar()
4132        f = theano.function([x, y], theano.tensor.opt.assert_op(x, y, 1),
4133                            mode=mode)
4134        assert f(1, 1) == 1
4135        assert f(5, 1) == 5
4136        topo = f.maker.fgraph.toposort()
4137        assert len(topo) == 2
4138        assert len(topo[0].inputs) == 2
4139        assert topo[1].op == deep_copy_op
4140
4141    def test_local_remove_useless_assert3(self):
        # don't remove assert conditions that are always false
4143        mode = theano.config.mode
4144        if mode == 'FAST_COMPILE':
4145            mode = 'FAST_RUN'
4146        mode = compile.mode.get_mode(mode)
4147
4148        x = T.scalar()
4149        y = T.scalar()
4150        f = theano.function([x, y], theano.tensor.opt.assert_op(x, y, 0),
4151                            mode=mode)
4152        self.assertRaises(AssertionError, f, 1, 0)
4153        topo = f.maker.fgraph.toposort()
4154        assert len(topo) == 2
4155        assert len(topo[0].inputs) == 3
4156        assert topo[1].op == deep_copy_op
4157
4158    def test_local_remove_all_assert1(self):
        # remove assert conditions that are unknown
4160        mode = theano.config.mode
4161        if mode == 'FAST_COMPILE':
4162            mode = 'FAST_RUN'
4163        mode = compile.mode.get_mode(mode).including('local_remove_all_assert')
4164
4165        x = T.scalar()
4166        y = T.scalar()
4167        f = theano.function([x, y], theano.tensor.opt.assert_op(x, y),
4168                            mode=mode)
4169        if isinstance(mode, theano.compile.debugmode.DebugMode):
4170            # DebugMode will run the original version with the Assert
4171            self.assertRaises(AssertionError, f, 1, 0)
4172        else:
            f(1, 0)  # Without the opt, this would raise an AssertionError.
4174        topo = f.maker.fgraph.toposort()
4175        assert len(topo) == 1, topo
4176        assert topo[0].op == deep_copy_op, topo
4177
4178        mode = compile.mode.get_default_mode()
4179        a = theano.tensor.opt.assert_op(x, T.eq(x, 0).any())
4180        f = theano.function([x], a, mode=mode.excluding('unsafe'))
4181        topo = f.maker.fgraph.toposort()
4182        a_op = [n for n in topo if isinstance(n.op, T.opt.Assert)]
4183        assert len(a_op) == 1
4184
4185    def test_infer_shape(self):
4186
4187        adscal = dscalar()
4188        bdscal = dscalar()
4189        adscal_val = np.random.rand()
4190        bdscal_val = np.random.rand() + 1
4191        out = theano.tensor.opt.assert_op(adscal, bdscal)
4192        self._compile_and_check([adscal, bdscal], [out],
4193                                [adscal_val, bdscal_val], Assert)
4194
4195        admat = dmatrix()
4196        admat_val = np.random.rand(3, 4)
4197        adscal_val += 1
4198        out = theano.tensor.opt.assert_op(admat, adscal, bdscal)
4199        self._compile_and_check([admat, adscal, bdscal], [out],
4200                                [admat_val, adscal_val, bdscal_val], Assert)
4201
4202
4203def test_local_mul_specialize():
4204    mode = theano.config.mode
4205    if mode == 'FAST_COMPILE':
4206        mode = 'FAST_RUN'
4207    mode = compile.mode.get_mode(mode)
4208    mode = mode.excluding('fusion')
4209
4210    v = T.vector()
4211    m = T.vector()
4212
4213    f = function([v], v * 1, mode=mode)
4214    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [deep_copy_op]
4216
4217    f = function([v], v * 0, mode=mode)
4218    nodes = [node.op for node in f.maker.fgraph.toposort()]
4219    assert nodes == [Shape_i(0), T.alloc]
4220
4221    f = function([v], v * (-1), mode=mode)
4222    nodes = [node.op for node in f.maker.fgraph.toposort()]
4223    assert nodes == [T.neg]
4224
4225    f = function([v, m], v * 1 * (-m), mode=mode)
4226    nodes = [node.op for node in f.maker.fgraph.toposort()]
4227    assert nodes == [T.mul]
4228
4229    f = function([v, m], v * 0 * (-m), mode=mode)
4230    nodes = [node.op for node in f.maker.fgraph.toposort()]
4231    assert nodes == [Shape_i(0), T.alloc]
4232
4233    f = function([v, m], v * (-1) * (-m), mode=mode)
4234    nodes = [node.op for node in f.maker.fgraph.toposort()]
4235    assert nodes == [T.mul]
4236
4237    f = function([v, m], v * (-1) * m, mode=mode)
4238    nodes = [node.op for node in f.maker.fgraph.toposort()]
4239    assert nodes == [T.mul]
4240
4241
4242class T_Tile(unittest.TestCase):
4243    def test_local_useless_tile(self):
4244        v = T.vector()
4245        m = T.matrix()
4246        mode = None
4247        if theano.config.mode == "FAST_COMPILE":
4248            mode = "FAST_RUN"
4249        for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
4250            # When len(repeat pattern) <= var.ndim, everything is removed
4251            # for ndim in range(1, var.ndim):
4252            for ndim in range(var.ndim + 1):
4253                f = theano.function([var], tile(var, (1,) * ndim), mode=mode)
4254                topo = f.maker.fgraph.toposort()
4255                assert len(topo) == 1
4256                assert isinstance(topo[0].op, compile.DeepCopyOp)
4257                f(data)
4258                # In this case the opt only removes nodes,
4259                # no need to check_stack_trace
4260            # When len(repeat pattern) > var.ndim, only a dimshuffle should be
4261            # left, but there can be a DeepCopy as well
4262            for ndim in range(var.ndim + 1, var.ndim + 3):
4263                f = theano.function([var], tile(var, (1,) * ndim), mode=mode)
4264                topo = f.maker.fgraph.toposort()
4265                assert len(topo) <= 2
4266                assert isinstance(topo[0].op, DimShuffle)
4267                assert check_stack_trace(f, ops_to_check=[DimShuffle])
4268                f(data)
4269
4270
4271def speed_local_pow_specialize_range():
    val = np.random.rand(int(1e7))
4273    v = T.vector()
4274    mode = compile.mode.get_default_mode()
4275    mode_without_pow_opt = mode.excluding('local_pow_specialize')
4276    for i in xrange(500, 513):
4277        f1 = function([v], v ** i, mode=mode)
4278        f2 = function([v], v ** i, mode=mode_without_pow_opt)
4279        assert len(f1.maker.fgraph.toposort()) == 1
4280        t1 = time.time()
4281        f1(val)
4282        t2 = time.time()
4283        f2(val)
4284        t3 = time.time()
4285        print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2)
4286        if not t2 - t1 < t3 - t2:
4287            print("WARNING WE ARE SLOWER")
4288    for i in xrange(-3, -1500, -1):
4289        f1 = function([v], v ** i, mode=mode)
4290        f2 = function([v], v ** i, mode=mode_without_pow_opt)
4291        assert len(f1.maker.fgraph.toposort()) == 1
4292        t1 = time.time()
4293        f1(val)
4294        t2 = time.time()
4295        f2(val)
4296        t3 = time.time()
4297        print(i, t2 - t1, t3 - t2, t2 - t1 < t3 - t2)
4298        if not t2 - t1 < t3 - t2:
4299            print("WARNING WE ARE SLOWER")
4300
4301
4302def test_local_pow_specialize():
4303    mode = theano.config.mode
4304    if mode == 'FAST_COMPILE':
4305        mode = 'FAST_RUN'
4306    mode = compile.mode.get_mode(mode)
4307    mode = mode.excluding('fusion')
4308
4309    v = T.vector()
4310    val = np.arange(10, dtype=theano.config.floatX)
4311    val_no0 = np.arange(1, 10, dtype=theano.config.floatX)
4312
4313    f = function([v], v ** 0, mode=mode)
4314    nodes = [node.op for node in f.maker.fgraph.toposort()]
4315    assert nodes == [Shape_i(0), T.alloc]
4316    utt.assert_allclose(f(val), val ** 0)
4317
4318    f = function([v], v ** 1, mode=mode)
4319    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [deep_copy_op]
4321    utt.assert_allclose(f(val), val ** 1)
4322
4323    f = function([v], v ** (-1), mode=mode)
4324    nodes = [node.op for node in f.maker.fgraph.toposort()]
4325    assert nodes == [T.inv]
4326    utt.assert_allclose(f(val_no0), val_no0 ** (-1))
4327
4328    f = function([v], v ** 2, mode=mode)
4329    nodes = [node.op for node in f.maker.fgraph.toposort()]
4330    assert nodes == [T.sqr]
4331    utt.assert_allclose(f(val), val ** 2)
4332
4333    f = function([v], v ** (-2), mode=mode)
4334    nodes = [node.op for node in f.maker.fgraph.toposort()]
4335    assert len(nodes) == 2
4336    assert nodes[0] == T.sqr
4337    assert isinstance(nodes[1].scalar_op, theano.scalar.basic.Inv)
#    assert nodes == [T.sqr, T.inv]  # Why doesn't this work?
4339    utt.assert_allclose(f(val_no0), val_no0 ** (-2))
4340
4341    f = function([v], v ** (.5), mode=mode)
4342    nodes = [node.op for node in f.maker.fgraph.toposort()]
4343    assert nodes == [T.sqrt]
4344    utt.assert_allclose(f(val), val ** (.5))
4345
4346    f = function([v], v ** (-.5), mode=mode)
4347    nodes = [node.op for node in f.maker.fgraph.toposort()]
4348    assert len(nodes) == 2
4349    assert nodes[0] == T.sqrt
4350    assert isinstance(nodes[1].scalar_op, theano.scalar.basic.Inv)
#    assert nodes == [T.sqrt, T.inv]  # Why doesn't this work?
4352    utt.assert_allclose(f(val_no0), val_no0 ** (-.5))
4353
4354
4355def test_local_pow_specialize_device_more_aggressive_on_cpu():
4356    mode = theano.config.mode
4357    if mode == 'FAST_COMPILE':
4358        mode = 'FAST_RUN'
4359    mode = compile.mode.get_mode(mode)
4360    mode = mode.excluding('fusion').excluding('gpu')
4361
4362    v = T.vector()
4363    val = np.arange(10, dtype=theano.config.floatX)
4364    val_no0 = np.arange(1, 10, dtype=theano.config.floatX)
4365    f = function([v], v ** (15), mode=mode)
4366    nodes = [node.op for node in f.maker.fgraph.toposort()]
4367    assert len(nodes) == 1
4368    assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 6
4369    assert isinstance(nodes[0].scalar_op, theano.scalar.Composite)
4370    utt.assert_allclose(f(val), val ** 15)
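    # The single Composite node presumably comes from rewriting v ** 15 via
    # repeated squaring (x2 = sqr(v), x4 = sqr(x2), x8 = sqr(x4), then
    # v * x2 * x4 * x8), which would account for the 6 scalar apply nodes
    # checked above (3 sqr + 3 mul).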
4371
4372    f = function([v], v ** (-15), mode=mode)
4373    nodes = [node.op for node in f.maker.fgraph.toposort()]
4374    assert len(nodes) == 2
4375    assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 6
4376    assert isinstance(nodes[0].scalar_op, theano.scalar.Composite)
4377    assert isinstance(nodes[-1].scalar_op, theano.scalar.basic.Inv)
4378    utt.assert_allclose(f(val_no0), val_no0 ** (-15))
4379
4380    f = function([v], v ** (16), mode=mode)
4381    nodes = [node.op for node in f.maker.fgraph.toposort()]
4382    assert len(nodes) == 1
4383    assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 4
4384    assert isinstance(nodes[0].scalar_op, theano.scalar.Composite)
4385    utt.assert_allclose(f(val), val ** 16)
4386
4387    f = function([v], v ** (-16), mode=mode)
4388    nodes = [node.op for node in f.maker.fgraph.toposort()]
4389    assert len(nodes) == 2
4390    assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 4
4391    assert isinstance(nodes[0].scalar_op, theano.scalar.Composite)
4392    assert isinstance(nodes[-1].scalar_op, theano.scalar.basic.Inv)
4393    utt.assert_allclose(f(val_no0), val_no0 ** (-16))
4394
4395
4396class T_Rebroadcast(unittest.TestCase):
4397    def test_local_useless_rebroadcast(self):
4398        mode = theano.compile.get_default_mode().including('canonicalize')
4399        v1 = T.vector()
4400        v2 = T.vector()
4401        j = T.join(0, v1, v2)
4402        f = theano.function([v1, v2], j, mode=mode)
4403        f([1, 2], [3, 4, 5])
4404        e = f.maker.fgraph.toposort()
4405        assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0
4406
4407        assert check_stack_trace(f, ops_to_check='all')
4408
4409    def test_rebroadcast_rebroadcast(self):
4410        mode = theano.compile.get_default_mode().including('canonicalize')
4411        m = T.matrix()
4412        s = T.addbroadcast(m, 0, 1)
4413        v = T.unbroadcast(s, 1)
4414        f = theano.function([m], v, mode=mode)
4415        f([[76]])
4416        e = f.maker.fgraph.toposort()
4417        rebroadcast_nodes = [n for n in e if isinstance(n.op, T.Rebroadcast)]
4418        assert len(rebroadcast_nodes) == 1
4419        assert rebroadcast_nodes[0].op.axis == {0: True}
4420
4421
4422class T_useless_elemwise(unittest.TestCase):
4423    def setUp(self):
4424        self.mode = theano.compile.get_default_mode().including(
4425            'canonicalize', 'local_fill_to_alloc')
4426
4427    def test_eq(self):
4428        x = T.dmatrix()
4429        y = T.dmatrix()
4430        f = theano.function([x, y], T.eq(x, y), mode=self.mode)
4431        vx = np.random.rand(5, 4)
4432        vy = np.random.rand(5, 4)
4433        f(vx, vy)
4434        topo = f.maker.fgraph.toposort()
4435        assert len(topo) == 1
4436        assert isinstance(topo[0].op, T.Elemwise)
4437        assert isinstance(topo[0].op.scalar_op, theano.scalar.EQ)
4438        f2 = theano.function([x], T.eq(x, x), mode=self.mode)
4439        assert np.all(f2(vx) == np.ones((5, 4)))
4440        topo2 = f2.maker.fgraph.toposort()
4441        # Shape_i{1}(<TensorType(float64, matrix)>), Shape_i{0}(<TensorType(float64, matrix)>), Alloc([[1]], Shape_i{0}.0, Shape_i{1}.0
4442        assert len(topo2) == 3
4443        assert isinstance(topo2[-1].op, T.Alloc)
4444
4445    def test_neq(self):
4446        x = T.dmatrix()
4447        y = T.dmatrix()
4448        f = theano.function([x, y], T.neq(x, y), mode=self.mode)
4449        vx = np.random.rand(5, 4)
4450        vy = np.random.rand(5, 4)
4451        f(vx, vy)
4452        topo = f.maker.fgraph.toposort()
4453        assert len(topo) == 1
4454        assert isinstance(topo[0].op, T.Elemwise)
4455        assert isinstance(topo[0].op.scalar_op, theano.scalar.NEQ)
4456        f2 = theano.function([x], T.neq(x, x), mode=self.mode)
4457        assert np.all(f2(vx) == np.zeros((5, 4)))
4458        topo2 = f2.maker.fgraph.toposort()
4459        assert len(topo2) == 3
4460        assert isinstance(topo2[-1].op, T.Alloc)
4461
4462    def test_mul(self):
4463        x = T.dmatrix()
4464        y = T.dmatrix()
4465        f = theano.function([x], T.mul(x), mode=self.mode)
4466        vx = np.random.rand(5, 4)
4467        vy = np.random.rand(5, 4)
4468        f(vx)
4469        topo = f.maker.fgraph.toposort()
4470        assert len(topo) == 1
4471        assert topo[0].op == deep_copy_op
4472        f2 = theano.function([x, y], T.mul(x, y), mode=self.mode)
4473        assert np.all(f2(vx, vy) == vx * vy)
4474        topo2 = f2.maker.fgraph.toposort()
4475        assert len(topo2) == 1
4476        assert isinstance(topo2[0].op, T.Elemwise)
4477        assert isinstance(topo2[0].op.scalar_op, theano.scalar.Mul)
4478
4479    def test_add(self):
4480        x = T.dmatrix()
4481        y = T.dmatrix()
4482        f = theano.function([x], T.add(x), mode=self.mode)
4483        vx = np.random.rand(5, 4)
4484        vy = np.random.rand(5, 4)
4485        f(vx)
4486        topo = f.maker.fgraph.toposort()
4487        assert len(topo) == 1
4488        assert topo[0].op == deep_copy_op
4489        f2 = theano.function([x, y], T.add(x, y), mode=self.mode)
4490        assert np.all(f2(vx, vy) == vx + vy)
4491        topo2 = f2.maker.fgraph.toposort()
4492        assert len(topo2) == 1
4493        assert isinstance(topo2[0].op, T.Elemwise)
4494        assert isinstance(topo2[0].op.scalar_op, theano.scalar.Add)
4495
4496    def test_identity(self):
4497        # scalar.identity is used in 2 Elemwise functions:
4498        # tensor_copy, and view
4499        x = T.matrix()
4500        f = theano.function([x], T.tensor_copy(x), mode=self.mode)
4501        vx = np.random.rand(5, 4).astype(config.floatX)
4502        f(vx)
4503        topo = f.maker.fgraph.toposort()
4504        assert len(topo) == 1
4505        assert topo[0].op == deep_copy_op
4506
4507
4508class T_cast_cast(unittest.TestCase):
4509    def setUp(self):
4510        mode = theano.compile.get_default_mode()
4511        self.mode = mode.including('local_cast_cast')
4512
4513    def test_consecutive(self):
4514        x = T.fmatrix()
4515        o = T.Elemwise(scal.Cast(scal.Scalar("float64")))(x.astype("float64"))
4516        f = theano.function([x], o, mode=self.mode)
4517        dx = np.random.rand(5, 4).astype("float32")
4518        f(dx)
4519        topo = f.maker.fgraph.toposort()
4520        assert len(topo) == 1
4521        assert isinstance(topo[0].op.scalar_op, scal.basic.Cast)
4522
4523        x = T.dmatrix()
4524        o = T.Elemwise(scal.Cast(scal.Scalar("float32")))(x.astype("float32"))
4525        f = theano.function([x], o, mode=self.mode)
4526        dx = np.random.rand(5, 4)
4527        f(dx)
4528        topo = f.maker.fgraph.toposort()
4529        assert len(topo) == 1
4530        assert isinstance(topo[0].op.scalar_op, scal.basic.Cast)
4531
4532    def test_upcast(self):
4533        # Upcast followed by any other cast
4534        x = T.fmatrix()
4535        o = T.Elemwise(scal.Cast(scal.Scalar("complex128")))(x.astype("complex64"))
4536        f = theano.function([x], o, mode=self.mode)
4537        dx = np.random.rand(5, 4).astype("float32")
4538        f(dx)
4539        topo = f.maker.fgraph.toposort()
4540        assert len(topo) == 1
4541        assert isinstance(topo[0].op.scalar_op, scal.basic.Cast)
4542
4543        # Upcast followed by a downcast back to the base type
4544        x = T.fmatrix()
4545        o = T.Elemwise(scal.Cast(scal.Scalar("float32")))(x.astype("float64"))
4546        f = theano.function([x], o, mode=self.mode)
4547        dx = np.random.rand(5, 4).astype('float32')
4548        f(dx)
4549        topo = f.maker.fgraph.toposort()
4550        assert len(topo) == 1
4551        assert isinstance(topo[0].op, DeepCopyOp)
4552
4553        # Downcast followed by an upcast back to the base type
4554        # Optimization shouldn't be applied
4555        x = T.dmatrix()
4556        o = T.Elemwise(scal.Cast(scal.Scalar("float64")))(x.astype("float32"))
4557        f = theano.function([x], o, mode=self.mode)
4558        dx = np.random.rand(5, 4)
4559        f(dx)
4560        topo = f.maker.fgraph.toposort()
4561        assert (len(topo) == 1 and isinstance(topo[0].op.scalar_op, scal.basic.Composite)) or (len(topo) > 1)
4562
4563
4564class T_func_inverse(unittest.TestCase):
4565
4566    def setUp(self):
4567        mode = theano.compile.get_default_mode()
4568        self.mode = mode.including('local_func_inv')
4569
4570    def assert_func_pair_optimized(self, func1, func2, data,
4571                                   should_copy=True, is_complex=False):
4572        # Check that a pair of funcs is optimized properly
4573
4574        x = T.cmatrix() if is_complex else T.fmatrix()
4575        o = func2(func1(x))
4576        f = theano.function([x], o, mode=self.mode)
4577        delta = f(data) - data
4578        topo = f.maker.fgraph.toposort()
4579
4580        if should_copy:
4581            acceptable_topo_lens = [1]
4582        else:
4583            # The 2 funcs can be split apart if they are not inverses
4584            acceptable_topo_lens = [1, 2]
4585
4586        if should_copy:
4587            delta_condition = np.all(delta == 0)
4588        else:
4589            delta_condition = np.all(delta != 0)
4590
4591        self.assertTrue(len(topo) in acceptable_topo_lens)
4592        self.assertTrue(delta_condition)
4593        self.assertEqual(isinstance(topo[0].op, DeepCopyOp), should_copy,
4594                         "Inverse functions not removed!")
4595
4596    def test(self):
4597        # test optimization for consecutive functional inverses
4598
4599        dx = np.random.rand(5, 4).astype("float32")
4600        self.assert_func_pair_optimized(T.deg2rad, T.rad2deg, dx)
4601        dx = np.random.rand(5, 4).astype("float32") * 180
4602        self.assert_func_pair_optimized(T.rad2deg, T.deg2rad, dx)
4603
4604        # Test the other functional inverses
4605        dx = np.random.rand(5, 4).astype("float32")
4606        self.assert_func_pair_optimized(T.cosh, T.arccosh, dx)
4607        self.assert_func_pair_optimized(T.arcsinh, T.sinh, dx)
4608        self.assert_func_pair_optimized(T.arctanh, T.tanh, dx)
4609        self.assert_func_pair_optimized(T.inv, T.inv, dx)
4610        self.assert_func_pair_optimized(T.neg, T.neg, dx)
4611        cx = dx + complex(0, 1) * (dx + 0.01)
4612        self.assert_func_pair_optimized(T.conj, T.conj, cx, is_complex=True)
4613
        # Test that non-inverse functions are run normally
4615        self.assert_func_pair_optimized(T.conj, T.neg, cx,
4616                                        should_copy=False, is_complex=True)
4617        dx = np.random.rand(5, 4).astype("float32") + 0.01
4618        self.assert_func_pair_optimized(T.rad2deg, T.rad2deg, dx,
4619                                        should_copy=False)
4620        self.assert_func_pair_optimized(T.rad2deg, T.cosh, dx,
4621                                        should_copy=False)
4622
4623
4624def test_constant_folding():
    # Test that constant folding gets registered at fast_compile.
    # A previous error had removed that registration.
4627
4628    x = tensor.dvector()
4629    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
4630    f = theano.function([x], [x * 2, x + x], mode=mode)
4631    topo = f.maker.fgraph.toposort()
4632    assert len(topo) == 2
4633
    # Test that we do not crash when constant folding elemwise scalars,
    # as they should not generate C code.
4636
4637    x = tensor.constant(3)
4638    assert x.ndim == 0
4639    mode = theano.compile.get_mode("FAST_COMPILE").excluding("fusion")
4640    f = theano.function([], [x * 2, x + x], mode=mode)
4641    topo = f.maker.fgraph.toposort()
4642    assert len(topo) == 2
4643    assert all([isinstance(n.op, DeepCopyOp) for n in topo])
4644
4645
4646def test_constant_get_stabilized():
    # Currently Theano enables the constant_folding optimization before the
    # stabilization optimizations.
    # This causes some stabilization optimizations not to be applied and thus
    # lets inf values appear when they should not.
    #
    # .. note: we can't simply move the constant_folding optimization to
    # specialize, as this breaks other optimizations! We would need to
    # partially duplicate some canonicalize optimizations into specialize to
    # fix this issue.
4653
4654    x2 = T.scalar()
4655    y2 = T.log(1 + T.exp(x2))
4656    mode = theano.compile.get_default_mode()
4657    mode.check_isfinite = False
4658    f2 = theano.function([x2], y2, mode=mode)
4659    try:
4660        assert len(f2.maker.fgraph.toposort()) == 1
4661        assert (f2.maker.fgraph.toposort()[0].op ==
4662                theano.tensor.nnet.sigm.softplus)
4663        assert f2(800) == 800
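        # Without the softplus rewrite, exp(800) overflows and
        # log(1 + exp(800)) would be inf instead of ~800.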
4664
4665        x = T.as_tensor_variable(800)
4666        y = T.log(1 + T.exp(x))
4667        f = theano.function([], y, mode=mode)
4668        assert len(f.maker.fgraph.toposort()) == 0
4669        assert np.isinf(f())
4670
4671        # When this error is fixed, the following line should be ok.
4672        assert f() == 800, f()
4673
4674    except AssertionError:
4675        raise SkipTest('Theano optimizes constant before stabilization. '
4676                       'This breaks stabilization optimization in some '
4677                       'cases. See #504.')
4678
4679
4680class T_local_switch_sink(unittest.TestCase):
4681    def setUp(self):
4682        # condition values
4683        self.condm = np.asarray([[0.1, 0, 1, -1],
4684                                 [0., 0., 0., 0.],
4685                                 [1, 1, 1, 1]])
4686        self.condv = np.asarray([0.1, 0, 1, -1])
4687        self.conds = [0.1, 0, 1, -1]
4688
4689        # x values
4690        self.xm = np.ones((3, 4))
4691        self.xv = np.ones((4,))
4692        self.xs = 1.
4693
4694        # expected results
4695        self.resm = (
4696            [np.asarray([[1, 0, 1, 0], [0, 0, 0, 0], [1, 1, 1, 1]])] * 3 +
4697            [np.asarray([[1, 0, 1, 0], [1, 0, 1, 0], [1, 0, 1, 0]])] +
4698            2 * [np.asarray([[1, 0, 1, 0]])] +
4699            [[np.ones((3, 4)), np.zeros((3, 4)), np.ones((3, 4)), np.zeros((3, 4))]] +
4700            [[np.ones((4,)), np.zeros((4,)), np.ones((4,)), np.zeros((4,))]] +
4701            [[np.asarray(1.0), np.asarray(0.0), np.asarray(1.0), np.asarray(0.0)]])
4702
4703        self.mode = theano.compile.mode.get_default_mode().including(
4704            'canonicalize', 'fast_run').excluding('gpu', 'fusion')
4705        self.mode = copy.copy(self.mode)
4706        self.mode.check_isfinite = False
4707
4708    def function_remove_nan(self, *args, **kwargs):
4709        """
4710        Wrapper around theano.function for this test.
4711
4712        It disables checking for NaN removed by optimizations in DebugMode
4713        (it has false positives in that case).
4714        """
4715        f = theano.function(*args, **kwargs)
4716
4717        def wrapped_f(*args, **kwargs):
4718            # This is a bit ugly since it changes the global value of
4719            # TensorType.values_eq_approx.
4720            old_values_eq_approx = staticmethod(TensorType.values_eq_approx)
4721            TensorType.values_eq_approx = staticmethod(values_eq_approx_remove_nan)
4722            try:
4723                out = f(*args, **kwargs)
4724            finally:
4725                TensorType.values_eq_approx = old_values_eq_approx
4726            return out
4727
4728        return wrapped_f
4729
4730    def test_local_mul_switch_sink(self):
4731        c = T.dscalar()
4732        idx = 0
4733        for condition in [(T.dmatrix('cond'), self.condm),
4734                          (T.dvector('cond'), self.condv),
4735                          (T.dscalar('cond'), self.conds)]:
4736            for x in [(T.dmatrix('x'), self.xm), (T.dvector('x'), self.xv),
4737                      (T.dscalar('x'), self.xs)]:
4738                y = T.mul(T.switch(condition[0] > 0, 1. * x[0], 0. * x[0]),
4739                          T.switch(condition[0] > 0,
4740                                   1. * x[0], T.log(c) * x[0]))
4741                f = self.function_remove_nan([condition[0], x[0], c],
4742                                             [y], mode=self.mode)
4743                if type(condition[1]) is list:
4744                    for i in xrange(len(condition[1])):
4745                        res = f(condition[1][i], x[1], -1)
4746                        assert (res == np.asarray(
4747                            self.resm[idx][i])).sum() == self.resm[idx][i].size
4748                else:
4749                    res = f(condition[1], x[1], -1)
4750                    assert ((res == np.asarray(self.resm[idx])).sum() ==
4751                            self.resm[idx].size)
4752                idx += 1
4753
4754        # This case caused a missed optimization in the past.
4755        x = T.dscalar('x')
4756        y = T.switch(x < 7, x, T.sqrt(x - 7))
4757        f = self.function_remove_nan([x], T.grad(y, x), self.mode)
4758        assert f(5) == 1, f(5)
4759
4760    @attr('slow')
4761    def test_local_div_switch_sink(self):
4762        c = T.dscalar()
4763        idx = 0
4764        for condition in [(T.dmatrix('cond'), self.condm), (T.dvector('cond'), self.condv), (T.dscalar('cond'), self.conds)]:
4765            for x in [(T.dmatrix('x'), self.xm), (T.dvector('x'), self.xv), (T.dscalar('x'), self.xs)]:
4766                y = T.true_div(
4767                    T.switch(condition[0] > 0, 1. * x[0], 0. * x[0]),
4768                    T.switch(condition[0] > 0, 1. * x[0], T.log(c) * x[0]))
4769                f = self.function_remove_nan([condition[0], x[0], c],
4770                                             [y], mode=self.mode)
4771                if type(condition[1]) is list:
4772                    for i in xrange(len(condition[1])):
4773                        res = f(condition[1][i], x[1], -1)
4774                        assert ((res == np.asarray(self.resm[idx][i])).sum() ==
4775                                self.resm[idx][i].size)
4776                else:
4777                    res = f(condition[1], x[1], -1)
4778                    assert ((res == np.asarray(self.resm[idx])).sum() ==
4779                            self.resm[idx].size)
4780                idx += 1
4781
4782
4783class T_local_erf(unittest.TestCase):
4784    def setUp(self):
4785        self.mode = theano.compile.mode.get_default_mode().including(
4786            'canonicalize', 'fast_run').excluding('gpu', 'fusion')
4787        self.mode._optimizer.position_cutoff = 1.50001
4788        if theano.config.cxx == '' and not theano.scalar.basic_scipy.imported_scipy_special:
4789            raise SkipTest("erf need a c++ compiler or scipy")
4790
4791    def test_local_one_plus_erf(self):
4792        val = np.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
4793                         dtype=config.floatX)
4794        x = T.vector()
4795
4796        f = theano.function([x], 1 + T.erf(x), mode=self.mode)
4797        assert [n.op for n in f.maker.fgraph.toposort()] == [
4798            T.mul, T.erfc], f.maker.fgraph.toposort()
4799        f(val)
4800
4801        f = theano.function([x], T.erf(x) + 1, mode=self.mode)
4802        assert [n.op for n in f.maker.fgraph.toposort()] == [
4803            T.mul, T.erfc], f.maker.fgraph.toposort()
4804        f(val)
4805
4806        f = theano.function([x], T.erf(x) + 2, mode=self.mode)
4807        topo = f.maker.fgraph.toposort()
4808        assert len(topo) == 2
4809        assert topo[0].op == T.erf
4810        assert isinstance(topo[1].op, T.Elemwise)
4811        assert isinstance(topo[1].op.scalar_op, scal.Add)
4812        f(val)
4813
4814    def test_local_one_minus_erf(self):
4815        val = np.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
4816                         dtype=config.floatX)
4817        x = T.vector()
4818
4819        f = theano.function([x], 1 - T.erf(x), mode=self.mode)
4820        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
4821            f.maker.fgraph.toposort()
4822        print(f(val))
4823
4824        f = theano.function([x], 1 + (-T.erf(x)), mode=self.mode)
4825        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
4826            f.maker.fgraph.toposort()
4827        print(f(val))
4828
4829        f = theano.function([x], (-T.erf(x)) + 1, mode=self.mode)
4830        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
4831            f.maker.fgraph.toposort()
4832        print(f(val))
4833
4834        f = theano.function([x], 2 - T.erf(x), mode=self.mode)
4835        topo = f.maker.fgraph.toposort()
4836        assert len(topo) == 2, f.maker.fgraph.toposort()
4837        assert topo[0].op == T.erf, f.maker.fgraph.toposort()
4838        assert isinstance(topo[1].op, T.Elemwise), f.maker.fgraph.toposort()
4839        assert isinstance(topo[1].op.scalar_op, scal.Add)\
4840            or isinstance(topo[1].op.scalar_op, scal.Sub), f.maker.fgraph.toposort()
4841        print(f(val))
4842
4843    def test_local_erf_minus_one(self):
4844        val = np.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
4845                         dtype=config.floatX)
4846        x = T.vector()
4847
4848        f = theano.function([x], T.erf(x) - 1, mode=self.mode)
4849        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
4850        print(f(val))
4851
4852        f = theano.function([x], T.erf(x) + (-1), mode=self.mode)
4853        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
4854        print(f(val))
4855
4856        f = theano.function([x], -1 + T.erf(x), mode=self.mode)
4857        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc, T.mul]
4858        print(f(val))
4859
4860        f = theano.function([x], T.erf(x) - 2, mode=self.mode)
4861        topo = f.maker.fgraph.toposort()
4862        assert len(topo) == 2
4863        assert topo[0].op == T.erf
4864        assert isinstance(topo[1].op, T.Elemwise)
4865        assert isinstance(topo[1].op.scalar_op, scal.Add)\
4866            or isinstance(topo[1].op.scalar_op, scal.Sub)
4867        print(f(val))
4868
4869
4870class T_local_erfc(unittest.TestCase):
4871    def setUp(self):
4872        self.mode_fusion = theano.compile.mode.get_default_mode().including(
4873            'canonicalize').including('fast_run').excluding('gpu')
4874        self.mode = self.mode_fusion.excluding('fusion')
4875        self.mode._optimizer.position_cutoff = 1.50001
4876        if (theano.config.cxx == '' and
4877                not theano.scalar.basic_scipy.imported_scipy_special):
4878            raise SkipTest("erfc need a c++ compiler or scipy")
4879
4880    def test_local_one_minus_erfc(self):
4881        # test opt: 1-erfc(x) => erf(x) and -erfc(x)+1 => erf(x)
4882
4883        val = np.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
4884                         dtype=config.floatX)
4885        x = T.vector('x')
4886
4887        f = theano.function([x], 1 - T.erfc(x), mode=self.mode)
4888        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
4889            f.maker.fgraph.toposort()
4890        print(f(val))
4891
4892        f = theano.function([x], (-T.erfc(x)) + 1, mode=self.mode)
4893        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
4894            f.maker.fgraph.toposort()
4895        print(f(val))
4896
4897        f = theano.function([x], 2 - T.erfc(x), mode=self.mode)
4898        topo = f.maker.fgraph.toposort()
4899        assert len(topo) == 2, f.maker.fgraph.toposort()
4900        assert topo[0].op == T.erfc, f.maker.fgraph.toposort()
4901        assert isinstance(topo[1].op, T.Elemwise), f.maker.fgraph.toposort()
4902        assert isinstance(topo[1].op.scalar_op, scal.Sub),\
4903            f.maker.fgraph.toposort()
4904        print(f(val))
4905
4906    def test_local_erf_neg_minus_one(self):
4907        # test opt: (-1)+erfc(-x)=>erf(x)
4908
4909        val = np.asarray([-30, -3, -2, -1, 0, 1, 2, 3, 30],
4910                         dtype=config.floatX)
4911        x = T.vector('x')
4912
4913        f = theano.function([x], -1 + T.erfc(-x), mode=self.mode)
4914        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
4915            f.maker.fgraph.toposort()
4916        print(f(val))
4917
4918        f = theano.function([x], T.erfc(-x) - 1, mode=self.mode)
4919        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
4920            f.maker.fgraph.toposort()
4921        print(f(val))
4922
4923        f = theano.function([x], T.erfc(-x) + (-1), mode=self.mode)
4924        assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
4925            f.maker.fgraph.toposort()
4926        print(f(val))
4927
4928    def test_local_log_erfc(self):
4929        val = [-30, -27, -26, -11, -10, -3, -2, -1, 0, 1, 2, 3, 10,
4930               11, 26, 27, 28, 30]
4931        if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            # python mode doesn't like the inv(0)
4933            val.remove(0)
4934        val = np.asarray(val, dtype=config.floatX)
4935        x = T.vector('x')
4936
        # there are some NaNs that will appear in the graph for the log of
        # negative values
4938        mode = copy.copy(self.mode)
4939        mode.check_isfinite = False
4940        mode_fusion = copy.copy(self.mode_fusion)
4941        mode_fusion.check_isfinite = False
4942
4943        f = theano.function([x], T.log(T.erfc(x)), mode=mode)
4944        assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
4945        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
4946        assert all(np.isfinite(f(val)))
4947
4948        f = theano.function([x], T.log(T.erfc(-x)), mode=mode)
4949        assert len(f.maker.fgraph.apply_nodes) == 24, len(f.maker.fgraph.apply_nodes)
4950        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
4951        assert all(np.isfinite(f(-val)))
4952
4953        f = theano.function([x], T.log(T.erfc(x)), mode=mode_fusion)
4954        assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes)
4955        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
4956        assert len(f.maker.fgraph.toposort()[0].fgraph.toposort()[
4957            0].op.scalar_op.fgraph.apply_nodes) == 22, len(f.maker.fgraph.toposort()[0].fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes)
4958        # TODO: fix this problem
4959        if theano.config.floatX == "float32" and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            raise SkipTest('The python code internally upcasts some float32 '
                           'values to python floats for part of its '
                           'computation, so the C and python code do not '
                           'generate the same value. '
                           'You can ignore this error.')
4965        assert all(np.isfinite(f(val)))
4966
4967    def test_local_grad_log_erfc_neg(self):
4968        val = [-100, -30, -27, -26.4, -26.2, -26, -11, -10, -9, -3, -2, -1, 0,
4969               1, 2, 3, 9, 10, 11, 27, 26.4, 26.2, 26, 28, 30, 100]
4970        if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            # Python mode does not like the inv(0) in the computation,
            # but the switch does not select this value,
            # so it is computed for no good reason.
4974            val.remove(0)
4975        if theano.config.mode in ["DebugMode", "DEBUG_MODE"] and theano.config.floatX == 'float32':
            # In float32 there is a range of values close to 10 that we
            # stabilize because the original gives a bigger error than the
            # stabilized version.
            # The original value in float32 is -30.0, the stabilized value is
            # -20.1 and the original value in float64 is -18.1.
4978            val.remove(10)
4979        val = np.asarray(val, dtype=config.floatX)
4980        x = T.vector('x')
4981        y = T.vector('y')
4982
        # Some NaNs will appear in the graph for the log of negative values.
4984        mode = copy.copy(self.mode)
4985        mode.check_isfinite = False
4986        mode_fusion = copy.copy(self.mode_fusion)
4987        mode_fusion.check_isfinite = False
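        # The quantity being stabilized here (for reference): since
        # d/dx erfc(x) = -2/sqrt(pi) * exp(-x**2), the gradient of
        # log(erfc(x)) is -2/sqrt(pi) * exp(-x**2) / erfc(x).  The
        # expressions below are variations of that exp(-x**2) / erfc(x)
        # pattern (with different constants, or with the mul/sqr/neg removed)
        # to check that the optimization still matches them.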
4988
4989        f = theano.function([x], T.grad(T.log(T.erfc(x)).sum(), x), mode=mode)
4990
4991        assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
4992        assert all(np.isfinite(f(val)))
4993        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
4994
4995        # test with a different mul constant
4996        f = theano.function(
4997            [x],
4998            T.mul(T.exp(T.neg(T.sqr(x))), - 10.12837917) / T.erfc(x),
4999            mode=mode)
5000        assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
5001        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5002        assert all(np.isfinite(f(val)))
5003
5004        # test that we work without the mul
5005        f = theano.function([x], T.exp(T.neg(T.sqr(x))) / T.erfc(x), mode=mode)
5006        assert len(f.maker.fgraph.apply_nodes) == 22, len(f.maker.fgraph.apply_nodes)
5007        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5008        assert all(np.isfinite(f(val)))
5009
5010        # test that we don't work if x!=y
5011        f = theano.function([x, y], T.exp(T.neg(T.sqr(x))) / T.erfc(
5012            y), mode=mode)
5013        assert len(f.maker.fgraph.apply_nodes) == 5, len(f.maker.fgraph.apply_nodes)
5014        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5015        f(val, val - 3)
5016
5017        # test that we work without the sqr and neg
5018        f = theano.function([x], T.exp(T.mul(-1, x, x)) / T.erfc(x), mode=mode)
5019        assert len(f.maker.fgraph.apply_nodes) == 21, len(f.maker.fgraph.apply_nodes)
5020        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5021        assert all(np.isfinite(f(val)))
5022
        # test that it works correctly when x appears as 2 * x in the graph.
5024        f = theano.function([x], T.grad(T.log(T.erfc(2 * x)).sum(), x), mode=mode)
5025        assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
5026        assert np.isfinite(f(val)).all()
5027        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5028
5029        f = theano.function([x], T.grad(T.log(T.erfc(x)).sum(), x), mode=mode_fusion)
5030        assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes)
5031        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
5032
5033        # TODO: fix this problem
5034        if theano.config.floatX == "float32" and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            # The Python code internally upcasts some float32 values to
            # Python floats for part of its computation, so the C and Python
            # code do not generate the same value. You can ignore this error:
            # it happens in an intermediate step that does not show up in the
            # final result.

            # This test error would be a duplicate of the one in
            # test_local_log_erfc, so we hide it.
5042            pass
5043        else:
5044            assert all(np.isfinite(f(val)))
5045
5046    def speed_local_log_erfc(self):
5047
        val = np.random.rand(int(1e6))
5049        x = T.vector()
5050        mode = theano.compile.mode.get_mode("FAST_RUN")
5051        f1 = theano.function([x], T.log(T.erfc(x)),
5052                             mode=mode.excluding("local_log_erfc"))
5053        f2 = theano.function([x], T.log(T.erfc(x)), mode=mode)
5054        print(f1.maker.fgraph.toposort())
5055        print(f2.maker.fgraph.toposort())
5056        t0 = time.time()
5057        f1(val)
5058        t1 = time.time()
5059        f2(val)
5060        t2 = time.time()
5061        print(t1 - t0, t2 - t1)
5062
5063
5064class test_local_useless_switch(unittest.TestCase):
5065    def setUp(self):
5066        self.mode = mode_opt.excluding('constant_folding')
5067
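    # The simplifications exercised below (informal summary): a switch whose
    # condition is the constant 0 should reduce to its "false" branch, a
    # constant-1 condition to its "true" branch, and switch(c, x, x) to x
    # (possibly through a broadcast/alloc when the shapes differ).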
5068    def test_const0(self):
5069
5070        for dtype1 in ['int32', 'int64']:
5071            for dtype2 in ['int32', 'int64']:
5072                x = theano.tensor.matrix('x', dtype=dtype1)
5073                y = theano.tensor.matrix('y', dtype=dtype2)
5074                z = theano.tensor.switch(0, x, y)
5075                f = theano.function([x, y], z, mode=self.mode)
5076                assert len([node.op for node in f.maker.fgraph.toposort() if
5077                            (isinstance(node.op, theano.tensor.Elemwise) and
5078                             isinstance(node.op.scalar_op,
5079                                        theano.scalar.basic.Switch))]) == 0
5080                vx = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
5081                vy = np.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
5082                assert np.all(f(vx, vy) == vy)
5083
5084    def test_const1(self):
5085
5086        for dtype1 in ['int32', 'int64']:
5087            for dtype2 in ['int32', 'int64']:
5088                x = theano.tensor.matrix('x', dtype=dtype1)
5089                y = theano.tensor.matrix('y', dtype=dtype2)
5090                z = theano.tensor.switch(1, x, y)
5091                f = theano.function([x, y], z, mode=self.mode)
5092                assert len([node.op for node in f.maker.fgraph.toposort() if
5093                            (isinstance(node.op, theano.tensor.Elemwise) and
5094                             isinstance(node.op.scalar_op,
5095                                        theano.scalar.basic.Switch))]) == 0
5096                vx = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
5097                vy = np.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
5098                assert np.all(f(vx, vy) == vx)
5099
5100    def test_left_is_right(self):
5101
5102        for dtype1 in ['int32', 'int64']:
5103            x = theano.tensor.matrix('x', dtype=dtype1)
5104            varc = theano.tensor.matrix('varc', dtype=dtype1)
5105            z1 = theano.tensor.switch(1, x, x)
5106            z0 = theano.tensor.switch(0, x, x)
5107            z2 = theano.tensor.switch(varc, x, x)
5108            f1 = theano.function([x], z1, mode=self.mode)
5109            f0 = theano.function([x], z0, mode=self.mode)
5110            f2 = theano.function([x, varc], z2, mode=self.mode)
5111
5112            topo = f1.maker.fgraph.toposort()
5113            assert len(topo) == 1
5114            assert topo[0].op == deep_copy_op
5115
5116            topo = f0.maker.fgraph.toposort()
5117            assert len(topo) == 1
5118            assert topo[0].op == deep_copy_op
5119
5120            topo = f2.maker.fgraph.toposort()
5121            assert len(topo) == 1
5122            assert topo[0].op == deep_copy_op
5123
5124            vx = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
5125            vc = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
5126            assert np.all(f1(vx) == vx)
5127            assert np.all(f0(vx) == vx)
5128            assert np.all(f2(vx, vc) == vx)
5129
5130    def test_shape_le_0(self):
5131
5132        for dtype1 in ['float32', 'float64']:
5133            x = theano.tensor.matrix('x', dtype=dtype1)
5134            z0 = theano.tensor.switch(theano.tensor.le(x.shape[0], 0), 0, x.shape[0])
5135            f0 = theano.function([x], z0, mode=self.mode)
5136            assert isinstance(f0.maker.fgraph.toposort()[0].op, Shape_i)
5137
5138            z1 = theano.tensor.switch(theano.tensor.le(x.shape[1], 0), 0, x.shape[1])
5139            f1 = theano.function([x], z1, mode=self.mode)
5140            assert isinstance(f1.maker.fgraph.toposort()[0].op, Shape_i)
5141
5142            vx = np.random.randn(0, 5).astype(dtype1)
5143            assert f0(vx) == 0
5144            assert f1(vx) == 5
5145
5146    def test_broadcast1(self):
5147        # test switch(cst, matrix, row)
5148        x = theano.tensor.matrix('x', dtype='int32')
5149        y = theano.tensor.vector('y', dtype='int64')
5150
5151        z = theano.tensor.switch(1, x, y)
5152        f = theano.function([x, y], z, mode=self.mode)
5153        assert len([node.op for node in f.maker.fgraph.toposort() if
5154                    isinstance(node.op, theano.tensor.Elemwise) and
5155                    not isinstance(node.op.scalar_op, theano.scalar.basic.Cast)]) == 0
5156        vx = np.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
5157        vy = np.array([10, 11, 12], dtype='int64')
5158        assert np.all(f(vx, vy) == vx)
5159
5160        z = theano.tensor.switch(0, x, y)
5161        f = theano.function([x, y], z, mode=self.mode)
5162        assert len([node.op for node in f.maker.fgraph.toposort() if
5163                    isinstance(node.op, theano.tensor.Elemwise)]) == 0
5164        vx = np.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
5165        vy = np.array([10, 11, 12], dtype='int64')
5166        assert np.all(f(vx, vy) == vy)
5167
5168    def test_broadcast2(self):
5169        # test switch(cst, vector, matrix)
5170
5171        # This case is not optimized for now.
5172        x = theano.tensor.vector('x', dtype='int32')
5173        y = theano.tensor.matrix('y', dtype='int64')
5174        z = theano.tensor.switch(1, x, y)
5175        f = theano.function([x, y], z, mode=self.mode)
5176        assert len([node.op for node in f.maker.fgraph.toposort() if
5177                    isinstance(node.op, theano.tensor.Elemwise) and
5178                    not isinstance(node.op.scalar_op, theano.scalar.basic.Cast)]) == 0
5179        vx = np.array([4, 5, 6], dtype='int32')
5180        vy = np.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
5181        assert np.all(f(vx, vy) == vx)
5182
5183        z = theano.tensor.switch(0, x, y)
5184        f = theano.function([x, y], z, mode=self.mode)
5185        assert len([node.op for node in f.maker.fgraph.toposort() if
5186                    isinstance(node.op, theano.tensor.Elemwise)]) == 0
5187        vx = np.array([4, 5, 6], dtype='int32')
5188        vy = np.array([[7, 8, 9], [10, 11, 12]], dtype='int64')
5189        assert np.all(f(vx, vy) == vy)
5190
5191    def test_broadcast3(self):
5192        # test switch(matrix, same_vector, same_vector)
5193
5194        x = theano.tensor.matrix('x', dtype='int32')
5195        y = theano.tensor.vector('y', dtype='int64')
5196        z = theano.tensor.switch(x, y, y)
5197        f = theano.function([x, y], z, mode=self.mode)
5198        vx = np.array([[0, 1], [1, 0]], dtype='int32')
5199        vy = np.array([7, 8], dtype='int64')
5200        utt.assert_allclose(f(vx, vy), np.where(vx, vy, vy))
5201        assert len([node.op for node in f.maker.fgraph.toposort() if
5202                    isinstance(node.op, theano.tensor.Elemwise)]) == 0
5203
5204
5205class test_local_merge_switch_same_cond(unittest.TestCase):
5206    def test_elemwise(self):
5207        # float Ops
5208        mats = theano.tensor.matrices('cabxy')
5209        c, a, b, x, y = mats
5210        s1 = T.switch(c, a, b)
5211        s2 = T.switch(c, x, y)
5212        for op in (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
5213                   T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
5214                   T.pow):
5215            g = optimize(FunctionGraph(mats, [op(s1, s2)]))
5216            assert str(g).count('Switch') == 1
5217        # integer Ops
5218        mats = theano.tensor.imatrices('cabxy')
5219        c, a, b, x, y = mats
5220        s1 = T.switch(c, a, b)
5221        s2 = T.switch(c, x, y)
5222        for op in (T.and_, T.or_, T.xor,
5223                   T.bitwise_and, T.bitwise_or, T.bitwise_xor):
5224            g = optimize(FunctionGraph(mats, [op(s1, s2)]))
5225            assert str(g).count('Switch') == 1
5226        # add/mul with more than two inputs
5227        u, v = theano.tensor.matrices('uv')
5228        s3 = T.switch(c, u, v)
5229        for op in (T.add, T.mul):
5230            g = optimize(FunctionGraph(mats + [u, v], [op(s1, s2, s3)]))
5231            assert str(g).count('Switch') == 1
5232
5233
5234class T_local_sum_prod(unittest.TestCase):
5235    """
5236    Test sum/prod opts in opt.py
5237    """
5238    def setUp(self):
5239        self.mode = theano.compile.get_default_mode().including('canonicalize',
5240                                                                'specialize')
5241
5242    def test_local_sum_prod_mul_by_scalar(self):
5243        # Test the optimization local_sum_prod_mul_by_scalar for both Sum and
5244        # Prod ops in six cases each :
5245        # 1-the inputs to the mul contain a scalar and no non-scalar
5246        # 2-the inputs to the mul contain a scalar and one non-scalar
5247        # 3-the inputs to the mul contain a scalar and two non-scalars
5248        # 4-the inputs to the mul contain two scalars and no non-scalar
5249        # 5-the inputs to the mul contain two scalars and one non-scalar
5250        # 6-the inputs to the mul contain two scalars and two non-scalars
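        # Informal statement of the rewrite (for reference): with s a scalar
        # and X a tensor of n elements,
        #     sum(s * X)  == s * sum(X)
        #     prod(s * X) == s**n * prod(X)
        # so the scalar factors can be pulled out of the reduction; the node
        # counts asserted below follow from that.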
5251
5252        vect = T.dvector()
5253        mat = T.dmatrix()
5254        scalar1 = T.dscalar()
5255        scalar2 = T.dscalar()
5256
5257        v_val = np.random.rand(2)
5258        m_val = np.random.rand(2, 2)
5259        s1_val = np.random.rand()
5260        s2_val = np.random.rand()
5261
5262        def test_reduction_opt(inputs, inputs_val, reduction_op,
5263                               expected_output, nb_expected_sum_nodes):
5264            mul_out = T.mul(*inputs)
5265            f = theano.function(inputs, reduction_op()(mul_out),
5266                                mode=self.mode)
5267            out = f(*inputs_val)
5268            utt.assert_allclose(out, expected_output)
5269
            # Ensure that the optimization has been applied properly by
            # checking that the optimized graph contains the expected number
            # of apply nodes for the reduction op
            reduction_nodes = [n for n in f.maker.fgraph.toposort()
                               if isinstance(n.op, reduction_op)]
            assert len(reduction_nodes) == nb_expected_sum_nodes
5276
5277        # Test sum
5278
5279        # Case 1
5280        test_reduction_opt([scalar1], [s1_val], T.Sum, s1_val, 0)
5281
5282        # Case 2
5283        test_reduction_opt([vect, scalar1], [v_val, s1_val], T.Sum,
5284                           s1_val * v_val.sum(), 1)
5285
5286        # Case 3
5287        test_reduction_opt([vect, mat, scalar1], [v_val, m_val, s1_val], T.Sum,
5288                           s1_val * (v_val * m_val).sum(), 1)
5289
5290        # Case 4
5291        test_reduction_opt([scalar1, scalar2], [s1_val, s2_val], T.Sum,
5292                           s1_val * s2_val, 0)
5293
5294        # Case 5
5295        test_reduction_opt([vect, scalar1, scalar2], [v_val, s1_val, s2_val],
5296                           T.Sum, s1_val * s2_val * v_val.sum(), 1)
5297
5298        # Case 6
5299        test_reduction_opt([vect, mat, scalar1, scalar2],
5300                           [v_val, m_val, s1_val, s2_val], T.Sum,
5301                           s1_val * s2_val * (v_val * m_val).sum(), 1)
5302
5303        # Test prod
5304
5305        # Case 1
5306        test_reduction_opt([scalar1], [s1_val], T.elemwise.Prod, s1_val, 0)
5307
5308        # Case 2
5309        test_reduction_opt([vect, scalar1], [v_val, s1_val], T.elemwise.Prod,
5310                           (s1_val * v_val).prod(), 1)
5311
5312        # Case 3
5313        test_reduction_opt([vect, mat, scalar1], [v_val, m_val, s1_val],
5314                           T.elemwise.Prod, (s1_val * v_val * m_val).prod(), 2)
5315
5316        # Case 4
5317        test_reduction_opt([scalar1, scalar2], [s1_val, s2_val],
5318                           T.elemwise.Prod, s1_val * s2_val, 0)
5319
5320        # Case 5
5321        test_reduction_opt([vect, scalar1, scalar2], [v_val, s1_val, s2_val],
5322                           T.elemwise.Prod, (s1_val * s2_val * v_val).prod(),
5323                           1)
5324
5325        # Case 6
5326        test_reduction_opt([vect, mat, scalar1, scalar2],
5327                           [v_val, m_val, s1_val, s2_val], T.elemwise.Prod,
5328                           (s1_val * s2_val * v_val * m_val).prod(), 2)
5329
5330    def test_local_sum_prod_all_to_none(self):
5331        a = T.tensor3()
5332        input = np.arange(3 * 4 * 5, dtype=config.floatX).reshape(3, 4, 5)
5333        # test sum
5334        f = theano.function([a], a.sum(), mode=self.mode)
5335        assert len(f.maker.fgraph.apply_nodes) == 1
5336        utt.assert_allclose(f(input), input.sum())
5337        # test prod
5338        f = theano.function([a], a.prod(), mode=self.mode)
5339        assert len(f.maker.fgraph.apply_nodes) == 1
5340        utt.assert_allclose(f(input), input.prod())
5341        # test sum
5342        f = theano.function([a], a.sum([0, 1, 2]), mode=self.mode)
5343        assert len(f.maker.fgraph.apply_nodes) == 1
5344        utt.assert_allclose(f(input), input.sum())
5345        # test prod
5346        f = theano.function([a], a.prod([0, 1, 2]), mode=self.mode)
5347        assert len(f.maker.fgraph.apply_nodes) == 1
5348        utt.assert_allclose(f(input), input.prod())
5349
5350        backup = config.warn.sum_sum_bug
5351        config.warn.sum_sum_bug = False
5352        try:
5353            f = theano.function([a], a.sum(0).sum(0).sum(0), mode=self.mode)
5354            assert len(f.maker.fgraph.apply_nodes) == 1
5355            utt.assert_allclose(f(input), input.sum())
5356        finally:
5357            config.warn.sum_sum_bug = backup
5358
5359    def test_local_sum_sum_prod_prod(self):
5360        a = T.tensor3()
5361        input = np.arange(3 * 4 * 5, dtype=config.floatX).reshape(3, 4, 5)
5362        dims = [(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1),
5363                ((0, 1), 0), ((1, 2), 0), (0, (0, 1)),
5364                (1, (0, 1)), (2, (0, 1))]
5365
5366        backup = config.warn.sum_sum_bug
5367        config.warn.sum_sum_bug = False
5368
5369        def my_prod(data, d, dd):
            # Like data.prod(d).prod(dd), but handles d or dd being a tuple
            # of 2 dimensions.
5371            if not isinstance(d, tuple) and not isinstance(dd, tuple):
5372                return data.prod(d).prod(dd)
5373            if isinstance(d, tuple):
5374                d = sorted(d)
5375                return data.prod(d[1]).prod(d[0]).prod(dd)
5376            else:
5377                dd = sorted(dd)
5378                return data.prod(d).prod(dd[1]).prod(dd[0])
5379
5380        def my_sum(data, d, dd):
            # Like data.sum(d).sum(dd), but handles d or dd being a tuple
            # of 2 dimensions.
5382            if not isinstance(d, tuple) and not isinstance(dd, tuple):
5383                return data.sum(d).sum(dd)
5384            if isinstance(d, tuple):
5385                d = sorted(d)
5386                return data.sum(d[1]).sum(d[0]).sum(dd)
5387            else:
5388                dd = sorted(dd)
5389                return data.sum(d).sum(dd[1]).sum(dd[0])
5390
5391        def my_sum_prod(data, d, dd):
            # Like data.sum(d).prod(dd), but handles d or dd being a tuple
            # of 2 dimensions.
5393            if not isinstance(d, tuple) and not isinstance(dd, tuple):
5394                return data.sum(d).prod(dd)
5395            if isinstance(d, tuple):
5396                d = sorted(d)
5397                return data.sum(d[1]).sum(d[0]).prod(dd)
5398            else:
5399                dd = sorted(dd)
5400                return data.sum(d).prod(dd[1]).prod(dd[0])
5401
5402        try:
5403            for d, dd in dims:
5404                expected = my_sum(input, d, dd)
5405                f = theano.function([a], a.sum(d).sum(dd), mode=self.mode)
5406                utt.assert_allclose(f(input), expected)
5407                assert len(f.maker.fgraph.apply_nodes) == 1
5408            for d, dd in dims[:6]:
5409                f = theano.function([a], a.sum(d).sum(dd).
5410                                    sum(0), mode=self.mode)
5411                utt.assert_allclose(f(input), input.sum(d).sum(dd).sum(0))
5412                assert len(f.maker.fgraph.apply_nodes) == 1
5413            for d in [0, 1, 2]:
5414                f = theano.function([a], a.sum(d).sum(None), mode=self.mode)
5415                utt.assert_allclose(f(input), input.sum(d).sum())
5416                assert len(f.maker.fgraph.apply_nodes) == 1
5417            f = theano.function([a], a.sum(None).sum(), mode=self.mode)
5418            utt.assert_allclose(f(input), input.sum())
5419            assert len(f.maker.fgraph.apply_nodes) == 1
5420        finally:
5421            config.warn.sum_sum_bug = backup
5422
5423        # test prod
5424        for d, dd in dims:
5425            expected = my_prod(input, d, dd)
5426            f = theano.function([a], a.prod(d).prod(dd), mode=self.mode)
5427            utt.assert_allclose(f(input), expected)
5428            assert len(f.maker.fgraph.apply_nodes) == 1
5429        for d, dd in dims[:6]:
5430            f = theano.function([a], a.prod(d).prod(dd).
5431                                prod(0), mode=self.mode)
5432            utt.assert_allclose(f(input), input.prod(d).prod(dd).prod(0))
5433            assert len(f.maker.fgraph.apply_nodes) == 1
5434        for d in [0, 1, 2]:
5435            f = theano.function([a], a.prod(d).prod(None), mode=self.mode)
5436            utt.assert_allclose(f(input), input.prod(d).prod())
5437            assert len(f.maker.fgraph.apply_nodes) == 1
5438        f = theano.function([a], a.prod(None).prod(), mode=self.mode)
5439        utt.assert_allclose(f(input), input.prod())
5440        assert len(f.maker.fgraph.apply_nodes) == 1
5441
        # Test that a sum followed by a prod does not get merged by the
        # optimization.
5443        for d, dd in dims:
5444            expected = my_sum_prod(input, d, dd)
5445            f = theano.function([a], a.sum(d).prod(dd), mode=self.mode)
5446            utt.assert_allclose(f(input), expected)
5447            assert len(f.maker.fgraph.apply_nodes) == 2
5448        for d, dd in dims[:6]:
5449            f = theano.function([a], a.sum(d).prod(dd).
5450                                prod(0), mode=self.mode)
5451            utt.assert_allclose(f(input), input.sum(d).prod(dd).prod(0))
5452            assert len(f.maker.fgraph.apply_nodes) == 2
5453        for d in [0, 1, 2]:
5454            f = theano.function([a], a.sum(d).prod(None), mode=self.mode)
5455            utt.assert_allclose(f(input), input.sum(d).prod())
5456            assert len(f.maker.fgraph.apply_nodes) == 2
5457        f = theano.function([a], a.sum(None).prod(), mode=self.mode)
5458        utt.assert_allclose(f(input), input.sum())
5459        assert len(f.maker.fgraph.apply_nodes) == 1
5460
5461    def test_local_sum_prod_alloc(self):
5462        # test local_opt_alloc
5463        a = T.dtensor3()
5464        input = np.asarray(np.arange(2 * 3 * 4).reshape(2, 3, 4),
5465                           dtype='float64')
5466        mode = self.mode.including('specialize').excluding('fusion')
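        # What the alloc optimization is expected to do here (informal
        # sketch): a reduction over a broadcasted constant does not need to
        # materialize the full array; e.g. sum(zeros_like(a)) is just 0 and
        # sum(ones_like(a), axis=d) reduces to an alloc filled with the size
        # of the reduced dimension, hence the "topo[-1].op == T.alloc" checks
        # and the absence of Sum/Prod nodes asserted below.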
5467
5468        for t_like, n_like, nb_nodes in [
5469                (tensor.zeros_like, np.zeros_like, (1, 3, 3, 2)),
5470                (tensor.ones_like, np.ones_like, (5, 5, 5, 6))]:
5471            # test sum
5472            f = theano.function([a], t_like(a).sum(None), mode=mode)
5473            utt.assert_allclose(f(input), n_like(input).sum())
5474            assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
5475
5476            f = theano.function([a], t_like(a).sum([0, 1, 2]), mode=mode)
5477            utt.assert_allclose(f(input), n_like(input).sum())
5478            assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
5479
5480            for d in xrange(3):
5481                f = theano.function([a], t_like(a).sum(d), mode=mode)
5482                utt.assert_allclose(f(input), n_like(input).sum(d))
5483                assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1]
5484                topo = f.maker.fgraph.toposort()
5485                assert topo[-1].op == T.alloc
5486                assert not any([isinstance(node.op, T.Sum) for node in topo])
5487            for i in xrange(3):
5488                f = theano.function([a], t_like(a).sum(i), mode=mode)
5489                utt.assert_allclose(f(input), n_like(input).sum(i))
5490                assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2]
5491                topo = f.maker.fgraph.toposort()
5492                assert topo[-1].op == T.alloc
5493                assert not any([isinstance(node.op, T.Sum) for node in topo])
5494
5495            # test prod
5496            f = theano.function([a], t_like(a).prod(None), mode=mode)
5497            utt.assert_allclose(f(input), n_like(input).prod())
5498            # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
5499
5500            f = theano.function([a], t_like(a).prod([0, 1, 2]), mode=mode)
5501            utt.assert_allclose(f(input), n_like(input).prod())
5502            # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
5503
5504            for d in range(3):
5505                f = theano.function([a], t_like(a).prod(d), mode=mode)
5506                utt.assert_allclose(f(input), n_like(input).prod(d))
5507                # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1]
5508                topo = f.maker.fgraph.toposort()
5509                assert topo[-1].op == T.alloc
5510                assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
5511            for i in range(3):
5512                f = theano.function([a], t_like(a).prod(i), mode=mode)
5513                utt.assert_allclose(f(input), n_like(input).prod(i))
5514                # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2]
5515                topo = f.maker.fgraph.toposort()
5516                assert topo[-1].op == T.alloc
5517                assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
5518
5519            backup = config.warn.sum_sum_bug
5520            config.warn.sum_sum_bug = False
5521            try:
5522                for d, dd in [(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1)]:
5523                    f = theano.function([a], t_like(a).
5524                                        sum(d).sum(dd), mode=mode)
5525                    utt.assert_allclose(f(input),
5526                                        n_like(input).sum(d).sum(dd))
5527                    assert len(f.maker.fgraph.apply_nodes) == nb_nodes[3]
5528                    topo = f.maker.fgraph.toposort()
5529                    assert topo[-1].op == T.alloc
5530                    assert not any([isinstance(node.op,
5531                                               T.Sum) for node in topo])
5532            finally:
5533                config.warn.sum_sum_bug = backup
5534
5535    def test_local_sum_sum_int8(self):
5536        # Test that local_sum_sum works when combining two sums on an int8 array.
5537        # This is a regression test for ticket gh-356.
5538
5539        x = tensor.tensor3(dtype='int8')
5540
5541        y = x.sum(axis=0).sum(axis=1)
5542        backup = config.on_opt_error
5543        config.on_opt_error = 'raise'
5544        try:
5545            # This compilation would fail prior to fix.
5546            theano.function([x], y)
5547        finally:
5548            config.on_opt_error = backup
5549
5550    def test_local_sum_sum_dtype(self):
5551        # Test that local_sum_sum works when specifying dtypes manually.
5552
5553        x = tensor.tensor3(dtype='int8')
5554        y = x.sum(axis=0, dtype='int32').sum(axis=1, dtype='int64')
5555        backup = config.on_opt_error
5556        config.on_opt_error = 'raise'
5557        try:
5558            # This compilation would fail prior to fix.
5559            theano.function([x], y)
5560        finally:
5561            config.on_opt_error = backup
5562
5563    def test_local_sum_prod_mul_by_scalar_stack_trace(self):
5564        # Test that stack trace is copied over correctly for local_sum_prod_mul_by_scalar.
5565        m0 = theano.compile.get_default_mode()\
5566            .excluding('inplace_elemwise_opt')\
5567            .including('canonicalize', 'specialize')
5568
5569        vect = T.dvector()
5570        mat = T.dmatrix()
5571        scalar = T.dscalar()
5572
5573        f = theano.function([vect, scalar], T.sum(vect * scalar), mode=m0)
5574        assert check_stack_trace(f, ops_to_check='all')
5575
5576        f = theano.function([vect], T.sum(-vect), mode=m0)
5577        assert check_stack_trace(f, ops_to_check=[T.Sum])
5578
5579        f = theano.function([vect, scalar],
5580                            T.elemwise.Prod()(vect * scalar), mode=m0)
5581        assert check_stack_trace(f, ops_to_check=[T.elemwise.Prod])
5582
5583        f = theano.function([vect], T.elemwise.Prod()(-vect), mode=m0)
5584        assert check_stack_trace(f, ops_to_check=[T.elemwise.Prod])
5585
5586        f = theano.function([mat, scalar], T.sum(mat * scalar), mode=m0)
5587        assert check_stack_trace(f, ops_to_check='all')
5588
5589        f = theano.function([mat], T.sum(-mat), mode=m0)
5590        assert check_stack_trace(f, ops_to_check=[T.Sum])
5591
5592
5593class T_local_opt_alloc(unittest.TestCase):
5594    dtype = 'float32'
5595
5596    def test_sum_upcast(self):
5597        s = theano.tensor.lscalar()
5598        a = theano.tensor.alloc(np.asarray(5, dtype=self.dtype), s, s)
5599        orig = theano.config.warn_float64
5600        theano.config.warn_float64 = "raise"
5601        try:
5602            f = theano.function([s], a.sum())
5603            f(5)
5604        finally:
5605            theano.config.warn_float64 = orig
5606
5607    def test_prod_upcast(self):
5608        s = theano.tensor.lscalar()
5609        a = theano.tensor.alloc(np.asarray(5, dtype=self.dtype), s, s)
5610        orig = theano.config.warn_float64
5611        theano.config.warn_float64 = "raise"
5612        try:
5613            f = theano.function([s], a.prod())
5614            f(5)
5615        finally:
5616            theano.config.warn_float64 = orig
5617
5618    @change_flags(on_opt_error='raise')
5619    def test_sum_bool_upcast(self):
5620        s = theano.tensor.lscalar()
5621        a = theano.tensor.alloc(np.asarray(True, dtype='bool'), s, s)
5622        f = theano.function([s], a.sum())
5623        f(5)
5624        # test with user specified dtype
5625        f = theano.function([s], a.sum(dtype=self.dtype))
5626        f(5)
5627        # test only 1 axis summed
5628        f = theano.function([s], a.sum(axis=0, dtype=self.dtype))
5629        f(5)
5630        print(self.dtype)
5631
5632
5633class T_local_opt_alloc_f16(T_local_opt_alloc):
5634    dtype = 'float16'
5635
5636
5637class T_local_reduce(unittest.TestCase):
5638    def setUp(self):
5639        self.mode = theano.compile.get_default_mode().including(
5640            'canonicalize',
5641            'specialize',
5642            'uncanonicalize', 'local_max_and_argmax')
5643
5644    def test_local_reduce_broadcast_all_0(self):
5645        for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
5646                    tensor.max, tensor.min]:
5647            x = T.TensorType('int64', (True, True, True))()
5648            f = theano.function([x], [fct(x)], mode=self.mode)
5649            assert not any([
5650                isinstance(node.op, T.CAReduce)
5651                for node in f.maker.fgraph.toposort()])
5652
5653    def test_local_reduce_broadcast_all_1(self):
5654        for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
5655                    tensor.max, tensor.min]:
5656            x = T.TensorType('int64', (True, True))()
5657            f = theano.function([x], [fct(x, axis=[0, 1])], mode=self.mode)
5658            assert not any([
5659                isinstance(node.op, T.CAReduce)
5660                for node in f.maker.fgraph.toposort()])
5661
5662    def test_local_reduce_broadcast_some_0(self):
5663        for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
5664                    tensor.max, tensor.min]:
5665            x = T.TensorType('int64', (True, False, True))()
5666            f = theano.function([x], [fct(x, axis=[0, 1])], mode=self.mode)
5667
5668            order = f.maker.fgraph.toposort()
5669            assert 1 == sum([isinstance(node.op, T.CAReduce)
5670                             for node in order])
5671
5672            node = [node for node in order if isinstance(node.op,
5673                                                         tensor.CAReduce)][0]
5674
5675            op = node.op
5676            assert isinstance(op, T.CAReduce)
5677            # -- the leading broadcastable dimension has been dropped
5678            #   by the local_reduce_broadcastable optimization
5679            #   now summation is over the original x's dimension 1.
5680            assert node.inputs[0].ndim == 2, node
5681            assert op.axis == (0,), op.axis
5682
5683    def test_local_reduce_broadcast_some_1(self):
5684        for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
5685                    tensor.max, tensor.min]:
5686            x = T.TensorType('int64', (True, True, True))()
5687            f = theano.function([x], [fct(x, axis=[0, 2])], mode=self.mode)
5688            assert not any([
5689                isinstance(node.op, T.CAReduce)
5690                for node in f.maker.fgraph.toposort()])
5691
5692    def test_local_reduce_join(self):
5693        vx = matrix()
5694        vy = matrix()
5695        vz = matrix()
5696        x = np.asarray([[1, 0], [3, 4]], dtype=config.floatX)
5697        y = np.asarray([[4, 0], [2, 1]], dtype=config.floatX)
5698        z = np.asarray([[5, 0], [1, 2]], dtype=config.floatX)
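        # Sketch of local_reduce_join (informal): reducing a join/stack along
        # the joined axis can be rewritten as an elementwise operation on the
        # joined inputs, e.g. T.max((vx, vy), 0) becomes an elementwise
        # maximum(vx, vy), which is why the checks below expect an Elemwise
        # as the last node instead of a CAReduce.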
        # Test different reduction scalar operations
5700        for out, res in [
5701            (T.max((vx, vy), 0), np.max((x, y), 0)),
5702            (T.min((vx, vy), 0), np.min((x, y), 0)),
5703            (T.sum((vx, vy, vz), 0), np.sum((x, y, z), 0)),
5704            (T.prod((vx, vy, vz), 0), np.prod((x, y, z), 0)),
5705            (T.prod((vx, vy.T, vz), 0), np.prod((x, y.T, z), 0)),
5706        ]:
5707            f = theano.function([vx, vy, vz], out,
5708                                on_unused_input='ignore', mode=self.mode)
5709            assert (f(x, y, z) == res).all(), out
5710            topo = f.maker.fgraph.toposort()
5711            assert len(topo) <= 2, out
5712            assert isinstance(topo[-1].op, T.Elemwise), out
5713
        # Test different axes for the join and the reduction.
        # We must force the dtype, otherwise this test will fail
        # on 32 bit systems.
5717        A = theano.shared(np.array([1, 2, 3, 4, 5], dtype='int64'))
5718
5719        f = theano.function([], T.sum(T.stack([A, A]), axis=0), mode=self.mode)
5720        utt.assert_allclose(f(), [2, 4, 6, 8, 10])
5721        topo = f.maker.fgraph.toposort()
5722        assert isinstance(topo[-1].op, T.Elemwise)
5723
        # Test a case that was buggy in an old version of Theano.
5725        try:
5726            old = theano.config.warn.reduce_join
5727            theano.config.warn.reduce_join = False
5728            f = theano.function([], T.sum(T.stack([A, A]), axis=1),
5729                                mode=self.mode)
5730        finally:
5731            theano.config.warn.reduce_join = old
5732        utt.assert_allclose(f(), [15, 15])
5733        topo = f.maker.fgraph.toposort()
5734        assert not isinstance(topo[-1].op, T.Elemwise)
5735
5736        # This case could be optimized
5737        A = theano.shared(np.array([1, 2, 3, 4, 5]).reshape(5, 1))
5738        f = theano.function([], T.sum(T.concatenate((A, A), axis=1), axis=1),
5739                            mode=self.mode)
5740        utt.assert_allclose(f(), [2, 4, 6, 8, 10])
5741        topo = f.maker.fgraph.toposort()
5742        assert not isinstance(topo[-1].op, T.Elemwise)
5743
5744        A = theano.shared(np.array([1, 2, 3, 4, 5]).reshape(5, 1))
5745        f = theano.function([], T.sum(T.concatenate((A, A), axis=1), axis=0),
5746                            mode=self.mode)
5747        utt.assert_allclose(f(), [15, 15])
5748        topo = f.maker.fgraph.toposort()
5749        assert not isinstance(topo[-1].op, T.Elemwise)
5750
5751        # Test that the optimization does not crash in one case where it
5752        # is not applied.  Reported at
5753        # https://groups.google.com/d/topic/theano-users/EDgyCU00fFA/discussion
5754        old = theano.config.warn.reduce_join
5755        try:
5756            theano.config.warn.reduce_join = False
5757            out = tensor.sum([vx, vy, vz], axis=None)
5758            f = theano.function([vx, vy, vz], out)
5759        finally:
5760            theano.config.warn.reduce_join = old
5761
5762
5763class T_local_sum_prod_dimshuffle(unittest.TestCase):
5764    def setUp(self):
5765        self.mode = theano.compile.get_default_mode().including('canonicalize')
5766
5767    def test_local_sum_div_dimshuffle(self):
5768        a = T.matrix('a')
5769        b = T.vector('b')
5770        c = T.tensor3('c')
5771        d = T.scalar('d')
5772        sum = tensor.sum
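        # Identity behind local_sum_div_dimshuffle (for reference): when the
        # denominator does not depend on the reduced axes (e.g. a broadcasted
        # scalar d), sum(a / d, axis) == sum(a, axis) / d, so the division
        # can be moved outside the reduction; each compiled graph below
        # should therefore end in a true_div.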
5773        sums = [
5774            sum(a / d),
5775            sum(a / d.dimshuffle('x', 'x')),
5776            sum(a / d.dimshuffle('x', 'x'), axis=0),
5777            sum(a / d.dimshuffle('x', 'x'), axis=1),
5778            sum(b / d),
5779            sum(b / d.dimshuffle('x')),
5780            sum(c / d),
5781            sum(c / d.dimshuffle('x', 'x', 'x')),
5782            sum(c / d.dimshuffle('x', 'x', 'x'), axis=0),
5783            sum(c / d.dimshuffle('x', 'x', 'x'), axis=1),
5784            sum(c / d.dimshuffle('x', 'x', 'x'), axis=2),
5785
5786            sum(a / b, axis=0),
5787            sum(a / b.dimshuffle(0, 'x'), axis=1),
5788            sum(a.dimshuffle(0, 1) / b.dimshuffle(0, 'x'), axis=1),
5789            sum(a.dimshuffle(1, 0) / b.dimshuffle(0, 'x'), axis=1),
5790            sum(c / a, axis=0),
5791            sum(c / a.dimshuffle(1, 0), axis=0),
5792            sum(c / a.dimshuffle(0, 'x', 1), axis=1),
5793            sum(c / a.dimshuffle(1, 'x', 0), axis=1),
5794            sum(c / a.dimshuffle(0, 1, 'x'), axis=2),
5795            sum(c / a.dimshuffle(1, 0, 'x'), axis=2),
5796            sum(c / b, axis=0),
5797            sum(c / b, axis=1),
5798            sum(c / b, axis=(0, 1)),
5799            sum(c / b.dimshuffle(0, 'x'), axis=0),
5800            sum(c / b.dimshuffle(0, 'x'), axis=2),
5801            sum(c / b.dimshuffle(0, 'x'), axis=(0, 2)),
5802            sum(c / b.dimshuffle(0, 'x', 'x'), axis=1),
5803            sum(c / b.dimshuffle(0, 'x', 'x'), axis=2),
5804            sum(c / b.dimshuffle(0, 'x', 'x'), axis=(1, 2)),
5805            sum(sum(c, axis=0) / b, axis=0),
5806            sum(sum(c, axis=1) / b, axis=0),
5807            ]
5808
5809        rng = np.random.RandomState(utt.fetch_seed())
5810        a_val = rng.randn(2, 2).astype(config.floatX)
5811        b_val = rng.randn(2).astype(config.floatX)
5812        c_val = rng.randn(2, 2, 2).astype(config.floatX)
5813        d_val = np.asarray(rng.randn(), config.floatX)
5814
5815        backup = config.warn.sum_sum_bug, config.warn.sum_div_dimshuffle_bug
5816        config.warn.sum_sum_bug = False
5817        config.warn.sum_div_dimshuffle_bug = False
5818        try:
5819            for i, s in enumerate(sums):
5820                print(i)
5821                f = theano.function([a, b, c, d], s, mode=self.mode,
5822                                    on_unused_input='ignore')
5823                g = f.maker.fgraph.toposort()
5824                assert isinstance(g[-1].op.scalar_op,
5825                                  theano.scalar.basic.TrueDiv)
5826                f(a_val, b_val, c_val, d_val)
5827        finally:
5828            config.warn.sum_sum_bug, config.warn.sum_div_dimshuffle_bug =\
5829                backup
5830
5831    def test_local_prod_div_dimshuffle(self):
5832        a = T.matrix('a')
5833        b = T.vector('b')
5834        c = T.tensor3('c')
5835        e = T.matrix('e')
5836        d = T.scalar('d')
5837        prod = T.prod
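        # For prod the corresponding identity needs the number of reduced
        # elements n: prod(a / d, axis) == prod(a, axis) / d**n when d does
        # not depend on the reduced axes, which is why the optimized graphs
        # checked further down can end in Mul or Composite nodes rather than
        # a plain TrueDiv.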
5838        prods = [
5839            prod(a / d),
5840            prod(a / d.dimshuffle('x', 'x')),
5841            prod(a / d.dimshuffle('x', 'x'), axis=0),
5842            prod(a / d.dimshuffle('x', 'x'), axis=1),
5843            prod(b / d),
5844            prod(b / d.dimshuffle('x')),
5845            prod(c / d),
5846            prod(c / d.dimshuffle('x', 'x', 'x')),
5847            prod(c / d.dimshuffle('x', 'x', 'x'), axis=0),
5848            prod(c / d.dimshuffle('x', 'x', 'x'), axis=1),
5849            prod(c / d.dimshuffle('x', 'x', 'x'), axis=2),
5850
5851            prod(a / b, axis=0),
5852            prod(a / b.dimshuffle(0, 'x'), axis=1),
5853            prod(a.dimshuffle(0, 1) / b.dimshuffle(0, 'x'), axis=1),
5854            prod(a.dimshuffle(1, 0) / b.dimshuffle(0, 'x'), axis=1),
5855            prod(c / a, axis=0),
5856            prod(c / a.dimshuffle(1, 0), axis=0),
5857            prod(c / a.dimshuffle(0, 'x', 1), axis=1),
5858            prod(c / a.dimshuffle(1, 'x', 0), axis=1),
5859            prod(c / a.dimshuffle(0, 1, 'x'), axis=2),
5860            prod(c / a.dimshuffle(1, 0, 'x'), axis=2),
5861            prod(c / b, axis=0),
5862            prod(c / b, axis=1),
5863            prod(c / b, axis=(0, 1)),
5864            prod(c / b.dimshuffle(0, 'x'), axis=0),
5865            prod(c / b.dimshuffle(0, 'x'), axis=2),
5866            prod(c / b.dimshuffle(0, 'x'), axis=(0, 2)),
5867            prod(c / b.dimshuffle(0, 'x', 'x'), axis=1),
5868            prod(c / b.dimshuffle(0, 'x', 'x'), axis=2),
5869            prod(c / b.dimshuffle(0, 'x', 'x'), axis=(1, 2)),
5870            prod(c / b.dimshuffle(0, 'x', 'x'), axis=(0, 1)),
5871            prod(c / b.dimshuffle(0, 'x', 'x'), axis=(1, 0)),
5872            prod(prod(c, axis=0) / b, axis=0),
5873            prod(prod(c, axis=1) / b, axis=0)]
5874
5875        rng = np.random.RandomState(utt.fetch_seed())
5876        a_val = rng.randn(2, 2).astype(config.floatX)
5877        b_val = rng.randn(2).astype(config.floatX)
5878        c_val = rng.randn(2, 2, 2).astype(config.floatX)
5879        d_val = np.asarray(rng.randn(), config.floatX)
5880
5881        default_mode = theano.compile.mode.get_default_mode()
5882        # FusionOptimizer is included to make sure that expected_outer_operator
5883        # remains the same for all optimization modes.
5884        mode_with_opt = default_mode.including('local_sum_prod_div_dimshuffle',
5885                                               'FusionOptimizer')
5886        mode_without_opt = default_mode.excluding('local_sum_prod_div_dimshuffle')
5887
        # Numerical tests: check whether the numerical values with and
        #                  without the optimizer are equal.
5890        for i, s in enumerate(prods):
5891            f = theano.function([a, b, c, d], s,
5892                                on_unused_input='ignore',
5893                                mode=mode_without_opt)
5894            g = theano.function([a, b, c, d], s,
5895                                on_unused_input='ignore',
5896                                mode=mode_with_opt)
5897
5898            utt.assert_allclose(f(a_val, b_val, c_val, d_val),
5899                                g(a_val, b_val, c_val, d_val))
5900
        # Logical tests: check whether the optimizer has been applied by
        #                inspecting the graph structure.
5903        prods = [
5904            prod(a / e),
5905            prod(a / d),
5906            prod(a / d.dimshuffle('x', 'x')),
5907            prod(c / d.dimshuffle('x', 'x', 'x'), axis=1),
5908            prod(a.dimshuffle(1, 0) / b.dimshuffle(0, 'x'), axis=1),
5909            prod(c / b.dimshuffle(0, 'x', 'x'), axis=(1, 0)),
5910            prod(prod(c, axis=1) / b, axis=0),
5911            prod(prod(c, axis=(1, 2)) / b, axis=0)]
5912
5913        expected_outer_operator = [theano.scalar.basic.Mul,
5914                                   theano.scalar.basic.Composite,
5915                                   theano.scalar.basic.Composite,
5916                                   theano.scalar.basic.TrueDiv,
5917                                   theano.scalar.basic.Composite,
5918                                   theano.scalar.basic.Mul,
5919                                   theano.scalar.basic.Composite,
5920                                   theano.scalar.basic.Mul]
5921
5922        for i, s in enumerate(prods):
5923            g = theano.function([a, b, c, d, e], s,
5924                                on_unused_input='ignore',
5925                                mode=mode_with_opt)
5926            assert isinstance(g.maker.fgraph.toposort()[-1].op.scalar_op,
5927                              expected_outer_operator[i])
5928
5929    # TODO:
5930    # test_local_sum_prod_dimshuffle (a * b * c)
5931    # test_local_sum_divprod_dimshuffle ((a * b) / (c * d))
5932
5933
5934class TestMakeVector(utt.InferShapeTester):
5935
5936    def setUp(self):
5937        super(TestMakeVector, self).setUp()
5938
5939    def test_make_vector(self):
5940        b = T.bscalar()
5941        i = T.iscalar()
5942        d = T.dscalar()
5943
5944        # TODO: draw random values instead. Not really important.
5945        val = {b: 2,
5946               i: -3,
5947               d: 0.7}
5948
5949        # Should work
5950        for (dtype, inputs) in [("int8", (b, b)),
5951                                ("int32", (i, b)),
5952                                ("int32", (b, i)),
5953                                ("float64", (b, i)),
5954                                ("float64", (b, d)),
5955                                ("float64", (d, i)),
5956                                ("float64", ()),
5957                                ("int64", ()),
5958                                ]:
5959            mv = opt.MakeVector(dtype=dtype)(*inputs)
5960            assert mv.dtype == dtype
5961            f = theano.function([b, i, d], mv, on_unused_input='ignore')
5962            f(val[b], val[i], val[d])
5963
5964            s = mv.sum()
5965            gb = T.grad(s, b, disconnected_inputs='ignore')
5966            gi = T.grad(s, i, disconnected_inputs='ignore')
5967            gd = T.grad(s, d, disconnected_inputs='ignore')
5968            # print 'gb =', gb
5969            # print 'gi =', gi
5970            # print 'gd =', gd
5971
5972            g = theano.function([b, i, d], [gb, gi, gd])
5973            g_val = g(val[b], val[i], val[d])
5974            # print 'g_val =', g_val
5975
5976            if dtype in tensor.int_dtypes:
5977                # The gradient should be 0
5978                utt.assert_allclose(g_val, 0)
5979            else:
                float_inputs = []
                for var, grval in zip((b, i, d), g_val):
5982                    if var.dtype in tensor.int_dtypes:
5983                        pass
                        # Currently we do not do any checks on these
                        # variables: verify_grad does not support integer
                        # inputs yet. However, the gradient on them is *not*
                        # defined to be 0.
5988                    elif var not in inputs:
5989                        assert grval == 0
5990                    else:
5991                        float_inputs.append(var)
5992
                # Build a function that takes the float inputs, uses fixed
                # values for the other inputs, and returns the MakeVector.
                # Use it for verify_grad.
5995                if float_inputs:
                    def fun(*fl_inputs):
                        f_inputs = []
                        for var in inputs:
                            if var in float_inputs:
                                # use the symbolic variable provided by
                                # verify_grad for this float input
                                f_inputs.append(
                                    fl_inputs[float_inputs.index(var)])
                            else:
                                # use the constant value
                                f_inputs.append(val[var])
                        return opt.MakeVector(dtype=dtype)(*f_inputs)
6006
6007                    utt.verify_grad(fun, [val[ri] for ri in float_inputs])
6008
6009        # should fail
6010        for (dtype, inputs) in [("int8", (b, i)),
6011                                ("int8", (i, b)),
6012                                ("int8", (b, d)),
6013                                ("int8", (i, i)),
6014                                ("int32", (d, i)),
6015                                ("int32", (i, d)),
6016                                ("float32", (i, d)),
6017                                ]:
6018            try:
6019                opt.MakeVector(dtype=dtype)(*inputs)
6020                raise Exception("Theano should have raised an error")
6021            except AssertionError:
6022                pass
6023
6024    def test_infer_shape(self):
6025        adscal = dscalar()
6026        bdscal = dscalar()
6027        aiscal = iscalar()
6028        biscal = iscalar()
6029        ciscal = iscalar()
6030        discal = iscalar()
6031        adscal_val = np.random.rand()
6032        bdscal_val = np.random.rand()
6033        aiscal_val = np.random.randint(10)
6034        biscal_val = np.random.randint(10)
6035        ciscal_val = np.random.randint(10)
6036        discal_val = np.random.randint(10)
6037        self._compile_and_check([adscal, aiscal],
6038                                [MakeVector('float64')(adscal, aiscal)],
6039                                [adscal_val, aiscal_val], MakeVector)
6040
6041        self._compile_and_check([adscal, bdscal, aiscal],
6042                                [MakeVector('float64')(adscal, bdscal, aiscal)],
6043                                [adscal_val, bdscal_val, aiscal_val], MakeVector)
6044
6045        self._compile_and_check([aiscal, biscal, ciscal, discal],
6046                                [MakeVector('int32')(aiscal, biscal, ciscal, discal)],
6047                                [aiscal_val, biscal_val, ciscal_val, discal_val],
6048                                MakeVector)
6049
6050
6051def test_local_join_1():
6052    # test for vector
6053    a = tensor.vector('a')
6054    s = tensor.stack([a])
6055    f = function([a], s, mode=mode_opt)
6056    val = f([1])
6057    assert np.all(val == [1])
6058    e = f.maker.fgraph.toposort()
6059    assert len([n for n in e if isinstance(n.op, Join)]) == 0
6060    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6061
6062    # test for matrix join(0,a)
6063    a = tensor.matrix('a')
6064    s = join(0, a)
6065    f = function([a], s, mode=mode_opt)
6066    val = f([[1]])
6067    assert np.all(val == [[1]])
6068    e = f.maker.fgraph.toposort()
6069    assert len([n for n in e if isinstance(n.op, Join)]) == 0
6070    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6071
6072    # test for matrix join(1,a)
6073    s = join(1, a)
6074    f = function([a], s, mode=mode_opt)
6075    val = f([[1]])
6076    assert np.all(val == [[1]])
6077    e = f.maker.fgraph.toposort()
6078    assert len([n for n in e if isinstance(n.op, Join)]) == 0
6079    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6080
    # test that we do not apply the optimization when there are 2 inputs
6082    s = join(1, a, a)
6083    f = function([a], s, mode=mode_opt)
6084    val = f([[1]])
6085    assert np.all(val == [[1]])
6086    e = f.maker.fgraph.toposort()
6087    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6088    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6089
6090
6091def test_local_join_empty():
6092    # test for vector, vector, empty to vector
6093    empty_vec = np.asarray([], dtype=config.floatX)
6094    a = tensor.vector('a')
6095    s = tensor.join(0, a, a, empty_vec)
6096    f = function([a], s, mode=mode_opt)
6097    val = f([1])
6098    assert np.all(val == [1])
6099    e = f.maker.fgraph.toposort()
6100    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6101    assert all([not isinstance(n.op, Join) or len(n.inputs) == 3
6102                for n in e if isinstance(n.op, Join)])
6103    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6104
6105    # test for matrix join(1,a)
6106    empty_mat = np.asarray([[]], dtype=config.floatX)
6107    m = tensor.matrix('m')
6108    s = join(1, empty_mat, m, m, m)
6109    f = function([m], s, mode=mode_opt)
6110    val = f([[1]])
6111    assert np.all(val == [[1]])
6112    e = f.maker.fgraph.toposort()
6113    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6114    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
6115                for n in e if isinstance(n.op, Join)])
6116    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6117    # test for vector, vector, empty to matrix
6118    # We can't optimize this case.
6119    s = tensor.stack([a, a, empty_vec])
6120    f = function([a], s, mode=mode_opt)
6121    val = f([])
6122    assert np.all(val == [1])
6123    e = f.maker.fgraph.toposort()
6124    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6125    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
6126                for n in e if isinstance(n.op, Join)])
6127    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6128    # test for matrix join(0,a)
6129    # We can't optimize this case.
6130    s = join(0, m, np.asarray([[2.]], dtype=config.floatX), m)
6131    f = function([m], s, mode=mode_opt)
6132    val = f([[1]])
6133    assert np.all(val == [[1], [2], [1]])
6134    e = f.maker.fgraph.toposort()
6135    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6136    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
6137                for n in e if isinstance(n.op, Join)])
6138    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6139
6140
6141def test_local_join_make_vector():
6142    a, b, c, d, e = tensor.scalars('abcde')
6143    v = tensor.vector('v')
6144    mv = MakeVector(config.floatX)
6145    s = tensor.join(0, mv(a), v, mv(b, c), mv(d, e))
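    # Expected effect (informal): adjacent MakeVector inputs of a join along
    # axis 0 can be merged into a single MakeVector, so
    #     join(0, mv(a), v, mv(b, c), mv(d, e))
    # should end up with mv(b, c) and mv(d, e) collapsed into one
    # make_vector, leaving a Join node with 4 inputs (the axis plus 3
    # tensors), as asserted below.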
6146    f = function([a, b, c, d, e, v], s, mode=mode_opt)
6147    theano.printing.debugprint(f)
6148    val = f(1, 2, 3, 4, 6, [7, 8])
6149    assert np.all(val == [1, 7, 8, 2, 3, 4, 6])
6150    e = f.maker.fgraph.toposort()
6151    assert len([n for n in e if isinstance(n.op, Join)]) == 1
6152    assert all([not isinstance(n.op, Join) or len(n.inputs) == 4
6153                for n in e if isinstance(n.op, Join)])
6154    assert f.maker.fgraph.outputs[0].dtype == config.floatX
6155
6156    assert check_stack_trace(f, ops_to_check='all')
6157
6158
6159def test_local_add_specialize():
6160    # test of non-zero dimension
6161    a = tensor.vector()
6162    s = tensor.add(tensor.zeros_like(a))
6163    assert local_add_specialize.transform(s.owner)
6164
6165    # test of 0-d
6166    a = tensor.scalar()
6167    s = tensor.add(tensor.zeros_like(a))
6168    assert local_add_specialize.transform(s.owner)
6169
6170    # Test when the 0 input is forcing upcasting
6171    a = tensor.constant(0, dtype='int64')
6172    b = tensor.constant(1, dtype='int32')
6173    s = a + b
6174    transformed = local_add_specialize.transform(s.owner)
6175    assert transformed
6176    assert transformed[0].type == s.type
6177
6178
6179def test_local_tensor_scalar_tensor():
6180    dtypes = ['int8', 'int16', 'int32', 'int64',
6181              'uint8', 'uint16', 'uint32', 'uint64',
6182              'float32', 'float64',
6183              'complex64', 'complex128'
6184              ]
6185
6186    for dtype in dtypes:
6187        t_type = TensorType(dtype=dtype, broadcastable=())
6188        t = t_type()
6189        s = tensor.scalar_from_tensor(t)
6190        t2 = tensor.tensor_from_scalar(s)
6191
6192        f = function([t], t2, mode=mode_opt)
6193        e = f.maker.fgraph.toposort()
6194        cast_nodes = [n for n in e
6195                      if isinstance(n.op, (tensor.TensorFromScalar,
6196                                           tensor.ScalarFromTensor))]
6197        assert len(cast_nodes) == 0
6198        f(0)
6199
6200
6201def test_local_scalar_tensor_scalar():
6202    dtypes = ['int8', 'int16', 'int32', 'int64',
6203              'uint8', 'uint16', 'uint32', 'uint64',
6204              'float32', 'float64',
6205              'complex64', 'complex128'
6206              ]
6207
6208    for dtype in dtypes:
6209        s_type = theano.scalar.Scalar(dtype=dtype)
6210        s = s_type()
6211        t = tensor.tensor_from_scalar(s)
6212        s2 = tensor.scalar_from_tensor(t)
6213
6214        f = function([s], s2, mode=mode_opt)
6215        e = f.maker.fgraph.toposort()
6216        cast_nodes = [n for n in e
6217                      if isinstance(n.op, (tensor.TensorFromScalar,
6218                                           tensor.ScalarFromTensor))]
6219        assert len(cast_nodes) == 0
6220        f(0)
6221
6222
6223def test_local_div_to_inv():
6224    num_len_s = tensor.lscalar('num_len')
6225    denom_s = tensor.scalar('denom')
6226
6227    num_v = tensor.alloc(1, num_len_s)
6228    denom_m = denom_s.dimshuffle('x', 'x')
6229
6230    out = num_v / denom_m
6231    assert np.all(out.broadcastable == (True, False))
6232
6233    f = theano.function([num_len_s, denom_s], out)
6234    out_val = f(3, 2.)
6235    assert out_val.shape == (1, 3)
6236    utt.assert_allclose(out_val, 0.5)
6237
6238
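# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): the shape assertion in
# test_local_div_to_inv above follows from ordinary broadcasting, where
# dividing a length-3 vector by a (1, 1) denominator yields a (1, 3) result.
def _demo_div_broadcast_shape():
    num = np.ones(3)
    denom = np.full((1, 1), 2.)
    out = num / denom
    assert out.shape == (1, 3)
    assert np.allclose(out, 0.5)

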
def test_local_useless_split():
    x = tensor.matrix('x')
    splits = tensor.ivector('splits')
    opt = tensor.split(x, splits, n_splits=1)
    nonopt = tensor.split(x, splits, n_splits=3)

    mode = compile.get_default_mode().including("local_useless_split")
    f_opt = theano.function([x, splits], opt, mode=mode)
    f_nonopt = theano.function([x, splits], nonopt, mode=mode)

    f_opt(np.random.rand(4, 4).astype(config.floatX), [4])
    f_nonopt(np.random.rand(4, 4).astype(config.floatX), [1, 2, 1])
    graph_opt = f_opt.maker.fgraph.toposort()
    graph_nonopt = f_nonopt.maker.fgraph.toposort()

    assert isinstance(graph_opt[-1].op, DeepCopyOp)
    assert len(graph_nonopt) == 1
    assert isinstance(graph_nonopt[0].op, tensor.Split)

    assert check_stack_trace(f_opt, ops_to_check=[Assert])
    assert check_stack_trace(f_nonopt, ops_to_check='all')


def test_local_flatten_lift():
    for i in xrange(1, 4):
        x = tensor.tensor4()
        out = tensor.flatten(T.exp(x), i)
        assert out.ndim == i
        mode = compile.mode.get_default_mode()
        mode = mode.including('local_reshape_lift')
        f = theano.function([x], out, mode=mode)
        x_np = np.random.rand(5, 4, 3, 2).astype(config.floatX)
        out_np = f(x_np)
        topo = f.maker.fgraph.toposort()
        shape_out_np = tuple(x_np.shape[:i - 1]) + (np.prod(x_np.shape[i - 1:]),)
        assert shape_out_np == out_np.shape

        reshape_nodes = [n for n in topo if isinstance(n.op, tensor.Reshape)]
        assert (len(reshape_nodes) == 1 and
                tensor.is_flat(reshape_nodes[0].outputs[0], ndim=i))
        assert isinstance(topo[-1].op, tensor.Elemwise)


class Test_Reshape(unittest.TestCase):
    def setUp(self):
        self.mode = mode_opt
        self.op = tensor.Reshape

    def test_local_reshape(self):
        a = tensor.fmatrix()
        b = self.op(3)(a, [2, 3, 4])
        c = self.op(1)(b, [24])
        f = theano.function([a], c, mode=self.mode)
        topo = f.maker.fgraph.toposort()
        assert sum(isinstance(node.op, self.op) for node in topo) == 1

        # Check stack trace
        self.assertTrue(check_stack_trace(f, ops_to_check=[self.op]))


class Test_local_useless_reshape(unittest.TestCase):
    def setUp(self):
        self.rng = np.random.RandomState(utt.fetch_seed())

    def test_0(self):
        mode = theano.compile.get_default_mode().including(
            'local_useless_reshape')
        i = T.iscalar('i')
        m = theano.tensor.mgrid[0:i, ]
        f = theano.function([i], m, mode=mode)
        topo = f.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

    def test_1(self):
        x = theano.tensor.matrix('x')
        r = x.reshape(x.shape)

        m0 = theano.compile.get_default_mode()
        m1 = m0.including('local_useless_reshape')
        f1 = theano.function([x], r, mode=m1)
        topo = f1.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

        m2 = m1.excluding('ShapeOpt')
        f2 = theano.function([x], r, mode=m2)
        topo = f2.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

        # We do not need tests checking that stack traces are copied over,
        # because local_useless_reshape only removes nodes from the graph

    def test_2(self):
        x = theano.tensor.matrix('x')
        r = x.reshape([Shape_i(i)(x) for i in xrange(x.ndim)])

        m0 = theano.compile.get_default_mode()
        m1 = m0.including('local_useless_reshape')
        f1 = theano.function([x], r, mode=m1)
        topo = f1.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

        m2 = m1.excluding('ShapeOpt')
        f2 = theano.function([x], r, mode=m2)
        topo = f2.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

    def test_m1(self):
        x = theano.tensor.matrix('x')
        r = x.reshape((x.shape[0], -1))

        m0 = theano.compile.get_default_mode()
        m1 = m0.including('local_useless_reshape')
        f1 = theano.function([x], r, mode=m1)
        topo = f1.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)

        m2 = m1.excluding('ShapeOpt')
        f2 = theano.function([x], r, mode=m2)
        topo = f2.maker.fgraph.toposort()
        assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)


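# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): the reshapes removed above
# are exactly the ones numpy treats as identities, i.e. reshaping an array to
# its own shape (or to (shape[0], -1)) leaves the values unchanged.
def _demo_reshape_to_own_shape_is_identity():
    x = np.arange(6.).reshape(2, 3)
    assert np.array_equal(x.reshape(x.shape), x)
    assert np.array_equal(x.reshape((x.shape[0], -1)), x)

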
class Test_local_reshape_to_dimshuffle(unittest.TestCase):
    def setUp(self):
        self.rng = np.random.RandomState(utt.fetch_seed())

    def test_1(self):
        reshape_lift = out2in(local_reshape_to_dimshuffle)
        useless_reshape = out2in(local_useless_reshape)
        x = shared(self.rng.randn(4,))
        y = shared(self.rng.randn(5, 6))
        reshape_x = tensor.reshape(x, (1, 4))
        reshape_y = tensor.reshape(y, (1, 5, 1, 6, 1, 1))

        g = FunctionGraph([x, y], [reshape_x, reshape_y])
        self.assertTrue(str(g) == ("[Reshape{2}"
                                   "(<TensorType(float64, vector)>, "
                                   "TensorConstant{[1 4]}), "
                                   "Reshape{6}"
                                   "(<TensorType(float64, matrix)>, "
                                   "TensorConstant{[1 5 1 6 1 1]})]"))

        reshape_lift.optimize(g)
        useless_reshape.optimize(g)
        self.assertTrue(str(g) == "[InplaceDimShuffle{x,0}"
                                  "(<TensorType(float64, vector)>), "
                                  "InplaceDimShuffle{x,0,x,1,x,x}"
                                  "(Reshape{2}(<TensorType(float64, matrix)>, "
                                  "TensorConstant{[5 6]}))]")

        # Check stacktrace was copied over correctly after opt was applied
        assert check_stack_trace(g, ops_to_check=(T.DimShuffle, T.Reshape))


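# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): reshapes that only insert
# broadcastable (length-1) dimensions are equivalent to adding new axes, which
# is what the DimShuffle produced above expresses; plain numpy shows the same
# equivalence.
def _demo_reshape_as_new_axes():
    v = np.arange(4.)
    assert np.array_equal(v.reshape(1, 4), v[np.newaxis, :])
    m = np.arange(30.).reshape(5, 6)
    assert np.array_equal(
        m.reshape(1, 5, 1, 6, 1, 1),
        m[np.newaxis, :, np.newaxis, :, np.newaxis, np.newaxis])

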
def test_local_reshape_lift():
    x = tensor.tensor4()
    out = T.exp(x).reshape([x.size])
    assert out.ndim == 1
    mode = compile.mode.get_default_mode()
    mode = mode.including('local_reshape_lift')
    f = theano.function([x], out, mode=mode)
    f(np.random.rand(5, 4, 3, 2).astype(config.floatX))
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[-2].op, tensor.Reshape)
    assert isinstance(topo[-1].op, tensor.Elemwise)
    # Check stacktrace was copied over correctly after opt was applied
    assert check_stack_trace(f, ops_to_check='last')


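# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): lifting a reshape above an
# elementwise op is valid because elementwise functions commute with reshape;
# plain numpy makes the equivalence explicit.
def _demo_reshape_commutes_with_elemwise():
    x = np.random.rand(5, 4, 3, 2)
    assert np.allclose(np.exp(x).reshape(-1), np.exp(x.reshape(-1)))

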
class Test_lift_transpose_through_dot(unittest.TestCase):
    def simple_optimize(self, g):
        out2in(opt.local_useless_elemwise).optimize(g)
        out2in(opt.local_lift_transpose_through_dot).optimize(g)
        out2in(opt.local_useless_elemwise).optimize(g)
        return g

    def test_matrix_matrix(self):
        a, b = matrices('ab')
        g = self.simple_optimize(FunctionGraph([a, b], [tensor.dot(a, b).T]))
        sg = '[dot(InplaceDimShuffle{1,0}(b), InplaceDimShuffle{1,0}(a))]'
        assert str(g) == sg, (str(g), sg)
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))

    def test_row_matrix(self):
        a = vector('a')
        b = matrix('b')
        g = optimize(FunctionGraph(
            [a, b],
            [tensor.dot(a.dimshuffle('x', 0), b).T]),
            level='stabilize')
        sg = '[dot(InplaceDimShuffle{1,0}(b), InplaceDimShuffle{0,x}(a))]'
        assert str(g) == sg, (str(g), sg)
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))

    def test_matrix_col(self):
        a = vector('a')
        b = matrix('b')
        g = optimize(FunctionGraph(
            [a, b],
            [tensor.dot(b, a.dimshuffle(0, 'x')).T]),
            level='stabilize')
        sg = '[dot(InplaceDimShuffle{x,0}(a), InplaceDimShuffle{1,0}(b))]'
        assert str(g) == sg, (str(g), sg)
        # Check stacktrace was copied over correctly after opt was applied
        self.assertTrue(check_stack_trace(g, ops_to_check='all'))


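# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): the lift tested above relies
# on the linear-algebra identity (A . B)^T == B^T . A^T, which plain numpy
# confirms.
def _demo_transpose_through_dot_identity():
    a = np.random.rand(3, 4)
    b = np.random.rand(4, 5)
    assert np.allclose(np.dot(a, b).T, np.dot(b.T, a.T))

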
def test_local_upcast_elemwise_constant_inputs():
    s = dvector("s")
    x = tensor.sum(tensor.log(10 ** s))
    f = function([s], [tensor.grad(x, s)])
    f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12])

    # This tests a corner case where the optimization should not be applied.
    old = theano.config.floatX
    theano.config.floatX = 'float32'
    try:
        v = lvector()
        function([v], theano.tensor.basic.true_div(v, 2))
    finally:
        theano.config.floatX = old


class TestShape_i(utt.InferShapeTester):

    def setUp(self):
        super(TestShape_i, self).setUp()

    def test_perform(self):

        advec = vector()
        advec_val = np.random.rand(3).astype(config.floatX)
        f = function([advec], Shape_i(0)(advec))
        out = f(advec_val)
        utt.assert_allclose(out, advec_val.shape[0])

        admat = matrix()
        admat_val = np.random.rand(4, 3).astype(config.floatX)
        for i in xrange(2):
            f = function([admat], Shape_i(i)(admat))
            out = f(admat_val)
            utt.assert_allclose(out, admat_val.shape[i])

    def test_infer_shape(self):
        admat = matrix()
        admat_val = np.random.rand(3, 4).astype(config.floatX)
        self._compile_and_check([admat], [Shape_i(0)(admat)],
                                [admat_val], Shape_i)

        self._compile_and_check([admat], [Shape_i(1)(admat)],
                                [admat_val], Shape_i)


class TestShapeFeature(unittest.TestCase):
    def test_scalar(self):
        x = scalar()
        cst = T.constant(1).clone()
        o = x + cst
        fgraph = FunctionGraph([x], [o], clone=False)
        shape_feature = opt.ShapeFeature()
        fgraph.attach_feature(shape_feature)
        assert shape_feature.same_shape(x, o)

    def test_vector(self):
        x = vector()
        cst = T.constant(1).clone()
        o = x + cst
        fgraph = FunctionGraph([x], [o], clone=False)
        shape_feature = opt.ShapeFeature()
        fgraph.attach_feature(shape_feature)
        assert shape_feature.same_shape(x, o)

    def test_vector2(self):
        x = vector()
        y = vector()
        o = x + y
        fgraph = FunctionGraph([x, y], [o], clone=False)
        shape_feature = opt.ShapeFeature()
        fgraph.attach_feature(shape_feature)
        assert shape_feature.same_shape(x, o)
        # The following case isn't implemented
        assert not shape_feature.same_shape(y, o)

    def test_vector_dim(self):
        x = vector()
        y = vector()
        o = x + y
        fgraph = FunctionGraph([x, y], [o], clone=False)
        shape_feature = opt.ShapeFeature()
        fgraph.attach_feature(shape_feature)
        assert shape_feature.same_shape(x, o, 0, 0)
        # The following case isn't implemented
        assert not shape_feature.same_shape(y, o, 0, 0)

    def test_vector_dim_err(self):
        x = vector()
        y = vector()
        o = x + y
        fgraph = FunctionGraph([x, y], [o], clone=False)
        shape_feature = opt.ShapeFeature()
        fgraph.attach_feature(shape_feature)
        self.assertRaises(IndexError, shape_feature.same_shape, x, o, 1, 0)
        self.assertRaises(IndexError, shape_feature.same_shape, x, o, 0, 1)


def test_assert_op_gradient():
    x = T.vector('x')
    assert_op = Assert()
    cost = T.sum(assert_op(x, x.size < 2))
    grad = T.grad(cost, x)
    func = theano.function([x], grad)

    x_val = np.ones(shape=(1,), dtype=theano.config.floatX)
    assert func(x_val) == 1


class TestIntDivByOne(unittest.TestCase):

    def setUp(self):
        self.mode = theano.compile.mode.get_default_mode()
        self.mode = self.mode.including('local_intdiv_by_one')

    def test1(self):
        # Tests removing the extra floor_div by 1 introduced by
        # local_subtensor_merge optimization

        y = T.tensor4('y')
        self.mode = self.mode.excluding('fusion')
        f = theano.function([y], y[::-1][::-1], mode=self.mode)

        graph = f.maker.fgraph.toposort()
        divs = [node for node in graph
                if isinstance(node.op, T.elemwise.Elemwise) and
                isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
        assert len(divs) == 0

    def test2(self):
        # Simple test case for removing dividing by 1
        y = T.tensor4('y')
        z = y // 1
        f = theano.function([y], z, mode=self.mode)
        graph = f.maker.fgraph.toposort()
        divs = [node for node in graph
                if isinstance(node.op, T.elemwise.Elemwise) and
                isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
        assert len(divs) == 0

    def test3(self):
        # Simple test case for removing dividing by a tensor of ones
        y = T.tensor4('y')
        z = y // np.ones((2, 2, 2, 2))
        f = theano.function([y], z, mode=self.mode)
        graph = f.maker.fgraph.toposort()
        divs = [node for node in graph
                if isinstance(node.op, T.elemwise.Elemwise) and
                isinstance(node.op.scalar_op, theano.scalar.IntDiv)]
        assert len(divs) == 0


def test_local_zero_div():
    # Tests 0/x -> 0

    for t in (T.scalar, T.ivector, T.ftensor4):
        x = t('x')
        for op in (T.int_div, T.true_div):
            y = op(0, x)
            g = optimize(FunctionGraph([x], [y]))
            # the division should be gone
            divs = [node for node in g.toposort()
                    if isinstance(node.op, T.elemwise.Elemwise) and
                    isinstance(node.op.scalar_op, type(op.scalar_op))]
            assert len(divs) == 0
            # the output type should match the unoptimized one
            output = g.outputs[0]
            assert output.ndim == y.ndim
            assert output.type == y.type
            # and the output should be zero
            assert theano.tensor.get_scalar_constant_value(output) == 0


def test_local_sumsqr2dot():
    G = matrix('G')
    W = matrix('W')

    y = T.sqr(W.dimshuffle('x', 0, 1) * G.dimshuffle(0, 'x', 1)).sum(axis=(1, 2))
    MODE = theano.compile.get_default_mode().including('local_sumsqr2dot')

    f = function([W, G], y, mode=MODE)

    w_val = np.random.rand(4, 3).astype(config.floatX)
    g_val = np.random.rand(5, 3).astype(config.floatX)

    f_val = f(w_val, g_val)
    f_test = np.dot(np.square(g_val), np.square(w_val).sum(axis=0))

    utt.assert_allclose(f_val, f_test)
    assert any(isinstance(n.op, (tensor.basic.Dot, tensor.blas.Dot22,
                                 tensor.blas.Gemv, tensor.blas_c.CGemv))
               for n in f.maker.fgraph.toposort())


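# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): the sumsqr2dot rewrite
# exercised above rests on the algebraic identity
# sum_{j,k} (W[j, k] * G[i, k])**2 == dot(G**2, (W**2).sum(axis=0))[i],
# which plain numpy confirms.
def _demo_sumsqr2dot_identity():
    w = np.random.rand(4, 3)
    g = np.random.rand(5, 3)
    lhs = np.square(w[np.newaxis, :, :] * g[:, np.newaxis, :]).sum(axis=(1, 2))
    rhs = np.dot(np.square(g), np.square(w).sum(axis=0))
    assert np.allclose(lhs, rhs)

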
def test_local_expm1():
    x = matrix('x')
    u = T.scalar('u')

    y = T.exp(x) - 1.
    z = T.exp(x) - 2.
    t = T.exp(x) - x
    s = T.exp(u) - np.ones((4, 3)).astype(config.floatX)
    MODE = theano.compile.get_default_mode().including('local_expm1')
    f = function([x], y, mode=MODE)
    g = function([x], z, mode=MODE)
    h = function([x], t, mode=MODE)
    r = function([u], s, mode=MODE)
    x_val = np.random.rand(4, 3).astype(config.floatX)
    f_val = f(x_val)
    f_test = function([x], T.expm1(x), mode=MODE)

    utt.assert_allclose(f_val, f_test(x_val))

    assert any(isinstance(n.op, T.Elemwise) and isinstance(n.op.scalar_op, theano.scalar.basic.Expm1)
               for n in f.maker.fgraph.toposort())

    assert not any(isinstance(n.op, T.Elemwise) and isinstance(n.op.scalar_op, theano.scalar.basic.Expm1)
                   for n in g.maker.fgraph.toposort())

    assert not any(isinstance(n.op, T.Elemwise) and isinstance(n.op.scalar_op, theano.scalar.basic.Expm1)
                   for n in h.maker.fgraph.toposort())

    assert not any(isinstance(n.op, T.Elemwise) and isinstance(n.op.scalar_op, theano.scalar.basic.Expm1)
                   for n in r.maker.fgraph.toposort())


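# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): rewriting exp(x) - 1 into
# expm1(x) matters numerically because the naive form cancels catastrophically
# for tiny x, as plain numpy shows.
def _demo_expm1_precision():
    x = 1e-20
    assert np.exp(x) - 1. == 0.   # all precision lost to cancellation
    assert np.expm1(x) == 1e-20   # expm1 keeps the small value exactly

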
def test_local_merge_alloc():
    # Add this opt to the default mode,
    # otherwise, FAST_COMPILE fails.
    default_mode = theano.compile.mode.get_default_mode()
    opt_mode = default_mode.including("local_merge_alloc")

    x = T.iscalar('x')
    y = T.iscalar('y')
    y2 = T.iscalar('y2')
    z = T.iscalar('z')
    w = T.iscalar('w')
    m = T.fscalar('m')
    # case 1
    # Alloc(Alloc(m, 1, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
    f = theano.function([m, x, y, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)
    o = f(0., 1, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)

    # case 2
    # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
    f = theano.function([m, x, y, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)
    o = f(0., 1, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)

    # case 3
    # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
    #   Alloc(m, x, assert(y1, y1==y2), z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
    f = theano.function([m, x, y, y2, z, w], output, mode=opt_mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert isinstance(topo[-2].op, T.opt.Assert)
    assert isinstance(topo[-1].op, T.Alloc)
    o = f(0., 1, 2, 2, 3, 4)
    assert o.shape == (1, 2, 3, 4)
    assert_raises((AssertionError, ValueError), f, 0., 1, 2, 5, 3, 4)


def test_local_useless_alloc():

    useless_alloc = out2in(local_useless_alloc)
    merge_alloc = out2in(local_merge_alloc)

    x = T.iscalar('x')
    y = T.iscalar('y')
    y2 = T.iscalar('y2')
    z = T.iscalar('z')
    w = T.iscalar('w')
    m = T.fscalar('m')

    # case 1
    # Alloc(Alloc(m, 1, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, 1, y, 1, 1), x, y, z, w)
    g = FunctionGraph([m, x, y, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)

    # case 2
    # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y, z, w)
    g = FunctionGraph([m, x, y, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, T.Alloc)

    # case 3
    # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) ->
    #   Alloc(m, x, assert(y1, y1==y2), z, w)
    output = T.alloc(T.alloc(m, y, 1, 1), x, y2, z, w)
    g = FunctionGraph([m, x, y, y2, z, w], [output])

    useless_alloc.optimize(g)
    merge_alloc.optimize(g)
    useless_alloc.optimize(g)

    topo = g.toposort()
    assert len(topo) == 3
    assert isinstance(topo[-2].op, T.opt.Assert)
    assert isinstance(topo[-1].op, T.Alloc)


def compile_graph_log_sum_exp(x, axis, dimshuffle_op=None):
    sum_exp = T.sum(T.exp(x), axis=axis)
    if dimshuffle_op:
        sum_exp = dimshuffle_op(sum_exp)
    y = T.log(sum_exp)
    MODE = theano.compile.get_default_mode().including('local_log_sum_exp')
    return function([x], y, mode=MODE)


def check_max_log_sum_exp(x, axis, dimshuffle_op=None):
    f = compile_graph_log_sum_exp(x, axis, dimshuffle_op)

    fgraph = f.maker.fgraph.toposort()
    for node in fgraph:
        if (hasattr(node.op, 'scalar_op') and
                node.op.scalar_op == theano.scalar.basic.maximum):
            return

        # in mode FAST_COMPILE, the optimisations don't replace the
        # MaxAndArgmax op.
        if isinstance(node.op, theano.tensor.MaxAndArgmax):
            return

    raise Exception('No maximum detected after log_sum_exp optimisation')


def test_local_log_sum_exp1():
    # Tests if optimization is applied by checking the presence of the maximum
    x = tensor3('x')
    check_max_log_sum_exp(x, axis=(0,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(1,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(2,), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(0, 1), dimshuffle_op=None)
    check_max_log_sum_exp(x, axis=(0, 1, 2), dimshuffle_op=None)

    # If a transpose is applied to the sum
    transpose_op = DimShuffle((False, False), (1, 0))
    check_max_log_sum_exp(x, axis=2, dimshuffle_op=transpose_op)

    # If the sum is performed with keepdims=True
    x = TensorType(dtype='floatX', broadcastable=(False, True, False))('x')
    sum_keepdims_op = x.sum(axis=(0, 1), keepdims=True).owner.op
    check_max_log_sum_exp(x, axis=(0, 1), dimshuffle_op=sum_keepdims_op)


def test_local_log_sum_exp2():
    # Tests if the optimization works (result is correct) around 1.0

    x = tensor3('x')
    x_val = 1.0 + np.random.rand(4, 3, 2).astype(config.floatX) / 10.0

    f = compile_graph_log_sum_exp(x, axis=(1,))
    naive_ret = np.log(np.sum(np.exp(x_val), axis=1))
    optimised_ret = f(x_val)
    assert np.allclose(naive_ret, optimised_ret)

    # If a transpose is applied
    transpose_op = DimShuffle((False, False), (1, 0))
    f = compile_graph_log_sum_exp(x, axis=(1,), dimshuffle_op=transpose_op)
    naive_ret = np.log(np.sum(np.exp(x_val), axis=1).T)
    optimised_ret = f(x_val)
    assert np.allclose(naive_ret, optimised_ret)


def test_local_log_sum_exp3():
    # Tests if the optimization works (result is correct) for extreme value 100
    x = vector('x')
    f = compile_graph_log_sum_exp(x, axis=0)

    x_val = np.array([-100., 100.]).astype(config.floatX)

    optimised_ret = f(x_val)

    assert np.allclose(optimised_ret, 100.)

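
# Illustrative sketch (assumption: exposition-only helper, not part of the
# original test suite and not collected by nose): the local_log_sum_exp rewrite
# checked above is the classic max trick,
# log(sum(exp(x))) == max(x) + log(sum(exp(x - max(x)))),
# which avoids the overflow the naive form hits in float32; plain numpy
# reproduces both sides.
def _demo_log_sum_exp_max_trick():
    x = np.array([-100., 100.], dtype='float32')
    naive = np.log(np.sum(np.exp(x)))   # exp(100) overflows to inf in float32
    stable = x.max() + np.log(np.sum(np.exp(x - x.max())))
    assert np.isinf(naive)
    assert np.allclose(stable, 100.)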