from __future__ import absolute_import, print_function, division
import unittest

import numpy as np

from theano.compat import imap
import theano.tensor.inplace
from theano.tensor import basic as tensor
from theano import tensor as T
from theano import config
from theano.gof.opt import check_stack_trace
from theano.tests import unittest_tools as utt
from theano.tensor.nnet import (sigmoid, sigmoid_inplace,
                                softplus, ultra_fast_sigmoid, hard_sigmoid)
from theano.tensor.nnet.sigm import (
    compute_mul, is_1pexp, parse_mul_tree, perform_sigm_times_exp,
    register_local_1msigmoid, simplify_mul,
)
from theano.tensor.tests.test_basic import (makeBroadcastTester, copymod,
                                            check_floatX, upcast_int8_nfunc,
                                            _good_broadcast_unary_normal_no_complex)


class T_sigmoid(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        utt.verify_grad(sigmoid, [np.random.rand(3, 4)])

SigmoidTester = makeBroadcastTester(
    op=sigmoid,
    expected=upcast_int8_nfunc(lambda inputs: check_floatX(
        inputs, 1 / (1 + np.exp(-inputs)))),
    good=copymod(_good_broadcast_unary_normal_no_complex,
                 without=['uint16']),  # 'uint16' is excluded because Theano
                                       # handles it fine while NumPy overflows,
                                       # causing an assertion error.
    # grad=_grad_broadcast_unary_normal,
    name='SigmoidTester',
)

UltraFastSigmoidTester = makeBroadcastTester(
    op=ultra_fast_sigmoid,
    expected=upcast_int8_nfunc(lambda inputs: check_floatX(
        inputs, 1 / (1 + np.exp(-inputs)))),
    good=copymod(_good_broadcast_unary_normal_no_complex,
                 without=['uint16']),  # The NumPy reference function overflows with uint16.
    # grad=_grad_broadcast_unary_normal,
    name='UltraFastSigmoidTester',
    # ultra_fast_sigmoid is an approximation of the sigmoid, so we raise eps.
    eps=5e-2)

HardSigmoidTester = makeBroadcastTester(
    op=hard_sigmoid,
    expected=upcast_int8_nfunc(lambda inputs: check_floatX(
        inputs, 1 / (1 + np.exp(-inputs)))),
    good=copymod(_good_broadcast_unary_normal_no_complex,
                 without=['uint16']),  # The NumPy reference function overflows with uint16.
    # grad=_grad_broadcast_unary_normal,
    name='HardSigmoidTester',
    # hard_sigmoid is an approximation of the sigmoid, so we raise eps.
    eps=1e-1)


SoftplusTester = makeBroadcastTester(
    op=softplus,
    expected=upcast_int8_nfunc(lambda inputs: check_floatX(
        inputs, np.log1p(np.exp(inputs)))),
    good=dict(copymod(_good_broadcast_unary_normal_no_complex,
                      without=['uint8', 'uint16']),  # The NumPy reference function overflows with uint16.
              # uint8 and int8 are re-added with restricted ranges to avoid
              # overflow in the reference computation.
              uint8=[np.arange(0, 89, dtype='uint8')],
              int8=[np.arange(-127, 89, dtype='int8')]),
    # grad=_grad_broadcast_unary_normal,
    name='SoftplusTester',
)


class T_softplus(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

    def test_elemwise(self):
        utt.verify_grad(softplus, [np.random.rand(3, 4)])


class T_sigmoid_opts(unittest.TestCase):

    def get_mode(self, excluding=None):
        """
        Return an appropriate mode for the tests.

        :param excluding: List of optimizations to exclude.

        :return: The current default mode, unless the `config.mode` option is
        set to 'FAST_COMPILE' (in which case it is replaced by the 'FAST_RUN'
        mode), without the optimizations specified in `excluding`.
        """
        if excluding is None:
            excluding = []
        m = theano.config.mode
        if m == 'FAST_COMPILE':
            mode = theano.compile.mode.get_mode('FAST_RUN')
        else:
            mode = theano.compile.mode.get_default_mode()
        if excluding:
            return mode.excluding(*excluding)
        else:
            return mode

    def test_exp_over_1_plus_exp(self):
        m = self.get_mode(excluding=['local_elemwise_fusion'])

        x = T.vector()
        data = np.random.rand(54).astype(config.floatX)

        backup = config.warn.identify_1pexp_bug
        config.warn.identify_1pexp_bug = False
        try:
            # tests exp_over_1_plus_exp
            f = theano.function([x], T.exp(x) / (1 + T.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
            f = theano.function([x], T.exp(x) / (2 + T.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = theano.function([x], T.exp(x) / (1 - T.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = theano.function([x], T.exp(x + 1) / (1 + T.exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp
            f = theano.function([x], T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(f, ops_to_check=sigmoid)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
            f = theano.function([x], T.fill(x, 1.0) / (2 + T.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = theano.function([x], T.fill(x, 1.0) / (1 - T.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = theano.function([x], T.fill(x, 1.1) / (1 + T.exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp with neg
            f = theano.function([x], T.fill(x, -1.0) / (1 + T.exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(
            #     f, ops_to_check=[sigmoid, theano.tensor.inplace.neg_inplace])
            assert ([node.op for node in f.maker.fgraph.toposort()] ==
                    [sigmoid, theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], T.fill(x, -1.0) / (1 - T.exp(-x)), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], T.fill(x, -1.0) / (2 + T.exp(-x)), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], T.fill(x, -1.1) / (1 + T.exp(-x)), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    theano.tensor.inplace.neg_inplace])
            f(data)

            # tests double inv_1_plus_exp with neg
            # (-1) * exp(x) / ((1 + exp(x)) * (1 + exp(-x)))
            # = (-1) / (1 + exp(-x)) * exp(x) / (1 + exp(x))
            # = -(sigm(x) * sigm(x))
            f = theano.function([x], (T.fill(x, -1.0) * T.exp(x)) /
                                ((1 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(f, ops_to_check=[sigmoid, T.mul])
            assert ([node.op for node in f.maker.fgraph.toposort()] == [sigmoid,
                    T.mul])
            f(data)
            f = theano.function([x], (T.fill(x, -1.1) * T.exp(x)) /
                                ((1 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    T.mul, theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], (T.fill(x, -1.0) * T.exp(x)) /
                                ((2 + T.exp(x)) * (1 + T.exp(-x))), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    T.mul, theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], (T.fill(x, -1.0) * T.exp(x)) /
                                ((1 + T.exp(x)) * (2 + T.exp(-x))), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    T.mul, theano.tensor.inplace.neg_inplace])
            f(data)
            f = theano.function([x], (T.fill(x, -1.0) * T.exp(x)) /
                                ((1 + T.exp(x)) * (1 + T.exp(x))), mode=m)
            assert ([node.op for node in f.maker.fgraph.toposort()] != [sigmoid,
                    T.mul, theano.tensor.inplace.neg_inplace])
            f(data)
        finally:
            # Restore config option.
            config.warn.identify_1pexp_bug = backup

    def test_1msigmoid(self):
        if not register_local_1msigmoid:
            return

        m = self.get_mode()
        x = T.fmatrix()
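        # The local_1msigmoid optimization rewrites `1 - sigmoid(x)` as
        # `sigmoid(-x)`, so both graphs below are expected to reduce to a
        # negation followed by an (inplace) sigmoid.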

        # tests exp_over_1_plus_exp
        f = theano.function([x], 1 - T.exp(x) / (1 + T.exp(x)), mode=m)
        assert check_stack_trace(f, ops_to_check=[tensor.neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
            tensor.neg, sigmoid_inplace]

        # tests inv_1_plus_exp
        f = theano.function([x], 1 - T.fill(x, 1.0) / (1 + T.exp(-x)), mode=m)
        assert check_stack_trace(f, ops_to_check=[tensor.neg, sigmoid_inplace])
        assert ([node.op for node in f.maker.fgraph.toposort()] == [tensor.neg,
                sigmoid_inplace])

    def test_local_sigm_times_exp(self):
        # Test the `local_sigm_times_exp` optimization.
        # exp(x) * sigm(-x) -> sigm(x)
        # exp(-x) * sigm(x) -> sigm(-x)
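        # These identities follow from sigm(-x) = 1 / (1 + exp(x)):
        # exp(x) * sigm(-x) = exp(x) / (1 + exp(x)) = sigm(x), and
        # exp(-x) * sigm(x) = exp(-x) / (1 + exp(-x)) = sigm(-x).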

        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid, tensor.neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = theano.function(
            [x, y],
            (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                tensor.exp(x * y) * tensor.exp(y)), mode=m)
        topo = f.maker.fgraph.toposort()
        for op, nb in [(sigmoid, 2), (tensor.mul, 2),
                       (tensor.neg, 1), (tensor.exp, 1)]:
            assert sum([n.op == op for n in topo]) == nb
        # assert check_stack_trace(f, ops_to_check=[sigmoid, tensor.mul,
        #                                           tensor.exp])

    def test_perform_sigm_times_exp(self):
        # Test the core function doing the `sigm_times_exp` optimization.
        #
        # It is easier to test different graph scenarios this way than by
        # compiling a theano function.

        x, y, z, t = tensor.vectors('x', 'y', 'z', 't')
        exp = tensor.exp

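        # `ok` parses both expressions into multiplication trees, applies the
        # optimization to the first tree, simplifies it, and then checks that
        # the two trees compute the same graph.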
        def ok(expr1, expr2):
            trees = [parse_mul_tree(e) for e in (expr1, expr2)]
            perform_sigm_times_exp(trees[0])
            trees[0] = simplify_mul(trees[0])
            good = theano.gof.graph.is_same_graph(
                compute_mul(trees[0]),
                compute_mul(trees[1]))
            if not good:
                print(trees[0])
                print(trees[1])
                print('***')
                theano.printing.debugprint(compute_mul(trees[0]))
                print('***')
                theano.printing.debugprint(compute_mul(trees[1]))
            assert good
        ok(sigmoid(x) * exp(-x), sigmoid(-x))
        ok(-x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
           -x * sigmoid(-x) * (y * (-1 * z)))
        ok(-sigmoid(-x) *
           (exp(y) * (-exp(-z) * 3 * -exp(x)) *
            (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z))) * -
           sigmoid(x),
           sigmoid(x) *
           (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z)))) *
           (-sigmoid(x)))
        ok(exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
           -sigmoid(-x) * sigmoid(-x))
        ok(-exp(x) * -sigmoid(-x) * -exp(-x),
           -sigmoid(-x))

    def test_grad_log1msigm(self):
        # At some point, this returned nan because (1 - sigm(x)) appeared in
        # both the numerator and the denominator of a fraction, but the two
        # nodes in question had not been merged.
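        # Mathematically, grad(log(1 - sigm(x)), x) = -sigm(x), which is
        # finite for every x, so the optimized graph should not go through
        # the unstable intermediate values.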
        x = tensor.matrix('x')
        lr = tensor.scalar('lr')

        s = sigmoid(x)
        l = T.log(1 - s)
        c = l.mean()
        ux = x - lr * theano.grad(c, x)

        # Before the optimization, inf and NaN will be produced in the graph,
        # and DebugMode will complain. Everything is fine afterwards.
        mode = self.get_mode()
        if not isinstance(mode, theano.compile.DebugMode):
            f = theano.function([x, lr], ux, mode=mode)
            ux_v = f([[50]], 0.1)
            assert not np.isnan(ux_v)

    def test_local_ultra_fast_sigmoid(self):
        x = tensor.matrix('x')
        s = sigmoid(x)
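        # With the optimization excluded, the regular sigmoid op must survive;
        # once 'local_ultra_fast_sigmoid' is included, it should be replaced
        # by ultra_fast_sigmoid.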

        mode = self.get_mode('local_ultra_fast_sigmoid')
        f = theano.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=sigmoid)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert topo[0].op == sigmoid

        mode = self.get_mode().including('local_ultra_fast_sigmoid')
        f = theano.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
        topo = f.maker.fgraph.toposort()
        assert topo[0].op == ultra_fast_sigmoid
        assert len(topo) == 1
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

    def test_local_hard_sigmoid(self):
        x = tensor.matrix('x')
        s = sigmoid(x)
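        # Same pattern as above: sigmoid survives while the optimization is
        # excluded, and disappears once 'local_hard_sigmoid' is included.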

        mode = self.get_mode('local_hard_sigmoid')
        f = theano.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=sigmoid)
        topo = f.maker.fgraph.toposort()
        assert topo[0].op == sigmoid
        assert len(topo) == 1

        mode = self.get_mode().including('local_hard_sigmoid')
        f = theano.function([x], s, mode=mode)
        topo = f.maker.fgraph.toposort()
        assert not any([n.op == sigmoid for n in topo])
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

        mode2 = mode.excluding('fusion').excluding('inplace')
        f2 = theano.function([x], s, mode=mode2)
        self.assertTrue(check_stack_trace(f2, ops_to_check=theano.tensor.clip))


class T_softplus_opts(unittest.TestCase):
    def setUp(self):
        if theano.config.mode == 'FAST_COMPILE':
            m = theano.compile.mode.get_mode('FAST_RUN').excluding(
                'local_elemwise_fusion')
        else:
            m = theano.compile.mode.get_default_mode().excluding(
                'local_elemwise_fusion')
        self.m = m
        utt.seed_rng()

    def test_logsigm_to_softplus(self):
        x = T.vector()

        out = T.log(sigmoid(x))
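        # log(sigm(x)) = -softplus(-x), which explains the Neg -> Softplus ->
        # Neg structure asserted below.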
        f = theano.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(
        #     f, ops_to_check=(theano.scalar.Neg,
        #                      theano.tensor.nnet.sigm.ScalarSoftplus))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op.scalar_op, theano.scalar.Neg)
        assert isinstance(topo[1].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(np.random.rand(54).astype(config.floatX))

    def test_log1msigm_to_softplus(self):
        x = T.matrix()

        out = T.log(1 - sigmoid(x))
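        # Since 1 - sigm(x) = sigm(-x), log(1 - sigm(x)) = -softplus(x):
        # a Softplus followed by a Neg.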
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
        # assert check_stack_trace(f, ops_to_check='all')
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
        out = T.log(1 - T.flatten(sigmoid(x)))
        f = theano.function([x], out, mode=self.m)

        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert tensor.is_flat(topo[0].outputs[0])
        assert isinstance(topo[1].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
        out = T.log(1 - sigmoid(x).reshape([x.size]))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        # assert len(topo) == 3
        assert any(isinstance(node.op, T.Reshape) for node in topo)
        assert any(isinstance(getattr(node.op, 'scalar_op', None),
                              theano.tensor.nnet.sigm.ScalarSoftplus)
                   for node in topo)
        f(np.random.rand(54, 11).astype(config.floatX))

    def test_log1pexp_to_softplus(self):
        x = T.vector()

        out = T.log(1 + T.exp(x))
        f = theano.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        f(np.random.rand(54).astype(config.floatX))


class T_sigmoid_utils(unittest.TestCase):
    """
    Test utility functions found in 'sigm.py'.
    """

    def test_compute_mul(self):
        x, y, z = tensor.vectors('x', 'y', 'z')
        tree = (x * y) * -z
        mul_tree = parse_mul_tree(tree)
        assert parse_mul_tree(compute_mul(mul_tree)) == mul_tree
        assert theano.gof.graph.is_same_graph(
            compute_mul(parse_mul_tree(tree)), tree)

    def test_parse_mul_tree(self):
        x, y, z = tensor.vectors('x', 'y', 'z')
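        # A mul tree is a [neg, sub] pair: a boolean negation flag followed
        # by either a leaf variable or a list of two subtrees.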
        assert parse_mul_tree(x * y) == [False, [[False, x], [False, y]]]
        assert parse_mul_tree(-(x * y)) == [True, [[False, x], [False, y]]]
        assert parse_mul_tree(-x * y) == [False, [[True, x], [False, y]]]
        assert parse_mul_tree(-x) == [True, x]
        assert parse_mul_tree((x * y) * -z) == [
            False, [[False, [[False, x], [False, y]]], [True, z]]]

    def test_is_1pexp(self):
        backup = config.warn.identify_1pexp_bug
        config.warn.identify_1pexp_bug = False
        try:
            x = tensor.vector('x')
            exp = tensor.exp
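            # is_1pexp(t, ...) is expected to return a (neg, arg) pair when t
            # matches 1 + exp(arg) (with `neg` flagging an overall negation),
            # and None when it does not match.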
            assert is_1pexp(1 + exp(x), False) == (False, x)
            assert is_1pexp(exp(x) + 1, False) == (False, x)
            for neg, exp_arg in imap(lambda x:
                                     is_1pexp(x, only_process_constants=False),
                                     [(1 + exp(-x)), (exp(-x) + 1)]):
                assert not neg and theano.gof.graph.is_same_graph(exp_arg, -x)
            assert is_1pexp(1 - exp(x), False) is None
            assert is_1pexp(2 + exp(x), False) is None
            assert is_1pexp(exp(x) + 2, False) is None
            assert is_1pexp(exp(x) - 1, False) is None
            assert is_1pexp(-1 + exp(x), False) is None
            assert is_1pexp(1 + 2 * exp(x), False) is None
        finally:
            config.warn.identify_1pexp_bug = backup