AI-MXNet/t/test_autograd.t

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

use strict;
use warnings;
use AI::MXNet qw(mx);
use AI::MXNet::AutoGrad qw(autograd);
use AI::MXNet::TestUtils qw(same almost_equal rand_ndarray);
use AI::MXNet::Base qw(:DEFAULT pones);
use Test::More tests => 246;
$ENV{MXNET_STORAGE_FALLBACK_LOG_VERBOSE} = 0;
$ENV{MXNET_SUBGRAPH_VERBOSE} = 0;

sub autograd_assert
{
    my $kwargs = {};
    if(ref $_[-1] eq 'HASH') { $kwargs = pop(@_) };
    my @args = @_;
    my $func   = $kwargs->{func};
    my $grad_f = $kwargs->{grad_func};
    my $argnum = $kwargs->{argnum};
    my $grad_func = autograd->grad_and_loss($func, $argnum);
    my ($grad_vals, $output) = $grad_func->(@args);
    my $res = $func->(@args);
    ok(same($output->aspdl, $res->aspdl));
    my $grad_res = $grad_f->(@args);
    ok(@$grad_vals == @$grad_res);
    for(zip($grad_vals, $grad_res)) {
        my ($a, $b) = @$_;
        ok(same($a->aspdl, $b->aspdl));
    }
}

sub test_unary_func
{
    my $check_unary_func = sub {
        my ($x) = @_;
        my $f_exp       = sub { $_[0]->exp };
        my $f_exp_grad  = sub { [$_[0]->exp] };
        autograd_assert($x, { func => $f_exp, grad_func => $f_exp_grad });
        my $f_half      = sub { $_[0]/2 };
        my $f_half_grad = sub { [mx->nd->ones($_[0]->shape) * 0.5] };
        autograd_assert($x, { func => $f_half, grad_func => $f_half_grad });
        my $f_square    = sub { $_[0]**2 };
        my $f_square_grad = sub { [2*$_[0]] };
        autograd_assert($x, { func => $f_square, grad_func => $f_square_grad });
    };
    my $uniform = mx->nd->uniform(shape=>[4, 5]);
    $check_unary_func->($uniform);
    my $stypes = ['row_sparse', 'csr', 'default'];
    for my $stype (@$stypes)
    {
        $check_unary_func->($uniform->tostype($stype));
    }
}

test_unary_func();

sub test_binary_func
{
    my $check_binary_func = sub {
        my ($x, $y) = @_;
        my $f_add      = sub { $_[0]+$_[1] };
        my $f_add_grad = sub { [map { mx->nd->ones($_->shape) } @_] };
        autograd_assert($x, $y, { func => $f_add, grad_func => $f_add_grad });
        my $f_mul      = sub { $_[0]*$_[1] };
        my $f_mul_grad = sub { [reverse(@_)] };
        autograd_assert($x, $y, { func => $f_mul, grad_func => $f_mul_grad });
        my $f_compose  = sub { $_[0]+$_[0]*$_[1] };
        my $f_compose_grad = sub { [mx->nd->ones($_[0]->shape) + $y, $x] };
        autograd_assert($x, $y, { func => $f_compose, grad_func => $f_compose_grad });
    };
    my $uniform_x = mx->nd->uniform(shape=>[4, 5]);
    my $uniform_y = mx->nd->uniform(shape=>[4, 5]);
    $check_binary_func->($uniform_x, $uniform_y);
    my $stypes = ['row_sparse', 'csr', 'default'];
    for my $stype_x (@$stypes)
    {
        for my $stype_y (@$stypes)
        {
            my $x = $uniform_x->tostype($stype_x);
            my $y = $uniform_y->tostype($stype_y);
            $check_binary_func->($x, $y);
        }
    }
}

test_binary_func();

sub test_operator_with_state
{
    my $f_fc = sub {
        my ($a, $b, $weight, $bias) = @_;
        my $x = $a*$b;
        my $fc = mx->nd->FullyConnected(
            $x, $weight, $bias, num_hidden=>32);
        return $fc;
    };

    my $a = mx->nd->uniform(shape=>[64, 50]);
    my $b = mx->nd->uniform(shape=>[64, 50]);
    my $weight = mx->nd->uniform(shape=>[32, 50]);
    my $bias = mx->nd->uniform(shape=>[32]);

    my $grad_func = autograd->grad_and_loss($f_fc);
    my ($grad_vals, $outputs) = $grad_func->($a, $b, $weight, $bias);
}

test_operator_with_state();

sub test_argnum
{
    my $f_with_mode = sub {
        my ($a, $b, $mode) = @_;
        if($mode)
        {
            return $a+$b;
        }
        else
        {
            return $a*$b;
        }
    };
    my $a = mx->nd->uniform(shape=>[3, 2]);
    my $b = mx->nd->uniform(shape=>[3, 2]);
    my $f_add_grad = sub { [map { mx->nd->ones($_->shape) } @_[0,1]] };
    my $f_mul_grad = sub { [reverse(@_[0,1])] };
    autograd_assert($a, $b, 1,
        { argnum=>[0, 1], func=>$f_with_mode, grad_func=>$f_add_grad });
    autograd_assert($a, $b, 0,
        { argnum=>[0, 1], func=>$f_with_mode, grad_func=>$f_mul_grad });
}

test_argnum();

sub test_training
{
    my $x = mx->nd->ones([10, 10]);
    autograd->record(sub {
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok(not ($y->aspdl == $x->aspdl)->all);
        autograd->pause(sub {
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok(($y->aspdl == $x->aspdl)->all);
        });
    });
}

test_training();

sub test_out_grads
{
    my $x = mx->nd->ones([3, 5]);
    my $dx = mx->nd->zeros_like($x);
    autograd->mark_variables([$x], [$dx]);
    my $da;
    my $db = mx->nd->array([1,2,3,4,5]);
    my $dc = mx->nd->array([5,4,3,2,1]);

    autograd->record(sub {
        my ($a, $b, $c) = @{ $x };
        autograd->backward([$a, $b, $c], head_grads => [$da, $db, $dc]);
    });
    ok(($dx->aspdl == pdl(
        [[1,1,1,1,1],
         [1,2,3,4,5],
         [5,4,3,2,1]]))->all);
}

test_out_grads();

sub test_detach_updated_grad
{
    my $x = mx->nd->ones([2, 2]);
    my $dx = mx->nd->zeros_like($x);
    my $y = mx->nd->ones_like($x);
    my $dy = mx->nd->zeros_like($x);
    autograd->mark_variables([$x, $y], [$dx, $dy]);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 0);

    autograd->record(sub {
        my $x2 = $x + 2;
        my $y2  = $x2 + $y;
        $y2->backward();
    });
    ok(($dx->aspdl == 1)->all);
    ok($x->_fresh_grad == 1);
    ok($y->_fresh_grad == 1);

    $dx .= 0;
    $x->_fresh_grad(0);
    $y->_fresh_grad(0);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 0);

    autograd->record(sub {
        my $x2 = $x + 2;
        $x2 = $x2->detach;
        my $y2  = $x2 + $y;
        $y2->backward();
    });
    ok(($dx->aspdl == 0)->all);
    ok($x->_fresh_grad == 0);
    ok($y->_fresh_grad == 1);
}

test_detach_updated_grad();

sub test_retain_grad
{
    my $x = mx->nd->ones([2, 2]);
    my $dx = mx->nd->zeros([2, 2]);
    autograd->mark_variables([$x], [$dx], grad_reqs=>'add');
    autograd->record(sub {
        my $y = $x + 1;
        $y->backward(retain_graph=>0);
    });
    ok(($dx->aspdl == 1)->all);

    $dx .= 0;
    autograd->record(sub {
        my $y = $x + 1;
        $y->backward(retain_graph=>1);
        $y->backward(retain_graph=>0);
    });
    ok(($dx->aspdl == 2)->all);
    no warnings;
    open(CPERR, ">&STDERR");
    open(STDERR, ">/dev/null");
    eval {
        autograd->record(sub {
            my $y = $x + 1;
            $y->backward();
            $y->backward();
        });
    };
    open(STDERR, ">&CPERR");
    ok($@);
}

test_retain_grad();

sub test_attach_grad
{
    my $check_attach_grad = sub {
        my ($x) = @_;
        ok(not defined $x->grad);
        $x->attach_grad();
        autograd->record(sub {
            my $y = $x * 2;
            ok(not defined $y->grad);
            $y->backward;
        });
        ok(($x->grad->aspdl == 2)->all);
    };
    my $zeros = mx->nd->zeros([10, 10]);
    $check_attach_grad->($zeros);
    my @stypes = ('default', 'row_sparse', 'csr');
    for my $stype (@stypes)
    {
        my $x = $zeros->tostype($stype);
        $check_attach_grad->($x);
    }
}

test_attach_grad();

sub test_is_train
{
    my $x = mx->nd->ones([10, 10]);
    $x->attach_grad();
    autograd->record(sub {
        ok(autograd->is_recording());
        ok(autograd->is_training());
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
        $y->backward();
        ok(($x->grad->aspdl == $y->aspdl)->all);
        autograd->predict_mode(sub {
            ok(autograd->is_recording());
            ok(not autograd->is_training());
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok(($y->aspdl == $x->aspdl)->all);
            $y->backward(train_mode=>0);
            ok(($x->grad->aspdl == $x->aspdl)->all);
        });
    }, train_mode => 1);

    autograd->record(sub {
        ok(autograd->is_recording());
        ok(not autograd->is_training());
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok(($y->aspdl == $x->aspdl)->all);
        $y->backward(train_mode=>0);
        ok(($x->grad->aspdl == $x->aspdl)->all);

        autograd->train_mode(sub {
            ok(autograd->is_recording);
            ok(autograd->is_training);
            my $y = mx->nd->Dropout($x, p=>0.5);
            ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
            $y->backward;
            ok(($x->grad->aspdl == $y->aspdl)->all);
        });
    }, train_mode => 0);

    ok(not autograd->is_recording);
    ok(not autograd->is_training);
    my $y = mx->nd->Dropout($x, p=>0.5);
    ok(($y->aspdl == $x->aspdl)->all);

    autograd->train_mode(sub {
        ok(not autograd->is_recording);
        ok(autograd->is_training);
        my $y = mx->nd->Dropout($x, p=>0.5);
        ok($y->aspdl->max == 2 and $y->aspdl->min == 0);
    });
}

test_is_train();

sub test_get_symbol
{
    my $x = mx->nd->ones([1]);
    $x->attach_grad;
    my $y;
    autograd->record(sub {
        $y = $x*$x + 2*$x - 1;
    });
    ok(@{ autograd->get_symbol($y)->list_arguments } == 1);

    my $z = mx->nd->ones([1]);
    $z->attach_grad;
    autograd->record(sub {
        $y = $x*$x + 2*$z - 1;
    });
    ok(@{ autograd->get_symbol($y)->list_arguments } == 2);
}

test_get_symbol();

sub test_gradient
{
    my $x = mx->nd->ones([1]);
    $x->attach_grad;
    my $z;
    mx->autograd->record(sub {
        $z = mx->nd->elemwise_add($x->exp, $x);
    });
    my $dx = mx->autograd->grad($z, $x, create_graph=>1);
    ok(abs($dx->asscalar - 3.71828175) < 1e-7);
    $dx->backward;
    ok(abs($x->grad->asscalar - 2.71828175) < 1e-7);
}

test_gradient();

sub test_grad_with_stype
{
    my $check_grad_with_stype = sub { my ($array_stype, $grad_stype, $expected_stype) = @_;
        my $x = mx->nd->zeros([1, 1], stype=>$array_stype);
        $x->attach_grad(stype=>$grad_stype);
        # check grad attached
        ok($x->grad->stype eq $expected_stype);
        my $y = $x->detach();
        # check array detached
        ok($y->stype eq $array_stype);
    };
    my @stypes = ('default', 'csr', 'row_sparse');
    for my $stype (@stypes)
    {
        # check the default stype of the gradient (same as the array stype)
        $check_grad_with_stype->($stype, undef, $stype);
        for my $grad_stype (@stypes)
        {
            # check the stype of the gradient when provided
            $check_grad_with_stype->($stype, $grad_stype, $grad_stype);
        }
    }
}

test_grad_with_stype();

sub test_sparse_dot_grad
{
    my $check_sparse_dot_grad = sub { my ($rhs) = @_;
        my $lhs = rand_ndarray([2, 8], 'csr');
        my $y;
        mx->autograd->record(sub {
            $y = mx->nd->dot($lhs, $rhs);
        });
        $y->backward();
        my $grad = $rhs->grad;
        my $grad_pdl = $lhs->aspdl->transpose x pones($rhs->shape->[1], $lhs->shape->[0]);
        ok($grad->stype eq 'row_sparse');
        ok(almost_equal($grad->aspdl, $grad_pdl));
    };

    # check grad with row_sparse weight
    my $shape = [8, 3];
    my $rsp = mx->nd->ones($shape)->tostype('row_sparse');
    $rsp->attach_grad();
    $check_sparse_dot_grad->($rsp);

    # check grad with dense weight
    my $dns = mx->nd->ones($shape);
    $dns->attach_grad(stype=>'row_sparse');
    $check_sparse_dot_grad->($dns);
}

test_sparse_dot_grad();