1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18
19#############################################################
20## Please read the README.md document for better reference ##
21#############################################################
22from __future__ import print_function
23
24import logging
25import random
26
27import mxnet as mx
28import numpy as np
29from sklearn.datasets import fetch_mldata
30from sklearn.decomposition import PCA
31
32
# Lower the root logger's threshold to DEBUG so that no record emitted
# through the logging module is filtered out by level.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Seed NumPy, MXNet and the standard-library generator with one shared
# value so that repeated runs draw the same random numbers (and therefore
# use the same example ordering).
SEED = 1234
for seed_fn in (np.random.seed, mx.random.seed, random.seed):
    seed_fn(SEED)
39
# Network declaration as symbols. The layer sizes follow the article,
# but feel free to experiment with the number of hidden units and with
# the activation function.
HIDDEN_UNITS = 512  # width of each hidden layer
NUM_CLASSES = 10    # number of scores produced by the last layer


def _hidden_layer(inputs, fc_name, act_name):
    """Return the (fully connected, ReLU) symbol pair built on *inputs*."""
    dense = mx.symbol.FullyConnected(
        data=inputs, name=fc_name, num_hidden=HIDDEN_UNITS)
    activated = mx.symbol.Activation(
        data=dense, name=act_name, act_type="relu")
    return dense, activated


data = mx.symbol.Variable('data')
fc1, act1 = _hidden_layer(data, 'fc1', 'relu1')
fc2, act2 = _hidden_layer(act1, 'fc2', 'relu2')

# Last fully connected layer; its outputs feed each of the three
# alternative output layers declared below.
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=NUM_CLASSES)

# Output layer based on the L2-SVM objective
mlp_svm_l2 = mx.symbol.SVMOutput(name='svm_l2', data=fc3)

# Output layer based on the L1-SVM objective
mlp_svm_l1 = mx.symbol.SVMOutput(name='svm_l1', data=fc3, use_linear=True)

# Softmax cross entropy loss, for comparison
mlp_softmax = mx.symbol.SoftmaxOutput(name='softmax', data=fc3)
58
print("Preparing data...")
# Load MNIST and merge its predefined train and test parts, so that the
# whole data set can be transformed and reshuffled together.
mnist_data = mx.test_utils.get_mnist()
X = np.concatenate([mnist_data['train_data'], mnist_data['test_data']])
Y = np.concatenate([mnist_data['train_label'], mnist_data['test_label']])

# Flatten every example into one float32 row and multiply by 255
# (presumably to restore the 0-255 pixel range -- TODO confirm the scale
# returned by get_mnist). These unprojected rows are kept under their own
# name because X is rebound to the PCA features below.
X_raw = X.reshape((X.shape[0], -1)).astype(np.float32) * 255

# As the article suggests: reduce the input to 70 principal components,
# add Gaussian noise, then shuffle the examples. The order of the NumPy
# random calls (PCA fit, noise, permutation) is kept as it was, so the
# seeded results do not change.
mnist_pca = PCA(n_components=70).fit_transform(X_raw)
noise = np.random.normal(size=mnist_pca.shape)
mnist_pca += noise
p = np.random.permutation(mnist_pca.shape[0])
X = mnist_pca[p] / 255.  # scale the features down
Y = Y[p]                 # labels follow the same permutation

# Separate train set and test set
num_train = 60000
X_train = X[:num_train]
X_test = X[num_train:]
Y_train = Y[:num_train]
Y_test = Y[num_train:]

# Unprojected rows of the test examples, in the same order as X_test and
# Y_test. The permutation is applied exactly once: the previous
# `X_show = X[p]` re-shuffled the already shuffled X, so X_show[i] did not
# belong to the same example as X_test[i] / Y_test[i].
X_show = X_raw[p[num_train:]]
print("Data prepared.")
# Article's suggestion on batch size
batch_size = 200

# Use a GPU when MXNet reports at least one, otherwise fall back to the CPU
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

# Test accuracy in percent, keyed by the name of the output layer
results = {}
for output in [mlp_svm_l2, mlp_svm_l1, mlp_softmax]:

    print("\nTesting with %s \n" % output.name)

    # Name under which the labels are handed to the iterators and to the
    # module; it is derived from the name of the output layer.
    label = output.name + "_label"

    train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size, label_name=label)
    test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size, label_name=label)

    # Here we instantiate and fit the model for our data.
    # The article actually suggests using 400 epochs,
    # but this is reduced to 10 for convenience.

    mod = mx.mod.Module(
        context = ctx,
        symbol = output,         # Use the network we just defined
        label_names = [label],
    )
    mod.fit(
        train_data=train_iter,
        eval_data=test_iter,  # Testing data set. MXNet computes scores on test set every epoch
        batch_end_callback = mx.callback.Speedometer(batch_size, 200),  # Logging module to print out progress
        num_epoch = 10,       # Train for 10 epochs
        optimizer_params = {
            'learning_rate': 0.1,  # Learning rate
            'momentum': 0.9,       # Momentum for SGD with momentum
            'wd': 0.00001,         # Weight decay for regularization
        })

    # Evaluate on the test set once and reuse the value for both the
    # summary table and the message; the second, identical evaluation
    # pass only cost time.
    accuracy = mod.score(test_iter, mx.metric.Accuracy())[0][1]*100
    results[output.name] = accuracy
    print('Accuracy for %s:'%output.name, accuracy, '%\n')
118
# Summary: one line per output layer with its test accuracy in percent.
# The trailing argument is a plain percent sign; the former "%s" was never
# used as a format string and was printed literally.
for key, value in results.items():
    print(key, value, "%")

# Example output:
# svm_l2 97.85 %
# svm_l1 98.15 %
# softmax 97.69 %
125