# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


#############################################################
## Please read the README.md document for better reference ##
#############################################################
from __future__ import print_function

import logging
import random

import mxnet as mx
import numpy as np
from sklearn.decomposition import PCA


logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

np.random.seed(1234)  # set seeds for deterministic ordering
mx.random.seed(1234)
random.seed(1234)

# Network declaration as symbols. The following pattern is based on the
# article, but feel free to play with the number of nodes or with the
# activation function.
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=512)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=512)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)

# Here we add the output layer based on the L2-SVM objective
mlp_svm_l2 = mx.symbol.SVMOutput(data=fc3, name='svm_l2')

# With the L1-SVM objective
mlp_svm_l1 = mx.symbol.SVMOutput(data=fc3, name='svm_l1', use_linear=True)

# Compare with the softmax cross-entropy loss
mlp_softmax = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

print("Preparing data...")
mnist_data = mx.test_utils.get_mnist()
X = np.concatenate([mnist_data['train_data'], mnist_data['test_data']])
Y = np.concatenate([mnist_data['train_label'], mnist_data['test_label']])
X = X.reshape((X.shape[0], -1)).astype(np.float32) * 255

# Reduce the data with PCA and add some noise, as the article suggests,
# then permute and assign the examples to be used by our network
mnist_pca = PCA(n_components=70).fit_transform(X)
noise = np.random.normal(size=mnist_pca.shape)
mnist_pca += noise
p = np.random.permutation(mnist_pca.shape[0])
X = mnist_pca[p] / 255.
Y = Y[p]
X_show = X  # X is already permuted; kept around for optional visualization

# Separate the train set from the test set
X_train = X[:60000]
X_test = X[60000:]
X_show = X_show[60000:]
Y_train = Y[:60000]
Y_test = Y[60000:]
print("Data prepared.")
# Article's suggestion on batch size
batch_size = 200

ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

results = {}
for output in [mlp_svm_l2, mlp_svm_l1, mlp_softmax]:

    print("\nTesting with %s \n" % output.name)

    label = output.name + "_label"

    train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size, label_name=label)
    test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size, label_name=label)

    # Here we instantiate and fit the model for our data.
    # The article actually suggests using 400 epochs,
    # but I reduced it to 10 for convenience.

    mod = mx.mod.Module(
        context=ctx,
        symbol=output,        # Use the network we just defined
        label_names=[label],
    )
    mod.fit(
        train_data=train_iter,
        eval_data=test_iter,  # Testing data set. MXNet computes scores on the test set every epoch
        batch_end_callback=mx.callback.Speedometer(batch_size, 200),  # Logging module to print out progress
        num_epoch=10,         # Train for 10 epochs
        optimizer_params={
            'learning_rate': 0.1,  # Learning rate
            'momentum': 0.9,       # Momentum for SGD with momentum
            'wd': 0.00001,         # Weight decay for regularization
        })
    accuracy = mod.score(test_iter, mx.metric.Accuracy())[0][1] * 100
    results[output.name] = accuracy
    print('Accuracy for %s:' % output.name, accuracy, '%\n')

for key, value in results.items():
    print(key, value, "%")

# svm_l2 97.85 %
# svm_l1 98.15 %
# softmax 97.69 %