Using Gluon with Xfer¶
This notebook demonstrates how to use neural networks defined and trained with Gluon as source models for Transfer Learning with Xfer.
TL;DR: Gluon models can be used with Xfer provided they are built from HybridBlocks, so that the underlying symbol can be extracted.
This demo is a dummy example in which a CNN source model is trained on MNIST using Gluon and then repurposed for MNIST again. This is obviously redundant, but it shows the steps required to use Gluon with Xfer.
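The requirement above is worth a concrete illustration: a hybridized network can be traced to a static symbol, which is what Xfer consumes. Below is a minimal sketch (not part of the demo; the block tiny and the file prefix 'tiny-model' are arbitrary illustrative names):
In [ ]:
# A hybrid network can be exported once it has been hybridized and run
# forward once; Xfer relies on this symbol extraction.
import mxnet as mx
from mxnet import gluon

tiny = gluon.nn.HybridSequential()
with tiny.name_scope():
    tiny.add(gluon.nn.Dense(10))
tiny.initialize()
tiny.hybridize()
tiny(mx.nd.zeros((1, 784)))  # one forward pass caches the symbolic graph
tiny.export('tiny-model')    # writes tiny-model-symbol.json and tiny-model-0000.params

# A plain gluon.nn.Sequential of non-hybrid Blocks defines its graph
# imperatively and has no .export(), so no symbol is available for Xfer.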
In [1]:
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
mx.random.seed(1)
import time
from sklearn.metrics import classification_report
from scipy import io as scipyio
import urllib.request
import zipfile
import os
import logging
import xfer
Train CNN with Gluon¶
Using code taken from The Straight Dope
In [2]:
ctx = mx.cpu()
In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
def transform(data, label):
    return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)

train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size, shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size, shuffle=False)
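As a quick sanity check (not in the original notebook), one batch can be drawn to confirm that transform yields NCHW-shaped float images:
In [ ]:
# Expect data of shape (64, 1, 28, 28) and labels of shape (64,).
for data, label in train_data:
    print(data.shape, label.shape)
    break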
In [4]:
num_fc = 512
net = gluon.nn.HybridSequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    # The Flatten layer collapses all axes, except the first one, into one axis.
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(num_fc, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))
In [5]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
In [6]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
In [7]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
In [8]:
net.hybridize()
In [9]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
In [10]:
epochs = 1
smoothing_constant = .01
for e in range(epochs):
    start_time_train = time.time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        # Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
    end_time_train = time.time()

    start_time_eval = time.time()
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    end_time_eval = time.time()

    epoch_time = end_time_train - start_time_train
    eval_time = end_time_eval - start_time_eval
    print("Epoch {}.\nLoss: {}, Train_acc {}, Test_acc {}, Epoch_time {}, Eval_time {}".format(
        e, moving_loss, train_accuracy, test_accuracy, epoch_time, eval_time))
Epoch 0.
Loss: 0.11107716270094219, Train_acc 0.9745833333333334, Test_acc 0.9742, Epoch_time 54.26378679275513, Eval_time 23.154165029525757
Load MNIST dataset¶
Load MNIST into data iterators
In [19]:
mnist = mx.test_utils.get_mnist()
In [20]:
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
Convert Gluon model net to Module¶
Adapted from a snippet found here: https://github.com/apache/incubator-mxnet/issues/9374
From the Gluon model, the symbol and parameters are extracted and used to define a Module object.
In [21]:
def block2symbol(block):
    # Trace the HybridBlock with a symbolic variable to recover its symbol,
    # then copy the trained parameters out as NDArray dicts. This network has
    # no auxiliary states, so the auxs dict is redundant here.
    data = mx.sym.Variable('data')
    sym = block(data)
    args = {}
    auxs = {}
    for k, v in block.collect_params().items():
        args[k] = mx.nd.array(v.data().asnumpy())
        auxs[k] = mx.nd.array(v.data().asnumpy())
    return sym, args, auxs
In [22]:
def symbol2mod(sym, args, auxs, data_iter):
    # Append a softmax output and wrap the symbol in a bound Module carrying
    # the trained parameters.
    mx_sym = mx.sym.SoftmaxOutput(data=sym, name='softmax')
    model = mx.mod.Module(symbol=mx_sym, context=mx.cpu(),
                          label_names=['softmax_label'])
    model.bind(data_shapes=data_iter.provide_data,
               label_shapes=data_iter.provide_label)
    model.set_params(args, auxs)
    return model
In [23]:
sym_params = block2symbol(net)
In [24]:
net_mod = symbol2mod(*sym_params, train_iter)
Alternative Method¶
Serialise the Gluon model to file using .export(), then load the serialised model as an MXNet Module with Module.load() so that Xfer can be used.
In [25]:
# model_name = 'gluon-model'
# net.export(model_name)
# mod = mx.mod.Module.load(model_name, 0, label_names=[])
# os.remove(model_name+'-symbol.json')
# os.remove(model_name+'-0000.params')
Inspect Module¶
In [26]:
mh = xfer.model_handler.ModelHandler(net_mod)
In [27]:
mh.layer_names
Out[27]:
['hybridsequential0_conv0_fwd',
'hybridsequential0_conv0_relu_fwd',
'hybridsequential0_pool0_fwd',
'hybridsequential0_conv1_fwd',
'hybridsequential0_conv1_relu_fwd',
'hybridsequential0_pool1_fwd',
'hybridsequential0_flatten0_reshape0',
'hybridsequential0_dense0_fwd',
'hybridsequential0_dense0_relu_fwd',
'hybridsequential0_dense1_fwd',
'softmax']
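Beyond listing layer names, ModelHandler can also modify the extracted network. A hedged sketch, assuming the drop_layer_top method of Xfer's ModelHandler API (check your installed version):
In [ ]:
# Drop the top (softmax) layer from the handler's copy of the network;
# the last remaining layer should then be 'hybridsequential0_dense1_fwd'.
mh.drop_layer_top()
mh.layer_names[-1]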
Neural Network Repurposer¶
In [28]:
# Note: target_class_count=26 exceeds MNIST's 10 classes; 10 would suffice for
# this dummy example, and the extra output units are effectively unused.
repFT = xfer.NeuralNetworkFineTuneRepurposer(source_model=net_mod,
                                             transfer_layer_name='hybridsequential0_dense0_relu_fwd',
                                             target_class_count=26, num_epochs=2)
In [29]:
repFT.repurpose(train_iter)
WARNING:root:Already bound, ignoring bind()
/anaconda/envs/xfer-env/lib/python3.6/site-packages/mxnet/module/base_module.py:488: UserWarning: Parameters already initialized and force_init=False. init_params call ignored.
allow_missing=allow_missing, force_init=force_init)
In [30]:
predictionsFT = repFT.predict_label(val_iter)
In [32]:
print(classification_report(mnist['test_label'], predictionsFT,
                            digits=3))
             precision    recall  f1-score   support

          0      0.960     0.990     0.975       980
          1      0.984     0.989     0.986      1135
          2      0.968     0.965     0.967      1032
          3      0.967     0.972     0.970      1010
          4      0.977     0.971     0.974       982
          5      0.975     0.975     0.975       892
          6      0.974     0.966     0.970       958
          7      0.970     0.959     0.964      1028
          8      0.966     0.966     0.966       974
          9      0.966     0.954     0.960      1009

avg / total      0.971     0.971     0.971     10000
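For a single summary figure alongside the per-class report, plain accuracy can be computed with scikit-learn (an illustrative addition, not in the original notebook):
In [ ]:
from sklearn.metrics import accuracy_score

accuracy_score(mnist['test_label'], predictionsFT)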
Meta-model Repurposer¶
In [33]:
repLR = xfer.LrRepurposer(source_model=net_mod, feature_layer_names=['hybridsequential0_dense0_fwd'])
In [34]:
repLR.repurpose(train_iter)
/anaconda/envs/xfer-env/lib/python3.6/site-packages/sklearn/linear_model/sag.py:326: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
"the coef_ did not converge", ConvergenceWarning)
In [35]:
predictionsLR = repLR.predict_label(val_iter)
In [36]:
print(classification_report(mnist['test_label'], predictionsLR,
                            digits=3))
             precision    recall  f1-score   support

          0      0.990     0.993     0.991       980
          1      0.991     0.996     0.993      1135
          2      0.985     0.989     0.987      1032
          3      0.987     0.989     0.988      1010
          4      0.992     0.990     0.991       982
          5      0.979     0.982     0.980       892
          6      0.990     0.984     0.987       958
          7      0.983     0.985     0.984      1028
          8      0.987     0.986     0.986       974
          9      0.989     0.977     0.983      1009

avg / total      0.987     0.987     0.987     10000
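A trained repurposer can be serialised and reloaded for later use. A hedged sketch, assuming Xfer's save/load serialisation API (the file prefix 'mnist_lr' is arbitrary):
In [ ]:
# Persist the repurposer (source model plus fitted meta-model) and reload it.
repLR.save('mnist_lr')
loaded_repurposer = xfer.load('mnist_lr')
predictions = loaded_repurposer.predict_label(val_iter)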