Using Gluon with Xfer

This notebook demonstrates how to use neural networks defined and trained with Gluon as source models for Transfer Learning with Xfer.

TL;DR: Gluon models can be used with Xfer provided they are built from HybridBlocks, so that the underlying symbol can be extracted.

This demo is a toy example: a CNN source model is trained on MNIST using Gluon and then repurposed for MNIST again. This is obviously redundant, but it shows the steps required to use a Gluon model with Xfer.
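
To see why HybridBlocks matter, here is a minimal sketch with a toy block (hypothetical names, not used in the rest of the demo): calling a HybridBlock on an mx.sym.Variable composes a Symbol, which is what Xfer needs from a source model.

import mxnet as mx
from mxnet import gluon

# Toy HybridBlock: applying it to a Symbol yields a Symbol
blk = gluon.nn.HybridSequential()
with blk.name_scope():
    blk.add(gluon.nn.Dense(10))

sym = blk(mx.sym.Variable('data'))
print(type(sym))  # an mxnet Symbol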

In [1]:
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
mx.random.seed(1)

import time
from sklearn.metrics import classification_report
from scipy import io as scipyio
import urllib.request
import zipfile
import os
import logging

import xfer

Train a CNN with Gluon

Using code taken from The Straight Dope

In [2]:
ctx = mx.cpu()
In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
def transform(data, label):
    return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)
In [4]:
num_fc = 512
net = gluon.nn.HybridSequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    # The Flatten layer collapses all axes except the first into a single axis.
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(num_fc, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))
In [5]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
In [6]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
In [7]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
In [8]:
net.hybridize()
In [9]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
In [10]:
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    start_time_train = time.time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
    end_time_train = time.time()

    start_time_eval = time.time()
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    end_time_eval = time.time()

    epoch_time = end_time_train - start_time_train
    eval_time = end_time_eval - start_time_eval
    print("Epoch {}.\nLoss: {}, Train_acc {}, Test_acc {}, Epoch_time {}, Eval_time {}".format(e, moving_loss, train_accuracy, test_accuracy, epoch_time, eval_time))
Epoch 0.
Loss: 0.11107716270094219, Train_acc 0.9745833333333334, Test_acc 0.9742, Epoch_time 54.26378679275513, Eval_time 23.154165029525757

Load MNIST dataset

Load MNIST into MXNet data iterators, which are the input format consumed by Xfer's repurposers below.

In [19]:
mnist = mx.test_utils.get_mnist()
In [20]:
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)

Convert Gluon model net to Module

Adapted from a snippet found `here <https://github.com/apache/incubator-mxnet/issues/9374>`__

The symbol and parameters are extracted from the Gluon model and used to construct an MXNet Module object.

In [21]:
def block2symbol(block):
    # Trace the HybridBlock with a symbolic placeholder to obtain its Symbol
    data = mx.sym.Variable('data')
    sym = block(data)
    # Split the Gluon parameters into argument and auxiliary parameters,
    # as expected by Module.set_params (this CNN has no auxiliary states)
    arg_names = set(sym.list_arguments())
    aux_names = set(sym.list_auxiliary_states())
    args = {}
    auxs = {}
    for k, v in block.collect_params().items():
        if k in arg_names:
            args[k] = v.data()
        elif k in aux_names:
            auxs[k] = v.data()
    return sym, args, auxs
In [22]:
def symbol2mod(sym, args, auxs, data_iter):
    # Add a softmax output on top of the extracted symbol and wrap it in a Module
    mx_sym = mx.sym.SoftmaxOutput(data=sym, name='softmax')
    model = mx.mod.Module(symbol=mx_sym, context=mx.cpu(),
                          label_names=['softmax_label'])
    # Bind to the iterator's data/label shapes and load the Gluon weights
    model.bind(data_shapes=data_iter.provide_data,
               label_shapes=data_iter.provide_label)
    model.set_params(args, auxs)
    return model
In [23]:
sym_params = block2symbol(net)
In [24]:
net_mod = symbol2mod(*sym_params, train_iter)
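
As a quick, optional sanity check, the converted Module can be scored directly on the test iterator; it should roughly reproduce the Gluon test accuracy reported above.

# Illustrative check: the converted Module should match the Gluon test accuracy
print(net_mod.score(val_iter, 'acc'))
val_iter.reset()  # reset the iterator so it can be reused below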

Alternative Method

Serialise the Gluon model to file using .export() (the block must have been hybridized and run forward at least once, as it has been during training above).

Load the serialised model back as an MXNet Module with Module.load() so that Xfer can be used.

In [25]:
# model_name = 'gluon-model'
# net.export(model_name)

# mod = mx.mod.Module.load(model_name, 0, label_names=[])
# os.remove(model_name+'-symbol.json')
# os.remove(model_name+'-0000.params')

Inspect Module

Wrapping the Module in Xfer's ModelHandler lets us list its layers and choose a transfer layer.

In [26]:
mh = xfer.model_handler.ModelHandler(net_mod)
In [27]:
mh.layer_names
Out[27]:
['hybridsequential0_conv0_fwd',
 'hybridsequential0_conv0_relu_fwd',
 'hybridsequential0_pool0_fwd',
 'hybridsequential0_conv1_fwd',
 'hybridsequential0_conv1_relu_fwd',
 'hybridsequential0_pool1_fwd',
 'hybridsequential0_flatten0_reshape0',
 'hybridsequential0_dense0_fwd',
 'hybridsequential0_dense0_relu_fwd',
 'hybridsequential0_dense1_fwd',
 'softmax']
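
The layer names come from the Gluon name scope. For convenience, the transfer layer used in the next section can be picked straight from this list instead of hard-coding the string:

# 'hybridsequential0_dense0_relu_fwd' is the third layer from the end
transfer_layer = mh.layer_names[-3]
print(transfer_layer)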

Neural Network Repurposer

NeuralNetworkFineTuneRepurposer keeps the source network up to the chosen transfer layer, adds a new output layer sized for the target task (target_class_count classes) and fine-tunes the resulting network.

In [28]:
repFT = xfer.NeuralNetworkFineTuneRepurposer(source_model=net_mod,
                                             transfer_layer_name='hybridsequential0_dense0_relu_fwd',
                                             target_class_count=26, num_epochs=2)
In [29]:
repFT.repurpose(train_iter)
WARNING:root:Already bound, ignoring bind()
/anaconda/envs/xfer-env/lib/python3.6/site-packages/mxnet/module/base_module.py:488: UserWarning: Parameters already initialized and force_init=False. init_params call ignored.
  allow_missing=allow_missing, force_init=force_init)
In [30]:
predictionsFT = repFT.predict_label(val_iter)
In [32]:
print(classification_report(mnist['test_label'], predictionsFT,
      digits=3))
             precision    recall  f1-score   support

          0      0.960     0.990     0.975       980
          1      0.984     0.989     0.986      1135
          2      0.968     0.965     0.967      1032
          3      0.967     0.972     0.970      1010
          4      0.977     0.971     0.974       982
          5      0.975     0.975     0.975       892
          6      0.974     0.966     0.970       958
          7      0.970     0.959     0.964      1028
          8      0.966     0.966     0.966       974
          9      0.966     0.954     0.960      1009

avg / total      0.971     0.971     0.971     10000

Meta-model Repurposer

LrRepurposer extracts features from the given feature_layer_names of the source model and trains a logistic regression meta-model on them.

In [33]:
repLR = xfer.LrRepurposer(source_model=net_mod, feature_layer_names=['hybridsequential0_dense0_fwd'])
In [34]:
repLR.repurpose(train_iter)
/anaconda/envs/xfer-env/lib/python3.6/site-packages/sklearn/linear_model/sag.py:326: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge
  "the coef_ did not converge", ConvergenceWarning)
In [35]:
predictionsLR = repLR.predict_label(val_iter)
In [36]:
print(classification_report(mnist['test_label'], predictionsLR,
      digits=3))
             precision    recall  f1-score   support

          0      0.990     0.993     0.991       980
          1      0.991     0.996     0.993      1135
          2      0.985     0.989     0.987      1032
          3      0.987     0.989     0.988      1010
          4      0.992     0.990     0.991       982
          5      0.979     0.982     0.980       892
          6      0.990     0.984     0.987       958
          7      0.983     0.985     0.984      1028
          8      0.987     0.986     0.986       974
          9      0.989     0.977     0.983      1009

avg / total      0.987     0.987     0.987     10000
