Using Gluon with Xfer

This notebook demonstrates how to use neural networks defined and trained with Gluon as source models for Transfer Learning with Xfer.

TL;DR Gluon models can be used with Xfer provided they use HybridBlocks so that the symbol can be extracted.

This demo is a dummy example where a CNN source model is trained on MNIST using Gluon and then repurposed for MNIST again. This is obviously redundant but shows the steps required to use Gluon with Xfer.

import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

import time
from sklearn.metrics import classification_report
from scipy import io as scipyio
import urllib.request
import zipfile
import os
import logging

import xfer

Train CNN with gluon

Using code taken from The Straight Dope

# Input/output sizes for MNIST (presumably 28 * 28 = 784 pixels and 10 digit classes).
num_inputs = 784
num_outputs = 10
# Mini-batch size shared by the Gluon data loaders and the MXNet iterators below.
batch_size = 64
# All computation in this notebook runs on the CPU.
ctx = mx.cpu()
def transform(data, label):
    """Prepare one sample: cast to float32, move the last image axis first, scale by 1/255.

    Returns the tuple ``(image, label)`` with both entries cast to float32.
    """
    image = data.astype(np.float32)
    # Axis order (2, 0, 1): presumably HWC -> CHW, the layout the Conv2D layers consume.
    image = nd.transpose(image, (2, 0, 1)) / 255
    return image, label.astype(np.float32)
# Gluon data loaders over the MNIST train/test splits; `transform` is applied per sample.
# Only the training loader is shuffled so that evaluation order stays deterministic.
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)
# Width of the hidden fully-connected layer.
num_fc = 512
# HybridSequential (rather than Sequential) so that the symbol can be extracted for Xfer.
net = gluon.nn.HybridSequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    # The Flatten layer collapses all axis, except the first one, into one axis.
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(num_fc, activation="relu"))
    # Output layer: one unit per class, fed as logits to the softmax cross-entropy loss.
    net.add(gluon.nn.Dense(num_outputs))
# Hybridize so the network runs as a cached symbolic graph; this is also a
# prerequisite for the `.export()` route described under "Alternative Method".
net.hybridize()
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})
def evaluate_accuracy(data_iterator, net):
    """Return the accuracy of ``net`` accumulated over every batch of ``data_iterator``.

    Each batch is copied to the global ``ctx``; the predicted class is the arg-max of the
    network output along axis 1.
    """
    metric = mx.metric.Accuracy()
    for batch, target in data_iterator:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)
        predicted = nd.argmax(net(batch), axis=1)
        metric.update(preds=predicted, labels=target)
    # `get()` yields a (name, value) pair; only the value is returned.
    name_and_value = metric.get()
    return name_and_value[1]
# Number of passes over the training set and the weight of the newest batch in the
# exponentially smoothed loss that is reported after each epoch.
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    start_time_train = time.time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        # Back-propagate and apply one optimizer step. Without these two calls the
        # trainer is never used and the weights stay at their initial values.
        # The step is normalised by the number of samples in the batch.
        loss.backward()
        trainer.step(data.shape[0])

        #  Keep a moving average of the losses
        curr_loss = nd.mean(loss).asscalar()
        # The very first batch seeds the moving average; later batches are blended in.
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
    end_time_train = time.time()

    # Evaluate on both splits and time the evaluation separately from training.
    start_time_eval = time.time()
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    end_time_eval = time.time()

    epoch_time = end_time_train - start_time_train
    eval_time = end_time_eval - start_time_eval
    print("Epoch {}.\nLoss: {}, Train_acc {}, Test_acc {}, Epoch_time {}, Eval_time {}".format(e, moving_loss, train_accuracy, test_accuracy, epoch_time, eval_time))
Epoch 0.
Loss: 0.11107716270094219, Train_acc 0.9745833333333334, Test_acc 0.9742, Epoch_time 54.26378679275513, Eval_time 23.154165029525757

Load MNIST dataset

Load MNIST into data iterators

# Load MNIST as arrays and wrap the train/test splits in MXNet data iterators,
# which is the input format used for the Module and the repurposers below.
mnist = mx.test_utils.get_mnist()
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)

Convert Gluon model net to Module

Adapted from a snippet found online (the link target is missing from this copy).

From the Gluon model, the symbol and parameters are extracted and used to define a Module object.

def block2symbol(block):
    """Extract the symbol and parameter values from a Gluon block.

    The block is called on a symbolic ``'data'`` variable to obtain its output symbol,
    so it has to be a HybridBlock.

    Returns the tuple ``(sym, args, auxs)``. Every parameter of the block is copied
    into both ``args`` and ``auxs``, keyed by parameter name.
    """
    data = mx.sym.Variable('data')
    sym = block(data)
    args = {}
    auxs = {}
    for k, v in block.collect_params().items():
        # Round-trip through NumPy so each dictionary holds its own copy of the values.
        args[k] = mx.nd.array(v.data().asnumpy())
        auxs[k] = mx.nd.array(v.data().asnumpy())
    return sym, args, auxs
def symbol2mod(sym, args, auxs, data_iter):
    """Build a bound MXNet Module from a symbol and its parameters.

    A ``SoftmaxOutput`` layer named ``'softmax'`` is appended to ``sym``, the Module is
    bound to the data and label shapes provided by ``data_iter``, and ``args`` / ``auxs``
    are loaded as its parameters.

    Returns the resulting ``mx.mod.Module``.
    """
    mx_sym = mx.sym.SoftmaxOutput(data=sym, name='softmax')
    # 'softmax_label' is the label variable created by the SoftmaxOutput named 'softmax'.
    model = mx.mod.Module(symbol=mx_sym, context=mx.cpu(),
                          label_names=['softmax_label'])
    model.bind( data_shapes = data_iter.provide_data,
                label_shapes = data_iter.provide_label )
    model.set_params(args, auxs)
    return model
# Convert the trained Gluon network into a Module: extract (symbol, args, auxs) first,
# then bind them using the shapes advertised by the training iterator.
sym_params = block2symbol(net)
net_mod = symbol2mod(*sym_params, data_iter=train_iter)

Alternative Method

Serialise Gluon model to file using .export().

Load the serialised model as an MXNet Module with Module.load() so that xfer can be used.

# model_name = 'gluon-model'
# net.export(model_name)

# mod = mx.mod.Module.load(model_name, 0, label_names=[])
# os.remove(model_name+'-symbol.json')
# os.remove(model_name+'-0000.params')

Inspect Module

# Wrap the converted Module in an xfer ModelHandler (presumably to inspect its layers).
mh = xfer.model_handler.ModelHandler(net_mod)
In [27]:

Neural Network Repurposer

# Fine-tune the source network on the target data. Layers up to and including
# `transfer_layer_name` (the ReLU output of the hidden dense layer) are transferred.
# MNIST has 10 digit classes, so the new output layer gets 10 units.
repFT = xfer.NeuralNetworkFineTuneRepurposer(source_model=net_mod,
                                             transfer_layer_name='hybridsequential0_dense0_relu_fwd',
                                             target_class_count=10, num_epochs=2)
# The repurposer must be trained on the target data before it can predict.
repFT.repurpose(train_iter)
predictionsFT = repFT.predict_label(val_iter)
print(classification_report(mnist['test_label'], predictionsFT,
                            digits=3))
             precision    recall  f1-score   support

          0      0.960     0.990     0.975       980
          1      0.984     0.989     0.986      1135
          2      0.968     0.965     0.967      1032
          3      0.967     0.972     0.970      1010
          4      0.977     0.971     0.974       982
          5      0.975     0.975     0.975       892
          6      0.974     0.966     0.970       958
          7      0.970     0.959     0.964      1028
          8      0.966     0.966     0.966       974
          9      0.966     0.954     0.960      1009

avg / total      0.971     0.971     0.971     10000

Meta-model Repurposer

# Meta-model repurposing: features are taken from the hidden dense layer of the source
# model and used as input to a logistic-regression classifier.
repLR = xfer.LrRepurposer(source_model=net_mod, feature_layer_names=['hybridsequential0_dense0_fwd'])
# The repurposer must be trained on the target data before it can predict.
repLR.repurpose(train_iter)
predictionsLR = repLR.predict_label(val_iter)
print(classification_report(mnist['test_label'], predictionsLR,
                            digits=3))
             precision    recall  f1-score   support

          0      0.990     0.993     0.991       980
          1      0.991     0.996     0.993      1135
          2      0.985     0.989     0.987      1032
          3      0.987     0.989     0.988      1010
          4      0.992     0.990     0.991       982
          5      0.979     0.982     0.980       892
          6      0.990     0.984     0.987       958
          7      0.983     0.985     0.984      1028
          8      0.987     0.986     0.986       974
          9      0.989     0.977     0.983      1009

avg / total      0.987     0.987     0.987     10000

