LAB 2.4 - Model instrumentation

LAB 2.4 - Model instrumentation#

The labs require a tensorflow version lower than the default one used in Google Colab. Run the following cell to downgrade TensorFlow accordingly.

import os
def downgrade_tf_version():
    os.system("!yes | pip uninstall -y tensorflow")
    os.system("!yes | pip install tensorflow==2.12.0")
    os.kill(os.getpid(), 9)
downgrade_tf_version()

!wget -nc --no-cache -O init.py -q https://raw.githubusercontent.com/rramosp/2021.deeplearning/main/content/init.py
import init; init.init(force_download=False); init.get_weblink() 

from local.lib.rlxmoocapi import submit, session
import inspect
session.LoginSequence(endpoint=init.endpoint, course_id=init.course_id, lab_id="L02.04", varname="student");

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import keras
from IPython.display import Image
import pandas as pd

%matplotlib inline

LAB SUMMARY#

In this lab you will be implementing Tensorflow Callbacks that will be invoked at different stages when a model gets trained.

Read carefully the following tutorial: https://www.tensorflow.org/guide/keras/custom_callback

load sample MNIST data as customary#

mnist = pd.read_csv("local/data/mnist1.5k.csv.gz", compression="gzip", header=None).values
X=mnist[:,1:785]/255.
y=mnist[:,0]
print("dimension de las imagenes y las clases", X.shape, y.shape)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
X_train = X_train.astype(np.float32)
X_test  = X_test.astype(np.float32)
y_train_oh = np.eye(10)[y_train]
y_test_oh  = np.eye(10)[y_test]
print(X_train.shape, y_train_oh.shape)

from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import Callback

A basic multi layered dense model#

observe that the function allows us to parametrize the number of hidden layers and their activation function

def get_model(input_dim=784, output_dim=10, num_hidden_layers=6, hidden_size=10, activation="relu"):

    model = Sequential()
    model.add(Dense(hidden_size, activation=activation, input_dim=input_dim, name="Layer_%02d_Input"%(0)))
    
    for i in range(num_hidden_layers):
        model.add(Dense(hidden_size, activation=activation, name="Layer_%02d_Hidden"%(i+1)))
   
    model.add(Dense(output_dim, activation="softmax", name="Layer_%02d_Output"%(num_hidden_layers+1)))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

TASK 1: Weights monitoring callback#

Create a callback that:

when each epoch ends, retrieves the weights of the layer with name self.layer_name.
gets is kernels weights.
computes the mean and standard deviation of those weights.
appends them to self.mean_per_epoch and self.std_per_epoch

When used with a model, at the end of any training it will contain a list of weights means and another list of weights stds, both with one element per each epoch. With this we can monitor how the weights in a specific layer progress over the training process. Tensorboard offers a similar functionality, but the goal with this task is to get acquainted with the callback mechanism.

def LayerWeightsCallback(layer_name):
    class LayerWeightsCallback_class(Callback):

        def __init__(self, layer_name):
            super().__init__()
            self.mean_per_epoch = []
            self.std_per_epoch = []
            self.layer_name = layer_name

        def on_epoch_end(self, epoch, logs=None):
            w = ... # get the weights from layer_name in self.model
            ... # append to self.mean_per_epoch the weights mean 
            ... # append to self.std_per_epoch the weights std 
    
    return LayerWeightsCallback_class(layer_name)

manually test your code with the following cell. Your callback should report the weights mean and std doubling at every epoch, just with the same values as they are being computed within the for loop. The loop simulates training epochs.

model = get_model(input_dim=2, output_dim=3, num_hidden_layers=1, hidden_size=2, activation="sigmoid")
layer_names = [i.name for i in model.layers if not "Input" in i.name]
layer = model.get_layer(np.random.choice(layer_names))

cb = LayerWeightsCallback(layer.name)
cb.model = model

m, s, wh = [], [], []
for epoch in range(3):
    w = layer.get_weights()[0]
    wh.append(w)
    print ("epoch", epoch, "weights mean/std", np.mean(w),np.std(w))
    cb.on_epoch_end(epoch)    
    layer.set_weights([i*2 for i in layer.get_weights()])    
    
print ("\nweight mean at each epoch", cb.mean_per_epoch)
print ("weight std  at each epoch", cb.std_per_epoch)

Now you can use your class to keep track of weights while training. We choose to keep track of weights with your callback for all the layers.

Observe how we plot the weights progress with the data gathered by your callback. What interpretation can you give to the plots?

model = get_model(num_hidden_layers=3, activation="sigmoid")
cbs = [LayerWeightsCallback(i.name) for i in model.layers]
model.fit(X_train, y_train_oh, epochs=100, batch_size=8, 
          validation_data=(X_test, y_test_oh), callbacks=cbs)

def plot_epochs(cbset, title=""):
    cm = plt.cm.Blues
    plt.figure(figsize=(12,3))
    plt.subplot(121)
    for i,cb in enumerate(cbset):
        plt.plot(cb.mean_per_epoch, label=cb.layer_name, color=cm(int(255*(i+1)/(len(cbset)))))
        plt.xlabel("epochs"); plt.grid(); plt.title(title+" mean")
    plt.subplot(122)
    for i,cb in enumerate(cbset):
        plt.plot(cb.std_per_epoch, label=cb.layer_name, color=cm(int(255*(i+1)/(len(cbset)))))
        plt.xlabel("epochs"); plt.grid(); plt.title(title+" std")
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    
plot_epochs(cbs, "WEIGHTS")

../_images/09118a224366b3b27772256063639b34bbc7571f17999ed9f3ac9003422df1d3.png

Registra tu solución en linea

student.submit_task(namespace=globals(), task_id='T1');

TASK 2: Activations monitoring callback#

observe how, given a model and a certain input, we can get the activations at different layers in a very straight forward manner. If we do this before training (.fit) it is also ok, the model will simply use the initial random weights.

model = get_model(num_hidden_layers=3, activation="sigmoid")
model.summary()

activations_layer_0 = model.layers[0](X_train)
activations_layer_1 = model.layers[1](activations_layer_0)
activations_layer_0.shape, activations_layer_1.shape

Create a callback that:

when each epoch ends, feeds self.X into the model and retrieves the activations at the output of layer with name self.layer_name.
computes the mean and standard deviation of those activations.
appends them to self.mean_per_epoch and self.std_per_epoch

When used with a model, at the end of any training it will contain a list of activations means and another list of activation stds, both with one element per each epoch. With this we can monitor how the activation in a specific layer progress over the training process. Tensorboard offers a similar functionality, but the goal with this task is to get acquainted with the callback mechanism.

def TrainValActivationsCallback(layer_name, X):
    class TrainValActivationsCallback_class(Callback):

        def __init__(self, layer_name, X):
            super().__init__()
            self.mean_per_epoch = []
            self.std_per_epoch = []
            self.layer_name = layer_name
            self.X = X

        def on_epoch_end(self, epoch, logs=None):
            a = ... # feed self.X into self.model and get the activations at layer_name
            ... # append to self.mean_per_epoch the activations mean 
            ... # append to self.std_per_epoch the activations std 
    
    return TrainValActivationsCallback_class(layer_name, X)

test your code with the following cell. Observe that

we create a small input dataset and model
invoke your callback implementation
your callback must contain two lists: mean_per_epoch and std_per_epoch with one single value each
you can check that single value with our computation. We do this computation in two different ways (1) by using explicitly the model layers; and (2) by using matrix multiplication with layers weights (linear activation== no activation)

X_in = np.random.random(size=(5,2)).astype(np.float32)
print ("input data\n", X_in)
model = get_model(input_dim=2, output_dim=3, num_hidden_layers=2, hidden_size=2, activation="linear")
layer_name = 'Layer_02_Hidden'
layer = model.get_layer(layer_name)

cb = TrainValActivationsCallback(layer.name, X_in)
cb.model = model
cb.on_epoch_end(epoch)    
    
print ("\nactivations at", layer_name)
print ("\nactivation mean/std with your callback", cb.mean_per_epoch, cb.std_per_epoch)

l0,l1,l2,l3 = model.layers
a = l2(l1(l0(X_in))).numpy() 
print ("using model layer functions            ", a.mean(), a.std())

a = X_in.dot(l0.get_weights()[0]).dot(l1.get_weights()[0]).dot(l2.get_weights()[0])
print ("manual matrix mult linear activation   ", a.mean(), a.std())

Now you can use your class to keep track of activations while training. We choose to keep track of activations with your callback for all the layers.

Observe how we plot the activations progress with the data gathered by your callback. What interpretation can you give to the plots?

In this case, we are creating a callback for each layer for train and for test data. If activations are similar in both train and test it may mean that the train/test splits are being treated similarly by the model.

model = get_model(num_hidden_layers=3, activation="sigmoid")
cbs_train = [TrainValActivationsCallback(i.name, X_train) for i in model.layers]
cbs_test  = [TrainValActivationsCallback(i.name, X_test) for i in model.layers]
model.fit(X_train, y_train_oh, epochs=100, batch_size=32, 
          validation_data=(X_test, y_test_oh), callbacks=cbs_train+cbs_test)

plot_epochs(cbs_train, "ACTIVATIONS TRAIN")

../_images/d226c0c8176b41e721bd399acbcb36fa840b5f0988f77538632384b39fe3f70d.png

plot_epochs(cbs_test, "ACTIVATIONS TEST")

../_images/3fbf371a1ad844540d04c589ebe8433d1228eddb910a8004cce7365ebca8202c.png