How to do it...

We proceed with the recipe as follows:

  1. We import the required modules: TensorFlow, NumPy, Pandas for reading the .csv file, Matplotlib, and math (used later for weight initialization):
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
  2. The training, validation, and test data are obtained using the helper functions (one plausible implementation of preprocess_data is sketched after this step):
X_train, Y_train = preprocess_data(train_data)
X_val, Y_val = preprocess_data(val_data)
X_test, Y_test = preprocess_data(test_data)
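
The preprocess_data helper comes from the recipe's preparation code. For reference, here is one plausible sketch, assuming a FER2013-style .csv layout (an emotion column with labels 0-6 and a pixels column of 2,304 space-separated intensities per image); the actual helper may differ:

def preprocess_data(data):
    # Hypothetical sketch: parse the space-separated pixel strings into
    # 2,304-dimensional vectors scaled to [0, 1]
    X = np.array([np.array(row.split(), dtype=np.float32)
                  for row in data['pixels']]) / 255.0
    # One-hot encode the seven emotion labels
    Y = np.eye(7, dtype=np.float32)[data['emotion'].values]
    return X, Y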
  3. Let us explore our data a little. We plot the mean image and find the number of images in the training, validation, and test datasets:
# Explore Data
mean_image = X_train.mean(axis=0)
std_image = np.std(X_train, axis=0)
print("Training Data set has {} images".format(len(X_train)))
print("Validation Data set has {} images".format(len(X_val)))
print("Test Data set has {} images".format(len(X_test)))
plt.imshow(mean_image.reshape(48,48), cmap='gray')

The output shows the number of images in each dataset and the plot of the mean image:

  4. We also look at images from the training sample and their respective labels:
classes = ['angry','disgust','fear','happy','sad','surprise','neutral']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(np.argmax(Y_train, axis=1) == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].reshape(48,48), cmap='gray') # pixel height and width
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()

The plot is as follows:

  5. Next, we define the RBM stack; each RBM takes the output of the previous RBM as its input:
RBM_hidden_sizes = [1500, 700, 400] # create 3 layers of RBM with sizes 1500, 700, and 400
#Set input as training data
inpX = X_train
#Create list to hold our RBMs
rbm_list = []
#Size of inputs is the number of inputs in the training set
input_size = inpX.shape[1]
#For each RBM we want to generate
for i, size in enumerate(RBM_hidden_sizes):
    print('RBM: ', i, ' ', input_size, '->', size)
    rbm_list.append(RBM(input_size, size))
    input_size = size

This generates three RBMs: the first RBM with 2,304 (48 × 48) input units and 1,500 hidden units, the second RBM with 1,500 input and 700 hidden units, and finally the third RBM with 700 input and 400 hidden units.
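
The RBM class itself comes from the recipe's preparation code. For reference, the following is a minimal sketch of one possible CD-1 (single-step contrastive divergence) implementation that matches the interface this recipe relies on (fit, rbm_output, set_session, and the _W and _c attributes read later by the DBN); the actual class may differ in its details:

class RBM(object):
    def __init__(self, input_size, output_size, lr=1.0, batch_size=100):
        self._input_size = input_size
        self._output_size = output_size
        self._lr = lr
        self._batch_size = batch_size
        # Weights and hidden bias, later read by DBN.load_from_rbms
        self._W = np.zeros([input_size, output_size], np.float32)
        self._c = np.zeros([output_size], np.float32)  # hidden bias
        self._b = np.zeros([input_size], np.float32)   # visible bias

    def set_session(self, session):
        self.session = session

    def _sample(self, probs):
        # Bernoulli sampling from activation probabilities
        return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))

    def fit(self, X, epochs=5):
        # Build the CD-1 graph: one Gibbs step starting from the data
        v0 = tf.placeholder(tf.float32, [None, self._input_size])
        W = tf.placeholder(tf.float32, [self._input_size, self._output_size])
        b = tf.placeholder(tf.float32, [self._input_size])
        c = tf.placeholder(tf.float32, [self._output_size])
        h0 = self._sample(tf.nn.sigmoid(tf.matmul(v0, W) + c))
        v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(W)) + b)
        h1 = tf.nn.sigmoid(tf.matmul(v1, W) + c)
        # Contrastive-divergence updates: positive minus negative statistics
        n = tf.cast(tf.shape(v0)[0], tf.float32)
        dW = (tf.matmul(tf.transpose(v0), h0) - tf.matmul(tf.transpose(v1), h1)) / n
        db = tf.reduce_mean(v0 - v1, 0)
        dc = tf.reduce_mean(h0 - h1, 0)
        err = tf.reduce_mean(tf.square(v0 - v1))
        for epoch in range(epochs):
            for start in range(0, len(X), self._batch_size):
                batch = X[start:start + self._batch_size]
                feed = {v0: batch, W: self._W, b: self._b, c: self._c}
                dW_, db_, dc_ = self.session.run([dW, db, dc], feed_dict=feed)
                self._W += self._lr * dW_
                self._b += self._lr * db_
                self._c += self._lr * dc_
            e = self.session.run(err, feed_dict={v0: X, W: self._W, b: self._b, c: self._c})
            print('Epoch %d reconstruction error: %f' % (epoch, e))
        return e

    def rbm_output(self, X):
        # Hidden-layer activation probabilities, used as input to the next RBM
        v = tf.placeholder(tf.float32, [None, self._input_size])
        out = tf.nn.sigmoid(tf.matmul(v, self._W) + self._c)
        return self.session.run(out, feed_dict={v: X})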

  6. We train each RBM one by one; this technique is also called greedy layer-wise training. In the original paper, each RBM was trained on MNIST for 30 epochs, whereas here each RBM is trained for only 5 epochs, so increasing the number of epochs should improve the performance of the network:
# Greedy layer-wise training of RBMs
init = tf.global_variables_initializer()
for rbm in rbm_list:
    print('New RBM:')
    # Train a new one
    with tf.Session() as sess:
        sess.run(init)
        rbm.set_session(sess)
        err = rbm.fit(inpX, 5)
        # The output of this RBM becomes the input of the next one
        inpX_n = rbm.rbm_output(inpX)
        print(inpX_n.shape)
        inpX = inpX_n
  7. We define a DBN class. In the class, we build the complete DBN with the three RBM layers and two additional MLP layers. The weights of the RBM layers are loaded from the pre-trained RBMs. We also declare methods to train the DBN and predict with it; for fine-tuning, the network minimizes the softmax cross-entropy loss (a mean square loss is kept as a commented-out alternative):
class DBN(object):

    def __init__(self, sizes, X, Y, eta=0.001, momentum=0.0, epochs=10, batch_size=100):
        # Initialize hyperparameters
        self._sizes = sizes
        print(self._sizes)
        self._sizes.append(1000)  # size of the first FC layer
        self._X = X
        self._Y = Y
        self.N = len(X)
        self.w_list = []
        self.c_list = []
        self._learning_rate = eta
        self._momentum = momentum
        self._epochs = epochs
        self._batchsize = batch_size
        input_size = X.shape[1]

        # Initialization loop
        for size in self._sizes + [Y.shape[1]]:
            # Define upper limit for the uniform distribution range
            max_range = 4 * math.sqrt(6. / (input_size + size))

            # Initialize weights through a random uniform distribution
            self.w_list.append(
                np.random.uniform(-max_range, max_range, [input_size, size]).astype(np.float32))

            # Initialize bias as zeroes
            self.c_list.append(np.zeros([size], np.float32))
            input_size = size

        # Build DBN
        # Create placeholders for input, weights, biases, output
        self._a = [None] * (len(self._sizes) + 2)
        self._w = [None] * (len(self._sizes) + 1)
        self._c = [None] * (len(self._sizes) + 1)
        self._a[0] = tf.placeholder("float", [None, self._X.shape[1]])
        self.y = tf.placeholder("float", [None, self._Y.shape[1]])

        # Define variables and activation function
        for i in range(len(self._sizes) + 1):
            self._w[i] = tf.Variable(self.w_list[i])
            self._c[i] = tf.Variable(self.c_list[i])
        for i in range(1, len(self._sizes) + 2):
            self._a[i] = tf.nn.sigmoid(tf.matmul(self._a[i - 1], self._w[i - 1]) + self._c[i - 1])

        # Define the cost function
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=self._a[-1]))
        # cost = tf.reduce_mean(tf.square(self._a[-1] - self.y))

        # Define the training operation (Adam optimizer minimizing the cost function)
        self.train_op = tf.train.AdamOptimizer(learning_rate=self._learning_rate).minimize(cost)

        # Prediction operation
        self.predict_op = tf.argmax(self._a[-1], 1)

    # Load data from the pre-trained RBMs
    def load_from_rbms(self, dbn_sizes, rbm_list):
        # Check if expected sizes are correct
        assert len(dbn_sizes) == len(self._sizes)

        for i in range(len(self._sizes)):
            # Check if for each RBM the expected sizes are correct
            assert dbn_sizes[i] == self._sizes[i]

        # If everything is correct, bring over the weights and biases
        for i in range(len(self._sizes) - 1):
            self.w_list[i] = rbm_list[i]._W
            self.c_list[i] = rbm_list[i]._c

    def set_session(self, session):
        self.session = session

    # Training method
    def train(self, val_x, val_y):
        # For each epoch
        num_batches = self.N // self._batchsize

        batch_size = self._batchsize
        for i in range(self._epochs):
            # For each step
            for j in range(num_batches):
                batch = self._X[j * batch_size: (j * batch_size + batch_size)]
                batch_label = self._Y[j * batch_size: (j * batch_size + batch_size)]

                self.session.run(self.train_op, feed_dict={self._a[0]: batch, self.y: batch_label})

            for j in range(len(self._sizes) + 1):
                # Retrieve weights and biases
                self.w_list[j] = self.session.run(self._w[j])
                self.c_list[j] = self.session.run(self._c[j])

            train_acc = np.mean(np.argmax(self._Y, axis=1) ==
                                self.session.run(self.predict_op, feed_dict={self._a[0]: self._X, self.y: self._Y}))

            val_acc = np.mean(np.argmax(val_y, axis=1) ==
                              self.session.run(self.predict_op, feed_dict={self._a[0]: val_x, self.y: val_y}))
            print(" epoch " + str(i) + "/" + str(self._epochs) + " Training Accuracy: " + str(train_acc) + " Validation Accuracy: " + str(val_acc))

    def predict(self, X):
        return self.session.run(self.predict_op, feed_dict={self._a[0]: X})
  8. Now, we instantiate a DBN object, train it, and predict the labels for the test data (a short accuracy check follows the code):
nNet = DBN(RBM_hidden_sizes, X_train, Y_train, epochs=80)
with tf.Session() as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    nNet.set_session(sess)
    nNet.load_from_rbms(RBM_hidden_sizes, rbm_list)
    nNet.train(X_val, Y_val)
    y_pred = nNet.predict(X_test)
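
Since y_pred holds the predicted class indices and Y_test is one-hot encoded, a quick sketch to score the predictions could look like this (test_acc is a name introduced here for illustration):

# Compare predicted class indices against the one-hot ground truth
test_acc = np.mean(y_pred == np.argmax(Y_test, axis=1))
print("Test Accuracy: {:.4f}".format(test_acc))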