Emotion Classifier using XLNet

No comments

In this post, I demonstrate how to use XLNet to build a simple emotion classifier. The dataset used for this task contains four emotion classes (0 – Anger, 1 – Fear, 2 – Joy, and 3 – Sadness), and I use the pretrained XLNet model to classify each sentence. Before going into the details, please review the XLNet basics (covered in the following video tutorials):

Details of the dataset:

This code is provided to support the hands-on practice after learning the XLNet Basics.

import pandas as pd
import numpy as np
#Please install NLTK
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re
import string
import csv
# please install keras-xlnet 'pip install keras-xlnet'
from keras_xlnet.backend import keras
from keras_bert.layers import Extract
from keras_xlnet import Tokenizer, load_trained_model_from_checkpoint, ATTENTION_TYPE_BI
from collections import namedtuple
import  os

# Path of the source training CSV file for 'Emotion Classification'.
Source_File = "Emotion_Dataset2.csv"

# -------------XLNet----------
# Path of the unzipped "XLNet-Large, Cased: 24-layer, 1024-hidden, 16-heads"
# model folder downloaded from "https://github.com/zihangdai/xlnet".
# The experiment can also be repeated with the XLNet base model.

xlnet_folder_path = '../model/XLNET/xlnet_cased_L-24_H-1024_A-16/'

# Bundle the three pretrained-model file paths (config, checkpoint, vocab)
# into one named tuple so they can be passed around together.
xlnet_pretrained_path = namedtuple('xlnet_pretrained_path',['config', 'model', 'vocab'])
config_path = os.path.join(xlnet_folder_path,'xlnet_config.json')
model_path = os.path.join(xlnet_folder_path,'xlnet_model.ckpt')
vocab_path = os.path.join(xlnet_folder_path,'spiece.model')
paths = xlnet_pretrained_path(config_path,model_path,vocab_path)
# Module-level tokenizer built from the vocab file; the sequence-generation
# code below uses it to encode text and to obtain the PAD/SEP/CLS symbols.
tokenizer = Tokenizer(paths.vocab)

# Samples per batch: used by DataSequence for slicing and passed as
# batch_size when the pretrained checkpoint is loaded.
BATCH_SIZE = 16
# Fixed token length of every encoded sentence; also passed as target_len
# when the pretrained checkpoint is loaded.
SEQ_LEN = 30
# Number of training epochs; 1-2 epochs are enough to smoke-test the code.
EPOCH = 21

# Default path where the fine-tuned weights are stored and later reloaded.
trained_model_path = "xlnet_sentiment.h5"

# Batched data container; code source - keras-xlnet github page
class DataSequence(keras.utils.Sequence):
    """Serve model inputs and labels in slices of ``BATCH_SIZE`` samples.

    ``x`` is a list of parallel input arrays and ``y`` holds the labels;
    every batch slices all of them over the same sample range.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        """Return the number of batches, counting a trailing partial one."""
        full_batches, leftover = divmod(len(self.y), BATCH_SIZE)
        if leftover:
            return full_batches + 1
        return full_batches

    def __getitem__(self, index):
        """Return ``(inputs, labels)`` for the batch at position ``index``."""
        start = index * BATCH_SIZE
        stop = start + BATCH_SIZE
        batch_inputs = [feature[start:stop] for feature in self.x]
        return batch_inputs, self.y[start:stop]

# Main Emotion Classifier class
class Simple_Emotion_Classifier:
    """Fine-tune and query an XLNet-based emotion classifier.

    The network is the pretrained XLNet checkpoint followed by a small dense
    head with a 4-way softmax (the accompanying text lists the classes as
    0-Anger, 1-Fear, 2-Joy and 3-Sadness).  The class relies on the
    module-level ``tokenizer``, ``paths``, ``BATCH_SIZE``, ``SEQ_LEN``,
    ``EPOCH`` and ``trained_model_path`` settings.
    """

    # Compiled once: removes every ASCII punctuation character from a token.
    _PUNCT_RE = re.compile('[%s]' % re.escape(string.punctuation))

    @staticmethod
    def _filter_tokens(tokens, stop_words=()):
        """Strip punctuation and keep purely alphabetic, non-stop-word tokens.

        Args:
            tokens: iterable of word strings.
            stop_words: container of words to drop (exact, case-sensitive
                match); empty by default.

        Returns:
            A list of cleaned tokens in their original order.
        """
        stripped = (
            Simple_Emotion_Classifier._PUNCT_RE.sub('', token) for token in tokens
        )
        # isalpha() is False for the empty string, so tokens that consisted
        # only of punctuation are dropped here as well.
        return [
            token for token in stripped
            if token.isalpha() and token not in stop_words
        ]

    # generate input sequence for xlnet *code source - keras-xlnet github page
    # and modified for the current application requirements
    def generate_sequence(self, Lines, Train_Labels):
        """Encode sentences into fixed-length XLNet inputs.

        Each sentence is tokenized, truncated so it fits, followed by two SEP
        symbols, left-padded with PAD up to ``SEQ_LEN - 1`` and terminated by
        a CLS symbol, giving exactly ``SEQ_LEN`` ids per row.

        Args:
            Lines: sequence of sentences (each is converted with ``str``).
            Train_Labels: sequence of class labels, parallel to ``Lines``.

        Returns:
            A ``DataSequence`` whose inputs are ``[tokens, segments, lengths]``
            and whose targets are the labels.
        """
        tokens, classes = [], []
        # Reserve room for the two SEP symbols and the final CLS symbol;
        # without truncation a long sentence would yield a row longer than
        # SEQ_LEN and the rows could not be stacked into one array.
        max_text_len = SEQ_LEN - 3
        for text, label in zip(Lines, Train_Labels):
            encoded = tokenizer.encode(str(text))[:max_text_len]
            encoded = encoded + [tokenizer.SYM_SEP, tokenizer.SYM_SEP]
            padding = [tokenizer.SYM_PAD] * (SEQ_LEN - 1 - len(encoded))
            tokens.append(padding + encoded + [tokenizer.SYM_CLS])
            classes.append(label)
        tokens, classes = np.array(tokens), np.array(classes)
        # Segment ids are 0 everywhere except the final (CLS) position.
        segments = np.zeros_like(tokens)
        segments[:, -1] = 1
        # One zero per sample, supplied as the third model input.
        lengths = np.zeros_like(tokens[:, :1])
        return DataSequence([tokens, segments, lengths], classes)

    def return_sequence_for_csv_file(self, CSV_train_file):
        """Read ``CSV_train_file`` and return it as a ``DataSequence``."""
        Lines, Train_Labels, _ = self.Read_Lines_Classes_Weights(CSV_train_file)
        return self.generate_sequence(Lines, Train_Labels)

    def _build_classifier(self):
        """Load the pretrained XLNet checkpoint and attach the 4-class head."""
        # Load pretrained model * code-source - keras-xlnet github page *
        base = load_trained_model_from_checkpoint(
            config_path=paths.config,
            checkpoint_path=paths.model,
            batch_size=BATCH_SIZE,
            memory_len=0,
            target_len=SEQ_LEN,
            in_train_phase=False,
            attention_type=ATTENTION_TYPE_BI,
        )
        # Build classification model * code-source - keras-xlnet github page *
        # The last position holds the CLS symbol appended by generate_sequence.
        last = Extract(index=-1, name='Extract')(base.output)
        dense = keras.layers.Dense(units=768, activation='tanh', name='Dense')(last)
        dropout = keras.layers.Dropout(rate=0.1, name='Dropout')(dense)
        output = keras.layers.Dense(units=4, activation='softmax', name='Softmax')(dropout)
        return keras.models.Model(inputs=base.inputs, outputs=output)

    # Function to train the complete model
    def Train_Model(self, CSV_train_file):
        """Fine-tune the classifier on ``CSV_train_file`` and save its weights.

        The weights are written to the module-level ``trained_model_path``.
        """
        model = self._build_classifier()
        model.summary()
        # Fit model
        train_seq = self.return_sequence_for_csv_file(CSV_train_file)
        model.compile(
            optimizer=keras.optimizers.Adam(lr=3e-5),
            loss='sparse_categorical_crossentropy',
            metrics=['sparse_categorical_accuracy'],
        )

        model.fit_generator(
            generator=train_seq,
            epochs=EPOCH,
            # No validation data is supplied, so 'val_loss' is never reported
            # and a callback monitoring it would never trigger; monitor the
            # training loss instead.
            callbacks=[keras.callbacks.EarlyStopping(monitor='loss', patience=2)],
        )
        model.save_weights(trained_model_path)

    # Function to read and preprocess the source file
    def Read_Lines_Classes_Weights(self, SourceFile):
        """Read the training CSV and return cleaned sentences with labels.

        For every row, column position 1 is read as the sentence, position 2
        as the class label and position 3 as a float label weight.  Rows
        whose sentence is empty after stripping are skipped entirely.  Each
        sentence is tokenized, stripped of punctuation, non-alphabetic tokens
        and English stop words; a sentence left empty by the cleaning is
        replaced with ``"hello world"`` so it stays aligned with its label.

        Returns:
            ``(Lines, Train_Labels, Train_Label_Weight)`` - three parallel
            lists.
        """
        Lines = []
        Train_Labels = []
        Train_Label_Weight = []
        # Loading the stop-word list is loop-invariant, so do it once.
        stop_words = set(stopwords.words('english'))
        data = pd.read_csv(SourceFile)
        for row in data.values:
            raw_text = str(row[1]).strip()
            if not raw_text:
                continue
            Train_Labels.append(row[2])  # add train labels
            Train_Label_Weight.append(float(row[3]))  # add train label weight
            # Use the whole sentence (indexing the string here would keep
            # only a single character) and collapse runs of whitespace.
            normalized = ' '.join(raw_text.split())
            kept = Simple_Emotion_Classifier._filter_tokens(
                word_tokenize(normalized), stop_words
            )
            cleaned = ' '.join(kept)
            Lines.append(cleaned if cleaned else "hello world")
        return Lines, Train_Labels, Train_Label_Weight

    # Function to load the trained model
    def load_xlnet_model(self, saved_model_path):
        """Rebuild the classifier and restore weights from ``saved_model_path``."""
        model = self._build_classifier()
        model.load_weights(saved_model_path)
        return model

    # Function to test the emotion classifier for the single sentence
    def Emotion_Classifier_Test(self, Input_Sentence, model1):
        """Classify one sentence, print the result and return the class index.

        Args:
            Input_Sentence: text to classify (converted with ``str``).
            model1: model returned by ``load_xlnet_model``.

        Returns:
            The index of the highest-scoring class.
        """
        # NOTE(review): the text is lower-cased here and stop words are kept,
        # whereas training keeps the original case and removes stop words -
        # confirm whether this train/inference difference is intended.
        lowered = str(Input_Sentence).strip().lower()
        kept = Simple_Emotion_Classifier._filter_tokens(word_tokenize(lowered))
        cleaned = ' '.join(kept)
        # Fall back to a fixed sentence when cleaning leaves nothing.
        cln_test_lines = [cleaned if cleaned else "Very Good"]
        # The label is a placeholder; only the encoded inputs are used.
        test_seq = self.generate_sequence(cln_test_lines, [0])
        pred = model1.predict_generator(test_seq)[0]
        print("prediction => ", pred)
        index1 = np.argmax(pred)
        print("class index => ", index1)
        return index1

if __name__ == "__main__":
    print("Calling Simple Emotion Classifier Training and Test")
    classifier = Simple_Emotion_Classifier()
    # Fine-tune on the source CSV; Train_Model also saves the weights.
    classifier.Train_Model(Source_File)
    # Rebuild the network and restore the weights that were just saved.
    restored_model = classifier.load_xlnet_model(trained_model_path)
    print("model loaded")
    # Classify a single sample sentence with the restored model.
    sample_sentence = " Jimmy Carr makes me want to cry and cry *shiver*"
    classifier.Emotion_Classifier_Test(sample_sentence, restored_model)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.