CONDOR MLP for predicting poker hands

This tutorial explains how to equip a deep neural network with the CONDOR layer and loss function for ordinal regression in the context of predicting poker hands.

0 -- Obtaining and preparing the Poker Hand dataset from the UCI ML repository

First, we are going to download and prepare the UCI Poker Hand dataset from https://archive.ics.uci.edu/ml/datasets/Poker+Hand and save it as CSV files locally. This is a general procedure that is not specific to CONDOR.

This dataset has 10 ordinal labels,

0: Nothing in hand; not a recognized poker hand 
1: One pair; one pair of equal ranks within five cards 
2: Two pairs; two pairs of equal ranks within five cards 
3: Three of a kind; three equal ranks within five cards 
4: Straight; five cards, sequentially ranked with no gaps 
5: Flush; five cards with the same suit 
6: Full house; pair + different rank three of a kind 
7: Four of a kind; four equal ranks within five cards 
8: Straight flush; straight + flush 
9: Royal flush; {Ace, King, Queen, Jack, Ten} + flush 

where 0 < 1 < 2 ... < 9.

Download the training and test datasets:

import pandas as pd


train_df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/poker/poker-hand-training-true.data", header=None)
train_features = train_df.loc[:, 0:10]
train_labels = train_df.loc[:, 10]

print('Number of features:', train_features.shape[1])
print('Number of training examples:', train_features.shape[0])
Number of features: 11
Number of training examples: 25010
test_df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/poker/poker-hand-testing.data", header=None)
test_df.head()

test_features = test_df.loc[:, 0:10]
test_labels = test_df.loc[:, 10]

print('Number of test examples:', test_features.shape[0])
Number of test examples: 1000000

Standardize features:

from sklearn.preprocessing import StandardScaler


sc = StandardScaler()
train_features_sc = sc.fit_transform(train_features)
test_features_sc = sc.transform(test_features)

Save the training and test sets as CSV files locally:

pd.DataFrame(train_features_sc).to_csv('train_features.csv', index=False)
train_labels.to_csv('train_labels.csv', index=False)

pd.DataFrame(test_features_sc).to_csv('test_features.csv', index=False)
test_labels.to_csv('test_labels.csv', index=False)

# don't need those anymore
del test_features
del train_features
del train_labels
del test_labels

1 -- Setting up the dataset and dataloader

In this section, we set up the dataset and data loaders using PyTorch utilities. This is a general procedure that is not specific to CONDOR.

import torch


##########################
### SETTINGS
##########################

# Hyperparameters
random_seed = 1
learning_rate = 0.001
num_epochs = 20
batch_size = 128

# Architecture
NUM_CLASSES = 10

# Other
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Training on', DEVICE)
Training on cpu
from torch.utils.data import Dataset
import numpy as np


class MyDataset(Dataset):

    def __init__(self, csv_path_features, csv_path_labels, dtype=np.float32):

        self.features = pd.read_csv(csv_path_features).values.astype(dtype)
        self.labels = pd.read_csv(csv_path_labels).values.flatten()

    def __getitem__(self, index):
        inputs = self.features[index]
        label = self.labels[index]
        return inputs, label

    def __len__(self):
        return self.labels.shape[0]
import torch
from torch.utils.data import DataLoader


# Instantiate the training and test datasets from the CSV files saved above
train_dataset = MyDataset('train_features.csv', 'train_labels.csv')
test_dataset = MyDataset('test_features.csv', 'test_labels.csv')


train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True, # want to shuffle the dataset
                          num_workers=0) # number processes/CPUs to use

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=True, # want to shuffle the dataset
                         num_workers=0) # number processes/CPUs to use

# Checking the dataset
for inputs, labels in train_loader:  
    print('Input batch dimensions:', inputs.shape)
    print('Input label dimensions:', labels.shape)
    break
Input batch dimensions: torch.Size([128, 11])
Input label dimensions: torch.Size([128])

2 -- Equipping the MLP with the CONDOR layer

In this section, we use condor_pytorch to outfit a multilayer perceptron for ordinal regression. Note that the CONDOR method only requires replacing the last (output) layer, which is typically a fully connected layer, with a CONDOR layer that has one fewer output node (num_classes - 1 instead of num_classes).

class CondorMLP(torch.nn.Module):

    def __init__(self, num_classes):
        super(CondorMLP, self).__init__()

        self.features = torch.nn.Sequential(
            torch.nn.Linear(11, 5),
            torch.nn.ReLU(),
            torch.nn.Linear(5, 5),
            torch.nn.ReLU(),
            torch.nn.Linear(5, num_classes-1)  # key: num_classes-1 output nodes (the CONDOR change)
        )


    def forward(self, x):
        logits = self.features(x)
        return logits




torch.manual_seed(random_seed)
model = CondorMLP(num_classes=NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
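
As a quick sanity check (illustrative only, not part of the original code), we can pass a random batch through the untrained network to confirm that it produces num_classes - 1 = 9 logits per example:

# Illustrative sanity check: the CONDOR output layer yields num_classes - 1 logits.
with torch.no_grad():
    dummy_batch = torch.randn(4, 11).to(DEVICE)  # 4 fake examples with 11 features
    print(model(dummy_batch).shape)              # expected: torch.Size([4, 9])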

3 -- Using the CONDOR loss for model training

During training, all you need to do is to

1) convert the integer class labels into the extended binary label format using the levels_from_labelbatch utility provided via condor_pytorch (a small illustrative example of this encoding follows after this list):

        levels = levels_from_labelbatch(class_labels, 
                                        num_classes=NUM_CLASSES)

2) apply the CONDOR loss (also provided via condor_pytorch):

        cost = condor_negloglikeloss(logits, levels)
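
For intuition, here is a small, illustrative example of what the extended binary levels look like (assuming the usual cumulative level encoding, where a label k is mapped to k ones followed by zeros across the num_classes - 1 binary tasks):

# Illustrative only: encode a tiny batch of class labels 0, 2, and 9.
# With num_classes=10, each label becomes a length-9 binary vector,
# e.g. label 2 is expected to map to [1, 1, 0, 0, 0, 0, 0, 0, 0].
from condor_pytorch.dataset import levels_from_labelbatch

example_levels = levels_from_labelbatch(torch.tensor([0, 2, 9]), num_classes=10)
print(example_levels.shape)  # expected: torch.Size([3, 9])
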
from condor_pytorch.dataset import levels_from_labelbatch
from condor_pytorch.losses import condor_negloglikeloss


for epoch in range(num_epochs):

    model = model.train()
    for batch_idx, (features, class_labels) in enumerate(train_loader):

        ##### Convert class labels for CONDOR
        levels = levels_from_labelbatch(class_labels, 
                                        num_classes=NUM_CLASSES)
        ###--------------------------------------------------------------------###

        features = features.to(DEVICE)
        levels = levels.to(DEVICE)
        logits = model(features)

        #### CONDOR loss 
        cost = condor_negloglikeloss(logits, levels)
        ###--------------------------------------------------------------------###   


        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        ### LOGGING
        if not batch_idx % 200:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, num_epochs, batch_idx, 
                     len(train_loader), cost))
Epoch: 001/020 | Batch 000/196 | Cost: 1.1676
Epoch: 002/020 | Batch 000/196 | Cost: 1.1595
Epoch: 003/020 | Batch 000/196 | Cost: 0.5821
Epoch: 004/020 | Batch 000/196 | Cost: 0.2216
Epoch: 005/020 | Batch 000/196 | Cost: 0.1225
Epoch: 006/020 | Batch 000/196 | Cost: 0.1025
Epoch: 007/020 | Batch 000/196 | Cost: 0.0678
Epoch: 008/020 | Batch 000/196 | Cost: 0.0501
Epoch: 009/020 | Batch 000/196 | Cost: 0.0422
Epoch: 010/020 | Batch 000/196 | Cost: 0.0219
Epoch: 011/020 | Batch 000/196 | Cost: 0.0082
Epoch: 012/020 | Batch 000/196 | Cost: 0.0158
Epoch: 013/020 | Batch 000/196 | Cost: 0.0144
Epoch: 014/020 | Batch 000/196 | Cost: 0.0032
Epoch: 015/020 | Batch 000/196 | Cost: 0.0238
Epoch: 016/020 | Batch 000/196 | Cost: 0.0434
Epoch: 017/020 | Batch 000/196 | Cost: 0.0112
Epoch: 018/020 | Batch 000/196 | Cost: 0.0018
Epoch: 019/020 | Batch 000/196 | Cost: 0.0230
Epoch: 020/020 | Batch 000/196 | Cost: 0.0011

4 -- Evaluate model

Finally, after model training, we can evaluate the performance of the model, for example via the mean absolute error, the accuracy within a label tolerance, and the earth mover's distance.

For this, we are going to use the logits_to_label utility function from condor_pytorch to convert the logits back to the original class labels.

from condor_pytorch.dataset import logits_to_label
from condor_pytorch.activations import ordinal_softmax
from condor_pytorch.metrics import earth_movers_distance
from condor_pytorch.metrics import ordinal_accuracy
from condor_pytorch.metrics import mean_absolute_error

def compute_mae_and_acc(model, data_loader, device):

    model.eval()  # evaluation mode (no dropout/batchnorm here, but good practice)
    with torch.no_grad():

        emd, mae, acc, acc1, num_examples = 0., 0., 0., 0., 0

        for i, (features, targets) in enumerate(data_loader):
            ##### Convert class labels for CONDOR
            levels = levels_from_labelbatch(targets, 
                                        num_classes=NUM_CLASSES)
            features = features.to(device)
            levels = levels.to(device)
            targets = targets.float().to(device)

            logits = model(features)
            predicted_labels = logits_to_label(logits).float()  # predicted hand category per example
            predicted_probs = ordinal_softmax(logits).float()   # per-class probabilities (shown for illustration)

            num_examples += targets.size(0)
            mae  += mean_absolute_error(logits,levels,reduction='sum')
            acc  += ordinal_accuracy(logits,levels,tolerance=0,reduction='sum')
            acc1 += ordinal_accuracy(logits,levels,tolerance=1,reduction='sum')
            emd  += earth_movers_distance(logits,levels,reduction='sum')

        mae  = mae / num_examples
        acc  = acc / num_examples
        acc1 = acc1 / num_examples
        emd  = emd / num_examples
        return mae, acc, acc1, emd
train_mae, train_acc, train_acc1, train_emd = compute_mae_and_acc(model, train_loader, DEVICE)
test_mae, test_acc, test_acc1, test_emd = compute_mae_and_acc(model, test_loader, DEVICE)
print(f'Mean absolute error (train/test): {train_mae:.2f} | {test_mae:.2f}')
print(f'Accuracy tolerance 0 (train/test): {train_acc:.2f} | {test_acc:.2f}')
print(f'Accuracy tolerance 1 (train/test): {train_acc1:.2f} | {test_acc1:.2f}')
print(f'Earth movers distance (train/test): {train_emd:.3f} | {test_emd:.3f}')
Mean absolute error (train/test): 0.00 | 0.00
Accuracy tolerance 0 (train/test): 1.00 | 1.00
Accuracy tolerance 1 (train/test): 1.00 | 1.00
Earth movers distance (train/test): 0.005 | 0.004
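
To close the loop, here is a small, illustrative sketch (not part of the original tutorial) of how the trained model could be used to predict the hand category for a batch of test examples, reusing the logits_to_label and ordinal_softmax utilities imported above:

# Illustrative only: predict hand categories for one batch from the test loader.
model.eval()
with torch.no_grad():
    features, targets = next(iter(test_loader))
    logits = model(features.to(DEVICE))
    predicted_labels = logits_to_label(logits)  # integer hand categories 0-9
    predicted_probs = ordinal_softmax(logits)   # probability for each of the 10 classes
    print('Predicted:', predicted_labels[:5])
    print('Actual:   ', targets[:5])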