import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset, ConcatDataset, DataLoader
import os

# Expose only GPU 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
Overconfident Neural Networks
Reference: https://www.kaggle.com/code/jramkiss/overconfident-neural-networks/notebook
# Read in data from data folder: one directory for training images and one
# for validation images.
train_dir = '../data/afhq/train'
valid_dir = '../data/afhq/val'
# Show some images
from torchvision import datasets, transforms
# Show first image in each folder: cat, dog, wild
def show_image(path):
    """Display the first file (in sorted name order) found in *path*.

    Args:
        path: Directory whose first entry is read as an image and plotted.

    Raises:
        IndexError: If *path* contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sorting makes "first"
    # refer to the same file on every run and every machine.
    files = sorted(os.listdir(path))
    if not files:
        raise IndexError(f"no files found in {path!r}")
    img = plt.imread(os.path.join(path, files[0]))
    plt.imshow(img)
    # Remove axis ticks
    plt.axis('off')
    plt.show()
# One sample image from each class folder of the training set.
show_image(os.path.join(train_dir, 'cat'))
show_image(os.path.join(train_dir, 'dog'))
show_image(os.path.join(train_dir, 'wild'))
# Show sizes of each image
from PIL import Image
def show_image_sizes(path):
    """Print the ``size`` of the first file (in sorted name order) in *path*.

    Args:
        path: Directory whose first entry is opened as an image.

    Raises:
        IndexError: If *path* contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sort for a stable choice.
    files = sorted(os.listdir(path))
    if not files:
        raise IndexError(f"no files found in {path!r}")
    # The context manager closes the underlying file handle once the size
    # has been read instead of leaving it open until garbage collection.
    with Image.open(os.path.join(path, files[0])) as img:
        print(img.size)
show_image_sizes(os.path.join(train_dir, 'cat'))
# Recorded notebook output:
# (512, 512)
# Simple CNN with 4 convolutional layers, and 3 classes
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 3x512x512 images and three classes.

    Four conv -> ReLU -> 2x2 max-pool stages reduce the input to a
    32x32x32 feature map, which is flattened and passed through two linear
    layers (with dropout in between). The output is a ``(batch, 3)`` tensor
    of log-probabilities, suitable for ``nn.NLLLoss``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers (3x3 kernels, padding 1 keeps the spatial size)
        self.conv1 = nn.Conv2d(3, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, padding=1)
        # Max pooling layer (halves height and width)
        self.pool = nn.MaxPool2d(2, 2)
        # Linear layers; fc1 expects the flattened 32x32x32 feature map
        self.fc1 = nn.Linear(32 * 32 * 32, 512)
        self.fc2 = nn.Linear(512, 3)
        # Dropout layer
        self.dropout = nn.Dropout(0.25)
        # Log-softmax over the class dimension (attribute name kept as-is)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, 512, 512)``.

        Returns:
            Tensor of shape ``(batch, 3)`` holding log-probabilities.

        Raises:
            RuntimeError: If the spatial size is not 512x512, because the
                flattened features then no longer match ``fc1``.
        """
        # Convolutional stages, channels x height x width per sample:
        #   3x512x512 -> 8x256x256 -> 16x128x128 -> 32x64x64 -> 32x32x32
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(nn.functional.relu(conv(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32 * 32 * 32), this never merges samples of a wrongly
        # sized batch into one row; the mismatch surfaces as an error in fc1.
        x = torch.flatten(x, start_dim=1)
        # Linear layers
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        # Log-softmax
        return self.softmax(x)
cnn = SimpleCNN()
print(cnn)
# Recorded notebook output:
# SimpleCNN(
#   (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#   (fc1): Linear(in_features=32768, out_features=512, bias=True)
#   (fc2): Linear(in_features=512, out_features=3, bias=True)
#   (dropout): Dropout(p=0.25, inplace=False)
#   (softmax): LogSoftmax(dim=1)
# )

# Smoke test: push one random 3x512x512 input through the untrained model.
cnn(torch.randn(1, 3, 512, 512))
# Recorded notebook output:
# tensor([[-1.0659, -1.0911, -1.1402]], grad_fn=<LogSoftmaxBackward0>)
# Number of samples per batch
batch_size = 32
# Resize every image to 512x512 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
])
# Load the dataset using ImageFolder and apply the transformation
train_dataset = ImageFolder(train_dir, transform=transform)
valid_dataset = ImageFolder(valid_dir, transform=transform)
def _first_indices_of_class(targets, label, limit=100):
    """Return the positions of the first *limit* entries of *targets* equal to *label*."""
    return [idx for idx, target in enumerate(targets) if target == label][:limit]


# Select first 100 samples from each class.
# NOTE(review): labels 0/1/2 are assumed to be cat/dog/wild -- confirm
# against train_dataset.class_to_idx.
cat_indices = _first_indices_of_class(train_dataset.targets, 0)
dog_indices = _first_indices_of_class(train_dataset.targets, 1)
wild_indices = _first_indices_of_class(train_dataset.targets, 2)

# Concatenate the indices
indices = cat_indices + dog_indices + wild_indices

# Create a subset of the training data restricted to the selected indices
train_subset = Subset(train_dataset, indices)
# Create the data loader over the training subset; batches are reshuffled
# on every pass.
train_loader = torch.utils.data.DataLoader(
    train_subset,
    batch_size=batch_size,
    shuffle=True,
)
import matplotlib.pyplot as plt
import numpy as np
# Iterate over the batches of the train_loader and plot up to 8 images of each.
# NOTE: the `break` at the bottom is disabled, so every batch gets a figure;
# re-enable it to display only the first batch.
for images, labels in train_loader:
    # Convert images and labels to numpy arrays
    images = images.numpy()
    labels = labels.numpy()

    # Display the images and labels
    fig, axes = plt.subplots(figsize=(10, 5), ncols=8)
    # Hide the frame of every panel up front so unused panels stay blank.
    for ax in axes:
        ax.axis('off')
    # zip stops at the shorter sequence, so a batch with fewer than 8 images
    # is plotted without running past the end of the batch.
    for ax, image, label in zip(axes, images, labels):
        # Channels-first (C, H, W) -> channels-last (H, W, C) for imshow
        ax.imshow(np.transpose(image, (1, 2, 0)))
        ax.set_title(f"Label: {label}")

    plt.tight_layout()
    plt.show()
    #break # Break after displaying the first batch
# Train the model
from torch import optim
# Loss function: negative log-likelihood, paired with the model's
# LogSoftmax output
criterion = nn.NLLLoss()

# Optimizer
optimizer = optim.Adam(cnn.parameters(), lr=0.001)

# Number of epochs
epochs = 1
# Move model to GPU when one is available; otherwise keep it on the CPU
# instead of failing on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn.to(device)

# Train the model
for epoch in range(epochs):
    train_loss = 0.0
    valid_loss = 0.0  # reset every epoch; nothing in this loop updates it

    cnn.train()
    for data, target in train_loader:
        # Move tensors to the same device as the model
        data, target = data.to(device), target.to(device)
        # Clear gradients
        optimizer.zero_grad()
        # Forward pass
        output = cnn(data)
        # Calculate loss
        loss = criterion(output, target)
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()
        # Update training loss: the batch-mean loss is scaled by the batch
        # size so the running total is a sum over samples. loss.item() is
        # read once, since each call copies the value back to the host.
        batch_loss = loss.item() * data.size(0)
        print(batch_loss)
        train_loss += batch_loss

    cnn.eval()

    # Average losses
    train_loss = train_loss / len(train_loader.dataset)

# Recorded notebook output (the run was interrupted by hand):
# 34.59089279174805
# 35.19190216064453
# 36.06556701660156
# 34.57152557373047
# KeyboardInterrupt:
# Predictions
cnn.eval()
images, labels = next(iter(train_loader))

# Move tensors to whichever device the model currently lives on
device = next(cnn.parameters()).device
images, labels = images.to(device), labels.to(device)

# Get predictions (per-class log-probabilities); gradients are not needed
# for inference, so skip building the autograd graph.
with torch.no_grad():
    preds = cnn(images)

# Predicted class index for every image in the batch
preds.argmax(dim=1)
# Recorded notebook output:
# tensor([1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2,
#         2, 2, 1, 1, 2, 2, 1, 2], device='cuda:0')