import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
# retina display
%config InlineBackend.figure_format = 'retina'
Introduction

Main idea (this is the Fast Gradient Sign Method, FGSM, applied iteratively):

- Start with an input image \(x\) and its true class \(y\).
- Compute the gradient of the loss \(J(\theta, x, y)\), evaluated at the true class, with respect to the input image \(x\) (not the model weights \(\theta\)).
- Move the image a small step of size \(\epsilon\) in the direction of the sign of that gradient; this increases the loss while barely changing the image.
- Update equation: \(x_{adv} = x + \epsilon \cdot \text{sign}(\nabla_x J(\theta, x, y))\)
- Repeat the gradient and update steps until the model is fooled.

Reference: https://www.youtube.com/watch?v=5lFiZTSsp40
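Before walking through the attack step by step, here is a compact sketch of a single FGSM update, using the imports above. This is a minimal sketch, not code run later in this notebook: model is any classifier, x a batched input image, y the integer true label, and cross-entropy stands in for the loss \(J\) (the code below instead applies F.nll_loss directly to the logits; see the note there).

def fgsm_step(model, x, y, eps):
    # One FGSM update: ascend the loss with respect to the *input*, not the weights
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), torch.LongTensor([y]))
    loss.backward()
    # Move every pixel by +/- eps in the direction that increases the loss
    return (x + eps * x.grad.sign()).detach()

The sign function that appears in the update looks like this: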
xs = torch.linspace(-3, 3, 1000)
signs = torch.sign(xs)

plt.plot(xs.numpy(), signs.numpy())
plt.xlabel('x')
plt.ylabel('sign(x)')
# Load a pretrained model for ImageNet classification
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
Using cache found in /home/nipun.batra/.cache/torch/hub/pytorch_vision_v0.6.0
model
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)
# Put the model in evaluation mode
model.eval()
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  ...
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)
# Read in an image and preprocess
from PIL import Image
from torchvision import transforms

im = Image.open("happy-doggy.jpg")
plt.imshow(im)
# Apply transformations
preprocess_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])
img_tensor = preprocess_transforms(im)
# plot transformed image
plt.imshow(img_tensor.permute(1, 2, 0))
# Add a batch dimension
batched_img_tensor = img_tensor.unsqueeze(0)
batched_img_tensor.shape
torch.Size([1, 3, 224, 224])
# predict the class of the image
preds = model(batched_img_tensor)
preds.argmax()
tensor(273)
# Download ImageNet labels
import urllib
url, filename = ("https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt", "imagenet_classes.txt")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)
# Load ImageNet labels
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

categories[preds.argmax()]
'dingo'
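As a quick sanity check on this prediction, we could also inspect the model's five most likely classes (a small sketch; it assumes preds still holds the logits computed above):

probs = F.softmax(preds[0], dim=0)          # logits -> probabilities
top_probs, top_idxs = torch.topk(probs, 5)  # five most likely classes
for p, idx in zip(top_probs, top_idxs):
    print(f"{categories[idx]}: {p.item():.3f}")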
def func(inp, model, target):
    out = model(inp)
    loss = torch.nn.functional.nll_loss(out, target=torch.LongTensor([target]))
    print(f"Loss: {loss.item()}")
    return loss

func(batched_img_tensor, model, 258), func(batched_img_tensor, model, 263)
Loss: 1.7196619510650635
Loss: -5.548308372497559
(tensor(1.7197, grad_fn=<NllLossBackward0>),
tensor(-5.5483, grad_fn=<NllLossBackward0>))
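Note that this "loss" can be negative: F.nll_loss expects log-probabilities, but here it receives raw logits, so it simply returns the negative logit of the target class. That is fine for the attack, since increasing this quantity still pushes the target logit down. A quick check of the equivalence, using the variables above:

out = model(batched_img_tensor)
print(F.nll_loss(out, target=torch.LongTensor([258])).item())  # nll_loss on raw logits ...
print(-out[0, 258].item())                                     # ... is just the negative target logit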
# Get the gradient of the loss with respect to an image
def get_gradient(image_tensor, label_idx):
    # Make sure the image requires a gradient.
    image_tensor.requires_grad_(True)
    # Forward pass
    preds = model(image_tensor)
    # NLL-style loss on the logits for the given label
    loss = torch.nn.functional.nll_loss(preds, target=torch.LongTensor([label_idx]))
    loss.backward()
    print(f"Loss: {loss.item()}")
    image_tensor.requires_grad_(False)
    return image_tensor.grad.data
grads = get_gradient(batched_img_tensor, preds.argmax())
grads.shape
Loss: -12.811663627624512
torch.Size([1, 3, 224, 224])
# Plot the gradient
plt.imshow(grads[0].permute(1, 2, 0))
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
# Get the sign of the gradient
signs = torch.sign(grads)
signs
tensor([[[[-1., -1.,  1.,  ...,  1., -1.,  1.],
          [-1., -1., -1.,  ...,  1.,  1.,  1.],
          [ 1., -1., -1.,  ..., -1.,  1.,  1.],
          ...,
          [-1.,  1., -1.,  ...,  1.,  1.,  1.],
          [-1., -1.,  1.,  ...,  1.,  1.,  1.],
          [-1., -1., -1.,  ..., -1., -1.,  1.]],

         [[-1., -1.,  1.,  ..., -1., -1.,  1.],
          [-1., -1.,  1.,  ..., -1., -1., -1.],
          [ 1., -1., -1.,  ..., -1., -1.,  1.],
          ...,
          [ 1.,  1., -1.,  ...,  1.,  1., -1.],
          [-1., -1.,  1.,  ...,  1.,  1.,  1.],
          [-1.,  1., -1.,  ..., -1., -1.,  1.]],

         [[-1., -1., -1.,  ..., -1., -1.,  1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [ 1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [ 1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1.,  1.,  ...,  1., -1., -1.],
          [-1.,  1., -1.,  ..., -1., -1.,  1.]]]])
# Plot the sign of the gradient
plt.imshow(signs[0].permute(1, 2, 0))
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
# Add noise to the image
eps = 0.001
noisy_img = batched_img_tensor + eps * signs
noisy_img = torch.clamp(noisy_img, 0, 1)
# Plot the noisy image
plt.imshow(noisy_img[0].permute(1, 2, 0))
# view the label for the noisy image
model(noisy_img).argmax()
tensor(434)
# Now, repeat until the model is fooled
eps = 0.0001

noisy_img = batched_img_tensor.clone()
noisy_img.requires_grad_(True)
i = 0
while model(noisy_img).argmax() == preds.argmax():
    grads = get_gradient(noisy_img, preds.argmax())
    signs = torch.sign(grads)
    noisy_img = noisy_img + eps * signs
    # Keep pixel values in a bounded range
    noisy_img = torch.clamp(noisy_img, -2, 2)
    noisy_img = noisy_img.detach().requires_grad_(True)
    i = i + 1
    if i % 10 == 0:
        print(i)

# Detach the image tensor from the computation graph
noisy_img.requires_grad_(False)
noisy_img = noisy_img.detach()
Loss: -12.811663627624512
Loss: -12.379142761230469
Loss: -11.94972038269043
Loss: -11.527520179748535
model(noisy_img).argmax()
tensor(173)
categories[model(noisy_img).argmax()]
'Ibizan hound'
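The prediction has flipped from 'dingo' to 'Ibizan hound'. To see how small the perturbation is, we can check its largest per-pixel change (the L-infinity norm); with the four iterations printed above and eps = 0.0001, it can be at most 4 * eps = 0.0004:

perturbation = noisy_img - batched_img_tensor
print(perturbation.abs().max().item())  # largest absolute per-pixel change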
# Plot the image
plt.imshow(noisy_img[0].permute(1, 2, 0))
# Difference between the original and the adversarial image
diff = (noisy_img - batched_img_tensor)[0]
diff = diff.mean(dim=0)
diff = torch.abs(diff)
diff = diff / diff.max()

diff.shape
torch.Size([224, 224])
fig, ax = plt.subplots(figsize=(10, 4), ncols=3)
ax[0].imshow(batched_img_tensor[0].permute(1, 2, 0))
ax[0].set_title("Original Image\n Label: " + categories[preds.argmax()])

ax[1].imshow(noisy_img[0].permute(1, 2, 0))
ax[1].set_title("Adversarial Image\n Label: " + categories[model(noisy_img).argmax()])

ax[2].imshow(diff, cmap='gray')
ax[2].set_title("Difference")
# Now, let us create a function to generate adversarial examples for any image
def generate_adversarial_example(image_tensor, target_label, model, eps=0.001, max_iter=500):
    # Create a noisy image identical to the input image
    noisy_img = image_tensor.clone()
    # Make the noisy image require a gradient
    noisy_img.requires_grad_(True)
    for i in range(max_iter):
        # Get the gradient of the loss with respect to the current image
        grads = get_gradient(noisy_img, target_label)
        # Get the sign of the gradient
        signs = torch.sign(grads)
        # Add noise to the image
        noisy_img = noisy_img + eps * signs
        # Keep pixel values in a bounded range
        noisy_img = torch.clamp(noisy_img, -2, 2)
        # Detach the image tensor from the computation graph
        noisy_img.requires_grad_(False)
        # Print the model prediction for the current image
        print(f"Iteration: {i} \t Model Prediction: {categories[model(noisy_img).argmax()]}")
        # If the noisy image is misclassified, return it
        if model(noisy_img).argmax() != target_label:
            return noisy_img
    # If the model is not fooled within max_iter iterations, return the image anyway
    return noisy_img
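The function above is untargeted: it increases the loss for the true class until any other class wins. A targeted attack is a small variation: step against the gradient for a class of the attacker's choosing, pulling the prediction toward it. Below is a hypothetical sketch (not run in this notebook), reusing get_gradient; desired_label is any ImageNet class index:

def generate_targeted_example(image_tensor, desired_label, model, eps=0.001, max_iter=500):
    # Descend (rather than ascend) the loss for the desired class,
    # pushing the model's prediction toward desired_label
    noisy_img = image_tensor.clone()
    for i in range(max_iter):
        grads = get_gradient(noisy_img, desired_label)
        noisy_img = torch.clamp(noisy_img - eps * torch.sign(grads), -2, 2)
        if model(noisy_img).argmax() == desired_label:
            return noisy_img
    return noisy_img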
def plot_image_adversary_difference(image_tensor, noisy_img):
    # Difference between the original and the adversarial image
    diff = (noisy_img - image_tensor)[0]
    diff = diff.mean(dim=0)
    diff = torch.abs(diff)
    diff = diff / diff.max()
    # Plot the original image, the adversarial image, and their difference
    fig, ax = plt.subplots(figsize=(10, 4), ncols=3)
    ax[0].imshow(image_tensor[0].permute(1, 2, 0))
    ax[0].set_title("Original Image\n Label: " + categories[model(image_tensor).argmax()])

    ax[1].imshow(noisy_img[0].permute(1, 2, 0))
    ax[1].set_title("Adversarial Image\n Label: " + categories[model(noisy_img).argmax()])

    ax[2].imshow(diff, cmap='gray')
    ax[2].set_title("Difference")
def preprocess(img_path):
    img = Image.open(img_path)
    img = img.resize((224, 224))
    img_tensor = preprocess_transforms(img)
    img_tensor = img_tensor.unsqueeze(0)
    return img_tensor
= "lion.jpg"
img_path = preprocess(img_path) img_tensor
plt.imshow(img_tensor[0].permute(1, 2, 0))
model(img_tensor).argmax()
tensor(291)
categories[model(img_tensor).argmax()]
'lion'
adv = generate_adversarial_example(img_tensor, model(img_tensor).argmax(), model, eps=0.0001, max_iter=50)

plot_image_adversary_difference(img_tensor, adv)
Loss: -17.654619216918945
Iteration: 0 Model Prediction: lion
Loss: -17.16172981262207
Iteration: 1 Model Prediction: lion
Loss: -16.668292999267578
Iteration: 2 Model Prediction: lion
Loss: -16.1711483001709
Iteration: 3 Model Prediction: lion
Loss: -15.673859596252441
Iteration: 4 Model Prediction: lion
Loss: -15.182080268859863
Iteration: 5 Model Prediction: lion
Loss: -14.688620567321777
Iteration: 6 Model Prediction: lion
Loss: -14.19030475616455
Iteration: 7 Model Prediction: lion
Loss: -13.684654235839844
Iteration: 8 Model Prediction: lion
Loss: -13.176743507385254
Iteration: 9 Model Prediction: lion
Loss: -12.678997993469238
Iteration: 10 Model Prediction: lion
Loss: -12.19263744354248
Iteration: 11 Model Prediction: lion
Loss: -11.708401679992676
Iteration: 12 Model Prediction: lion
Loss: -11.224825859069824
Iteration: 13 Model Prediction: lion
Loss: -10.733859062194824
Iteration: 14 Model Prediction: lion
Loss: -10.247729301452637
Iteration: 15 Model Prediction: lion
Loss: -9.766221046447754
Iteration: 16 Model Prediction: lion
Loss: -9.282770156860352
Iteration: 17 Model Prediction: lion
Loss: -8.798469543457031
Iteration: 18 Model Prediction: lion
Loss: -8.31735897064209
Iteration: 19 Model Prediction: lion
Loss: -7.843080043792725
Iteration: 20 Model Prediction: lion
Loss: -7.380602836608887
Iteration: 21 Model Prediction: lion
Loss: -6.926629066467285
Iteration: 22 Model Prediction: shield
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
="monkey.jpg"
img_path = preprocess(img_path)
img_tensor
0].permute(1, 2, 0)) plt.imshow(img_tensor[
model(img_tensor).argmax()
tensor(373)
categories[model(img_tensor).argmax()]
'macaque'
adv = generate_adversarial_example(img_tensor, model(img_tensor).argmax(), model, eps=0.0001, max_iter=50)

plot_image_adversary_difference(img_tensor, adv)
Loss: -10.831324577331543
Iteration: 0 Model Prediction: macaque
Loss: -10.409868240356445
Iteration: 1 Model Prediction: macaque
Loss: -9.994575500488281
Iteration: 2 Model Prediction: macaque
Loss: -9.579815864562988
Iteration: 3 Model Prediction: macaque
Loss: -9.168878555297852
Iteration: 4 Model Prediction: macaque
Loss: -8.76225471496582
Iteration: 5 Model Prediction: Chihuahua