Why use logits

ML
Author: Nipun Batra
Published: February 27, 2024
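PyTorch gives us two ways to compute binary cross-entropy: squash the scores through a sigmoid and call binary_cross_entropy on the resulting probabilities, or pass the raw scores (logits) straight to binary_cross_entropy_with_logits. The cells below show that the two agree for moderate logits, and why the logits version is the numerically safer choice once the sigmoid saturates.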

import numpy as np
import sklearn 
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from latexify import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch.nn.functional as F
import ipywidgets as widgets
from ipywidgets import interactive
from IPython.display import display

# Example ground truth probabilities
ground_truth_probs = torch.tensor([0.3, 0.7])

# Example model predictions (logits)
model_logits = torch.tensor([-2.0, 2.0])

# Applying sigmoid to the logits to get probabilities
model_probs = torch.sigmoid(model_logits)

# Cross-entropy loss using probabilities
loss_probs = F.binary_cross_entropy(model_probs, ground_truth_probs)

# Cross-entropy loss using logits
loss_logits = F.binary_cross_entropy_with_logits(model_logits, ground_truth_probs)

print("Loss using probabilities:", loss_probs.item())
print("Loss using logits:", loss_logits.item())
Loss using probabilities: 0.7269279956817627
Loss using logits: 0.7269280552864075
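For well-behaved logits like these, the two paths agree up to floating-point noise. As a sanity check, here is a minimal sketch that computes the loss directly from the definition of binary cross-entropy with soft targets, reusing model_logits and ground_truth_probs from above:

# Manual BCE with soft targets, averaged over elements
p = torch.sigmoid(model_logits)
y = ground_truth_probs
manual_loss = -(y * torch.log(p) + (1 - y) * torch.log(1 - p)).mean()
print("Manual loss:", manual_loss.item())  # ~0.7269, matching both losses above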
l1 = widgets.FloatSlider(value=0.3, min=-300, max=300, step=0.01, description='Logits ex 1')
l2 = widgets.FloatSlider(value=0.7, min=-300, max=300, step=0.01, description='Logits ex 2')

box = widgets.VBox([l1, l2])
def print_loss_using_both_methods(l1, l2):
    logits = torch.tensor([l1, l2])
    probs = torch.sigmoid(logits)
    loss_probs = F.binary_cross_entropy(probs, ground_truth_probs)
    loss_logits = F.binary_cross_entropy_with_logits(logits, ground_truth_probs)
    print("Loss using probabilities:", loss_probs.item())
    print("Loss using logits:", loss_logits.item())
# add interactivity
interactive(print_loss_using_both_methods, l1=l1, l2=l2)
print_loss_using_both_methods(l1.value, l2.value)
Loss using probabilities: 8.003093719482422
Loss using logits: 8.003093719482422
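Pushing the sliders toward their limits is where the two methods part ways. A quick non-interactive sketch at the slider extremes of ±300: the sigmoid saturates to exactly 0 and 1, binary_cross_entropy clamps its internal log outputs at -100 (as documented for BCELoss) so the reported loss gets capped, while the logits version still returns the true value.

extreme_logits = torch.tensor([-300.0, 300.0])
extreme_probs = torch.sigmoid(extreme_logits)  # saturates to tensor([0., 1.]) in float32
# capped by the internal log clamp (expect roughly 30 here)
print("Loss using probabilities:", F.binary_cross_entropy(extreme_probs, ground_truth_probs).item())
# the true loss (expect roughly 90 here)
print("Loss using logits:", F.binary_cross_entropy_with_logits(extreme_logits, ground_truth_probs).item())

The root cause of the saturation is the sigmoid itself, as the next cell shows.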
def our_sigmoid(z):
    return 1/(1+torch.exp(-z))
our_sigmoid(torch.tensor(-90.0))
tensor(0.)
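In float32 the sigmoid of -90 underflows to exactly 0. Taking the log of that probability is what breaks the probability-based loss: a hand-rolled binary cross-entropy returns inf, while binary_cross_entropy_with_logits never materialises the probability and returns the exact answer. A minimal sketch (the hard target of 1.0 is just an illustrative choice):

z = torch.tensor(-90.0)
y = torch.tensor(1.0)                      # illustrative hard target
p = our_sigmoid(z)                         # underflows to 0.0
naive_loss = -(y * torch.log(p) + (1 - y) * torch.log(1 - p))
stable_loss = F.binary_cross_entropy_with_logits(z, y)
print(naive_loss.item())                   # inf
print(stable_loss.item())                  # 90.0, i.e. -log(sigmoid(-90))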
F.binary_cross_entropy_with_logits?
Signature:
F.binary_cross_entropy_with_logits(
    input: torch.Tensor,
    target: torch.Tensor,
    weight: Optional[torch.Tensor] = None,
    size_average: Optional[bool] = None,
    reduce: Optional[bool] = None,
    reduction: str = 'mean',
    pos_weight: Optional[torch.Tensor] = None,
) -> torch.Tensor
Docstring:
Calculate Binary Cross Entropy between target and input logits.

See :class:`~torch.nn.BCEWithLogitsLoss` for details.

Args:
    input: Tensor of arbitrary shape as unnormalized scores (often referred to as logits).
    target: Tensor of the same shape as input with values between 0 and 1
    weight (Tensor, optional): a manual rescaling weight
        if provided it's repeated to match input tensor shape
    size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
        the losses are averaged over each loss element in the batch. Note that for
        some losses, there are multiple elements per sample. If the field :attr:`size_average`
        is set to ``False``, the losses are instead summed for each minibatch. Ignored
        when reduce is ``False``. Default: ``True``
    reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
        losses are averaged or summed over observations for each minibatch depending
        on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
        batch element instead and ignores :attr:`size_average`. Default: ``True``
    reduction (str, optional): Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of
        elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
        and :attr:`reduce` are in the process of being deprecated, and in the meantime,
        specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
    pos_weight (Tensor, optional): a weight of positive examples to be broadcasted with target.
        Must be a tensor with equal size along the class dimension to the number of classes.
        Pay close attention to PyTorch's broadcasting semantics in order to achieve the desired
        operations. For a target of size [B, C, H, W] (where B is batch size) pos_weight of
        size [B, C, H, W] will apply different pos_weights to each element of the batch or
        [C, H, W] the same pos_weights across the batch. To apply the same positive weight
        along all spatial dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1].
        Default: ``None``

Examples::

     >>> input = torch.randn(3, requires_grad=True)
     >>> target = torch.empty(3).random_(2)
     >>> loss = F.binary_cross_entropy_with_logits(input, target)
     >>> loss.backward()
File:      ~/miniconda3/envs/ml/lib/python3.11/site-packages/torch/nn/functional.py
Type:      function
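How does the logits version avoid these problems? It folds the sigmoid and the log into a single expression and applies the log-sum-exp (softplus) trick, so exp is never evaluated at a large positive argument. Below is a sketch of one standard stable formulation, max(z, 0) - z*y + log(1 + exp(-|z|)), which is mathematically equivalent to what PyTorch computes internally:

def stable_bce_with_logits(z, y):
    # max(z, 0) - z*y + log1p(exp(-|z|)); the argument to exp is always <= 0
    return (torch.clamp(z, min=0) - z * y + torch.log1p(torch.exp(-torch.abs(z)))).mean()

z = torch.tensor([-300.0, 2.0, 300.0])
y = torch.tensor([0.3, 0.7, 0.7])
print(stable_bce_with_logits(z, y).item())
print(F.binary_cross_entropy_with_logits(z, y).item())   # should agree

Keeping logits end to end, with a model head that outputs raw scores and a loss such as BCEWithLogitsLoss (or CrossEntropyLoss in the multi-class case), gets this stability for free, which is the practical answer to the title question.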