# Install required packages
!pip install albumentations opencv-python-headless pillow
!pip install nlpaug transformers
!pip install torch torchvision torchaudio
!pip install matplotlib seaborn pandas numpy
Week 5 Lab: Data Augmentation
CS 203: Software Tools and Techniques for AI
IIT Gandhinagar
Learning Objectives
By the end of this lab, you will be able to:
- Apply image augmentation techniques using Albumentations
- Perform text augmentation using nlpaug
- Understand audio augmentation with audiomentations
- Design appropriate augmentation pipelines for different tasks
- Measure the impact of augmentation on model performance
Netflix Movie Theme
We’ll augment our movie data to improve model performance:
- Movie posters: Image augmentation for genre classification
- Movie reviews: Text augmentation for sentiment analysis
- Audio clips: Audio augmentation for trailer classification
Part 1: Environment Setup
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import warnings
warnings.filterwarnings('ignore')
# Set random seed for reproducibility
np.random.seed(42)
print("All imports successful!")
Part 2: Image Augmentation with Albumentations
2.1 Creating Sample Images
# Create a sample image (movie poster placeholder)
def create_sample_poster(width=400, height=600):
    """
    Build a synthetic movie-poster image for the augmentation demos.

    The result is a (height, width, 3) uint8 array in OpenCV's BGR channel
    order: a dark background that fades to black towards the bottom, with a
    filled title rectangle, a filled ellipse standing in for a character,
    and five filled circles as a star rating.

    Note: the shapes are drawn at fixed pixel coordinates chosen for the
    default 400x600 canvas; they are not rescaled for other sizes.
    """
    # Per-row fade factor: 1.0 at the top row, approaching 0 at the bottom.
    fade = 1 - np.arange(height) / height

    poster = np.zeros((height, width, 3), dtype=np.uint8)
    # Peak intensity of each BGR channel at the top of the poster.
    for channel, peak in enumerate((20, 10, 40)):
        column = (peak * fade).astype(np.uint8)  # truncates like int()
        poster[:, :, channel] = column[:, np.newaxis]

    # Title area (filled rectangle)
    cv2.rectangle(poster, (50, 450), (350, 550), (200, 180, 100), -1)

    # Character silhouette (filled ellipse)
    cv2.ellipse(poster, (200, 250), (80, 150), 0, 0, 360, (100, 80, 60), -1)

    # Star rating: five filled circles spaced 60 px apart
    for star in range(5):
        cv2.circle(poster, (80 + star * 60, 580), 15, (0, 200, 255), -1)

    return poster
# Create and display sample image
sample_image = create_sample_poster()
plt.figure(figsize=(6, 9))
plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB))
plt.title("Sample Movie Poster")
plt.axis('off')
plt.show()
print(f"Image shape: {sample_image.shape}")
2.2 Basic Albumentations
import albumentations as A
# Define individual transforms
transforms = {
'Original': None,
'Horizontal Flip': A.HorizontalFlip(p=1.0),
'Rotate 15': A.Rotate(limit=15, p=1.0),
'Brightness': A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0, p=1.0),
'Contrast': A.RandomBrightnessContrast(brightness_limit=0, contrast_limit=0.3, p=1.0),
'Blur': A.GaussianBlur(blur_limit=(7, 7), p=1.0),
}
# Apply and display
fig, axes = plt.subplots(2, 3, figsize=(15, 12))
axes = axes.flatten()
for idx, (name, transform) in enumerate(transforms.items()):
if transform is None:
augmented = sample_image
else:
augmented = transform(image=sample_image)['image']
axes[idx].imshow(cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB))
axes[idx].set_title(name, fontsize=12)
axes[idx].axis('off')
plt.tight_layout()
plt.show()
Question 2.1 (Solved): Create an Augmentation Pipeline
# SOLVED: Create a composed augmentation pipeline
# Composed pipeline: every stage carries its own probability `p`, so each
# call on the same poster can produce a different combination of effects.
# NOTE(review): the install cell does not pin an Albumentations version and
# warnings are silenced globally earlier in this file, so a deprecated or
# rejected argument would go unnoticed — confirm the argument names below
# against the installed release.
poster_augmentation = A.Compose([
    # Geometric transforms
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.5),
    # NOTE(review): newer Albumentations releases reportedly steer users from
    # ShiftScaleRotate towards A.Affine — TODO confirm.
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=5, p=0.5),
    # Color transforms: at most one of the two is picked when this group fires
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=1),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=1),
    ], p=0.5),
    # Noise and blur: at most one of the two is picked when this group fires
    A.OneOf([
        A.GaussianBlur(blur_limit=3, p=1),
        # NOTE(review): `var_limit` is reported to have been superseded by
        # `std_range` in recent releases — TODO confirm for the installed version.
        A.GaussNoise(var_limit=(10, 50), p=1),
    ], p=0.3),
])
# Generate multiple augmentations
fig, axes = plt.subplots(2, 5, figsize=(20, 10))
axes = axes.flatten()
for i in range(10):
augmented = poster_augmentation(image=sample_image)['image']
axes[i].imshow(cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB))
axes[i].set_title(f"Augmented {i+1}")
axes[i].axis('off')
plt.suptitle("10 Augmented Versions of the Same Poster", fontsize=14)
plt.tight_layout()
plt.show()
Question 2.2: Create a Domain-Specific Augmentation Pipeline
Create an augmentation pipeline specifically for movie poster genre classification.
# TODO: Create an augmentation pipeline that:
# 1. Preserves the overall composition (important for genre recognition)
# 2. Simulates different viewing conditions (brightness, contrast)
# 3. Is NOT too aggressive (we want to keep the poster recognizable)
genre_classification_augmentation = A.Compose([
# Your augmentations here
])
# Test your pipeline
Question 2.3: Advanced Augmentations (Cutout, MixUp)
# Cutout (CoarseDropout in Albumentations)
# Cutout-style transform, applied on every call (p=1.0).
# Presumably drops between `min_holes` and `max_holes` rectangles whose sides
# fall within the given min/max bounds and paints them with `fill_value`
# — verify against the CoarseDropout documentation.
# NOTE(review): recent Albumentations releases are reported to replace the
# min_*/max_*/fill_value arguments with range-style ones (num_holes_range,
# hole_height_range, hole_width_range, fill). Warnings are silenced earlier
# in this file, so a rejected argument would not be visible — TODO confirm
# against the installed version.
cutout_transform = A.CoarseDropout(
    max_holes=8,
    max_height=50,
    max_width=50,
    min_holes=4,
    min_height=20,
    min_width=20,
    fill_value=0,
    p=1.0
)
# Apply and show
cutout_image = cutout_transform(image=sample_image)['image']
fig, axes = plt.subplots(1, 2, figsize=(12, 8))
axes[0].imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB))
axes[0].set_title("Original")
axes[0].axis('off')
axes[1].imshow(cv2.cvtColor(cutout_image, cv2.COLOR_BGR2RGB))
axes[1].set_title("With Cutout")
axes[1].axis('off')
plt.show()
Question 2.4: Implement MixUp
# TODO: Implement MixUp augmentation
# MixUp blends two images and their labels
def mixup(image1, image2, label1, label2, alpha=0.4):
    """
    Perform MixUp augmentation.

    Exercise stub: the body is intentionally left for the student, so the
    function currently returns None.

    Args:
        image1, image2: Two images to mix
        label1, label2: Their corresponding labels (one-hot or probabilities)
        alpha: Parameter for Beta distribution

    Returns:
        mixed_image: Blended image
        mixed_label: Blended label
    """
    # Hint: the usual MixUp recipe draws a single weight from
    # Beta(alpha, alpha) and applies it to both the images and the labels.
    # Presumably both images share the same shape — TODO confirm against
    # the inputs used to test this function.
    # Your code here
    pass
# Create a second sample image
sample_image2 = create_sample_poster()
# Modify it slightly
sample_image2 = cv2.rectangle(sample_image2.copy(), (100, 200), (300, 400), (0, 0, 255), -1)
# Test your mixup function
Question 2.5: Augmentation with Bounding Boxes
# Augmentation that preserves bounding box annotations
bbox_transform = A.Compose([
A.HorizontalFlip(p=0.5),
A.Rotate(limit=15, p=0.5),
A.RandomBrightnessContrast(p=0.3),
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
# Sample bounding boxes [x_min, y_min, x_max, y_max]
bboxes = [
[50, 450, 350, 550], # Title area
[120, 100, 280, 400], # Character
]
labels = ['title', 'character']
# Apply transform
transformed = bbox_transform(
image=sample_image,
bboxes=bboxes,
labels=labels
)
# Draw bounding boxes
def draw_bboxes(image, bboxes, labels):
    """
    Return a copy of `image` with labelled bounding boxes drawn on it.

    Args:
        image: Image array; it is copied, so the input is left untouched.
        bboxes: Iterable of [x_min, y_min, x_max, y_max] boxes; coordinates
            are truncated to int before drawing.
        labels: One label per box, used for both the caption and the colour.

    Returns:
        The annotated copy of the image.
    """
    # Known labels get a dedicated colour; anything else uses the fallback.
    palette = {'title': (0, 255, 0), 'character': (255, 0, 0)}
    fallback = (0, 0, 255)

    canvas = image.copy()
    for box, name in zip(bboxes, labels):
        left, top, right, bottom = (int(value) for value in box)
        color = palette.get(name, fallback)
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 2)
        # Caption sits 10 px above the box's top-left corner.
        cv2.putText(canvas, name, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return canvas
# Display
fig, axes = plt.subplots(1, 2, figsize=(12, 8))
orig_with_boxes = draw_bboxes(sample_image, bboxes, labels)
axes[0].imshow(cv2.cvtColor(orig_with_boxes, cv2.COLOR_BGR2RGB))
axes[0].set_title("Original with Bboxes")
axes[0].axis('off')
aug_with_boxes = draw_bboxes(transformed['image'], transformed['bboxes'], transformed['labels'])
axes[1].imshow(cv2.cvtColor(aug_with_boxes, cv2.COLOR_BGR2RGB))
axes[1].set_title("Augmented with Bboxes")
axes[1].axis('off')
plt.show()
Part 3: Text Augmentation with nlpaug
3.1 Setting Up nlpaug
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.sentence as nas
# Sample movie reviews
sample_reviews = [
"This movie was absolutely fantastic! A must-watch.",
"Terrible film. Waste of time and money.",
"The acting was superb, but the plot was confusing.",
"One of the best movies I have seen this year.",
"Boring and predictable. Would not recommend.",
]
print("Sample reviews:")
for i, review in enumerate(sample_reviews):
print(f"{i+1}. {review}")
Question 3.1 (Solved): Synonym Augmentation
# SOLVED: Synonym replacement augmentation
# Create synonym augmenter using WordNet
synonym_aug = naw.SynonymAug(aug_src='wordnet', aug_max=3)
print("Synonym Augmentation Examples:\n")
for review in sample_reviews[:3]:
augmented = synonym_aug.augment(review)
print(f"Original: {review}")
print(f"Augmented: {augmented[0]}")
print()
Question 3.2: Random Word Operations
# TODO: Try different word-level augmentations:
# 1. Random word deletion
# 2. Random word swap
# 3. Random word insertion
# Your code here
Question 3.3: Contextual Word Embeddings (BERT)
# Contextual augmentation using BERT
# Note: This requires downloading the BERT model (may take a few minutes)
try:
bert_aug = naw.ContextualWordEmbsAug(
model_path='bert-base-uncased',
action='substitute',
aug_max=2
)
print("BERT Contextual Augmentation:\n")
for review in sample_reviews[:2]:
augmented = bert_aug.augment(review)
print(f"Original: {review}")
print(f"Augmented: {augmented[0]}")
print()
except Exception as e:
print(f"BERT augmentation not available: {e}")
print("Try: pip install transformers torch")
Question 3.4: Character-Level Augmentation
# Character-level augmentation (simulating typos)
# Keyboard typo augmenter
keyboard_aug = nac.KeyboardAug(aug_char_max=2, aug_word_max=2)
# OCR error augmenter
ocr_aug = nac.OcrAug(aug_char_max=2, aug_word_max=2)
print("Character-Level Augmentation Examples:\n")
review = sample_reviews[0]
print(f"Original: {review}")
print(f"Keyboard typos: {keyboard_aug.augment(review)[0]}")
print(f"OCR errors: {ocr_aug.augment(review)[0]}")
Question 3.5: Create a Complete Text Augmentation Pipeline
# TODO: Create a pipeline that applies multiple augmentations
# Use naf.Sequential or naf.Sometimes for probabilistic application
import nlpaug.flow as naf
def create_text_augmentation_pipeline():
    """
    Create a text augmentation pipeline for movie review sentiment.

    Exercise stub: the body is intentionally left for the student, so the
    function currently returns None.

    Requirements:
    - Should preserve sentiment (don't replace sentiment words)
    - Should maintain readability
    - Should be diverse

    Returns:
        Presumably an nlpaug flow object (e.g. built with the `naf` module
        imported just above) that exposes `.augment(text)` — TODO confirm
        against how the pipeline is tested.
    """
    # Your code here
    pass
# Test your pipeline
Question 3.6: Back-Translation (Advanced)
# Back-translation: English -> Other Language -> English
# This requires downloading translation models
try:
back_trans_aug = naw.BackTranslationAug(
from_model_name='facebook/wmt19-en-de',
to_model_name='facebook/wmt19-de-en'
)
print("Back-Translation Augmentation:\n")
review = "This movie was absolutely fantastic!"
augmented = back_trans_aug.augment(review)
print(f"Original: {review}")
print(f"Back-translated: {augmented[0]}")
except Exception as e:
print(f"Back-translation not available: {e}")
print("Requires: pip install transformers torch sentencepiece")
Part 4: Audio Augmentation
4.1 Creating Sample Audio
import numpy as np
# Create a synthetic audio signal (sine wave + noise)
def create_sample_audio(duration=2.0, sr=16000):
    """
    Synthesize a short test signal: a three-tone chord plus Gaussian noise.

    Args:
        duration: Duration in seconds
        sr: Sample rate

    Returns:
        audio: 1-D float32 numpy array of int(sr * duration) samples,
            peak-normalized so the largest absolute value is 1.
    """
    n_samples = int(sr * duration)
    t = np.linspace(0, duration, n_samples, endpoint=False)

    # Chord components (amplitude * sine at the note's frequency)
    a4 = 0.3 * np.sin(2 * np.pi * 440 * t)
    c_sharp5 = 0.2 * np.sin(2 * np.pi * 554 * t)
    e5 = 0.2 * np.sin(2 * np.pi * 659 * t)
    # Background noise drawn from NumPy's global random state
    hiss = 0.1 * np.random.randn(n_samples)

    mix = a4 + c_sharp5 + e5 + hiss

    # Normalize to unit peak, then store as float32
    peak = np.abs(mix).max()
    return (mix / peak).astype(np.float32)
sample_audio = create_sample_audio()
sr = 16000
print(f"Audio shape: {sample_audio.shape}")
print(f"Duration: {len(sample_audio) / sr:.2f} seconds")
# Plot waveform
plt.figure(figsize=(12, 3))
plt.plot(np.linspace(0, len(sample_audio)/sr, len(sample_audio)), sample_audio)
plt.title("Sample Audio Waveform")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.show()
Question 4.1: Basic Audio Augmentations
# Manual audio augmentations
def add_noise(audio, noise_level=0.01):
    """Add Gaussian noise to audio.

    Args:
        audio: Audio samples as an array-like of any shape (mono or
            multi-channel).
        noise_level: Standard deviation of the added noise.

    Returns:
        A new array with the same shape as `audio`. Floating-point inputs
        keep their dtype (float32 stays float32); other dtypes are returned
        as float64 so the noise is not truncated.
    """
    samples = np.asarray(audio)
    # Draw one noise value per sample. Sizing by shape rather than len()
    # keeps multi-channel input from failing or broadcasting one value
    # across a whole channel.
    noise = np.asarray(np.random.standard_normal(samples.shape)) * noise_level
    noisy = samples + noise
    # The float64 noise would otherwise upcast float32 audio, unlike the
    # other augmentation helpers, which return the input dtype.
    if np.issubdtype(samples.dtype, np.floating):
        noisy = noisy.astype(samples.dtype)
    return noisy
def time_shift(audio, shift_range=0.2):
    """Circularly shift audio by a random offset.

    The offset is a uniformly random fraction in [-shift_range, shift_range]
    of the signal length, truncated to whole samples. Samples pushed past
    one end wrap around to the other.
    """
    direction = np.random.uniform(-1, 1)
    offset = int(len(audio) * shift_range * direction)
    return np.roll(audio, offset)
def change_volume(audio, volume_range=(0.5, 1.5)):
    """Scale audio by a gain drawn uniformly from `volume_range`.

    `volume_range` is unpacked into the arguments of `np.random.uniform`,
    i.e. (low, high).
    """
    gain = np.random.uniform(*volume_range)
    scaled = audio * gain
    return scaled
def pitch_shift_simple(audio, shift_factor=0.1):
    """Simple pitch shift by resampling.

    A factor is drawn uniformly from [1 - shift_factor, 1 + shift_factor]
    and the signal is resampled to int(len(audio) / factor) samples. The
    result is then trimmed, or zero-padded at the end, so the output has
    the same number of samples as the input.
    """
    from scipy.signal import resample

    original_len = len(audio)
    stretch = 1 + np.random.uniform(-shift_factor, shift_factor)
    resampled = resample(audio, int(original_len / stretch))

    # Positive: output is short and needs padding; negative: too long.
    missing = original_len - len(resampled)
    if missing < 0:
        return resampled[:original_len]
    return np.pad(resampled, (0, missing))
# Apply augmentations
augmentations = {
'Original': sample_audio,
'Added Noise': add_noise(sample_audio, 0.05),
'Time Shifted': time_shift(sample_audio, 0.1),
'Volume Changed': change_volume(sample_audio, (0.5, 1.5)),
}
# Plot
fig, axes = plt.subplots(4, 1, figsize=(12, 8))
for idx, (name, audio) in enumerate(augmentations.items()):
axes[idx].plot(audio)
axes[idx].set_title(name)
axes[idx].set_xlim(0, len(audio))
plt.tight_layout()
plt.show()
Question 4.2: Implement SpecAugment
# TODO: Implement SpecAugment (frequency and time masking on spectrograms)
from scipy.signal import spectrogram
def spec_augment(audio, sr, freq_mask_param=10, time_mask_param=20):
    """
    Apply SpecAugment to audio.

    Exercise stub: the body is intentionally left for the student, so the
    function currently returns None.

    Args:
        audio: Audio signal
        sr: Sample rate
        freq_mask_param: Maximum frequency mask width
        time_mask_param: Maximum time mask width

    Returns:
        Augmented spectrogram
    """
    # Hint: presumably the masks are applied to the output of
    # scipy.signal.spectrogram (imported just above this function), with the
    # mask widths counted in spectrogram bins/frames — TODO confirm.
    # Your code here
    pass
# Test your function
Part 5: Measuring Augmentation Impact
5.1 Setting Up a Classification Task
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Load a small subset of 20 newsgroups
categories = ['rec.sport.baseball', 'rec.sport.hockey']
newsgroups = fetch_20newsgroups(subset='all', categories=categories,
remove=('headers', 'footers', 'quotes'))
# Limit to smaller dataset for demonstration
texts = newsgroups.data[:500]
labels = newsgroups.target[:500]
# Split
X_train, X_test, y_train, y_test = train_test_split(
texts, labels, test_size=0.3, random_state=42
)
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
Question 5.1 (Solved): Baseline Without Augmentation
# SOLVED: Train baseline model without augmentation
# Vectorize
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)
# Train
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)
# Evaluate
y_pred = model.predict(X_test_vec)
baseline_accuracy = accuracy_score(y_test, y_pred)
print(f"Baseline Accuracy (no augmentation): {baseline_accuracy:.2%}")
Question 5.2: Train with Augmentation
# TODO: Augment training data and measure improvement
# 1. Augment each training sample 2-3 times
# 2. Combine with original data
# 3. Train model
# 4. Compare accuracy
# Your code here
Question 5.3: Learning Curve Comparison
# TODO: Plot learning curves with and without augmentation
# Train on 10%, 20%, ..., 100% of training data
# Compare the two curves
# Your code here
Part 6: Best Practices and Guidelines
Question 6.1: When NOT to Augment
# Demonstration: Bad augmentation that changes the label
# Example 1: Text sentiment - replacing key sentiment words
review = "This movie was terrible"
# Bad augmentation: synonym replacement of "terrible"
bad_aug = "This movie was awesome" # Completely changes meaning!
print("Example of BAD text augmentation:")
print(f"Original (NEGATIVE): {review}")
print(f"Bad augmentation: {bad_aug}")
print("The label changed from NEGATIVE to POSITIVE!")
print("\n" + "="*50 + "\n")
# Example 2: Digit classification with vertical flip
print("Example of BAD image augmentation:")
print("Original: Image of '6'")
print("Vertical flip: Image of '9'")
print("The label changed from '6' to '9'!")
Question 6.2: Create an Augmentation Checklist
# TODO: Write a checklist for designing augmentation pipelines
augmentation_checklist = """
# Augmentation Design Checklist
## Before Choosing Augmentations:
[ ] What is the task? (classification, detection, segmentation, etc.)
[ ] What modality? (image, text, audio, video)
[ ] What domain? (natural images, medical, documents, etc.)
## Validation Questions:
[ ] Does this augmentation preserve the label?
[ ] ...
[ ] ...
## Implementation:
[ ] ...
## Testing:
[ ] ...
"""
# Complete the checklist
print(augmentation_checklist)
Challenge Problems
Challenge 1: Test-Time Augmentation (TTA)
# Challenge: Implement Test-Time Augmentation
# Apply augmentations at inference time and average predictions
def tta_predict(model, vectorizer, text, n_augments=5):
    """
    Make prediction using Test-Time Augmentation.

    Challenge stub: no implementation is provided yet.

    Args:
        model: Trained classifier
        vectorizer: Fitted TfidfVectorizer
        text: Text to classify
        n_augments: Number of augmented versions to use

    Returns:
        Averaged prediction probabilities
    """
    # Hint: presumably the model exposes predict_proba and the original text
    # is scored alongside its augmented copies — TODO confirm.
    # Your code here
pass
Challenge 2: AutoAugment-Style Policy Search
# Challenge: Implement a simple augmentation policy search
# Find the best combination of augmentation parameters
def search_augmentation_policy(X_train, y_train, X_val, y_val, n_trials=20):
    """
    Search for the best augmentation policy using random search.

    Challenge stub: no implementation is provided yet.

    Args:
        X_train, y_train: Training samples and labels (presumably raw texts,
            as in the classification task above — TODO confirm)
        X_val, y_val: Held-out samples and labels used to score each policy
        n_trials: Number of candidate policies to try

    Returns:
        best_policy: Dictionary of best augmentation parameters
        best_score: Validation accuracy with best policy
    """
    # Your code here
pass
Challenge 3: Augmentation for Imbalanced Classes
# Challenge: Use augmentation to balance an imbalanced dataset
# More augmentation for minority classes
def class_balanced_augmentation(X, y, target_count=None):
    """
    Augment data to balance class distribution.

    Challenge stub: no implementation is provided yet.

    Args:
        X: Features
        y: Labels
        target_count: Target number of samples per class (default: max class count)

    Returns:
        X_balanced, y_balanced: Balanced dataset
    """
    # Hint: classes with fewer samples need more augmented copies to reach
    # the target count.
    # Your code here
pass
Summary
In this lab, you learned:
- Image Augmentation: Geometric and color transforms with Albumentations
- Text Augmentation: Synonym replacement, back-translation with nlpaug
- Audio Augmentation: Time-domain and frequency-domain augmentations
- Pipeline Design: Creating appropriate augmentation pipelines for tasks
- Impact Measurement: Comparing model performance with/without augmentation
Key Takeaways
| Modality | Key Transforms | Libraries |
|---|---|---|
| Image | Flip, rotate, color, cutout | Albumentations |
| Text | Synonym, back-translation | nlpaug |
| Audio | Noise, pitch, SpecAugment | audiomentations |
Best Practices
- Always verify augmentations preserve labels
- Start simple, add complexity gradually
- Don’t augment validation/test data
- Measure impact before and after
- Domain-specific choices matter
Next Week
Week 6: LLM APIs - Integrating large language models into your applications!