Object detection using YOLO and segmentation using Segment Anything

References

- https://blog.roboflow.com/how-to-use-segment-anything-model-sam/

import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# config retina
%config InlineBackend.figure_format = 'retina'
# Load pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)
_ = model.eval()
img = Image.open('../shared/datasets/images/office.jpg')

# Transform the image
img_tensor = F.to_tensor(img)
img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension

fig = plt.figure(figsize=(6, 6))
plt.imshow(img)
_ = plt.axis('off')
# Forward pass through the model
with torch.no_grad():
    prediction = model(img_tensor)
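
The model returns one dict per input image, holding boxes in (x0, y0, x1, y1) format, integer labels, and confidence scores sorted in descending order. A quick sanity check of the output shapes:

# Each field is a tensor with one entry per candidate detection
for key, value in prediction[0].items():
    print(key, tuple(value.shape))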
# COCO class names, indexed by the integer labels the model returns
class_names = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
threshold = 0.5

# Filter boxes based on confidence scores
filtered_boxes = prediction[0]['boxes'][prediction[0]['scores'] > threshold]
filtered_scores = prediction[0]['scores'][prediction[0]['scores'] > threshold]
filtered_labels = prediction[0]['labels'][prediction[0]['scores'] > threshold]
# Plot the image with bounding boxes and class names
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(img)

# Add bounding boxes and labels to the plot
for i in range(filtered_boxes.size(0)):
    box = filtered_boxes[i].cpu().numpy()
    score = filtered_scores[i].item()
    label = filtered_labels[i].item()

    # Create a Rectangle patch ((x0, y0), width, height)
    rect = patches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                             linewidth=1, edgecolor='r', facecolor='none')

    # Add the patch to the Axes
    ax.add_patch(rect)

    # Add class name and score as text
    class_name = class_names[label]
    ax.text(box[0], box[1], f'{class_name} ({score:.2f})', color='r', verticalalignment='top')

plt.axis('off')  # Turn off axis labels
plt.show()
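
Faster R-CNN applies per-class non-maximum suppression internally, but overlapping boxes of different classes can still survive the score threshold. If such duplicates are unwanted, a class-agnostic NMS pass prunes them; the 0.3 IoU threshold below is an illustrative choice, not from the original notebook.

from torchvision.ops import nms

# Keep only the highest-scoring box among mutually overlapping ones
keep = nms(filtered_boxes, filtered_scores, iou_threshold=0.3)
filtered_boxes, filtered_scores, filtered_labels = (
    filtered_boxes[keep], filtered_scores[keep], filtered_labels[keep])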
%pip install -q roboflow supervision
Note: you may need to restart the kernel to use updated packages.
try:
    from segment_anything import SamPredictor, sam_model_registry, SamAutomaticMaskGenerator
except ImportError:
    %pip install git+https://github.com/facebookresearch/segment-anything.git
    %pip install -q roboflow supervision
    from segment_anything import SamPredictor, sam_model_registry, SamAutomaticMaskGenerator
# Download the SAM model weights if they are not already cached
import os

pth_path = os.path.expanduser('~/.cache/torch/sam.pth')
pth_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"

if not os.path.exists(pth_path):
    import urllib.request
    print(f'Downloading SAM weights to {pth_path}')
    urllib.request.urlretrieve(pth_url, pth_path)
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
MODEL_TYPE = "vit_h"

sam = sam_model_registry[MODEL_TYPE](checkpoint=pth_path)
_ = sam.to(device=DEVICE)
import cv2
from segment_anything import SamAutomaticMaskGenerator

mask_generator = SamAutomaticMaskGenerator(sam)
image_bgr = cv2.imread('../shared/datasets/images/office.jpg')
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
result = mask_generator.generate(image_rgb)
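
generate() returns a list of per-mask dicts, each holding a boolean H x W segmentation array plus metadata such as area, bbox, predicted_iou, and stability_score. A quick look at what came back:

# How many masks were found, and what each record contains
print(len(result), 'masks')
print(sorted(result[0].keys()))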
import supervision as sv
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
detections = sv.Detections.from_sam(result)
# Annotate a copy so image_bgr itself stays unmodified for the blending below
annotated_image = mask_annotator.annotate(image_bgr.copy(), detections)
# Blending the annotated image with the original image
alpha = 0.5  # Adjust the alpha value as needed
blended_image = cv2.addWeighted(image_bgr, 1 - alpha, annotated_image, alpha, 0)
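
cv2.addWeighted(src1, a, src2, b, gamma) computes src1 * a + src2 * b + gamma per pixel and saturates to uint8, so the blend above could equivalently be written in NumPy (a sketch, assuming both images are uint8 BGR arrays of the same shape):

import numpy as np

# Per-pixel weighted sum, clipped back into the uint8 range
blended_manual = np.clip((1 - alpha) * image_bgr + alpha * annotated_image, 0, 255).astype(np.uint8)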
# Display the original image, annotated image, and the blended result
fig, ax = plt.subplots(1, 3, figsize=(18, 6))
ax[0].imshow(img_tensor.squeeze(0).permute(1, 2, 0))
ax[0].set_title('Original Image')
# OpenCV images are BGR; convert to RGB before handing them to matplotlib
ax[1].imshow(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
ax[1].set_title('Annotated Image')
ax[2].imshow(cv2.cvtColor(blended_image, cv2.COLOR_BGR2RGB))
ax[2].set_title('Blended Result')

for a in ax:
    a.axis('off')
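
SamPredictor was imported above but never used; the Roboflow post in the references also shows prompting SAM with a bounding box instead of generating every mask. A minimal sketch of that, reusing the top-scoring Faster R-CNN detection from earlier (variable names below are illustrative, not from the original notebook):

# Prompt SAM with a single (x0, y0, x1, y1) box and take one mask
predictor = SamPredictor(sam)
predictor.set_image(image_rgb)

box = filtered_boxes[0].cpu().numpy()
masks, mask_scores, _ = predictor.predict(box=box, multimask_output=False)

plt.imshow(image_rgb)
plt.imshow(masks[0], alpha=0.5)  # (H, W) boolean mask overlaid on the image
plt.axis('off')
plt.show()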