Gemini API Performance: Sequential vs Async vs Single-Prompt

LLM
Gemini
Performance
Batching
Python
Author

Nipun Batra

Published

December 12, 2025

Introduction

We often need to process multiple images. The question is: What is the fastest way to do it?

We will compare three approaches: 1. Sequential: One by one (The naive loop). 2. Concurrent (Client-Side Batching): Using asyncio to send parallel requests. 3. Single-Prompt (Server-Side Context Batching): Sending all images in a single API call and asking for all answers at once.

We use 4 images with varying crowd densities.

import os
import time
import asyncio
from google import genai
from PIL import Image
import io
import matplotlib.pyplot as plt
import json

# Setup the client
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Using the experimental flash model for speed
MODEL_ID = "models/gemini-3-pro-preview"
image_paths = ["bus.jpg", "crowd1.jpg", "crowd2.jpg", "classroom.jpg"]
images = [Image.open(p) for p in image_paths]

# Visualization
fig, axes = plt.subplots(1, 4, figsize=(10, 5))
for ax, img, name in zip(axes, images, image_paths):
    ax.imshow(img)
    ax.set_title(name)
    ax.axis('off')
plt.show()

print(f"Benchmarking with {len(images)} images.")

Benchmarking with 4 images.

1. Sequential Processing

The baseline: sending one request after another.

def run_sequential(images):
    results = []
    start_time = time.time()
    prompt = "Count the number of people in this image. Return just the integer number."
    
    for i, img in enumerate(images):
        print(f"Seq: Processing {i+1}/{len(images)}...", end="\n")
        response = client.models.generate_content(
            model=MODEL_ID,
            contents=[img, prompt]
        )
        results.append(response.text.strip())
        
    return time.time() - start_time, results

print("Running Sequential...")
seq_time, seq_results = run_sequential(images)
print(f"Sequential Time: {seq_time:.2f}s")
Running Sequential...
Seq: Processing 1/4...
Seq: Processing 2/4...
Seq: Processing 3/4...
Seq: Processing 4/4...
Sequential Time: 85.47s

2. Concurrent Processing (asyncio)

Sending requests in parallel. This is often what we mean by “batching” in online systems—reducing latency by overlapping I/O.

async def process_one_async(client, img):
    prompt = "Count the number of people in this image. Return just the integer number."
    response = await client.aio.models.generate_content(
        model=MODEL_ID,
        contents=[img, prompt]
    )
    return response.text.strip()

async def run_async(images):
    start_time = time.time()
    tasks = [process_one_async(client, img) for img in images]
    results = await asyncio.gather(*tasks)
    return time.time() - start_time, results

print("Running Async...")
# await run_async(images) # Jupyter syntax
async_time, async_results = await run_async(images)
print(f"Async Time: {async_time:.2f}s")
Running Async...
Async Time: 39.35s

3. Single-Prompt Batching (All-in-One)

Here we ask: “Can we pass a batch to Gemini itself?”

Yes! We can pass multiple images in the contents list of a single request. We instruct the model to look at all of them and return a structured list of counts.

Note: This saves HTTP overhead but requires the model to hold all images in context.

def run_single_prompt(images):
    start_time = time.time()
    
    # Construct a multimodal prompt with interleaved images
    # "Here is image 1", img1, "Here is image 2", img2...
    
    contents = []
    for i, img in enumerate(images):
        contents.append(f"Image {i+1}:")
        contents.append(img)
    
    prompt = (
        """I have provided 4 images above labeled Image 1 to Image 4.
        Count the number of people in each image respectively.

        Return the result strictly as a JSON list of integers, e.g., [10, 5, 3, 20].

        Do not include markdown formatting or backticks."""
    )
    contents.append(prompt)
    
    print("Sending Single-Prompt Request...")
    response = client.models.generate_content(
        model=MODEL_ID,
        contents=contents
    )
    
    return time.time() - start_time, response

def parse_single_prompt_response(response, images):
    # Parse the JSON response
    try:
        text_response = response.text.strip()
        # Handle potential markdown ```json wrapping
        if text_response.startswith("```"):
            text_response = text_response.split("```", 1)[1].rsplit("```", 1)[0]
        
        results = json.loads(text_response)
        # Convert to strings to match other methods
        results = [str(x) for x in results]
    except Exception as e:
        print(f"Error parsing JSON: {e}")
        print(f"Raw response: {response.text}")
        results = ["Error"] * len(images)
        
    return results

print("Running Single-Prompt...")
single_time, response = run_single_prompt(images)
Running Single-Prompt...
Sending Single-Prompt Request...
print(response.text)
[4, 38, 35, 9]
single_results = parse_single_prompt_response(response, images)
print(f"Single-Prompt Time: {single_time:.2f}s")
print(f"Results: {single_results}")
Single-Prompt Time: 99.92s
Results: ['4', '38', '35', '9']

Final Comparison

Which method wins?

times = [seq_time, async_time, single_time]
labels = ['Sequential', 'Async (Client Batch)', 'Single Prompt (Server Context)']
colors = ['orange', 'skyblue', 'lightgreen']

plt.figure(figsize=(10, 6))
bars = plt.bar(labels, times, color=colors)
plt.ylabel('Total Time (seconds)')
plt.title('Performance Comparison: Counting People in 4 Images')

# Add values on top
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.1, f'{yval:.2f}s', ha='center', va='bottom')

plt.show()

# Accuracy Check
print("--- Consistency Check ---")
print(f"{'Image':<15} | {'Sequential':<10} | {'Async':<10} | {'Single-Prompt':<10}")
print("-" * 55)
for i, name in enumerate(image_paths):
    print(f"{name:<15} | {seq_results[i]:<10} | {async_results[i]:<10} | {single_results[i] if i < len(single_results) else 'N/A':<10}")

--- Consistency Check ---
Image           | Sequential | Async      | Single-Prompt
-------------------------------------------------------
bus.jpg         | 4          | 6          | 4         
crowd1.jpg      | 85         | 120        | 38        
crowd2.jpg      | 104        | 110        | 35        
classroom.jpg   | 9          | 10         | 9