import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%config InlineBackend.figure_format = 'retina'
Introduction To Matplotlib
Data and Library Import
# Download the car price dataset; the first CSV column (car_ID) becomes the index
url = "https://raw.githubusercontent.com/YashBachwana/ES114-2025--Car-Price-Dataset/refs/heads/main/Car%20Price/CarPrice_Assignment.csv"
Data = pd.read_csv(url, index_col=0)

# Preview the first five rows
Data.head()
symboling | CarName | fueltype | aspiration | doornumber | carbody | drivewheel | enginelocation | wheelbase | carlength | ... | enginesize | fuelsystem | boreratio | stroke | compressionratio | horsepower | peakrpm | citympg | highwaympg | price | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
car_ID | |||||||||||||||||||||
1 | 3 | alfa-romero giulia | gas | std | two | convertible | rwd | front | 88.6 | 168.8 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111 | 5000 | 21 | 27 | 13495.0 |
2 | 3 | alfa-romero stelvio | gas | std | two | convertible | rwd | front | 88.6 | 168.8 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111 | 5000 | 21 | 27 | 16500.0 |
3 | 1 | alfa-romero Quadrifoglio | gas | std | two | hatchback | rwd | front | 94.5 | 171.2 | ... | 152 | mpfi | 2.68 | 3.47 | 9.0 | 154 | 5000 | 19 | 26 | 16500.0 |
4 | 2 | audi 100 ls | gas | std | four | sedan | fwd | front | 99.8 | 176.6 | ... | 109 | mpfi | 3.19 | 3.40 | 10.0 | 102 | 5500 | 24 | 30 | 13950.0 |
5 | 2 | audi 100ls | gas | std | four | sedan | 4wd | front | 99.4 | 176.6 | ... | 136 | mpfi | 3.19 | 3.40 | 8.0 | 115 | 5500 | 18 | 22 | 17450.0 |
5 rows × 25 columns
A Simple Line Plot
A line plot is the most basic type of plot in Matplotlib. It is used to display information as a series of data points connected by straight lines.
Sin Wave
import numpy as np
import matplotlib.pyplot as plt
# Create a figure and axis
fig, ax = plt.subplots()

# Sample 100 evenly spaced points on [0, 10] and evaluate the sine at each
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Plot y = sin(x) on the ax object
ax.plot(x, y)
Adding Label, Title and Grid
# Create a figure and axis
fig, ax = plt.subplots()

# Sample 100 evenly spaced points on [0, 10] and evaluate the sine at each
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Plot y = sin(x) on the ax object
ax.plot(x, y)

# Add title and labels
ax.set_title("Sine Wave: Basic Plot")
ax.set_xlabel("Time (in seconds)")
ax.set_ylabel("Amplitude")

# Add grid for better visibility of the plot
ax.grid(True)
# Wrap the sine samples in a Series indexed by x so pandas can plot it directly
sine_series = pd.Series(y, index=x, name="Amplitude")
sine_series
0.00000 0.000000
0.10101 0.100838
0.20202 0.200649
0.30303 0.298414
0.40404 0.393137
...
9.59596 -0.170347
9.69697 -0.268843
9.79798 -0.364599
9.89899 -0.456637
10.00000 -0.544021
Name: Amplitude, Length: 100, dtype: float64
# Plot the sine wave using pandas Series; title, labels and grid are set in one call
ax = sine_series.plot(
    title="Sine Wave: Basic Plot",
    xlabel="Time (in seconds)",
    ylabel="Amplitude",
    grid=True,
)
Organizing The Plots
Multiple plots on the same figure
# Create a figure and axis
fig, ax = plt.subplots()

# Evaluate both curves on the same 100 sample points
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)

# Plot y = sin(x) on the ax object with label
ax.plot(x, y1, label="sin(x)", color='b')

# Plot y = cos(x) on the ax object with label
ax.plot(x, y2, label="cos(x)", color='r')

# Add title and labels
ax.set_title("Sine and Cosine Waves")
ax.set_xlabel("Time (in seconds)")
ax.set_ylabel("Amplitude")

# Add legend to distinguish the curves
ax.legend(loc="upper right", title="Functions")

# Add grid for better visibility
ax.grid(True)

# Display the plot
plt.show()
Other ways to specify colors
# Create a figure and axis
fig, ax = plt.subplots()

# Evaluate both curves on the same 100 sample points
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)

# Plot y = sin(x) on the ax object with label ('C0' = first color of the default cycle)
ax.plot(x, y1, label="sin(x)", color='C0')

# Plot y = cos(x) on the ax object with label ('C1' = second color of the default cycle)
ax.plot(x, y2, label="cos(x)", color='C1')

# Add title and labels
ax.set_title("Sine and Cosine Waves")
ax.set_xlabel("Time (in seconds)")
ax.set_ylabel("Amplitude")

# Add legend to distinguish the curves
ax.legend(loc="upper right")

# Add grid for better visibility
ax.grid(True)
Splitting a figure
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 10, 100)

# Create a figure with 4 subplots arranged in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(10, 6))  # 2 rows, 2 columns

# First subplot: sin(x)
axes[0, 0].plot(x, np.sin(x))
axes[0, 0].set_title("sin(x)")

# Second subplot: cos(x)
axes[0, 1].plot(x, np.cos(x), color='red')
axes[0, 1].set_title("cos(x)")

# Third subplot: tan(x)
axes[1, 0].plot(x, np.tan(x), color='green')
axes[1, 0].set_title("tan(x)")

# Fourth subplot: exp(-x)
axes[1, 1].plot(x, np.exp(-x), color='purple')
axes[1, 1].set_title("exp(-x)")

# Adjust spacing so titles and tick labels do not overlap
fig.tight_layout()
# Same plot as above, this time using pandas
# Create a pandas DataFrame with the sine, cosine, tangent, and exponential-decay
# values, indexed by x
df = pd.DataFrame({
    "sin(x)": np.sin(x),
    "cos(x)": np.cos(x),
    "tan(x)": np.tan(x),
    "exp(-x)": np.exp(-x),
}, index=x)

df
sin(x) | cos(x) | tan(x) | exp(-x) | |
---|---|---|---|---|
0.00000 | 0.000000 | 1.000000 | 0.000000 | 1.000000 |
0.10101 | 0.100838 | 0.994903 | 0.101355 | 0.903924 |
0.20202 | 0.200649 | 0.979663 | 0.204814 | 0.817078 |
0.30303 | 0.298414 | 0.954437 | 0.312660 | 0.738577 |
0.40404 | 0.393137 | 0.919480 | 0.427564 | 0.667617 |
... | ... | ... | ... | ... |
9.59596 | -0.170347 | -0.985384 | 0.172874 | 0.000068 |
9.69697 | -0.268843 | -0.963184 | 0.279119 | 0.000061 |
9.79798 | -0.364599 | -0.931165 | 0.391551 | 0.000056 |
9.89899 | -0.456637 | -0.889653 | 0.513276 | 0.000050 |
10.00000 | -0.544021 | -0.839072 | 0.648361 | 0.000045 |
100 rows × 4 columns
# Create a figure with 4 subplots arranged in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(10, 6))  # 2 rows, 2 columns

# Plot each column of the DataFrame on a separate subplot
df["sin(x)"].plot(ax=axes[0, 0], color='blue', title="sin(x)")
df["cos(x)"].plot(ax=axes[0, 1], color='red', title="cos(x)")
df["tan(x)"].plot(ax=axes[1, 0], color='green', title="tan(x)")
df["exp(-x)"].plot(ax=axes[1, 1], color='purple', title="exp(-x)")

# Adjust spacing so titles and tick labels do not overlap
fig.tight_layout()
# Same plot as above, this time using pandas
# Create a pandas DataFrame with the sine, cosine, tangent, and exponential-decay
# values, indexed by x
df = pd.DataFrame({
    "sin(x)": np.sin(x),
    "cos(x)": np.cos(x),
    "tan(x)": np.tan(x),
    "exp(-x)": np.exp(-x),
}, index=x)

fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# subplots=True draws every column on its own axes; one title is given per column
df.plot(subplots=True, ax=axes, figsize=(10, 6),
        title=["sin(x)", "cos(x)", "tan(x)", "exp(-x)"],
        legend=False)

fig.tight_layout()
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# subplots=True draws every column on its own axes; one title is given per column
df.plot(subplots=True, ax=axes, figsize=(10, 6),
        title=["sin(x)", "cos(x)", "tan(x)", "exp(-x)"],
        legend=False)

# Add super title to the figure
fig.suptitle("Trigonometric Functions and Exponential Decay", fontsize=20)

fig.tight_layout()
Tweaking
import numpy as np
import matplotlib.pyplot as plt
# Daily temperature variations (in °C) over a week
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
city_a = [22, 24, 23, 25, 26, 27, 28]  # City A temperatures
city_b = [18, 19, 20, 21, 22, 21, 20]  # City B temperatures
city_c = [30, 31, 32, 33, 34, 35, 36]  # City C temperatures

# Create a figure and axis
fig, ax = plt.subplots(figsize=(10, 6))

# Plot trends with customizations (distinct color, line style and marker per city)
ax.plot(days, city_a, color='blue', linestyle='-', linewidth=2, marker='o', label='City A')
ax.plot(days, city_b, color='green', linestyle='--', linewidth=2, marker='s', label='City B')
ax.plot(days, city_c, color='red', linestyle='-.', linewidth=2, marker='^', label='City C')

# Add title and labels
ax.set_title("Temperature Trends Over a Week", fontsize=16)
ax.set_xlabel("Day of the Week", fontsize=14)
ax.set_ylabel("Temperature (°C)", fontsize=14)

# Customize ticks
ax.set_xticks(days)  # Use day names for x-axis
ax.tick_params(axis='both', which='major', labelsize=12)

# Add legend
ax.legend(fontsize=12, title="Cities")

# Add grid for better readability
ax.grid(True, linestyle='--', alpha=0.6)

# Adjust spacing so labels are not clipped
plt.tight_layout()
# Create a pandas DataFrame
data = pd.DataFrame({
    "Day": days,
    "City A": city_a,
    "City B": city_b,
    "City C": city_c,
})

# Define a list of markers (one per city column, in plotting order)
markers = ["o", "s", "^"]

# Plot in one go
ax = data.plot(
    x="Day",
    y=["City A", "City B", "City C"],
    figsize=(10, 6),
    linestyle="-",
    linewidth=2,
    title="Temperature Trends Over a Week",
    xlabel="Day of the Week",
    ylabel="Temperature (°C)",
)

# Apply markers
for line, marker in zip(ax.lines, markers):
    line.set_marker(marker)

# Customize legend (called after the markers are set so the legend shows them)
ax.legend(fontsize=12, title="Cities")

# Customize grid and ticks
ax.grid(True, linestyle="--", alpha=0.6)
ax.tick_params(axis="both", which="major", labelsize=12)
Limits and Ticks
import matplotlib.pyplot as plt
import numpy as np
# Hypothetical temperature data (in °C) for 24 hours
hours = np.arange(24)  # Hours from 0 to 23
temperature = [12, 11, 10, 9, 9, 8, 8, 10, 14, 18, 22, 25, 27, 29, 28, 26, 23, 21, 18, 16, 15, 14, 13, 12]

# Create a figure and axis
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the temperature data
ax.plot(hours, temperature, marker='o', color='orange', label='Temperature (°C)')
import matplotlib.pyplot as plt
import numpy as np
# Hypothetical temperature data (in °C) for 24 hours
hours = np.arange(24)  # Hours from 0 to 23
temperature = [12, 11, 10, 9, 9, 8, 8, 10, 14, 18, 22, 25, 27, 29, 28, 26, 23, 21, 18, 16, 15, 14, 13, 12]

# Create a figure and axis
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the temperature data
ax.plot(hours, temperature, marker='o', color='orange', label='Temperature (°C)')

# Add title and labels
ax.set_title("Hourly Temperature Variations", fontsize=16)
ax.set_xlabel("Hour of the Day", fontsize=14)
ax.set_ylabel("Temperature (°C)", fontsize=14)

# Modify axis limits to focus on the specific hours and temperature range
ax.set_xlim(6, 18)   # Focus on hours 6 AM to 6 PM
ax.set_ylim(15, 30)  # Focus on the relevant temperature range

# Customize ticks
tick_hours = range(6, 19, 2)  # Show ticks every 2 hours in the focused range
ax.set_xticks(tick_hours)
# Format as AM/PM; noon must read "12 PM" (a plain h - 12 would give "0 PM")
ax.set_xticklabels(
    [f"{h if h <= 12 else h - 12} {'AM' if h < 12 else 'PM'}" for h in tick_hours]
)
ax.set_yticks(range(15, 31, 5))  # Show y-axis ticks every 5°C

# Add gridlines for better visibility
ax.grid(which='both', axis='both', linestyle='--', alpha=0.6)

# Add legend
ax.legend(fontsize=12, loc='upper left')

# Display the plot
plt.tight_layout()
plt.show()
Saving The Plots
# bbox_inches='tight' trims surrounding whitespace in every saved file

# Save the plot as a PNG file
fig.savefig("temperature_variations.png", dpi=300, bbox_inches='tight')

# Save the plot as a PDF file
fig.savefig("temperature_variations.pdf", dpi=300, bbox_inches='tight')

# Save the plot as an SVG file
fig.savefig("temperature_variations.svg", dpi=300, bbox_inches='tight')
Choose Your Plot
Bar Plot
Used for categorical or discrete data to compare counts or summarized values.
# Count occurrences of each car body type
# NOTE: despite its name, this variable holds 'carbody' counts, not fuel-type counts
fueltype_counts = Data['carbody'].value_counts()
fueltype_counts
carbody
sedan 96
hatchback 70
wagon 25
hardtop 8
convertible 6
Name: count, dtype: int64
# Parameters for bar color and width
bar_color = ['skyblue']  # Example color list, you can change it to any color you prefer
bar_width = 0.6

fig, ax = plt.subplots(figsize=(8, 6))
# One bar per category: the index supplies the category names, the values the heights
ax.bar(fueltype_counts.index, fueltype_counts.values, color=bar_color, width=bar_width)

ax.set_title('Distribution of Car Body', fontsize=16)
#ax.set_xlabel('Car Body', fontsize=14)
#ax.set_ylabel('Count', fontsize=14)
#ax.set_xticks(range(len(fueltype_counts.index)))
#ax.set_xticklabels(fueltype_counts.index, fontsize=12)
plt.show()
# In pandas
fueltype_counts.plot(kind='bar', color='skyblue',
                     figsize=(8, 6),
                     xlabel='Car Body', ylabel='Count',
                     title='Distribution of Car Body')
Annotations
# Count occurrences of each car body type
# NOTE: despite its name, this variable holds 'carbody' counts, not fuel-type counts
fueltype_counts = Data['carbody'].value_counts()

# Parameters for bar color and width
bar_color = ['skyblue']  # Example color list, you can change it to any color you prefer
bar_width = 0.6

fig, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(fueltype_counts.index, fueltype_counts.values, color=bar_color, width=bar_width)

# Add annotations on top of each bar
for bar in bars:
    yval = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, yval + 1,  # Position the text above the bar
            round(yval, 2),  # Display the count value (rounded to 2 decimal places)
            ha='center', va='bottom', fontsize=12)  # Align text at the center of the bar and just above it

ax.set_title('Distribution of Car Body', fontsize=16)
ax.set_xlabel('Car Body', fontsize=14)
ax.set_ylabel('Count', fontsize=14)
ax.set_xticks(range(len(fueltype_counts.index)))
ax.set_xticklabels(fueltype_counts.index, fontsize=12)
plt.show()
# Plot the bar chart
ax = fueltype_counts.plot(
    kind='bar',
    color='skyblue',
    figsize=(8, 6),
    rot=0,
    xlabel='Car Body',
    ylabel='Count',
    title='Distribution of Car Body',
)

# Annotate the bars
for bar in ax.patches:
    # Get the height of the bar (count value)
    bar_height = bar.get_height()
    # Annotate the bar with its value
    ax.annotate(f'{bar_height}',  # Annotation text
                xy=(bar.get_x() + bar.get_width() / 2, bar_height),  # Position above bar
                xytext=(0, 5),  # Offset for the annotation
                textcoords='offset points',
                ha='center',
                fontsize=10)

# Adjust layout and show the plot
plt.tight_layout()
Scatterplot
Used for bivariate data to examine relationships or correlations between two continuous variables.
# Horsepower on the x-axis, price on the y-axis
x = Data['horsepower']
y = Data['price']

# Create the scatter plot
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, color='blue', alpha=0.7)

ax.set_title('Horsepower vs. Car Price', fontsize=16)
ax.set_xlabel('Horsepower', fontsize=14)
ax.set_ylabel('Price ($)', fontsize=14)

ax.grid(True)

# Display the full DataFrame for reference
Data
symboling | CarName | fueltype | aspiration | doornumber | carbody | drivewheel | enginelocation | wheelbase | carlength | ... | enginesize | fuelsystem | boreratio | stroke | compressionratio | horsepower | peakrpm | citympg | highwaympg | price | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
car_ID | |||||||||||||||||||||
1 | 3 | alfa-romero giulia | gas | std | two | convertible | rwd | front | 88.6 | 168.8 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111 | 5000 | 21 | 27 | 13495.0 |
2 | 3 | alfa-romero stelvio | gas | std | two | convertible | rwd | front | 88.6 | 168.8 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111 | 5000 | 21 | 27 | 16500.0 |
3 | 1 | alfa-romero Quadrifoglio | gas | std | two | hatchback | rwd | front | 94.5 | 171.2 | ... | 152 | mpfi | 2.68 | 3.47 | 9.0 | 154 | 5000 | 19 | 26 | 16500.0 |
4 | 2 | audi 100 ls | gas | std | four | sedan | fwd | front | 99.8 | 176.6 | ... | 109 | mpfi | 3.19 | 3.40 | 10.0 | 102 | 5500 | 24 | 30 | 13950.0 |
5 | 2 | audi 100ls | gas | std | four | sedan | 4wd | front | 99.4 | 176.6 | ... | 136 | mpfi | 3.19 | 3.40 | 8.0 | 115 | 5500 | 18 | 22 | 17450.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
201 | -1 | volvo 145e (sw) | gas | std | four | sedan | rwd | front | 109.1 | 188.8 | ... | 141 | mpfi | 3.78 | 3.15 | 9.5 | 114 | 5400 | 23 | 28 | 16845.0 |
202 | -1 | volvo 144ea | gas | turbo | four | sedan | rwd | front | 109.1 | 188.8 | ... | 141 | mpfi | 3.78 | 3.15 | 8.7 | 160 | 5300 | 19 | 25 | 19045.0 |
203 | -1 | volvo 244dl | gas | std | four | sedan | rwd | front | 109.1 | 188.8 | ... | 173 | mpfi | 3.58 | 2.87 | 8.8 | 134 | 5500 | 18 | 23 | 21485.0 |
204 | -1 | volvo 246 | diesel | turbo | four | sedan | rwd | front | 109.1 | 188.8 | ... | 145 | idi | 3.01 | 3.40 | 23.0 | 106 | 4800 | 26 | 27 | 22470.0 |
205 | -1 | volvo 264gl | gas | turbo | four | sedan | rwd | front | 109.1 | 188.8 | ... | 141 | mpfi | 3.78 | 3.15 | 9.5 | 114 | 5400 | 19 | 25 | 22625.0 |
205 rows × 25 columns
# Same scatter plot through the pandas plotting interface, configured in a single call
Data.plot.scatter(x='horsepower', y='price', color='blue', alpha=0.7, figsize=(8, 6),
                  title='Horsepower vs. Car Price', xlabel='Horsepower', ylabel='Price ($)', grid=True)
Pie Chart
Used for proportional or compositional data to represent parts of a whole
# Count occurrences of each car body type
# NOTE: despite its name, this variable holds 'carbody' counts, not fuel-type counts
fueltype_counts = Data['carbody'].value_counts()

fig, ax = plt.subplots(figsize=(8, 6))

sizes = fueltype_counts.values
labels = fueltype_counts.index
colors = ['gold', 'lightblue', 'lightgreen', 'pink', 'lightgrey']  # Adjust colors as needed

# autopct prints each slice's share as a percentage with one decimal place
ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)

# The slices are car body types, so the title names them (it previously said "Fuel Types")
ax.set_title("Distribution of Car Body Types")

plt.show()
# Pie in pandas
# The slices are car body types, so the title names them (it previously said "Fuel Types")
fueltype_counts.plot(kind='pie', figsize=(8, 6), autopct='%1.1f%%', startangle=140,
                     title='Distribution of Car Body Types',
                     colors=['gold', 'lightblue', 'lightgreen', 'pink', 'lightgrey'])
Histogram
Used for univariate continuous data to visualize frequency distribution.
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(8, 6))

data = Data['price']

# Split the price range into 40 equal-width bins and count the cars in each
ax.hist(data, bins=40, color='skyblue', edgecolor='black')

ax.set_title('Histogram of Price', fontsize=16)
ax.set_xlabel('Price ($)', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)

plt.show()
# In pandas
Data['price'].plot(kind='hist', bins=20, color='skyblue', edgecolor='black',
                   figsize=(8, 6), title='Histogram of Price', xlabel='Price ($)', ylabel='Frequency')
Errorbar
Used for data with repeated measurements of the same quantity (for example, multiple trials of an experiment), showing the variability or uncertainty of each measured value.
import numpy as np
import matplotlib.pyplot as plt
# Simulate a physics experiment: Measuring spring constant (k) at different masses
# Each mass is measured multiple times to account for experimental uncertainty
# Create sample data
masses = np.array([50, 100, 150, 200, 250])  # mass in grams
num_trials = 10

# Simulate multiple measurements for each mass
# Each measurement has some random variation to simulate real experimental conditions:
# true k = 10 N/m plus Gaussian noise with standard deviation 0.5
# (the simulated value does not depend on the mass itself, hence the unused loop variable)
measurements = [10 + np.random.normal(0, 0.5, num_trials) for _ in masses]
measurements
[array([ 9.60059384, 9.15385593, 10.28497197, 9.85293433, 10.02889485,
10.34334378, 9.08639395, 9.82668512, 9.7685277 , 9.4138344 ]),
array([ 9.51136846, 11.50160258, 10.15063726, 10.8276022 , 8.97931269,
10.13273831, 10.38841646, 8.64112354, 10.33124531, 10.69226999]),
array([10.26489637, 9.81086493, 10.65842485, 9.47632847, 9.2579158 ,
9.60256365, 10.60278096, 9.38020745, 10.54828873, 10.95357033]),
array([10.10126388, 10.76131134, 10.30305548, 9.50037411, 9.24211105,
10.82680171, 9.91489558, 9.66617419, 10.18283327, 9.85033258]),
array([10.18823263, 10.01292153, 10.57612678, 9.92321915, 10.25308004,
10.6656051 , 10.08219377, 9.35058485, 11.04976976, 9.32749305])]
# Calculate means and standard errors
means = [np.mean(m) for m in measurements]
# Standard error of the mean: sample standard deviation (ddof=1) divided by sqrt(n).
# ddof=1 is used because the spread is estimated from the trials themselves.
errors = [np.std(m, ddof=1) / np.sqrt(num_trials) for m in measurements]

# Create the error bar plot
fig, ax = plt.subplots(figsize=(10, 6))

# fmt='o' draws markers only (no connecting line); caps mark the ends of each error bar
ax.errorbar(masses, means, yerr=errors, fmt='o',
            color='blue', ecolor='black',
            capsize=5, capthick=1.5,
            label='Measured Values')

# Add true value line
ax.axhline(y=10, color='r', linestyle='--', label='True Spring Constant')

ax.set_title('Spring Constant Measurements vs Mass', fontsize=12)
ax.set_xlabel('Mass (g)', fontsize=10)
ax.set_ylabel('Spring Constant (N/m)', fontsize=10)
ax.grid(True, linestyle='--', alpha=0.7)

ax.legend()
# One column per mass, one row per trial (transposed from one row per mass)
df = pd.DataFrame(measurements, index=masses).T
df
50 | 100 | 150 | 200 | 250 | |
---|---|---|---|---|---|
0 | 9.600594 | 9.511368 | 10.264896 | 10.101264 | 10.188233 |
1 | 9.153856 | 11.501603 | 9.810865 | 10.761311 | 10.012922 |
2 | 10.284972 | 10.150637 | 10.658425 | 10.303055 | 10.576127 |
3 | 9.852934 | 10.827602 | 9.476328 | 9.500374 | 9.923219 |
4 | 10.028895 | 8.979313 | 9.257916 | 9.242111 | 10.253080 |
5 | 10.343344 | 10.132738 | 9.602564 | 10.826802 | 10.665605 |
6 | 9.086394 | 10.388416 | 10.602781 | 9.914896 | 10.082194 |
7 | 9.826685 | 8.641124 | 9.380207 | 9.666174 | 9.350585 |
8 | 9.768528 | 10.331245 | 10.548289 | 10.182833 | 11.049770 |
9 | 9.413834 | 10.692270 | 10.953570 | 9.850333 | 9.327493 |
Boxplot
Used for univariate or grouped data to summarize distributions and highlight outliers.
import matplotlib.pyplot as plt
# Create a boxplot to compare price distribution by car body type
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the boxplot (patch_artist=True makes the boxes fillable with facecolor)
Data.boxplot(column='price', by='carbody', ax=ax, grid=False, patch_artist=True,
             boxprops=dict(facecolor='lightblue', color='blue'),
             whiskerprops=dict(color='blue'),
             medianprops=dict(color='red', linewidth=2))

ax.set_title("Price Distribution by Car Body Type", fontsize=16)
ax.set_xlabel("Car Body Type", fontsize=14)
ax.set_ylabel("Price (USD)", fontsize=14)
plt.suptitle("")  # This is to remove the default title

# Show the plot
plt.show()
Scale Transformations
import matplotlib.pyplot as plt
import numpy as np
# Exponential growth
x = np.linspace(1, 100, 500)
y = np.exp(x / 20)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# Plot with normal scale
ax1.plot(x, y, label="Exponential Growth", color="blue")
ax1.set_title("Normal Scale")
ax1.set_xlabel("X-axis")
ax1.set_ylabel("Y-axis")
ax1.legend()
ax1.grid(True)

# Plot with log scale (Y-axis): the exponential curve appears as a straight line
ax2.plot(x, y, label="Exponential Growth", color="green")
ax2.set_yscale("log")
ax2.set_title("Logarithmic Scale (Y-axis)")
ax2.set_xlabel("X-axis")
ax2.set_ylabel("Log(Y-axis)")
ax2.legend()
ax2.grid(True, which="both", linestyle="--", linewidth=0.5)
3D Visualization And Contour
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Create grid for paraboloid
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)

# meshgrid expands the two 1D axes into 100x100 coordinate matrices:
# X repeats x along every row, Y repeats y along every column
X, Y = np.meshgrid(x, y)
pd.DataFrame(X)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
1 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
2 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
3 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
4 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
95 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
96 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
97 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
98 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
99 | -10.0 | -9.79798 | -9.59596 | -9.393939 | -9.191919 | -8.989899 | -8.787879 | -8.585859 | -8.383838 | -8.181818 | ... | 8.181818 | 8.383838 | 8.585859 | 8.787879 | 8.989899 | 9.191919 | 9.393939 | 9.59596 | 9.79798 | 10.0 |
100 rows × 100 columns
# Display the Y coordinate matrix as a table
pd.DataFrame(Y)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | ... | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 | -10.000000 |
1 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | ... | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 | -9.797980 |
2 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | ... | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 | -9.595960 |
3 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | ... | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 | -9.393939 |
4 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | ... | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 | -9.191919 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
95 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | ... | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 | 9.191919 |
96 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | ... | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 | 9.393939 |
97 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | ... | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 | 9.595960 |
98 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | ... | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 | 9.797980 |
99 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | ... | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 |
100 rows × 100 columns
# Height of the paraboloid at every grid point
Z = X**2 + Y**2

# Request a 3D projection so the axes supports plot_surface
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})

surface = ax.plot_surface(X, Y, Z, cmap='viridis')

# Add labels and title
ax.set_title('3D Plot of Paraboloid', fontsize=16)
ax.set_xlabel('X-axis', fontsize=12)
ax.set_ylabel('Y-axis', fontsize=12)
ax.set_zlabel('Z-axis', fontsize=12)

# Color scale for the surface; pad adds space between the plot and the colorbar
fig.colorbar(surface, pad=0.1)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Create grid for paraboloid
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)

X, Y = np.meshgrid(x, y)

Z = X**2 + Y**2

# Create the figure first and add each subplot individually, so the left slot
# holds only the 3D axes. (plt.subplots(1, 2) would also create a 2D axes in
# that slot, and the 3D axes would then be stacked on top of it.)
fig = plt.figure(figsize=(14, 6))

# 3D Plot
ax1 = fig.add_subplot(1, 2, 1, projection='3d')
surface = ax1.plot_surface(X, Y, Z, cmap='viridis')
ax1.set_title('3D Plot of Paraboloid', fontsize=16)
ax1.set_xlabel('X-axis', fontsize=12)
ax1.set_ylabel('Y-axis', fontsize=12)
ax1.set_zlabel('Z-axis', fontsize=12)

# 2D Contour Plot
ax2 = fig.add_subplot(1, 2, 2)
contour = ax2.contourf(X, Y, Z, levels=20, cmap='viridis')
ax2.set_title('2D Contour Plot of Paraboloid', fontsize=16)
ax2.set_xlabel('X-axis', fontsize=12)
ax2.set_ylabel('Y-axis', fontsize=12)
fig.colorbar(contour, ax=ax2, pad=0.1)

plt.tight_layout()
plt.show()
Using Imshow for Image-Like Data
`imshow` is crucial for displaying 2D data as color-coded images. It’s commonly used for heatmaps, matrices, and actual images.
import numpy as np
import matplotlib.pyplot as plt
# Create sample data: a 10x10 matrix of uniform random values in [0, 1)
data = np.random.rand(10, 10)

pd.DataFrame(data)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.490860 | 0.929045 | 0.572919 | 0.964711 | 0.980014 | 0.186145 | 0.427508 | 0.608805 | 0.893498 | 0.695284 |
1 | 0.966428 | 0.422390 | 0.348150 | 0.525304 | 0.331108 | 0.580977 | 0.562608 | 0.560270 | 0.939938 | 0.808085 |
2 | 0.021255 | 0.326444 | 0.765063 | 0.515920 | 0.515554 | 0.121355 | 0.830749 | 0.594427 | 0.062847 | 0.371122 |
3 | 0.222631 | 0.762149 | 0.229365 | 0.335383 | 0.635986 | 0.205019 | 0.852074 | 0.897168 | 0.055277 | 0.186129 |
4 | 0.987063 | 0.835088 | 0.654150 | 0.181315 | 0.012669 | 0.086974 | 0.903000 | 0.829740 | 0.716490 | 0.834996 |
5 | 0.180575 | 0.426452 | 0.881832 | 0.496820 | 0.524836 | 0.539941 | 0.650598 | 0.490325 | 0.337512 | 0.901674 |
6 | 0.752542 | 0.284082 | 0.979989 | 0.957409 | 0.929105 | 0.480490 | 0.818328 | 0.123510 | 0.564911 | 0.189244 |
7 | 0.992347 | 0.311162 | 0.411070 | 0.600753 | 0.495184 | 0.069100 | 0.372265 | 0.431248 | 0.456604 | 0.238691 |
8 | 0.258555 | 0.691445 | 0.463634 | 0.021314 | 0.333122 | 0.435857 | 0.207976 | 0.866270 | 0.231009 | 0.749924 |
9 | 0.905349 | 0.911525 | 0.675188 | 0.804228 | 0.966661 | 0.572587 | 0.021375 | 0.688160 | 0.815374 | 0.508309 |
# Basic imshow example
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(data, cmap='viridis')
fig.colorbar(im, ax=ax)
ax.set_title('Basic imshow example')

# Multiple imshow with different interpolations
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
im1 = ax1.imshow(data, interpolation='nearest', cmap='coolwarm')
ax1.set_title('nearest interpolation')
fig.colorbar(im1, ax=ax1)

im2 = ax2.imshow(data, interpolation='bilinear', cmap='coolwarm')
ax2.set_title('bilinear interpolation')
fig.colorbar(im2, ax=ax2)
fig.tight_layout()
# Advanced Plot Features
## Custom Markers and Lines
What are Custom Markers and Lines?
Markers and lines are elements used in plots to differentiate data points and highlight trends. Matplotlib allows customization of their shapes, colors, and styles to make plots more informative and visually appealing.
Customizing Markers
- Markers represent individual data points on a plot.
- Use the `marker` parameter in plotting functions.
Common Marker Options:
- `'o'`: Circle
- `'s'`: Square
- `'^'`: Triangle up
- `'x'`: Cross
- `'*'`: Star
- `'.'`: Point
x = np.linspace(0, 10, 50)
y = np.sin(x)

fig, ax = plt.subplots(figsize=(10, 6))
# Format strings combine a marker and a line style: 'o-' = circles + solid line,
# 'D--' = diamonds + dashed line, 's:' = squares + dotted line.
# The curves are shifted by +1 / -1 so they do not overlap.
ax.plot(x, y, 'o-', label='Default')
ax.plot(x, y + 1, 'D--', markersize=2, label='Diamond markers')
ax.plot(x, y - 1, 's:', markerfacecolor='red',
        markeredgecolor='black', label='Square markers')

ax.legend()
ax.set_title('Custom Markers and Lines')
Text(0.5, 1.0, 'Custom Markers and Lines')
`fill_between`: Highlighting Areas in Plots
What is `fill_between`?
The `fill_between` function in Matplotlib is used to fill the area between two curves or between a curve and a horizontal line.
Where to Use:
- To visually emphasize the range of values or uncertainty in data.
- To highlight areas under a curve or between curves.
Why Use `fill_between`?
- Makes plots more intuitive by shading regions of interest.
- Helps in representing data variability, confidence intervals, or integrals.
fig, ax = plt.subplots(figsize=(10, 6))
# Shade a band of ±0.2 around the curve
ax.fill_between(x, y - 0.2, y + 0.2, alpha=0.1, color='red')
ax.plot(x, y, 'r-', label='Main line')
ax.set_title('Fill Between Example')
ax.legend()
# Stylesheets
What are Stylesheets?
Matplotlib stylesheets are pre-defined sets of style parameters that help you create visually appealing and consistent plots.
Where to Use:
- Use stylesheets when you want your plots to have a cohesive appearance across a project or to match a publication’s style guide.
Why Use Stylesheets?
- Simplify customization by applying a uniform theme with a single line of code.
- Save time and maintain consistency in multi-plot projects.
# List the names of the stylesheets available in this Matplotlib installation
print(plt.style.available)
['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']
# Example using different styles
data = np.random.randn(1000)
styles = ['default', 'seaborn-v0_8-bright', 'dark_background', 'seaborn-v0_8-dark-palette']

# Draw the same histogram once per style, as four subplots of a single figure
fig = plt.figure(figsize=(15, 10))
for position, style in enumerate(styles, start=1):
    # The style applies only to artists created inside this context
    with plt.style.context(style):
        ax = fig.add_subplot(2, 2, position)
        ax.hist(data, bins=30)
        ax.set_title(f'Style: {style}')

fig.tight_layout()
Tick Formatters and Locators
What are Tick Formatters and Locators?
- Tick Locators: Control where ticks appear on axes.
- Tick Formatters: Control how tick labels are displayed.
These tools provide fine-grained control over axis ticks, ensuring your plots are readable and tailored to your data.
This example demonstrates how to customize tick formats for various data types using Matplotlib. The code covers:
- Scientific Notation: Compact representation of very large or small numbers.
- Currency Formatting: Display monetary values with thousands separators and dollar signs.
- Date Formatting: Properly format and align dates on a time-series plot.
These customizations improve the clarity and relevance of your plots, making them more tailored to specific audiences or data contexts.
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from datetime import datetime, timedelta
# Example with custom number formatting
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

# Scientific notation
x = np.linspace(1e-3, 1e5, 100)
y = x**2

ax1.plot(x, y)
ax1.set_title('Scientific Notation')

# Create formatter and set it to use scientific notation
formatter = ticker.ScalarFormatter()
formatter.set_scientific(True)
# Scientific notation is only applied outside the formatter's power limits;
# narrow them so the 0..1e5 x-axis actually switches to a x10^n offset
formatter.set_powerlimits((-3, 3))

ax1.xaxis.set_major_formatter(formatter)
# Currency formatting
def currency_formatter(x, p):
    """Format a tick value as whole dollars with thousands separators.

    Has the ``(value, position)`` signature expected by
    ``ticker.FuncFormatter``; the position argument ``p`` is not used.

    Example: ``currency_formatter(1234567.0, 0)`` returns ``'$1,234,567'``.
    """
    return f'${x:,.0f}'
x = np.linspace(0, 1000000, 100)
y = x * 0.1

ax2.plot(x, y)
ax2.set_title('Currency Format')

# FuncFormatter calls currency_formatter(value, position) for every major y tick
ax2.yaxis.set_major_formatter(ticker.FuncFormatter(currency_formatter))

fig.tight_layout()
# Date formatting example
# Take the start time once so all 100 dates are exactly one day apart
start = datetime.now()
dates = [start + timedelta(days=offset) for offset in range(100)]
values = np.random.randn(100).cumsum()  # Random walk

fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(dates, values)
# The locator chooses the tick positions; the formatter renders them as YYYY-MM-DD
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
# Rotate and align the tick labels
fig.autofmt_xdate()
ax.set_title('Date Formatting Example')
Text(0.5, 1.0, 'Date Formatting Example')