# Seaborn — Matrix Plots
# Matrix plot = a grid where rows and columns are categories/variables
# and each cell is colored based on a value
# Used to see patterns, correlations, and relationships at a glance
# Two main matrix plots: heatmap and clustermap
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# ══════════════════════════════════════════════════════════════════════════════
# ── 1. heatmap — Color Grid showing values ────────────────────────────────────
# Each cell = a number shown as a color
# Darker/brighter color = higher or lower value (depends on colormap)
# Great for: showing correlation between columns, confusion matrix, pivot tables
# ══════════════════════════════════════════════════════════════════════════════
# ── Simple heatmap from a 2D list ─────────────────────────────────────────────
# Small 3×3 demo table used by every heatmap example below.
data = pd.DataFrame(
    {
        "Col 1": [10, 40, 70],
        "Col 2": [20, 50, 80],
        "Col 3": [30, 60, 90],
    },  # one entry per column (x-axis labels); each list runs top row → bottom row
    index=["Row A", "Row B", "Row C"],  # row labels (y-axis)
)
# Equivalent row-by-row construction (same table, written one row at a time):
# data = pd.DataFrame(
#     [[10, 20, 30],
#      [40, 50, 60],
#      [70, 80, 90]],
#     index=["Row A", "Row B", "Row C"],
#     columns=["Col 1", "Col 2", "Col 3"],
# )
# NOTE: sns.heatmap is an axes-level function — without ax= it draws into the
# currently active Axes. Every demo below therefore starts with plt.figure() so
# each plot gets a fresh figure even when plt.show() returns immediately
# (interactive mode, non-GUI backends) and the previous figure is still open.
plt.figure()
sns.heatmap(data)
plt.title("Basic Heatmap")
plt.show()  # Output: 3×3 color grid — with seaborn's default "rocket" colormap
# low values are dark and high values are light (bottom row is the lightest)
# ── annot=True — show numbers inside each cell ────────────────────────────────
plt.figure()
sns.heatmap(data, annot=True)
plt.title("Heatmap with Numbers")
plt.show()  # Output: same color grid but each cell also shows its number
# ── fmt= — format of numbers shown inside cells ───────────────────────────────
# The default is fmt=".2g" (2 significant digits); "d" prints whole integers.
plt.figure()
sns.heatmap(data, annot=True, fmt="d")  # d = integer format (needs integer data)
plt.title("Heatmap with Integer Labels")
plt.show()  # Output: numbers shown as plain integers: 10, 20, 30, ...
# ── cmap= — choose the color theme (colormap) ─────────────────────────────────
plt.figure()
sns.heatmap(data, annot=True, cmap="Blues")  # light blue → dark blue
plt.title("Heatmap with color theme Blues")
plt.show()  # Output: low values = light blue, high values = dark blue
plt.figure()
sns.heatmap(data, annot=True, cmap="YlOrRd")  # yellow → orange → red
plt.title("Heatmap with color theme YlOrRd")
plt.show()  # Output: low values = yellow, high values = red
plt.figure()
sns.heatmap(data, annot=True, cmap="coolwarm")  # blue → pale neutral → red
plt.title("Heatmap with color theme coolwarm")
plt.show()  # Output: low = blue, middle = pale, high = red (good for correlation)
plt.figure()
sns.heatmap(data, annot=True, cmap="Greens")  # light green → dark green
plt.title("Heatmap with color theme Greens")
plt.show()  # Output: low values = light green, high values = dark green
# ── linewidths= — adds borders between cells ──────────────────────────────────
plt.figure()
sns.heatmap(data, annot=True, linewidths=0.5, linecolor="white")
plt.title("Heatmap with Cell Borders")
plt.show()  # Output: white lines separate each cell — easier to read
# ── vmin / vmax — fix the color scale range ───────────────────────────────────
# vmin = value anchored to the LOW end of the colormap
# vmax = value anchored to the HIGH end of the colormap
# (whether the low end looks light or dark depends on the colormap in use)
plt.figure()
sns.heatmap(data, annot=True, vmin=0, vmax=100)
plt.title("Heatmap with Fixed Color Scale (0 to 100)")
plt.show()  # Output: color scale spans 0–100 instead of the data's own 10–90,
# so 90 lands near (but not at) the high end of the colormap
# ══════════════════════════════════════════════════════════════════════════════
# ── 2. Correlation Heatmap — most common real-world use ───────────────────────
# Correlation = how much two columns move together
# +1.0 → both increase together (perfect positive)
# 0.0 → no relationship
# -1.0 → one increases while other decreases (perfect negative)
# df.corr() calculates the pairwise correlation between columns
# (on pandas ≥ 2.0 pass numeric_only=True if the table also has non-numeric columns)
# ══════════════════════════════════════════════════════════════════════════════
# Small employee table; every column is numeric, so all four enter the correlation.
df = pd.DataFrame(
    {
        "age": [22, 25, 30, 35, 40, 45, 50],
        "salary": [30000, 35000, 50000, 60000, 72000, 80000, 90000],
        "experience": [1, 2, 5, 8, 12, 18, 25],
        "score": [85, 80, 75, 70, 65, 60, 55],
    }
)
# corr is a square table (columns × columns): each cell measures how strongly
# that pair of columns moves together. "pearson" is the method df.corr() uses
# by default; it is spelled out here only for clarity.
corr = df.corr(method="pearson")
# sns.heatmap draws into the currently active Axes, so open a fresh figure first;
# otherwise this plot can land on top of an earlier one whenever plt.show()
# did not block (interactive mode, non-GUI backends).
plt.figure()
sns.heatmap(
    corr,
    annot=True,       # show correlation values inside cells
    fmt=".2f",        # 2 decimal places e.g. 0.98
    cmap="coolwarm",  # blue=negative, red=positive correlation
    vmin=-1,          # fix the color scale from -1 ...
    vmax=1,           # ... to +1, the full range a correlation can take
)
plt.title("Correlation Heatmap")
plt.show()  # Output: grid showing how strongly each pair of columns is related
# age vs salary ≈ 0.997 (printed as 1.00, strong positive),
# age vs score ≈ -0.998 (printed as -1.00, strong negative)
# ── mask= — hide the upper triangle (avoid duplicate info) ────────────────────
# Correlation table is symmetric — top-right mirrors bottom-left
# mask hides the duplicate upper triangle so it's easier to read
# (cells where mask is True are NOT drawn)
mask = np.zeros_like(corr, dtype=bool)  # start with all False (show everything)
mask[np.triu_indices_from(mask)] = True  # hide upper triangle AND the diagonal (always 1.0)
plt.figure()
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", mask=mask, vmin=-1, vmax=1)
plt.title("Correlation Heatmap — Lower Triangle Only")
plt.show()  # Output: only the cells below the diagonal are shown — cleaner, no repeated values
# ══════════════════════════════════════════════════════════════════════════════
# ── 3. clustermap — Heatmap with automatic grouping (clustering) ──────────────
# Same as heatmap BUT it reorders rows and columns automatically
# so that similar rows/columns are placed next to each other
# Dendrograms (tree diagrams) on top and left show which rows/cols are similar
# ══════════════════════════════════════════════════════════════════════════════
# ── Simple clustermap ────────────────────────────────────────────────────────
# Student × subject score table: one row per student, one column per subject.
data2 = pd.DataFrame(
    [
        [90, 88, 45, 40],  # Alice
        [85, 80, 50, 45],  # Bob
        [40, 42, 85, 88],  # Carol
        [45, 50, 90, 92],  # Dave
        [70, 68, 55, 60],  # Eve
    ],
    index=["Alice", "Bob", "Carol", "Dave", "Eve"],
    columns=["Math", "Science", "History", "Art"],
)
# NOTE(review): every title below uses y=1.02, i.e. slightly above the figure's
# top edge; whether it is fully visible depends on how the figure is rendered —
# confirm in your environment.
sns.clustermap(data=data2)
plt.suptitle(
    t="Clustermap — similar students and subjects grouped together",
    y=1.02,
)
plt.show()  # Output: heatmap with rows/cols reordered — students good at Math/Science
# end up next to each other, and so do students good at History/Art
# ── annot=True — show values in cells ────────────────────────────────────────
sns.clustermap(
    data=data2,
    annot=True,     # print each score inside its cell
    fmt="d",        # integer format
    cmap="YlOrRd",  # yellow → orange → red
)
plt.suptitle(t="Clustermap with Values", y=1.02)
plt.show()  # Output: colored grid with scores shown, similar rows/cols clustered
# ── standard_scale= — normalize data before clustering ───────────────────────
# standard_scale=1 → rescale every column to the 0–1 range
# Handy when columns live on very different scales (e.g. salary vs age)
sns.clustermap(
    data=data2,
    standard_scale=1,
    cmap="Blues",
)
plt.suptitle(t="Clustermap with Normalized Columns (0 to 1)", y=1.02)
plt.show()  # Output: each column scaled 0–1, makes comparison fair across columns
# ── z_score= — normalize by row or column using z-score ──────────────────────
# z_score=0 → z-score each row      z_score=1 → z-score each column
# Highlights which values sit above/below the average of their row or column
sns.clustermap(
    data=data2,
    z_score=1,
    cmap="coolwarm",
)
plt.suptitle(t="Clustermap with Z-score (above/below average per column)", y=1.02)
plt.show()  # Output: blue = below average, red = above average within each subject
# ══════════════════════════════════════════════════════════════════════════════
# ── heatmap vs clustermap ─────────────────────────────────────────────────────
# ┌─────────────┬──────────────────────────────────┬────────────────────────────────┐
# │ │ heatmap │ clustermap │
# ├─────────────┼──────────────────────────────────┼────────────────────────────────┤
# │ Row order │ stays as-is │ reordered to group similar rows│
# │ Col order │ stays as-is │ reordered to group similar cols│
# │ Dendrogram │ no │ yes (tree on top and left) │
# │ Best for │ fixed grids like confusion matrix│ finding hidden patterns/groups │
# └─────────────┴──────────────────────────────────┴────────────────────────────────┘
# ══════════════════════════════════════════════════════════════════════════════
# ══════════════════════════════════════════════════════════════════════════════
# ── Quick Reference ───────────────────────────────────────────────────────────
# ══════════════════════════════════════════════════════════════════════════════
# sns.heatmap(data) → color grid from a 2D table
# sns.heatmap(data, annot=True) → show values inside each cell
# sns.heatmap(data, fmt="d") → integer format inside cells
# sns.heatmap(data, fmt=".2f") → 2 decimal format inside cells
# sns.heatmap(data, cmap="coolwarm") → set color theme
# sns.heatmap(data, vmin=0, vmax=1) → fix color scale range
# sns.heatmap(data, linewidths=0.5) → borders between cells
# sns.heatmap(data, mask=mask) → hide certain cells (e.g. upper triangle)
# df.corr() → correlation table between columns (pandas ≥ 2.0: add numeric_only=True to skip non-numeric ones)
#
# sns.clustermap(data) → heatmap with auto-grouping of similar rows/cols
# sns.clustermap(data, standard_scale=1) → normalize columns 0 to 1
# sns.clustermap(data, z_score=1) → show above/below average per column
#
# plt.show() → display the plot
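# (Web-page footer captured together with the listing — kept as comments so the file still parses)
# No comments:
# Post a Comment
# Please comment below to give feedback or ask questions.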