Keep Learning: Python Pandas Series

# Pandas is a Python library for data manipulation and analysis.
# Mainly used in data science, analysis, and machine learning workflows.
# It is designed for structured, tabular data.
import pandas as pd
import numpy as np

# ── 1. Creating a Series ──────────────────────────────────────────────────────
# A Series is a one-dimensional labeled array: picture one spreadsheet column
# in which every value carries an index label.
#
# Constructor:
#   pd.Series(data, index, dtype, name)
#     data  → list, dict, scalar, or NumPy array      (required)
#     index → custom labels; defaults to 0, 1, 2...   (optional)
#     dtype → forces the data type, e.g. float, str   (optional)
#     name  → a name for the Series                   (optional)
#
# Typical calls:
#   pd.Series([10, 20, 30])                          → from a list, default index
#   pd.Series([10, 20, 30], index=["a", "b", "c"])   → custom index
#   pd.Series({"a": 10, "b": 20})                    → from a dict
#   pd.Series(5, index=["x", "y", "z"])              → scalar repeated per label
#   pd.Series([1, 2, 3], dtype=float, name="nums")   → explicit dtype and name
s = pd.Series(
    data=[10, 20, 30, 40, 50],
)
print(s)
# Printed result (left column is the default index):
# 0    10
# 1    20
# 2    30
# 3    40
# 4    50
# dtype: int64

# ── 2. Series with Custom Index ───────────────────────────────────────────────
# Passing index= swaps the default 0, 1, 2... labels for labels of your choice.

s = pd.Series(data=[10, 20, 30], index=list("abc"))
print(s)
# Printed result:
# a    10
# b    20
# c    30
# dtype: int64

# ── 3. Series from a Dictionary ───────────────────────────────────────────────
# Each dict key turns into an index label; each dict value becomes the data
# stored under that label.

data = dict(Alice=90, Bob=85, Carol=92)
s = pd.Series(data=data)
print(s)
# Printed result:
# Alice    90
# Bob      85
# Carol    92
# dtype: int64

# ── 4. Series from a Scalar ───────────────────────────────────────────────────
# A lone value is broadcast: it is repeated once for every label in index=.

s = pd.Series(data=7, index=list("xyz"))
print(s)
# Printed result:
# x    7
# y    7
# z    7
# dtype: int64

# ── 5. Series from NumPy Array ────────────────────────────────────────────────
# A NumPy array supplies the values; the Series takes its dtype from the array.

arr = np.array([1.1, 2.2, 3.3])
s = pd.Series(data=arr, index=list("pqr"))
print(s)
# Printed result:
# p    1.1
# q    2.2
# r    3.3
# dtype: float64

# ── 6. Accessing Elements ─────────────────────────────────────────────────────
# Two explicit ways to reach into a Series:
#   s[label] / s[[labels]] / s[start_label:end_label]  → by index label
#   s.iloc[pos] / s.iloc[start:stop]                   → by integer position
#
# Positions are always written with .iloc here. Plain s[0] on a Series whose
# index holds string labels relied on a positional fallback that pandas has
# deprecated (FutureWarning since pandas 2.1); newer releases treat the integer
# as a label and raise KeyError because no label 0 exists.

s = pd.Series([10, 20, 30, 40, 50], index=["a", "b", "c", "d", "e"])

print(s["a"])                   # 10    → by label
print(s.iloc[0])                # 10    → by position
print(s["b":"d"])               # b 20, c 30, d 40  → label slice (inclusive)
print(s.iloc[1:4])              # b 20, c 30, d 40  → position slice (exclusive end)
print(s[["a", "c", "e"]])       # 10, 30, 50  → multiple labels

# ── 7. Series Attributes ──────────────────────────────────────────────────────
# Each attribute below is read from the Series and printed in turn.

s = pd.Series(data=[10, 20, 30, 40, 50])

for attribute in (
    "values",   # [10 20 30 40 50]  → NumPy array of values
    "index",    # RangeIndex(start=0, stop=5, step=1)
    "dtype",    # int64
    "shape",    # (5,)
    "size",     # 5  → total elements
    "name",     # None  → no name assigned yet
):
    print(getattr(s, attribute))

# Assigning to .name gives the Series a name.
s.name = "Scores"
print(s.name)                   # Scores

# ── 8. Boolean Filtering ──────────────────────────────────────────────────────
# A comparison on a Series yields a boolean Series (a mask); indexing with that
# mask keeps only the entries where the mask is True.

s = pd.Series(data=[10, 20, 30, 40, 50], index=list("abcde"))

above_25 = s > 25
print(above_25)
# Mask:
# a    False
# b    False
# c     True
# d     True
# e     True

print(s[above_25])
# Entries kept by the mask:
# c    30
# d    40
# e    50

divisible_by_20 = s % 20 == 0
print(s[divisible_by_20])       # b 20, d 40  → divisible by 20

# ── 9. Arithmetic Operations ──────────────────────────────────────────────────
# Arithmetic between two Series pairs values up by index label, not by position.

shared_labels = ["a", "b", "c"]
s1 = pd.Series(data=[1, 2, 3], index=shared_labels)
s2 = pd.Series(data=[10, 20, 30], index=shared_labels)

for combined in (
    s1 + s2,    # a 11, b 22, c 33
    s1 * s2,    # a 10, b 40, c 90
    s2 / s1,    # a 10.0, b 10.0, c 10.0
):
    print(combined)

# When the indexes differ, any label missing from either side yields NaN.
s3 = pd.Series(data=[100, 200], index=["a", "x"])
misaligned_sum = s1 + s3
print(misaligned_sum)
# Printed result:
# a    101.0
# b      NaN
# c      NaN
# x      NaN

# ── 10. Common Series Methods ─────────────────────────────────────────────────
# Every method named below is called with no arguments and its result printed.

s = pd.Series(data=[30, 10, 50, 20, 40])

summary_methods = (
    "sum",            # 150   → total
    "mean",           # 30.0  → average
    "min",            # 10    → minimum
    "max",            # 50    → maximum
    "std",            # 15.81 → standard deviation
    "sort_values",    # sorted ascending: 10,20,30,40,50
    "sort_index",     # sorted by index: 0,1,2,3,4
    "value_counts",   # count of each unique value
    "describe",       # summary statistics, shown below
)
for method_name in summary_methods:
    print(getattr(s, method_name)())
# describe() prints:
# count     5.0
# mean     30.0
# std      15.81
# min      10.0
# 25%      20.0
# 50%      30.0
# 75%      40.0
# max      50.0

# ── 11. Handling Missing Values ───────────────────────────────────────────────
# None entries in numeric data are stored as NaN; the calls below detect,
# drop, or replace them.

s = pd.Series(data=[10, None, 30, None, 50])

missing_mask = s.isnull()
print(missing_mask)             # False True False True False  → True where NaN
print(s.notnull())              # True False True False True
print(s.dropna())               # 0 10.0, 2 30.0, 4 50.0  → removes NaN

for replacement in (
    0,          # replaces NaN with 0 → 10,0,30,0,50
    s.mean(),   # replaces NaN with mean → 10,30,30,30,50
):
    print(s.fillna(replacement))
Keep Learning

Pages

Python Pandas Series

No comments:

Post a Comment

Follow In

Translate

Thanks for your visit

About Me