# Pandas is a Python library for data manipulation and analysis.
# Mainly used in data science, analysis, and machine learning workflows.
# It is designed for working with structured, tabular data.
import pandas as pd
import numpy as np
# ── 1. Creating a Series ──────────────────────────────────────────────────────
# A Series is a one-dimensional labeled array, comparable to a single column
# of a spreadsheet.
# ── Syntax ────────────────────────────────────────────────────────────────────
# pd.Series(data, index, dtype, name)
#   data  → the values: a list, dict, scalar, or NumPy array
#   index → custom labels; defaults to 0, 1, 2, ... (optional)
#   dtype → forces a data type, e.g. float or str (optional)
#   name  → a name for the Series (optional)
# Examples:
#   pd.Series([10, 20, 30])                           # list, default index
#   pd.Series([10, 20, 30], index=["a", "b", "c"])    # custom index
#   pd.Series({"a": 10, "b": 20})                     # dict keys become labels
#   pd.Series(5, index=["x", "y", "z"])               # scalar repeated per label
#   pd.Series([1, 2, 3], dtype=float, name="nums")    # explicit dtype and name
s = pd.Series(data=[10, 20, 30, 40, 50])
print(s)
# Printed output (default integer index on the left, values on the right):
# 0    10
# 1    20
# 2    30
# 3    40
# 4    50
# dtype: int64
# ── 2. Series with Custom Index ───────────────────────────────────────────────
# Passing `index` swaps the default 0, 1, 2, ... labels for your own.
s = pd.Series(data=[10, 20, 30], index=list("abc"))
print(s)
# Printed output:
# a    10
# b    20
# c    30
# dtype: int64
# ── 3. Series from a Dictionary ───────────────────────────────────────────────
# Each dictionary key turns into an index label; each value turns into the
# data stored under that label.
data = dict(Alice=90, Bob=85, Carol=92)
s = pd.Series(data=data)
print(s)
# Printed output:
# Alice    90
# Bob      85
# Carol    92
# dtype: int64
# ── 4. Series from a Scalar ───────────────────────────────────────────────────
# A lone value is broadcast so that every index label receives a copy of it.
s = pd.Series(data=7, index=list("xyz"))
print(s)
# Printed output:
# x    7
# y    7
# z    7
# dtype: int64
# ── 5. Series from NumPy Array ────────────────────────────────────────────────
# The array supplies the values (and their float64 dtype); `index` supplies
# one label per element.
arr = np.asarray([1.1, 2.2, 3.3])
s = pd.Series(data=arr, index=list("pqr"))
print(s)
# Printed output:
# p    1.1
# q    2.2
# r    3.3
# dtype: float64
# ── 6. Accessing Elements ─────────────────────────────────────────────────────
# Rule of thumb: .loc (or plain [] with a label) selects by LABEL,
# .iloc selects by integer POSITION.
# Do not write s[0] on a Series whose index is not integer-based: that relied
# on a positional fallback which is deprecated in pandas 2.x (FutureWarning)
# and is treated as a label lookup (KeyError here) in pandas 3.
s = pd.Series([10, 20, 30, 40, 50], index=["a", "b", "c", "d", "e"])
print(s["a"])  # 10 → single value by label
print(s.iloc[0])  # 10 → single value by position
print(s.loc["b":"d"])  # b 20, c 30, d 40 → label slice (end label INCLUDED)
print(s.iloc[1:4])  # b 20, c 30, d 40 → position slice (end position EXCLUDED)
print(s[["a", "c", "e"]])  # a 10, c 30, e 50 → list of labels
# ── 7. Series Attributes ──────────────────────────────────────────────────────
s = pd.Series(data=[10, 20, 30, 40, 50])
# Print each attribute in turn. Expected output, one line per attribute:
#   values → [10 20 30 40 50]                      (NumPy array of the data)
#   index  → RangeIndex(start=0, stop=5, step=1)   (the default labels)
#   dtype  → int64
#   shape  → (5,)
#   size   → 5                                     (total number of elements)
#   name   → None                                  (no name assigned yet)
for attribute in ("values", "index", "dtype", "shape", "size", "name"):
    print(getattr(s, attribute))
# Assigning to .name labels the Series in place.
s.name = "Scores"
print(s.name)  # Scores
# ── 8. Boolean Filtering ──────────────────────────────────────────────────────
s = pd.Series(data=[10, 20, 30, 40, 50], index=list("abcde"))
# A comparison yields a boolean Series (a "mask") with the same index.
above_25 = s > 25
print(above_25)
# a    False
# b    False
# c     True
# d     True
# e     True
# Indexing with the mask keeps only the rows where it is True.
print(s[above_25])
# c    30
# d    40
# e    50
# Any element-wise expression can build the mask, e.g. "divisible by 20".
multiple_of_20 = (s % 20) == 0
print(s[multiple_of_20])
# b    20
# d    40
# ── 9. Arithmetic Operations ──────────────────────────────────────────────────
# Arithmetic pairs values up by index LABEL, not by position.
s1 = pd.Series(data=[1, 2, 3], index=list("abc"))
s2 = pd.Series(data=[10, 20, 30], index=list("abc"))
sums = s1 + s2
products = s1 * s2
ratios = s2 / s1
print(sums)  # a 11, b 22, c 33
print(products)  # a 10, b 40, c 90
print(ratios)  # a 10.0, b 10.0, c 10.0 (division gives floats)
# When the labels differ, the result covers the union of both indexes and
# holds NaN wherever a label is missing from either operand.
s3 = pd.Series(data=[100, 200], index=["a", "x"])
partial_overlap = s1 + s3
print(partial_overlap)
# a    101.0
# b      NaN
# c      NaN
# x      NaN
# ── 10. Common Series Methods ─────────────────────────────────────────────────
s = pd.Series(data=[30, 10, 50, 20, 40])
# Reductions: each collapses the whole Series into one number.
total = s.sum()
average = s.mean()
smallest = s.min()
largest = s.max()
spread = s.std()
print(total)  # 150
print(average)  # 30.0
print(smallest)  # 10
print(largest)  # 50
print(spread)  # ≈ 15.81 (standard deviation)
# Reordering and counting: each returns a new Series.
by_value = s.sort_values()
by_label = s.sort_index()
frequencies = s.value_counts()
print(by_value)  # values in ascending order: 10, 20, 30, 40, 50
print(by_label)  # rows ordered by index label: 0, 1, 2, 3, 4
print(frequencies)  # how many times each distinct value occurs
# describe() bundles the summary statistics into a single report.
summary = s.describe()
print(summary)
# count     5.0
# mean     30.0
# std      15.81 (approx.)
# min      10.0
# 25%      20.0
# 50%      30.0
# 75%      40.0
# max      50.0
# ── 11. Handling Missing Values ───────────────────────────────────────────────
# None inside numeric data is stored as NaN, so the Series becomes float64.
s = pd.Series(data=[10, None, 30, None, 50])
# isna()/notna() are the same checks as isnull()/notnull() (aliases).
print(s.isna())  # False True False True False → True marks a missing value
print(s.notna())  # True False True False True → the exact opposite
print(s.dropna())  # 0 10.0, 2 30.0, 4 50.0 → rows holding NaN are removed
print(s.fillna(value=0))  # 10, 0, 30, 0, 50 → NaN replaced by a constant
# mean() skips NaN, so the fill value here is (10 + 30 + 50) / 3 = 30.
mean_of_present = s.mean()
print(s.fillna(value=mean_of_present))  # 10, 30, 30, 30, 50
# No comments:
# Post a Comment
# Please comment below to give feedback or ask questions.