# Pandas Data Input and Output
# Reading and writing data between DataFrames and external sources (CSV, JSON, Excel, SQL)
import pandas as pd
# ── Sample DataFrame used throughout ─────────────────────────────────────────
df = pd.DataFrame({
"Name": ["Alice", "Bob", "Carol", "Dave"],
"Age": [30, 25, 28, 35],
"Dept": ["HR", "IT", "HR", "IT"],
"Salary": [50000, 80000, 55000, 90000]
})
# ══════════════════════════════════════════════════════════════════════════════
# ── 1. CSV ────────────────────────────────────────────────────────────────────
# Most common format — comma separated values
# ══════════════════════════════════════════════════════════════════════════════
# ── Write to CSV ──────────────────────────────────────────────────────────────
df.to_csv("employees.csv", index=False) # index=False skips row numbers
# creates employees.csv:
# Name,Age,Dept,Salary
# Alice,30,HR,50000
# Bob,25,IT,80000
# ...
df.to_csv("employees_idx.csv", index=True) # index=True includes row numbers (default)
# 0,Alice,30,HR,50000
# 1,Bob,25,IT,80000
df.to_csv("employees.csv", sep="|", index=False) # custom delimiter
# Name|Age|Dept|Salary
df[["Name","Salary"]].to_csv("names.csv", index=False) # save specific columns only
# ── Read from CSV ─────────────────────────────────────────────────────────────
df.to_csv("employees.csv", index=False) # reset to comma for examples below
df_csv = pd.read_csv("employees.csv") # basic read
print(df_csv)
# Name Age Dept Salary
# 0 Alice 30 HR 50000
# 1 Bob 25 IT 80000
# 2 Carol 28 HR 55000
# 3 Dave 35 IT 90000
# ── read_csv options ──────────────────────────────────────────────────────────
pd.read_csv("employees.csv", sep=",") # delimiter (default comma)
pd.read_csv("employees.csv", header=0) # row 0 as column names (default)
pd.read_csv("employees.csv", header=None) # no header row — uses 0,1,2...
pd.read_csv("employees.csv", usecols=["Name","Salary"]) # load specific columns only
pd.read_csv("employees.csv", nrows=2) # load first 2 rows only
pd.read_csv("employees.csv", skiprows=1) # skip first row
pd.read_csv("employees.csv", index_col="Name") # set Name as index
pd.read_csv("employees.csv", na_values=["NA","-","?"]) # treat as NaN
pd.read_csv("employees.csv", dtype={"Age": float}) # force dtype on column
df_custom = pd.read_csv("employees.csv", usecols=["Name","Salary"], nrows=2)
print(df_custom)
# Name Salary
# 0 Alice 50000
# 1 Bob 80000
# ── Inspect after loading ─────────────────────────────────────────────────────
print(df_csv.shape) # (4, 4)
print(df_csv.dtypes) # Name object, Age int64 ...
print(df_csv.head(2)) # first 2 rows
print(df_csv.info()) # column info + non-null count
# ══════════════════════════════════════════════════════════════════════════════
# ── 2. JSON ───────────────────────────────────────────────────────────────────
# JavaScript Object Notation — common in APIs and web data
# ══════════════════════════════════════════════════════════════════════════════
# ── Write to JSON ─────────────────────────────────────────────────────────────
df.to_json("employees.json", orient="records") # list of row dicts (most readable)
# [{"Name":"Alice","Age":30,"Dept":"HR","Salary":50000},...]
df.to_json("employees.json", orient="records", indent=2) # pretty-printed
df.to_json("cols.json", orient="columns") # dict of column dicts
df.to_json("index.json", orient="index") # dict keyed by row index
# ── Read from JSON ────────────────────────────────────────────────────────────
df_json = pd.read_json("employees.json")
print(df_json)
# Name Age Dept Salary
# 0 Alice 30 HR 50000
# 1 Bob 25 IT 80000
# 2 Carol 28 HR 55000
# 3 Dave 35 IT 90000
# ── orient options for read ───────────────────────────────────────────────────
# pd.read_json("file.json", orient="records") # list of row dicts
# pd.read_json("file.json", orient="columns") # dict of column dicts
# pd.read_json("file.json", orient="index") # dict keyed by index
# ── From JSON string directly ─────────────────────────────────────────────────
json_str = '[{"Name":"Alice","Age":30},{"Name":"Bob","Age":25}]'
df_from_str = pd.read_json(json_str)
print(df_from_str)
# Name Age
# 0 Alice 30
# 1 Bob 25
# ══════════════════════════════════════════════════════════════════════════════
# ── 3. Excel ──────────────────────────────────────────────────────────────────
# requires: pip install openpyxl
# ══════════════════════════════════════════════════════════════════════════════
# ── Write to Excel ────────────────────────────────────────────────────────────
# df.to_excel("employees.xlsx", index=False) # single sheet
# df.to_excel("employees.xlsx", index=False, sheet_name="Data") # named sheet
# ── Multiple sheets ───────────────────────────────────────────────────────────
# hr = df[df["Dept"] == "HR"]
# it = df[df["Dept"] == "IT"]
# with pd.ExcelWriter("departments.xlsx") as writer:
# hr.to_excel(writer, sheet_name="HR", index=False)
# it.to_excel(writer, sheet_name="IT", index=False)
# ── Read from Excel ───────────────────────────────────────────────────────────
# df_xl = pd.read_excel("employees.xlsx")
# df_xl = pd.read_excel("employees.xlsx", sheet_name="Data")
# df_xl = pd.read_excel("employees.xlsx", usecols=["Name","Salary"])
# df_xl = pd.read_excel("employees.xlsx", nrows=2)
# ── Read all sheets at once ───────────────────────────────────────────────────
# all_sheets = pd.read_excel("departments.xlsx", sheet_name=None)
# all_sheets["HR"] → DataFrame for HR sheet
# all_sheets["IT"] → DataFrame for IT sheet
# ══════════════════════════════════════════════════════════════════════════════
# ── 4. SQL ────────────────────────────────────────────────────────────────────
# requires: pip install sqlalchemy
# ══════════════════════════════════════════════════════════════════════════════
import sqlite3
conn = sqlite3.connect(":memory:") # in-memory SQLite database
# ── Write to SQL ──────────────────────────────────────────────────────────────
df.to_sql("employees", conn, index=False, if_exists="replace")
# if_exists options:
# "fail" → raise error if table exists
# "replace" → drop table and recreate
# "append" → add rows to existing table
# ── Read from SQL ─────────────────────────────────────────────────────────────
df_sql = pd.read_sql("SELECT * FROM employees", conn)
print(df_sql)
# Name Age Dept Salary
# 0 Alice 30 HR 50000
# 1 Bob 25 IT 80000
# 2 Carol 28 HR 55000
# 3 Dave 35 IT 90000
df_filtered = pd.read_sql("SELECT Name, Salary FROM employees WHERE Dept='IT'", conn)
print(df_filtered)
# Name Salary
# 0 Bob 80000
# 1 Dave 90000
df_query = pd.read_sql_query("SELECT * FROM employees WHERE Age > 28", conn)
print(df_query)
# Name Age Dept Salary
# 0 Alice 30 HR 50000
# 1 Dave 35 IT 90000
conn.close()
# ══════════════════════════════════════════════════════════════════════════════
# ── 5. Clipboard ──────────────────────────────────────────────────────────────
# Read/write from system clipboard (useful for quick copy-paste from Excel)
# ══════════════════════════════════════════════════════════════════════════════
# df.to_clipboard(index=False) # copy to clipboard
# pd.read_clipboard() # paste from clipboard into DataFrame
# ══════════════════════════════════════════════════════════════════════════════
# ── 6. Common I/O Options Summary ────────────────────────────────────────────
# ══════════════════════════════════════════════════════════════════════════════
# ┌─────────────┬──────────────────────────────┬──────────────────────────┐
# │ Format │ Read │ Write │
# ├─────────────┼──────────────────────────────┼──────────────────────────┤
# │ CSV │ pd.read_csv("f.csv") │ df.to_csv("f.csv") │
# │ JSON │ pd.read_json("f.json") │ df.to_json("f.json") │
# │ Excel │ pd.read_excel("f.xlsx") │ df.to_excel("f.xlsx") │
# │ SQL │ pd.read_sql("SELECT...",con)│ df.to_sql("table", con) │
# │ Clipboard │ pd.read_clipboard() │ df.to_clipboard() │
# └─────────────┴──────────────────────────────┴──────────────────────────┘
# ── Shared useful options ─────────────────────────────────────────────────────
# index=False → don't write row numbers
# usecols=[...] → load specific columns only
# nrows=N → load first N rows
# dtype={"col":type}→ force data type on column
# na_values=[...] → treat these as NaN
# header=None → no header row in file
# index_col="col" → use column as row index
No comments:
Post a Comment
Please comment below to feedback or ask questions.