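Hospitals sit on huge amounts of operational data (admissions, discharges, diagnoses, departments, length of stay) that is rarely analyzed beyond basic reporting. This project treats a hospital admissions dataset purely as a data analysis exercise (no predictive modeling) — the goal is to answer questions administrators and doctors actually care about: how often patients are readmitted within 30 days, which departments carry the heaviest load, how admissions vary by season and day of week, and how length of stay differs across age groups.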
The output is a set of clear visualizations and a short insights report — the kind of analysis a hospital administrator, insurance analyst, or public health researcher would actually use for planning and resource allocation.
You can use a public dataset (e.g., Kaggle's "Hospital Readmissions" or "Healthcare Dataset") or plug in anonymized data if you're building this for a client.
# hospital_readmission_analysis.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Set seaborn's global "whitegrid" style for the charts drawn below.
sns.set_style("whitegrid")
# Brand palette (hex colours) used by the single-colour charts in this script.
BRAND_DARK = "#163a6e"
BRAND_ORANGE = "#ff8a3d"
# ---------------------------------------------------------
# 1. Build the admissions table
#    Synthetic records are generated below; for real hospital records use:
#    df = pd.read_csv("hospital_admissions.csv")
# ---------------------------------------------------------
np.random.seed(7)  # fixed seed: every run produces the same synthetic table
n = 2000
departments = [
    "Cardiology", "Orthopedics", "General Medicine",
    "Pediatrics", "Neurology", "Respiratory",
]
diagnoses = [
    "Hypertension", "Fracture", "Diabetes", "Pneumonia",
    "Asthma", "Migraine", "Viral Fever", "Cardiac Arrest",
]

# NOTE: the random draws must stay in this exact order, otherwise the seeded
# stream (and therefore the generated data) changes.
admission_offsets = np.random.randint(0, 545, n)  # days after 2024-01-01
start_dates = pd.to_datetime("2024-01-01") + pd.to_timedelta(admission_offsets, unit="D")
# Gamma draw truncated to whole days, then shifted so every stay is >= 1 day.
stay_length = 1 + np.random.gamma(shape=2.0, scale=2.0, size=n).astype(int)

# Sampling weights for `departments`, in the same order as that list.
department_weights = [.18, .15, .22, .15, .12, .18]
df = pd.DataFrame({
    # Only 500 distinct IDs for 2000 rows, so patients repeat.
    "patient_id": np.random.randint(1000, 1500, n),
    "department": np.random.choice(departments, n, p=department_weights),
    "diagnosis": np.random.choice(diagnoses, n),
    "age": np.random.randint(1, 90, n),
    "admission_date": start_dates,
    "length_of_stay": stay_length,
})
stay_delta = pd.to_timedelta(df["length_of_stay"], unit="D")
df["discharge_date"] = df["admission_date"] + stay_delta
# ---------------------------------------------------------
# 2. Derived fields
# ---------------------------------------------------------
# Age bands. Intervals are right-closed: (0-12], (12-18], (18-40], (40-60], (60+).
# include_lowest=True keeps newborns (age 0) in the first band, and the
# open-ended top edge keeps patients older than 90 in the last band. Without
# these, such rows get a NaN age_group and silently drop out of grouped charts.
df["age_group"] = pd.cut(
    df["age"],
    bins=[0, 12, 18, 40, 60, np.inf],
    # Age 60 falls in "Middle-age (41-60)", so the senior band starts at 61.
    labels=["Child (0-12)", "Teen (13-18)", "Adult (19-40)", "Middle-age (41-60)", "Senior (61+)"],
    include_lowest=True,
)
# Calendar month as a "YYYY-MM" string.
df["month"] = df["admission_date"].dt.to_period("M").astype(str)
df["day_of_week"] = df["admission_date"].dt.day_name()
# month % 12 // 3 + 1 maps Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
season_map = {1: "Winter", 2: "Spring", 3: "Summer", 4: "Autumn"}
df["season"] = (df["admission_date"].dt.month % 12 // 3 + 1).map(season_map)
# ---------------------------------------------------------
# 3. Readmission flag
#    An admission counts as a readmission when the same patient was
#    discharged from their previous admission 0-30 days earlier.
# ---------------------------------------------------------
# Order each patient's admissions chronologically so that shift(1) picks up
# the immediately preceding stay.
df = df.sort_values(by=["patient_id", "admission_date"])
df["prev_discharge"] = df.groupby("patient_id")["discharge_date"].shift(1)
gap_days = (df["admission_date"] - df["prev_discharge"]).dt.days
df["days_since_last_discharge"] = gap_days
# A patient's first admission has a NaN gap; both comparisons are False for it.
df["is_readmission"] = (gap_days >= 0) & (gap_days <= 30)
overall_readmit_rate = df["is_readmission"].mean() * 100
print(f"Overall 30-day readmission rate: {overall_readmit_rate:.1f}%\n")
# ---------------------------------------------------------
# 4. Department load & avg length of stay
# ---------------------------------------------------------
# One row per department. "total_patients" counts admission rows, so a patient
# admitted twice to the same department is counted twice.
dept_summary = df.groupby("department").agg(
    total_patients=("patient_id", "count"),
    avg_stay=("length_of_stay", "mean"),
    readmit_rate=("is_readmission", "mean"),
).sort_values("total_patients", ascending=False)
# Round each column separately. Rounding the fractional readmit_rate to two
# decimals before scaling would leave only whole-percent precision.
dept_summary["avg_stay"] = dept_summary["avg_stay"].round(2)
dept_summary["readmit_rate"] = (dept_summary["readmit_rate"] * 100).round(1)
print("Department summary:\n", dept_summary, "\n")

# Horizontal bar chart of admissions per department, busiest first.
plt.figure(figsize=(8, 5))
sns.barplot(data=dept_summary.reset_index(), x="total_patients", y="department", color=BRAND_DARK)
plt.title("Patient load by department")
plt.xlabel("Number of admissions")
plt.tight_layout()
plt.savefig("department_load.png", dpi=150)
plt.close()
# ---------------------------------------------------------
# 5. Readmission rate by diagnosis
# ---------------------------------------------------------
# Share of admissions flagged as readmissions per diagnosis, as a percentage,
# sorted from highest to lowest.
readmit_share = df.groupby("diagnosis")["is_readmission"].mean()
diag_readmit = readmit_share.sort_values(ascending=False) * 100

diag_fig = plt.figure(figsize=(8, 5))
diag_ax = diag_readmit.plot.barh(color=BRAND_ORANGE)
diag_ax.set_title("30-day readmission rate by diagnosis (%)")
diag_ax.set_xlabel("Readmission rate (%)")
diag_fig.tight_layout()
diag_fig.savefig("readmission_by_diagnosis.png", dpi=150)
plt.close(diag_fig)
# ---------------------------------------------------------
# 6. Seasonal admission trend
# ---------------------------------------------------------
# Admission counts with one row per season and one column per diagnosis.
# Rows are put in calendar order (groupby alone sorts them alphabetically) so
# the stacked segments and legend read Winter -> Autumn; a season with no
# admissions gets a row of zeros instead of being dropped.
season_order = ["Winter", "Spring", "Summer", "Autumn"]
seasonal = (
    df.groupby(["season", "diagnosis"])
    .size()
    .unstack(fill_value=0)
    .reindex(season_order, fill_value=0)
)

plt.figure(figsize=(10, 5))
# Transposed: one bar per diagnosis, stacked by season.
seasonal.T.plot(kind="bar", stacked=True, colormap="tab20", ax=plt.gca())
plt.title("Diagnosis distribution by season")
plt.ylabel("Number of admissions")
# Place the legend outside the axes so it does not cover the bars.
plt.legend(title="Season", bbox_to_anchor=(1.02, 1), loc="upper left")
plt.tight_layout()
plt.savefig("seasonal_diagnosis_trend.png", dpi=150)
plt.close()
# ---------------------------------------------------------
# 7. Length of stay by age group
# ---------------------------------------------------------
# One box per age band showing the spread of stay lengths (in days).
age_fig = plt.figure(figsize=(8, 5))
age_ax = sns.boxplot(x="age_group", y="length_of_stay", data=df, color=BRAND_ORANGE)
age_ax.set_title("Length of stay by age group")
plt.xticks(rotation=20)  # tilt the long band labels so they do not overlap
age_fig.tight_layout()
age_fig.savefig("stay_by_age_group.png", dpi=150)
plt.close(age_fig)
# ---------------------------------------------------------
# 8. Admissions by day of week (staffing insight)
# ---------------------------------------------------------
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
# value_counts() sorts by frequency, so reindex into calendar order.
# fill_value=0 gives a weekday with no admissions a count of 0 rather than
# NaN, which also keeps the counts as integers.
day_counts = df["day_of_week"].value_counts().reindex(day_order, fill_value=0)

plt.figure(figsize=(8, 4))
day_counts.plot(kind="bar", color=BRAND_DARK)
plt.title("Admissions by day of week")
plt.ylabel("Number of admissions")
plt.xticks(rotation=30)
plt.tight_layout()
plt.savefig("admissions_by_weekday.png", dpi=150)
plt.close()

# Final summary of every image file written by this script.
print("Charts saved: department_load.png, readmission_by_diagnosis.png,",
      "seasonal_diagnosis_trend.png, stay_by_age_group.png, admissions_by_weekday.png")
Get an official Project Completion Certificate with a unique ID & QR verification — perfect for internships, resumes, and college submissions.
Get Certificate →