#!/usr/bin/env python3
"""Validate Customer 360 outputs and create an operating report."""

from __future__ import annotations

import csv
from datetime import datetime
from pathlib import Path

# Directory one level above the folder that contains this script.
BASE = Path(__file__).resolve().parents[1]
# Input locations: raw source extracts and curated "gold" outputs.
RAW = BASE.joinpath("data", "raw")
GOLD = BASE.joinpath("data", "gold")
# Output location for the generated reports; created at import time if missing.
REPORTS = BASE.joinpath("reports")
REPORTS.mkdir(parents=True, exist_ok=True)
# Fixed, timezone-naive reference instant used as "now" by the freshness-lag
# checks. NOTE(review): presumably pinned so the sample data yields
# reproducible lag values -- confirm before reusing outside the sample project.
NOW = datetime(2026, 5, 19, 9, 0, 0)


def read_csv(path: Path) -> list[dict]:
    """Load a CSV file into a list of row dictionaries keyed by its header row.

    Args:
        path: Location of the CSV file to read.

    Returns:
        One dictionary per data row, mapping column name to the cell text.
        The list is empty when the file contains no data rows.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark, so a BOM-prefixed export does not corrupt the first column name.
    with path.open("r", newline="", encoding="utf-8-sig") as f:
        return list(csv.DictReader(f))


def write_csv(path: Path, rows: list[dict]) -> None:
    """Write row dictionaries to a CSV file, replacing any existing file.

    The column order is taken from the keys of the first row.

    Args:
        path: Destination file.
        rows: Rows to write. An empty list produces an empty file (no header),
            because there is no row to derive the column names from.

    Raises:
        ValueError: If a later row contains a key that the first row lacks
            (raised by ``csv.DictWriter``).
    """
    with path.open("w", newline="", encoding="utf-8") as f:
        if not rows:
            # Nothing to derive a header from; leave the truncated file empty
            # instead of failing with an IndexError on rows[0].
            return
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)


def metric(name: str, value, threshold: str, status: str, explanation: str) -> dict:
    """Bundle one quality check into a report row.

    Returns:
        A dictionary with the keys ``metric``, ``value``, ``threshold``,
        ``status`` and ``explanation``, in that order.
    """
    columns = ("metric", "value", "threshold", "status", "explanation")
    cells = (name, value, threshold, status, explanation)
    return dict(zip(columns, cells))


def max_lag_hours(rows: list[dict], field: str) -> float:
    """Return how many hours the newest timestamp in ``rows`` lies behind ``NOW``.

    Rows where ``field`` is missing or empty are ignored. The lag is measured
    from the most recent remaining timestamp, so it describes how stale the
    freshest record is.

    Args:
        rows: Row dictionaries, e.g. as returned by ``read_csv``.
        field: Name of the column holding ISO-8601 timestamps.

    Returns:
        The lag in hours, rounded to two decimals.

    Raises:
        ValueError: If no row has a non-empty ``field`` value, or if a value
            is not a valid ISO-8601 timestamp.
    """
    timestamps = [datetime.fromisoformat(row[field]) for row in rows if row.get(field)]
    if not timestamps:
        # Fail with a message that names the column rather than the opaque
        # "max() arg is an empty sequence".
        raise ValueError(f"no rows with a non-empty {field!r} value; cannot compute freshness lag")
    latest = max(timestamps)
    return round((NOW - latest).total_seconds() / 3600, 2)


def markdown_table(rows: list[dict]) -> str:
    """Render row dictionaries as a pipe-delimited Markdown table.

    The column order is taken from the keys of the first row.

    Args:
        rows: Rows to render; every row must contain the first row's keys.

    Returns:
        The table as a string without a trailing newline, or an empty string
        when ``rows`` is empty.

    Raises:
        KeyError: If a row lacks one of the first row's keys.
    """
    if not rows:
        return ""

    def cell(value) -> str:
        # A raw "|" would start a new column and a raw line break would end
        # the table row, so neutralize both inside cell text.
        text = str(value).replace("|", "\\|")
        return text.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")

    headers = list(rows[0].keys())
    output = [
        "| " + " | ".join(cell(h) for h in headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    output.extend("| " + " | ".join(cell(row[h]) for h in headers) + " |" for row in rows)
    return "\n".join(output)


def main() -> None:
    """Compute the Customer 360 quality checks and write the reports.

    Reads ``customers.csv``, ``orders.csv`` and ``clickstream.csv`` from
    ``RAW`` plus ``customer_360.csv`` and ``kpi_summary.csv`` from ``GOLD``,
    derives six metrics, and writes them to
    ``customer360_quality_report.csv`` and ``customer360_quality_report.md``
    under ``REPORTS``.

    An empty ``customers.csv`` yields a coverage of 0.0 (reported as "fail")
    and an empty ``customer_360.csv`` yields a null rate of 0.0, so the report
    is still produced instead of aborting with a ZeroDivisionError.
    """
    customers = read_csv(RAW / "customers.csv")
    orders = read_csv(RAW / "orders.csv")
    clicks = read_csv(RAW / "clickstream.csv")
    customer_360 = read_csv(GOLD / "customer_360.csv")
    kpis = {row["metric"]: row["value"] for row in read_csv(GOLD / "kpi_summary.csv")}

    customer_ids = [row["customer_id"] for row in customer_360]
    unique_ids = set(customer_ids)
    # Guard both ratios: either input may legitimately contain no data rows.
    profile_coverage = len(unique_ids) / len(customers) if customers else 0.0
    duplicate_profiles = len(customer_ids) - len(unique_ids)
    critical_null_count = sum(
        1
        for row in customer_360
        if not (row["customer_id"] and row["segment"] and row["retention_priority"])
    )
    critical_null_rate = critical_null_count / len(customer_360) if customer_360 else 0.0
    order_lag = max_lag_hours(orders, "event_time")
    click_lag = max_lag_hours(clicks, "event_time")
    # A missing KPI row is reported as 0; the metric is informational only.
    consent_rate = float(kpis.get("marketing_consent_rate", 0))

    rows = [
        metric(
            "profile_coverage_rate",
            round(profile_coverage, 4),
            ">= 0.99",
            "pass" if profile_coverage >= 0.99 else "fail",
            "Every known customer should appear in the gold Customer 360 profile.",
        ),
        metric(
            "duplicate_profile_count",
            duplicate_profiles,
            "= 0",
            "pass" if duplicate_profiles == 0 else "fail",
            "A serving profile must be unique by customer_id.",
        ),
        metric(
            "critical_null_rate",
            round(critical_null_rate, 4),
            "<= 0.01",
            "pass" if critical_null_rate <= 0.01 else "fail",
            "Critical serving columns should not be null.",
        ),
        # Freshness breaches are reported as "warn" rather than "fail".
        metric(
            "orders_freshness_lag_hours",
            order_lag,
            "<= 72 for sample project",
            "pass" if order_lag <= 72 else "warn",
            "In production this would normally be measured in seconds or minutes.",
        ),
        metric(
            "clickstream_freshness_lag_hours",
            click_lag,
            "<= 24 for sample project",
            "pass" if click_lag <= 24 else "warn",
            "Recent behavioral signals should arrive quickly enough for personalization.",
        ),
        metric(
            "marketing_consent_rate",
            round(consent_rate, 4),
            "informational",
            "info",
            "Activation must respect current consent state rather than maximize reach blindly.",
        ),
    ]
    write_csv(REPORTS / "customer360_quality_report.csv", rows)
    (REPORTS / "customer360_quality_report.md").write_text(
        "# Customer 360 Quality Report\n\n"
        "This report summarizes the operating checks generated by `scripts/validate_outputs.py`.\n\n"
        + markdown_table(rows)
        + "\n",
        encoding="utf-8",
    )
    print(f"Wrote validation report to {REPORTS}")


# Generate the reports only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
