#!/usr/bin/env python3
"""Validate the deterministic Chapter 7 lab output."""

from __future__ import annotations

import argparse
from pathlib import Path
import duckdb
import pandas as pd

EXPECTED_COUNTS = {
    "dim_date": 3,
    "dim_product": 4,
    "dim_customer": 5,
    "fact_sales": 6,
}
EXPECTED_DAILY = pd.DataFrame([
    {"order_date": "2026-05-28", "order_count": 2, "quantity": 3, "net_revenue": 203.50},
    {"order_date": "2026-05-29", "order_count": 2, "quantity": 2, "net_revenue": 192.00},
    {"order_date": "2026-05-30", "order_count": 2, "quantity": 5, "net_revenue": 180.00},
])

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument("--warehouse-root", default=".warehouse/turanmart")
    return parser.parse_args()

def fail(message: str) -> None:
    raise SystemExit(f"VALIDATION FAILED: {message}")

def main() -> None:
    args = parse_args()
    db_path = Path(args.warehouse_root) / "turanmart_analytics.duckdb"
    if not db_path.exists():
        fail(f"missing DuckDB database at {db_path}; run starter.py first")
    con = duckdb.connect(str(db_path), read_only=True)

    for table, expected in EXPECTED_COUNTS.items():
        actual = con.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
        if actual != expected:
            fail(f"{table} row count {actual}, expected {expected}")

    versions = con.execute("""
        SELECT customer_id, region, valid_from, valid_to, is_current
        FROM dim_customer
        WHERE customer_id = 'C-1002'
        ORDER BY valid_from
    """).fetchdf()
    if len(versions) != 2:
        fail(f"customer C-1002 version count {len(versions)}, expected 2")
    if versions.iloc[0]["region"] != "Samarkand" or str(versions.iloc[0]["valid_to"]) != "2026-05-30":
        fail("first C-1002 version should be Samarkand and closed on 2026-05-30")
    if versions.iloc[1]["region"] != "Tashkent" or not bool(versions.iloc[1]["is_current"]):
        fail("second C-1002 version should be current Tashkent version")

    joined = con.execute("""
        SELECT d.order_date, c.customer_id, c.region
        FROM fact_sales f
        JOIN dim_date d ON f.date_id = d.date_id
        JOIN dim_customer c ON f.customer_key = c.customer_key
        WHERE c.customer_id = 'C-1002'
        ORDER BY d.order_date, f.sales_key
    """).fetchdf()
    regions_by_date = dict(zip(joined["order_date"], joined["region"]))
    if regions_by_date.get("2026-05-29") != "Samarkand":
        fail("2026-05-29 C-1002 sale should join to Samarkand version")
    if regions_by_date.get("2026-05-30") != "Tashkent":
        fail("2026-05-30 C-1002 sale should join to Tashkent version")

    total = con.execute("SELECT ROUND(SUM(net_revenue), 2) FROM fact_sales").fetchone()[0]
    if float(total) != 575.50:
        fail(f"total net_revenue {total}, expected 575.50")

    daily = con.execute("SELECT order_date, order_count, quantity, ROUND(net_revenue, 2) AS net_revenue FROM daily_sales_summary ORDER BY order_date").fetchdf()
    daily["order_date"] = daily["order_date"].astype(str)
    daily["net_revenue"] = daily["net_revenue"].astype(float).round(2)
    if not daily.equals(EXPECTED_DAILY):
        fail(f"daily summary mismatch\nActual:\n{daily}\nExpected:\n{EXPECTED_DAILY}")

    con.close()
    print("DIM_DATE rows: 3")
    print("DIM_PRODUCT rows: 4")
    print("DIM_CUSTOMER rows: 5")
    print("FACT_SALES rows: 6")
    print("CUSTOMER C-1002 versions: 2")
    print("TOTAL net_revenue: 575.50")
    print("VALIDATION PASSED")

if __name__ == "__main__":
    main()
