#!/usr/bin/env python3
"""Build the Chapter 7 dimensional sales mart locally with DuckDB."""

from __future__ import annotations

import argparse
from pathlib import Path
import duckdb
import pandas as pd

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument("--warehouse-root", default=".warehouse/turanmart", help="Directory for DuckDB database and CSV exports")
    return parser.parse_args()

def main() -> None:
    args = parse_args()
    lab_root = Path(__file__).resolve().parent
    data_dir = lab_root / "data"
    warehouse_root = Path(args.warehouse_root)
    exports = warehouse_root / "exports"
    warehouse_root.mkdir(parents=True, exist_ok=True)
    exports.mkdir(parents=True, exist_ok=True)
    db_path = warehouse_root / "turanmart_analytics.duckdb"

    orders = pd.read_csv(data_dir / "source_order_lines.csv", parse_dates=["order_ts"])
    products = pd.read_csv(data_dir / "source_products.csv")
    customers = pd.read_csv(data_dir / "customer_snapshots.csv", parse_dates=["valid_from"])
    orders["order_date"] = orders["order_ts"].dt.date.astype(str)
    orders["net_revenue"] = (orders["quantity"] * orders["unit_price"] * (1 - orders["discount_rate"])).round(2)

    dim_date = pd.DataFrame({"date_key": pd.to_datetime(sorted(orders["order_date"].unique()))})
    dim_date["date_id"] = dim_date["date_key"].dt.strftime("%Y%m%d").astype(int)
    dim_date["order_date"] = dim_date["date_key"].dt.date.astype(str)
    dim_date["year"] = dim_date["date_key"].dt.year
    dim_date["month"] = dim_date["date_key"].dt.month
    dim_date["day"] = dim_date["date_key"].dt.day
    dim_date = dim_date[["date_id", "order_date", "year", "month", "day"]]

    dim_product = products.copy().sort_values("product_id").reset_index(drop=True)
    dim_product.insert(0, "product_key", range(1, len(dim_product) + 1))

    dim_customer = customers.sort_values(["customer_id", "valid_from"]).reset_index(drop=True)
    dim_customer["valid_to"] = dim_customer.groupby("customer_id")["valid_from"].shift(-1)
    dim_customer["is_current"] = dim_customer["valid_to"].isna()
    dim_customer["valid_from"] = dim_customer["valid_from"].dt.strftime("%Y-%m-%d")
    dim_customer["valid_to"] = dim_customer["valid_to"].dt.strftime("%Y-%m-%d").fillna("")
    dim_customer.insert(0, "customer_key", range(1, len(dim_customer) + 1))

    fact_rows = []
    for row in orders.sort_values(["order_ts", "order_id", "line_id"]).to_dict("records"):
        date_id = int(pd.to_datetime(row["order_date"]).strftime("%Y%m%d"))
        product_key = int(dim_product.loc[dim_product["product_id"] == row["product_id"], "product_key"].iloc[0])
        history = dim_customer[dim_customer["customer_id"] == row["customer_id"]].copy()
        order_date = row["order_date"]
        match = history[(history["valid_from"] <= order_date) & ((history["valid_to"] == "") | (order_date < history["valid_to"]))]
        if match.empty:
            raise ValueError(f"No customer version for {row['customer_id']} on {order_date}")
        customer_key = int(match["customer_key"].iloc[0])
        fact_rows.append({
            "sales_key": len(fact_rows) + 1,
            "order_id": row["order_id"],
            "line_id": int(row["line_id"]),
            "date_id": date_id,
            "product_key": product_key,
            "customer_key": customer_key,
            "channel": row["channel"],
            "quantity": int(row["quantity"]),
            "unit_price": float(row["unit_price"]),
            "discount_rate": float(row["discount_rate"]),
            "net_revenue": float(row["net_revenue"]),
        })
    fact_sales = pd.DataFrame(fact_rows)

    daily_sales_summary = (
        fact_sales.merge(dim_date[["date_id", "order_date"]], on="date_id")
        .groupby("order_date", as_index=False)
        .agg(order_count=("sales_key", "count"), quantity=("quantity", "sum"), net_revenue=("net_revenue", "sum"))
        .sort_values("order_date")
    )
    daily_sales_summary["net_revenue"] = daily_sales_summary["net_revenue"].round(2)

    con = duckdb.connect(str(db_path))
    for table_name, frame in {
        "dim_date": dim_date,
        "dim_product": dim_product,
        "dim_customer": dim_customer,
        "fact_sales": fact_sales,
        "daily_sales_summary": daily_sales_summary,
    }.items():
        con.register("frame", frame)
        con.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM frame")
        con.unregister("frame")
        frame.to_csv(exports / f"{table_name}.csv", index=False)
    con.close()

    print(f"Wrote DuckDB database: {db_path}")
    print(f"Wrote CSV exports: {exports}")
    print(f"FACT_SALES rows: {len(fact_sales)}")
    print(f"TOTAL net_revenue: {fact_sales['net_revenue'].sum():.2f}")

if __name__ == "__main__":
    main()
