#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
from pathlib import Path
import pandas as pd

# Expected Gold daily-revenue aggregates, one record per order date.
# The field order below is also the column order of the DataFrame built from it.
_EXPECTED_FIELDS = ('order_date', 'order_count', 'item_count', 'total_revenue')
EXPECTED = [
    dict(zip(_EXPECTED_FIELDS, values))
    for values in (
        ('2026-05-28', 2, 3, 203.50),
        ('2026-05-29', 2, 2, 192.00),
        ('2026-05-30', 2, 5, 180.00),
    )
]


def read_jsonl(path: Path) -> list[dict]:
    """Load a UTF-8 JSON Lines file into a list of decoded records.

    Lines that are empty or contain only whitespace are skipped.

    Args:
        path: Location of the ``.jsonl`` file to read.

    Returns:
        One decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Iterate the file object rather than calling str.splitlines(): the latter
    # also breaks on characters such as U+2028/U+2029/U+0085, which may appear
    # unescaped inside JSON strings and would cut a valid record in two.
    with path.open(encoding='utf-8') as handle:
        return [json.loads(line) for line in handle if line.strip()]


def _check_gold_rows(gold_df: pd.DataFrame) -> None:
    """Compare the Gold daily-revenue frame against ``EXPECTED``.

    Args:
        gold_df: Frame loaded from the Gold daily-revenue CSV.

    Raises:
        SystemExit: If an expected column is absent, if the Gold order dates
            are not exactly the expected ones (missing, extra, or duplicated),
            or if any count or revenue value differs from the expectation.
    """
    expected_df = pd.DataFrame(EXPECTED)

    # Fail with a readable message instead of a KeyError traceback later on.
    absent = [col for col in expected_df.columns if col not in gold_df.columns]
    if absent:
        raise SystemExit('Gold output is missing expected columns: ' + ', '.join(absent))

    # An inner merge alone cannot detect extra or duplicated Gold dates: a
    # duplicated date can stand in for a missing one and still yield three
    # merged rows. Compare the full date lists before merging.
    gold_dates = sorted(gold_df['order_date'].astype(str))
    if gold_dates != sorted(expected_df['order_date']):
        raise SystemExit('Gold output does not contain the expected three order dates')

    merged = gold_df.merge(expected_df, on='order_date', suffixes=('', '_expected'))
    if len(merged) != len(expected_df):
        raise SystemExit('Gold output does not contain the expected three order dates')

    for _, row in merged.iterrows():
        for col in ['order_count', 'item_count']:
            if int(row[col]) != int(row[f'{col}_expected']):
                raise SystemExit(f'Mismatch for {row["order_date"]} {col}: {row[col]} != {row[f"{col}_expected"]}')
        # Revenue is compared at two decimal places.
        if round(float(row['total_revenue']), 2) != round(float(row['total_revenue_expected']), 2):
            raise SystemExit(f'Mismatch for {row["order_date"]} total_revenue')


def main() -> None:
    """Validate the Bronze, Silver and Gold outputs under the lake root.

    Reads ``--lake-root`` from the command line, then checks that the expected
    files exist, that the row counts of each layer match the fixed
    expectations, and that the Gold daily revenue matches ``EXPECTED``.
    Prints a short summary followed by ``VALIDATION PASSED`` on success.

    Raises:
        SystemExit: With a descriptive message on the first failed check.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--lake-root', default='.lake/turanmart-lake-dev')
    args = parser.parse_args()
    lake = Path(args.lake_root)

    bronze_orders = lake / 'bronze/commerce/orders/ingest_date=2026-05-30/batch_id=001/orders.jsonl'
    rejected_path = lake / 'silver/commerce/orders_rejected/ingest_date=2026-05-30/rejected_orders.jsonl'
    gold_csv = lake / 'gold/commerce/daily_revenue/daily_revenue.csv'

    # Report every missing input at once rather than failing on the first.
    missing = [p for p in [bronze_orders, rejected_path, gold_csv] if not p.exists()]
    if missing:
        raise SystemExit('Missing expected files: ' + ', '.join(str(p) for p in missing))

    bronze_rows = read_jsonl(bronze_orders)
    rejected_rows = read_jsonl(rejected_path)
    gold_df = pd.read_csv(gold_csv)

    # Silver clean orders are read from one part file per order_date partition.
    parquet_files = sorted((lake / 'silver/commerce/orders_clean').glob('order_date=*/part-0000.parquet'))
    if len(parquet_files) != 3:
        raise SystemExit(f'Expected 3 Silver date partitions, found {len(parquet_files)}')
    silver_df = pd.concat([pd.read_parquet(p) for p in parquet_files], ignore_index=True)

    if len(bronze_rows) != 8:
        raise SystemExit(f'Expected 8 Bronze rows, found {len(bronze_rows)}')
    if len(silver_df) != 6:
        raise SystemExit(f'Expected 6 Silver clean rows, found {len(silver_df)}')
    if len(rejected_rows) != 2:
        raise SystemExit(f'Expected 2 rejected rows, found {len(rejected_rows)}')

    _check_gold_rows(gold_df)

    print(f'BRONZE orders rows: {len(bronze_rows)}')
    print(f'SILVER clean orders rows: {len(silver_df)}')
    print(f'SILVER rejected orders rows: {len(rejected_rows)}')
    print(f'GOLD daily revenue rows: {len(gold_df)}')
    print(f'GOLD total_revenue: {gold_df["total_revenue"].sum():.2f}')
    print('VALIDATION PASSED')


# Run the validation only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
