from pathlib import Path
import pandas as pd
import duckdb

ROOT = Path(__file__).resolve().parents[4]
LAB = ROOT / "shared" / "labs" / "ch02_data_models_formats_quality"
OUT = LAB / "output" / "parquet"
EXPECTED = LAB / "expected_output" / "revenue_by_date_region.csv"

required_files = [OUT / "orders.parquet", OUT / "order_items.parquet", OUT / "events.parquet"]
missing = [str(path) for path in required_files if not path.exists()]
if missing:
    raise SystemExit(f"Missing generated Parquet files: {missing}")

query = f"""
SELECT
    CAST(order_ts AS DATE) AS order_date,
    region,
    COUNT(*) AS order_count,
    ROUND(SUM(total_amount), 2) AS total_amount
FROM read_parquet('{(OUT / 'orders.parquet').as_posix()}')
WHERE payment_status IN ('paid', 'refunded')
GROUP BY 1, 2
ORDER BY 1, 2
"""
actual = duckdb.sql(query).df()
actual['order_date'] = actual['order_date'].astype(str)
expected = pd.read_csv(EXPECTED)
expected['order_date'] = expected['order_date'].astype(str)
pd.testing.assert_frame_equal(actual.reset_index(drop=True), expected.reset_index(drop=True), check_dtype=False)
print('PASS: Chapter 2 lab outputs match expected revenue results')
