#!/usr/bin/env python3
"""Validate the Chapter 19 feature store and model serving lab artifacts.

The validator intentionally uses only the Python standard library so students can run it
in a clean environment. It performs structural checks on the starter artifacts rather
than trying to replace a production feature-store or serving-platform test suite.
"""

from __future__ import annotations

import csv
import json
import re
import sys
from pathlib import Path

# Directory the artifact files are resolved against: the parent of the directory
# that contains this script (``parents[0]`` is the script's own directory).
ROOT = Path(__file__).resolve().parents[1]

# Key names that must appear in feature_store_contract.yml. The validator tests
# each one as a plain substring of the file text; the YAML is never parsed.
REQUIRED_CONTRACT_KEYS = [
    "contract_id",
    "model_feature_service",
    "prediction_time_column",
    "entities",
    "feature_views",
    "event_timestamp",
    "source_delay",
    "temporal_join_lookback",
    "online_store",
    "ttl",
    "freshness_sla",
    "require_training_serving_skew_test",
]

# Top-level fields every JSON record in online_feature_payloads.jsonl must contain.
REQUIRED_PAYLOAD_FIELDS = [
    "entity_type",
    "entity_key",
    "model_feature_service",
    "feature_view",
    "feature_event_ts",
    "materialized_at",
    "features",
]

# Column names that must be present in skew_test_cases.csv. The validator checks
# them against the keys of the first data row produced by csv.DictReader.
REQUIRED_SKEW_COLUMNS = [
    "case_id",
    "entity_type",
    "entity_key",
    "feature_name",
    "offline_value",
    "online_value",
    "feature_event_ts",
    "materialized_at",
    "max_age_minutes",
    "tolerance",
    "result",
    "review_note",
]

# Terms that must appear in serving_release_plan.yml. As with the contract keys,
# each is tested as a plain substring of the file text rather than a parsed key.
REQUIRED_SERVING_TERMS = [
    "endpoint_name",
    "serving_platform",
    "inference_runtime",
    "model_versions",
    "latency_slo",
    "batching_policy",
    "shadow_test",
    "canary",
    "monitoring",
    "rollback",
    "feature_freshness",
    "training_serving_skew",
    "promotion_status",
]


def read_text(name: str) -> str:
    """Return the UTF-8 decoded contents of an artifact located under ``ROOT``.

    Args:
        name: File name (or relative path) joined onto ``ROOT``.

    Returns:
        The full text of the file.

    Raises:
        AssertionError: If nothing exists at the resolved path.
    """
    artifact = ROOT / name
    if artifact.exists():
        return artifact.read_text(encoding="utf-8")
    raise AssertionError(f"Missing required file: {name}")


def validate_feature_store_contract() -> None:
    """Run structural checks on ``feature_store_contract.yml``.

    The file is inspected as plain text (no YAML parsing). It must:

    * contain every entry of ``REQUIRED_CONTRACT_KEYS`` as a substring,
    * contain the text ``feature_views:`` exactly once,
    * contain the text ``name:`` at least eight times,
    * contain each of the expected ISO-8601 duration literals.

    Raises:
        AssertionError: If the file is missing or any of the checks fails.
    """
    contract = read_text("feature_store_contract.yml")

    absent_keys = [name for name in REQUIRED_CONTRACT_KEYS if name not in contract]
    if absent_keys:
        raise AssertionError(f"Feature contract missing keys: {', '.join(absent_keys)}")

    feature_view_sections = contract.count("feature_views:")
    if feature_view_sections != 1:
        raise AssertionError("Feature contract should define one feature_views section")

    name_entries = contract.count("name:")
    if name_entries < 8:
        raise AssertionError("Feature contract should include entities, feature views, and feature names")

    # Report only the first absent duration, in the listed order.
    expected_durations = ("PT10M", "P7D", "PT30M", "P30D")
    first_absent = next((item for item in expected_durations if item not in contract), None)
    if first_absent is not None:
        raise AssertionError(f"Feature contract missing expected duration: {first_absent}")


def validate_online_payloads() -> None:
    """Validate the sample records in ``online_feature_payloads.jsonl``.

    Blank lines are skipped. Every other line must be a JSON object that contains
    each field in ``REQUIRED_PAYLOAD_FIELDS``, has a non-empty ``features`` object,
    and names the ``fraud_risk_service_v3`` feature service. Across the whole file
    there must be at least four records, and both the ``customer`` and ``merchant``
    entity types must appear.

    Raises:
        AssertionError: If the file is missing or any check above fails. The
            message includes the 1-based line number for per-record failures.
    """
    path = ROOT / "online_feature_payloads.jsonl"
    if not path.exists():
        raise AssertionError("Missing required file: online_feature_payloads.jsonl")
    records = []
    with path.open(encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                raise AssertionError(f"Invalid JSON on line {line_no}: {exc}") from exc
            # json.loads also accepts bare numbers, strings, arrays, etc. Without this
            # guard the field checks below fail with an opaque TypeError (or silently
            # do substring matching on a string) instead of a line-numbered message.
            if not isinstance(record, dict):
                raise AssertionError(f"Payload must be a JSON object on line {line_no}")
            missing = [field for field in REQUIRED_PAYLOAD_FIELDS if field not in record]
            if missing:
                raise AssertionError(f"Payload missing required field(s) on line {line_no}: {', '.join(missing)}")
            if not isinstance(record["features"], dict) or not record["features"]:
                raise AssertionError(f"Payload features must be a non-empty object on line {line_no}")
            if record["model_feature_service"] != "fraud_risk_service_v3":
                raise AssertionError(f"Unexpected feature service on line {line_no}")
            records.append(record)
    if len(records) < 4:
        raise AssertionError("Online payloads should include at least four sample records")
    # Only string entity types can match "customer"/"merchant"; filtering here also
    # keeps an unhashable value (list/object) from crashing the set construction.
    entity_types = {
        record["entity_type"] for record in records if isinstance(record["entity_type"], str)
    }
    if {"customer", "merchant"} - entity_types:
        raise AssertionError("Online payloads should include both customer and merchant entities")


def validate_skew_test_cases() -> None:
    """Validate the training/serving skew evidence in ``skew_test_cases.csv``.

    The file must contain at least one data row, every column listed in
    ``REQUIRED_SKEW_COLUMNS``, and at least one ``PASS`` and one ``REVIEW`` result.
    For each row:

    * ``offline_value``, ``online_value`` and ``tolerance`` must parse as floats and
      ``max_age_minutes`` as a positive integer;
    * a ``PASS`` row must satisfy ``abs(offline - online) <= tolerance``;
    * ``review_note`` must be at least 15 characters after stripping whitespace.

    Raises:
        AssertionError: If the file is missing or any check above fails. Row-level
            messages include the row's ``case_id``.
    """
    path = ROOT / "skew_test_cases.csv"
    if not path.exists():
        raise AssertionError("Missing required file: skew_test_cases.csv")
    with path.open(newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        raise AssertionError("Skew test cases must contain at least one row")
    missing = [col for col in REQUIRED_SKEW_COLUMNS if col not in rows[0]]
    if missing:
        raise AssertionError(f"Skew test cases missing columns: {', '.join(missing)}")
    results = {row["result"] for row in rows}
    if not {"PASS", "REVIEW"}.issubset(results):
        raise AssertionError("Skew test should include both PASS and REVIEW evidence")
    for row in rows:
        case_id = row["case_id"]
        try:
            offline_value = float(row["offline_value"])
            online_value = float(row["online_value"])
            tolerance = float(row["tolerance"])
            max_age = int(row["max_age_minutes"])
        except (TypeError, ValueError) as exc:
            # TypeError covers short rows, where DictReader fills absent cells with None.
            raise AssertionError(f"Non-numeric value in case {case_id}: {exc}") from exc
        if max_age <= 0:
            raise AssertionError(f"max_age_minutes must be positive for case {case_id}")
        difference = abs(offline_value - online_value)
        # Written as "not <=" rather than ">" so that a NaN difference or tolerance
        # (every comparison with NaN is False) cannot slip through as a PASS.
        if row["result"] == "PASS" and not difference <= tolerance:
            raise AssertionError(f"PASS case exceeds tolerance: {case_id}")
        # A short row leaves review_note as None; treat that as an empty note.
        if len((row["review_note"] or "").strip()) < 15:
            raise AssertionError(f"Review note should be descriptive for case {case_id}")


def validate_serving_release_plan() -> None:
    """Run structural checks on ``serving_release_plan.yml``.

    The file is inspected as plain text (no YAML parsing). It must:

    * contain every entry of ``REQUIRED_SERVING_TERMS`` as a substring,
    * contain the text ``severity:`` at least five times,
    * mention both model versions and the 5 percent initial traffic setting,
    * define ``p95_ms`` with an integer value of at most 200.

    Raises:
        AssertionError: If the file is missing or any of the checks fails.
    """
    text = read_text("serving_release_plan.yml")
    missing = [term for term in REQUIRED_SERVING_TERMS if term not in text]
    if missing:
        raise AssertionError(f"Serving plan missing term(s): {', '.join(missing)}")
    if text.count("severity:") < 5:
        raise AssertionError("Serving plan should define severities for monitoring checks")
    for required in ["fraud_risk_xgboost:2.4.1", "fraud_risk_xgboost:2.5.0", "initial_traffic_percent: 5"]:
        # Each detail ends in a digit, so a bare substring test would also accept a
        # longer number such as "initial_traffic_percent: 50" or version "2.4.10".
        # The negative lookahead requires the match not to continue with a digit.
        if not re.search(re.escape(required) + r"(?!\d)", text):
            raise AssertionError(f"Serving plan missing expected rollout detail: {required}")
    # Only the first p95_ms occurrence is inspected, and only its integer part.
    match = re.search(r"p95_ms:\s*(\d+)", text)
    if not match or int(match.group(1)) > 200:
        raise AssertionError("Serving plan should define a realistic p95 latency SLO at or below 200 ms")


def main() -> int:
    """Run each artifact validator in order, printing a PASS line per check.

    Returns:
        ``0`` once every validator has completed. A failing validator raises, which
        is not caught here, so the remaining validators are not run.
    """
    for validator in (
        validate_feature_store_contract,
        validate_online_payloads,
        validate_skew_test_cases,
        validate_serving_release_plan,
    ):
        validator()
        print(f"PASS {validator.__name__}")
    print("All Chapter 19 feature store and model serving checks passed.")
    return 0


if __name__ == "__main__":
    # Turn any validation failure into a one-line message on stderr and exit status 1
    # rather than a traceback; a clean run exits with whatever main() returns.
    try:
        exit_code = main()
    except Exception as exc:  # noqa: BLE001 - educational validator should print concise failures
        print(f"FAIL {exc}", file=sys.stderr)
        exit_code = 1
    raise SystemExit(exit_code)
