#!/usr/bin/env python3
"""Generate deterministic Customer 360 source data for Chapter 21.

The dataset is synthetic but designed to resemble common retail and digital
engagement signals: customer master records, orders, clickstream sessions,
support tickets, campaign events, and consent changes. The script uses only the
Python standard library so readers can run it in a clean environment.
"""

from __future__ import annotations

import csv
import random
from datetime import datetime, timedelta
from pathlib import Path

# Project root: the parent of the directory that contains this script.
BASE = Path(__file__).resolve().parents[1]
# Output directory for the generated CSV files; created at import time if missing.
RAW = BASE / "data" / "raw"
RAW.mkdir(parents=True, exist_ok=True)

# Fixed seed so the `random` module yields the same draw sequence on every run.
random.seed(21021)
# Fixed reference timestamp; the generators derive their timestamps as offsets from it.
NOW = datetime(2026, 5, 19, 9, 0, 0)
# Country codes assigned to customers (sampled with weights in generate_customers).
COUNTRIES = ["UZ", "KZ", "TJ", "KG", "TR"]
# Channels used for a customer's preferred channel and for each order's channel.
CHANNELS = ["web", "mobile", "store", "marketplace"]
# Product categories attached to orders and clickstream events.
CATEGORIES = ["electronics", "books", "home", "fashion", "beauty", "sports"]
# Campaign names attached to campaign events.
CAMPAIGNS = ["welcome", "winback", "loyalty", "cross_sell", "seasonal"]


def write_csv(name: str, rows: list[dict]) -> None:
    """Write ``rows`` to ``RAW / name`` as a UTF-8 CSV file with a header row.

    The column names and their order are taken from the keys of the first
    row. When ``rows`` is empty nothing is written and the target file is
    neither created nor truncated.

    Args:
        name: File name (relative to ``RAW``) of the CSV to write.
        rows: Records to serialize, one dict per CSV line.
    """
    if rows:
        header = list(rows[0])
        target = RAW / name
        # newline="" lets the csv module control line endings itself.
        with open(target, "w", newline="", encoding="utf-8") as handle:
            out = csv.DictWriter(handle, fieldnames=header)
            out.writeheader()
            out.writerows(rows)


def generate_customers(n: int = 120) -> list[dict]:
    """Build ``n`` synthetic customer master records.

    Each record carries a sequential ``customer_id`` and ``email_hash``, a
    ``loyalty_id`` that is present for roughly 68% of customers (empty string
    otherwise), a ``phone_hash`` drawn from 115 possible values (so several
    customers may share one), a weighted ``country``, a uniformly chosen
    ``preferred_channel`` and ISO-8601 ``created_at`` / ``updated_at``
    timestamps.

    Args:
        n: Number of customers to generate. A value below 1 yields an empty
            list.

    Returns:
        One dict per customer, ordered by ``customer_id``.
    """
    rows = []
    for i in range(1, n + 1):
        # The draws below happen in a fixed order so the seeded random stream
        # (and therefore every downstream dataset) stays reproducible.
        created = NOW - timedelta(days=random.randint(5, 720))
        loyalty_id = f"L{i:05d}" if random.random() < 0.68 else ""
        phone_hash = f"phone_{random.randint(1, 115):04d}_hash"
        country = random.choices(COUNTRIES, weights=[55, 18, 10, 7, 10])[0]
        preferred_channel = random.choice(CHANNELS)
        # An account may be only 5 days old while the update offset goes up to
        # 60 days, so the raw sum can land after NOW. Clamp it so no record is
        # "updated" in the future; the draw itself is unchanged.
        updated = min(created + timedelta(days=random.randint(0, 60)), NOW)
        rows.append({
            "customer_id": f"C{i:05d}",
            "email_hash": f"email_{i:04d}_hash",
            "loyalty_id": loyalty_id,
            "phone_hash": phone_hash,
            "country": country,
            "preferred_channel": preferred_channel,
            "created_at": created.isoformat(),
            "updated_at": updated.isoformat(),
        })
    return rows


def generate_orders(customers: list[dict]) -> list[dict]:
    """Build synthetic order records for the given customers.

    Every customer receives between 0 and 10 orders (weighted towards the
    lower-middle of that range). Each order is timestamped up to roughly 180
    days before ``NOW`` and gets a random category, an amount drawn from a
    normal distribution (mean 65, standard deviation 35) floored at 8 and
    rounded to two decimals, a weighted status and a random channel.

    Args:
        customers: Customer records; only ``customer_id`` is read.

    Returns:
        One dict per order with sequential ``order_id`` values starting at
        ``O0000001``.
    """
    count_weights = [8, 12, 14, 16, 14, 10, 8, 6, 5, 4, 3]
    statuses = ["paid", "returned", "cancelled"]
    orders: list[dict] = []
    for customer in customers:
        (how_many,) = random.choices(range(11), weights=count_weights)
        for _ in range(how_many):
            # Draw order matters for reproducibility: days, hours, minutes,
            # then the remaining attributes in column order.
            days_back = random.randint(0, 180)
            hours_back = random.randint(0, 23)
            minutes_back = random.randint(0, 59)
            placed = NOW - timedelta(days=days_back, hours=hours_back, minutes=minutes_back)
            category = random.choice(CATEGORIES)
            amount = round(max(8, random.gauss(65, 35)), 2)
            (status,) = random.choices(statuses, weights=[88, 7, 5])
            channel = random.choice(CHANNELS)
            orders.append({
                # Ids are sequential across all customers, starting at 1.
                "order_id": f"O{len(orders) + 1:07d}",
                "customer_id": customer["customer_id"],
                "event_time": placed.isoformat(),
                "category": category,
                "amount_usd": amount,
                "status": status,
                "channel": channel,
            })
    return orders


def generate_clickstream(customers: list[dict]) -> list[dict]:
    """Build synthetic clickstream events for the given customers.

    Every customer produces between 4 and 35 events, each timestamped up to
    roughly 45 days before ``NOW``. About 14% of the events have an empty
    ``customer_id``; every event carries a ``cookie_id`` drawn from 155
    possible values and a ``session_id`` drawn from 2200 possible values.

    Args:
        customers: Customer records; only ``customer_id`` is read.

    Returns:
        One dict per event with sequential ``event_id`` values starting at
        ``E00000001``.
    """
    action_names = ["view", "search", "add_to_cart", "checkout_start", "recommendation_click"]
    action_weights = [52, 22, 12, 7, 7]
    events: list[dict] = []
    for customer in customers:
        n_events = random.randint(4, 35)
        for _ in range(n_events):
            # Keep the draws in this exact order so the seeded output is stable.
            days_back = random.randint(0, 45)
            seconds_back = random.randint(0, 86400)
            happened = NOW - timedelta(days=days_back, seconds=seconds_back)
            if random.random() < 0.86:
                known_customer = customer["customer_id"]
            else:
                known_customer = ""
            cookie = f"cookie_{random.randint(1, 155):04d}"
            (action,) = random.choices(action_names, weights=action_weights)
            category = random.choice(CATEGORIES)
            session = f"S{random.randint(1, 2200):06d}"
            events.append({
                "event_id": f"E{len(events) + 1:08d}",
                "customer_id": known_customer,
                "cookie_id": cookie,
                "event_time": happened.isoformat(),
                "action": action,
                "category": category,
                "session_id": session,
            })
    return events


def generate_support_tickets(customers: list[dict]) -> list[dict]:
    """Build synthetic support tickets for the given customers.

    Every customer gets between 0 and 4 tickets (most get none). Each ticket
    is timestamped up to roughly 120 days before ``NOW`` and gets a random
    reason, a weighted priority, a resolution time drawn from a normal
    distribution (mean 18, standard deviation 11) floored at 1 and rounded to
    one decimal, and a weighted CSAT score from 1 to 5.

    Args:
        customers: Customer records; only ``customer_id`` is read.

    Returns:
        One dict per ticket with sequential ``ticket_id`` values starting at
        ``T000001``.
    """
    reason_pool = ["delivery", "refund", "product_question", "account", "payment"]
    priority_pool = ["low", "medium", "high"]
    tickets: list[dict] = []
    for customer in customers:
        (n_tickets,) = random.choices(range(5), weights=[55, 24, 12, 6, 3])
        for _ in range(n_tickets):
            # Draws stay in the original order to keep the seeded data stable.
            days_back = random.randint(0, 120)
            hours_back = random.randint(0, 23)
            opened_on = NOW - timedelta(days=days_back, hours=hours_back)
            reason = random.choice(reason_pool)
            (priority,) = random.choices(priority_pool, weights=[55, 35, 10])
            resolved_hours = round(max(1, random.gauss(18, 11)), 1)
            (csat,) = random.choices([1, 2, 3, 4, 5], weights=[4, 6, 15, 38, 37])
            tickets.append({
                "ticket_id": f"T{len(tickets) + 1:06d}",
                "customer_id": customer["customer_id"],
                "created_at": opened_on.isoformat(),
                "reason": reason,
                "priority": priority,
                "resolved_hours": resolved_hours,
                "csat": csat,
            })
    return tickets


def generate_campaign_events(customers: list[dict]) -> list[dict]:
    """Build synthetic campaign send/open/click events for the given customers.

    Every customer receives between 1 and 6 campaign sends, each timestamped
    up to roughly 90 days before ``NOW``. About 43% of sends are opened and
    about 21% of the opened ones are clicked. ``opened_at`` falls 1-72 hours
    after ``sent_at``; ``clicked_at`` is drawn 1-96 hours after ``sent_at``
    but is never earlier than ``opened_at``. Missing opens/clicks are written
    as empty strings.

    Args:
        customers: Customer records; only ``customer_id`` is read.

    Returns:
        One dict per send with sequential ``campaign_event_id`` values
        starting at ``M0000001``.
    """
    rows, event_id = [], 1
    for c in customers:
        for _ in range(random.randint(1, 6)):
            # The draws below keep a fixed order so the seeded random stream
            # (and every dataset generated afterwards) stays reproducible.
            sent = NOW - timedelta(days=random.randint(0, 90), hours=random.randint(0, 23))
            opened = random.random() < 0.43
            clicked = opened and random.random() < 0.21
            campaign = random.choice(CAMPAIGNS)

            opened_at = clicked_at = ""
            if opened:
                open_delay = random.randint(1, 72)
                opened_at = (sent + timedelta(hours=open_delay)).isoformat()
                if clicked:
                    # The click delay is drawn independently of the open delay,
                    # so floor it at the open delay: a click can only happen
                    # once the message has been opened.
                    click_delay = max(open_delay, random.randint(1, 96))
                    clicked_at = (sent + timedelta(hours=click_delay)).isoformat()
            # NOTE(review): for very recent sends, opened_at/clicked_at can
            # still fall after NOW; left as-is here.

            rows.append({
                "campaign_event_id": f"M{event_id:07d}",
                "customer_id": c["customer_id"],
                "campaign": campaign,
                "sent_at": sent.isoformat(),
                "opened_at": opened_at,
                "clicked_at": clicked_at,
            })
            event_id += 1
    return rows


def generate_consent_events(customers: list[dict]) -> list[dict]:
    """Build one synthetic consent event per customer.

    The event is timestamped up to 365 days before ``NOW``. Marketing consent
    is granted with probability 0.78 and personalization consent with
    probability 0.84; both are written as the strings ``"true"`` / ``"false"``.
    The source is picked uniformly from four capture points.

    Args:
        customers: Customer records; only ``customer_id`` is read.

    Returns:
        One dict per customer with sequential ``consent_event_id`` values
        starting at ``P0000001``.
    """
    sources = ["checkout", "profile", "campaign", "support"]
    # Dict values are evaluated top to bottom, so the random draws happen in
    # the order: timestamp, marketing flag, personalization flag, source.
    return [
        {
            "consent_event_id": f"P{seq:07d}",
            "customer_id": customer["customer_id"],
            "event_time": (NOW - timedelta(days=random.randint(0, 365))).isoformat(),
            "marketing_consent": str(random.random() < 0.78).lower(),
            "personalization_consent": str(random.random() < 0.84).lower(),
            "source": random.choice(sources),
        }
        for seq, customer in enumerate(customers, start=1)
    ]


def main() -> None:
    """Generate every dataset and write each one to its CSV file under ``RAW``.

    Customers are generated first because all other datasets are derived from
    them. The remaining datasets are generated and written one after another
    in a fixed order, which keeps the seeded random stream reproducible.
    """
    customers = generate_customers()
    write_csv("customers.csv", customers)

    # (file name, generator) pairs, processed strictly in this order.
    derived = (
        ("orders.csv", generate_orders),
        ("clickstream.csv", generate_clickstream),
        ("support_tickets.csv", generate_support_tickets),
        ("campaign_events.csv", generate_campaign_events),
        ("consent_events.csv", generate_consent_events),
    )
    for filename, build in derived:
        write_csv(filename, build(customers))

    print(f"Generated source data in {RAW}")


if __name__ == "__main__":
    main()
