feat(state+runtime+gui): market_snapshots — calibrazione soglie da dati

Sistema dedicato di raccolta dati per scegliere le soglie dei filtri sui percentili reali invece di valori a istinto. Nuovi componenti: * state/migrations/0003_market_snapshots.sql — tabella + index, PK composta (timestamp, asset). Ogni colonna numerica è NULL-able per preservare la continuità della serie quando un singolo MCP fallisce. * state/models.py — MarketSnapshotRecord Pydantic. * state/repository.py — record_market_snapshot, list_market_snapshots, _row_to_market_snapshot. * runtime/market_snapshot_cycle.py — collettore best-effort che chiama spot/dvol/realized_vol/dealer_gamma/funding_perp/funding_cross/ liquidation_heatmap/macro per ogni asset; raccoglie gli errori in fetch_errors_json e segna fetch_ok=false ma persiste comunque la riga. * clients/deribit.py — generalizzati dealer_gamma_profile(currency), realized_vol(currency), spot_perp_price(asset). dealer_gamma_profile_eth resta come alias per la chiamata dell'entry cycle. * runtime/orchestrator.py — nuovo job APScheduler `market_snapshot` cron */15 con assets configurabili (default ETH+BTC); il consumer manual_actions ora dispatcha anche kind=run_cycle cycle=market_snapshot per la GUI. * gui/data_layer.py — load_market_snapshots, enqueue_run_cycle accetta market_snapshot; tipo MarketSnapshotRecord esposto. * gui/pages/6_📐_Calibrazione.py — selezione asset+finestra, conteggio fetch_ok, per ogni metrica: istogramma, soglia da strategy.yaml come vline rossa, percentili P5/P10/P25/P50/P75/P90/P95, % di tick che la soglia avrebbe filtrato. * gui/pages/1_📊_Status.py — bottone "📐 Forza snapshot" (4° del pannello Forza ciclo) per popolare la tabella senza aspettare il cron. 5 nuovi test sul collector (happy, fault tolerance, asset switch, macro fail, empty assets); test_orchestrator job set aggiornato. 368/368 tests pass; ruff clean; mypy strict src clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 14:39:09 +02:00
parent 63d1aa4262
commit d9454fc996
11 changed files with 956 additions and 7 deletions
@@ -32,6 +32,7 @@ from cerbero_bite.state import Repository, connect, transaction
 from cerbero_bite.state.models import (
    DecisionRecord,
    ManualAction,
+    MarketSnapshotRecord,
    PositionRecord,
    SystemStateRecord,
 )
@@ -61,6 +62,7 @@ __all__ = [
    "load_closed_positions",
    "load_decisions_for_position",
    "load_engine_snapshot",
+    "load_market_snapshots",
    "load_open_positions",
    "load_pending_manual_actions",
    "load_position_by_id",
@@ -634,9 +636,10 @@ def enqueue_run_cycle(
    method on the next minute tick.
    """
    cycle_norm = cycle.strip().lower()
-    if cycle_norm not in {"entry", "monitor", "health"}:
+    if cycle_norm not in {"entry", "monitor", "health", "market_snapshot"}:
        raise ValueError(
-            f"cycle must be entry|monitor|health, got '{cycle}'"
+            f"cycle must be entry|monitor|health|market_snapshot, "
+            f"got '{cycle}'"
        )
    return _enqueue_action(
        db_path=db_path,
@@ -645,6 +648,28 @@ def enqueue_run_cycle(
    )


+def load_market_snapshots(
+    *,
+    asset: str,
+    db_path: Path | str = DEFAULT_DB_PATH,
+    start: datetime | None = None,
+    end: datetime | None = None,
+    limit: int = 5000,
+) -> list[MarketSnapshotRecord]:
+    """Return market_snapshots rows for the asset, newest-first."""
+    db_path = Path(db_path)
+    if not db_path.exists():
+        return []
+    repo = Repository()
+    conn = connect(db_path)
+    try:
+        return repo.list_market_snapshots(
+            conn, asset=asset, start=start, end=end, limit=limit
+        )
+    finally:
+        conn.close()
+
+
 def load_pending_manual_actions(
    *, db_path: Path | str = DEFAULT_DB_PATH
 ) -> list[ManualAction]:
@@ -47,7 +47,7 @@ def _render_force_cycle_panel(db_path: Path) -> None:
        "solo se il motore è in esecuzione (`cerbero-bite start`); il job "
        "`manual_actions` consuma la coda ogni minuto."
    )
-    cols = st.columns(3)
+    cols = st.columns(4)
    if cols[0].button(
        "▶ Forza entry",
        use_container_width=True,
@@ -72,6 +72,13 @@ def _render_force_cycle_panel(db_path: Path) -> None:
    ):
        aid = enqueue_run_cycle(cycle="health", db_path=db_path)
        st.success(f"✅ ciclo health accodato (id #{aid}).")
+    if cols[3].button(
+        "📐 Forza snapshot",
+        use_container_width=True,
+        help="Esegue subito una raccolta market_snapshot (alimenta Calibrazione).",
+    ):
+        aid = enqueue_run_cycle(cycle="market_snapshot", db_path=db_path)
+        st.success(f"✅ snapshot accodato (id #{aid}).")


@st.cache_data(ttl=60, show_spinner=False)
@@ -0,0 +1,309 @@
+"""Calibrazione page — distribuzioni storiche dei segnali per tarare le soglie.
+
+Legge dalla tabella ``market_snapshots`` (popolata dal job dedicato cron
+``*/15``). Per ogni metrica osservabile mostra:
+
+* istogramma + linea verticale della soglia attuale di config,
+* percentili P5/P10/P25/P50/P75/P90/P95,
+* percentuale di tick che la soglia attuale avrebbe filtrato.
+
+L'idea è scegliere le soglie sui percentili reali del proprio
+ambiente (testnet o mainnet), invece di valori fissati a istinto.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from pathlib import Path
+
+import pandas as pd
+import plotly.graph_objects as go
+import streamlit as st
+
+from cerbero_bite.config.loader import load_strategy
+from cerbero_bite.gui.data_layer import (
+    DEFAULT_DB_PATH,
+    humanize_dt,
+    load_market_snapshots,
+)
+from cerbero_bite.state.models import MarketSnapshotRecord
+
+
+def _resolve_db() -> Path:
+    return Path(os.environ.get("CERBERO_BITE_GUI_DB", DEFAULT_DB_PATH))
+
+
+@dataclass(frozen=True)
+class MetricSpec:
+    """Descriptor of one metric to plot on the calibration page."""
+
+    # Attribute name read from each snapshot record to build the series.
+    field: str
+    # Heading shown above the metric's section.
+    title: str
+    # Unit label, shown in the heading and as the histogram x-axis title.
+    unit: str
+    # Annotation text for the threshold line; when None a generic
+    # "soglia <value>" label is used instead.
+    threshold_label: str | None
+    # Current threshold taken from the strategy config; None when the
+    # metric has no threshold (no line drawn, no "% blocked" figure).
+    threshold_value: float | None
+    # Which side of the threshold counts as filtered:
+    #   "below"     -> value < threshold
+    #   "above"     -> value > threshold
+    #   "above_abs" -> |value| > threshold (symmetric filter)
+    threshold_direction: str
+
+
+def _metric_specs(strategy: object | None) -> list[MetricSpec]:
+    """Costruisce gli spec leggendo le soglie correnti da strategy.yaml."""
+    funding_max: float | None = None
+    dealer_min: float | None = None
+    dvol_min: float | None = None
+    if strategy is not None:
+        try:
+            funding_max = float(strategy.entry.funding_max_abs_annualized)  # type: ignore[attr-defined]
+        except Exception:
+            funding_max = None
+        try:
+            dealer_min = float(strategy.entry.dealer_gamma_min)  # type: ignore[attr-defined]
+        except Exception:
+            dealer_min = None
+        try:
+            dvol_min = float(strategy.entry.dvol_min)  # type: ignore[attr-defined]
+        except Exception:
+            dvol_min = None
+
+    specs: list[MetricSpec] = [
+        MetricSpec(
+            field="dvol",
+            title="DVOL",
+            unit="%",
+            threshold_label=(
+                f"DVOL min={dvol_min:.0f}" if dvol_min is not None else None
+            ),
+            threshold_value=dvol_min,
+            threshold_direction="below",
+        ),
+        MetricSpec(
+            field="realized_vol_30d",
+            title="Realized vol 30d",
+            unit="%",
+            threshold_label=None,
+            threshold_value=None,
+            threshold_direction="below",
+        ),
+        MetricSpec(
+            field="iv_minus_rv",
+            title="IV − RV (30d)",
+            unit="%",
+            threshold_label=None,
+            threshold_value=None,
+            threshold_direction="below",
+        ),
+        MetricSpec(
+            field="funding_perp_annualized",
+            title="Funding perp annualized",
+            unit="frazione",
+            threshold_label=(
+                f"|funding| max={funding_max:.2f}"
+                if funding_max is not None
+                else None
+            ),
+            threshold_value=funding_max,
+            threshold_direction="above_abs",
+        ),
+        MetricSpec(
+            field="funding_cross_annualized",
+            title="Funding cross median annualized",
+            unit="frazione",
+            threshold_label=None,
+            threshold_value=None,
+            threshold_direction="above_abs",
+        ),
+        MetricSpec(
+            field="dealer_net_gamma",
+            title="Dealer net gamma",
+            unit="USD",
+            threshold_label=(
+                f"min={dealer_min:.0f}"
+                if dealer_min is not None
+                else None
+            ),
+            threshold_value=dealer_min,
+            threshold_direction="below",
+        ),
+        MetricSpec(
+            field="oi_delta_pct_4h",
+            title="OI delta % (4h)",
+            unit="%",
+            threshold_label=None,
+            threshold_value=None,
+            threshold_direction="below",
+        ),
+    ]
+    return specs
+
+
+def _series(records: list[MarketSnapshotRecord], field: str) -> pd.Series:
+    values: list[float] = []
+    for r in records:
+        v = getattr(r, field, None)
+        if v is None:
+            continue
+        try:
+            values.append(float(v))
+        except (TypeError, ValueError):
+            continue
+    return pd.Series(values, dtype="float64")
+
+
+def _percent_blocked(s: pd.Series, spec: MetricSpec) -> float | None:
+    if spec.threshold_value is None or s.empty:
+        return None
+    if spec.threshold_direction == "below":
+        return float((s < spec.threshold_value).mean())
+    if spec.threshold_direction == "above_abs":
+        return float((s.abs() > spec.threshold_value).mean())
+    if spec.threshold_direction == "above":
+        return float((s > spec.threshold_value).mean())
+    return None
+
+
+def _percentiles_strip(s: pd.Series) -> None:
+    if s.empty:
+        st.caption("(nessun dato)")
+        return
+    quantiles = [0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95]
+    cols = st.columns(len(quantiles))
+    for col, q in zip(cols, quantiles, strict=False):
+        col.metric(f"P{int(q * 100)}", f"{s.quantile(q):.4g}")
+
+
+def _render_metric(spec: MetricSpec, records: list[MarketSnapshotRecord]) -> None:
+    s = _series(records, spec.field)
+    if s.empty:
+        st.subheader(f"{spec.title}")
+        st.info(
+            f"Nessun valore disponibile per `{spec.field}`. "
+            "Avvia il job `market_snapshot` (engine attivo, cron */15) per "
+            "popolare la tabella."
+        )
+        return
+
+    st.subheader(f"{spec.title} ({spec.unit})")
+
+    pct_blocked = _percent_blocked(s, spec)
+    cols = st.columns(4)
+    cols[0].metric("Tick raccolti", len(s))
+    cols[1].metric("Min", f"{s.min():.4g}")
+    cols[2].metric("Max", f"{s.max():.4g}")
+    cols[3].metric(
+        "% bloccato dalla soglia",
+        f"{pct_blocked:.0%}" if pct_blocked is not None else "—",
+        help=(
+            "Frazione di tick che la soglia di config avrebbe filtrato"
+            f" se applicata a questa serie ({spec.threshold_direction})."
+        ),
+    )
+
+    fig = go.Figure()
+    fig.add_trace(go.Histogram(x=s, nbinsx=40, opacity=0.85, name="distrib."))
+    if spec.threshold_value is not None:
+        fig.add_vline(
+            x=spec.threshold_value,
+            line_dash="dash",
+            line_color="red",
+            line_width=2,
+            annotation_text=spec.threshold_label or f"soglia {spec.threshold_value}",
+            annotation_position="top",
+        )
+        if spec.threshold_direction == "above_abs":
+            # Disegna anche il bound negativo per i filtri simmetrici.
+            fig.add_vline(
+                x=-spec.threshold_value,
+                line_dash="dash",
+                line_color="red",
+                line_width=2,
+                annotation_text=None,
+            )
+    fig.update_layout(
+        height=280,
+        margin={"l": 10, "r": 10, "t": 30, "b": 10},
+        xaxis_title=spec.unit,
+        yaxis_title="numero tick",
+    )
+    st.plotly_chart(fig, use_container_width=True)
+
+    _percentiles_strip(s)
+
+
+def render() -> None:
+    st.title("📐 Calibrazione")
+    st.caption(
+        "Distribuzioni storiche dei segnali raccolti dal job "
+        "`market_snapshot` (cron */15). Usa i percentili reali per "
+        "tarare le soglie in `strategy.yaml` invece di valori a istinto."
+    )
+
+    db_path = _resolve_db()
+
+    col_a, col_b = st.columns(2)
+    asset = col_a.selectbox("Asset", options=["ETH", "BTC"], index=0)
+    window = col_b.selectbox(
+        "Finestra",
+        options=[
+            "Tutto lo storico",
+            "Ultime 24h",
+            "Ultimi 7 giorni",
+            "Ultimi 30 giorni",
+        ],
+        index=0,
+    )
+
+    now = datetime.now(UTC)
+    start: datetime | None = None
+    if window == "Ultime 24h":
+        start = now - timedelta(hours=24)
+    elif window == "Ultimi 7 giorni":
+        start = now - timedelta(days=7)
+    elif window == "Ultimi 30 giorni":
+        start = now - timedelta(days=30)
+
+    records = load_market_snapshots(
+        asset=asset, db_path=db_path, start=start, limit=5000
+    )
+
+    if not records:
+        st.info(
+            "Nessun snapshot disponibile in questa finestra per "
+            f"`{asset}`. Avvia l'engine (`cerbero-bite start`) e attendi "
+            "almeno un tick del job `market_snapshot` (cron */15)."
+        )
+        return
+
+    st.caption(
+        f"{len(records)} snapshot · primo {humanize_dt(records[-1].timestamp)} "
+        f"· ultimo {humanize_dt(records[0].timestamp)}"
+    )
+
+    # Conteggio fetch_ok per qualità delle serie
+    n_ok = sum(1 for r in records if r.fetch_ok)
+    cols = st.columns(3)
+    cols[0].metric("Snapshot totali", len(records))
+    cols[1].metric("fetch_ok = true", n_ok)
+    cols[2].metric(
+        "Tasso ok",
+        f"{n_ok / len(records):.0%}" if records else "—",
+    )
+    st.divider()
+
+    # Carica strategy.yaml per leggere le soglie correnti
+    try:
+        strategy = load_strategy(Path("strategy.yaml"))
+    except Exception as exc:
+        st.warning(
+            f"Impossibile leggere `strategy.yaml`: {type(exc).__name__}: {exc}"
+        )
+        strategy = None
+
+    specs = _metric_specs(strategy)
+
+    for spec in specs:
+        _render_metric(spec, records)
+        st.divider()
+
+
+render()