feat(state+runtime+gui): market_snapshots — calibrazione soglie da dati

Sistema dedicato di raccolta dati per scegliere le soglie dei filtri
sui percentili reali invece di valori a istinto.

Nuovi componenti:

* state/migrations/0003_market_snapshots.sql — tabella + index, PK
  composta (timestamp, asset). Ogni colonna numerica è NULL-able per
  preservare la continuità della serie quando un singolo MCP fallisce.
* state/models.py — MarketSnapshotRecord Pydantic.
* state/repository.py — record_market_snapshot, list_market_snapshots,
  _row_to_market_snapshot.
* runtime/market_snapshot_cycle.py — collettore best-effort che chiama
  spot/dvol/realized_vol/dealer_gamma/funding_perp/funding_cross/
  liquidation_heatmap/macro per ogni asset; raccoglie gli errori in
  fetch_errors_json e segna fetch_ok=false ma persiste comunque la
  riga.
* clients/deribit.py — generalizzati dealer_gamma_profile(currency),
  realized_vol(currency), spot_perp_price(asset). dealer_gamma_profile_eth
  resta come alias per la chiamata dell'entry cycle.
* runtime/orchestrator.py — nuovo job APScheduler `market_snapshot`
  cron */15 con assets configurabili (default ETH+BTC); il consumer
  manual_actions ora dispatcha anche kind=run_cycle cycle=market_snapshot
  per la GUI.
* gui/data_layer.py — load_market_snapshots, enqueue_run_cycle accetta
  market_snapshot; tipo MarketSnapshotRecord esposto.
* gui/pages/6_📐_Calibrazione.py — selezione asset+finestra, conteggio
  fetch_ok, per ogni metrica: istogramma, soglia da strategy.yaml come
  vline rossa, percentili P5/P10/P25/P50/P75/P90/P95, % di tick che la
  soglia avrebbe filtrato.
* gui/pages/1_📊_Status.py — bottone "📐 Forza snapshot" (4° del pannello
  Forza ciclo) per popolare la tabella senza aspettare il cron.

5 nuovi test sul collector (happy, fault tolerance, asset switch,
macro fail, empty assets); test_orchestrator job set aggiornato.
368/368 tests pass; ruff clean; mypy strict src clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 14:39:09 +02:00
parent 63d1aa4262
commit d9454fc996
11 changed files with 956 additions and 7 deletions
+27 -2
View File
@@ -32,6 +32,7 @@ from cerbero_bite.state import Repository, connect, transaction
from cerbero_bite.state.models import (
DecisionRecord,
ManualAction,
MarketSnapshotRecord,
PositionRecord,
SystemStateRecord,
)
@@ -61,6 +62,7 @@ __all__ = [
"load_closed_positions",
"load_decisions_for_position",
"load_engine_snapshot",
"load_market_snapshots",
"load_open_positions",
"load_pending_manual_actions",
"load_position_by_id",
@@ -634,9 +636,10 @@ def enqueue_run_cycle(
method on the next minute tick.
"""
cycle_norm = cycle.strip().lower()
if cycle_norm not in {"entry", "monitor", "health"}:
if cycle_norm not in {"entry", "monitor", "health", "market_snapshot"}:
raise ValueError(
f"cycle must be entry|monitor|health, got '{cycle}'"
f"cycle must be entry|monitor|health|market_snapshot, "
f"got '{cycle}'"
)
return _enqueue_action(
db_path=db_path,
@@ -645,6 +648,28 @@ def enqueue_run_cycle(
)
def load_market_snapshots(
    *,
    asset: str,
    db_path: Path | str = DEFAULT_DB_PATH,
    start: datetime | None = None,
    end: datetime | None = None,
    limit: int = 5000,
) -> list[MarketSnapshotRecord]:
    """Fetch ``market_snapshots`` rows for one asset, newest first.

    The query itself is delegated to ``Repository.list_market_snapshots``,
    which receives ``asset``, ``start``, ``end`` and ``limit`` unchanged.

    Returns an empty list, without opening a connection, when the database
    file does not exist. The connection is closed even if the query raises.
    """
    database = Path(db_path)
    if not database.exists():
        return []

    repository = Repository()
    connection = connect(database)
    try:
        rows = repository.list_market_snapshots(
            connection, asset=asset, start=start, end=end, limit=limit
        )
    finally:
        connection.close()
    return rows
def load_pending_manual_actions(
*, db_path: Path | str = DEFAULT_DB_PATH
) -> list[ManualAction]:
+8 -1
View File
@@ -47,7 +47,7 @@ def _render_force_cycle_panel(db_path: Path) -> None:
"solo se il motore è in esecuzione (`cerbero-bite start`); il job "
"`manual_actions` consuma la coda ogni minuto."
)
cols = st.columns(3)
cols = st.columns(4)
if cols[0].button(
"▶ Forza entry",
use_container_width=True,
@@ -72,6 +72,13 @@ def _render_force_cycle_panel(db_path: Path) -> None:
):
aid = enqueue_run_cycle(cycle="health", db_path=db_path)
st.success(f"✅ ciclo health accodato (id #{aid}).")
if cols[3].button(
"📐 Forza snapshot",
use_container_width=True,
help="Esegue subito una raccolta market_snapshot (alimenta Calibrazione).",
):
aid = enqueue_run_cycle(cycle="market_snapshot", db_path=db_path)
st.success(f"✅ snapshot accodato (id #{aid}).")
@st.cache_data(ttl=60, show_spinner=False)
@@ -0,0 +1,309 @@
"""Calibrazione page — distribuzioni storiche dei segnali per tarare le soglie.
Legge dalla tabella ``market_snapshots`` (popolata dal job dedicato cron
``*/15``). Per ogni metrica osservabile mostra:
* istogramma + linea verticale della soglia attuale di config,
* percentili P5/P10/P25/P50/P75/P90/P95,
* percentuale di tick che la soglia attuale avrebbe filtrato.
L'idea è scegliere le soglie sui percentili reali del proprio
ambiente (testnet o mainnet), invece di valori fissati a istinto.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from pathlib import Path
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from cerbero_bite.config.loader import load_strategy
from cerbero_bite.gui.data_layer import (
DEFAULT_DB_PATH,
humanize_dt,
load_market_snapshots,
)
from cerbero_bite.state.models import MarketSnapshotRecord
def _resolve_db() -> Path:
    """Return the database path used by this page.

    The ``CERBERO_BITE_GUI_DB`` environment variable takes precedence;
    ``DEFAULT_DB_PATH`` is used when it is not set.
    """
    configured = os.environ.get("CERBERO_BITE_GUI_DB", DEFAULT_DB_PATH)
    return Path(configured)
@dataclass(frozen=True)
class MetricSpec:
    """Descriptor of one metric plotted on the calibration page."""

    # Name of the record attribute the values are read from.
    field: str
    # Heading shown above the metric section.
    title: str
    # Unit shown in the heading and used as the histogram x-axis title.
    unit: str
    # Annotation text for the threshold line; when None a generic
    # "soglia <value>" label is used instead.
    threshold_label: str | None
    # Current threshold from config; None means no threshold line is drawn
    # and no "blocked" percentage is computed.
    threshold_value: float | None
    # How the threshold filters values: "below" (value < threshold),
    # "above" (value > threshold) or "above_abs" (|value| > threshold).
    threshold_direction: str
def _read_threshold(strategy: object | None, name: str) -> float | None:
    """Return ``strategy.entry.<name>`` as a float, or ``None`` if unavailable.

    ``None`` is returned when ``strategy`` is ``None``, when the attribute
    chain is missing, or when the value cannot be converted to ``float``.
    """
    if strategy is None:
        return None
    try:
        return float(getattr(strategy.entry, name))  # type: ignore[attr-defined]
    except (AttributeError, TypeError, ValueError):
        # Best effort: a missing or malformed threshold only means the
        # page draws no threshold line for that metric.
        return None


def _metric_specs(strategy: object | None) -> list[MetricSpec]:
    """Build the metric descriptors, reading current thresholds from config.

    Args:
        strategy: Loaded strategy configuration, or ``None`` when it could
            not be read. Only ``strategy.entry.funding_max_abs_annualized``,
            ``strategy.entry.dealer_gamma_min`` and ``strategy.entry.dvol_min``
            are consulted.

    Returns:
        One ``MetricSpec`` per plotted metric, in display order. Metrics
        without a configured threshold carry ``threshold_value=None``.
    """
    funding_max = _read_threshold(strategy, "funding_max_abs_annualized")
    dealer_min = _read_threshold(strategy, "dealer_gamma_min")
    dvol_min = _read_threshold(strategy, "dvol_min")

    return [
        MetricSpec(
            field="dvol",
            title="DVOL",
            unit="%",
            threshold_label=(
                f"DVOL min={dvol_min:.0f}" if dvol_min is not None else None
            ),
            threshold_value=dvol_min,
            threshold_direction="below",
        ),
        MetricSpec(
            field="realized_vol_30d",
            title="Realized vol 30d",
            unit="%",
            threshold_label=None,
            threshold_value=None,
            threshold_direction="below",
        ),
        MetricSpec(
            field="iv_minus_rv",
            # The metric is a difference, so the operator must be visible.
            title="IV - RV (30d)",
            unit="%",
            threshold_label=None,
            threshold_value=None,
            threshold_direction="below",
        ),
        MetricSpec(
            field="funding_perp_annualized",
            title="Funding perp annualized",
            unit="frazione",
            threshold_label=(
                f"|funding| max={funding_max:.2f}"
                if funding_max is not None
                else None
            ),
            threshold_value=funding_max,
            threshold_direction="above_abs",
        ),
        MetricSpec(
            field="funding_cross_annualized",
            title="Funding cross median annualized",
            unit="frazione",
            threshold_label=None,
            threshold_value=None,
            threshold_direction="above_abs",
        ),
        MetricSpec(
            field="dealer_net_gamma",
            title="Dealer net gamma",
            unit="USD",
            threshold_label=(
                f"min={dealer_min:.0f}" if dealer_min is not None else None
            ),
            threshold_value=dealer_min,
            threshold_direction="below",
        ),
        MetricSpec(
            field="oi_delta_pct_4h",
            title="OI delta % (4h)",
            unit="%",
            threshold_label=None,
            threshold_value=None,
            threshold_direction="below",
        ),
    ]
def _series(records: list[MarketSnapshotRecord], field: str) -> pd.Series:
values: list[float] = []
for r in records:
v = getattr(r, field, None)
if v is None:
continue
try:
values.append(float(v))
except (TypeError, ValueError):
continue
return pd.Series(values, dtype="float64")
def _percent_blocked(s: pd.Series, spec: MetricSpec) -> float | None:
if spec.threshold_value is None or s.empty:
return None
if spec.threshold_direction == "below":
return float((s < spec.threshold_value).mean())
if spec.threshold_direction == "above_abs":
return float((s.abs() > spec.threshold_value).mean())
if spec.threshold_direction == "above":
return float((s > spec.threshold_value).mean())
return None
def _percentiles_strip(s: pd.Series) -> None:
    """Render one metric tile per percentile (P5 … P95) of ``s``.

    For an empty series a "(nessun dato)" caption is shown instead.
    """
    if s.empty:
        st.caption("(nessun dato)")
        return

    levels = [0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95]
    tiles = st.columns(len(levels))
    for tile, level in zip(tiles, levels, strict=False):
        label = f"P{int(level * 100)}"
        value = f"{s.quantile(level):.4g}"
        tile.metric(label, value)
def _render_metric(spec: MetricSpec, records: list[MarketSnapshotRecord]) -> None:
    """Render the section for one metric.

    The section shows summary tiles (count, min, max, share of ticks the
    threshold would filter), a histogram with the configured threshold as
    a dashed red vertical line, and the percentile strip. When the records
    carry no value for the metric, only a heading and an info box are shown.
    """
    values = _series(records, spec.field)
    if values.empty:
        st.subheader(spec.title)
        st.info(
            f"Nessun valore disponibile per `{spec.field}`. "
            "Avvia il job `market_snapshot` (engine attivo, cron */15) per "
            "popolare la tabella."
        )
        return

    st.subheader(f"{spec.title} ({spec.unit})")

    blocked = _percent_blocked(values, spec)
    blocked_text = "" if blocked is None else f"{blocked:.0%}"
    count_tile, min_tile, max_tile, blocked_tile = st.columns(4)
    count_tile.metric("Tick raccolti", len(values))
    min_tile.metric("Min", f"{values.min():.4g}")
    max_tile.metric("Max", f"{values.max():.4g}")
    blocked_tile.metric(
        "% bloccato dalla soglia",
        blocked_text,
        help=(
            "Frazione di tick che la soglia di config avrebbe filtrato"
            f" se applicata a questa serie ({spec.threshold_direction})."
        ),
    )

    histogram = go.Figure()
    histogram.add_trace(
        go.Histogram(x=values, nbinsx=40, opacity=0.85, name="distrib.")
    )

    threshold = spec.threshold_value
    if threshold is not None:
        line_style = {"line_dash": "dash", "line_color": "red", "line_width": 2}
        histogram.add_vline(
            x=threshold,
            annotation_text=spec.threshold_label or f"soglia {threshold}",
            annotation_position="top",
            **line_style,
        )
        if spec.threshold_direction == "above_abs":
            # Symmetric filters also get the mirrored negative bound.
            histogram.add_vline(x=-threshold, annotation_text=None, **line_style)

    histogram.update_layout(
        height=280,
        margin={"l": 10, "r": 10, "t": 30, "b": 10},
        xaxis_title=spec.unit,
        yaxis_title="numero tick",
    )
    st.plotly_chart(histogram, use_container_width=True)
    _percentiles_strip(values)
def render() -> None:
    """Render the Calibrazione page.

    Lets the user pick an asset and a time window, loads the matching
    ``market_snapshots`` rows, shows fetch-quality counters, and then
    renders one section per metric with the current thresholds read from
    ``strategy.yaml``.
    """
    st.title("📐 Calibrazione")
    st.caption(
        "Distribuzioni storiche dei segnali raccolti dal job "
        "`market_snapshot` (cron */15). Usa i percentili reali per "
        "tarare le soglie in `strategy.yaml` invece di valori a istinto."
    )
    db_path = _resolve_db()

    col_a, col_b = st.columns(2)
    asset = col_a.selectbox("Asset", options=["ETH", "BTC"], index=0)
    window = col_b.selectbox(
        "Finestra",
        options=[
            "Tutto lo storico",
            "Ultime 24h",
            "Ultimi 7 giorni",
            "Ultimi 30 giorni",
        ],
        index=0,
    )

    # "Tutto lo storico" leaves ``start`` unset, i.e. no lower bound.
    now = datetime.now(UTC)
    start: datetime | None = None
    if window == "Ultime 24h":
        start = now - timedelta(hours=24)
    elif window == "Ultimi 7 giorni":
        start = now - timedelta(days=7)
    elif window == "Ultimi 30 giorni":
        start = now - timedelta(days=30)

    max_rows = 5000
    records = load_market_snapshots(
        asset=asset, db_path=db_path, start=start, limit=max_rows
    )
    if not records:
        st.info(
            "Nessun snapshot disponibile in questa finestra per "
            f"`{asset}`. Avvia l'engine (`cerbero-bite start`) e attendi "
            "almeno un tick del job `market_snapshot` (cron */15)."
        )
        return

    # Rows arrive newest-first, so the last element is the oldest snapshot.
    st.caption(
        f"{len(records)} snapshot · primo {humanize_dt(records[-1].timestamp)} "
        f"· ultimo {humanize_dt(records[0].timestamp)}"
    )
    if len(records) >= max_rows:
        # The query is capped; make the truncation visible instead of
        # silently computing percentiles on a shorter window than selected.
        st.warning(
            f"Raggiunto il limite di {max_rows} snapshot: sono mostrati solo "
            "i più recenti, i dati più vecchi potrebbero essere esclusi "
            "dalle statistiche."
        )

    # fetch_ok count as a quality indicator for the series.
    n_ok = sum(1 for r in records if r.fetch_ok)
    cols = st.columns(3)
    cols[0].metric("Snapshot totali", len(records))
    cols[1].metric("fetch_ok = true", n_ok)
    # ``records`` is non-empty here (early return above), so the ratio is safe.
    cols[2].metric("Tasso ok", f"{n_ok / len(records):.0%}")

    st.divider()

    # Load strategy.yaml to read the current thresholds. The path is
    # relative, so it is resolved against the process working directory.
    try:
        strategy = load_strategy(Path("strategy.yaml"))
    except Exception as exc:
        # Best effort: the page still renders, just without threshold lines.
        st.warning(
            f"Impossibile leggere `strategy.yaml`: {type(exc).__name__}: {exc}"
        )
        strategy = None

    for spec in _metric_specs(strategy):
        _render_metric(spec, records)
        st.divider()
# Module-level call: the page is drawn every time this script is executed.
# NOTE(review): presumably run by Streamlit's multipage runner — confirm.
render()