Phase 4: orchestrator + cycles auto-execute
Componente runtime/ che cabla core+clients+state+safety in un engine autonomo notify-only: nessuna conferma manuale, ordini combo piazzati direttamente quando le regole passano. 311 test pass, copertura totale 94%, runtime/ 90%, mypy strict pulito, ruff clean. Moduli: - runtime/alert_manager.py: escalation tree LOW/MEDIUM/HIGH/CRITICAL → audit + Telegram + kill switch. - runtime/dependencies.py: build_runtime() costruisce RuntimeContext con tutti i client MCP, repository, audit log, kill switch, alert manager. - runtime/entry_cycle.py: flusso settimanale (snapshot parallelo spot/dvol/funding/macro/holdings/equity → validate_entry → compute_bias → options_chain → select_strikes → liquidity_gate → sizing_engine → combo_builder.build → place_combo_order → notify_position_opened). - runtime/monitor_cycle.py: loop 12h con dvol_history per il return_4h, exit_decision.evaluate, close auto-execute. - runtime/health_check.py: probe parallelo MCP + SQLite + environment match; 3 strikes consecutivi → kill switch HIGH. - runtime/recovery.py: riconciliazione SQLite vs broker all'avvio; mismatch → kill switch CRITICAL. - runtime/scheduler.py: AsyncIOScheduler builder con cron entry (lun 14:00), monitor (02/14), health (5min). - runtime/orchestrator.py: façade boot() + run_entry/monitor/health + install_scheduler + run_forever, con env check vs strategy. CLI: - start: avvia engine bloccante (asyncio.run + scheduler). - dry-run --cycle entry|monitor|health: esegue un singolo ciclo per debug/test in produzione. - stop: documenta lo shutdown via SIGTERM al container. Documentazione: - docs/06-operational-flow.md riscritto per il modello notify-only auto-execute (no conferma manuale, no memory, no brain-bridge). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,128 @@
|
||||
"""Periodic health probe across MCP services + SQLite + environment.
|
||||
|
||||
The probe is fail-soft: every check is wrapped in a try/except so a
|
||||
single misbehaving service does not abort the others. The orchestrator
|
||||
keeps a counter of consecutive failures: at the third failure the
|
||||
kill switch arms (HIGH severity); any time the probe succeeds the
|
||||
counter resets and a fresh ``HEALTH_OK`` line is appended to the
|
||||
audit log so the dead-man watcher stays quiet.
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import asyncio
import logging
from collections.abc import Awaitable
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Literal

from cerbero_bite.runtime.alert_manager import Severity
from cerbero_bite.runtime.dependencies import RuntimeContext
from cerbero_bite.state import connect
|
||||
|
||||
__all__ = ["HealthCheck", "HealthCheckResult", "HealthState"]
|
||||
|
||||
|
||||
_log = logging.getLogger("cerbero_bite.runtime.health")
|
||||
|
||||
|
||||
HealthState = Literal["ok", "degraded"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HealthCheckResult:
    """Immutable outcome of a single health-check run."""

    # Overall verdict of the run: "ok" or "degraded".
    state: HealthState
    # One (service, reason) pair per failed check; empty when nothing failed.
    failures: list[tuple[str, str]]
    # Value of the probe's consecutive-failure counter after this run.
    consecutive_failures: int
|
||||
|
||||
|
||||
class HealthCheck:
    """Stateful health probe; remembers consecutive failures across calls.

    Each :meth:`run` probes the remote services concurrently, then SQLite,
    and appends the outcome to the audit log. A run with at least one
    failure increments an internal counter; a clean run resets it to zero.
    Once the counter reaches ``kill_after`` a ``Severity.HIGH`` alert is
    emitted through the alert manager on every degraded run.
    """

    def __init__(
        self,
        ctx: RuntimeContext,
        *,
        expected_environment: Literal["testnet", "mainnet"],
        kill_after: int = 3,
    ) -> None:
        """Bind the probe to a runtime context.

        Args:
            ctx: Runtime context providing the service clients, clock,
                repository, audit log and alert manager used by the probe.
            expected_environment: Environment the Deribit client must
                report; any other value counts as a failed check.
            kill_after: Number of consecutive degraded runs at which the
                HIGH alert starts being emitted.
        """
        self._ctx = ctx
        self._expected = expected_environment
        self._kill_after = kill_after
        self._consecutive = 0

    async def run(self, *, now: datetime | None = None) -> HealthCheckResult:
        """Execute one full health probe and record its outcome.

        Args:
            now: Timestamp to stamp the run with; defaults to the context
                clock. It is converted to UTC before use.

        Returns:
            The state of this run, the list of ``(service, reason)``
            failures (in a fixed service order, SQLite last) and the
            updated consecutive-failure counter.
        """
        when = (now or self._ctx.clock()).astimezone(UTC)

        async def _probe(
            service: str, check: Awaitable[object]
        ) -> tuple[str, str] | None:
            # Fail-soft wrapper: one misbehaving service must not abort
            # the other checks, so the error is returned, not raised.
            try:
                await check
            except Exception as exc:  # surface every error to the operator
                return service, f"{type(exc).__name__}: {exc}"
            return None

        outcomes = await asyncio.gather(
            _probe("deribit", self._probe_deribit()),
            _probe("macro", self._ctx.macro.get_calendar(days=1)),
            _probe("sentiment", self._probe_sentiment()),
            _probe("hyperliquid", self._ctx.hyperliquid.funding_rate_annualized("ETH")),
            _probe("portfolio", self._ctx.portfolio.total_equity_eur()),
        )
        # gather() yields results in argument order, so the failure list is
        # deterministic regardless of which service happens to fail first.
        failures: list[tuple[str, str]] = [
            outcome for outcome in outcomes if outcome is not None
        ]

        # SQLite health: lightweight transaction.
        try:
            conn = connect(self._ctx.db_path)
            try:
                self._ctx.repository.touch_health_check(conn, now=when)
            finally:
                conn.close()
        except Exception as exc:  # pragma: no cover — sqlite errors are rare
            failures.append(("sqlite", f"{type(exc).__name__}: {exc}"))

        if failures:
            self._consecutive += 1
            state: HealthState = "degraded"
            self._ctx.audit_log.append(
                event="HEALTH_DEGRADED",
                payload={
                    "failures": failures,
                    "consecutive": self._consecutive,
                },
                now=when,
            )
            # Escalate once the streak reaches the threshold; the alert is
            # repeated on every further degraded run until a clean one.
            if self._consecutive >= self._kill_after:
                await self._ctx.alert_manager.emit(
                    Severity.HIGH,
                    source="health_check",
                    message=(
                        f"{self._consecutive} consecutive health-check failures "
                        f"(latest: {failures})"
                    ),
                )
        else:
            # A clean run ends the streak and leaves a fresh HEALTH_OK line
            # in the audit log.
            self._consecutive = 0
            state = "ok"
            self._ctx.audit_log.append(
                event="HEALTH_OK", payload={}, now=when
            )

        return HealthCheckResult(
            state=state,
            failures=failures,
            consecutive_failures=self._consecutive,
        )

    async def _probe_deribit(self) -> None:
        """Check that Deribit answers and reports the expected environment.

        Raises:
            RuntimeError: If the reported environment differs from the one
                configured at construction time.
        """
        info = await self._ctx.deribit.environment_info()
        if info.environment != self._expected:
            raise RuntimeError(
                f"deribit environment mismatch: expected {self._expected}, "
                f"got {info.environment}"
            )

    async def _probe_sentiment(self) -> None:
        """Check that the sentiment service completes an HTTP round-trip."""
        # Avoid funding_cross which would raise on empty snapshot during
        # the health probe; we only need a successful HTTP round-trip.
        # NOTE(review): this reaches into the client's private ``_http``
        # attribute — consider exposing a public ping on the client.
        await self._ctx.sentiment._http.call(
            "get_cross_exchange_funding", {"assets": ["ETH"]}
        )
|
||||
Reference in New Issue
Block a user