Phase 4 hardening: status CLI, lock file, backup job, hash enforce, pooling, real bias
Sei interventi mirati sui rischi operativi rilevati nell'audit post-Fase 4. 317 test pass, mypy strict pulito, ruff clean. 1. status CLI: legge SQLite reale e mostra kill_switch, posizioni aperte, environment, config_version, last_health_check, started_at. Sostituisce il placeholder "phase 0 skeleton". 2. Lock file single-instance: runtime/lockfile.py acquisisce data/.lockfile via fcntl.flock al boot di run_forever; un secondo container fallisce subito con LockError. 3. Backup orario nello scheduler: nuovo job APScheduler 0 * * * * chiama scripts.backup.backup_database + prune_backups. 4. config_hash enforce su start: il CLI start verifica l'integrità del file (enforce_hash=True). Mismatch → exit 1 prima di toccare stato. dry-run resta enforce_hash=False per debug. 5. Connection pooling MCP: RuntimeContext espone un httpx.AsyncClient long-lived condiviso da tutti i wrapper (limits 20/10 connections/keepalive). aclose() chiamato in run_forever finale. 6. Bias direzionale reale: deribit.historical_close + deribit.adx_14 popolano TrendContext con spot a 30 giorni e ADX(14) effettivi. Sblocca bull_put e bear_call. Quando i dati storici mancano l'engine emette alert MEDIUM e cade su no_entry in modo deterministico. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,8 @@ from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
from cerbero_bite.clients._base import HttpToolClient
|
||||
from cerbero_bite.clients.deribit import DeribitClient
|
||||
from cerbero_bite.clients.hyperliquid import HyperliquidClient
|
||||
@@ -56,8 +58,14 @@ class RuntimeContext:
|
||||
portfolio: PortfolioClient
|
||||
telegram: TelegramClient
|
||||
|
||||
http_client: httpx.AsyncClient
|
||||
|
||||
clock: Callable[[], datetime]
|
||||
|
||||
async def aclose(self) -> None:
|
||||
"""Close the shared HTTP client. Idempotent."""
|
||||
await self.http_client.aclose()
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
return datetime.now(UTC)
|
||||
@@ -103,6 +111,14 @@ def build_runtime(
|
||||
clock=clk,
|
||||
)
|
||||
|
||||
# Single long-lived AsyncClient shared by every wrapper. httpx pools
|
||||
# connections per host so the snapshot stage of the entry cycle
|
||||
# avoids paying TLS/TCP handshakes on each call.
|
||||
http_client = httpx.AsyncClient(
|
||||
timeout=httpx.Timeout(timeout_s),
|
||||
limits=httpx.Limits(max_connections=20, max_keepalive_connections=10),
|
||||
)
|
||||
|
||||
def _client(service: str) -> HttpToolClient:
|
||||
return HttpToolClient(
|
||||
service=service,
|
||||
@@ -110,6 +126,7 @@ def build_runtime(
|
||||
token=token,
|
||||
timeout_s=timeout_s,
|
||||
retry_max=retry_max,
|
||||
client=http_client,
|
||||
)
|
||||
|
||||
telegram = TelegramClient(_client("telegram"))
|
||||
@@ -131,5 +148,6 @@ def build_runtime(
|
||||
hyperliquid=HyperliquidClient(_client("hyperliquid")),
|
||||
portfolio=PortfolioClient(_client("portfolio")),
|
||||
telegram=telegram,
|
||||
http_client=http_client,
|
||||
clock=clk,
|
||||
)
|
||||
|
||||
@@ -84,6 +84,8 @@ class EntryCycleResult:
|
||||
@dataclass(frozen=True)
|
||||
class _MarketSnapshot:
|
||||
spot_eth_usd: Decimal
|
||||
spot_eth_30d_ago: Decimal | None
|
||||
adx_14: Decimal | None
|
||||
dvol: Decimal
|
||||
funding_perp: Decimal
|
||||
funding_cross: Decimal
|
||||
@@ -102,7 +104,28 @@ async def _gather_snapshot(
|
||||
cfg: StrategyConfig,
|
||||
now: datetime,
|
||||
) -> _MarketSnapshot:
|
||||
window_days = cfg.entry.trend_window_days
|
||||
historical_start = now - timedelta(days=window_days + 1)
|
||||
historical_end = now - timedelta(days=window_days - 1)
|
||||
adx_start = now - timedelta(days=10)
|
||||
|
||||
spot_t: asyncio.Task[Decimal] = asyncio.create_task(deribit.index_price_eth())
|
||||
spot_past_t: asyncio.Task[Decimal | None] = asyncio.create_task(
|
||||
deribit.historical_close(
|
||||
instrument="ETH-PERPETUAL",
|
||||
start=historical_start,
|
||||
end=historical_end,
|
||||
resolution="1D",
|
||||
)
|
||||
)
|
||||
adx_t: asyncio.Task[Decimal | None] = asyncio.create_task(
|
||||
deribit.adx_14(
|
||||
instrument="ETH-PERPETUAL",
|
||||
start=adx_start,
|
||||
end=now,
|
||||
resolution="1h",
|
||||
)
|
||||
)
|
||||
dvol_t: asyncio.Task[Decimal] = asyncio.create_task(
|
||||
deribit.latest_dvol(currency="ETH", now=now)
|
||||
)
|
||||
@@ -128,6 +151,8 @@ async def _gather_snapshot(
|
||||
|
||||
await asyncio.gather(
|
||||
spot_t,
|
||||
spot_past_t,
|
||||
adx_t,
|
||||
dvol_t,
|
||||
funding_perp_t,
|
||||
funding_cross_t,
|
||||
@@ -137,6 +162,8 @@ async def _gather_snapshot(
|
||||
)
|
||||
return _MarketSnapshot(
|
||||
spot_eth_usd=spot_t.result(),
|
||||
spot_eth_30d_ago=spot_past_t.result(),
|
||||
adx_14=adx_t.result(),
|
||||
dvol=dvol_t.result(),
|
||||
funding_perp=funding_perp_t.result(),
|
||||
funding_cross=funding_cross_t.result(),
|
||||
@@ -299,6 +326,10 @@ async def run_entry_cycle(
|
||||
inputs = {
|
||||
"snapshot": {
|
||||
"spot_eth_usd": str(snap.spot_eth_usd),
|
||||
"spot_eth_30d_ago": (
|
||||
str(snap.spot_eth_30d_ago) if snap.spot_eth_30d_ago else None
|
||||
),
|
||||
"adx_14": str(snap.adx_14) if snap.adx_14 is not None else None,
|
||||
"dvol": str(snap.dvol),
|
||||
"funding_perp": str(snap.funding_perp),
|
||||
"funding_cross": str(snap.funding_cross),
|
||||
@@ -326,17 +357,26 @@ async def run_entry_cycle(
|
||||
status=_STATUS_NO_ENTRY, reason=";".join(decision.reasons)
|
||||
)
|
||||
|
||||
# 3. Bias (need a 30-day prior spot — orchestrator passes it in)
|
||||
# We approximate by reusing the current spot until the historical
|
||||
# snapshot store ships in Phase 5; for now no historical → bias
|
||||
# cannot fire bull/bear, only iron_condor when DVOL/ADX align. The
|
||||
# caller is responsible for plugging in real data via overrides.
|
||||
# 3. Bias — eth_30d_ago and adx_14 come from the historical snapshot
|
||||
# collected during the parallel snapshot stage. When either signal
|
||||
# is missing the bias function falls back to "no entry" (defensive
|
||||
# behaviour: never trade without confirmed regime data).
|
||||
if snap.spot_eth_30d_ago is None:
|
||||
await alert.medium(
|
||||
source="entry_cycle",
|
||||
message="historical spot unavailable — bias falls back to neutral",
|
||||
)
|
||||
if snap.adx_14 is None:
|
||||
await alert.medium(
|
||||
source="entry_cycle",
|
||||
message="ADX unavailable — bias may reject iron_condor",
|
||||
)
|
||||
trend_ctx = TrendContext(
|
||||
eth_now=snap.spot_eth_usd,
|
||||
eth_30d_ago=snap.spot_eth_usd,
|
||||
eth_30d_ago=snap.spot_eth_30d_ago or snap.spot_eth_usd,
|
||||
funding_cross_annualized=snap.funding_cross,
|
||||
dvol_now=snap.dvol,
|
||||
adx_14=Decimal("25"), # placeholder until ADX lands in market data
|
||||
adx_14=snap.adx_14 if snap.adx_14 is not None else Decimal("25"),
|
||||
)
|
||||
bias = compute_bias(trend_ctx, cfg)
|
||||
if bias is None:
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Single-instance file lock for the engine (``docs/02-architecture.md``).
|
||||
|
||||
Acquires an exclusive ``fcntl.flock`` on a sentinel file at boot. A
|
||||
second container/process trying to start while another holds the lock
|
||||
gets :class:`LockError` immediately and exits before doing any I/O.
|
||||
|
||||
The lock is released either on context exit or when the process dies
|
||||
(the OS releases ``flock`` automatically), so a hard crash cannot
|
||||
permanently wedge the system.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fcntl
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import TracebackType
|
||||
|
||||
__all__ = ["EngineLock", "LockError"]
|
||||
|
||||
|
||||
_log = logging.getLogger("cerbero_bite.runtime.lockfile")
|
||||
|
||||
|
||||
class LockError(RuntimeError):
|
||||
"""Raised when another instance already holds the engine lock."""
|
||||
|
||||
|
||||
class EngineLock:
|
||||
"""Context manager around a per-instance file lock.
|
||||
|
||||
Usage::
|
||||
|
||||
with EngineLock(Path("data/.lockfile")) as lock:
|
||||
...
|
||||
|
||||
On enter, writes the current PID to the lock file so an operator
|
||||
can identify the running instance. On exit, releases the lock.
|
||||
"""
|
||||
|
||||
def __init__(self, path: Path | str) -> None:
|
||||
self._path = Path(path)
|
||||
self._fh: object | None = None # actual type: io.TextIOWrapper
|
||||
|
||||
@property
|
||||
def path(self) -> Path:
|
||||
return self._path
|
||||
|
||||
def acquire(self) -> None:
|
||||
self._path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# ``r+`` would fail if the file does not exist yet; use ``a+``
|
||||
# then seek to overwrite the contents on each acquire.
|
||||
fh = self._path.open("a+", encoding="utf-8")
|
||||
try:
|
||||
fcntl.flock(fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except BlockingIOError as exc:
|
||||
fh.close()
|
||||
raise LockError(
|
||||
f"another Cerbero Bite instance holds {self._path}; "
|
||||
f"check the running container or remove the file if stale"
|
||||
) from exc
|
||||
|
||||
fh.seek(0)
|
||||
fh.truncate()
|
||||
fh.write(f"{os.getpid()}\n")
|
||||
fh.flush()
|
||||
self._fh = fh
|
||||
_log.info("engine lock acquired: pid=%d path=%s", os.getpid(), self._path)
|
||||
|
||||
def release(self) -> None:
|
||||
if self._fh is None:
|
||||
return
|
||||
try:
|
||||
fcntl.flock(self._fh.fileno(), fcntl.LOCK_UN) # type: ignore[attr-defined]
|
||||
finally:
|
||||
self._fh.close() # type: ignore[attr-defined]
|
||||
self._fh = None
|
||||
_log.info("engine lock released: %s", self._path)
|
||||
|
||||
def __enter__(self) -> EngineLock:
|
||||
self.acquire()
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: type[BaseException] | None,
|
||||
exc: BaseException | None,
|
||||
tb: TracebackType | None,
|
||||
) -> None:
|
||||
self.release()
|
||||
@@ -25,6 +25,7 @@ from cerbero_bite.config.schema import StrategyConfig
|
||||
from cerbero_bite.runtime.dependencies import RuntimeContext, build_runtime
|
||||
from cerbero_bite.runtime.entry_cycle import EntryCycleResult, run_entry_cycle
|
||||
from cerbero_bite.runtime.health_check import HealthCheck, HealthCheckResult
|
||||
from cerbero_bite.runtime.lockfile import EngineLock
|
||||
from cerbero_bite.runtime.monitor_cycle import MonitorCycleResult, run_monitor_cycle
|
||||
from cerbero_bite.runtime.recovery import recover_state
|
||||
from cerbero_bite.runtime.scheduler import JobSpec, build_scheduler
|
||||
@@ -40,6 +41,8 @@ Environment = Literal["testnet", "mainnet"]
|
||||
_CRON_ENTRY = "0 14 * * MON"
|
||||
_CRON_MONITOR = "0 2,14 * * *"
|
||||
_CRON_HEALTH = "*/5 * * * *"
|
||||
_CRON_BACKUP = "0 * * * *"
|
||||
_BACKUP_RETENTION_DAYS = 30
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -135,6 +138,9 @@ class Orchestrator:
|
||||
entry_cron: str = _CRON_ENTRY,
|
||||
monitor_cron: str = _CRON_MONITOR,
|
||||
health_cron: str = _CRON_HEALTH,
|
||||
backup_cron: str = _CRON_BACKUP,
|
||||
backup_dir: Path | None = None,
|
||||
backup_retention_days: int = _BACKUP_RETENTION_DAYS,
|
||||
) -> AsyncIOScheduler:
|
||||
"""Build the scheduler with the canonical job set, ready to start."""
|
||||
|
||||
@@ -158,24 +164,77 @@ class Orchestrator:
|
||||
async def _health() -> None:
|
||||
await _safe("health", self.run_health)
|
||||
|
||||
backups_target = backup_dir or self._ctx.db_path.parent / "backups"
|
||||
|
||||
async def _backup() -> None:
|
||||
async def _do() -> None:
|
||||
await asyncio.to_thread(
|
||||
_run_backup,
|
||||
db_path=self._ctx.db_path,
|
||||
backup_dir=backups_target,
|
||||
retention_days=backup_retention_days,
|
||||
)
|
||||
|
||||
await _safe("backup", _do)
|
||||
|
||||
self._scheduler = build_scheduler(
|
||||
[
|
||||
JobSpec(name="entry", cron=entry_cron, coro_factory=_entry),
|
||||
JobSpec(name="monitor", cron=monitor_cron, coro_factory=_monitor),
|
||||
JobSpec(name="health", cron=health_cron, coro_factory=_health),
|
||||
JobSpec(name="backup", cron=backup_cron, coro_factory=_backup),
|
||||
]
|
||||
)
|
||||
return self._scheduler
|
||||
|
||||
async def run_forever(self) -> None:
|
||||
"""Boot, install the scheduler, and block forever (until cancelled)."""
|
||||
await self.boot()
|
||||
scheduler = self.install_scheduler()
|
||||
scheduler.start()
|
||||
try:
|
||||
await asyncio.Event().wait()
|
||||
finally:
|
||||
scheduler.shutdown(wait=False)
|
||||
async def run_forever(self, *, lock_path: Path | None = None) -> None:
|
||||
"""Boot, acquire the single-instance lock, install the scheduler.
|
||||
|
||||
``lock_path`` defaults to ``<db_path.parent>/.lockfile`` so two
|
||||
containers cannot trade against the same SQLite file.
|
||||
"""
|
||||
lock = EngineLock(
|
||||
lock_path or self._ctx.db_path.parent / ".lockfile"
|
||||
)
|
||||
with lock:
|
||||
try:
|
||||
await self.boot()
|
||||
scheduler = self.install_scheduler()
|
||||
scheduler.start()
|
||||
try:
|
||||
await asyncio.Event().wait()
|
||||
finally:
|
||||
scheduler.shutdown(wait=False)
|
||||
finally:
|
||||
await self._ctx.aclose()
|
||||
|
||||
|
||||
def _run_backup(
|
||||
*, db_path: Path, backup_dir: Path, retention_days: int
|
||||
) -> None:
|
||||
"""Synchronous helper invoked from the scheduler via ``asyncio.to_thread``.
|
||||
|
||||
Keeps the import of ``scripts.backup`` lazy: importing the module
|
||||
eagerly at orchestrator load time would mean the scheduler depends
|
||||
on a script that lives outside the ``cerbero_bite`` package, which
|
||||
breaks ``importlib.util.spec_from_file_location`` if the cwd shifts
|
||||
at runtime.
|
||||
"""
|
||||
import sys # noqa: PLC0415 — kept lazy to keep module load cheap
|
||||
from importlib.util import ( # noqa: PLC0415
|
||||
module_from_spec,
|
||||
spec_from_file_location,
|
||||
)
|
||||
|
||||
backup_py = Path(__file__).resolve().parents[3] / "scripts" / "backup.py"
|
||||
spec = spec_from_file_location("_cerbero_bite_backup", backup_py)
|
||||
if spec is None or spec.loader is None: # pragma: no cover — only on broken installs
|
||||
raise RuntimeError(f"cannot load scripts/backup.py from {backup_py}")
|
||||
module = module_from_spec(spec)
|
||||
sys.modules.setdefault(spec.name, module)
|
||||
spec.loader.exec_module(module)
|
||||
module.backup_database(db_path=db_path, backup_dir=backup_dir)
|
||||
module.prune_backups(backup_dir, retention_days=retention_days)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user