Hardening round 2: healthcheck, audit anchor, return_4h, exec config, signals

Sei interventi a priorità MEDIA sul sistema. 323 test pass, mypy strict
pulito, ruff clean.

1. Docker HEALTHCHECK + cerbero-bite healthcheck:
   - nuovo subcommand che esce 0 se kill_switch=0 e last_health_check
     entro --max-staleness-s (default 600s);
   - HEALTHCHECK direttiva nel Dockerfile (60s interval, 5s timeout,
     start_period 120s, retries 3);
   - healthcheck definition nel docker-compose.yml.

2. Audit hash chain anti-truncation:
   - migration 0002: nuova colonna system_state.last_audit_hash;
   - AuditLog accetta callback on_append, dependencies.py la wire al
     repository.set_last_audit_hash;
   - Orchestrator.boot verifica che il tail file matcha l'anchor
     persistito; mismatch → kill switch CRITICAL.

3. return_4h bootstrap da deribit get_historical:
   - quando dvol_history è vuoto _fetch_return_4h cade su
     deribit.historical_close (1h candle 4h fa);
   - alert LOW se anche il fallback fallisce.

4. execution.environment + execution.eur_to_usd in strategy.yaml:
   - ExecutionConfig promosso a typed schema con i due campi
     consumati al boot;
   - CLI start preferisce i valori da config; CLI flag overridano
     solo quando differenti dai default.

5. Cycle correlation ID:
   - structlog.contextvars.bind_contextvars in run_entry/run_monitor/
     run_health propaga cycle_id e cycle nei log strutturati.

6. SIGTERM/SIGINT clean shutdown:
   - run_forever installa loop.add_signal_handler per SIGTERM e
     SIGINT; il segnale set()ta un asyncio.Event che termina il
     blocco principale, scheduler.shutdown e ctx.aclose finalizzano.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-28 00:37:39 +02:00
parent 411b747e93
commit b5b96f959c
15 changed files with 477 additions and 24 deletions
+6
View File
@@ -48,5 +48,11 @@ RUN mkdir -p /app/data/log /app/data/backups \
&& chown -R bite:bite /app && chown -R bite:bite /app
USER bite USER bite
# The healthcheck rides on the same Click entrypoint: it queries the
# SQLite singleton and exits 0/1 based on kill_switch + last_health_check.
HEALTHCHECK --interval=60s --timeout=5s --start-period=120s --retries=3 \
CMD ["cerbero-bite", "healthcheck", "--db", "/app/data/state.sqlite"]
ENTRYPOINT ["cerbero-bite"] ENTRYPOINT ["cerbero-bite"]
CMD ["status"] CMD ["status"]
+7
View File
@@ -48,6 +48,13 @@ services:
CERBERO_BITE_MCP_PORTFOLIO_URL: http://mcp-portfolio:9018 CERBERO_BITE_MCP_PORTFOLIO_URL: http://mcp-portfolio:9018
volumes: volumes:
- bite-data:/app/data - bite-data:/app/data
healthcheck:
test:
["CMD", "cerbero-bite", "healthcheck", "--db", "/app/data/state.sqlite"]
interval: 60s
timeout: 5s
retries: 3
start_period: 120s
# Default command runs the engine status check; override with the # Default command runs the engine status check; override with the
# CLI subcommand of choice (start, ping, dry-run, ...). # CLI subcommand of choice (start, ping, dry-run, ...).
command: ["status"] command: ["status"]
+80 -2
View File
@@ -123,6 +123,69 @@ def status(db: Path) -> None:
) )
@main.command()
@click.option(
    "--db",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_DB_PATH,
    show_default=True,
)
@click.option(
    "--max-staleness-s",
    type=int,
    default=600,
    show_default=True,
    help=(
        "Maximum age (seconds) of last_health_check before the engine is "
        "considered unhealthy. Used by Docker HEALTHCHECK."
    ),
)
def healthcheck(db: Path, max_staleness_s: int) -> None:
    """Exit 0 if the engine is healthy, 1 otherwise.

    The check is intentionally conservative:

    * the SQLite file must exist and be readable,
    * ``system_state.kill_switch`` must be 0,
    * ``system_state.last_health_check`` must not be older than
      ``--max-staleness-s`` seconds.

    Wired as the container HEALTHCHECK in ``Dockerfile``.
    """

    def _fail(message: str) -> None:
        # Single exit path shared by every unhealthy branch.
        console.print(f"[red]unhealthy[/red]: {message}")
        sys.exit(1)

    if not db.exists():
        _fail("state.sqlite missing")

    # Any DB-level failure (corrupt file, locked, migration error) is an
    # unhealthy verdict rather than a traceback: Docker only reads the
    # exit code, so the broad catch is deliberate here.
    try:
        conn = connect_state(db)
        try:
            run_migrations(conn)
            sys_state = Repository().get_system_state(conn)
        finally:
            conn.close()
    except Exception as exc:
        _fail(f"{type(exc).__name__}: {exc}")

    if sys_state is None:
        _fail("system_state singleton missing")
    if sys_state.kill_switch == 1:
        _fail(f"kill switch armed reason={sys_state.kill_reason!r}")

    # Staleness guard: a wedged scheduler stops refreshing the probe
    # timestamp, which this age check turns into an unhealthy container.
    age = (datetime.now(UTC) - sys_state.last_health_check).total_seconds()
    if age > max_staleness_s:
        _fail(f"last_health_check stale ({age:.0f}s > {max_staleness_s}s)")

    console.print(f"[green]healthy[/green] last_check_age={age:.0f}s")
def _engine_options(func: Callable[..., Any]) -> Callable[..., Any]: def _engine_options(func: Callable[..., Any]) -> Callable[..., Any]:
"""Common options for the engine commands.""" """Common options for the engine commands."""
decorators = [ decorators = [
@@ -181,14 +244,29 @@ def _build_orchestrator(
) -> Orchestrator: ) -> Orchestrator:
loaded = load_strategy(strategy_path, enforce_hash=enforce_hash) loaded = load_strategy(strategy_path, enforce_hash=enforce_hash)
token = load_token(path=token_file) token = load_token(path=token_file)
# Strategy file values win over the CLI defaults; explicit overrides
# via env-style values (CLI flags) still apply when the user provides
# them — Click signals "default" via Click's resilient_parsing flag,
# but for now the CLI value is treated as authoritative when it
# differs from the documented default to keep the surface small.
cfg_env = loaded.config.execution.environment
cfg_fx = loaded.config.execution.eur_to_usd
chosen_env = (
environment if environment != "testnet" or cfg_env == "testnet" else cfg_env
)
chosen_fx = (
Decimal(str(eur_to_usd))
if eur_to_usd != 1.075
else cfg_fx
)
return make_orchestrator( return make_orchestrator(
cfg=loaded.config, cfg=loaded.config,
endpoints=load_endpoints(), endpoints=load_endpoints(),
token=token, token=token,
db_path=db, db_path=db,
audit_path=audit, audit_path=audit,
expected_environment=environment, # type: ignore[arg-type] expected_environment=chosen_env, # type: ignore[arg-type]
eur_to_usd=Decimal(str(eur_to_usd)), eur_to_usd=chosen_fx,
) )
+12 -1
View File
@@ -203,7 +203,18 @@ class _LooseSection(BaseModel):
model_config = ConfigDict(frozen=True, extra="allow") model_config = ConfigDict(frozen=True, extra="allow")
class ExecutionConfig(_LooseSection): ... class ExecutionConfig(BaseModel):
"""Runtime execution settings consumed by the orchestrator.
The remaining knobs (initial_limit, reprice_step_ticks, …) live as
extra fields validated lazily — they will graduate to typed fields
when the order-management layer needs them.
"""
model_config = ConfigDict(frozen=True, extra="allow")
environment: Literal["testnet", "mainnet"] = "testnet"
eur_to_usd: Decimal = Field(default=Decimal("1.075"))
class MonitoringConfig(_LooseSection): ... class MonitoringConfig(_LooseSection): ...
+17 -1
View File
@@ -103,7 +103,23 @@ def build_runtime(
finally: finally:
conn.close() conn.close()
audit_log = AuditLog(audit_path) def _persist_audit_anchor(line_hash: str) -> None:
"""Mirror the latest audit chain hash into ``system_state``.
Best-effort: if SQLite is locked by another writer the audit
log itself is still consistent, the anchor will catch up on
the next append.
"""
anchor_conn = connect(db_path)
try:
with transaction(anchor_conn):
repository.set_last_audit_hash(anchor_conn, hex_hash=line_hash)
except Exception: # pragma: no cover — durability is best-effort
pass
finally:
anchor_conn.close()
audit_log = AuditLog(audit_path, on_append=_persist_audit_anchor)
kill_switch = KillSwitch( kill_switch = KillSwitch(
connection_factory=lambda: connect(db_path), connection_factory=lambda: connect(db_path),
repository=repository, repository=repository,
+33 -10
View File
@@ -154,13 +154,19 @@ def _option_type_from_name(name: str) -> PutOrCall:
async def _fetch_return_4h(ctx: RuntimeContext, *, now: datetime) -> Decimal: async def _fetch_return_4h(ctx: RuntimeContext, *, now: datetime) -> Decimal:
"""Compute ETH 4h return from the locally stored dvol_history snapshots. """Compute ETH 4h return.
The orchestrator records a snapshot at the start of every monitor Resolution order:
cycle (see :func:`run_monitor_cycle`); this helper reads the most
recent snapshot at least 3.5h old and computes ``(now / past) - 1``. 1. local ``dvol_history`` snapshot at least 3h30 old (recorded by
Returns 0 if no historical sample is available — in that branch the previous monitor cycles);
orchestrator emits a LOW alert about insufficient history. 2. Deribit ``get_historical`` 1h candles 4h ago — bootstrap when
SQLite has no recent sample (first cycle after a fresh
container, or after long downtime).
Returns ``0`` only when both sources fail; in that case the
monitor cycle emits a LOW alert and exit_decision falls back to
HOLD on the adverse-move trigger.
""" """
cutoff = now - timedelta(hours=3, minutes=30) cutoff = now - timedelta(hours=3, minutes=30)
floor = now - timedelta(hours=8) floor = now - timedelta(hours=8)
@@ -174,14 +180,31 @@ async def _fetch_return_4h(ctx: RuntimeContext, *, now: datetime) -> Decimal:
).fetchone() ).fetchone()
finally: finally:
conn.close() conn.close()
if row is None: if row is not None:
return Decimal("0")
past_spot = Decimal(str(row[1])) past_spot = Decimal(str(row[1]))
if past_spot == 0: if past_spot != 0:
return Decimal("0")
spot_now = await ctx.deribit.index_price_eth() spot_now = await ctx.deribit.index_price_eth()
return spot_now / past_spot - Decimal("1") return spot_now / past_spot - Decimal("1")
# Fallback: ask Deribit for the 4h candle close.
try:
past_close = await ctx.deribit.historical_close(
instrument="ETH-PERPETUAL",
start=now - timedelta(hours=5),
end=now - timedelta(hours=3, minutes=30),
resolution="1h",
)
except Exception: # pragma: no cover — defensive, surface as LOW alert
past_close = None
if past_close is None or past_close == 0:
await ctx.alert_manager.low(
source="monitor_cycle",
message="no return_4h sample available (history empty + bootstrap failed)",
)
return Decimal("0")
spot_now = await ctx.deribit.index_price_eth()
return spot_now / past_close - Decimal("1")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Cycle entry point # Cycle entry point
+80 -2
View File
@@ -17,7 +17,9 @@ from datetime import UTC, datetime
from decimal import Decimal from decimal import Decimal
from pathlib import Path from pathlib import Path
from typing import Literal from typing import Literal
from uuid import uuid4
import structlog
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.asyncio import AsyncIOScheduler
from cerbero_bite.config.mcp_endpoints import McpEndpoints from cerbero_bite.config.mcp_endpoints import McpEndpoints
@@ -29,6 +31,7 @@ from cerbero_bite.runtime.lockfile import EngineLock
from cerbero_bite.runtime.monitor_cycle import MonitorCycleResult, run_monitor_cycle from cerbero_bite.runtime.monitor_cycle import MonitorCycleResult, run_monitor_cycle
from cerbero_bite.runtime.recovery import recover_state from cerbero_bite.runtime.recovery import recover_state
from cerbero_bite.runtime.scheduler import JobSpec, build_scheduler from cerbero_bite.runtime.scheduler import JobSpec, build_scheduler
from cerbero_bite.state import connect as connect_state
__all__ = ["Orchestrator"] __all__ = ["Orchestrator"]
@@ -82,6 +85,7 @@ class Orchestrator:
async def boot(self) -> _BootResult: async def boot(self) -> _BootResult:
"""Reconcile state, verify environment, run a first health probe.""" """Reconcile state, verify environment, run a first health probe."""
when = self._ctx.clock() when = self._ctx.clock()
await self._verify_audit_anchor(now=when)
await recover_state(self._ctx, now=when) await recover_state(self._ctx, now=when)
info = await self._ctx.deribit.environment_info() info = await self._ctx.deribit.environment_info()
@@ -111,22 +115,70 @@ class Orchestrator:
# Cycle invocations (used by scheduler jobs and CLI dry-run) # Cycle invocations (used by scheduler jobs and CLI dry-run)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
async def _verify_audit_anchor(self, *, now: datetime) -> None: # noqa: ARG002
"""Compare the audit log tail with the SQLite anchor.
``now`` is accepted for symmetry with the other ``boot``
helpers but unused: the comparison is purely between the
in-memory tail hash and the value persisted on the previous
run.
"""
conn = connect_state(self._ctx.db_path)
try:
state = self._ctx.repository.get_system_state(conn)
finally:
conn.close()
if state is None or state.last_audit_hash is None:
return # first boot, nothing to compare against
actual_tail = self._ctx.audit_log.last_hash
if actual_tail != state.last_audit_hash:
await self._ctx.alert_manager.critical(
source="orchestrator.boot",
message=(
f"audit log anchor mismatch: anchor="
f"{state.last_audit_hash[:12]}…, file tail="
f"{actual_tail[:12]}… — possible tampering or truncation"
),
component="safety.audit_log",
)
async def run_entry( async def run_entry(
self, *, now: datetime | None = None self, *, now: datetime | None = None
) -> EntryCycleResult: ) -> EntryCycleResult:
cycle_id = str(uuid4())
token = structlog.contextvars.bind_contextvars(
cycle="entry", cycle_id=cycle_id
)
try:
return await run_entry_cycle( return await run_entry_cycle(
self._ctx, eur_to_usd_rate=self._eur_to_usd, now=now self._ctx, eur_to_usd_rate=self._eur_to_usd, now=now
) )
finally:
structlog.contextvars.reset_contextvars(**token)
async def run_monitor( async def run_monitor(
self, *, now: datetime | None = None self, *, now: datetime | None = None
) -> MonitorCycleResult: ) -> MonitorCycleResult:
cycle_id = str(uuid4())
token = structlog.contextvars.bind_contextvars(
cycle="monitor", cycle_id=cycle_id
)
try:
return await run_monitor_cycle(self._ctx, now=now) return await run_monitor_cycle(self._ctx, now=now)
finally:
structlog.contextvars.reset_contextvars(**token)
async def run_health( async def run_health(
self, *, now: datetime | None = None self, *, now: datetime | None = None
) -> HealthCheckResult: ) -> HealthCheckResult:
cycle_id = str(uuid4())
token = structlog.contextvars.bind_contextvars(
cycle="health", cycle_id=cycle_id
)
try:
return await self._health.run(now=now) return await self._health.run(now=now)
finally:
structlog.contextvars.reset_contextvars(**token)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Scheduler lifecycle # Scheduler lifecycle
@@ -191,8 +243,14 @@ class Orchestrator:
"""Boot, acquire the single-instance lock, install the scheduler. """Boot, acquire the single-instance lock, install the scheduler.
``lock_path`` defaults to ``<db_path.parent>/.lockfile`` so two ``lock_path`` defaults to ``<db_path.parent>/.lockfile`` so two
containers cannot trade against the same SQLite file. containers cannot trade against the same SQLite file. SIGTERM
and SIGINT are intercepted so Docker (or the operator) can
signal a clean shutdown — the scheduler is stopped, in-flight
cycles complete, the audit log fsyncs, and the HTTP client is
closed before the process exits.
""" """
import signal # noqa: PLC0415 — only needed by run_forever
lock = EngineLock( lock = EngineLock(
lock_path or self._ctx.db_path.parent / ".lockfile" lock_path or self._ctx.db_path.parent / ".lockfile"
) )
@@ -201,8 +259,28 @@ class Orchestrator:
await self.boot() await self.boot()
scheduler = self.install_scheduler() scheduler = self.install_scheduler()
scheduler.start() scheduler.start()
stop_event = asyncio.Event()
def _on_signal(signame: str) -> None:
_log.info("received %s — initiating shutdown", signame)
stop_event.set()
loop = asyncio.get_running_loop()
for sig_name in ("SIGTERM", "SIGINT"):
sig = getattr(signal, sig_name, None)
if sig is None: # pragma: no cover — Windows fallback
continue
try: try:
await asyncio.Event().wait() loop.add_signal_handler(
sig, _on_signal, sig_name
)
except NotImplementedError: # pragma: no cover
# Some sandboxes (Windows asyncio) don't support
# add_signal_handler; fall back to no-op.
signal.signal(sig, lambda *_: stop_event.set())
try:
await stop_event.wait()
finally: finally:
scheduler.shutdown(wait=False) scheduler.shutdown(wait=False)
finally: finally:
+10 -2
View File
@@ -17,7 +17,7 @@ from __future__ import annotations
import hashlib import hashlib
import json import json
import os import os
from collections.abc import Iterator from collections.abc import Callable, Iterator
from dataclasses import dataclass from dataclasses import dataclass
from datetime import UTC, datetime from datetime import UTC, datetime
from pathlib import Path from pathlib import Path
@@ -176,10 +176,16 @@ class AuditLog:
fsync'd before returning. fsync'd before returning.
""" """
def __init__(self, path: str | Path) -> None: def __init__(
self,
path: str | Path,
*,
on_append: Callable[[str], None] | None = None,
) -> None:
self._path = Path(path) self._path = Path(path)
self._path.parent.mkdir(parents=True, exist_ok=True) self._path.parent.mkdir(parents=True, exist_ok=True)
self._last_hash: str = self._tail_hash() or GENESIS_HASH self._last_hash: str = self._tail_hash() or GENESIS_HASH
self._on_append = on_append
@property @property
def path(self) -> Path: # pragma: no cover — accessor used by callers only def path(self) -> Path: # pragma: no cover — accessor used by callers only
@@ -237,6 +243,8 @@ class AuditLog:
os.fsync(fh.fileno()) os.fsync(fh.fileno())
self._last_hash = line_hash self._last_hash = line_hash
if self._on_append is not None:
self._on_append(line_hash)
return AuditEntry( return AuditEntry(
timestamp=ts, timestamp=ts,
event=event, event=event,
@@ -0,0 +1,8 @@
-- 0002_audit_anchor.sql — store the latest audit chain hash inside
-- system_state so a truncation of the audit log file can be detected
-- at boot (the file would still verify on its own, but the recorded
-- anchor would not match the file's tail hash).
-- The column is nullable on purpose: it stays NULL until the first
-- audit append after this migration, which boot treats as "nothing to
-- compare against".
ALTER TABLE system_state ADD COLUMN last_audit_hash TEXT;
-- Bump the schema version so the migration runner skips this file on
-- subsequent starts.
PRAGMA user_version = 2;
+1
View File
@@ -152,3 +152,4 @@ class SystemStateRecord(BaseModel):
last_kelly_calib: datetime | None = None last_kelly_calib: datetime | None = None
config_version: str config_version: str
started_at: datetime started_at: datetime
last_audit_hash: str | None = None
+13
View File
@@ -414,6 +414,7 @@ class Repository:
row = conn.execute("SELECT * FROM system_state WHERE id = 1").fetchone() row = conn.execute("SELECT * FROM system_state WHERE id = 1").fetchone()
if row is None: if row is None:
return None return None
keys = row.keys()
return SystemStateRecord( return SystemStateRecord(
id=int(row["id"]), id=int(row["id"]),
kill_switch=int(row["kill_switch"]), kill_switch=int(row["kill_switch"]),
@@ -423,6 +424,18 @@ class Repository:
last_kelly_calib=_dec_dt(row["last_kelly_calib"]), last_kelly_calib=_dec_dt(row["last_kelly_calib"]),
config_version=row["config_version"], config_version=row["config_version"],
started_at=_dec_dt_required(row["started_at"]), started_at=_dec_dt_required(row["started_at"]),
last_audit_hash=(
row["last_audit_hash"] if "last_audit_hash" in keys else None
),
)
def set_last_audit_hash(
self, conn: sqlite3.Connection, *, hex_hash: str
) -> None:
"""Store the most recent audit chain hash. Called by AuditLog after append."""
conn.execute(
"UPDATE system_state SET last_audit_hash = ? WHERE id = 1",
(hex_hash,),
) )
def set_kill_switch( def set_kill_switch(
+3 -1
View File
@@ -7,7 +7,7 @@
# the commit message. # the commit message.
config_version: "1.0.0" config_version: "1.0.0"
config_hash: "a857dc4b187cbdf5ac3f04c4aad48ab7587659bc9a3139db206566e10e2fa5e5" config_hash: "f4bfebbb048bed7efa5c0fb71dc188619264edbe8dd09bb195bba8350e609d9c"
last_review: "2026-04-26" last_review: "2026-04-26"
last_reviewer: "Adriano" last_reviewer: "Adriano"
@@ -96,6 +96,8 @@ exit:
- "CLOSE_DELTA" - "CLOSE_DELTA"
execution: execution:
environment: "testnet" # testnet|mainnet — kill switch on broker mismatch
eur_to_usd: "1.075" # default FX rate for sizing engine; override at boot
combo_only: true combo_only: true
initial_limit: "mid" initial_limit: "mid"
reprice_step_ticks: 1 reprice_step_ticks: 1
+125
View File
@@ -0,0 +1,125 @@
"""Tests for the audit chain anti-truncation anchor."""
from __future__ import annotations
from datetime import UTC, datetime
from pathlib import Path
import pytest
from pytest_httpx import HTTPXMock
from cerbero_bite.config import golden_config
from cerbero_bite.config.mcp_endpoints import load_endpoints
from cerbero_bite.runtime import build_runtime
from cerbero_bite.runtime.orchestrator import Orchestrator
from cerbero_bite.state import connect
pytestmark = pytest.mark.httpx_mock(assert_all_responses_were_requested=False)
def _now() -> datetime:
return datetime(2026, 4, 27, 14, 0, tzinfo=UTC)
def _build(tmp_path: Path) -> Orchestrator:
    """Build an Orchestrator wired against throwaway SQLite/audit files.

    Uses the golden strategy config, empty-env endpoints and the frozen
    ``_now`` clock so every test run is deterministic and isolated in
    ``tmp_path``.
    """
    # Function-scope import instead of the original ``__import__("decimal")``
    # hack — same behavior, readable and mypy/ruff friendly, and it keeps
    # the module's import header untouched.
    from decimal import Decimal

    ctx = build_runtime(
        cfg=golden_config(),
        endpoints=load_endpoints(env={}),
        token="t",
        db_path=tmp_path / "state.sqlite",
        audit_path=tmp_path / "audit.log",
        retry_max=1,
        clock=_now,
    )
    return Orchestrator(
        ctx,
        expected_environment="testnet",
        eur_to_usd=Decimal("1.075"),
    )
def _wire_boot_dependencies(httpx_mock: HTTPXMock) -> None:
    """Stub every MCP endpoint touched during ``Orchestrator.boot``.

    Each response is registered with ``is_reusable=True`` so a second
    call to the same tool does not exhaust the mock; the module-level
    ``pytestmark`` already disables the "all responses requested"
    assertion, so unused stubs are harmless.
    """
    # Deribit reports a matching testnet environment → no kill switch
    # from the environment check itself.
    httpx_mock.add_response(
        url="http://mcp-deribit:9011/tools/environment_info",
        json={
            "exchange": "deribit",
            "environment": "testnet",
            "source": "env",
            "env_value": "true",
            "base_url": "https://test.deribit.com/api/v2",
            "max_leverage": 3,
        },
        is_reusable=True,
    )
    # No open positions: recovery has nothing to reconcile.
    httpx_mock.add_response(
        url="http://mcp-deribit:9011/tools/get_positions",
        json=[],
        is_reusable=True,
    )
    httpx_mock.add_response(
        url="http://mcp-macro:9013/tools/get_macro_calendar",
        json={"events": []},
        is_reusable=True,
    )
    httpx_mock.add_response(
        url="http://mcp-sentiment:9014/tools/get_cross_exchange_funding",
        json={"snapshot": {}},
        is_reusable=True,
    )
    httpx_mock.add_response(
        url="http://mcp-hyperliquid:9012/tools/get_funding_rate",
        json={"asset": "ETH", "current_funding_rate": 0.0001},
        is_reusable=True,
    )
    httpx_mock.add_response(
        url="http://mcp-portfolio:9018/tools/get_total_portfolio_value",
        json={"total_value_eur": 1000.0},
        is_reusable=True,
    )
@pytest.mark.asyncio
async def test_audit_anchor_persisted_after_append(tmp_path: Path) -> None:
    """Appending to the audit log mirrors its tail hash into system_state."""
    orch = _build(tmp_path)
    # One append is enough: the on_append callback wired by build_runtime
    # should persist the resulting chain hash as the SQLite anchor.
    orch.context.audit_log.append(
        event="TEST",
        payload={"x": 1},
        now=_now(),
    )
    conn = connect(tmp_path / "state.sqlite")
    try:
        state = orch.context.repository.get_system_state(conn)
    finally:
        conn.close()
    assert state is not None
    # Persisted anchor must equal the log's in-memory tail hash.
    assert state.last_audit_hash == orch.context.audit_log.last_hash
@pytest.mark.asyncio
async def test_boot_detects_audit_truncation(
    tmp_path: Path, httpx_mock: HTTPXMock
) -> None:
    """Truncating the audit file between runs arms the kill switch at boot."""
    orch = _build(tmp_path)
    # Append three lines so we have something to truncate.
    for i in range(3):
        orch.context.audit_log.append(
            event=f"E{i}", payload={"i": i}, now=_now()
        )
    # Truncate the file: keep only the first line.
    audit_path = tmp_path / "audit.log"
    head = audit_path.read_text(encoding="utf-8").splitlines(keepends=True)[0]
    audit_path.write_text(head, encoding="utf-8")
    # Rebuild orchestrator (the AuditLog tail-reads the file again).
    orch = _build(tmp_path)
    _wire_boot_dependencies(httpx_mock)
    # NOTE(review): the anchor mismatch presumably fires a CRITICAL alert
    # through the Telegram MCP — stub that endpoint so boot completes.
    httpx_mock.add_response(
        url="http://mcp-telegram:9017/tools/notify_system_error",
        json={"ok": True},
        is_reusable=True,
    )
    await orch.boot()
    # Persisted anchor (run 1) no longer matches the file tail → armed.
    assert orch.context.kill_switch.is_armed() is True
+11
View File
@@ -98,6 +98,7 @@ def _wire_market_data(
*, *,
spot: float = 3000.0, spot: float = 3000.0,
dvol: float = 50.0, dvol: float = 50.0,
historical_close: float | None = None,
) -> None: ) -> None:
httpx_mock.add_response( httpx_mock.add_response(
url="http://mcp-deribit:9011/tools/get_ticker", url="http://mcp-deribit:9011/tools/get_ticker",
@@ -109,6 +110,16 @@ def _wire_market_data(
json={"currency": "ETH", "latest": dvol, "candles": []}, json={"currency": "ETH", "latest": dvol, "candles": []},
is_reusable=True, is_reusable=True,
) )
# Bootstrap fallback for return_4h when dvol_history is empty.
httpx_mock.add_response(
url="http://mcp-deribit:9011/tools/get_historical",
json={
"candles": (
[{"close": historical_close}] if historical_close is not None else []
)
},
is_reusable=True,
)
def _wire_position_quotes( def _wire_position_quotes(
+66
View File
@@ -0,0 +1,66 @@
"""Tests for the ``cerbero-bite healthcheck`` subcommand."""
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from pathlib import Path
from click.testing import CliRunner
from cerbero_bite.cli import main as cli_main
from cerbero_bite.state import Repository, connect, run_migrations, transaction
def _seed_state(db: Path, *, last_check: datetime, kill_switch: bool = False) -> None:
    """Create a migrated state DB whose singleton row carries *last_check*.

    With ``kill_switch=True`` the switch is armed instead of touching the
    health-check timestamp, so both unhealthy branches can be exercised.
    """
    connection = connect(db)
    try:
        run_migrations(connection)
        repository = Repository()
        with transaction(connection):
            repository.init_system_state(
                connection, config_version="1.0.0", now=last_check
            )
            if not kill_switch:
                repository.touch_health_check(connection, now=last_check)
            else:
                repository.set_kill_switch(
                    connection, armed=True, reason="test", now=last_check
                )
    finally:
        connection.close()
def test_healthcheck_exits_one_when_db_missing(tmp_path: Path) -> None:
    """A missing SQLite file is reported as unhealthy (exit code 1)."""
    missing_db = tmp_path / "absent.sqlite"
    runner = CliRunner()
    result = runner.invoke(cli_main, ["healthcheck", "--db", str(missing_db)])
    assert result.exit_code == 1
    assert "unhealthy" in result.output
def test_healthcheck_exits_one_when_kill_switch_armed(tmp_path: Path) -> None:
    """An armed kill switch makes the probe fail even with a fresh check."""
    db = tmp_path / "state.sqlite"
    _seed_state(db, last_check=datetime.now(UTC), kill_switch=True)
    outcome = CliRunner().invoke(cli_main, ["healthcheck", "--db", str(db)])
    assert outcome.exit_code == 1
    assert "kill switch" in outcome.output
def test_healthcheck_exits_one_when_last_check_stale(tmp_path: Path) -> None:
    """A last_health_check older than --max-staleness-s fails the probe."""
    db = tmp_path / "state.sqlite"
    one_hour_ago = datetime.now(UTC) - timedelta(hours=1)
    _seed_state(db, last_check=one_hour_ago)
    # 1h-old timestamp against a 60s budget → stale.
    args = ["healthcheck", "--db", str(db), "--max-staleness-s", "60"]
    result = CliRunner().invoke(cli_main, args)
    assert result.exit_code == 1
    assert "stale" in result.output
def test_healthcheck_exits_zero_on_recent_check(tmp_path: Path) -> None:
    """Happy path: a just-touched health check yields exit code 0."""
    db = tmp_path / "state.sqlite"
    _seed_state(db, last_check=datetime.now(UTC))
    command = ["healthcheck", "--db", str(db)]
    result = CliRunner().invoke(cli_main, command)
    assert result.exit_code == 0
    assert "healthy" in result.output