Phase 2: persistence + safety controls

Aggiunge la persistenza SQLite, l'audit log a hash chain, il kill
switch coordinato e i CLI di gestione documentati in
docs/05-data-model.md e docs/07-risk-controls.md. 197 test pass,
1 skipped (sqlite3 CLI mancante), copertura totale 97%.

State (`state/`):
- 0001_init.sql con positions, instructions, decisions, dvol_history,
  manual_actions, system_state.
- db.py: connect con WAL + foreign_keys + transaction ctx, runner
  forward-only basato su PRAGMA user_version.
- models.py: record Pydantic, Decimal preservato come TEXT.
- repository.py: CRUD typed con singola connessione passata, cache
  aware, posizioni concorrenti.

Safety (`safety/`):
- audit_log.py: AuditLog append-only con SHA-256 chain e fsync,
  verify_chain riconosce ogni manomissione (payload, prev_hash,
  hash, JSON, separatori).
- kill_switch.py: arm/disarm transazionali, idempotenti, accoppiati
  all'audit chain.

Config (`config/loader.py` + `strategy.yaml`):
- Loader YAML con deep-merge di strategy.local.yaml.
- Verifica config_hash SHA-256 (riga config_hash esclusa).
- File golden strategy.yaml + esempio override.

Scripts:
- dead_man.sh: watchdog shell indipendente da Python.
- backup.py: VACUUM INTO orario con retention 30 giorni.

CLI:
- audit verify (exit 2 su tampering).
- kill-switch arm/disarm/status su SQLite reale.
- state inspect con tabella posizioni aperte.
- config hash, config validate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-27 13:35:35 +02:00
parent fbb7753cc6
commit 263470786d
25 changed files with 3669 additions and 14 deletions
+247 -10
View File
@@ -10,19 +10,32 @@ without changing the surface.
from __future__ import annotations

import sys
from datetime import UTC, datetime
from pathlib import Path

import click
from rich.console import Console
from rich.markup import escape
from rich.table import Table

from cerbero_bite import __version__
from cerbero_bite.config.loader import compute_config_hash, load_strategy
from cerbero_bite.logging import configure as configure_logging
from cerbero_bite.logging import get_logger
from cerbero_bite.safety.audit_log import AuditChainError, AuditLog
from cerbero_bite.safety.audit_log import verify_chain as verify_audit_chain
from cerbero_bite.safety.kill_switch import KillSwitch
from cerbero_bite.state import Repository, run_migrations, transaction
from cerbero_bite.state import connect as connect_state

# Shared Rich console used by the commands in this module for terminal output.
console = Console()
# Logger obtained from the project's logging helper under the name "cli".
log = get_logger("cli")
# Default file locations for the CLI options. They are relative paths, so they
# resolve against the current working directory of the process.
# Default for the ``--db`` options.
_DEFAULT_DB_PATH = Path("data/state.sqlite")
# Default for the ``--audit`` options and for ``audit verify --file``.
_DEFAULT_AUDIT_PATH = Path("data/audit.log")
# Default for ``config hash --file`` and ``config validate --file``.
_DEFAULT_STRATEGY_PATH = Path("strategy.yaml")
def _phase0_notice(action: str) -> None:
console.print(f"[yellow]\\[phase 0 placeholder][/yellow] {action}")
@@ -85,18 +98,131 @@ def kill_switch() -> None:
"""Manage the engine kill switch."""
def _make_kill_switch(
    db_path: Path, audit_path: Path, *, config_version: str
) -> KillSwitch:
    """Build a :class:`KillSwitch` bound to the given on-disk locations.

    Before the switch is constructed, the parent directories of both files
    are created if missing, migrations are run on the state database and
    ``init_system_state`` is invoked inside a transaction. Doing this
    eagerly lets the CLI work on a fresh checkout where the engine has
    never run.

    Args:
        db_path: Location of the SQLite state file.
        audit_path: Location of the audit log file.
        config_version: Value forwarded to ``init_system_state``.

    Returns:
        A ``KillSwitch`` whose connection factory opens a new connection to
        *db_path* on every call.
    """
    for target in (db_path, audit_path):
        target.parent.mkdir(parents=True, exist_ok=True)

    # Short-lived bootstrap connection: it is always closed before the
    # KillSwitch (which opens its own connections) is returned.
    bootstrap_conn = connect_state(db_path)
    try:
        run_migrations(bootstrap_conn)
        bootstrap_repo = Repository()
        with transaction(bootstrap_conn):
            bootstrap_repo.init_system_state(
                bootstrap_conn,
                config_version=config_version,
                now=datetime.now(UTC),
            )
    finally:
        bootstrap_conn.close()

    def _open_connection():
        # Fresh connection per call, mirroring the bootstrap one above.
        return connect_state(db_path)

    return KillSwitch(
        connection_factory=_open_connection,
        repository=Repository(),
        audit_log=AuditLog(audit_path),
    )
@kill_switch.command(name="arm")
@click.option("--reason", required=True, help="Why you are arming the kill switch.")
@click.option(
    "--source",
    default="manual",
    show_default=True,
    help="Trigger label (manual, mcp_timeout, hash_chain, ...).",
)
@click.option(
    "--db",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_DB_PATH,
    show_default=True,
)
@click.option(
    "--audit",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_AUDIT_PATH,
    show_default=True,
)
@click.option(
    "--config-version",
    default="unknown",
    show_default=True,
    help="Recorded next to the kill event when the singleton is initialised.",
)
def kill_switch_arm(
    reason: str, source: str, db: Path, audit: Path, config_version: str
) -> None:
    """Arm the kill switch (engine refuses new entries)."""
    # Parameters (all supplied by the Click options above):
    #   reason / source  - forwarded verbatim to ``KillSwitch.arm``.
    #   db / audit       - state database and audit log locations.
    #   config_version   - forwarded to ``_make_kill_switch``.
    ks = _make_kill_switch(db, audit, config_version=config_version)
    ks.arm(reason=reason, source=source)
    # The operator-supplied text is escaped before it is embedded in a Rich
    # markup string: otherwise "[x]" would be consumed as a style tag and an
    # unbalanced "[/x]" would raise MarkupError *after* the switch was armed.
    console.print(
        f"[red]kill switch ARMED[/red] "
        f"reason={escape(repr(reason))} source={escape(source)}"
    )
@kill_switch.command(name="disarm")
@click.option("--reason", required=True, help="Why you are disarming.")
@click.option(
    "--source",
    default="manual",
    show_default=True,
    help="Trigger label (manual, mcp_timeout, hash_chain, ...).",
)
@click.option(
    "--db",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_DB_PATH,
    show_default=True,
)
@click.option(
    "--audit",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_AUDIT_PATH,
    show_default=True,
)
@click.option(
    "--config-version",
    default="unknown",
    show_default=True,
    help="Config version recorded when the system_state singleton is initialised.",
)
def kill_switch_disarm(
    reason: str, source: str, db: Path, audit: Path, config_version: str
) -> None:
    """Disarm the kill switch."""
    # Parameters (all supplied by the Click options above):
    #   reason / source  - forwarded verbatim to ``KillSwitch.disarm``.
    #   db / audit       - state database and audit log locations.
    #   config_version   - forwarded to ``_make_kill_switch``.
    ks = _make_kill_switch(db, audit, config_version=config_version)
    ks.disarm(reason=reason, source=source)
    # Escape the operator-supplied reason so square brackets in it are shown
    # literally instead of being parsed (or rejected) as Rich markup.
    console.print(
        f"[green]kill switch DISARMED[/green] reason={escape(repr(reason))}"
    )
@kill_switch.command(name="status")
@click.option(
    "--db",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_DB_PATH,
    show_default=True,
)
def kill_switch_status(db: Path) -> None:
    """Print the current kill switch state."""
    # A missing database file is reported as a notice, not an error, and the
    # file is deliberately not created by a read-only status query.
    if not db.exists():
        console.print("[yellow]state.sqlite not found — engine never ran[/yellow]")
        return

    conn = connect_state(db)
    try:
        run_migrations(conn)
        # Named ``system_state`` so the local does not shadow the ``state``
        # command group defined at module level.
        system_state = Repository().get_system_state(conn)
    finally:
        conn.close()

    if system_state is None:
        console.print("[yellow]system_state singleton missing[/yellow]")
        return

    armed = system_state.kill_switch == 1
    flag = "[red]ARMED[/red]" if armed else "[green]disarmed[/green]"
    # The stored reason is free text entered by an operator; escape it so
    # brackets are printed literally rather than interpreted as Rich markup.
    reason = escape(system_state.kill_reason) if system_state.kill_reason else "-"
    kill_at = system_state.kill_at.isoformat() if system_state.kill_at else "-"
    # Defensive: treat a missing health-check timestamp like a missing
    # ``kill_at`` instead of failing with AttributeError.
    last_check = (
        system_state.last_health_check.isoformat()
        if system_state.last_health_check
        else "-"
    )
    console.print(
        f"kill_switch: {flag}\n"
        f"reason: {reason}\n"
        f"kill_at: {kill_at}\n"
        f"last_health_check: {last_check}"
    )
@main.command()
@@ -123,9 +249,42 @@ def config() -> None:
@config.command(name="hash")
@click.option(
    "--file",
    "yaml_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=_DEFAULT_STRATEGY_PATH,
    show_default=True,
)
def config_hash(yaml_path: Path) -> None:
    """Compute and print the SHA-256 of *yaml_path* (config_hash field excluded)."""
    # The file is read as UTF-8 text and hashed by ``compute_config_hash``.
    text = yaml_path.read_text(encoding="utf-8")
    digest = compute_config_hash(text)
    # The digest is meant to be copied verbatim, so print it as plain text:
    # no markup parsing, no syntax highlighting, and no wrapping on narrow
    # terminals (which would split the value across lines).
    console.print(digest, markup=False, highlight=False, soft_wrap=True)
@config.command(name="validate")
@click.option(
    "--file",
    "yaml_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=_DEFAULT_STRATEGY_PATH,
    show_default=True,
)
@click.option(
    "--enforce-hash/--no-enforce-hash",
    default=True,
    show_default=True,
    help="When enabled, the recorded config_hash must match the file body.",
)
def config_validate(yaml_path: Path, enforce_hash: bool) -> None:
    """Load and validate ``strategy.yaml`` (and any local override)."""
    # Any exception raised by ``load_strategy`` propagates to the caller;
    # the summary line below is printed only when loading succeeded.
    loaded = load_strategy(yaml_path, enforce_hash=enforce_hash)
    source_names = ", ".join(p.name for p in loaded.sources)
    # Each fragment ends with a separating space: previously the hash prefix
    # ran straight into "sources=" and produced one fused token. Values that
    # come from the config or the filesystem are escaped so brackets in them
    # are not parsed as Rich markup.
    console.print(
        f"[green]ok[/green] version={escape(str(loaded.config.config_version))} "
        f"hash={loaded.computed_hash[:16]} "
        f"sources={escape(source_names)}"
    )
@main.group()
@@ -134,9 +293,87 @@ def audit() -> None:
@audit.command(name="verify")
@click.option(
    "--file",
    "audit_path",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_AUDIT_PATH,
    show_default=True,
)
def audit_verify(audit_path: Path) -> None:
    """Walk the hash chain in *audit_path* and report tampering."""
    # Exit status: 2 when ``AuditChainError`` is raised by the verifier;
    # otherwise the command returns normally after printing a summary.
    try:
        count = verify_audit_chain(audit_path)
    except AuditChainError as exc:
        # The error text may quote log content containing square brackets;
        # escape it so Rich prints it literally instead of treating it as
        # markup (which could hide text or raise while reporting tampering).
        console.print(f"[red]TAMPERED[/red]: {escape(str(exc))}")
        sys.exit(2)
    if count == 0:
        console.print("[yellow]audit log empty[/yellow]")
    else:
        console.print(f"[green]ok[/green] {count} entries verified")
# Click command group registered on ``main``; it has no behavior of its own
# and only hosts subcommands such as ``state inspect``.
@main.group()
def state() -> None:
    """State inspection utilities."""
@state.command(name="inspect")
@click.option(
    "--db",
    type=click.Path(dir_okay=False, path_type=Path),
    default=_DEFAULT_DB_PATH,
    show_default=True,
)
def state_inspect(db: Path) -> None:
    """Print a short snapshot of the SQLite state file."""
    # A missing database file is reported as a notice and nothing is created.
    if not db.exists():
        console.print("[yellow]state.sqlite not found[/yellow]")
        return

    # Read everything needed while the connection is open, then close it
    # before any rendering happens.
    conn = connect_state(db)
    try:
        run_migrations(conn)
        repo = Repository()
        sys_state = repo.get_system_state(conn)
        positions = repo.list_open_positions(conn)
        concurrent = repo.count_concurrent_positions(conn)
    finally:
        conn.close()

    if sys_state is None:
        console.print("[yellow]system_state singleton missing[/yellow]")
        return

    armed = "[red]ARMED[/red]" if sys_state.kill_switch == 1 else "[green]disarmed[/green]"
    # ``config_version`` can originate from a free-form CLI option, so it is
    # escaped to keep square brackets from being parsed as Rich markup.
    console.print(
        f"engine state: kill_switch={armed}, "
        f"open positions: {concurrent}, "
        f"config_version: {escape(str(sys_state.config_version))}"
    )

    if not positions:
        console.print("no open positions")
        return

    table = Table(title="open positions")
    for heading in ("proposal_id", "status", "spread", "short", "long", "n", "expiry"):
        table.add_column(heading)
    for pos in positions:
        table.add_row(
            # Only the first 8 characters of the id are shown to keep rows short.
            str(pos.proposal_id)[:8],
            # Table cells are rendered as markup too; escape the text fields.
            escape(pos.status),
            escape(pos.spread_type),
            str(pos.short_strike),
            str(pos.long_strike),
            str(pos.n_contracts),
            pos.expiry.isoformat(),
        )
    console.print(table)
def _entrypoint() -> None: