Phase 2: persistence + safety controls

Aggiunge la persistenza SQLite, l'audit log a hash chain, il kill
switch coordinato e i CLI di gestione documentati in
docs/05-data-model.md e docs/07-risk-controls.md. 197 test superati,
1 saltato (CLI sqlite3 mancante), copertura totale 97%.

State (`state/`):
- 0001_init.sql con positions, instructions, decisions, dvol_history,
  manual_actions, system_state.
- db.py: connect con WAL + foreign_keys + transaction ctx, runner
  forward-only basato su PRAGMA user_version.
- models.py: record Pydantic, Decimal preservato come TEXT.
- repository.py: CRUD typed con singola connessione passata, cache
  aware, posizioni concorrenti.

Safety (`safety/`):
- audit_log.py: AuditLog append-only con SHA-256 chain e fsync,
  verify_chain riconosce ogni manomissione (payload, prev_hash,
  hash, JSON, separatori).
- kill_switch.py: arm/disarm transazionali, idempotenti, accoppiati
  all'audit chain.

Config (`config/loader.py` + `strategy.yaml`):
- Loader YAML con deep-merge di strategy.local.yaml.
- Verifica config_hash SHA-256 (riga config_hash esclusa).
- File golden strategy.yaml + esempio override.

Scripts:
- dead_man.sh: watchdog shell indipendente da Python.
- backup.py: VACUUM INTO orario con retention 30 giorni.

CLI:
- audit verify (exit 2 su tampering).
- kill-switch arm/disarm/status su SQLite reale.
- state inspect con tabella posizioni aperte.
- config hash, config validate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-27 13:35:35 +02:00
parent fbb7753cc6
commit 263470786d
25 changed files with 3669 additions and 14 deletions
+111
View File
@@ -0,0 +1,111 @@
"""Smoke tests for the dead_man.sh shell watchdog."""
from __future__ import annotations
import shutil
import sqlite3
import subprocess
from datetime import UTC, datetime, timedelta
from pathlib import Path
import pytest
# Three directory levels above this file (same as ``parents[2]`` of the
# resolved path) -- presumably the repository root; confirm against the layout.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# The watchdog script under test.
SCRIPT = REPO_ROOT.joinpath("scripts", "dead_man.sh")
# Absolute path of the ``sqlite3`` CLI, or ``None`` when it is not on PATH.
_SQLITE3_BIN = shutil.which("sqlite3")
# Module-wide marker: every test in this file carries ``integration``.
pytestmark = [pytest.mark.integration]
def _require_sqlite3() -> None:
    """Skip the calling test when no ``sqlite3`` executable was located.

    Relies on the module-level ``_SQLITE3_BIN`` lookup, which is ``None``
    when ``shutil.which`` found nothing.
    """
    if _SQLITE3_BIN is not None:
        return
    pytest.skip("sqlite3 CLI not installed on this host")
def _setup_project(tmp_path: Path) -> Path:
    """Build a throwaway project tree holding a copy of the watchdog script.

    Creates ``project/data/log`` and ``project/scripts`` beneath *tmp_path*
    and copies ``dead_man.sh`` into the scripts directory.

    Returns:
        The root of the newly created project tree.
    """
    root = tmp_path / "project"
    scripts_dir = root / "scripts"
    for directory in (root / "data" / "log", scripts_dir):
        directory.mkdir(parents=True)
    shutil.copy(SCRIPT, scripts_dir / "dead_man.sh")
    return root
def _write_health(project: Path, ts: datetime) -> None:
log_file = project / "data" / "log" / f"cerbero-bite-{ts:%Y-%m-%d}.jsonl"
log_file.write_text(
f'{{"ts": "{ts.astimezone(UTC).isoformat()}", "event": "HEALTH_OK"}}\n',
encoding="utf-8",
)
def _run(project: Path, threshold: int = 900) -> subprocess.CompletedProcess[str]:
return subprocess.run(
["bash", str(project / "scripts" / "dead_man.sh")],
env={
"PATH": "/usr/bin:/bin",
"PROJECT_ROOT": str(project),
"DEAD_MAN_THRESHOLD_SECONDS": str(threshold),
},
capture_output=True,
text=True,
check=False,
)
def test_dead_man_exits_zero_when_recent_health_ok(tmp_path: Path) -> None:
    """A heartbeat 60 s old, well inside the 900 s threshold, yields exit 0."""
    sandbox = _setup_project(tmp_path)
    one_minute_ago = datetime.now(UTC) - timedelta(seconds=60)
    _write_health(sandbox, one_minute_ago)
    outcome = _run(sandbox, threshold=900)
    # Surface the script's stderr when the exit status is unexpected.
    assert outcome.returncode == 0, outcome.stderr
def test_dead_man_arms_kill_switch_when_silent(tmp_path: Path) -> None:
    """A stale heartbeat must arm the kill switch and leave an alert file.

    The last ``HEALTH_OK`` is 2000 s old against a 900 s threshold. The
    script is expected to exit with status 1, set ``kill_switch`` to 1 in
    the ``system_state`` row, and write ``dead-man-alert.txt`` mentioning
    ``dead_man``. Skipped when the ``sqlite3`` CLI is unavailable.
    """
    _require_sqlite3()
    project = _setup_project(tmp_path)
    _write_health(project, datetime.now(UTC) - timedelta(seconds=2000))
    # Pre-create the SQLite system_state singleton; otherwise the script
    # has nothing to update.
    db = project / "data" / "state.sqlite"
    conn = sqlite3.connect(str(db))
    try:
        conn.execute(
            "CREATE TABLE system_state (id INTEGER PRIMARY KEY CHECK(id=1), "
            "kill_switch INTEGER NOT NULL DEFAULT 0, kill_reason TEXT, "
            "kill_at TEXT, last_health_check TEXT NOT NULL, "
            "last_kelly_calib TEXT, config_version TEXT NOT NULL, "
            "started_at TEXT NOT NULL)"
        )
        conn.execute(
            "INSERT INTO system_state(id, last_health_check, config_version, started_at) "
            "VALUES (1, '2026-04-27', '1.0.0', '2026-04-27')"
        )
        conn.commit()
    finally:
        # Close even if setup fails, so no handle on the database file
        # outlives the test (mirrors the read-back below).
        conn.close()
    result = _run(project, threshold=900)
    # Include stderr so a wrong exit status can be diagnosed from the report.
    assert result.returncode == 1, result.stderr
    # Kill switch armed.
    conn = sqlite3.connect(str(db))
    try:
        kill = conn.execute("SELECT kill_switch FROM system_state").fetchone()[0]
    finally:
        conn.close()
    assert kill == 1
    # Alert file exists.
    alert = project / "data" / "log" / "dead-man-alert.txt"
    assert alert.exists()
    assert "dead_man" in alert.read_text(encoding="utf-8")
def test_dead_man_handles_missing_log_file(tmp_path: Path) -> None:
    """With no health log at all, the script exits 1 and writes the alert file."""
    sandbox = _setup_project(tmp_path)
    # Deliberately write no health log before invoking the watchdog.
    outcome = _run(sandbox, threshold=900)
    alert_path = sandbox / "data" / "log" / "dead-man-alert.txt"
    assert outcome.returncode == 1
    assert alert_path.exists()