Phase 2: persistence + safety controls
Aggiunge la persistenza SQLite, l'audit log a hash chain, il kill switch
coordinato e i CLI di gestione documentati in docs/05-data-model.md e
docs/07-risk-controls.md. 197 test pass, 1 skipped (sqlite3 CLI mancante),
copertura totale 97%.

State (`state/`):
- 0001_init.sql con positions, instructions, decisions, dvol_history, manual_actions, system_state.
- db.py: connect con WAL + foreign_keys + transaction ctx, runner forward-only basato su PRAGMA user_version.
- models.py: record Pydantic, Decimal preservato come TEXT.
- repository.py: CRUD typed con singola connessione passata, cache aware, posizioni concorrenti.

Safety (`safety/`):
- audit_log.py: AuditLog append-only con SHA-256 chain e fsync, verify_chain riconosce ogni manomissione (payload, prev_hash, hash, JSON, separatori).
- kill_switch.py: arm/disarm transazionali, idempotenti, accoppiati all'audit chain.

Config (`config/loader.py` + `strategy.yaml`):
- Loader YAML con deep-merge di strategy.local.yaml.
- Verifica config_hash SHA-256 (riga config_hash esclusa).
- File golden strategy.yaml + esempio override.

Scripts:
- dead_man.sh: watchdog shell indipendente da Python.
- backup.py: VACUUM INTO orario con retention 30 giorni.

CLI:
- audit verify (exit 2 su tampering).
- kill-switch arm/disarm/status su SQLite reale.
- state inspect con tabella posizioni aperte.
- config hash, config validate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
"""Smoke tests for the dead_man.sh shell watchdog."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Repository root: two directory levels above the directory holding this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# The shell watchdog exercised by every test in this module.
SCRIPT = REPO_ROOT.joinpath("scripts", "dead_man.sh")

# Path of the sqlite3 command-line tool as found on PATH at import time,
# or None when the host does not have it.
_SQLITE3_BIN = shutil.which("sqlite3")

# Module-level marker list: pytest applies it to every test defined here.
pytestmark = [pytest.mark.integration]
|
||||
|
||||
|
||||
def _require_sqlite3() -> None:
    """Skip the calling test when no ``sqlite3`` CLI was found on PATH.

    The lookup result is the module-level ``_SQLITE3_BIN``; when it is
    ``None`` the current test is skipped via ``pytest.skip``.
    """
    if _SQLITE3_BIN is not None:
        return
    pytest.skip("sqlite3 CLI not installed on this host")
|
||||
|
||||
|
||||
def _setup_project(tmp_path: Path) -> Path:
    """Build a throw-away project tree under *tmp_path* and return its root.

    The tree is ``project/data/log`` plus ``project/scripts``, with a copy
    of the repository's ``dead_man.sh`` placed in ``scripts``.
    """
    root = tmp_path / "project"
    scripts_dir = root / "scripts"
    # Same creation order as before: log directory first, then scripts.
    for directory in (root / "data" / "log", scripts_dir):
        directory.mkdir(parents=True)
    shutil.copy(SCRIPT, scripts_dir / "dead_man.sh")
    return root
|
||||
|
||||
|
||||
def _write_health(project: Path, ts: datetime) -> None:
    """Write a one-line JSONL log containing a single ``HEALTH_OK`` event.

    The file is ``data/log/cerbero-bite-<YYYY-MM-DD>.jsonl`` under *project*
    and any previous content is replaced. The event's ``ts`` field is *ts*
    converted to UTC and rendered in ISO-8601 form.
    """
    # NOTE(review): the date in the file name is formatted from ``ts`` as
    # given, while the payload timestamp is converted to UTC. Callers in this
    # module always pass UTC; confirm which date dead_man.sh expects.
    file_name = f"cerbero-bite-{ts:%Y-%m-%d}.jsonl"
    stamp = ts.astimezone(UTC).isoformat()
    record = f'{{"ts": "{stamp}", "event": "HEALTH_OK"}}\n'
    (project / "data" / "log" / file_name).write_text(record, encoding="utf-8")
|
||||
|
||||
|
||||
def _run(project: Path, threshold: int = 900) -> subprocess.CompletedProcess[str]:
    """Run the project's copy of ``dead_man.sh`` under bash and return the result.

    The script receives a minimal environment containing only ``PATH``,
    ``PROJECT_ROOT`` (the *project* directory) and
    ``DEAD_MAN_THRESHOLD_SECONDS`` (*threshold*). Output is captured as text
    and a non-zero exit status does not raise.

    ``PATH`` is ``/usr/bin:/bin`` plus, when the ``sqlite3`` CLI was found
    somewhere else on the host, the directory holding that binary. Without
    this, a host with sqlite3 in e.g. ``/usr/local/bin`` would not skip the
    sqlite-dependent tests, yet the script could not find the binary.

    Args:
        project: Root of the temporary project tree containing
            ``scripts/dead_man.sh``.
        threshold: Value exported as ``DEAD_MAN_THRESHOLD_SECONDS``.

    Returns:
        The completed process, with ``stdout``/``stderr`` decoded as text.
    """
    search_dirs = ["/usr/bin", "/bin"]
    if _SQLITE3_BIN is not None:
        sqlite3_dir = str(Path(_SQLITE3_BIN).parent)
        # Appended last so the system directories keep precedence.
        if sqlite3_dir not in search_dirs:
            search_dirs.append(sqlite3_dir)
    return subprocess.run(
        ["bash", str(project / "scripts" / "dead_man.sh")],
        env={
            "PATH": ":".join(search_dirs),
            "PROJECT_ROOT": str(project),
            "DEAD_MAN_THRESHOLD_SECONDS": str(threshold),
        },
        capture_output=True,
        text=True,
        check=False,
    )
|
||||
|
||||
|
||||
def test_dead_man_exits_zero_when_recent_health_ok(tmp_path: Path) -> None:
    """A HEALTH_OK event 60 s old is within the 900 s threshold: exit code 0."""
    project = _setup_project(tmp_path)
    heartbeat_at = datetime.now(UTC) - timedelta(seconds=60)
    _write_health(project, heartbeat_at)

    outcome = _run(project, threshold=900)

    # Surface the script's stderr if the exit code is not the expected one.
    assert outcome.returncode == 0, outcome.stderr
|
||||
|
||||
|
||||
def test_dead_man_arms_kill_switch_when_silent(tmp_path: Path) -> None:
    """A HEALTH_OK event older than the threshold arms the kill switch.

    The last heartbeat is 2000 s old against a 900 s threshold. The script
    is expected to exit with status 1, set ``system_state.kill_switch`` to 1
    in ``data/state.sqlite`` and write ``data/log/dead-man-alert.txt``
    mentioning ``dead_man``. Skipped when the sqlite3 CLI is unavailable.
    """
    _require_sqlite3()
    project = _setup_project(tmp_path)
    _write_health(project, datetime.now(UTC) - timedelta(seconds=2000))

    # Pre-create the SQLite system_state singleton; otherwise the script
    # has nothing to update.
    db = project / "data" / "state.sqlite"
    conn = sqlite3.connect(str(db))
    try:
        conn.execute(
            "CREATE TABLE system_state (id INTEGER PRIMARY KEY CHECK(id=1), "
            "kill_switch INTEGER NOT NULL DEFAULT 0, kill_reason TEXT, "
            "kill_at TEXT, last_health_check TEXT NOT NULL, "
            "last_kelly_calib TEXT, config_version TEXT NOT NULL, "
            "started_at TEXT NOT NULL)"
        )
        conn.execute(
            "INSERT INTO system_state(id, last_health_check, config_version, started_at) "
            "VALUES (1, '2026-04-27', '1.0.0', '2026-04-27')"
        )
        conn.commit()
    finally:
        # Close even when setup fails, so the database file is released
        # before the script (or pytest's tmp_path cleanup) touches it.
        conn.close()

    result = _run(project, threshold=900)
    assert result.returncode == 1, result.stderr

    # Kill switch armed.
    conn = sqlite3.connect(str(db))
    try:
        kill = conn.execute("SELECT kill_switch FROM system_state").fetchone()[0]
    finally:
        conn.close()
    assert kill == 1, result.stderr

    # Alert file exists.
    alert = project / "data" / "log" / "dead-man-alert.txt"
    assert alert.exists()
    assert "dead_man" in alert.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def test_dead_man_handles_missing_log_file(tmp_path: Path) -> None:
    """With no health log at all, the script exits 1 and writes the alert file."""
    project = _setup_project(tmp_path)

    # No log file at all.
    result = _run(project, threshold=900)

    # Include the script's stderr on failure, as the exit-zero test does.
    assert result.returncode == 1, result.stderr
    alert = project / "data" / "log" / "dead-man-alert.txt"
    assert alert.exists()
|
||||
Reference in New Issue
Block a user