feat(gui+runtime): Phase D — kill-switch arm/disarm from the dashboard

Wires the GUI's first write path through the manual_actions queue:

* runtime/manual_actions_consumer.py — drains the queue and
  dispatches arm_kill / disarm_kill via KillSwitch (preserving the
  audit chain). Unsupported kinds (force_close, approve/reject_proposal)
  are marked result="not_supported" so they don't sit forever.
* runtime/orchestrator.py — adds a `manual_actions` job at */1 cron
  to the canonical scheduler manifest.
* gui/data_layer.py — write helpers enqueue_arm_kill /
  enqueue_disarm_kill (the only write path the GUI uses) plus
  load_pending_manual_actions for the pending strip.
* gui/pages/1_📊_Status.py — kill-switch arm/disarm panel with typed
  confirmation ("yes I am sure") + reason field; pending-actions table
  rendered when the queue is non-empty.

End-to-end smoke against the testnet state.sqlite:
  GUI enqueue → consumer dispatch → KillSwitch transition → audit
  chain hash linkage holds, "source":"manual_gui" recorded.

7 new unit tests for the consumer (arm, disarm, drain, unsupported,
default-reason, KillSwitchError handling, empty queue); 360/360 pass.
ruff clean; mypy strict src clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 12:33:58 +02:00
parent 6f6dd4c8dd
commit e8345a29c8
6 changed files with 470 additions and 4 deletions
@@ -0,0 +1,114 @@
"""Consumer of the ``manual_actions`` queue.
The GUI (and other out-of-band tooling) records operator intent in the
SQLite ``manual_actions`` table; this consumer pulls those rows and
dispatches them through the same primitives the engine uses internally
(``KillSwitch.arm`` / ``disarm``) so the audit chain remains the single
source of truth for state transitions.
Currently supported kinds:
* ``arm_kill`` — payload ``{"reason": str}``; arms the kill switch.
* ``disarm_kill`` — payload ``{"reason": str}``; disarms it.
Future kinds (``force_close``, ``approve_proposal``,
``reject_proposal``) are recognised by the ``ManualAction`` schema but
not yet wired up — the consumer marks them as
``result="not_supported"`` so they don't sit in the queue forever.
"""
from __future__ import annotations
import json
import logging
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from cerbero_bite.safety.kill_switch import KillSwitchError
from cerbero_bite.state import connect, transaction
if TYPE_CHECKING:
from cerbero_bite.runtime.dependencies import RuntimeContext
# Public surface of this module: only the queue-draining coroutine is exported.
__all__ = ["consume_manual_actions"]
# Module logger, registered under an explicit dotted name.
_log = logging.getLogger("cerbero_bite.runtime.manual_actions")
# Identifier passed as ``consumed_by`` when a queue row is marked consumed.
_CONSUMER_ID = "engine"
def _parse_payload(raw: str | None) -> dict[str, object]:
if not raw:
return {}
try:
parsed = json.loads(raw)
except (TypeError, ValueError):
return {}
return parsed if isinstance(parsed, dict) else {}
def _resolve_reason(payload: dict[str, object]) -> str:
    """Return the operator-supplied reason, or the GUI default if absent.

    A missing key and an explicit JSON ``null`` are both treated as
    "no reason given"; without the ``None`` check a ``null`` would be
    stringified to the literal ``"None"`` and recorded in the audit chain.
    """
    raw_reason = payload.get("reason")
    if raw_reason is None:
        return "manual via GUI"
    return str(raw_reason)


async def consume_manual_actions(
    ctx: RuntimeContext, *, now: datetime | None = None
) -> int:
    """Drain the ``manual_actions`` queue and return the number processed.

    The function is synchronous at heart (SQLite + KillSwitch), but kept
    ``async def`` so the orchestrator can register it as an APScheduler
    coroutine without an extra wrapper. Each iteration fetches the next
    unconsumed row, dispatches it, and marks it consumed; the loop ends
    when the queue is empty so a single tick can catch up after a long
    pause.

    Args:
        ctx: Runtime context providing ``db_path``, ``repository`` and
            ``kill_switch``.
        now: Timestamp recorded on every row consumed during this call,
            converted to UTC. Defaults to the current UTC time.

    Returns:
        The count of rows marked consumed, including rows whose result is
        ``"not_supported"`` or an ``"error: ..."`` string.

    Dispatch failures are logged and stored in the row's ``result`` rather
    than raised. Errors from the repository or the database connection
    are not caught here and propagate to the caller.
    """
    reference = (now or datetime.now(UTC)).astimezone(UTC)
    processed = 0
    previous_id: object | None = None

    while True:
        # A short-lived connection per step: one to read the next row...
        conn = connect(ctx.db_path)
        try:
            action = ctx.repository.next_unconsumed_action(conn)
        finally:
            conn.close()

        if action is None:
            break
        if action.id is None:
            # Without an id the row cannot be marked consumed; stop rather
            # than fetch the same row again forever.
            _log.warning("manual_action without id, skipping")
            break
        if action.id == previous_id:
            # The row we just marked consumed came back: the mark did not
            # take effect. Stop instead of re-dispatching the same
            # transition in an endless loop inside the scheduler tick.
            _log.error(
                "manual_action id=%s still unconsumed after being marked; "
                "aborting drain",
                action.id,
            )
            break
        previous_id = action.id

        payload = _parse_payload(action.payload_json)
        result = "ok"
        try:
            if action.kind == "arm_kill":
                ctx.kill_switch.arm(
                    reason=_resolve_reason(payload), source="manual_gui"
                )
            elif action.kind == "disarm_kill":
                ctx.kill_switch.disarm(
                    reason=_resolve_reason(payload), source="manual_gui"
                )
            else:
                # Recognised by the schema but not wired up: record the
                # outcome so the row leaves the queue.
                result = "not_supported"
                _log.warning(
                    "manual_action kind=%s not supported yet", action.kind
                )
        except KillSwitchError as exc:
            _log.exception("kill switch transition failed")
            result = f"error: {type(exc).__name__}: {exc}"
        except Exception as exc:  # pragma: no cover — defensive
            _log.exception("manual_action dispatch failed")
            result = f"error: {type(exc).__name__}: {exc}"

        # ...and a second one to record the outcome transactionally.
        conn = connect(ctx.db_path)
        try:
            with transaction(conn):
                ctx.repository.mark_action_consumed(
                    conn,
                    action.id,
                    consumed_by=_CONSUMER_ID,
                    result=result,
                    now=reference,
                )
        finally:
            conn.close()
        processed += 1

    if processed:
        _log.info("processed %d manual_actions", processed)
    return processed
+14
View File
@@ -28,6 +28,7 @@ from cerbero_bite.runtime.dependencies import RuntimeContext, build_runtime
from cerbero_bite.runtime.entry_cycle import EntryCycleResult, run_entry_cycle
from cerbero_bite.runtime.health_check import HealthCheck, HealthCheckResult
from cerbero_bite.runtime.lockfile import EngineLock
from cerbero_bite.runtime.manual_actions_consumer import consume_manual_actions
from cerbero_bite.runtime.monitor_cycle import MonitorCycleResult, run_monitor_cycle
from cerbero_bite.runtime.recovery import recover_state
from cerbero_bite.runtime.scheduler import JobSpec, build_scheduler
@@ -45,6 +46,7 @@ _CRON_ENTRY = "0 14 * * MON"
_CRON_MONITOR = "0 2,14 * * *"
_CRON_HEALTH = "*/5 * * * *"
_CRON_BACKUP = "0 * * * *"
_CRON_MANUAL_ACTIONS = "*/1 * * * *"
_BACKUP_RETENTION_DAYS = 30
@@ -191,6 +193,7 @@ class Orchestrator:
monitor_cron: str = _CRON_MONITOR,
health_cron: str = _CRON_HEALTH,
backup_cron: str = _CRON_BACKUP,
manual_actions_cron: str = _CRON_MANUAL_ACTIONS,
backup_dir: Path | None = None,
backup_retention_days: int = _BACKUP_RETENTION_DAYS,
) -> AsyncIOScheduler:
@@ -229,12 +232,23 @@ class Orchestrator:
await _safe("backup", _do)
# Scheduler job for the ``manual_actions`` queue: runs
# ``consume_manual_actions`` against this orchestrator's runtime context.
async def _manual_actions() -> None:
async def _do() -> None:
# The processed-count return value is discarded here.
await consume_manual_actions(self._ctx)
# NOTE(review): ``_safe`` is defined outside this view — presumably it
# shields the scheduler from exceptions raised by the job; confirm.
await _safe("manual_actions", _do)
self._scheduler = build_scheduler(
[
JobSpec(name="entry", cron=entry_cron, coro_factory=_entry),
JobSpec(name="monitor", cron=monitor_cron, coro_factory=_monitor),
JobSpec(name="health", cron=health_cron, coro_factory=_health),
JobSpec(name="backup", cron=backup_cron, coro_factory=_backup),
JobSpec(
name="manual_actions",
cron=manual_actions_cron,
coro_factory=_manual_actions,
),
]
)
return self._scheduler