feat(gui+runtime): Phase D — kill-switch arm/disarm from the dashboard
Wires the GUI's first write path through the manual_actions queue:

* runtime/manual_actions_consumer.py — drains the queue and dispatches arm_kill / disarm_kill via KillSwitch (preserving the audit chain). Unsupported kinds (force_close, approve/reject_proposal) are marked result="not_supported" so they don't sit forever.
* runtime/orchestrator.py — adds a `manual_actions` job at */1 cron to the canonical scheduler manifest.
* gui/data_layer.py — write helpers enqueue_arm_kill / enqueue_disarm_kill (the only write path the GUI uses) plus load_pending_manual_actions for the pending strip.
* gui/pages/1_📊_Status.py — kill-switch arm/disarm panel with typed confirmation ("yes I am sure") + reason field; pending-actions table rendered when the queue is non-empty.

End-to-end smoke against the testnet state.sqlite: GUI enqueue → consumer dispatch → KillSwitch transition → audit chain hash linkage holds, "source":"manual_gui" recorded.

7 new unit tests for the consumer (arm, disarm, drain, unsupported, default-reason, KillSwitchError handling, empty queue); 360/360 pass. ruff clean; mypy strict src clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
"""Consumer of the ``manual_actions`` queue.
|
||||
|
||||
The GUI (and other out-of-band tooling) records operator intent in the
|
||||
SQLite ``manual_actions`` table; this consumer pulls those rows and
|
||||
dispatches them through the same primitives the engine uses internally
|
||||
(``KillSwitch.arm`` / ``disarm``) so the audit chain remains the single
|
||||
source of truth for state transitions.
|
||||
|
||||
Currently supported kinds:
|
||||
|
||||
* ``arm_kill`` — payload ``{"reason": str}``; arms the kill switch.
|
||||
* ``disarm_kill`` — payload ``{"reason": str}``; disarms it.
|
||||
|
||||
Future kinds (``force_close``, ``approve_proposal``,
|
||||
``reject_proposal``) are recognised by the ``ManualAction`` schema but
|
||||
not yet wired up — the consumer marks them as
|
||||
``result="not_supported"`` so they don't sit in the queue forever.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from cerbero_bite.safety.kill_switch import KillSwitchError
|
||||
from cerbero_bite.state import connect, transaction
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from cerbero_bite.runtime.dependencies import RuntimeContext
|
||||
|
||||
__all__ = ["consume_manual_actions"]
|
||||
|
||||
|
||||
_log = logging.getLogger("cerbero_bite.runtime.manual_actions")
|
||||
_CONSUMER_ID = "engine"
|
||||
|
||||
|
||||
def _parse_payload(raw: str | None) -> dict[str, object]:
|
||||
if not raw:
|
||||
return {}
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except (TypeError, ValueError):
|
||||
return {}
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
|
||||
|
||||
_DEFAULT_REASON = "manual via GUI"


def _reason_from(payload: dict[str, object]) -> str:
    """Return the operator-supplied reason, or the default when unusable.

    A missing key, a JSON ``null`` and a blank string all fall back to the
    default so the audit chain never records ``"None"`` or an empty reason.
    Any other value is stringified unchanged.
    """
    raw = payload.get("reason")
    if raw is None:
        return _DEFAULT_REASON
    text = str(raw)
    return text if text.strip() else _DEFAULT_REASON


def _dispatch(
    ctx: RuntimeContext, kind: str, payload: dict[str, object]
) -> str:
    """Apply one manual action and return the result string to persist.

    Returns ``"ok"`` on success, ``"not_supported"`` for kinds the consumer
    does not handle, and ``"error: <Type>: <message>"`` when the dispatch
    raises. Exceptions are logged and converted rather than propagated, so
    the caller can still mark the row consumed.
    """
    try:
        if kind == "arm_kill":
            reason = _reason_from(payload)
            ctx.kill_switch.arm(reason=reason, source="manual_gui")
        elif kind == "disarm_kill":
            reason = _reason_from(payload)
            ctx.kill_switch.disarm(reason=reason, source="manual_gui")
        else:
            _log.warning(
                "manual_action kind=%s not supported yet", kind
            )
            return "not_supported"
    except KillSwitchError as exc:
        _log.exception("kill switch transition failed")
        return f"error: {type(exc).__name__}: {exc}"
    except Exception as exc:  # pragma: no cover — defensive
        _log.exception("manual_action dispatch failed")
        return f"error: {type(exc).__name__}: {exc}"
    return "ok"


async def consume_manual_actions(
    ctx: RuntimeContext, *, now: datetime | None = None
) -> int:
    """Drain the queue. Return the number of actions processed.

    The function is synchronous at heart (SQLite + KillSwitch), but kept
    ``async def`` so the orchestrator can register it as an APScheduler
    coroutine without an extra wrapper. Each iteration fetches the next
    unconsumed row and processes it; the loop terminates when the queue
    is empty so a single tick can catch up after a long pause.

    Args:
        ctx: Runtime context providing ``db_path``, ``repository`` and
            ``kill_switch``.
        now: Timestamp recorded as the consumption time. Defaults to the
            current UTC time. It is captured once, so every row consumed
            in the same drain shares it. A naive datetime is interpreted
            as local time by ``astimezone``.

    Returns:
        The number of rows marked consumed during this call.
    """
    reference = (now or datetime.now(UTC)).astimezone(UTC)
    processed = 0
    previous_id: object = None

    while True:
        conn = connect(ctx.db_path)
        try:
            action = ctx.repository.next_unconsumed_action(conn)
        finally:
            conn.close()
        if action is None:
            break
        if action.id is None:
            # A row without an id cannot be marked consumed, so it would
            # be fetched again on the next iteration: stop the drain
            # instead of continuing.
            _log.warning("manual_action without id, skipping")
            break
        if action.id == previous_id:
            # The row just marked consumed came back as unconsumed. Bail
            # out rather than re-firing the same kill-switch transition
            # in a tight loop.
            _log.error(
                "manual_action id=%s still unconsumed after marking; "
                "aborting drain",
                action.id,
            )
            break
        previous_id = action.id

        payload = _parse_payload(action.payload_json)
        result = _dispatch(ctx, action.kind, payload)

        conn = connect(ctx.db_path)
        try:
            with transaction(conn):
                ctx.repository.mark_action_consumed(
                    conn,
                    action.id,
                    consumed_by=_CONSUMER_ID,
                    result=result,
                    now=reference,
                )
        finally:
            conn.close()
        processed += 1

    if processed:
        _log.info("processed %d manual_actions", processed)
    return processed
|
||||
@@ -28,6 +28,7 @@ from cerbero_bite.runtime.dependencies import RuntimeContext, build_runtime
|
||||
from cerbero_bite.runtime.entry_cycle import EntryCycleResult, run_entry_cycle
|
||||
from cerbero_bite.runtime.health_check import HealthCheck, HealthCheckResult
|
||||
from cerbero_bite.runtime.lockfile import EngineLock
|
||||
from cerbero_bite.runtime.manual_actions_consumer import consume_manual_actions
|
||||
from cerbero_bite.runtime.monitor_cycle import MonitorCycleResult, run_monitor_cycle
|
||||
from cerbero_bite.runtime.recovery import recover_state
|
||||
from cerbero_bite.runtime.scheduler import JobSpec, build_scheduler
|
||||
@@ -45,6 +46,7 @@ _CRON_ENTRY = "0 14 * * MON"
|
||||
_CRON_MONITOR = "0 2,14 * * *"
|
||||
_CRON_HEALTH = "*/5 * * * *"
|
||||
_CRON_BACKUP = "0 * * * *"
|
||||
_CRON_MANUAL_ACTIONS = "*/1 * * * *"
|
||||
_BACKUP_RETENTION_DAYS = 30
|
||||
|
||||
|
||||
@@ -191,6 +193,7 @@ class Orchestrator:
|
||||
monitor_cron: str = _CRON_MONITOR,
|
||||
health_cron: str = _CRON_HEALTH,
|
||||
backup_cron: str = _CRON_BACKUP,
|
||||
manual_actions_cron: str = _CRON_MANUAL_ACTIONS,
|
||||
backup_dir: Path | None = None,
|
||||
backup_retention_days: int = _BACKUP_RETENTION_DAYS,
|
||||
) -> AsyncIOScheduler:
|
||||
@@ -229,12 +232,23 @@ class Orchestrator:
|
||||
|
||||
await _safe("backup", _do)
|
||||
|
||||
async def _manual_actions() -> None:
    """Scheduler job body: run one drain of the ``manual_actions`` queue."""

    async def _do() -> None:
        # The value returned by the consumer is not used here.
        await consume_manual_actions(self._ctx)

    # NOTE(review): `_safe` is defined outside this view; presumably it
    # shields the scheduler from exceptions raised by the job — confirm.
    await _safe("manual_actions", _do)
|
||||
|
||||
self._scheduler = build_scheduler(
|
||||
[
|
||||
JobSpec(name="entry", cron=entry_cron, coro_factory=_entry),
|
||||
JobSpec(name="monitor", cron=monitor_cron, coro_factory=_monitor),
|
||||
JobSpec(name="health", cron=health_cron, coro_factory=_health),
|
||||
JobSpec(name="backup", cron=backup_cron, coro_factory=_backup),
|
||||
JobSpec(
|
||||
name="manual_actions",
|
||||
cron=manual_actions_cron,
|
||||
coro_factory=_manual_actions,
|
||||
),
|
||||
]
|
||||
)
|
||||
return self._scheduler
|
||||
|
||||
Reference in New Issue
Block a user