feat(V2): /health/ready con ping client + middleware request log strutturato + request_id correlation

- /health/ready: ping di tutti i client (exchange, env) cached con
  timeout 2s, status ready|degraded|not_ready, opt-in 503 via
  READY_FAILS_ON_DEGRADED.
- Middleware mcp.request: 1 riga JSON per HTTP request con request_id,
  method, path, status_code, duration_ms, actor, bot_tag, exchange,
  tool, client_ip, user_agent.
- request_id propagato in request.state, audit log e error envelope per
  correlazione cross-cutting.
- Aggiunto async health() come probe minimo a bybit/alpaca/macro/
  sentiment/deribit (hyperliquid lo aveva già).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
AdrianoDev
2026-05-01 09:03:28 +02:00
parent 9afd087152
commit 8ecc1a24a9
13 changed files with 509 additions and 2 deletions
+5
View File
@@ -74,6 +74,7 @@ def audit_write_op(
payload: dict[str, Any] | None = None,
result: dict[str, Any] | None = None,
error: str | None = None,
request_id: str | None = None,
) -> None:
"""Emit a structured audit log record per write operation.
@@ -86,6 +87,8 @@ def audit_write_op(
payload: input non-sensibile (qty, side, leverage, ecc.).
result: output del client (order_id, status, ecc.).
error: stringa errore se l'operazione ha fallito.
request_id: id propagato dal middleware request log per correlazione
tra audit log e request log.
"""
_configure_audit_sink()
record: dict[str, Any] = {
@@ -97,6 +100,8 @@ def audit_write_op(
"target": target,
"payload": payload or {},
}
if request_id is not None:
record["request_id"] = request_id
if result is not None:
record["result"] = _summarize_result(result)
if error is not None:
+3
View File
@@ -58,6 +58,7 @@ async def audit_call(
"""Esegue tool_fn e logga audit (success o error). Riraisola eccezioni."""
actor = getattr(request.state, "environment", None)
bot_tag = getattr(request.state, "bot_tag", None)
request_id = getattr(request.state, "request_id", None)
target = _extract_target(params, target_field)
payload = _safe_dump(params)
@@ -72,6 +73,7 @@ async def audit_call(
target=target,
payload=payload,
error=f"{type(e).__name__}: {e}",
request_id=request_id,
)
raise
@@ -93,5 +95,6 @@ async def audit_call(
target=target,
payload=payload,
result=audit_result,
request_id=request_id,
)
return result
+104
View File
@@ -0,0 +1,104 @@
"""Middleware: structured JSON request log per ogni HTTP request.
Emette una riga JSON sul logger ``mcp.request`` con campi correlabili
all'audit log via ``request_id``. Espone anche ``request_id`` su
``request.state`` così che handler/exception handler downstream possano
includerlo nei propri payload.
"""
from __future__ import annotations
import logging
import time
import uuid
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
from fastapi import FastAPI, Request
from starlette.responses import Response
from cerbero_mcp.common.logging import get_json_logger
_logger = get_json_logger("mcp.request", level=logging.INFO)
def _extract_exchange(path: str) -> str | None:
"""Estrae il nome dell'exchange dal path se è un ``/mcp-{exchange}/...``."""
if not path.startswith("/mcp-"):
return None
rest = path[len("/mcp-"):]
end = rest.find("/")
if end < 0:
return rest or None
return rest[:end] or None
def _extract_tool(path: str) -> str | None:
"""Estrae nome tool dal path ``/mcp-X/tools/Y``."""
parts = path.split("/")
# ["", "mcp-deribit", "tools", "place_order"]
if len(parts) >= 4 and parts[2] == "tools":
return parts[3] or None
return None
def install_request_log_middleware(app: FastAPI) -> None:
    """Register an HTTP middleware on ``app`` that logs one record per request.

    For every request the middleware:

    * generates a fresh ``request_id`` (``uuid4`` hex) and stores it on
      ``request.state.request_id`` so downstream handlers can reuse it;
    * awaits the rest of the stack and measures the elapsed time;
    * emits a single record on the module logger with ``event``,
      ``request_id``, ``method``, ``path``, ``status_code``, ``duration_ms``
      and ``timestamp``, plus ``user_agent``, ``client_ip``, ``actor``,
      ``bot_tag``, ``exchange``, ``tool`` and ``error`` when available.

    The record is logged at ERROR level when ``call_next`` raised an
    ``Exception`` (which is then re-raised), and at INFO level otherwise.

    Args:
        app: The FastAPI application to attach the middleware to.
    """

    @app.middleware("http")
    async def request_log(
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        # Publish the id before calling downstream so that audit logging and
        # error envelopes can read it from request.state.
        rid = uuid.uuid4().hex
        request.state.request_id = rid

        started = time.perf_counter()
        # 500 is what gets reported when call_next never yields a response.
        status_code = 500
        failure: str | None = None
        response: Response | None = None
        try:
            response = await call_next(request)
            status_code = response.status_code
        except Exception as exc:
            # Keep the logged message bounded; the exception itself propagates.
            failure = f"{type(exc).__name__}: {str(exc)[:200]}"
            raise
        finally:
            elapsed_ms = (time.perf_counter() - started) * 1000
            url_path = request.url.path
            record: dict[str, Any] = {
                "event": "request",
                "request_id": rid,
                "method": request.method,
                "path": url_path,
                "status_code": status_code,
                "duration_ms": round(elapsed_ms, 2),
                "timestamp": datetime.now(UTC).isoformat(),
            }

            agent = request.headers.get("user-agent")
            if agent:
                record["user_agent"] = agent[:200]

            peer = request.client
            if peer is not None:
                record["client_ip"] = peer.host

            # Optional fields that are included only when truthy, in the
            # order they should appear in the record.
            optional_fields = (
                ("actor", getattr(request.state, "environment", None)),
                ("bot_tag", getattr(request.state, "bot_tag", None)),
                ("exchange", _extract_exchange(url_path)),
                ("tool", _extract_tool(url_path)),
            )
            for key, value in optional_fields:
                if value:
                    record[key] = value

            if failure:
                record["error"] = failure
                _logger.error("request", extra=record)
            else:
                _logger.info("request", extra=record)

        # Reaching this point means call_next returned normally: any exception
        # has already propagated out of the try block above.
        assert response is not None
        return response