feat(agents): hypothesis retry-with-error-feedback (max 1 retry)

HypothesisAgent.propose ora riprova una volta in caso di parse o
validation error: il prompt user del retry include l'output precedente
(troncato a 800 char) e il messaggio di errore, così l'LLM può
auto-correggersi. Configurabile via max_retries (default 1).

Cambia il modello dati di HypothesisProposal: completion (singolare)
diventa completions: list[CompletionResult] con n_attempts. L'orchestrator
itera su completions per registrare il costo di ogni chiamata LLM,
incluse le retry.

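Phase 1 v4 mostrava 64% di parse failure recuperabili: il retry punta
a tagliare quel tasso mantenendo il costo in token intorno a 2x nel caso
peggiore (con max_retries=1: una sola chiamata aggiuntiva, il cui prompt
ripete quello originale e aggiunge al massimo 800 char di output precedente
più il messaggio di errore).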

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-10 21:20:47 +02:00
parent 44eb6436c1
commit d4fcb42fc5
3 changed files with 193 additions and 47 deletions
+87 -29
View File
@@ -1,7 +1,7 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from dataclasses import dataclass, field
from ..genome.hypothesis import HypothesisAgentGenome
from ..llm.client import CompletionResult, LLMClient
@@ -23,10 +23,20 @@ class MarketSummary:
@dataclass(frozen=True)
class HypothesisProposal:
"""Outcome of one ``propose()`` call of the HypothesisAgent.

``completions`` is meant to ALWAYS hold at least one element: the first
attempt. If the first attempt fails and retry budget remains, the
completions of the following attempts are appended, one per retry made.
``n_attempts == len(completions)``. ``raw_text`` reflects the LAST LLM
output observed (the one that produced ``strategy``, or the one behind the
last parse error).
"""
# Parsed and validated strategy, or None when no attempt produced one.
strategy: Strategy | None
# Text of the last LLM output observed.
raw_text: str
# NOTE(review): this singular field sits right next to ``completions`` in
# this diff view and looks like the pre-change field that the commit
# replaces — confirm it is absent from the final file.
completion: CompletionResult
# One entry per LLM call, in call order (first attempt, then retries).
# NOTE(review): the empty-list default contradicts the "at least one
# element" invariant stated above, and ``frozen=True`` only blocks
# rebinding the attribute — the list itself can still be mutated.
completions: list[CompletionResult] = field(default_factory=list)
# Description of the failure(s) when ``strategy`` is None; None on success.
parse_error: str | None = None
# Number of LLM calls made.
# NOTE(review): nothing enforces ``n_attempts == len(completions)``; with
# both defaults left untouched the two disagree (1 vs 0).
n_attempts: int = 1
SYSTEM_TEMPLATE = """\
@@ -113,7 +123,7 @@ USER_TEMPLATE = """\
Mercato: {symbol} timeframe {timeframe}, {n_bars} barre osservate.
Statistiche return: mean={return_mean:.5f}, std={return_std:.5f}, \
skew={skew:.3f}, kurt={kurtosis:.3f}.
Regime volatilità : {volatility_regime}.
Regime volatilità: {volatility_regime}.
Feature accessibili dal tuo genoma: {feature_access}.
Lookback massimo che puoi usare nel ragionamento: {lookback_window} barre.
@@ -122,6 +132,21 @@ Genera una strategia che cerchi anomalie sfruttabili in questo regime.
"""
# User prompt for a retry: the original user prompt, followed by the previous
# output and the error it produced, then an instruction to answer again with
# a single JSON object inside a ```json fence.
_RETRY_TEMPLATE = """\
{original_user}
--- TENTATIVO PRECEDENTE FALLITO ---
Output: {previous_raw}
Errore: {previous_error}
---
Correggi l'errore e rispondi di nuovo con un singolo oggetto JSON valido
dentro fence ```json...```, seguendo strettamente lo schema fornito nel
SYSTEM message.
"""
# Maximum number of characters of the previous raw output that is quoted back
# in the retry prompt (anything beyond this is cut off).
_RETRY_RAW_TRUNCATE = 800
_JSON_FENCE_RE = re.compile(
r"```(?:json)?\s*(\{[\s\S]*\})\s*```",
re.MULTILINE,
@@ -175,9 +200,25 @@ def _extract_json(text: str) -> str | None:
return _balance_braces(stripped)
def _try_parse(text: str) -> tuple[Strategy | None, str | None]:
    """Extract, parse and validate a strategy from raw LLM output.

    Returns a ``(strategy, error)`` pair in which exactly one element is
    intended to be ``None``: the validated strategy on success, otherwise a
    message saying why extraction, parsing or validation failed.
    """
    json_text = _extract_json(text)
    if json_text is None:
        return None, "no JSON object found in output"

    try:
        candidate = parse_strategy(json_text)
        validate_strategy(candidate)
    except (ParseError, ValidationError) as exc:
        # Both failure kinds are reported the same way: as their message.
        return None, str(exc)
    else:
        return candidate, None
class HypothesisAgent:
def __init__(self, llm: LLMClient):
def __init__(self, llm: LLMClient, max_retries: int = 1):
if max_retries < 0:
raise ValueError("max_retries must be >= 0")
self._llm = llm
self._max_retries = max_retries
def propose(
self,
@@ -188,7 +229,7 @@ class HypothesisAgent:
cognitive_style=genome.cognitive_style,
system_prompt=genome.system_prompt,
)
user = USER_TEMPLATE.format(
original_user = USER_TEMPLATE.format(
symbol=market.symbol,
timeframe=market.timeframe,
n_bars=market.n_bars,
@@ -201,28 +242,45 @@ class HypothesisAgent:
lookback_window=genome.lookback_window,
)
completion = self._llm.complete(genome, system=system, user=user)
completions: list[CompletionResult] = []
errors: list[str] = []
last_raw = ""
max_attempts = 1 + self._max_retries
payload = _extract_json(completion.text)
if payload is None:
return HypothesisProposal(
strategy=None,
raw_text=completion.text,
completion=completion,
parse_error="no JSON object found in output",
)
try:
ast = parse_strategy(payload)
validate_strategy(ast)
return HypothesisProposal(
strategy=ast,
raw_text=completion.text,
completion=completion,
)
except (ParseError, ValidationError) as e:
return HypothesisProposal(
strategy=None,
raw_text=completion.text,
completion=completion,
parse_error=str(e),
)
for attempt in range(max_attempts):
if attempt == 0:
user = original_user
else:
truncated = last_raw[:_RETRY_RAW_TRUNCATE]
user = _RETRY_TEMPLATE.format(
original_user=original_user,
previous_raw=truncated,
previous_error=errors[-1],
)
completion = self._llm.complete(genome, system=system, user=user)
completions.append(completion)
last_raw = completion.text
strategy, err = _try_parse(completion.text)
if strategy is not None:
return HypothesisProposal(
strategy=strategy,
raw_text=completion.text,
completions=completions,
parse_error=None,
n_attempts=len(completions),
)
assert err is not None
errors.append(err)
chained = " | ".join(
f"attempt {i + 1}: {e}" for i, e in enumerate(errors)
)
return HypothesisProposal(
strategy=None,
raw_text=last_raw,
completions=completions,
parse_error=chained,
n_attempts=len(completions),
)
+17 -15
View File
@@ -99,21 +99,23 @@ def run_phase1(
continue # elite gia' valutata in generazione precedente
repo.save_genome(run_id=run_id, generation_idx=gen, genome=genome)
proposal = hypothesis_agent.propose(genome, market)
cost_record = cost_tracker.record(
input_tokens=proposal.completion.input_tokens,
output_tokens=proposal.completion.output_tokens,
tier=proposal.completion.tier,
run_id=run_id,
agent_id=genome.id,
)
repo.save_cost_record(
run_id=run_id,
agent_id=genome.id,
tier=cost_record.tier.value,
input_tokens=cost_record.input_tokens,
output_tokens=cost_record.output_tokens,
cost_usd=cost_record.cost_usd,
)
# Registra costo per OGNI completion (incluse retry).
for completion in proposal.completions:
cost_record = cost_tracker.record(
input_tokens=completion.input_tokens,
output_tokens=completion.output_tokens,
tier=completion.tier,
run_id=run_id,
agent_id=genome.id,
)
repo.save_cost_record(
run_id=run_id,
agent_id=genome.id,
tier=cost_record.tier.value,
input_tokens=cost_record.input_tokens,
output_tokens=cost_record.output_tokens,
cost_usd=cost_record.cost_usd,
)
if proposal.strategy is None:
repo.save_evaluation(