d4fcb42fc5
HypothesisAgent.propose ora riprova una volta in caso di parse o validation error: il prompt user del retry include l'output precedente (troncato a 800 char) e il messaggio di errore, così l'LLM può auto-correggersi. Configurabile via max_retries (default 1). Cambia il modello dati di HypothesisProposal: completion (singolare) diventa completions: list[CompletionResult] con n_attempts. L'orchestrator itera su completions per registrare il costo di ogni chiamata LLM, incluse le retry. Phase 1 v4 mostrava 64% di parse failure recuperabili: il retry punta a tagliare quel tasso senza inflazionare i token oltre 2x worst-case. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
217 lines
7.1 KiB
Python
217 lines
7.1 KiB
Python
import json
|
|
|
|
from multi_swarm.agents.hypothesis import HypothesisAgent, MarketSummary
|
|
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
|
|
from multi_swarm.llm.client import CompletionResult
|
|
|
|
|
|
def make_summary() -> MarketSummary:
    """Build the fixed ``MarketSummary`` fixture shared by the tests below.

    Returns:
        A summary for ``BTC/USDT`` on the ``1h`` timeframe with hard-coded
        return statistics and a ``"high"`` volatility regime.
    """
    summary = MarketSummary(
        # Instrument identity.
        symbol="BTC/USDT",
        timeframe="1h",
        n_bars=1000,
        # Return distribution statistics.
        return_mean=0.0001,
        return_std=0.01,
        skew=0.1,
        kurtosis=3.5,
        volatility_regime="high",
    )
    return summary
|
|
|
|
|
|
# Serialized single-rule strategy used as the "well-formed LLM reply" in the
# tests: the rule compares indicator rsi(14) against the literal 70.0 with the
# "gt" operator and maps to the "entry-short" action.
_RSI_14_OPERAND = {"kind": "indicator", "name": "rsi", "params": [14]}
_SEVENTY_OPERAND = {"kind": "literal", "value": 70.0}
_RSI_GT_70_RULE = {
    "condition": {"op": "gt", "args": [_RSI_14_OPERAND, _SEVENTY_OPERAND]},
    "action": "entry-short",
}
VALID_STRATEGY_JSON = json.dumps({"rules": [_RSI_GT_70_RULE]})
|
|
|
|
|
|
def make_genome() -> HypothesisAgentGenome:
    """Build the fixed ``HypothesisAgentGenome`` fixture shared by the tests below.

    Returns:
        A genome on ``ModelTier.C`` with the ``"physicist"`` cognitive style,
        access to the ``"close"`` feature only and a lookback window of 200.
    """
    genome = HypothesisAgentGenome(
        # Prompting / persona.
        system_prompt="Pensa come un fisico.",
        cognitive_style="physicist",
        # Sampling configuration.
        temperature=0.9,
        top_p=0.95,
        model_tier=ModelTier.C,
        # Data visibility.
        feature_access=["close"],
        lookback_window=200,
    )
    return genome
|
|
|
|
|
|
def test_hypothesis_agent_calls_llm_and_parses(mocker):  # type: ignore[no-untyped-def]
    """A well-formed reply yields a strategy after exactly one LLM call."""
    reply = CompletionResult(
        text=VALID_STRATEGY_JSON,
        input_tokens=200,
        output_tokens=80,
        tier=ModelTier.C,
        model="qwen",
    )
    llm_stub = mocker.MagicMock()
    llm_stub.complete.return_value = reply

    result = HypothesisAgent(llm=llm_stub).propose(make_genome(), make_summary())

    assert result.strategy is not None
    # The single recorded completion is the one returned by the stub.
    assert result.completions[0].input_tokens == 200
    assert result.n_attempts == 1
    llm_stub.complete.assert_called_once()
|
|
|
|
|
|
def test_hypothesis_agent_returns_none_on_parse_error(mocker):  # type: ignore[no-untyped-def]
    """With retries disabled, a non-JSON reply gives no strategy and a parse error."""
    reply = CompletionResult(
        text="this is not JSON",
        input_tokens=200,
        output_tokens=80,
        tier=ModelTier.C,
        model="qwen",
    )
    llm_stub = mocker.MagicMock()
    llm_stub.complete.return_value = reply

    # max_retries=0 -> the agent is expected to stop after the first attempt.
    no_retry_agent = HypothesisAgent(llm=llm_stub, max_retries=0)
    result = no_retry_agent.propose(make_genome(), make_summary())

    assert result.strategy is None
    assert result.parse_error is not None
    assert result.n_attempts == 1
    assert llm_stub.complete.call_count == 1
|
|
|
|
|
|
def test_hypothesis_agent_extracts_json_from_markdown_fence(mocker):  # type: ignore[no-untyped-def]
    """A reply that wraps the JSON in a ```json fence with prose around it still parses."""
    # Valid strategy JSON surrounded by chatter and a markdown code fence.
    fenced_reply_text = f"Ecco la strategia:\n```json\n{VALID_STRATEGY_JSON}\n```\nFatta."
    reply = CompletionResult(
        text=fenced_reply_text,
        input_tokens=200,
        output_tokens=80,
        tier=ModelTier.C,
        model="qwen",
    )
    llm_stub = mocker.MagicMock()
    llm_stub.complete.return_value = reply

    result = HypothesisAgent(llm=llm_stub).propose(make_genome(), make_summary())

    assert result.strategy is not None
|
|
|
|
|
|
def test_hypothesis_agent_returns_error_on_invalid_strategy(mocker):  # type: ignore[no-untyped-def]
    """Syntactically valid JSON naming the indicator "wibble" is reported as an error."""
    # Same shape as the valid strategy, but the indicator name is "wibble".
    wibble_operand = {"kind": "indicator", "name": "wibble", "params": [14]}
    threshold_operand = {"kind": "literal", "value": 70.0}
    rule = {
        "condition": {"op": "gt", "args": [wibble_operand, threshold_operand]},
        "action": "entry-short",
    }
    invalid_strategy_text = json.dumps({"rules": [rule]})

    reply = CompletionResult(
        text=invalid_strategy_text,
        input_tokens=200,
        output_tokens=80,
        tier=ModelTier.C,
        model="qwen",
    )
    llm_stub = mocker.MagicMock()
    llm_stub.complete.return_value = reply

    no_retry_agent = HypothesisAgent(llm=llm_stub, max_retries=0)
    result = no_retry_agent.propose(make_genome(), make_summary())

    assert result.strategy is None
    assert result.parse_error is not None
    # The message must either name the offending indicator or say "unknown".
    assert "wibble" in result.parse_error or "unknown" in result.parse_error
|
|
|
|
|
|
def test_hypothesis_agent_retries_on_parse_error_and_succeeds(mocker):  # type: ignore[no-untyped-def]
    """Malformed first output -> valid second output -> strategy accepted."""
    # (reply text, input tokens, output tokens) for each successive LLM call.
    scripted_replies = [
        ("this is not JSON at all", 200, 80),
        ("```json\n" + VALID_STRATEGY_JSON + "\n```", 300, 120),
    ]
    llm_stub = mocker.MagicMock()
    llm_stub.complete.side_effect = [
        CompletionResult(
            text=reply_text,
            input_tokens=tokens_in,
            output_tokens=tokens_out,
            tier=ModelTier.C,
            model="qwen",
        )
        for reply_text, tokens_in, tokens_out in scripted_replies
    ]

    retrying_agent = HypothesisAgent(llm=llm_stub, max_retries=1)
    result = retrying_agent.propose(make_genome(), make_summary())

    assert result.strategy is not None
    assert result.n_attempts == 2
    # Both completions are kept, in call order.
    assert len(result.completions) == 2
    assert result.completions[0].input_tokens == 200
    assert result.completions[1].input_tokens == 300
    assert llm_stub.complete.call_count == 2

    # The second user prompt must contain the corrective marker and the
    # previous (failed) output.
    retry_kwargs = llm_stub.complete.call_args_list[1].kwargs
    assert "TENTATIVO PRECEDENTE FALLITO" in retry_kwargs["user"]
    assert "this is not JSON at all" in retry_kwargs["user"]
|
|
|
|
|
|
def test_hypothesis_agent_gives_up_after_max_retries(mocker):  # type: ignore[no-untyped-def]
    """Both attempts fail -> strategy is None and the errors are concatenated."""
    # (reply text, input tokens, output tokens) for each successive LLM call.
    scripted_replies = [
        ("garbage attempt 1", 200, 50),
        ("garbage attempt 2", 250, 60),
    ]
    llm_stub = mocker.MagicMock()
    llm_stub.complete.side_effect = [
        CompletionResult(
            text=reply_text,
            input_tokens=tokens_in,
            output_tokens=tokens_out,
            tier=ModelTier.C,
            model="qwen",
        )
        for reply_text, tokens_in, tokens_out in scripted_replies
    ]

    retrying_agent = HypothesisAgent(llm=llm_stub, max_retries=1)
    result = retrying_agent.propose(make_genome(), make_summary())

    assert result.strategy is None
    assert result.n_attempts == 2
    assert len(result.completions) == 2
    assert llm_stub.complete.call_count == 2

    assert result.parse_error is not None
    assert "attempt 1" in result.parse_error
    assert "attempt 2" in result.parse_error

    # raw_text must reflect the LAST output (not the first).
    assert result.raw_text == "garbage attempt 2"
|
|
|
|
|
|
def test_hypothesis_agent_no_retry_when_first_succeeds(mocker):  # type: ignore[no-untyped-def]
    """First attempt OK -> no retry, even with the default max_retries=1."""
    reply = CompletionResult(
        text=VALID_STRATEGY_JSON,
        input_tokens=200,
        output_tokens=80,
        tier=ModelTier.C,
        model="qwen",
    )
    llm_stub = mocker.MagicMock()
    llm_stub.complete.return_value = reply

    # No max_retries argument: the agent runs with its default (max_retries=1).
    default_agent = HypothesisAgent(llm=llm_stub)
    result = default_agent.propose(make_genome(), make_summary())

    assert result.strategy is not None
    assert result.n_attempts == 1
    assert len(result.completions) == 1
    assert llm_stub.complete.call_count == 1
|