feat(llm): full multi-tier S/A/B/C/D with routing + pricing

Estende ModelTier a 5 livelli (S/A/B/C/D) con routing automatico:
S/A/B via Anthropic SDK, C/D via OpenRouter (OpenAI SDK). Aggiunge
prezzi per tier S (Opus), A (Sonnet placeholder) e D (Llama). Refactor
LLMClient.complete con dispatch tramite tier_models map e helper
_call_anthropic / _call_openrouter. Settings esposte per tutti e 5
i modelli env-configurabili.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-10 09:18:57 +02:00
parent 7482600146
commit 33d8e275e7
10 changed files with 241 additions and 36 deletions
+16 -4
View File
@@ -45,26 +45,38 @@ def test_settings_requires_tokens(monkeypatch: pytest.MonkeyPatch) -> None:
def test_settings_loads_llm_model_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    """All five tier model names and the OpenRouter base URL honor env overrides.

    Fix: removed duplicated LLM_MODEL_TIER_C setenv and assert lines (merge
    residue) — each env var is now set and asserted exactly once.
    """
    monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    monkeypatch.setenv("LLM_MODEL_TIER_S", "claude-mega-x")
    monkeypatch.setenv("LLM_MODEL_TIER_A", "claude-premium-y")
    monkeypatch.setenv("LLM_MODEL_TIER_B", "claude-opus-4-7")
    monkeypatch.setenv("LLM_MODEL_TIER_C", "deepseek/deepseek-chat")
    monkeypatch.setenv("LLM_MODEL_TIER_D", "mistralai/mistral-7b")
    monkeypatch.setenv("OPENROUTER_BASE_URL", "https://example.com/api/v1")
    # _env_file=None keeps a developer's local .env from leaking into the test.
    s = Settings(_env_file=None)  # type: ignore[call-arg]
    assert s.llm_model_tier_s == "claude-mega-x"
    assert s.llm_model_tier_a == "claude-premium-y"
    assert s.llm_model_tier_b == "claude-opus-4-7"
    assert s.llm_model_tier_c == "deepseek/deepseek-chat"
    assert s.llm_model_tier_d == "mistralai/mistral-7b"
    assert s.openrouter_base_url == "https://example.com/api/v1"
def test_settings_llm_model_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no env overrides, every tier falls back to its documented default.

    Fix: removed duplicated LLM_MODEL_TIER_C delenv and assert lines (merge
    residue) — each env var is cleared and its default asserted exactly once.
    """
    monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    # Clear any ambient overrides so the defaults are actually exercised.
    monkeypatch.delenv("LLM_MODEL_TIER_S", raising=False)
    monkeypatch.delenv("LLM_MODEL_TIER_A", raising=False)
    monkeypatch.delenv("LLM_MODEL_TIER_B", raising=False)
    monkeypatch.delenv("LLM_MODEL_TIER_C", raising=False)
    monkeypatch.delenv("LLM_MODEL_TIER_D", raising=False)
    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
    s = Settings(_env_file=None)  # type: ignore[call-arg]
    assert s.llm_model_tier_s == "claude-opus-4-7"
    assert s.llm_model_tier_a == "claude-sonnet-4-6"
    assert s.llm_model_tier_b == "claude-sonnet-4-6"
    assert s.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct"
    assert s.llm_model_tier_d == "meta-llama/llama-3.3-70b-instruct"
    assert s.openrouter_base_url == "https://openrouter.ai/api/v1"
+31
View File
@@ -30,3 +30,34 @@ def test_tracker_per_tier_breakdown():
summary = t.summary()
assert "C" in summary["by_tier"]
assert "B" in summary["by_tier"]
def test_estimate_cost_tier_s():
    """Tier S (Opus) at 1M in / 1M out costs the input plus output rate."""
    result = estimate_cost(
        input_tokens=1_000_000,
        output_tokens=1_000_000,
        tier=ModelTier.S,
    )
    assert result == 15.00 + 75.00
def test_estimate_cost_tier_a():
    """Tier A (Sonnet) at 1M in / 1M out costs the input plus output rate."""
    result = estimate_cost(
        input_tokens=1_000_000,
        output_tokens=1_000_000,
        tier=ModelTier.A,
    )
    assert result == 3.00 + 15.00
def test_estimate_cost_tier_d():
    """Tier D (Llama) at 1M in / 1M out costs the input plus output rate."""
    result = estimate_cost(
        input_tokens=1_000_000,
        output_tokens=1_000_000,
        tier=ModelTier.D,
    )
    assert result == 0.10 + 0.30
def test_tracker_summary_contains_all_five_tiers():
    """One recorded call per tier yields a by_tier entry with calls == 1 for each."""
    tracker = CostTracker()
    every_tier = (ModelTier.S, ModelTier.A, ModelTier.B, ModelTier.C, ModelTier.D)
    for current in every_tier:
        tracker.record(
            input_tokens=1_000,
            output_tokens=1_000,
            tier=current,
            run_id="r",
            agent_id=f"a-{current.value}",
        )
    by_tier = tracker.summary()["by_tier"]
    for letter in "SABCD":
        assert letter in by_tier
        assert by_tier[letter]["calls"] == 1
+19
View File
@@ -48,3 +48,22 @@ def test_genome_id_is_deterministic_on_content():
top_p=0.9, model_tier=ModelTier.C, lookback_window=100, cognitive_style="x",
)
assert g1.id == g2.id
def test_genome_all_tiers_serde_roundtrip():
    """All five tiers (S, A, B, C, D) survive a to_dict/from_dict round trip."""
    every_tier = (ModelTier.S, ModelTier.A, ModelTier.B, ModelTier.C, ModelTier.D)
    for tier in every_tier:
        genome = HypothesisAgentGenome(
            system_prompt="prompt",
            feature_access=["close"],
            temperature=0.7,
            top_p=0.9,
            model_tier=tier,
            lookback_window=128,
            cognitive_style="generic",
        )
        serialized = genome.to_dict()
        # The tier must serialize to its string value, not the enum object.
        assert serialized["model_tier"] == tier.value
        restored = HypothesisAgentGenome.from_dict(serialized)
        assert restored.model_tier == tier
        # Content-derived id must be stable across the round trip.
        assert restored.id == genome.id
+88
View File
@@ -121,6 +121,94 @@ def test_completion_uses_custom_model_tier_b(mocker):
assert out.model == "claude-opus-4-7"
def test_completion_tier_s_uses_anthropic_with_opus(mocker):
    """Tier S routes through the Anthropic SDK with the Opus model."""
    reply = mocker.MagicMock()
    reply.content = [mocker.MagicMock(text="(strategy s)")]
    reply.usage = mocker.MagicMock(input_tokens=50, output_tokens=100)
    sdk = mocker.MagicMock()
    sdk.messages.create.return_value = reply
    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=sdk)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key="an-x")
    result = client.complete(make_genome(ModelTier.S), system="sys", user="usr")

    sdk.messages.create.assert_called_once()
    assert sdk.messages.create.call_args.kwargs["model"] == "claude-opus-4-7"
    assert result.tier == ModelTier.S
    assert result.model == "claude-opus-4-7"
def test_completion_tier_a_uses_anthropic_with_sonnet(mocker):
    """Tier A routes through the Anthropic SDK with the Sonnet model."""
    reply = mocker.MagicMock()
    reply.content = [mocker.MagicMock(text="(strategy a)")]
    reply.usage = mocker.MagicMock(input_tokens=40, output_tokens=80)
    sdk = mocker.MagicMock()
    sdk.messages.create.return_value = reply
    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=sdk)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key="an-x")
    result = client.complete(make_genome(ModelTier.A), system="sys", user="usr")

    sdk.messages.create.assert_called_once()
    assert sdk.messages.create.call_args.kwargs["model"] == "claude-sonnet-4-6"
    assert result.tier == ModelTier.A
    assert result.model == "claude-sonnet-4-6"
def test_completion_tier_d_uses_openrouter_with_llama(mocker):
    """Tier D routes through the OpenAI SDK (OpenRouter) with the Llama model."""
    reply = mocker.MagicMock()
    reply.choices = [
        mocker.MagicMock(message=mocker.MagicMock(content="(strategy d)"))
    ]
    reply.usage = mocker.MagicMock(prompt_tokens=30, completion_tokens=70)
    sdk = mocker.MagicMock()
    sdk.chat.completions.create.return_value = reply
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=sdk)

    # No Anthropic key: tier D must still work via OpenRouter alone.
    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    result = client.complete(make_genome(ModelTier.D), system="sys", user="usr")

    sdk.chat.completions.create.assert_called_once()
    model_used = sdk.chat.completions.create.call_args.kwargs["model"]
    assert model_used == "meta-llama/llama-3.3-70b-instruct"
    assert result.tier == ModelTier.D
    assert result.model == "meta-llama/llama-3.3-70b-instruct"
def test_completion_uses_custom_model_tier_s(mocker):
    """A constructor-level tier-S model override is passed through to Anthropic.

    Fix: added `assert_called_once()` before reading `call_args`, consistent
    with the sibling tier tests — otherwise a zero-call failure would surface
    as a confusing AttributeError on `call_args.kwargs` instead of a clear
    mock assertion.
    """
    fake_anthropic = mocker.MagicMock()
    fake_msg = mocker.MagicMock()
    fake_msg.content = [mocker.MagicMock(text="(strategy custom-s)")]
    fake_msg.usage = mocker.MagicMock(input_tokens=10, output_tokens=20)
    fake_anthropic.messages.create.return_value = fake_msg
    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)
    client = LLMClient(
        openrouter_api_key="or-x",
        anthropic_api_key="an-x",
        model_tier_s="claude-future-mega",
    )
    g = make_genome(ModelTier.S)
    out = client.complete(g, system="sys", user="usr")
    fake_anthropic.messages.create.assert_called_once()
    call_kwargs = fake_anthropic.messages.create.call_args.kwargs
    assert call_kwargs["model"] == "claude-future-mega"
    assert out.model == "claude-future-mega"
def test_completion_tier_s_without_anthropic_key_raises(mocker):
    """Requesting tier S with no Anthropic key must fail loudly, not fall back."""
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=mocker.MagicMock())
    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.S)
    with pytest.raises(RuntimeError, match="tier S"):
        client.complete(genome, system="sys", user="usr")
@pytest.mark.slow
def test_completion_succeeds_after_one_retry(mocker):
"""Dopo 1 fallimento transient, il retry riesce al 2 tentativo."""