feat(llm): full multi-tier S/A/B/C/D with routing + pricing
Extends ModelTier to 5 levels (S/A/B/C/D) with automatic routing: S/A/B go through the Anthropic SDK, C/D through OpenRouter (OpenAI SDK). Adds pricing for tier S (Opus), A (Sonnet placeholder) and D (Llama). Refactors LLMClient.complete to dispatch via a tier_models map and the _call_anthropic / _call_openrouter helpers. Settings expose all 5 models as env-configurable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
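For orientation, a minimal, self-contained sketch of the tier routing this commit introduces. The tier letters, model identifiers, and the Anthropic/OpenRouter split are taken from the diff below; route() and these module-level names are illustrative only, not the repository's actual code (retries, usage accounting, and the real SDK clients are omitted).

# Minimal sketch of the tier -> provider routing introduced by this commit.
from enum import StrEnum


class ModelTier(StrEnum):
    S = "S"  # top-tier reasoning via Anthropic
    A = "A"  # premium override via Anthropic
    B = "B"  # Sonnet 4.6 via Anthropic
    C = "C"  # Qwen 2.5 72B via OpenRouter
    D = "D"  # ultra-economic via OpenRouter


ANTHROPIC_TIERS = (ModelTier.S, ModelTier.A, ModelTier.B)

TIER_MODELS = {
    ModelTier.S: "claude-opus-4-7",
    ModelTier.A: "claude-sonnet-4-6",
    ModelTier.B: "claude-sonnet-4-6",
    ModelTier.C: "qwen/qwen-2.5-72b-instruct",
    ModelTier.D: "meta-llama/llama-3.3-70b-instruct",
}


def route(tier: ModelTier) -> tuple[str, str]:
    """Return (provider, model) the way LLMClient.complete dispatches a call."""
    model = TIER_MODELS[tier]
    provider = "anthropic" if tier in ANTHROPIC_TIERS else "openrouter"
    return provider, model


# S/A/B go to the Anthropic SDK, C/D to OpenRouter's OpenAI-compatible API.
assert route(ModelTier.S) == ("anthropic", "claude-opus-4-7")
assert route(ModelTier.D) == ("openrouter", "meta-llama/llama-3.3-70b-instruct")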
@@ -9,8 +9,11 @@ OPENROUTER_API_KEY=
 ANTHROPIC_API_KEY=
 
 # LLM models (override Phase 1 defaults if needed)
-LLM_MODEL_TIER_C=qwen/qwen-2.5-72b-instruct
+LLM_MODEL_TIER_S=claude-opus-4-7
+LLM_MODEL_TIER_A=claude-sonnet-4-6
 LLM_MODEL_TIER_B=claude-sonnet-4-6
+LLM_MODEL_TIER_C=qwen/qwen-2.5-72b-instruct
+LLM_MODEL_TIER_D=meta-llama/llama-3.3-70b-instruct
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 
 # Run config
@@ -48,8 +48,11 @@ def main() -> None:
             settings.anthropic_api_key.get_secret_value()
             if settings.anthropic_api_key else None
         ),
-        model_tier_c=settings.llm_model_tier_c,
+        model_tier_s=settings.llm_model_tier_s,
+        model_tier_a=settings.llm_model_tier_a,
         model_tier_b=settings.llm_model_tier_b,
+        model_tier_c=settings.llm_model_tier_c,
+        model_tier_d=settings.llm_model_tier_d,
         openrouter_base_url=settings.openrouter_base_url,
     )
 
@@ -26,8 +26,11 @@ class Settings(BaseSettings):
     openrouter_api_key: SecretStr
     anthropic_api_key: SecretStr | None = None
 
-    llm_model_tier_c: str = "qwen/qwen-2.5-72b-instruct"
+    llm_model_tier_s: str = "claude-opus-4-7"
+    llm_model_tier_a: str = "claude-sonnet-4-6"
     llm_model_tier_b: str = "claude-sonnet-4-6"
+    llm_model_tier_c: str = "qwen/qwen-2.5-72b-instruct"
+    llm_model_tier_d: str = "meta-llama/llama-3.3-70b-instruct"
     openrouter_base_url: str = "https://openrouter.ai/api/v1"
 
     run_name: str = "phase1-spike-001"
@@ -8,8 +8,11 @@ from typing import Any
 
 
 class ModelTier(StrEnum):
+    S = "S"  # top-tier reasoning (Opus / equivalent) via Anthropic
+    A = "A"  # premium override via Anthropic
     B = "B"  # Sonnet 4.6 via Anthropic
     C = "C"  # Qwen 2.5 72B via OpenRouter
+    D = "D"  # ultra-economic (Llama / cheap models) via OpenRouter
 
 
 @dataclass
@@ -16,8 +16,11 @@ from tenacity import (
 from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
 
 # Models configured for Phase 1
-MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"  # via OpenRouter
+MODEL_TIER_S = "claude-opus-4-7"  # via Anthropic
+MODEL_TIER_A = "claude-sonnet-4-6"  # via Anthropic (premium override)
 MODEL_TIER_B = "claude-sonnet-4-6"  # via Anthropic
+MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"  # via OpenRouter
+MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"  # via OpenRouter
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 
 # Transient errors: retry. RateLimit/Auth/InvalidRequest: NO retry.
@@ -41,17 +44,33 @@ class CompletionResult:
 
 
 class LLMClient:
+    _ANTHROPIC_TIERS: tuple[ModelTier, ...] = (ModelTier.S, ModelTier.A, ModelTier.B)
+    _OPENROUTER_TIERS: tuple[ModelTier, ...] = (ModelTier.C, ModelTier.D)
+
     def __init__(
         self,
         openrouter_api_key: str,
         anthropic_api_key: str | None = None,
-        model_tier_c: str = MODEL_TIER_C,
+        model_tier_s: str = MODEL_TIER_S,
+        model_tier_a: str = MODEL_TIER_A,
         model_tier_b: str = MODEL_TIER_B,
+        model_tier_c: str = MODEL_TIER_C,
+        model_tier_d: str = MODEL_TIER_D,
         openrouter_base_url: str = OPENROUTER_BASE_URL,
     ) -> None:
-        self.model_tier_c = model_tier_c
+        self.model_tier_s = model_tier_s
+        self.model_tier_a = model_tier_a
         self.model_tier_b = model_tier_b
+        self.model_tier_c = model_tier_c
+        self.model_tier_d = model_tier_d
         self.openrouter_base_url = openrouter_base_url
+        self._tier_models: dict[ModelTier, str] = {
+            ModelTier.S: model_tier_s,
+            ModelTier.A: model_tier_a,
+            ModelTier.B: model_tier_b,
+            ModelTier.C: model_tier_c,
+            ModelTier.D: model_tier_d,
+        }
         self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url)
         self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
 
@@ -68,9 +87,21 @@ class LLMClient:
         user: str,
         max_tokens: int = 2000,
     ) -> CompletionResult:
-        if genome.model_tier == ModelTier.C:
+        model = self._tier_models[genome.model_tier]
+        if genome.model_tier in self._ANTHROPIC_TIERS:
+            return self._call_anthropic(genome, system, user, max_tokens, model)
+        return self._call_openrouter(genome, system, user, max_tokens, model)
+
+    def _call_openrouter(
+        self,
+        genome: HypothesisAgentGenome,
+        system: str,
+        user: str,
+        max_tokens: int,
+        model: str,
+    ) -> CompletionResult:
         resp = self._openrouter.chat.completions.create(
-            model=self.model_tier_c,
+            model=model,
             messages=[
                 {"role": "system", "content": system},
                 {"role": "user", "content": user},
@@ -85,15 +116,24 @@ class LLMClient:
             text=resp.choices[0].message.content or "",
             input_tokens=usage.prompt_tokens,
             output_tokens=usage.completion_tokens,
-            tier=ModelTier.C,
-            model=self.model_tier_c,
+            tier=genome.model_tier,
+            model=model,
         )
 
+    def _call_anthropic(
+        self,
+        genome: HypothesisAgentGenome,
+        system: str,
+        user: str,
+        max_tokens: int,
+        model: str,
+    ) -> CompletionResult:
         if self._anthropic is None:
-            raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes")
+            raise RuntimeError(
+                f"ANTHROPIC_API_KEY required for tier {genome.model_tier.value} genomes"
+            )
         msg = self._anthropic.messages.create(
-            model=self.model_tier_b,
+            model=model,
             system=system,
             messages=[{"role": "user", "content": user}],
             temperature=genome.temperature,
@@ -105,6 +145,6 @@ class LLMClient:
             text=text,
             input_tokens=msg.usage.input_tokens,
             output_tokens=msg.usage.output_tokens,
-            tier=ModelTier.B,
-            model=self.model_tier_b,
+            tier=genome.model_tier,
+            model=model,
         )
@@ -8,8 +8,11 @@ from typing import Any
 from ..genome.hypothesis import ModelTier
 
 PRICE_PER_M_TOKENS: dict[ModelTier, dict[str, float]] = {
-    ModelTier.C: {"input": 0.40, "output": 0.40},
+    ModelTier.S: {"input": 15.00, "output": 75.00},
+    ModelTier.A: {"input": 3.00, "output": 15.00},
     ModelTier.B: {"input": 3.00, "output": 15.00},
+    ModelTier.C: {"input": 0.40, "output": 0.40},
+    ModelTier.D: {"input": 0.10, "output": 0.30},
 }
 
 
@@ -45,26 +45,38 @@ def test_settings_requires_tokens(monkeypatch: pytest.MonkeyPatch) -> None:
 def test_settings_loads_llm_model_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
     monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-    monkeypatch.setenv("LLM_MODEL_TIER_C", "deepseek/deepseek-chat")
+    monkeypatch.setenv("LLM_MODEL_TIER_S", "claude-mega-x")
+    monkeypatch.setenv("LLM_MODEL_TIER_A", "claude-premium-y")
     monkeypatch.setenv("LLM_MODEL_TIER_B", "claude-opus-4-7")
+    monkeypatch.setenv("LLM_MODEL_TIER_C", "deepseek/deepseek-chat")
+    monkeypatch.setenv("LLM_MODEL_TIER_D", "mistralai/mistral-7b")
     monkeypatch.setenv("OPENROUTER_BASE_URL", "https://example.com/api/v1")
 
     s = Settings(_env_file=None)  # type: ignore[call-arg]
 
-    assert s.llm_model_tier_c == "deepseek/deepseek-chat"
+    assert s.llm_model_tier_s == "claude-mega-x"
+    assert s.llm_model_tier_a == "claude-premium-y"
     assert s.llm_model_tier_b == "claude-opus-4-7"
+    assert s.llm_model_tier_c == "deepseek/deepseek-chat"
+    assert s.llm_model_tier_d == "mistralai/mistral-7b"
     assert s.openrouter_base_url == "https://example.com/api/v1"
 
 
 def test_settings_llm_model_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
     monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-    monkeypatch.delenv("LLM_MODEL_TIER_C", raising=False)
+    monkeypatch.delenv("LLM_MODEL_TIER_S", raising=False)
+    monkeypatch.delenv("LLM_MODEL_TIER_A", raising=False)
     monkeypatch.delenv("LLM_MODEL_TIER_B", raising=False)
+    monkeypatch.delenv("LLM_MODEL_TIER_C", raising=False)
+    monkeypatch.delenv("LLM_MODEL_TIER_D", raising=False)
     monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
 
     s = Settings(_env_file=None)  # type: ignore[call-arg]
 
-    assert s.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct"
+    assert s.llm_model_tier_s == "claude-opus-4-7"
+    assert s.llm_model_tier_a == "claude-sonnet-4-6"
     assert s.llm_model_tier_b == "claude-sonnet-4-6"
+    assert s.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct"
+    assert s.llm_model_tier_d == "meta-llama/llama-3.3-70b-instruct"
     assert s.openrouter_base_url == "https://openrouter.ai/api/v1"
@@ -30,3 +30,34 @@ def test_tracker_per_tier_breakdown():
     summary = t.summary()
     assert "C" in summary["by_tier"]
     assert "B" in summary["by_tier"]
+
+
+def test_estimate_cost_tier_s():
+    cost = estimate_cost(input_tokens=1_000_000, output_tokens=1_000_000, tier=ModelTier.S)
+    assert cost == 15.00 + 75.00
+
+
+def test_estimate_cost_tier_a():
+    cost = estimate_cost(input_tokens=1_000_000, output_tokens=1_000_000, tier=ModelTier.A)
+    assert cost == 3.00 + 15.00
+
+
+def test_estimate_cost_tier_d():
+    cost = estimate_cost(input_tokens=1_000_000, output_tokens=1_000_000, tier=ModelTier.D)
+    assert cost == 0.10 + 0.30
+
+
+def test_tracker_summary_contains_all_five_tiers():
+    t = CostTracker()
+    for tier in (ModelTier.S, ModelTier.A, ModelTier.B, ModelTier.C, ModelTier.D):
+        t.record(
+            input_tokens=1_000,
+            output_tokens=1_000,
+            tier=tier,
+            run_id="r",
+            agent_id=f"a-{tier.value}",
+        )
+    summary = t.summary()
+    for tier_letter in ("S", "A", "B", "C", "D"):
+        assert tier_letter in summary["by_tier"]
+        assert summary["by_tier"][tier_letter]["calls"] == 1
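For reference, the per-million pricing and the estimate_cost tests above reduce to simple arithmetic. A self-contained sketch of that calculation follows; the estimate_cost shown here is a hypothetical stand-in keyed by tier letter, not the repository's actual helper (which takes a ModelTier).

# Hypothetical cost helper, assuming USD prices per 1M tokens as in the
# PRICE_PER_M_TOKENS table added by this commit.
PRICE_PER_M_TOKENS = {
    "S": {"input": 15.00, "output": 75.00},
    "A": {"input": 3.00, "output": 15.00},
    "B": {"input": 3.00, "output": 15.00},
    "C": {"input": 0.40, "output": 0.40},
    "D": {"input": 0.10, "output": 0.30},
}


def estimate_cost(input_tokens: int, output_tokens: int, tier: str) -> float:
    """Scale token counts to millions, then apply the per-tier price table."""
    price = PRICE_PER_M_TOKENS[tier]
    return (input_tokens / 1_000_000) * price["input"] + (
        output_tokens / 1_000_000
    ) * price["output"]


# 1M input + 1M output on tier S: 15.00 + 75.00 = 90.00 USD, matching
# test_estimate_cost_tier_s above.
assert estimate_cost(1_000_000, 1_000_000, "S") == 90.00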
@@ -48,3 +48,22 @@ def test_genome_id_is_deterministic_on_content():
         top_p=0.9, model_tier=ModelTier.C, lookback_window=100, cognitive_style="x",
     )
     assert g1.id == g2.id
+
+
+def test_genome_all_tiers_serde_roundtrip():
+    """All 5 tiers (S, A, B, C, D) survive a to_dict/from_dict round trip."""
+    for tier in (ModelTier.S, ModelTier.A, ModelTier.B, ModelTier.C, ModelTier.D):
+        g = HypothesisAgentGenome(
+            system_prompt="prompt",
+            feature_access=["close"],
+            temperature=0.7,
+            top_p=0.9,
+            model_tier=tier,
+            lookback_window=128,
+            cognitive_style="generic",
+        )
+        payload = g.to_dict()
+        assert payload["model_tier"] == tier.value
+        g2 = HypothesisAgentGenome.from_dict(payload)
+        assert g2.model_tier == tier
+        assert g2.id == g.id
@@ -121,6 +121,94 @@ def test_completion_uses_custom_model_tier_b(mocker):
     assert out.model == "claude-opus-4-7"
 
 
+def test_completion_tier_s_uses_anthropic_with_opus(mocker):
+    fake_anthropic = mocker.MagicMock()
+    fake_msg = mocker.MagicMock()
+    fake_msg.content = [mocker.MagicMock(text="(strategy s)")]
+    fake_msg.usage = mocker.MagicMock(input_tokens=50, output_tokens=100)
+    fake_anthropic.messages.create.return_value = fake_msg
+    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)
+
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key="an-x")
+    g = make_genome(ModelTier.S)
+    out = client.complete(g, system="sys", user="usr")
+
+    fake_anthropic.messages.create.assert_called_once()
+    call_kwargs = fake_anthropic.messages.create.call_args.kwargs
+    assert call_kwargs["model"] == "claude-opus-4-7"
+    assert out.tier == ModelTier.S
+    assert out.model == "claude-opus-4-7"
+
+
+def test_completion_tier_a_uses_anthropic_with_sonnet(mocker):
+    fake_anthropic = mocker.MagicMock()
+    fake_msg = mocker.MagicMock()
+    fake_msg.content = [mocker.MagicMock(text="(strategy a)")]
+    fake_msg.usage = mocker.MagicMock(input_tokens=40, output_tokens=80)
+    fake_anthropic.messages.create.return_value = fake_msg
+    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)
+
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key="an-x")
+    g = make_genome(ModelTier.A)
+    out = client.complete(g, system="sys", user="usr")
+
+    fake_anthropic.messages.create.assert_called_once()
+    call_kwargs = fake_anthropic.messages.create.call_args.kwargs
+    assert call_kwargs["model"] == "claude-sonnet-4-6"
+    assert out.tier == ModelTier.A
+    assert out.model == "claude-sonnet-4-6"
+
+
+def test_completion_tier_d_uses_openrouter_with_llama(mocker):
+    fake_openai = mocker.MagicMock()
+    fake_response = mocker.MagicMock()
+    fake_response.choices = [
+        mocker.MagicMock(message=mocker.MagicMock(content="(strategy d)"))
+    ]
+    fake_response.usage = mocker.MagicMock(prompt_tokens=30, completion_tokens=70)
+    fake_openai.chat.completions.create.return_value = fake_response
+    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai)
+
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
+    g = make_genome(ModelTier.D)
+    out = client.complete(g, system="sys", user="usr")
+
+    fake_openai.chat.completions.create.assert_called_once()
+    call_kwargs = fake_openai.chat.completions.create.call_args.kwargs
+    assert call_kwargs["model"] == "meta-llama/llama-3.3-70b-instruct"
+    assert out.tier == ModelTier.D
+    assert out.model == "meta-llama/llama-3.3-70b-instruct"
+
+
+def test_completion_uses_custom_model_tier_s(mocker):
+    fake_anthropic = mocker.MagicMock()
+    fake_msg = mocker.MagicMock()
+    fake_msg.content = [mocker.MagicMock(text="(strategy custom-s)")]
+    fake_msg.usage = mocker.MagicMock(input_tokens=10, output_tokens=20)
+    fake_anthropic.messages.create.return_value = fake_msg
+    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)
+
+    client = LLMClient(
+        openrouter_api_key="or-x",
+        anthropic_api_key="an-x",
+        model_tier_s="claude-future-mega",
+    )
+    g = make_genome(ModelTier.S)
+    out = client.complete(g, system="sys", user="usr")
+
+    call_kwargs = fake_anthropic.messages.create.call_args.kwargs
+    assert call_kwargs["model"] == "claude-future-mega"
+    assert out.model == "claude-future-mega"
+
+
+def test_completion_tier_s_without_anthropic_key_raises(mocker):
+    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=mocker.MagicMock())
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
+    g = make_genome(ModelTier.S)
+    with pytest.raises(RuntimeError, match="tier S"):
+        client.complete(g, system="sys", user="usr")
+
+
 @pytest.mark.slow
 def test_completion_succeeds_after_one_retry(mocker):
     """After a single transient failure, the retry succeeds on the second attempt."""