feat(llm): make tier-C/tier-B model + OpenRouter URL configurable from .env

LLM_MODEL_TIER_C, LLM_MODEL_TIER_B and OPENROUTER_BASE_URL can now be
overridden via env. Defaults are unchanged (back-compat). LLMClient accepts
the three values as optional kwargs; run_phase1 propagates them from Settings.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-10 09:13:14 +02:00
parent ea403ec2d5
commit 7482600146
6 changed files with 97 additions and 5 deletions
+5
View File
@@ -8,6 +8,11 @@ CERBERO_BOT_TAG=swarm-poc-phase1
OPENROUTER_API_KEY= OPENROUTER_API_KEY=
ANTHROPIC_API_KEY= ANTHROPIC_API_KEY=
# LLM models (override Phase 1 defaults if needed)
LLM_MODEL_TIER_C=qwen/qwen-2.5-72b-instruct
LLM_MODEL_TIER_B=claude-sonnet-4-6
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Run config # Run config
RUN_NAME=phase1-spike-001 RUN_NAME=phase1-spike-001
DATA_DIR=./data DATA_DIR=./data
+3
View File
@@ -48,6 +48,9 @@ def main() -> None:
settings.anthropic_api_key.get_secret_value() settings.anthropic_api_key.get_secret_value()
if settings.anthropic_api_key else None if settings.anthropic_api_key else None
), ),
model_tier_c=settings.llm_model_tier_c,
model_tier_b=settings.llm_model_tier_b,
openrouter_base_url=settings.openrouter_base_url,
) )
cfg = RunConfig( cfg = RunConfig(
+4
View File
@@ -26,6 +26,10 @@ class Settings(BaseSettings):
openrouter_api_key: SecretStr openrouter_api_key: SecretStr
anthropic_api_key: SecretStr | None = None anthropic_api_key: SecretStr | None = None
llm_model_tier_c: str = "qwen/qwen-2.5-72b-instruct"
llm_model_tier_b: str = "claude-sonnet-4-6"
openrouter_base_url: str = "https://openrouter.ai/api/v1"
run_name: str = "phase1-spike-001" run_name: str = "phase1-spike-001"
data_dir: Path = Field(default=Path("./data")) data_dir: Path = Field(default=Path("./data"))
series_dir: Path = Field(default=Path("./series")) series_dir: Path = Field(default=Path("./series"))
+11 -5
View File
@@ -45,8 +45,14 @@ class LLMClient:
self, self,
openrouter_api_key: str, openrouter_api_key: str,
anthropic_api_key: str | None = None, anthropic_api_key: str | None = None,
model_tier_c: str = MODEL_TIER_C,
model_tier_b: str = MODEL_TIER_B,
openrouter_base_url: str = OPENROUTER_BASE_URL,
) -> None: ) -> None:
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL) self.model_tier_c = model_tier_c
self.model_tier_b = model_tier_b
self.openrouter_base_url = openrouter_base_url
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url)
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
@retry( @retry(
@@ -64,7 +70,7 @@ class LLMClient:
) -> CompletionResult: ) -> CompletionResult:
if genome.model_tier == ModelTier.C: if genome.model_tier == ModelTier.C:
resp = self._openrouter.chat.completions.create( resp = self._openrouter.chat.completions.create(
model=MODEL_TIER_C, model=self.model_tier_c,
messages=[ messages=[
{"role": "system", "content": system}, {"role": "system", "content": system},
{"role": "user", "content": user}, {"role": "user", "content": user},
@@ -80,14 +86,14 @@ class LLMClient:
input_tokens=usage.prompt_tokens, input_tokens=usage.prompt_tokens,
output_tokens=usage.completion_tokens, output_tokens=usage.completion_tokens,
tier=ModelTier.C, tier=ModelTier.C,
model=MODEL_TIER_C, model=self.model_tier_c,
) )
if self._anthropic is None: if self._anthropic is None:
raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes") raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes")
msg = self._anthropic.messages.create( msg = self._anthropic.messages.create(
model=MODEL_TIER_B, model=self.model_tier_b,
system=system, system=system,
messages=[{"role": "user", "content": user}], messages=[{"role": "user", "content": user}],
temperature=genome.temperature, temperature=genome.temperature,
@@ -100,5 +106,5 @@ class LLMClient:
input_tokens=msg.usage.input_tokens, input_tokens=msg.usage.input_tokens,
output_tokens=msg.usage.output_tokens, output_tokens=msg.usage.output_tokens,
tier=ModelTier.B, tier=ModelTier.B,
model=MODEL_TIER_B, model=self.model_tier_b,
) )
+28
View File
@@ -40,3 +40,31 @@ def test_settings_requires_tokens(monkeypatch: pytest.MonkeyPatch) -> None:
# Disable .env loading to keep the test deterministic regardless of # Disable .env loading to keep the test deterministic regardless of
# whether a developer's local .env exists and is populated. # whether a developer's local .env exists and is populated.
Settings(_env_file=None) # type: ignore[call-arg] Settings(_env_file=None) # type: ignore[call-arg]
def test_settings_loads_llm_model_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    """Env vars must override the Phase-1 LLM model and base-URL defaults."""
    overrides = {
        "CERBERO_TESTNET_TOKEN": "tok-test",
        "OPENROUTER_API_KEY": "or-key",
        "LLM_MODEL_TIER_C": "deepseek/deepseek-chat",
        "LLM_MODEL_TIER_B": "claude-opus-4-7",
        "OPENROUTER_BASE_URL": "https://example.com/api/v1",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
    # _env_file=None keeps the test independent of any developer-local .env.
    settings = Settings(_env_file=None)  # type: ignore[call-arg]
    assert settings.llm_model_tier_c == "deepseek/deepseek-chat"
    assert settings.llm_model_tier_b == "claude-opus-4-7"
    assert settings.openrouter_base_url == "https://example.com/api/v1"
def test_settings_llm_model_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no env overrides, Settings falls back to the Phase-1 defaults."""
    monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    # Scrub any stray values from the developer's shell so defaults apply.
    for var in ("LLM_MODEL_TIER_C", "LLM_MODEL_TIER_B", "OPENROUTER_BASE_URL"):
        monkeypatch.delenv(var, raising=False)
    settings = Settings(_env_file=None)  # type: ignore[call-arg]
    assert settings.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct"
    assert settings.llm_model_tier_b == "claude-sonnet-4-6"
    assert settings.openrouter_base_url == "https://openrouter.ai/api/v1"
+46
View File
@@ -75,6 +75,52 @@ def test_completion_retries_on_connection_error(mocker):
assert fake_openai.chat.completions.create.call_count == 3 assert fake_openai.chat.completions.create.call_count == 3
def test_completion_uses_custom_model_tier_c(mocker) -> None:
    """A tier-C completion must send, and report back, the custom model name.

    Consistency fix: adds the ``-> None`` return annotation used by the other
    tests in this module, plus a docstring.
    """
    # Stub the OpenAI client so no network call is made.
    fake_openai = mocker.MagicMock()
    fake_response = mocker.MagicMock()
    fake_response.choices = [
        mocker.MagicMock(message=mocker.MagicMock(content="(strategy ...)"))
    ]
    fake_response.usage = mocker.MagicMock(prompt_tokens=10, completion_tokens=20)
    fake_openai.chat.completions.create.return_value = fake_response
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai)
    client = LLMClient(
        openrouter_api_key="or-x",
        anthropic_api_key=None,  # tier C never touches the Anthropic client
        model_tier_c="deepseek/deepseek-chat",
    )
    g = make_genome(ModelTier.C)
    out = client.complete(g, system="sys", user="usr")
    fake_openai.chat.completions.create.assert_called_once()
    call_kwargs = fake_openai.chat.completions.create.call_args.kwargs
    # The override must reach both the outgoing request and the result metadata.
    assert call_kwargs["model"] == "deepseek/deepseek-chat"
    assert out.model == "deepseek/deepseek-chat"
def test_completion_uses_custom_model_tier_b(mocker) -> None:
    """A tier-B completion must send, and report back, the custom model name.

    Consistency fix: adds the ``-> None`` return annotation used by the other
    tests in this module, plus a docstring.
    """
    # Stub the Anthropic client so no network call is made.
    fake_anthropic = mocker.MagicMock()
    fake_msg = mocker.MagicMock()
    fake_msg.content = [mocker.MagicMock(text="(strategy ...)")]
    fake_msg.usage = mocker.MagicMock(input_tokens=10, output_tokens=20)
    fake_anthropic.messages.create.return_value = fake_msg
    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)
    client = LLMClient(
        openrouter_api_key="or-x",
        anthropic_api_key="an-x",  # required: tier B routes through Anthropic
        model_tier_b="claude-opus-4-7",
    )
    g = make_genome(ModelTier.B)
    out = client.complete(g, system="sys", user="usr")
    fake_anthropic.messages.create.assert_called_once()
    call_kwargs = fake_anthropic.messages.create.call_args.kwargs
    # The override must reach both the outgoing request and the result metadata.
    assert call_kwargs["model"] == "claude-opus-4-7"
    assert out.model == "claude-opus-4-7"
@pytest.mark.slow @pytest.mark.slow
def test_completion_succeeds_after_one_retry(mocker): def test_completion_succeeds_after_one_retry(mocker):
"""Dopo 1 fallimento transient, il retry riesce al 2 tentativo.""" """Dopo 1 fallimento transient, il retry riesce al 2 tentativo."""