From 7482600146a525bfc81ca0d42c65f346e252ba00 Mon Sep 17 00:00:00 2001 From: AdrianoDev Date: Sun, 10 May 2026 09:13:14 +0200 Subject: [PATCH] feat(llm): make tier-C/tier-B model + OpenRouter URL configurable from .env LLM_MODEL_TIER_C, LLM_MODEL_TIER_B and OPENROUTER_BASE_URL can now be overridden via environment variables. Defaults are unchanged (backward compatible). LLMClient accepts the three values as optional kwargs; run_phase1 passes them through from Settings. Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 5 ++++ scripts/run_phase1.py | 3 +++ src/multi_swarm/config.py | 4 +++ src/multi_swarm/llm/client.py | 16 ++++++++---- tests/unit/test_config.py | 28 +++++++++++++++++++++ tests/unit/test_llm_client.py | 46 +++++++++++++++++++++++++++++++++++ 6 files changed, 97 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 4759446..0740ef0 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,11 @@ CERBERO_BOT_TAG=swarm-poc-phase1 OPENROUTER_API_KEY= ANTHROPIC_API_KEY= +# LLM models (override Phase 1 defaults if needed) +LLM_MODEL_TIER_C=qwen/qwen-2.5-72b-instruct +LLM_MODEL_TIER_B=claude-sonnet-4-6 +OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 + # Run config RUN_NAME=phase1-spike-001 DATA_DIR=./data diff --git a/scripts/run_phase1.py b/scripts/run_phase1.py index 2e65c8b..4d380e7 100644 --- a/scripts/run_phase1.py +++ b/scripts/run_phase1.py @@ -48,6 +48,9 @@ def main() -> None: settings.anthropic_api_key.get_secret_value() if settings.anthropic_api_key else None ), + model_tier_c=settings.llm_model_tier_c, + model_tier_b=settings.llm_model_tier_b, + openrouter_base_url=settings.openrouter_base_url, ) cfg = RunConfig( diff --git a/src/multi_swarm/config.py b/src/multi_swarm/config.py index 3e95f6c..01e0026 100644 --- a/src/multi_swarm/config.py +++ b/src/multi_swarm/config.py @@ -26,6 +26,10 @@ class Settings(BaseSettings): openrouter_api_key: SecretStr anthropic_api_key: SecretStr | None = None + llm_model_tier_c: str = "qwen/qwen-2.5-72b-instruct" +
llm_model_tier_b: str = "claude-sonnet-4-6" + openrouter_base_url: str = "https://openrouter.ai/api/v1" + run_name: str = "phase1-spike-001" data_dir: Path = Field(default=Path("./data")) series_dir: Path = Field(default=Path("./series")) diff --git a/src/multi_swarm/llm/client.py b/src/multi_swarm/llm/client.py index 70416ef..7e558de 100644 --- a/src/multi_swarm/llm/client.py +++ b/src/multi_swarm/llm/client.py @@ -45,8 +45,14 @@ class LLMClient: self, openrouter_api_key: str, anthropic_api_key: str | None = None, + model_tier_c: str = MODEL_TIER_C, + model_tier_b: str = MODEL_TIER_B, + openrouter_base_url: str = OPENROUTER_BASE_URL, ) -> None: - self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL) + self.model_tier_c = model_tier_c + self.model_tier_b = model_tier_b + self.openrouter_base_url = openrouter_base_url + self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url) self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None @retry( @@ -64,7 +70,7 @@ class LLMClient: ) -> CompletionResult: if genome.model_tier == ModelTier.C: resp = self._openrouter.chat.completions.create( - model=MODEL_TIER_C, + model=self.model_tier_c, messages=[ {"role": "system", "content": system}, {"role": "user", "content": user}, @@ -80,14 +86,14 @@ class LLMClient: input_tokens=usage.prompt_tokens, output_tokens=usage.completion_tokens, tier=ModelTier.C, - model=MODEL_TIER_C, + model=self.model_tier_c, ) if self._anthropic is None: raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes") msg = self._anthropic.messages.create( - model=MODEL_TIER_B, + model=self.model_tier_b, system=system, messages=[{"role": "user", "content": user}], temperature=genome.temperature, @@ -100,5 +106,5 @@ class LLMClient: input_tokens=msg.usage.input_tokens, output_tokens=msg.usage.output_tokens, tier=ModelTier.B, - model=MODEL_TIER_B, + model=self.model_tier_b, ) diff --git a/tests/unit/test_config.py 
b/tests/unit/test_config.py index 436db38..e8c0de6 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -40,3 +40,31 @@ def test_settings_requires_tokens(monkeypatch: pytest.MonkeyPatch) -> None: # Disable .env loading to keep the test deterministic regardless of # whether a developer's local .env exists and is populated. Settings(_env_file=None) # type: ignore[call-arg] + + +def test_settings_loads_llm_model_overrides(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("LLM_MODEL_TIER_C", "deepseek/deepseek-chat") + monkeypatch.setenv("LLM_MODEL_TIER_B", "claude-opus-4-7") + monkeypatch.setenv("OPENROUTER_BASE_URL", "https://example.com/api/v1") + + s = Settings(_env_file=None) # type: ignore[call-arg] + + assert s.llm_model_tier_c == "deepseek/deepseek-chat" + assert s.llm_model_tier_b == "claude-opus-4-7" + assert s.openrouter_base_url == "https://example.com/api/v1" + + +def test_settings_llm_model_defaults(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.delenv("LLM_MODEL_TIER_C", raising=False) + monkeypatch.delenv("LLM_MODEL_TIER_B", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + + s = Settings(_env_file=None) # type: ignore[call-arg] + + assert s.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct" + assert s.llm_model_tier_b == "claude-sonnet-4-6" + assert s.openrouter_base_url == "https://openrouter.ai/api/v1" diff --git a/tests/unit/test_llm_client.py b/tests/unit/test_llm_client.py index 931614c..68f686d 100644 --- a/tests/unit/test_llm_client.py +++ b/tests/unit/test_llm_client.py @@ -75,6 +75,52 @@ def test_completion_retries_on_connection_error(mocker): assert fake_openai.chat.completions.create.call_count == 3 +def test_completion_uses_custom_model_tier_c(mocker): + 
fake_openai = mocker.MagicMock() + fake_response = mocker.MagicMock() + fake_response.choices = [ + mocker.MagicMock(message=mocker.MagicMock(content="(strategy ...)")) + ] + fake_response.usage = mocker.MagicMock(prompt_tokens=10, completion_tokens=20) + fake_openai.chat.completions.create.return_value = fake_response + mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai) + + client = LLMClient( + openrouter_api_key="or-x", + anthropic_api_key=None, + model_tier_c="deepseek/deepseek-chat", + ) + g = make_genome(ModelTier.C) + out = client.complete(g, system="sys", user="usr") + + fake_openai.chat.completions.create.assert_called_once() + call_kwargs = fake_openai.chat.completions.create.call_args.kwargs + assert call_kwargs["model"] == "deepseek/deepseek-chat" + assert out.model == "deepseek/deepseek-chat" + + +def test_completion_uses_custom_model_tier_b(mocker): + fake_anthropic = mocker.MagicMock() + fake_msg = mocker.MagicMock() + fake_msg.content = [mocker.MagicMock(text="(strategy ...)")] + fake_msg.usage = mocker.MagicMock(input_tokens=10, output_tokens=20) + fake_anthropic.messages.create.return_value = fake_msg + mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic) + + client = LLMClient( + openrouter_api_key="or-x", + anthropic_api_key="an-x", + model_tier_b="claude-opus-4-7", + ) + g = make_genome(ModelTier.B) + out = client.complete(g, system="sys", user="usr") + + fake_anthropic.messages.create.assert_called_once() + call_kwargs = fake_anthropic.messages.create.call_args.kwargs + assert call_kwargs["model"] == "claude-opus-4-7" + assert out.model == "claude-opus-4-7" + + @pytest.mark.slow def test_completion_succeeds_after_one_retry(mocker): """Dopo 1 fallimento transient, il retry riesce al 2 tentativo."""