feat(llm): make tier-C/tier-B model + OpenRouter URL configurable from .env
LLM_MODEL_TIER_C, LLM_MODEL_TIER_B, and OPENROUTER_BASE_URL can now be overridden via env. Defaults are unchanged (backward compatible). LLMClient accepts the three values as optional kwargs; run_phase1 propagates them from Settings. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,11 @@ CERBERO_BOT_TAG=swarm-poc-phase1
|
|||||||
OPENROUTER_API_KEY=
|
OPENROUTER_API_KEY=
|
||||||
ANTHROPIC_API_KEY=
|
ANTHROPIC_API_KEY=
|
||||||
|
|
||||||
|
# LLM models (override Phase 1 defaults if needed)
|
||||||
|
LLM_MODEL_TIER_C=qwen/qwen-2.5-72b-instruct
|
||||||
|
LLM_MODEL_TIER_B=claude-sonnet-4-6
|
||||||
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
|
||||||
# Run config
|
# Run config
|
||||||
RUN_NAME=phase1-spike-001
|
RUN_NAME=phase1-spike-001
|
||||||
DATA_DIR=./data
|
DATA_DIR=./data
|
||||||
|
|||||||
@@ -48,6 +48,9 @@ def main() -> None:
|
|||||||
settings.anthropic_api_key.get_secret_value()
|
settings.anthropic_api_key.get_secret_value()
|
||||||
if settings.anthropic_api_key else None
|
if settings.anthropic_api_key else None
|
||||||
),
|
),
|
||||||
|
model_tier_c=settings.llm_model_tier_c,
|
||||||
|
model_tier_b=settings.llm_model_tier_b,
|
||||||
|
openrouter_base_url=settings.openrouter_base_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = RunConfig(
|
cfg = RunConfig(
|
||||||
|
|||||||
@@ -26,6 +26,10 @@ class Settings(BaseSettings):
|
|||||||
openrouter_api_key: SecretStr
|
openrouter_api_key: SecretStr
|
||||||
anthropic_api_key: SecretStr | None = None
|
anthropic_api_key: SecretStr | None = None
|
||||||
|
|
||||||
|
llm_model_tier_c: str = "qwen/qwen-2.5-72b-instruct"
|
||||||
|
llm_model_tier_b: str = "claude-sonnet-4-6"
|
||||||
|
openrouter_base_url: str = "https://openrouter.ai/api/v1"
|
||||||
|
|
||||||
run_name: str = "phase1-spike-001"
|
run_name: str = "phase1-spike-001"
|
||||||
data_dir: Path = Field(default=Path("./data"))
|
data_dir: Path = Field(default=Path("./data"))
|
||||||
series_dir: Path = Field(default=Path("./series"))
|
series_dir: Path = Field(default=Path("./series"))
|
||||||
|
|||||||
@@ -45,8 +45,14 @@ class LLMClient:
|
|||||||
self,
|
self,
|
||||||
openrouter_api_key: str,
|
openrouter_api_key: str,
|
||||||
anthropic_api_key: str | None = None,
|
anthropic_api_key: str | None = None,
|
||||||
|
model_tier_c: str = MODEL_TIER_C,
|
||||||
|
model_tier_b: str = MODEL_TIER_B,
|
||||||
|
openrouter_base_url: str = OPENROUTER_BASE_URL,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
|
self.model_tier_c = model_tier_c
|
||||||
|
self.model_tier_b = model_tier_b
|
||||||
|
self.openrouter_base_url = openrouter_base_url
|
||||||
|
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=openrouter_base_url)
|
||||||
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
@@ -64,7 +70,7 @@ class LLMClient:
|
|||||||
) -> CompletionResult:
|
) -> CompletionResult:
|
||||||
if genome.model_tier == ModelTier.C:
|
if genome.model_tier == ModelTier.C:
|
||||||
resp = self._openrouter.chat.completions.create(
|
resp = self._openrouter.chat.completions.create(
|
||||||
model=MODEL_TIER_C,
|
model=self.model_tier_c,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": system},
|
{"role": "system", "content": system},
|
||||||
{"role": "user", "content": user},
|
{"role": "user", "content": user},
|
||||||
@@ -80,14 +86,14 @@ class LLMClient:
|
|||||||
input_tokens=usage.prompt_tokens,
|
input_tokens=usage.prompt_tokens,
|
||||||
output_tokens=usage.completion_tokens,
|
output_tokens=usage.completion_tokens,
|
||||||
tier=ModelTier.C,
|
tier=ModelTier.C,
|
||||||
model=MODEL_TIER_C,
|
model=self.model_tier_c,
|
||||||
)
|
)
|
||||||
|
|
||||||
if self._anthropic is None:
|
if self._anthropic is None:
|
||||||
raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes")
|
raise RuntimeError("ANTHROPIC_API_KEY required for tier B genomes")
|
||||||
|
|
||||||
msg = self._anthropic.messages.create(
|
msg = self._anthropic.messages.create(
|
||||||
model=MODEL_TIER_B,
|
model=self.model_tier_b,
|
||||||
system=system,
|
system=system,
|
||||||
messages=[{"role": "user", "content": user}],
|
messages=[{"role": "user", "content": user}],
|
||||||
temperature=genome.temperature,
|
temperature=genome.temperature,
|
||||||
@@ -100,5 +106,5 @@ class LLMClient:
|
|||||||
input_tokens=msg.usage.input_tokens,
|
input_tokens=msg.usage.input_tokens,
|
||||||
output_tokens=msg.usage.output_tokens,
|
output_tokens=msg.usage.output_tokens,
|
||||||
tier=ModelTier.B,
|
tier=ModelTier.B,
|
||||||
model=MODEL_TIER_B,
|
model=self.model_tier_b,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -40,3 +40,31 @@ def test_settings_requires_tokens(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||||||
# Disable .env loading to keep the test deterministic regardless of
|
# Disable .env loading to keep the test deterministic regardless of
|
||||||
# whether a developer's local .env exists and is populated.
|
# whether a developer's local .env exists and is populated.
|
||||||
Settings(_env_file=None) # type: ignore[call-arg]
|
Settings(_env_file=None) # type: ignore[call-arg]
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_loads_llm_model_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    """Env vars for the LLM models and the OpenRouter URL override the defaults."""
    overrides = {
        "CERBERO_TESTNET_TOKEN": "tok-test",
        "OPENROUTER_API_KEY": "or-key",
        "LLM_MODEL_TIER_C": "deepseek/deepseek-chat",
        "LLM_MODEL_TIER_B": "claude-opus-4-7",
        "OPENROUTER_BASE_URL": "https://example.com/api/v1",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)

    # .env loading disabled so only the monkeypatched env vars matter.
    loaded = Settings(_env_file=None)  # type: ignore[call-arg]

    assert loaded.llm_model_tier_c == "deepseek/deepseek-chat"
    assert loaded.llm_model_tier_b == "claude-opus-4-7"
    assert loaded.openrouter_base_url == "https://example.com/api/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_llm_model_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
    """Without env overrides, Settings falls back to the Phase 1 model defaults."""
    monkeypatch.setenv("CERBERO_TESTNET_TOKEN", "tok-test")
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    # Strip any LLM overrides a developer's shell might carry.
    for env_name in ("LLM_MODEL_TIER_C", "LLM_MODEL_TIER_B", "OPENROUTER_BASE_URL"):
        monkeypatch.delenv(env_name, raising=False)

    loaded = Settings(_env_file=None)  # type: ignore[call-arg]

    assert loaded.llm_model_tier_c == "qwen/qwen-2.5-72b-instruct"
    assert loaded.llm_model_tier_b == "claude-sonnet-4-6"
    assert loaded.openrouter_base_url == "https://openrouter.ai/api/v1"
|
||||||
|
|||||||
@@ -75,6 +75,52 @@ def test_completion_retries_on_connection_error(mocker):
|
|||||||
assert fake_openai.chat.completions.create.call_count == 3
|
assert fake_openai.chat.completions.create.call_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_uses_custom_model_tier_c(mocker):
    """A custom tier-C model name is forwarded to the OpenRouter call and echoed in the result."""
    fake_message = mocker.MagicMock(content="(strategy ...)")
    fake_response = mocker.MagicMock()
    fake_response.choices = [mocker.MagicMock(message=fake_message)]
    fake_response.usage = mocker.MagicMock(prompt_tokens=10, completion_tokens=20)

    fake_openai = mocker.MagicMock()
    fake_openai.chat.completions.create.return_value = fake_response
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai)

    client = LLMClient(
        openrouter_api_key="or-x",
        anthropic_api_key=None,
        model_tier_c="deepseek/deepseek-chat",
    )
    out = client.complete(make_genome(ModelTier.C), system="sys", user="usr")

    create_mock = fake_openai.chat.completions.create
    create_mock.assert_called_once()
    # Both the outgoing request and the returned result must carry the override.
    assert create_mock.call_args.kwargs["model"] == "deepseek/deepseek-chat"
    assert out.model == "deepseek/deepseek-chat"
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_uses_custom_model_tier_b(mocker):
    """A custom tier-B model name is forwarded to the Anthropic call and echoed in the result."""
    fake_msg = mocker.MagicMock()
    fake_msg.content = [mocker.MagicMock(text="(strategy ...)")]
    fake_msg.usage = mocker.MagicMock(input_tokens=10, output_tokens=20)

    fake_anthropic = mocker.MagicMock()
    fake_anthropic.messages.create.return_value = fake_msg
    mocker.patch("multi_swarm.llm.client.Anthropic", return_value=fake_anthropic)

    client = LLMClient(
        openrouter_api_key="or-x",
        anthropic_api_key="an-x",
        model_tier_b="claude-opus-4-7",
    )
    out = client.complete(make_genome(ModelTier.B), system="sys", user="usr")

    create_mock = fake_anthropic.messages.create
    create_mock.assert_called_once()
    # Both the outgoing request and the returned result must carry the override.
    assert create_mock.call_args.kwargs["model"] == "claude-opus-4-7"
    assert out.model == "claude-opus-4-7"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_completion_succeeds_after_one_retry(mocker):
|
def test_completion_succeeds_after_one_retry(mocker):
|
||||||
"""Dopo 1 fallimento transient, il retry riesce al 2 tentativo."""
|
"""Dopo 1 fallimento transient, il retry riesce al 2 tentativo."""
|
||||||
|
|||||||
Reference in New Issue
Block a user