feat(llm): retry tenacity su errori transient connection/timeout/5xx

Avvolge LLMClient.complete con tenacity (3 tentativi, backoff esponenziale 2-10s) che ritenta solo su errori transient degli SDK OpenAI/Anthropic (APIConnectionError, APITimeoutError, InternalServerError). RateLimit, Authentication e 4xx non vengono ritentati. reraise=True preserva l'eccezione originale dopo l'esaurimento dei tentativi. Aggiunti 2 test (marker slow): esaurimento retry su APIConnectionError e successo al secondo tentativo dopo APITimeoutError.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 22:32:34 +02:00
parent 18259325a1
commit ea403ec2d5
2 changed files with 73 additions and 0 deletions
@@ -2,8 +2,16 @@ from __future__ import annotations

 from dataclasses import dataclass

+import anthropic
+import openai
 from anthropic import Anthropic
 from openai import OpenAI
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)

 from ..genome.hypothesis import HypothesisAgentGenome, ModelTier

@@ -12,6 +20,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"  # via OpenRouter
 MODEL_TIER_B = "claude-sonnet-4-6"  # via Anthropic
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

+# Transient errors: retried. RateLimit/Auth/InvalidRequest: NOT retried.
+# The tuple lists the connection, timeout and internal-server error classes
+# of both SDKs; it is passed to retry_if_exception_type on LLMClient.complete.
+_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
+    openai.APIConnectionError,
+    openai.APITimeoutError,
+    openai.InternalServerError,
+    anthropic.APIConnectionError,
+    anthropic.APITimeoutError,
+    anthropic.InternalServerError,
+)
+

@dataclass(frozen=True)
 class CompletionResult:
@@ -31,6 +49,12 @@ class LLMClient:
        self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
        self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None

+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1.0, min=2.0, max=10.0),
+        retry=retry_if_exception_type(_RETRYABLE_EXCEPTIONS),
+        reraise=True,
+    )
    def complete(
        self,
        genome: HypothesisAgentGenome,
@@ -1,3 +1,5 @@
+import pytest
+
 from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
 from multi_swarm.llm.client import CompletionResult, LLMClient

@@ -51,3 +53,50 @@ def test_completion_tier_b_uses_anthropic(mocker):
    assert out.input_tokens == 80
    assert out.output_tokens == 150
    assert out.tier == ModelTier.B
+
+
+@pytest.mark.slow
+def test_completion_retries_on_connection_error(mocker):
+    """Retry makes 3 attempts on APIConnectionError, then re-raises.
+
+    NOTE(review): nothing here patches the retry backoff, so the test
+    presumably waits out the real delays -- hence the ``slow`` marker.
+    """
+    import openai
+
+    # Every call to the mocked chat-completions endpoint raises the same error.
+    fake_openai = mocker.MagicMock()
+    fake_openai.chat.completions.create.side_effect = openai.APIConnectionError(
+        request=mocker.MagicMock()
+    )
+    # Replace the OpenAI class as seen by the client module, so LLMClient
+    # is built around the fake instead of a real SDK client.
+    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai)
+
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
+    g = make_genome(ModelTier.C)
+
+    # The original exception type must surface once the attempts are used up.
+    with pytest.raises(openai.APIConnectionError):
+        client.complete(g, system="sys", user="usr")
+
+    # One initial call plus two retries.
+    assert fake_openai.chat.completions.create.call_count == 3
+
+
+@pytest.mark.slow
+def test_completion_succeeds_after_one_retry(mocker):
+    """After one transient failure, the retry succeeds on the 2nd attempt.
+
+    NOTE(review): nothing here patches the retry backoff, so the test
+    presumably waits out one real delay -- hence the ``slow`` marker.
+    """
+    import openai
+
+    # Minimal stand-in for a chat-completions response: one choice carrying
+    # the message content, plus token usage counters.
+    fake_response = mocker.MagicMock()
+    fake_response.choices = [
+        mocker.MagicMock(message=mocker.MagicMock(content="(strategy ...)"))
+    ]
+    fake_response.usage = mocker.MagicMock(prompt_tokens=100, completion_tokens=200)
+
+    # A list side_effect is consumed one item per call: the first call raises
+    # the timeout error, the second returns the fake response.
+    fake_openai = mocker.MagicMock()
+    fake_openai.chat.completions.create.side_effect = [
+        openai.APITimeoutError(request=mocker.MagicMock()),
+        fake_response,
+    ]
+    # Replace the OpenAI class as seen by the client module, so LLMClient
+    # is built around the fake instead of a real SDK client.
+    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=fake_openai)
+
+    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
+    g = make_genome(ModelTier.C)
+    out = client.complete(g, system="sys", user="usr")
+
+    # The result comes from the second call; exactly two calls were made.
+    assert isinstance(out, CompletionResult)
+    assert out.text == "(strategy ...)"
+    assert fake_openai.chat.completions.create.call_count == 2