feat(llm): retry tenacity su errori transient connection/timeout/5xx
Avvolge LLMClient.complete con tenacity (3 attempts, backoff esponenziale 2-10s) che ritenta solo su errori transient di OpenAI/Anthropic SDK (APIConnectionError, APITimeoutError, InternalServerError). RateLimit, Authentication e 4xx non vengono ritentati. reraise=True preserva l'eccezione originale dopo l'esaurimento dei tentativi. Aggiunti 2 test (marker slow): esaurimento retry su APIConnectionError e successo al secondo tentativo dopo APITimeoutError. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,8 +2,16 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import anthropic
|
||||||
|
import openai
|
||||||
from anthropic import Anthropic
|
from anthropic import Anthropic
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
retry_if_exception_type,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
)
|
||||||
|
|
||||||
from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
|
from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
|
||||||
|
|
||||||
@@ -12,6 +20,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct" # via OpenRouter
|
|||||||
MODEL_TIER_B = "claude-sonnet-4-6"  # via Anthropic
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"


# Transient failures from each SDK that are safe to retry. Rate-limit,
# authentication and other 4xx errors are deliberately excluded so they
# surface immediately instead of being retried.
_OPENAI_TRANSIENT: tuple[type[BaseException], ...] = (
    openai.APIConnectionError,
    openai.APITimeoutError,
    openai.InternalServerError,
)
_ANTHROPIC_TRANSIENT: tuple[type[BaseException], ...] = (
    anthropic.APIConnectionError,
    anthropic.APITimeoutError,
    anthropic.InternalServerError,
)
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
    _OPENAI_TRANSIENT + _ANTHROPIC_TRANSIENT
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class CompletionResult:
|
class CompletionResult:
|
||||||
@@ -31,6 +49,12 @@ class LLMClient:
|
|||||||
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
|
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
|
||||||
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
||||||
|
|
||||||
|
@retry(
|
||||||
|
stop=stop_after_attempt(3),
|
||||||
|
wait=wait_exponential(multiplier=1.0, min=2.0, max=10.0),
|
||||||
|
retry=retry_if_exception_type(_RETRYABLE_EXCEPTIONS),
|
||||||
|
reraise=True,
|
||||||
|
)
|
||||||
def complete(
|
def complete(
|
||||||
self,
|
self,
|
||||||
genome: HypothesisAgentGenome,
|
genome: HypothesisAgentGenome,
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
|
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
|
||||||
from multi_swarm.llm.client import CompletionResult, LLMClient
|
from multi_swarm.llm.client import CompletionResult, LLMClient
|
||||||
|
|
||||||
@@ -51,3 +53,50 @@ def test_completion_tier_b_uses_anthropic(mocker):
|
|||||||
assert out.input_tokens == 80
|
assert out.input_tokens == 80
|
||||||
assert out.output_tokens == 150
|
assert out.output_tokens == 150
|
||||||
assert out.tier == ModelTier.B
|
assert out.tier == ModelTier.B
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
def test_completion_retries_on_connection_error(mocker):
    """Three attempts all fail with APIConnectionError; the error is re-raised."""
    import openai

    # Stubbed OpenAI client whose create() raises a transient connection
    # error on every call, so the retry budget is fully exhausted.
    openai_stub = mocker.MagicMock()
    openai_stub.chat.completions.create.side_effect = openai.APIConnectionError(
        request=mocker.MagicMock()
    )
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=openai_stub)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.C)

    # reraise=True means the original exception escapes after the last attempt.
    with pytest.raises(openai.APIConnectionError):
        client.complete(genome, system="sys", user="usr")

    # stop_after_attempt(3) => exactly three calls before giving up.
    assert openai_stub.chat.completions.create.call_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
def test_completion_succeeds_after_one_retry(mocker):
    """After one transient timeout, the retry succeeds on the second attempt."""
    import openai

    # Well-formed response delivered on the second attempt.
    ok_response = mocker.MagicMock()
    ok_response.choices = [
        mocker.MagicMock(message=mocker.MagicMock(content="(strategy ...)"))
    ]
    ok_response.usage = mocker.MagicMock(prompt_tokens=100, completion_tokens=200)

    # First call raises a retryable timeout; second call returns the response.
    openai_stub = mocker.MagicMock()
    openai_stub.chat.completions.create.side_effect = [
        openai.APITimeoutError(request=mocker.MagicMock()),
        ok_response,
    ]
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=openai_stub)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.C)
    result = client.complete(genome, system="sys", user="usr")

    assert isinstance(result, CompletionResult)
    assert result.text == "(strategy ...)"
    assert openai_stub.chat.completions.create.call_count == 2
|
||||||
|
|||||||
Reference in New Issue
Block a user