feat(llm): retry tenacity su errori transient connection/timeout/5xx

Avvolge LLMClient.complete con tenacity (3 attempts, backoff esponenziale
2-10s) che ritenta solo su errori transient di OpenAI/Anthropic SDK
(APIConnectionError, APITimeoutError, InternalServerError). RateLimit,
Authentication e 4xx non vengono ritentati. reraise=True preserva
l'eccezione originale dopo l'esaurimento dei tentativi.

Aggiunti 2 test (marker slow): esaurimento retry su APIConnectionError
e successo al secondo tentativo dopo APITimeoutError.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-09 22:32:34 +02:00
parent 18259325a1
commit ea403ec2d5
2 changed files with 73 additions and 0 deletions
+24
View File
@@ -2,8 +2,16 @@ from __future__ import annotations
from dataclasses import dataclass
import anthropic
import openai
from anthropic import Anthropic
from openai import OpenAI
from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_exponential,
)
from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
@@ -12,6 +20,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct" # via OpenRouter
MODEL_TIER_B = "claude-sonnet-4-6"  # via Anthropic
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
# Transient errors: retried. RateLimit / Auth / InvalidRequest: NOT retried,
# because repeating them cannot succeed (quota, credentials, or bad request).
# Both the OpenAI and Anthropic SDK variants are listed since LLMClient
# talks to both providers.
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
    openai.APIConnectionError,
    openai.APITimeoutError,
    openai.InternalServerError,
    anthropic.APIConnectionError,
    anthropic.APITimeoutError,
    anthropic.InternalServerError,
)
@dataclass(frozen=True)
class CompletionResult:
@@ -31,6 +49,12 @@ class LLMClient:
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1.0, min=2.0, max=10.0),
retry=retry_if_exception_type(_RETRYABLE_EXCEPTIONS),
reraise=True,
)
def complete(
    self,
    genome: HypothesisAgentGenome,
+49
View File
@@ -1,3 +1,5 @@
import pytest
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
from multi_swarm.llm.client import CompletionResult, LLMClient
@@ -51,3 +53,50 @@ def test_completion_tier_b_uses_anthropic(mocker):
assert out.input_tokens == 80
assert out.output_tokens == 150
assert out.tier == ModelTier.B
@pytest.mark.slow
def test_completion_retries_on_connection_error(mocker):
    """All three retry attempts hit APIConnectionError; the error is then re-raised."""
    import openai

    mock_client = mocker.MagicMock()
    mock_client.chat.completions.create.side_effect = openai.APIConnectionError(
        request=mocker.MagicMock()
    )
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=mock_client)

    llm = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.C)

    # reraise=True on the retry decorator surfaces the original exception type.
    with pytest.raises(openai.APIConnectionError):
        llm.complete(genome, system="sys", user="usr")

    # stop_after_attempt(3) -> exactly three calls to the underlying API.
    assert mock_client.chat.completions.create.call_count == 3
@pytest.mark.slow
def test_completion_succeeds_after_one_retry(mocker):
    """One transient timeout is retried and the second attempt succeeds."""
    import openai

    ok_response = mocker.MagicMock()
    message = mocker.MagicMock(content="(strategy ...)")
    ok_response.choices = [mocker.MagicMock(message=message)]
    ok_response.usage = mocker.MagicMock(prompt_tokens=100, completion_tokens=200)

    mock_client = mocker.MagicMock()
    # First call raises a retryable timeout, second call returns the response.
    mock_client.chat.completions.create.side_effect = [
        openai.APITimeoutError(request=mocker.MagicMock()),
        ok_response,
    ]
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=mock_client)

    llm = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    result = llm.complete(make_genome(ModelTier.C), system="sys", user="usr")

    assert isinstance(result, CompletionResult)
    assert result.text == "(strategy ...)"
    # One failure + one success -> exactly two API calls.
    assert mock_client.chat.completions.create.call_count == 2