feat(llm): retry tenacity su errori transient connection/timeout/5xx
Avvolge LLMClient.complete con tenacity (3 attempts, backoff esponenziale 2-10s) che ritenta solo su errori transient di OpenAI/Anthropic SDK (APIConnectionError, APITimeoutError, InternalServerError). RateLimit, Authentication e 4xx non vengono ritentati. reraise=True preserva l'eccezione originale dopo l'esaurimento dei tentativi. Aggiunti 2 test (marker slow): esaurimento retry su APIConnectionError e successo al secondo tentativo dopo APITimeoutError. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,8 +2,16 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import anthropic
|
||||||
|
import openai
|
||||||
from anthropic import Anthropic
|
from anthropic import Anthropic
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
retry_if_exception_type,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
)
|
||||||
|
|
||||||
from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
|
from ..genome.hypothesis import HypothesisAgentGenome, ModelTier
|
||||||
|
|
||||||
@@ -12,6 +20,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct" # via OpenRouter
|
|||||||
MODEL_TIER_B = "claude-sonnet-4-6"  # via Anthropic
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"


# Transient failures from each SDK that are safe to retry. Rate-limit,
# authentication and other 4xx errors are deliberately excluded so they
# surface immediately instead of being retried.
_OPENAI_TRANSIENT: tuple[type[BaseException], ...] = (
    openai.APIConnectionError,
    openai.APITimeoutError,
    openai.InternalServerError,
)
_ANTHROPIC_TRANSIENT: tuple[type[BaseException], ...] = (
    anthropic.APIConnectionError,
    anthropic.APITimeoutError,
    anthropic.InternalServerError,
)
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
    _OPENAI_TRANSIENT + _ANTHROPIC_TRANSIENT
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class CompletionResult:
|
class CompletionResult:
|
||||||
@@ -31,6 +49,12 @@ class LLMClient:
|
|||||||
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
|
self._openrouter = OpenAI(api_key=openrouter_api_key, base_url=OPENROUTER_BASE_URL)
|
||||||
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
self._anthropic = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None
|
||||||
|
|
||||||
|
@retry(
|
||||||
|
stop=stop_after_attempt(3),
|
||||||
|
wait=wait_exponential(multiplier=1.0, min=2.0, max=10.0),
|
||||||
|
retry=retry_if_exception_type(_RETRYABLE_EXCEPTIONS),
|
||||||
|
reraise=True,
|
||||||
|
)
|
||||||
def complete(
|
def complete(
|
||||||
self,
|
self,
|
||||||
genome: HypothesisAgentGenome,
|
genome: HypothesisAgentGenome,
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
|
from multi_swarm.genome.hypothesis import HypothesisAgentGenome, ModelTier
|
||||||
from multi_swarm.llm.client import CompletionResult, LLMClient
|
from multi_swarm.llm.client import CompletionResult, LLMClient
|
||||||
|
|
||||||
@@ -51,3 +53,50 @@ def test_completion_tier_b_uses_anthropic(mocker):
|
|||||||
assert out.input_tokens == 80
|
assert out.input_tokens == 80
|
||||||
assert out.output_tokens == 150
|
assert out.output_tokens == 150
|
||||||
assert out.tier == ModelTier.B
|
assert out.tier == ModelTier.B
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
def test_completion_retries_on_connection_error(mocker):
    """Three attempts all fail with APIConnectionError; the error is re-raised."""
    import openai

    # Stubbed OpenAI client whose create() raises a transient connection
    # error on every call, so the retry budget is fully exhausted.
    openai_stub = mocker.MagicMock()
    openai_stub.chat.completions.create.side_effect = openai.APIConnectionError(
        request=mocker.MagicMock()
    )
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=openai_stub)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.C)

    # reraise=True means the original exception escapes after the last attempt.
    with pytest.raises(openai.APIConnectionError):
        client.complete(genome, system="sys", user="usr")

    # stop_after_attempt(3) => exactly three calls before giving up.
    assert openai_stub.chat.completions.create.call_count == 3
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.slow
def test_completion_succeeds_after_one_retry(mocker):
    """After one transient timeout, the retry succeeds on the second attempt."""
    import openai

    # Well-formed response delivered on the second attempt.
    ok_response = mocker.MagicMock()
    ok_response.choices = [
        mocker.MagicMock(message=mocker.MagicMock(content="(strategy ...)"))
    ]
    ok_response.usage = mocker.MagicMock(prompt_tokens=100, completion_tokens=200)

    # First call raises a retryable timeout; second call returns the response.
    openai_stub = mocker.MagicMock()
    openai_stub.chat.completions.create.side_effect = [
        openai.APITimeoutError(request=mocker.MagicMock()),
        ok_response,
    ]
    mocker.patch("multi_swarm.llm.client.OpenAI", return_value=openai_stub)

    client = LLMClient(openrouter_api_key="or-x", anthropic_api_key=None)
    genome = make_genome(ModelTier.C)
    result = client.complete(genome, system="sys", user="usr")

    assert isinstance(result, CompletionResult)
    assert result.text == "(strategy ...)"
    assert openai_stub.chat.completions.create.call_count == 2
|
||||||
|
|||||||
Reference in New Issue
Block a user