diff --git a/src/multi_swarm/llm/client.py b/src/multi_swarm/llm/client.py
index 230434a..0cecf27 100644
--- a/src/multi_swarm/llm/client.py
+++ b/src/multi_swarm/llm/client.py
@@ -21,11 +21,16 @@
 MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"
 MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+class EmptyCompletionError(RuntimeError):
+    pass
+
+
 # Errori transient: retry. RateLimit/Auth/InvalidRequest: NO retry.
 _RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
     openai.APIConnectionError,
     openai.APITimeoutError,
     openai.InternalServerError,
+    EmptyCompletionError,
 )
 
 
@@ -88,12 +93,13 @@ class LLMClient:
                 top_p=genome.top_p,
                 max_tokens=max_tokens,
             )
+            if not resp.choices or resp.choices[0].message.content is None:
+                raise EmptyCompletionError(f"empty response from {model}")
             usage = resp.usage
-            assert usage is not None
             return CompletionResult(
-                text=resp.choices[0].message.content or "",
-                input_tokens=usage.prompt_tokens,
-                output_tokens=usage.completion_tokens,
+                text=resp.choices[0].message.content,
+                input_tokens=usage.prompt_tokens if usage else 0,
+                output_tokens=usage.completion_tokens if usage else 0,
                 tier=genome.model_tier,
                 model=model,
             )