From 9d0deb3ae0c82546ead7f0b97c50a9dbba255ce0 Mon Sep 17 00:00:00 2001 From: AdrianoDev Date: Mon, 11 May 2026 09:45:49 +0200 Subject: [PATCH] fix(llm): handle empty completions + missing usage (nemotron-friendly) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add EmptyCompletionError, raised when the response has no choices or its message content is None, and make it retryable. resp.usage can be None on :free providers (e.g. nemotron-3-super-120b-a12b:free), so the assert is dropped and the token counts fall back to 0. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/multi_swarm/llm/client.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/multi_swarm/llm/client.py b/src/multi_swarm/llm/client.py index 230434a..0cecf27 100644 --- a/src/multi_swarm/llm/client.py +++ b/src/multi_swarm/llm/client.py @@ -21,11 +21,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct" MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct" OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" +class EmptyCompletionError(RuntimeError): + pass + + # Errori transient: retry. RateLimit/Auth/InvalidRequest: NO retry. _RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = ( openai.APIConnectionError, openai.APITimeoutError, openai.InternalServerError, + EmptyCompletionError, ) @@ -88,12 +93,13 @@ class LLMClient: top_p=genome.top_p, max_tokens=max_tokens, ) + if not resp.choices or resp.choices[0].message.content is None: + raise EmptyCompletionError(f"empty response from {model}") usage = resp.usage - assert usage is not None return CompletionResult( - text=resp.choices[0].message.content or "", - input_tokens=usage.prompt_tokens, - output_tokens=usage.completion_tokens, + text=resp.choices[0].message.content, + input_tokens=usage.prompt_tokens if usage else 0, + output_tokens=usage.completion_tokens if usage else 0, tier=genome.model_tier, model=model, )