fix(llm): handle empty completions + missing usage (nemotron-friendly)

EmptyCompletionError is now retryable. resp.usage can be None on
:free providers (e.g. nemotron-3-super-120b-a12b:free) → no assert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-11 09:45:49 +02:00
parent d3662f6098
commit 9d0deb3ae0
+10 -4
View File
@@ -21,11 +21,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"
MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct" MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
class EmptyCompletionError(RuntimeError):
    """The provider returned no choices, or a choice whose message content is None.

    Included in the retryable-exception set, so an empty completion is
    handled as a transient failure rather than a hard error.
    """
# Transient errors: retry. RateLimit/Auth/InvalidRequest: NO retry.
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
    openai.APIConnectionError,   # network-level failure reaching the API
    openai.APITimeoutError,      # request timed out
    openai.InternalServerError,  # 5xx from the provider
    EmptyCompletionError,        # empty/None completion — treated as transient
)
@@ -88,12 +93,13 @@ class LLMClient:
top_p=genome.top_p, top_p=genome.top_p,
max_tokens=max_tokens, max_tokens=max_tokens,
) )
if not resp.choices or resp.choices[0].message.content is None:
raise EmptyCompletionError(f"empty response from {model}")
usage = resp.usage usage = resp.usage
assert usage is not None
return CompletionResult( return CompletionResult(
text=resp.choices[0].message.content or "", text=resp.choices[0].message.content,
input_tokens=usage.prompt_tokens, input_tokens=usage.prompt_tokens if usage else 0,
output_tokens=usage.completion_tokens, output_tokens=usage.completion_tokens if usage else 0,
tier=genome.model_tier, tier=genome.model_tier,
model=model, model=model,
) )