fix(llm): handle empty completions + missing usage (nemotron-friendly)

EmptyCompletionError is now retryable. resp.usage can be None on
:free providers (e.g. nemotron-3-super-120b-a12b:free) → do not assert on it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-11 09:45:49 +02:00
parent d3662f6098
commit 9d0deb3ae0
+10 -4
View File
@@ -21,11 +21,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"
MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
class EmptyCompletionError(RuntimeError):
    """Raised when a completion response has no choices or a None message content.

    Listed in _RETRYABLE_EXCEPTIONS, so an empty completion triggers a retry
    rather than failing the request outright.
    """
# Transient errors: retry. RateLimit/Auth/InvalidRequest: NO retry.
# EmptyCompletionError is included so that empty/contentless responses
# (seen on some :free provider models) are retried instead of failing.
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
openai.APIConnectionError,
openai.APITimeoutError,
openai.InternalServerError,
EmptyCompletionError,
)
@@ -88,12 +93,13 @@ class LLMClient:
top_p=genome.top_p,
max_tokens=max_tokens,
)
if not resp.choices or resp.choices[0].message.content is None:
raise EmptyCompletionError(f"empty response from {model}")
usage = resp.usage
assert usage is not None
return CompletionResult(
text=resp.choices[0].message.content or "",
input_tokens=usage.prompt_tokens,
output_tokens=usage.completion_tokens,
text=resp.choices[0].message.content,
input_tokens=usage.prompt_tokens if usage else 0,
output_tokens=usage.completion_tokens if usage else 0,
tier=genome.model_tier,
model=model,
)