fix(llm): handle empty completions + missing usage (nemotron-friendly)
EmptyCompletionError is now retryable. resp.usage can be None on :free providers (e.g. nemotron-3-super-120b-a12b:free) → no assert. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -21,11 +21,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"
|
||||
# Tier-D model identifier on OpenRouter (lowest tier visible in this hunk;
# sits below MODEL_TIER_C above).
MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"

# Base URL of OpenRouter's OpenAI-compatible REST endpoint.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
||||
|
||||
class EmptyCompletionError(RuntimeError):
    """Raised when a completion comes back with no choices or ``None`` content.

    Included in ``_RETRYABLE_EXCEPTIONS`` below: an empty completion is
    treated as a transient provider glitch (observed on some ``:free``
    providers), so the request is retried rather than failed outright.
    """
||||
# Transient errors: retry. RateLimit/Auth/InvalidRequest: NO retry.
# EmptyCompletionError is retryable too: per the commit note, some providers
# (notably :free tiers) occasionally return an empty completion, and a
# retry usually succeeds.
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
    openai.APIConnectionError,   # network-level failure reaching the API
    openai.APITimeoutError,      # request exceeded the configured timeout
    openai.InternalServerError,  # 5xx from the provider
    EmptyCompletionError,        # empty choices/content treated as transient
)
|
||||
|
||||
|
||||
@@ -88,12 +93,13 @@ class LLMClient:
|
||||
top_p=genome.top_p,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
if not resp.choices or resp.choices[0].message.content is None:
|
||||
raise EmptyCompletionError(f"empty response from {model}")
|
||||
usage = resp.usage
|
||||
assert usage is not None
|
||||
return CompletionResult(
|
||||
text=resp.choices[0].message.content or "",
|
||||
input_tokens=usage.prompt_tokens,
|
||||
output_tokens=usage.completion_tokens,
|
||||
text=resp.choices[0].message.content,
|
||||
input_tokens=usage.prompt_tokens if usage else 0,
|
||||
output_tokens=usage.completion_tokens if usage else 0,
|
||||
tier=genome.model_tier,
|
||||
model=model,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user