fix(llm): handle empty completions + missing usage (nemotron-friendly)
EmptyCompletionError ora è retryable. resp.usage può essere None sui provider :free (es. nemotron-3-super-120b-a12b:free) → rimosso l'assert. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -21,11 +21,16 @@ MODEL_TIER_C = "qwen/qwen-2.5-72b-instruct"
|
|||||||
MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"
|
MODEL_TIER_D = "meta-llama/llama-3.3-70b-instruct"
|
||||||
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
||||||
|
|
||||||
|
class EmptyCompletionError(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Errori transient: retry. RateLimit/Auth/InvalidRequest: NO retry.
|
# Errori transient: retry. RateLimit/Auth/InvalidRequest: NO retry.
|
||||||
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
|
_RETRYABLE_EXCEPTIONS: tuple[type[BaseException], ...] = (
|
||||||
openai.APIConnectionError,
|
openai.APIConnectionError,
|
||||||
openai.APITimeoutError,
|
openai.APITimeoutError,
|
||||||
openai.InternalServerError,
|
openai.InternalServerError,
|
||||||
|
EmptyCompletionError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -88,12 +93,13 @@ class LLMClient:
|
|||||||
top_p=genome.top_p,
|
top_p=genome.top_p,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
)
|
)
|
||||||
|
if not resp.choices or resp.choices[0].message.content is None:
|
||||||
|
raise EmptyCompletionError(f"empty response from {model}")
|
||||||
usage = resp.usage
|
usage = resp.usage
|
||||||
assert usage is not None
|
|
||||||
return CompletionResult(
|
return CompletionResult(
|
||||||
text=resp.choices[0].message.content or "",
|
text=resp.choices[0].message.content,
|
||||||
input_tokens=usage.prompt_tokens,
|
input_tokens=usage.prompt_tokens if usage else 0,
|
||||||
output_tokens=usage.completion_tokens,
|
output_tokens=usage.completion_tokens if usage else 0,
|
||||||
tier=genome.model_tier,
|
tier=genome.model_tier,
|
||||||
model=model,
|
model=model,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user