fix(core): detect vLLM context overflow errors (#17763)

Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
This commit is contained in:
Johannes Loher 2026-03-18 05:52:16 +01:00 committed by GitHub
parent 1b86c27fb8
commit 56102ff642
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,7 +13,7 @@ export namespace ProviderError {
/input token count.*exceeds the maximum/i, // Google (Gemini)
/maximum prompt length is \d+/i, // xAI (Grok)
/reduce the length of the messages/i, // Groq
-/maximum context length is \d+ tokens/i, // OpenRouter, DeepSeek
+/maximum context length is \d+ tokens/i, // OpenRouter, DeepSeek, vLLM
/exceeds the limit of \d+/i, // GitHub Copilot
/exceeds the available context size/i, // llama.cpp server
/greater than the context length/i, // LM Studio
@ -21,6 +21,8 @@ export namespace ProviderError {
/exceeded model token limit/i, // Kimi For Coding, Moonshot
/context[_ ]length[_ ]exceeded/i, // Generic fallback
/request entity too large/i, // HTTP 413
+/context length is only \d+ tokens/i, // vLLM
+/input length.*exceeds.*context length/i, // vLLM
]
function isOpenAiErrorRetryable(e: APICallError) {