Fix vLLM CPU initialize engine issue for DeepSeek models (#1762)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2025-04-09 09:47:08 +08:00
parent 6917d5bdb1
commit 7b7728c6c3
1 changed files with 1 additions and 0 deletions
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -96,6 +96,7 @@ services:
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
+      VLLM_CPU_KVCACHE_SPACE: 40
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
      interval: 10s