Files
GenAIExamples/ChatQnA/kubernetes/helm/cpu-values.yaml
2025-04-21 09:38:07 +08:00

11 lines
383 B
YAML

# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
vllm:
  # Model identifier set for the vllm values section.
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct

  # Uncomment the following model-specific settings for DeepSeek models.
  # The 60Gi memory request is 40G for the KV cache plus 20G for
  # DeepSeek-R1-Distill-Qwen-7B; adjust it for other models.
  #VLLM_CPU_KVCACHE_SPACE: 40
  #resources:
  #  requests:
  #    memory: 60Gi