Fix vllm model cache directory (#1642)

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
Wang, Kai Lawrence authored on 2025-03-10 13:40:42 +08:00; committed by GitHub
commit 5362321d3a (parent eb245fd085)
12 changed files with 16 additions and 16 deletions

@@ -219,7 +219,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_ENDPOINT="https://hf-mirror.com"
 model_name="meta-llama/Meta-Llama-3-8B-Instruct"
 # Start vLLM LLM Service
-docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
 # Start TGI LLM Service
 docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
 ```
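Background on the new target path: vLLM resolves `--model $model_name` through the Hugging Face hub client, which stores downloads in its default cache at `~/.cache/huggingface/hub` — `/root/.cache/huggingface/hub` inside the container — so mounting the host `./data` directory there is what actually persists the downloaded weights between runs. A minimal sketch of checking this on the host, assuming the service above has finished its first download (the hub cache uses the `models--<org>--<name>` layout; the exact snapshot contents will differ):

```bash
# The model snapshot should now appear in ./data on the host,
# using the hub cache directory layout.
ls ./data/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots

# Restarting the container should reuse the cached snapshot
# instead of downloading the model again.
docker restart vllm-service
```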
@@ -236,7 +236,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_TOKEN=${your_hf_token}
 export model_path="/path/to/model"
 # Start vLLM LLM Service
-docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80
 # Start TGI LLM Service
 docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
 ```
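In the offline variant, the same container-side path is also handed to `--model`, so the mounted `$model_path` must contain the model files themselves (config.json, tokenizer files, and weight shards) rather than a hub-style cache tree. A hedged sketch of one way to populate such a directory beforehand, assuming `huggingface-cli` is available on a machine with network access (path and model name mirror the example above):

```bash
# Download the model files into a plain directory; this directory is later
# mounted into the container at /root/.cache/huggingface/hub and used
# directly as the model path.
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --local-dir /path/to/model
```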

@@ -201,7 +201,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_TOKEN=${your_hf_token}
 export HF_ENDPOINT="https://hf-mirror.com"
 model_name="meta-llama/Meta-Llama-3-8B-Instruct"
-docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
 ```
 2. Offline
@@ -215,7 +215,7 @@ For users in China who are unable to download models directly from Huggingface,
 ```bash
 export HF_TOKEN=${your_hf_token}
 export model_path="/path/to/model"
-docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80
 ```
 ### Setup Environment Variables

@@ -80,7 +80,7 @@ services:
     ports:
       - "9009:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
    environment:
       no_proxy: ${no_proxy}
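In the compose files, the host side of the mount stays configurable through `MODEL_CACHE` (defaulting to `./data`); only the container-side target moves to vLLM's Hugging Face cache path. A small sketch of one way to use that variable, assuming the host already keeps a hub cache in the default per-user location:

```bash
# Reuse the host's existing Hugging Face hub cache so vLLM does not
# re-download models that are already present; omit this export to fall
# back to the ./data default from the compose file.
export MODEL_CACHE=$HOME/.cache/huggingface/hub
docker compose up -d
```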

@@ -144,7 +144,7 @@ services:
     ports:
       - "9009:80"
     volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

@@ -80,7 +80,7 @@ services:
     ports:
       - "6042:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

@@ -64,7 +64,7 @@ services:
     ports:
      - "9009:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}