Fix vllm model cache directory (#1642)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
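The fix in context: vLLM resolves and stores Hugging Face downloads under /root/.cache/huggingface/hub inside the container, while TGI reads from /data. Mounting the host's ./data at /data therefore did nothing for vLLM, and the model was re-downloaded on every container start. The hunks below move the vLLM mount (and, in the offline case, the --model path) to the real cache location. A minimal probe of where the cache actually lands (a sketch; assumes the vllm-service container from the commands below is running, and the docker exec call is purely illustrative):

```bash
# huggingface_hub, which vLLM uses for downloads, stores each repo as
# models--<org>--<name> under the hub cache directory inside the container.
docker exec vllm-service ls /root/.cache/huggingface/hub
# With the corrected -v ./data:/root/.cache/huggingface/hub mount, the same
# entries show up on the host and survive container re-creation.
ls ./data
```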
committed by GitHub
parent eb245fd085
commit 5362321d3a
@@ -219,7 +219,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_ENDPOINT="https://hf-mirror.com"
 model_name="meta-llama/Meta-Llama-3-8B-Instruct"
 # Start vLLM LLM Service
-docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
 # Start TGI LLM Service
 docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
 ```

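A quick way to confirm the corrected mount is doing its job (a sketch; assumes the vLLM container above is up and the model has finished loading):

```bash
# vLLM's OpenAI-compatible server lists the served model on /v1/models;
# 8008 is the host port mapping from the docker run command above.
curl http://localhost:8008/v1/models
# The downloaded snapshot should now be visible, and persist, on the host.
ls ./data
```
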
@@ -236,7 +236,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_TOKEN=${your_hf_token}
 export model_path="/path/to/model"
 # Start vLLM LLM Service
-docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80
 # Start TGI LLM Service
 docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
 ```

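For the offline variant, $model_path now has to contain the model files that vLLM loads directly from the mount point. One way to stage them on a machine that does have network access (a sketch; huggingface-cli ships with the huggingface_hub package, and the target directory is illustrative):

```bash
pip install -U huggingface_hub
# --local-dir writes plain model files rather than the hub cache layout,
# which matches loading them via --model <directory>.
huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct \
  --local-dir /path/to/model
export model_path="/path/to/model"
```
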
@@ -201,7 +201,7 @@ For users in China who are unable to download models directly from Huggingface,
 export HF_TOKEN=${your_hf_token}
 export HF_ENDPOINT="https://hf-mirror.com"
 model_name="meta-llama/Meta-Llama-3-8B-Instruct"
-docker run -p 8008:80 -v ./data:/data --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
 ```

 2. Offline

@@ -215,7 +215,7 @@ For users in China who are unable to download models directly from Huggingface,
 ```bash
 export HF_TOKEN=${your_hf_token}
 export model_path="/path/to/model"
-docker run -p 8008:80 -v $model_path:/data --name vllm-service --shm-size 128g opea/vllm:latest --model /data --host 0.0.0.0 --port 80
+docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80
 ```

 ### Setup Environment Variables

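The practical effect of the change is easiest to see by recreating the container: the first run downloads the weights into the mounted directory, and later runs reuse them instead of fetching again (a sketch, reusing the names from the online commands above):

```bash
docker rm -f vllm-service
# The second start should skip the download, because the hub cache
# now lives on the host instead of inside the discarded container.
docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service \
  --shm-size 128g opea/vllm:latest \
  --model meta-llama/Meta-Llama-3-8B-Instruct --host 0.0.0.0 --port 80
```
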
@@ -80,7 +80,7 @@ services:
     ports:
       - "9009:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

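In the compose files, the host side of the vLLM mount is parameterized: MODEL_CACHE if set, otherwise ./data. A sketch of pointing a deployment at a shared cache directory (the path is illustrative):

```bash
export MODEL_CACHE=/opt/opea/model-cache
mkdir -p "$MODEL_CACHE"
# compose substitutes ${MODEL_CACHE:-./data} into the volume mapping,
# so every run shares one Hugging Face hub cache on the host.
docker compose up -d
```
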
@@ -144,7 +144,7 @@ services:
     ports:
       - "9009:80"
     volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

@@ -80,7 +80,7 @@ services:
     ports:
       - "6042:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
     shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

@@ -64,7 +64,7 @@ services:
     ports:
       - "9009:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}

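As with the docker run variants, the point of the new mapping is persistence: tearing the stack down and bringing it back up should not trigger a fresh download (a sketch; the grep pattern assumes the standard hub cache layout):

```bash
docker compose down
docker compose up -d
# The hub cache on the host still holds the snapshot from the first run.
ls "${MODEL_CACHE:-./data}" | grep -i 'models--'
```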