Update the default LLM to Llama-3-8B-Instruct on cpu/nvgpu/amdgpu/gaudi for the docker-compose deployments, to avoid the potential model-serving issue and the missing chat-template issue with neural-chat-7b.

Slow serving of neural-chat-7b on ICX: #1420

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
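If a deployment still wants the previous default, the model ID can be overridden before bringing the stack up. A minimal sketch, assuming the previous default was `Intel/neural-chat-7b-v3-3` and noting that the new default is a gated model:

```bash
# Optional: revert to the previous default LLM instead of Llama-3-8B.
# Note: meta-llama/Meta-Llama-3-8B-Instruct is gated on Hugging Face, so a
# HUGGINGFACEHUB_API_TOKEN with access is needed when keeping the new default.
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```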
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# host_ip must be set before sourcing this script,
# e.g. export host_ip=$(hostname -I | awk '{print $1}')

# Model IDs: embedding, reranking, and the LLM (default now Llama-3-8B-Instruct)
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# Service endpoints and vector index
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"

# Frontend/backend service wiring
export FRONTEND_SERVICE_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=chatqna
export BACKEND_SERVICE_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888
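A minimal usage and smoke-test sketch after sourcing these variables. The script name (`set_env.sh`), the compose invocation, and the request payload format are assumptions and may differ per hardware target:

```bash
# Assumed workflow: set host_ip and the HF token, load the env, start the stack.
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your-hf-token>   # Llama-3-8B-Instruct is gated
source ./set_env.sh                               # assumed script name
docker compose up -d

# Smoke test: the payload shape is an assumption for illustration.
curl "${BACKEND_SERVICE_ENDPOINT}" \
  -H "Content-Type: application/json" \
  -d '{"messages": "What is OPEA?"}'
```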