Update the default LLM to llama3-8B on cpu/nvgpu/amdgpu/gaudi for the docker-compose deployment, to avoid the potential model-serving issue or the missing chat-template issue seen when using neural-chat-7b. Slow serving issue of neural-chat-7b on ICX: #1420. Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
35 lines
1.8 KiB
Bash
35 lines
1.8 KiB
Bash
#!/usr/bin/env bash

# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

# Exports the CHATQNA_* environment variables: container image, model IDs,
# service ports, host addresses and endpoint URLs.
#
# Input: HOST_IP is read from the caller's environment. It is expanded at the
# moment this file is evaluated, so it has to be set beforehand.
# NOTE(review): the file only contains exports, so it is presumably meant to
# be sourced into the calling shell rather than executed -- confirm.

# Warn (without aborting) when HOST_IP is missing: every host-based value
# below would otherwise silently end up with an empty host part.
if [ -z "${HOST_IP:-}" ]; then
  printf 'warning: HOST_IP is unset or empty; host-based CHATQNA_* values will be incomplete\n' >&2
fi

# --- Container image and model selection ------------------------------------
export CHATQNA_TGI_SERVICE_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# --- Ports -------------------------------------------------------------------
# Every port is assigned in this section, ahead of any URL that embeds it.
# In particular CHATQNA_BACKEND_SERVICE_PORT must be set before
# CHATQNA_BACKEND_SERVICE_ENDPOINT is built, otherwise the URL is rendered
# with an empty (or stale, inherited) port.
export CHATQNA_TGI_SERVICE_PORT=18008
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_REDIS_VECTOR_PORT=16379
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_DATAPREP_PORT=6007
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_FRONTEND_SERVICE_PORT=15173
export CHATQNA_BACKEND_SERVICE_PORT=18888
export CHATQNA_NGINX_PORT=15176

# --- Names -------------------------------------------------------------------
export CHATQNA_INDEX_NAME="rag-redis"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna

# --- Host addresses (all taken from HOST_IP) ----------------------------------
export CHATQNA_MEGA_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_RETRIEVER_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_EMBEDDING_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_RERANK_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_LLM_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_FRONTEND_SERVICE_IP="${HOST_IP}"
export CHATQNA_BACKEND_SERVICE_IP="${HOST_IP}"

# --- URLs built from HOST_IP ---------------------------------------------------
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"

# --- URLs on the loopback address ----------------------------------------------
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
|