Files
Wang, Kai Lawrence 3d3ac59bfb [ChatQnA] Update the default LLM to llama3-8B on cpu/gpu/hpu (#1430)
Update the default LLM to llama3-8B on cpu/nvgpu/amdgpu/gaudi for docker-compose deployments, to avoid the potential model-serving issue and the missing chat-template issue seen with neural-chat-7b.

Slow serving issue of neural-chat-7b on ICX: #1420
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2025-01-20 22:47:56 +08:00

35 lines
1.8 KiB
Bash

#!/usr/bin/env bash
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
# Exports the CHATQNA_* settings defined below: container image, model IDs,
# service ports, host addresses and the URLs derived from them.
#
# This file only exports variables, so it has to be sourced for the values to
# reach the calling shell.
#
# HOST_IP is read here but never assigned; it must already be set by the
# caller, otherwise every value built from it contains an empty host.
#
# Ordering rule: every port is assigned before any URL that interpolates it,
# so a derived URL can never pick up an empty or stale port.

# --- Image and models -------------------------------------------------------
export CHATQNA_TGI_SERVICE_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
export CHATQNA_EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export CHATQNA_RERANK_MODEL_ID="BAAI/bge-reranker-base"
export CHATQNA_LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# --- Ports (must precede the URLs section) ----------------------------------
export CHATQNA_TGI_SERVICE_PORT=18008
export CHATQNA_TEI_EMBEDDING_PORT=18090
export CHATQNA_TEI_RERANKING_PORT=18808
export CHATQNA_REDIS_VECTOR_PORT=16379
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_DATAPREP_PORT=6007
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_FRONTEND_SERVICE_PORT=15173
export CHATQNA_BACKEND_SERVICE_PORT=18888
export CHATQNA_NGINX_PORT=15176

# --- Names ------------------------------------------------------------------
export CHATQNA_INDEX_NAME="rag-redis"
export CHATQNA_BACKEND_SERVICE_NAME=chatqna

# --- Host addresses (all taken from HOST_IP) --------------------------------
export CHATQNA_MEGA_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_RETRIEVER_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_EMBEDDING_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_RERANK_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_LLM_SERVICE_HOST_IP="${HOST_IP}"
export CHATQNA_FRONTEND_SERVICE_IP="${HOST_IP}"
export CHATQNA_BACKEND_SERVICE_IP="${HOST_IP}"

# --- URLs derived from the ports and hosts above ----------------------------
export CHATQNA_TEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:${CHATQNA_TEI_EMBEDDING_PORT}"
export CHATQNA_REDIS_URL="redis://${HOST_IP}:${CHATQNA_REDIS_VECTOR_PORT}"
# The endpoints below use the loopback address rather than HOST_IP.
# CHATQNA_BACKEND_SERVICE_PORT was previously assigned *after* this line, which
# produced "http://127.0.0.1:/v1/chatqna" in a fresh shell.
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"