Update the default LLM to Llama-3-8B on cpu/nvgpu/amdgpu/gaudi for docker-compose deployments, avoiding the potential model-serving issue and the missing chat-template issue encountered with neural-chat-7b.

Slow serving issue of neural-chat-7b on ICX: #1420

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
# Set LOGFLAG to a non-null string, such as "true", to enable the logging facility;
# otherwise, keep it as "" to disable it.
export LOGFLAG=""
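For context, a minimal usage sketch follows. It is an assumption, not part of this change: it supposes the script above is saved as set_env.sh next to the compose file, that a Hugging Face token is available via HUGGINGFACEHUB_API_TOKEN (Meta-Llama-3 is a gated model, so access must be granted on Hugging Face first), and that Docker Compose v2 is installed. Adapt names and paths to your deployment.

# Hypothetical usage; the filename and token variable are assumptions.
export HUGGINGFACEHUB_API_TOKEN="<your-hf-token>"  # needed to download the gated Llama 3 weights
source ./set_env.sh
export LOGFLAG="true"  # optional: set after sourcing, since the script resets LOGFLAG to ""
docker compose up -d

Note that LOGFLAG must be overridden after sourcing the script, because the script unconditionally exports it as "".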