Update the default LLM to Llama-3-8B-Instruct on cpu/nvgpu/amdgpu/gaudi for docker-compose deployments, avoiding both the potential model-serving issue and the missing chat-template issue seen with neural-chat-7b. The slow serving of neural-chat-7b on ICX is tracked in #1420.

Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
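For reference, a minimal deployment sketch under assumed paths (the directory below is illustrative, and the commented-out previous-default model tag may differ): source this script so the new default takes effect, optionally override it, then start the stack.

    # Illustrative path; use the docker_compose directory for your platform.
    cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon

    # Pick up the defaults, including the new
    # LLM_MODEL_ID=meta-llama/Meta-Llama-3-8B-Instruct.
    source ./set_env.sh

    # Optional: pin a different model. The script exports unconditionally,
    # so an override must come after the source line.
    # export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

    docker compose up -d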
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"

# Set it to a non-empty string, such as true, to enable the logging facility;
# otherwise, keep it as "" to disable it.
export LOGFLAG=""

# Set OpenTelemetry Tracing Endpoint
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
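Since JAEGER_IP is derived from the default route, it can come up empty on hosts without one (air-gapped machines, some containers), which would leave the telemetry endpoints malformed. A small hedged sanity check, which could be appended to this script or run after sourcing it:

    # Warn if no host IP was detected; the endpoints would then be
    # malformed, e.g. "grpc://:4317".
    if [ -z "$JAEGER_IP" ]; then
        echo "WARNING: JAEGER_IP is empty; check 'ip route get 8.8.8.8'" >&2
    fi
    echo "OTLP gRPC endpoint: $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"
    echo "OTLP HTTP endpoint: $TELEMETRY_ENDPOINT"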