Support Long context for DocSum (#1255)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: lkk <33276950+lkk12014402@users.noreply.github.com>
This commit is contained in:
@@ -27,7 +27,7 @@ services:
|
||||
security_opt:
|
||||
- seccomp:unconfined
|
||||
ipc: host
|
||||
-command: --model-id ${DOCSUM_LLM_MODEL_ID}
|
||||
+command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
|
||||
|
||||
docsum-llm-server:
|
||||
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
|
||||
@@ -53,6 +53,9 @@ services:
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
|
||||
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
|
||||
+MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
|
||||
+MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
|
||||
+LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
|
||||
restart: unless-stopped
|
||||
|
||||
whisper:
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
# Copyright (C) 2024 Advanced Micro Devices, Inc.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
+export MAX_INPUT_TOKENS=2048
|
||||
+export MAX_TOTAL_TOKENS=4096
|
||||
export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
|
||||
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export HOST_IP=${host_ip}
|
||||
|
||||
Reference in New Issue
Block a user