Support Long context for DocSum (#1255)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: lkk <33276950+lkk12014402@users.noreply.github.com>
This commit is contained in:
XinyaoWa
2024-12-20 19:17:10 +08:00
committed by GitHub
parent 05365b6140
commit 50dd959d60
15 changed files with 861 additions and 267 deletions

View File

@@ -27,7 +27,7 @@ services:
security_opt:
- seccomp:unconfined
ipc: host
command: --model-id ${DOCSUM_LLM_MODEL_ID}
command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
docsum-llm-server:
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
@@ -53,6 +53,9 @@ services:
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
restart: unless-stopped
whisper:

View File

@@ -3,6 +3,8 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0
export MAX_INPUT_TOKENS=2048
export MAX_TOTAL_TOKENS=4096
export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export HOST_IP=${host_ip}