Support Long context for DocSum (#1255)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: lkk <33276950+lkk12014402@users.noreply.github.com>
2024-12-20 19:17:10 +08:00
parent 05365b6140
commit 50dd959d60
15 changed files with 861 additions and 267 deletions
--- a/DocSum/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/DocSum/docker_compose/amd/gpu/rocm/compose.yaml
@@ -27,7 +27,7 @@ services:
    security_opt:
      - seccomp:unconfined
    ipc: host
-    command: --model-id ${DOCSUM_LLM_MODEL_ID}
+    command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

  docsum-llm-server:
    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
@@ -53,6 +53,9 @@ services:
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
    restart: unless-stopped

  whisper:
--- a/DocSum/docker_compose/amd/gpu/rocm/set_env.sh
+++ b/DocSum/docker_compose/amd/gpu/rocm/set_env.sh
@@ -3,6 +3,8 @@
 # Copyright (C) 2024 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0

+export MAX_INPUT_TOKENS=2048
+export MAX_TOTAL_TOKENS=4096
 export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
 export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
 export HOST_IP=${host_ip}