GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml

# Copyright (C) 2025 Advanced Micro Devices, Inc.

services:
  # Text Generation Inference (TGI) server, ROCm image. Serves ${LLM_MODEL_ID}
  # on container port 80, published on the host at TGI_SERVICE_PORT.
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
    container_name: tgi-service
    ports:
      # ":-" (not "-") so the 8085 fallback also applies when the variable is
      # set but empty; matches the other port mappings in this file.
      - "${TGI_SERVICE_PORT:-8085}:80"
    volumes:
      # Host directory mounted at /data; falls back to ./data next to this file.
      - "${MODEL_CACHE:-./data}:/data"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # Uses the same 8085 fallback as the published port so the URL never
      # renders with an empty port.
      TGI_LLM_ENDPOINT: "http://${ip_address}:${TGI_SERVICE_PORT:-8085}"
      # The same token is exported under both variable names.
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    shm_size: 32g
    # GPU device nodes passed through from the host.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
    ipc: host
    # Input is capped at 4096 tokens and input + output at 8192 tokens.
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192

  # Worker agent using the rag_agent_llama strategy; listens on port 9095 and
  # loads its tool definitions from the mounted TOOLSET_PATH directory.
  worker-rag-agent:
    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: rag-agent-endpoint
    volumes:
      - "${TOOLSET_PATH}:/home/user/tools/"
    ports:
      - "${WORKER_RAG_AGENT_PORT:-9095}:9095"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so YAML keeps them as
      # strings; environment variables are always strings inside the container.
      ip_address: ${ip_address}
      strategy: rag_agent_llama
      with_memory: "false"
      recursion_limit: ${recursion_limit_worker}
      llm_engine: tgi
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "false"
      # Path inside the container; resolved through the tools volume above.
      tools: /home/user/tools/worker_agent_tools.yaml
      require_human_feedback: "false"
      RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-worker-agent-service"
      # Must match the container side of the port mapping above.
      port: "9095"

  # Worker agent using the sql_agent_llama strategy; listens on port 9096 and
  # works against the Chinook SQLite database mounted from ${WORKDIR}/tests.
  worker-sql-agent:
    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: sql-agent-endpoint
    volumes:
      # Mounted read-write (":rw").
      - "${WORKDIR}/tests/Chinook_Sqlite.sqlite:/home/user/chinook-db/Chinook_Sqlite.sqlite:rw"
    ports:
      - "${WORKER_SQL_AGENT_PORT:-9096}:9096"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so YAML keeps them as
      # strings; environment variables are always strings inside the container.
      ip_address: ${ip_address}
      strategy: sql_agent_llama
      with_memory: "false"
      db_name: ${db_name}
      db_path: ${db_path}
      use_hints: "false"
      recursion_limit: ${recursion_limit_worker}
      # NOTE(review): the other agents in this file use llm_engine "tgi", and
      # tgi-service is the only LLM server defined here - confirm that "vllm"
      # is intended for this agent.
      llm_engine: vllm
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "false"
      require_human_feedback: "false"
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # Must match the container side of the port mapping above.
      port: "9096"

  # Supervisor agent using the react_llama strategy; listens on port 9090 and
  # is given the URLs of both worker agents (WORKER_AGENT_URL, SQL_AGENT_URL).
  supervisor-react-agent:
    image: ${REGISTRY:-opea}/agent:${TAG:-latest}
    container_name: react-agent-endpoint
    # Start both workers first: this service is configured with a URL for each.
    depends_on:
      - worker-rag-agent
      - worker-sql-agent
    volumes:
      - "${TOOLSET_PATH}:/home/user/tools/"
    ports:
      - "${SUPERVISOR_REACT_AGENT_PORT:-9090}:9090"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so YAML keeps them as
      # strings; environment variables are always strings inside the container.
      ip_address: ${ip_address}
      strategy: react_llama
      with_memory: "true"
      recursion_limit: ${recursion_limit_supervisor}
      llm_engine: tgi
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "true"
      # Path inside the container; resolved through the tools volume above.
      tools: /home/user/tools/supervisor_agent_tools.yaml
      require_human_feedback: "false"
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
      CRAG_SERVER: ${CRAG_SERVER}
      WORKER_AGENT_URL: ${WORKER_AGENT_URL}
      SQL_AGENT_URL: ${SQL_AGENT_URL}
      # Must match the container side of the port mapping above.
      port: "9090"