# GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  # Worker agent running the rag_agent_llama strategy, published on host
  # port 9095.
  worker-rag-agent:
    image: opea/agent:latest
    container_name: rag-agent-endpoint
    volumes:
      # Tool definitions; the `tools` variable below points into this mount.
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9095:9095"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so the YAML parser hands
      # them to Compose as strings instead of retyping them.
      ip_address: ${ip_address}
      strategy: rag_agent_llama
      with_memory: "false"
      recursion_limit: ${recursion_limit_worker}
      # LLM backend settings (endpoint and model come from the shell/.env).
      llm_engine: vllm
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "false"
      tools: /home/user/tools/worker_agent_tools.yaml
      require_human_feedback: "false"
      RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
      # Proxy settings are passed through from the host environment.
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # LangChain tracing configuration.
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-worker-agent-service"
      # Matches the container side of the published port mapping above.
      port: "9095"

  # Worker agent running the sql_agent_llama strategy, published on host
  # port 9096.
  worker-sql-agent:
    image: opea/agent:latest
    container_name: sql-agent-endpoint
    volumes:
      - ${WORKDIR}/GenAIExamples/AgentQnA/tests:/home/user/chinook-db # test db
    ports:
      - "9096:9096"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so the YAML parser hands
      # them to Compose as strings instead of retyping them.
      ip_address: ${ip_address}
      strategy: sql_agent_llama
      with_memory: "false"
      # Database selection; both values come from the shell/.env.
      db_name: ${db_name}
      db_path: ${db_path}
      use_hints: "false"
      recursion_limit: ${recursion_limit_worker}
      # LLM backend settings (endpoint and model come from the shell/.env).
      llm_engine: vllm
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "false"
      require_human_feedback: "false"
      # Proxy settings are passed through from the host environment.
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # Matches the container side of the published port mapping above.
      port: "9096"

  # Supervisor agent running the react_llama strategy, published on host
  # port 9090. Started after both worker agents.
  supervisor-react-agent:
    image: opea/agent:latest
    container_name: react-agent-endpoint
    depends_on:
      - worker-rag-agent
      - worker-sql-agent
    volumes:
      # Tool definitions; the `tools` variable below points into this mount.
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9090:9090"
    ipc: host
    environment:
      # Boolean- and number-looking values are quoted so the YAML parser hands
      # them to Compose as strings instead of retyping them.
      ip_address: ${ip_address}
      strategy: react_llama
      with_memory: "true"
      recursion_limit: ${recursion_limit_supervisor}
      # LLM backend settings (endpoint and model come from the shell/.env).
      llm_engine: vllm
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      llm_endpoint_url: ${LLM_ENDPOINT_URL}
      model: ${LLM_MODEL_ID}
      temperature: ${temperature}
      max_new_tokens: ${max_new_tokens}
      stream: "true"
      tools: /home/user/tools/supervisor_agent_tools.yaml
      require_human_feedback: "false"
      # Proxy settings are passed through from the host environment.
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      # LangChain tracing configuration.
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-supervisor-agent-service"
      # Endpoints supplied by the shell/.env; written in the braced
      # interpolation form used by the rest of this file.
      CRAG_SERVER: ${CRAG_SERVER}
      WORKER_AGENT_URL: ${WORKER_AGENT_URL}
      SQL_AGENT_URL: ${SQL_AGENT_URL}
      # Matches the container side of the published port mapping above.
      port: "9090"
  # Mock API container (KDD Cup 24 CRAG image).
  # NOTE(review): presumably the backend that CRAG_SERVER points at — confirm.
  mock-api:
    container_name: mock-api
    image: docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
    ipc: host
    ports:
      # Container port 8000 is published on host port 8080.
      - "8080:8000"
  # Web UI container, published on host port 5173.
  agent-ui:
    # Tag made explicit to match the other images in this file; an untagged
    # reference resolves to `latest` anyway.
    image: opea/agent-ui:latest
    container_name: agent-ui
    volumes:
      # The UI's .env file is bind-mounted from the checkout under WORKDIR.
      - ${WORKDIR}/GenAIExamples/AgentQnA/ui/svelte/.env:/home/user/svelte/.env
    environment:
      host_ip: ${host_ip}
    ports:
      - "5173:5173"
    ipc: host
  # vLLM server container using the Habana runtime. Container port 8000 is
  # published on host port 8086.
  vllm-service:
    # Registry and tag can be overridden via REGISTRY / TAG; defaults are
    # `opea` and `latest`.
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
    container_name: vllm-gaudi-server
    ports:
      - "8086:8000"
    volumes:
      - "./data:/data"
    environment:
      # Proxy settings are passed through from the host environment.
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      # Quoted so the YAML parser passes the literal string "true" rather than
      # a boolean.
      VLLM_SKIP_WARMUP: "true"
      PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
    healthcheck:
      # NOTE(review): the probe goes through the host-published port 8086, so
      # it needs host_ip to be set and reachable from inside the container.
      test: ["CMD-SHELL", "curl -f http://$host_ip:8086/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    # Folded scalar: the lines below are joined with single spaces into one
    # command string.
    command: >-
      --model ${LLM_MODEL_ID}
      --tensor-parallel-size 4
      --host 0.0.0.0
      --port 8000
      --block-size 128
      --max-num-seqs 256
      --max-seq-len-to-capture 16384