diff --git a/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml b/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
index cf7d2369c..55abc832b 100644
--- a/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
@@ -15,6 +15,12 @@ services:
       https_proxy: ${https_proxy}
       HUGGING_FACE_HUB_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
       HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${CODEGEN_TGI_SERVICE_PORT:-8028}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     shm_size: 1g
     devices:
       - /dev/kfd:/dev/kfd
@@ -31,7 +37,8 @@ services:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: codegen-llm-server
     depends_on:
-      - codegen-tgi-service
+      codegen-tgi-service:
+        condition: service_healthy
     ports:
       - "${CODEGEN_LLM_SERVICE_PORT:-9000}:9000"
     ipc: host
@@ -39,7 +46,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_MODEL_ID: ${CODEGEN_LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-backend-server:
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index 64b74db71..96226fe21 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -15,12 +15,19 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
     depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
     ports:
       - "9000:9000"
     ipc: host
@@ -28,7 +35,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-xeon-backend-server:
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 92b70b099..7f7e71295 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -20,6 +20,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "sleep 500 && exit 0"]
+      interval: 1s
+      timeout: 505s
+      retries: 1
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -29,7 +34,8 @@ services:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-gaudi-server
     depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
     ports:
       - "9000:9000"
     ipc: host
@@ -37,7 +43,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-gaudi-backend-server:
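Review note: the probe below mirrors what the new compose healthchecks run, so the `condition: service_healthy` gating can be checked by hand. A minimal sketch, assuming the Xeon defaults (TGI health endpoint published on port 8028, with `host_ip` exported as the test scripts now do); on ROCm, substitute ${CODEGEN_TGI_SERVICE_PORT:-8028}:

    # Same endpoint the healthcheck curls; returns 200 only once TGI has loaded the model.
    curl -f "http://${host_ip}:8028/health" && echo "tgi-service is healthy"

    # The health state that `condition: service_healthy` blocks on.
    docker inspect --format '{{.State.Health.Status}}' "$(docker compose ps -q tgi-service)"

The Gaudi variant deliberately skips the HTTP probe and instead treats a fixed 500-second warm-up sleep as healthy.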
diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh
index bd0e36688..8e06a904d 100644
--- a/CodeGen/tests/test_compose_on_gaudi.sh
+++ b/CodeGen/tests/test_compose_on_gaudi.sh
@@ -34,6 +34,7 @@ function start_services() {
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export LLM_SERVICE_HOST_IP=${ip_address}
     export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
diff --git a/CodeGen/tests/test_compose_on_rocm.sh b/CodeGen/tests/test_compose_on_rocm.sh
index 7f37669e5..a09a368b6 100644
--- a/CodeGen/tests/test_compose_on_rocm.sh
+++ b/CodeGen/tests/test_compose_on_rocm.sh
@@ -39,6 +39,7 @@ function start_services() {
     export CODEGEN_BACKEND_SERVICE_PORT=7778
     export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
     export CODEGEN_UI_SERVICE_PORT=5173
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 637cc00f5..e95052497 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -35,6 +35,7 @@ function start_services() {
     export MEGA_SERVICE_HOST_IP=${ip_address}
     export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
+    export host_ip=${ip_address}

     sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
diff --git a/CodeGen/ui/svelte/src/routes/+page.svelte b/CodeGen/ui/svelte/src/routes/+page.svelte
index ff18c5817..0e7d43bea 100644
--- a/CodeGen/ui/svelte/src/routes/+page.svelte
+++ b/CodeGen/ui/svelte/src/routes/+page.svelte
@@ -34,22 +34,21 @@
     const eventSource = await fetchTextStream(query);

     eventSource.addEventListener("message", (e: any) => {
-      let Msg = e.data;
-      console.log("Msg", Msg);
+      let res = e.data;

-      if (Msg.startsWith("b")) {
-        const trimmedData = Msg.slice(2, -1);
-        if (trimmedData.includes("'''")) {
-          deleteFlag = true;
-        } else if (deleteFlag && trimmedData.includes("\\n")) {
-          deleteFlag = false;
-        } else if (trimmedData !== "" && !deleteFlag) {
-          code_output += trimmedData.replace(/\\n/g, "\n");
-        }
-      } else if (Msg === "[DONE]") {
+      if (res === "[DONE]") {
         deleteFlag = false;
         loading = false;
         query = '';
+      } else {
+        let Msg = JSON.parse(res).choices[0].text;
+        if (Msg.includes("'''")) {
+          deleteFlag = true;
+        } else if (deleteFlag && Msg.includes("\\n")) {
+          deleteFlag = false;
+        } else if (Msg !== "" && !deleteFlag) {
+          code_output += Msg.replace(/\\n/g, "\n");
+        }
+      }
     });
     eventSource.stream();
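Review note: the UI handler now parses each SSE chunk as an OpenAI-style completion (`JSON.parse(res).choices[0].text`) rather than trimming Python byte-string reprs (`b'...'`), matching the llm microservice's new streaming output. A quick way to eyeball the chunks the UI will receive, sketched under the assumption that the microservice still listens on port 9000 with the OPEA `query` payload; the exact endpoint path and payload fields are assumptions not shown in this diff:

    # Stream a completion and print the raw SSE lines; each data: line should
    # carry {"choices":[{"text":"..."}]} until the final data: [DONE].
    curl -N -X POST "http://${host_ip}:9000/v1/chat/completions" \
      -H "Content-Type: application/json" \
      -d '{"query": "Implement quicksort in Python"}'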