Update CodeGen for GenAIComps Refactor (#1308)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: WenjiaoYue <wenjiao.yue@intel.com>
2024-12-29 10:11:35 +08:00
parent 9efbc91774
commit 73b3f50737
7 changed files with 43 additions and 18 deletions
--- a/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
@@ -15,6 +15,12 @@ services:
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
      HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:${CODEGEN_TGI_SERVICE_PORT:-8028}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
    shm_size: 1g
    devices:
      - /dev/kfd:/dev/kfd
@@ -31,7 +37,8 @@ services:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: codegen-llm-server
    depends_on:
-      - codegen-tgi-service
+      codegen-tgi-service:
+        condition: service_healthy
    ports:
      - "${CODEGEN_LLM_SERVICE_PORT:-9000}:9000"
    ipc: host
@@ -39,7 +46,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_MODEL_ID: ${CODEGEN_LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codegen-backend-server:
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -15,12 +15,19 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  llm:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: llm-tgi-server
    depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
    ports:
      - "9000:9000"
    ipc: host
@@ -28,7 +35,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codegen-xeon-backend-server:
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -20,6 +20,11 @@ services:
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "sleep 500 && exit 0"]
+      interval: 1s
+      timeout: 505s
+      retries: 1
    runtime: habana
    cap_add:
      - SYS_NICE
@@ -29,7 +34,8 @@ services:
    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
    container_name: llm-tgi-gaudi-server
    depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
    ports:
      - "9000:9000"
    ipc: host
@@ -37,7 +43,8 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  codegen-gaudi-backend-server:
--- a/CodeGen/tests/test_compose_on_gaudi.sh
+++ b/CodeGen/tests/test_compose_on_gaudi.sh
@@ -34,6 +34,7 @@ function start_services() {
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
+    export host_ip=${ip_address}

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

--- a/CodeGen/tests/test_compose_on_rocm.sh
+++ b/CodeGen/tests/test_compose_on_rocm.sh
@@ -39,6 +39,7 @@ function start_services() {
    export CODEGEN_BACKEND_SERVICE_PORT=7778
    export CODEGEN_BACKEND_SERVICE_URL="http://${ip_address}:${CODEGEN_BACKEND_SERVICE_PORT}/v1/codegen"
    export CODEGEN_UI_SERVICE_PORT=5173
+    export host_ip=${ip_address}

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -35,6 +35,7 @@ function start_services() {
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7778/v1/codegen"
+    export host_ip=${ip_address}

    sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

--- a/CodeGen/ui/svelte/src/routes/+page.svelte
+++ b/CodeGen/ui/svelte/src/routes/+page.svelte
@@ -34,22 +34,21 @@
 		const eventSource = await fetchTextStream(query);

 		eventSource.addEventListener("message", (e: any) => {
-			let Msg = e.data;
-			console.log("Msg", Msg);
+			let res = e.data;

-			if (Msg.startsWith("b")) {
-				const trimmedData = Msg.slice(2, -1);
-				if (trimmedData.includes("'''")) {
-					deleteFlag = true;
-				} else if (deleteFlag && trimmedData.includes("\\n")) {
-					deleteFlag = false;
-				} else if (trimmedData !== "</s>" && !deleteFlag) {
-					code_output += trimmedData.replace(/\\n/g, "\n");
-				}
-			} else if (Msg === "[DONE]") {
+			if (res === "[DONE]") {
 				deleteFlag = false;
 				loading = false;
 				query = '';
+			} else {
+				let Msg = JSON.parse(res).choices[0].text;
+				if (Msg.includes("'''")) {
+					deleteFlag = true;
+				} else if (deleteFlag && Msg.includes("\\n")) {
+					deleteFlag = false;
+				} else if (Msg !== "</s>" && !deleteFlag) {
+					code_output += Msg.replace(/\\n/g, "\n");
+				}
 			}
 		});
 		eventSource.stream();