Text2image code refactor (#1054)

Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
Authored by XinyuYe-Intel on 2025-01-09 11:45:15 +08:00, committed by GitHub
parent 179b5da06b
commit 2587a2978a
14 changed files with 239 additions and 135 deletions


@@ -5,9 +5,9 @@
 services:
   text2image:
     build:
-      dockerfile: comps/text2image/Dockerfile
+      dockerfile: comps/text2image/src/Dockerfile
     image: ${REGISTRY:-opea}/text2image:${TAG:-latest}
   text2image-gaudi:
     build:
-      dockerfile: comps/text2image/Dockerfile.intel_hpu
+      dockerfile: comps/text2image/src/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/text2image-gaudi:${TAG:-latest}


@@ -13,12 +13,12 @@ COPY comps /home/comps
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
     if [ ${ARCH} = "cpu" ]; then \
     pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
-    pip install --no-cache-dir -r /home/comps/text2image/requirements.txt
+    pip install --no-cache-dir -r /home/comps/text2image/src/requirements.txt

 ENV PYTHONPATH=$PYTHONPATH:/home

-WORKDIR /home/comps/text2image
+WORKDIR /home/comps/text2image/src

-RUN echo python text2image.py --bf16 >> run.sh
+RUN echo python opea_text2image_microservice.py --bf16 >> run.sh

 CMD bash run.sh


@@ -19,11 +19,11 @@ ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/home/user/optimum-habana
 # Install requirements and optimum habana
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r /home/user/comps/text2image/requirements.txt && \
+    pip install --no-cache-dir -r /home/user/comps/text2image/src/requirements.txt && \
     pip install --no-cache-dir optimum[habana]

-WORKDIR /home/user/comps/text2image
+WORKDIR /home/user/comps/text2image/src

-RUN echo python text2image.py --device hpu --use_hpu_graphs --bf16 >> run.sh
+RUN echo python opea_text2image_microservice.py --device hpu --use_hpu_graphs --bf16 >> run.sh

 CMD bash run.sh


@@ -34,7 +34,7 @@ export HF_TOKEN=<your huggingface token>
 Start the OPEA Microservice:

 ```bash
-python text2image.py --bf16 --model_name_or_path $MODEL --token $HF_TOKEN
+python opea_text2image_microservice.py --bf16 --model_name_or_path $MODEL --token $HF_TOKEN
 ```

 # 🚀2. Start Microservice with Docker (Option 2)
@@ -60,7 +60,7 @@ Build text-to-image service image on Xeon with below command:

 ```bash
 cd ../..
-docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile .
+docker build -t opea/text2image:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/src/Dockerfile .
 ```

 ### 2.1.2 Text-to-Image Service Image on Gaudi
@@ -69,7 +69,7 @@ Build text-to-image service image on Gaudi with below command:

 ```bash
 cd ../..
-docker build -t opea/text2image-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/Dockerfile.intel_hpu .
+docker build -t opea/text2image-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/text2image/src/Dockerfile.intel_hpu .
 ```

 ## 2.2 Start Text-to-Image Service


@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


@@ -0,0 +1,99 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64
import os
import tempfile

import torch
from diffusers import DiffusionPipeline

from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, SDInputs, SDOutputs, ServiceType

logger = CustomLogger("opea")


@OpeaComponentRegistry.register("OPEA_TEXT2IMAGE")
class OpeaText2image(OpeaComponent):
    """A specialized text2image component derived from OpeaComponent for text2image services.

    Attributes:
        pipe (DiffusionPipeline): The diffusion pipeline used for image generation.
        seed (int): The seed used to make generation deterministic.
    """

    def __init__(self, name: str, description: str, config: dict = None):
        super().__init__(name, ServiceType.TEXT2IMAGE.name.lower(), description, config)

        # initialize model and tokenizer
        self.seed = config["seed"]
        model_name_or_path = config["model_name_or_path"]
        device = config["device"]
        if os.getenv("MODEL", None):
            model_name_or_path = os.getenv("MODEL")
        kwargs = {}
        if config["bf16"]:
            kwargs["torch_dtype"] = torch.bfloat16
        if not config["token"]:
            config["token"] = os.getenv("HF_TOKEN")
        if device == "hpu":
            kwargs.update(
                {
                    "use_habana": True,
                    "use_hpu_graphs": config["use_hpu_graphs"],
                    "gaudi_config": "Habana/stable-diffusion",
                    "token": config["token"],
                }
            )
            if "stable-diffusion-3" in model_name_or_path:
                from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline

                self.pipe = GaudiStableDiffusion3Pipeline.from_pretrained(
                    model_name_or_path,
                    **kwargs,
                )
            elif "stable-diffusion" in model_name_or_path.lower() or "flux" in model_name_or_path.lower():
                from optimum.habana.diffusers import AutoPipelineForText2Image

                self.pipe = AutoPipelineForText2Image.from_pretrained(
                    model_name_or_path,
                    **kwargs,
                )
            else:
                raise NotImplementedError(
                    "Only support stable-diffusion, stable-diffusion-xl, stable-diffusion-3 and flux now, "
                    + f"model {model_name_or_path} not supported."
                )
        elif device == "cpu":
            self.pipe = DiffusionPipeline.from_pretrained(model_name_or_path, token=config["token"], **kwargs)
        else:
            raise NotImplementedError(f"Only support cpu and hpu device now, device {device} not supported.")
        logger.info("Stable Diffusion model initialized.")

    async def invoke(self, input: SDInputs) -> SDOutputs:
        """Invokes the text2image service to generate image(s) for the provided input.

        Args:
            input (SDInputs): The input for text2image service, including prompt and optional parameters like num_images_per_prompt.

        Returns:
            SDOutputs: The response is a list of images.
        """
        prompt = input.prompt
        num_images_per_prompt = input.num_images_per_prompt
        generator = torch.manual_seed(self.seed)
        images = self.pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images
        with tempfile.TemporaryDirectory() as image_path:
            results = []
            for i, image in enumerate(images):
                save_path = os.path.join(image_path, f"image_{i+1}.png")
                image.save(save_path)
                with open(save_path, "rb") as f:
                    bytes = f.read()
                    b64_str = base64.b64encode(bytes).decode()
                    results.append(b64_str)
        return SDOutputs(images=results)

    def check_health(self) -> bool:
        return True
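The component above eagerly loads the pipeline in `__init__` and exposes an async `invoke`. A minimal sketch of driving it directly, outside the microservice wrapper, assuming it runs from `comps/text2image/src` so that `integrations.opea` is importable and that `SDInputs`/`SDOutputs` expose the fields used in this diff:

```python
# Sketch only: exercises OpeaText2image without the HTTP layer.
import asyncio

from comps import SDInputs
from integrations.opea import OpeaText2image

# Config mirrors the CLI defaults in opea_text2image_microservice.py.
config = {
    "model_name_or_path": "stabilityai/stable-diffusion-3-medium-diffusers",
    "device": "cpu",
    "seed": 42,
    "bf16": False,
    "token": None,  # falls back to the HF_TOKEN environment variable
    "use_hpu_graphs": False,  # only consulted on the hpu path
}

component = OpeaText2image(name="OPEA_TEXT2IMAGE", description="text2image component", config=config)
output = asyncio.run(
    component.invoke(SDInputs(prompt="An astronaut riding a green horse", num_images_per_prompt=1))
)
print(len(output.images))  # list of base64-encoded PNG strings
```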


@@ -0,0 +1,67 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse
import os
import time

from integrations.opea import OpeaText2image

from comps import (
    CustomLogger,
    OpeaComponentLoader,
    SDInputs,
    SDOutputs,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

logger = CustomLogger("opea_text2image_microservice")


@register_microservice(
    name="opea_service@text2image",
    service_type=ServiceType.TEXT2IMAGE,
    endpoint="/v1/text2image",
    host="0.0.0.0",
    port=9379,
    input_datatype=SDInputs,
    output_datatype=SDOutputs,
)
@register_statistics(names=["opea_service@text2image"])
async def text2image(input: SDInputs):
    start = time.time()
    try:
        # Use the loader to invoke the active component
        results = await loader.invoke(input)
        statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None)
        return results
    except Exception as e:
        logger.error(f"Error during text2image invocation: {e}")
        raise


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers")
    parser.add_argument("--use_hpu_graphs", default=False, action="store_true")
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--token", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--bf16", action="store_true")

    args = parser.parse_args()
    text2image_component_name = os.getenv("TEXT2IMAGE_COMPONENT_NAME", "OPEA_TEXT2IMAGE")

    # Initialize OpeaComponentLoader
    loader = OpeaComponentLoader(
        text2image_component_name,
        description=f"OPEA TEXT2IMAGE Component: {text2image_component_name}",
        config=args.__dict__,
    )

    logger.info("Text2image server started.")
    opea_microservices["opea_service@text2image"].start()
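The key moving part in this file is the `OpeaComponentRegistry`/`OpeaComponentLoader` pair: the component registers itself under the string key `OPEA_TEXT2IMAGE`, and the loader instantiates whichever component the `TEXT2IMAGE_COMPONENT_NAME` environment variable selects. A toy sketch of that pattern (hypothetical stand-in code, not the actual `comps` implementation):

```python
# Toy register/load pattern; every name here is a hypothetical stand-in.
from typing import Callable, Dict, Type

_REGISTRY: Dict[str, Type] = {}


def register(key: str) -> Callable[[type], type]:
    """Decorator: map an env-selectable string key to a component class."""
    def wrap(cls: type) -> type:
        _REGISTRY[key] = cls
        return cls
    return wrap


@register("OPEA_TEXT2IMAGE")
class DummyText2Image:
    async def invoke(self, request: str) -> str:
        return f"generated image for: {request}"  # real components run the pipeline


def load(key: str, **kwargs) -> object:
    """What a component loader does, conceptually: look up the key and instantiate."""
    return _REGISTRY[key](**kwargs)
```

This is what lets the old module-level `initialize()` lock disappear: initialization now happens once, inside the constructor of whichever registered component the loader picks at startup.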


@@ -1,123 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse
import base64
import os
import threading
import time

import torch
from diffusers import DiffusionPipeline

from comps import (
    CustomLogger,
    SDInputs,
    SDOutputs,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)

logger = CustomLogger("text2image")

pipe = None
args = None
initialization_lock = threading.Lock()
initialized = False


def initialize():
    global pipe, args, initialized
    with initialization_lock:
        if not initialized:
            # initialize model and tokenizer
            if os.getenv("MODEL", None):
                args.model_name_or_path = os.getenv("MODEL")
            kwargs = {}
            if args.bf16:
                kwargs["torch_dtype"] = torch.bfloat16
            if not args.token:
                args.token = os.getenv("HF_TOKEN")
            if args.device == "hpu":
                kwargs.update(
                    {
                        "use_habana": True,
                        "use_hpu_graphs": args.use_hpu_graphs,
                        "gaudi_config": "Habana/stable-diffusion",
                        "token": args.token,
                    }
                )
                if "stable-diffusion-3" in args.model_name_or_path:
                    from optimum.habana.diffusers import GaudiStableDiffusion3Pipeline

                    pipe = GaudiStableDiffusion3Pipeline.from_pretrained(
                        args.model_name_or_path,
                        **kwargs,
                    )
                elif "stable-diffusion" in args.model_name_or_path.lower() or "flux" in args.model_name_or_path.lower():
                    from optimum.habana.diffusers import AutoPipelineForText2Image

                    pipe = AutoPipelineForText2Image.from_pretrained(
                        args.model_name_or_path,
                        **kwargs,
                    )
                else:
                    raise NotImplementedError(
                        "Only support stable-diffusion, stable-diffusion-xl, stable-diffusion-3 and flux now, "
                        + f"model {args.model_name_or_path} not supported."
                    )
            elif args.device == "cpu":
                pipe = DiffusionPipeline.from_pretrained(args.model_name_or_path, token=args.token, **kwargs)
            else:
                raise NotImplementedError(f"Only support cpu and hpu device now, device {args.device} not supported.")
            logger.info("Stable Diffusion model initialized.")
            initialized = True


@register_microservice(
    name="opea_service@text2image",
    service_type=ServiceType.TEXT2IMAGE,
    endpoint="/v1/text2image",
    host="0.0.0.0",
    port=9379,
    input_datatype=SDInputs,
    output_datatype=SDOutputs,
)
@register_statistics(names=["opea_service@text2image"])
def text2image(input: SDInputs):
    initialize()
    start = time.time()
    prompt = input.prompt
    num_images_per_prompt = input.num_images_per_prompt
    generator = torch.manual_seed(args.seed)
    images = pipe(prompt, generator=generator, num_images_per_prompt=num_images_per_prompt).images
    image_path = os.path.join(os.getcwd(), prompt.strip().replace(" ", "_").replace("/", ""))
    os.makedirs(image_path, exist_ok=True)
    results = []
    for i, image in enumerate(images):
        save_path = os.path.join(image_path, f"image_{i+1}.png")
        image.save(save_path)
        with open(save_path, "rb") as f:
            bytes = f.read()
            b64_str = base64.b64encode(bytes).decode()
            results.append(b64_str)
    statistics_dict["opea_service@text2image"].append_latency(time.time() - start, None)
    return SDOutputs(images=results)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name_or_path", type=str, default="stabilityai/stable-diffusion-3-medium-diffusers")
    parser.add_argument("--use_hpu_graphs", default=False, action="store_true")
    parser.add_argument("--device", type=str, default="cpu")
    parser.add_argument("--token", type=str, default=None)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--bf16", action="store_true")

    args = parser.parse_args()

    logger.info("Text2image server started.")
    opea_microservices["opea_service@text2image"].start()


@@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH
     echo $(pwd)
-    docker build --no-cache -t opea/text2image:latest -f comps/text2image/Dockerfile .
+    docker build --no-cache -t opea/text2image:latest -f comps/text2image/src/Dockerfile .
     if [ $? -ne 0 ]; then
         echo "opea/text2image built fail"
         exit 1


@@ -0,0 +1,59 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -x

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    echo $(pwd)
    docker build --no-cache -t opea/text2image-gaudi:latest -f comps/text2image/src/Dockerfile.intel_hpu .
    if [ $? -ne 0 ]; then
        echo "opea/text2image-gaudi built fail"
        exit 1
    else
        echo "opea/text2image-gaudi built successful"
    fi
}

function start_service() {
    unset http_proxy
    docker run -d -p 9379:9379 --name="test-comps-text2image-gaudi" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HF_TOKEN=$HF_TOKEN -e MODEL=stabilityai/stable-diffusion-xl-base-1.0 opea/text2image-gaudi:latest
    sleep 30s
}

function validate_microservice() {
    result=$(http_proxy="" curl http://localhost:9379/v1/text2image -XPOST -d '{"prompt":"An astronaut riding a green horse", "num_images_per_prompt":1}' -H 'Content-Type: application/json')
    if [[ $result == *"images"* ]]; then
        echo "Result correct."
    else
        echo "Result wrong."
        docker logs test-comps-text2image-gaudi
        exit 1
    fi
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-text2image*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker
    build_docker_images
    start_service
    validate_microservice
    stop_docker
    echo y | docker system prune
}

main
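The `curl` check in `validate_microservice` translates directly to a small Python client. A sketch assuming the service from this PR is listening on `localhost:9379` and that the `requests` package is installed:

```python
# Hypothetical client mirroring the test script's curl call.
import base64

import requests

resp = requests.post(
    "http://localhost:9379/v1/text2image",
    json={"prompt": "An astronaut riding a green horse", "num_images_per_prompt": 1},
    timeout=600,  # the first request may include model warm-up
)
resp.raise_for_status()
for i, b64_png in enumerate(resp.json()["images"]):
    with open(f"image_{i+1}.png", "wb") as out:
        out.write(base64.b64decode(b64_png))  # each entry is a base64-encoded PNG
```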