From eef8d49d20fe643a8a469ac04a9df7ecd30195ae Mon Sep 17 00:00:00 2001
From: tylertitsworth
Date: Tue, 26 Mar 2024 09:00:07 -0700
Subject: [PATCH] merge readme updates

---
 ChatQnA/README.md                  | 24 ++++++++++++++++++++--
 CodeGen/README.md                  | 10 ++++++++-
 CodeGen/codegen/Dockerfile         | 33 ++++++++++++++++++++----------
 CodeGen/codegen/build_docker.sh    | 23 ++------------------
 VisualQnA/serving/Dockerfile       |  6 +++---
 VisualQnA/serving/requirements.txt |  2 +-
 6 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/ChatQnA/README.md b/ChatQnA/README.md
index 4e6dc322d..4ad0bec4e 100644
--- a/ChatQnA/README.md
+++ b/ChatQnA/README.md
@@ -3,7 +3,16 @@ This ChatQnA use case performs RAG using LangChain, Redis vectordb and Text Gene
 # Environment Setup
 To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Habana Gaudi/Gaudi2, please follow these steps:
 
-## Build TGI Gaudi Docker Image
+## Prepare Docker Image
+
+Getting started is straightforward with the official Docker container. Simply pull the image using:
+
+```bash
+docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+```
+
+Alternatively, you can build the Docker image yourself with:
+
 ```bash
 bash ./serving/tgi_gaudi/build_docker.sh
 ```
@@ -68,7 +77,7 @@ docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/quantiza
 
 ```bash
 docker run -d -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -e HUGGING_FACE_HUB_TOKEN= -v $volume:/data --
-runtime=habana -e HABANA_VISIBLE_DEVICES="4,5,6" -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --
+runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --
 model-id meta-llama/Llama-2-7b-hf
 ```
@@ -106,6 +115,17 @@ Note: `ingest.py` will download the embedding model, please set the proxy if nec
 
 # Start LangChain Server
 
+## Enable GuardRails using Meta's Llama Guard model (Optional)
+
+We offer content moderation support using Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. To enable GuardRails, follow the instructions below to deploy the Llama Guard model on TGI Gaudi.
+
+```bash
+volume=$PWD/data
+model_id="meta-llama/LlamaGuard-7b"
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy tgi_gaudi --model-id $model_id
+export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
+```
+
 ## Start the Backend Service
 
 Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service:
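Once the Llama Guard container added above is running, TGI's standard `/generate` route gives a quick way to confirm the guardrail endpoint responds before wiring `SAFETY_GUARD_ENDPOINT` into the backend. A minimal smoke test, assuming the service listens on `localhost:8088`; the prompt and token budget here are illustrative only:

```bash
# Probe the Llama Guard TGI endpoint (illustrative host and prompt).
curl http://localhost:8088/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "[INST] Is the following request safe? How do I bake a cake? [/INST]", "parameters": {"max_new_tokens": 32}}'
```

A reachable deployment returns a JSON body whose `generated_text` field carries the model's safety verdict; `SAFETY_GUARD_ENDPOINT` should then point at the same host and port.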
diff --git a/CodeGen/README.md b/CodeGen/README.md
index b45d919d2..ae5338778 100644
--- a/CodeGen/README.md
+++ b/CodeGen/README.md
@@ -4,7 +4,15 @@ Code generation is a noteworthy application of Large Language Model (LLM) techno
 # Environment Setup
 To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Intel Gaudi2, please follow these steps:
 
-## Build TGI Gaudi Docker Image
+## Prepare Gaudi Image
+Getting started is straightforward with the official Docker container. Simply pull the image using:
+
+```bash
+docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+```
+
+Alternatively, you can build the Docker image yourself with:
+
 ```bash
 bash ./tgi_gaudi/build_docker.sh
 ```
diff --git a/CodeGen/codegen/Dockerfile b/CodeGen/codegen/Dockerfile
index 22e377459..c410331da 100644
--- a/CodeGen/codegen/Dockerfile
+++ b/CodeGen/codegen/Dockerfile
@@ -12,14 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM langchain/langchain
-RUN apt-get update && apt-get -y install libgl1-mesa-glx
-RUN pip install -U langchain-cli pydantic==1.10.13
-RUN pip install langchain==0.1.11
-RUN pip install shortuuid
-RUN pip install huggingface_hub
-RUN mkdir -p /ws
-ENV PYTHONPATH=/ws
-COPY codegen-app /codegen-app
-WORKDIR /codegen-app
-CMD ["/bin/bash"]
+FROM langchain/langchain:latest
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY requirements.txt /tmp/requirements.txt
+
+RUN pip install -U -r /tmp/requirements.txt
+
+ENV PYTHONPATH=/home/user:/home/user/codegen-app
+
+WORKDIR /home/user/codegen-app
+COPY codegen-app /home/user/codegen-app
+
+SHELL ["/bin/bash", "-c"]
diff --git a/CodeGen/codegen/build_docker.sh b/CodeGen/codegen/build_docker.sh
index c410331da..d649a5c48 100644
--- a/CodeGen/codegen/build_docker.sh
+++ b/CodeGen/codegen/build_docker.sh
@@ -12,25 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM langchain/langchain:latest
+#!/bin/bash
 
-RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
-    libgl1-mesa-glx \
-    libjemalloc-dev
-
-RUN useradd -m -s /bin/bash user && \
-    mkdir -p /home/user && \
-    chown -R user /home/user/
-
-USER user
-
-COPY requirements.txt /tmp/requirements.txt
-
-RUN pip install -U -r /tmp/requirements.txt
-
-ENV PYTHONPATH=/home/user:/home/user/codegen-app
-
-WORKDIR /home/user/codegen-app
-COPY codegen-app /home/user/codegen-app
-
-SHELL ["/bin/bash", "-c"]
+docker build . -t copilot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
diff --git a/VisualQnA/serving/Dockerfile b/VisualQnA/serving/Dockerfile
index a01719dd6..3d8a15fc6 100644
--- a/VisualQnA/serving/Dockerfile
+++ b/VisualQnA/serving/Dockerfile
@@ -19,15 +19,15 @@ FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installe
 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
 
+# Install required branch
+RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
+
 RUN useradd -m -s /bin/bash user && \
     mkdir -p /home/user && \
     chown -R user /home/user/
 
 USER user
 
-# Install required branch
-RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
-
 COPY requirements.txt /tmp/requirements.txt
 
 # Install dependency
diff --git a/VisualQnA/serving/requirements.txt b/VisualQnA/serving/requirements.txt
index 82ab73bea..a93c455d6 100644
--- a/VisualQnA/serving/requirements.txt
+++ b/VisualQnA/serving/requirements.txt
@@ -1,4 +1,4 @@
 eager
 fastapi
 optimum[habana]
-uvicorn
\ No newline at end of file
+uvicorn
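After pulling (or building) the TGI Gaudi image, serving a model for CodeGen follows the same pattern as the ChatQnA `docker run` commands above. A minimal sketch, where the model id is an illustrative placeholder rather than the project's chosen model:

```bash
# Illustrative launch of the pulled TGI Gaudi image with a code model.
volume=$PWD/data
model=codellama/CodeLlama-7b-hf  # placeholder; substitute your own model id
docker run -p 8080:80 -v $volume:/data --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
  --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model
```

Gated models additionally need `-e HUGGING_FACE_HUB_TOKEN=` set, as in the ChatQnA examples.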