merge readme updates
@@ -3,7 +3,16 @@ This ChatQnA use case performs RAG using LangChain, Redis vectordb and Text Gene
 # Environment Setup

 To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Habana Gaudi/Gaudi2, please follow these steps:

-## Build TGI Gaudi Docker Image
+## Prepare Docker
+
+Getting started is straightforward with the official Docker container. Simply pull the image using:
+
+```bash
+docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+```
+
+Alternatively, you can build the Docker image yourself with:

 ```bash
 bash ./serving/tgi_gaudi/build_docker.sh
 ```
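The run commands later in this commit reference the image by the bare tag `tgi_gaudi`, which the build script presumably assigns. If you pulled the official image instead of building it, a retag keeps those commands working; a minimal sketch, assuming the `1.2.1` tag pulled above:

```bash
# Retag the pulled image so it matches the tgi_gaudi tag used in the run commands below
docker tag ghcr.io/huggingface/tgi-gaudi:1.2.1 tgi_gaudi
```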
@@ -68,7 +77,7 @@ docker cp 262e04bbe466:/usr/src/optimum-habana/examples/text-generation/quantiza
 ```bash
-docker run -d -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES="4,5,6" -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id meta-llama/Llama-2-7b-hf
+docker run -d -p 8080:80 -e QUANT_CONFIG=/data/maxabs_quant.json -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id meta-llama/Llama-2-7b-hf
 ```
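Once the container is up, it is worth sanity-checking the endpoint before wiring it into the pipeline. A minimal probe sketch, assuming the service is published on localhost:8080 as in the command above (TGI's standard `/generate` route):

```bash
# Send a small test prompt; expects a JSON response containing "generated_text"
curl http://localhost:8080/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is deep learning?","parameters":{"max_new_tokens":64}}'
```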
@@ -106,6 +115,17 @@ Note: `ingest.py` will download the embedding model, please set the proxy if nec
 # Start LangChain Server

+## Enable GuardRails using Meta's Llama Guard model (Optional)
+
+We offer content moderation support using Meta's [Llama Guard](https://huggingface.co/meta-llama/LlamaGuard-7b) model. To activate GuardRails, follow the instructions below to deploy the Llama Guard model on TGI Gaudi.
+
+```bash
+volume=$PWD/data
+model_id="meta-llama/LlamaGuard-7b"
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$http_proxy tgi_gaudi --model-id $model_id
+export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
+```
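Once the guard container is running, it can help to confirm it answers before pointing `SAFETY_GUARD_ENDPOINT` at it. A minimal sketch, assuming the container is reachable on port 8088 of the local host (TGI exposes a standard `/health` route):

```bash
# Returns HTTP 200 once the Llama Guard model has finished loading
curl http://localhost:8088/health
```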

 ## Start the Backend Service

 Make sure the TGI-Gaudi service is running and that data has been populated into Redis. Launch the backend service:
@@ -4,7 +4,15 @@ Code generation is a noteworthy application of Large Language Model (LLM) techno
 # Environment Setup

 To use [🤗 text-generation-inference](https://github.com/huggingface/text-generation-inference) on Intel Gaudi2, please follow these steps:

-## Build TGI Gaudi Docker Image
+## Prepare Gaudi Image
+
+Getting started is straightforward with the official Docker container. Simply pull the image using:
+
+```bash
+docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+```
+
+Alternatively, you can build the Docker image yourself with:

 ```bash
 bash ./tgi_gaudi/build_docker.sh
 ```
@@ -12,14 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM langchain/langchain
-RUN apt-get update && apt-get -y install libgl1-mesa-glx
-RUN pip install -U langchain-cli pydantic==1.10.13
-RUN pip install langchain==0.1.11
-RUN pip install shortuuid
-RUN pip install huggingface_hub
-RUN mkdir -p /ws
-ENV PYTHONPATH=/ws
-COPY codegen-app /codegen-app
-WORKDIR /codegen-app
-CMD ["/bin/bash"]
+FROM langchain/langchain:latest
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY requirements.txt /tmp/requirements.txt
+
+RUN pip install -U -r /tmp/requirements.txt
+
+ENV PYTHONPATH=/home/user:/home/user/codegen-app
+
+WORKDIR /home/user/codegen-app
+COPY codegen-app /home/user/codegen-app
+
+SHELL ["/bin/bash", "-c"]
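To build an image from the new Dockerfile, the `docker build` pattern in the next hunk applies; a sketch with a hypothetical `codegen:latest` tag and the proxy build args used elsewhere in this commit:

```bash
# Hypothetical tag; proxy build args mirror the copilot build command below
docker build . -t codegen:latest \
  --build-arg https_proxy=$https_proxy \
  --build-arg http_proxy=$http_proxy
```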
@@ -12,25 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM langchain/langchain:latest
+#!/bin/bash

-RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
-    libgl1-mesa-glx \
-    libjemalloc-dev
-
-RUN useradd -m -s /bin/bash user && \
-    mkdir -p /home/user && \
-    chown -R user /home/user/
-
-USER user
-
-COPY requirements.txt /tmp/requirements.txt
-
-RUN pip install -U -r /tmp/requirements.txt
-
-ENV PYTHONPATH=/home/user:/home/user/codegen-app
-
-WORKDIR /home/user/codegen-app
-COPY codegen-app /home/user/codegen-app
-
-SHELL ["/bin/bash", "-c"]
+docker build . -t copilot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
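After the script builds `copilot:latest`, a quick interactive run can confirm the image starts; a hypothetical smoke test (the service entrypoint and port are not shown in this excerpt):

```bash
# Drop into the image's shell to verify the build succeeded
docker run -it --rm copilot:latest /bin/bash
```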
@@ -19,15 +19,15 @@ FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installe
 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana

-# Install required branch
-RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
-
 RUN useradd -m -s /bin/bash user && \
     mkdir -p /home/user && \
     chown -R user /home/user/

 USER user

+# Install required branch
+RUN git clone https://github.com/lkk12014402/optimum-habana.git /optimum-habana -b enable_llava_generation
+
 COPY requirements.txt /tmp/requirements.txt

 # Install dependency
@@ -1,4 +1,4 @@
-eager
 fastapi
 optimum[habana]
 uvicorn
+uvicorn