From 9d3bc0e00c949973c4accde5af98f003e682e09b Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Thu, 30 May 2024 16:04:59 +0800 Subject: [PATCH] Fix huggingface hub token environment variable (#214) Signed-off-by: lvliang-intel --- .github/workflows/AudioQnA.yml | 6 +++--- .github/workflows/E2E_test_with_compose.yml | 2 +- .github/workflows/SearchQnA.yml | 2 +- .github/workflows/Translation.yml | 2 +- .github/workflows/VisualQnA.yml | 2 +- AudioQnA/README.md | 10 +++++----- AudioQnA/langchain/docker/docker-compose.yml | 2 +- AudioQnA/serving/tgi_gaudi/README.md | 4 ++-- AudioQnA/tests/test_langchain_inference.sh | 2 +- ChatQnA/chatqna.yaml | 2 +- ChatQnA/deprecated/README.md | 10 +++++----- ChatQnA/deprecated/deployment/nginx/.env | 2 +- .../langchain/docker/docker-compose-qdrant.yml | 2 +- ChatQnA/deprecated/langchain/docker/docker-compose.yml | 2 +- .../deprecated/langchain/test/end_to_end_rag_test.py | 2 +- ChatQnA/deprecated/serving/tgi_gaudi/README.md | 4 ++-- ChatQnA/deprecated/tests/test_langchain_inference.sh | 2 +- ChatQnA/docker/gaudi/README.md | 2 +- ChatQnA/docker/gaudi/docker_compose.yaml | 4 ++-- ChatQnA/docker/xeon/README.md | 2 +- ChatQnA/docker/xeon/docker_compose.yaml | 4 ++-- ChatQnA/kubernetes/manifests/README.md | 2 +- ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml | 2 +- ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml | 2 +- ChatQnA/tests/test_chatqna_on_gaudi.sh | 2 +- ChatQnA/tests/test_chatqna_on_xeon.sh | 2 +- CodeGen/codegen.yaml | 4 ++-- CodeGen/deprecated/README.md | 4 ++-- CodeGen/deprecated/tests/test_codegen_inference.sh | 2 +- CodeGen/docker/gaudi/README.md | 2 +- CodeGen/docker/gaudi/docker_compose.yaml | 2 +- CodeGen/docker/xeon/README.md | 2 +- CodeGen/docker/xeon/docker_compose.yaml | 4 ++-- CodeGen/kubernetes/manifests/README.md | 10 +++++----- CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 2 +- CodeGen/kubernetes/manifests/xeon/codegen.yaml | 2 +- CodeGen/tests/test_codegen_on_gaudi.sh | 2 +- CodeGen/tests/test_codegen_on_xeon.sh | 2 +- CodeTrans/codetrans.yaml | 4 ++-- CodeTrans/deprecated/README.md | 4 ++-- CodeTrans/docker/gaudi/README.md | 2 +- CodeTrans/docker/gaudi/docker_compose.yaml | 2 +- CodeTrans/docker/xeon/README.md | 2 +- CodeTrans/docker/xeon/docker_compose.yaml | 2 +- CodeTrans/tests/test_codetrans_on_gaudi.sh | 2 +- CodeTrans/tests/test_codetrans_on_xeon.sh | 2 +- DocSum/deprecated/README.md | 8 ++++---- DocSum/deprecated/tests/test_langchain_inference.sh | 2 +- DocSum/docker/gaudi/README.md | 2 +- DocSum/docker/gaudi/docker_compose.yaml | 4 ++-- DocSum/docker/xeon/README.md | 2 +- DocSum/docker/xeon/docker_compose.yaml | 4 ++-- DocSum/docsum.yaml | 4 ++-- DocSum/tests/test_docsum_on_gaudi.sh | 2 +- DocSum/tests/test_docsum_on_xeon.sh | 2 +- SearchQnA/README.md | 4 ++-- SearchQnA/tests/test_langchain_inference.sh | 2 +- Translation/README.md | 4 ++-- Translation/tests/test_langchain_inference.sh | 2 +- 59 files changed, 90 insertions(+), 90 deletions(-) diff --git a/.github/workflows/AudioQnA.yml b/.github/workflows/AudioQnA.yml index c02370e83..007853585 100644 --- a/.github/workflows/AudioQnA.yml +++ b/.github/workflows/AudioQnA.yml @@ -37,21 +37,21 @@ jobs: - name: Run Test ASR env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/AudioQnA/tests bash test_asr.sh - name: Run Test TTS env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace 
}}/AudioQnA/tests bash test_tts.sh - name: Run Test LLM engine env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/AudioQnA/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/.github/workflows/E2E_test_with_compose.yml b/.github/workflows/E2E_test_with_compose.yml index 8be309367..e9bd065ae 100644 --- a/.github/workflows/E2E_test_with_compose.yml +++ b/.github/workflows/E2E_test_with_compose.yml @@ -76,7 +76,7 @@ jobs: - name: Run test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} example: ${{ matrix.example }} hardware: ${{ matrix.hardware }} run: | diff --git a/.github/workflows/SearchQnA.yml b/.github/workflows/SearchQnA.yml index aaeaf962d..dffdb7bd2 100644 --- a/.github/workflows/SearchQnA.yml +++ b/.github/workflows/SearchQnA.yml @@ -36,7 +36,7 @@ jobs: ref: "refs/pull/${{ github.event.number }}/merge" - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }} diff --git a/.github/workflows/Translation.yml b/.github/workflows/Translation.yml index c4b867dbc..b54a58348 100644 --- a/.github/workflows/Translation.yml +++ b/.github/workflows/Translation.yml @@ -37,7 +37,7 @@ jobs: - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/Translation/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/.github/workflows/VisualQnA.yml b/.github/workflows/VisualQnA.yml index da0194829..5e68a57ea 100644 --- a/.github/workflows/VisualQnA.yml +++ b/.github/workflows/VisualQnA.yml @@ -37,7 +37,7 @@ jobs: - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/VisualQnA/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/AudioQnA/README.md b/AudioQnA/README.md index 9d9d56f1f..722a9fe9b 100644 --- a/AudioQnA/README.md +++ b/AudioQnA/README.md @@ -83,12 +83,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -147,7 +147,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y ## Launch Redis and LangChain Backend Service -Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` +Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` ```bash cd langchain/docker @@ -180,7 +180,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi ```bash volume=$PWD/data model_id="meta-llama/LlamaGuard-7b" -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id +docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088" ``` diff --git a/AudioQnA/langchain/docker/docker-compose.yml b/AudioQnA/langchain/docker/docker-compose.yml index ac8e34742..daa3c6cee 100644 --- a/AudioQnA/langchain/docker/docker-compose.yml +++ b/AudioQnA/langchain/docker/docker-compose.yml @@ -28,7 +28,7 @@ services: container_name: qna-rag-redis-server environment: - https_proxy=${https_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - "REDIS_PORT=6379" - "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "REDIS_SCHEMA=schema_dim_768.yml" diff --git a/AudioQnA/serving/tgi_gaudi/README.md b/AudioQnA/serving/tgi_gaudi/README.md index c9a8d510e..ecbc10c40 100644 --- a/AudioQnA/serving/tgi_gaudi/README.md +++ b/AudioQnA/serving/tgi_gaudi/README.md @@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba ## Docker command for 70B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 ``` ## Docker command for 13B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 
-e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model ``` diff --git a/AudioQnA/tests/test_langchain_inference.sh b/AudioQnA/tests/test_langchain_inference.sh index 960fc75fc..57303e7c7 100644 --- a/AudioQnA/tests/test_langchain_inference.sh +++ b/AudioQnA/tests/test_langchain_inference.sh @@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() { function launch_redis_and_langchain_service() { cd $WORKPATH - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} local port=8890 sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py docker compose -f langchain/docker/docker-compose.yml up -d --build diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml index 276effa8c..bd5698bc8 100644 --- a/ChatQnA/chatqna.yaml +++ b/ChatQnA/chatqna.yaml @@ -66,7 +66,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} diff --git a/ChatQnA/deprecated/README.md b/ChatQnA/deprecated/README.md index 985f7cd29..40a4b0605 100644 --- a/ChatQnA/deprecated/README.md +++ b/ChatQnA/deprecated/README.md @@ -51,12 +51,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -115,7 +115,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y ## Launch Vector Database and LangChain Backend Service -Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` +Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead. 
@@ -153,7 +153,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi ```bash volume=$PWD/data model_id="meta-llama/LlamaGuard-7b" -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id +docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088" ``` diff --git a/ChatQnA/deprecated/deployment/nginx/.env b/ChatQnA/deprecated/deployment/nginx/.env index bc3da51d3..b7d5bbf15 100644 --- a/ChatQnA/deprecated/deployment/nginx/.env +++ b/ChatQnA/deprecated/deployment/nginx/.env @@ -1,4 +1,4 @@ -HUGGING_FACE_HUB_TOKEN= +HF_TOKEN= volume=./data model=meta-llama/Llama-2-13b-chat-hf MAX_TOTAL_TOKENS=2000 diff --git a/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml b/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml index 53c4f8ce1..3ba901130 100644 --- a/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml +++ b/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml @@ -30,7 +30,7 @@ services: container_name: qna-rag-qdrant-server environment: - https_proxy=${https_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "VECTOR_DATABASE=QDRANT" - "TGI_LLM_ENDPOINT=http://localhost:8080" diff --git a/ChatQnA/deprecated/langchain/docker/docker-compose.yml b/ChatQnA/deprecated/langchain/docker/docker-compose.yml index e12e7557d..6593ef8d3 100644 --- a/ChatQnA/deprecated/langchain/docker/docker-compose.yml +++ b/ChatQnA/deprecated/langchain/docker/docker-compose.yml @@ -38,7 +38,7 @@ services: - socks_proxy=${socks_proxy} - FTP_PROXY=${FTP_PROXY} - ftp_proxy=${ftp_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} - "REDIS_PORT=6379" - "EMBED_MODEL=BAAI/bge-base-en-v1.5" diff --git a/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py b/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py index bfaff3124..e4d2d28f6 100644 --- a/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py +++ b/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py @@ -242,7 +242,7 @@ if __name__ == "__main__": tokenizer = AutoTokenizer.from_pretrained(args.model_name) os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com" os.environ["LANGCHAIN_API_KEY"] = args.langchain_token - os.environ["HUGGINGFACEHUB_API_TOKEN"] = args.huggingface_token + os.environ["HF_TOKEN"] = args.huggingface_token chain = buildchain(args) run_test(args, chain) diff --git a/ChatQnA/deprecated/serving/tgi_gaudi/README.md b/ChatQnA/deprecated/serving/tgi_gaudi/README.md index c9a8d510e..ecbc10c40 100644 --- a/ChatQnA/deprecated/serving/tgi_gaudi/README.md +++ b/ChatQnA/deprecated/serving/tgi_gaudi/README.md @@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba ## Docker command for 70B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e 
HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 ``` ## Docker command for 13B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model ``` diff --git a/ChatQnA/deprecated/tests/test_langchain_inference.sh b/ChatQnA/deprecated/tests/test_langchain_inference.sh index 1f520a445..ea8fde32d 100644 --- a/ChatQnA/deprecated/tests/test_langchain_inference.sh +++ b/ChatQnA/deprecated/tests/test_langchain_inference.sh @@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() { function launch_redis_and_langchain_service() { cd $WORKPATH - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} local port=8890 sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py docker compose -f langchain/docker/docker-compose.yml up -d --build diff --git a/ChatQnA/docker/gaudi/README.md b/ChatQnA/docker/gaudi/README.md index 43ad533d4..4bb405919 100644 --- a/ChatQnA/docker/gaudi/README.md +++ b/ChatQnA/docker/gaudi/README.md @@ -100,7 +100,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export REDIS_URL="redis://${host_ip}:6379" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml index f7fd02d13..b142124fe 100644 --- a/ChatQnA/docker/gaudi/docker_compose.yaml +++ b/ChatQnA/docker/gaudi/docker_compose.yaml @@ -107,7 +107,7 @@ services: http_proxy: ${http_proxy} https_proxy: 
${https_proxy} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-reranking-service" @@ -141,7 +141,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/ChatQnA/docker/xeon/README.md b/ChatQnA/docker/xeon/README.md index 5ba2f1707..5949e43c9 100644 --- a/ChatQnA/docker/xeon/README.md +++ b/ChatQnA/docker/xeon/README.md @@ -158,7 +158,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" export REDIS_URL="redis://${host_ip}:6379" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} diff --git a/ChatQnA/docker/xeon/docker_compose.yaml b/ChatQnA/docker/xeon/docker_compose.yaml index af4629443..be512b492 100644 --- a/ChatQnA/docker/xeon/docker_compose.yaml +++ b/ChatQnA/docker/xeon/docker_compose.yaml @@ -103,7 +103,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-reranking-service" @@ -132,7 +132,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/ChatQnA/kubernetes/manifests/README.md b/ChatQnA/kubernetes/manifests/README.md index 997238738..32541a5c4 100644 --- a/ChatQnA/kubernetes/manifests/README.md +++ b/ChatQnA/kubernetes/manifests/README.md @@ -26,7 +26,7 @@ For Gaudi: > [NOTE] -- Be sure to modify HUGGINGFACEHUB_API_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml +- Be sure to modify HF_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml - Be sure the node has path /mnt/models to store all the models ### Deploy diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml index dbd67872c..f0902a4eb 100644 --- a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml +++ b/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml @@ -14,7 +14,7 @@ data: TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009" REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml index b5dd22303..6c6a17225 100644 --- a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml +++ 
b/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml @@ -14,7 +14,7 @@ data: TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009" REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/tests/test_chatqna_on_gaudi.sh b/ChatQnA/tests/test_chatqna_on_gaudi.sh index f2ae74535..d14e368ab 100644 --- a/ChatQnA/tests/test_chatqna_on_gaudi.sh +++ b/ChatQnA/tests/test_chatqna_on_gaudi.sh @@ -47,7 +47,7 @@ function start_services() { export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export REDIS_URL="redis://${ip_address}:6379" export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/ChatQnA/tests/test_chatqna_on_xeon.sh b/ChatQnA/tests/test_chatqna_on_xeon.sh index 3d8bfb4ab..0c6036cb0 100644 --- a/ChatQnA/tests/test_chatqna_on_xeon.sh +++ b/ChatQnA/tests/test_chatqna_on_xeon.sh @@ -39,7 +39,7 @@ function start_services() { export TGI_LLM_ENDPOINT="http://${ip_address}:9009" export REDIS_URL="redis://${ip_address}:6379" export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/CodeGen/codegen.yaml b/CodeGen/codegen.yaml index d5349c2c2..d572ee638 100644 --- a/CodeGen/codegen.yaml +++ b/CodeGen/codegen.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: ${UI_SERVICE_HOST_IP} ports: diff --git a/CodeGen/deprecated/README.md b/CodeGen/deprecated/README.md index d79bcc19d..2d2050664 100644 --- a/CodeGen/deprecated/README.md +++ b/CodeGen/deprecated/README.md @@ -86,10 +86,10 @@ docker run -it -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --net=h Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service: -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= nohup python server.py & ``` diff --git a/CodeGen/deprecated/tests/test_codegen_inference.sh b/CodeGen/deprecated/tests/test_codegen_inference.sh index 5a7c2a59d..0cb1a1c7e 100644 --- a/CodeGen/deprecated/tests/test_codegen_inference.sh +++ b/CodeGen/deprecated/tests/test_codegen_inference.sh @@ -63,7 +63,7 @@ function launch_server() { # Start the Backend Service docker exec $COPILOT_CONTAINER_NAME \ - bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python server.py &" + bash -c "export HF_TOKEN=$HF_TOKEN;nohup python server.py &" sleep 1m } diff --git a/CodeGen/docker/gaudi/README.md b/CodeGen/docker/gaudi/README.md index 0d6931f6c..574298d66 100644 --- a/CodeGen/docker/gaudi/README.md +++ b/CodeGen/docker/gaudi/README.md @@ -55,7 +55,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen" diff --git a/CodeGen/docker/gaudi/docker_compose.yaml b/CodeGen/docker/gaudi/docker_compose.yaml index 9ce810956..223fec5c5 100644 --- a/CodeGen/docker/gaudi/docker_compose.yaml +++ b/CodeGen/docker/gaudi/docker_compose.yaml @@ -44,7 +44,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeGen/docker/xeon/README.md b/CodeGen/docker/xeon/README.md index 80d3156d5..03de03850 100644 --- a/CodeGen/docker/xeon/README.md +++ b/CodeGen/docker/xeon/README.md @@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen" diff --git a/CodeGen/docker/xeon/docker_compose.yaml b/CodeGen/docker/xeon/docker_compose.yaml index d47f37a7b..f3cd67be0 100644 --- a/CodeGen/docker/xeon/docker_compose.yaml +++ b/CodeGen/docker/xeon/docker_compose.yaml @@ -26,7 +26,7 @@ services: environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} command: --model-id ${LLM_MODEL_ID} llm: image: opea/llm-tgi:latest @@ -40,7 +40,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md index b7ca064e9..352ab1baf 100644 --- a/CodeGen/kubernetes/manifests/README.md +++ b/CodeGen/kubernetes/manifests/README.md @@ -2,15 +2,15 @@ > [NOTE] > The following values must be set before you can deploy: -> HUGGINGFACEHUB_API_TOKEN +> HF_TOKEN > You can also 
customize the "MODEL_ID" and "model-volume" ## Deploy On Xeon ``` cd GenAIExamples/CodeGen/kubernetes/manifests/xeon -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +export HF_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml kubectl apply -f codegen.yaml ``` @@ -18,8 +18,8 @@ kubectl apply -f codegen.yaml ``` cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +export HF_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml kubectl apply -f codegen.yaml ``` diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index e92c64a74..88231e110 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -143,7 +143,7 @@ spec: env: - name: TGI_LLM_ENDPOINT value: "http://codegen-tgi:80" - - name: HUGGINGFACEHUB_API_TOKEN + - name: HF_TOKEN value: "insert-your-huggingface-token-here" - name: http_proxy value: diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml index 1887741cf..803dd4879 100644 --- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -141,7 +141,7 @@ spec: env: - name: TGI_LLM_ENDPOINT value: "http://codegen-tgi:80" - - name: HUGGINGFACEHUB_API_TOKEN + - name: HF_TOKEN value: "insert-your-huggingface-token-here" - name: http_proxy value: diff --git a/CodeGen/tests/test_codegen_on_gaudi.sh b/CodeGen/tests/test_codegen_on_gaudi.sh index 12af7a393..5d4232ec9 100644 --- a/CodeGen/tests/test_codegen_on_gaudi.sh +++ b/CodeGen/tests/test_codegen_on_gaudi.sh @@ -41,7 +41,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen" diff --git a/CodeGen/tests/test_codegen_on_xeon.sh b/CodeGen/tests/test_codegen_on_xeon.sh index b96d4395e..418277335 100644 --- a/CodeGen/tests/test_codegen_on_xeon.sh +++ b/CodeGen/tests/test_codegen_on_xeon.sh @@ -29,7 +29,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen" diff --git a/CodeTrans/codetrans.yaml b/CodeTrans/codetrans.yaml index b244b8c48..4a6035e15 100644 --- a/CodeTrans/codetrans.yaml +++ b/CodeTrans/codetrans.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: 
${UI_SERVICE_HOST_IP} ports: diff --git a/CodeTrans/deprecated/README.md b/CodeTrans/deprecated/README.md index 90b6197fa..4e0ef4e8c 100644 --- a/CodeTrans/deprecated/README.md +++ b/CodeTrans/deprecated/README.md @@ -23,13 +23,13 @@ bash launch_tgi_service.sh ```sh cd langchain/docker bash build_docker.sh -docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash +docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HF_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash ``` Here is the explanation of some of the above parameters: - `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `:`. -- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). +- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). - `SERVER_PORT`: The port of the CodeTranslation service on the host. 3. Quick test diff --git a/CodeTrans/docker/gaudi/README.md b/CodeTrans/docker/gaudi/README.md index a28320a1f..9498bb0f5 100755 --- a/CodeTrans/docker/gaudi/README.md +++ b/CodeTrans/docker/gaudi/README.md @@ -51,7 +51,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans" diff --git a/CodeTrans/docker/gaudi/docker_compose.yaml b/CodeTrans/docker/gaudi/docker_compose.yaml index eca8724da..69b1422f9 100644 --- a/CodeTrans/docker/gaudi/docker_compose.yaml +++ b/CodeTrans/docker/gaudi/docker_compose.yaml @@ -42,7 +42,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeTrans/docker/xeon/README.md b/CodeTrans/docker/xeon/README.md index f8ae59d7e..167ce1018 100755 --- a/CodeTrans/docker/xeon/README.md +++ b/CodeTrans/docker/xeon/README.md @@ -59,7 +59,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans" diff --git a/CodeTrans/docker/xeon/docker_compose.yaml b/CodeTrans/docker/xeon/docker_compose.yaml index febb41231..81f24f989 100644 --- a/CodeTrans/docker/xeon/docker_compose.yaml +++ b/CodeTrans/docker/xeon/docker_compose.yaml @@ -37,7 +37,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: 
${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeTrans/tests/test_codetrans_on_gaudi.sh b/CodeTrans/tests/test_codetrans_on_gaudi.sh index 6e4773fac..dee814196 100644 --- a/CodeTrans/tests/test_codetrans_on_gaudi.sh +++ b/CodeTrans/tests/test_codetrans_on_gaudi.sh @@ -33,7 +33,7 @@ function start_services() { export https_proxy=${http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans" diff --git a/CodeTrans/tests/test_codetrans_on_xeon.sh b/CodeTrans/tests/test_codetrans_on_xeon.sh index c6bbe09b6..b54fc3aa9 100644 --- a/CodeTrans/tests/test_codetrans_on_xeon.sh +++ b/CodeTrans/tests/test_codetrans_on_xeon.sh @@ -30,7 +30,7 @@ function start_services() { export https_proxy=${http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans" diff --git a/DocSum/deprecated/README.md b/DocSum/deprecated/README.md index fff815a94..19cbd1994 100644 --- a/DocSum/deprecated/README.md +++ b/DocSum/deprecated/README.md @@ -29,12 +29,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -80,7 +80,7 @@ docker run -it --net=host --ipc=host -e http_proxy=${http_proxy} -e https_proxy= Make sure TGI-Gaudi service is running. 
Launch the backend service: ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= nohup python app/server.py & ``` diff --git a/DocSum/deprecated/tests/test_langchain_inference.sh b/DocSum/deprecated/tests/test_langchain_inference.sh index ccb1b8886..b65d21fbe 100644 --- a/DocSum/deprecated/tests/test_langchain_inference.sh +++ b/DocSum/deprecated/tests/test_langchain_inference.sh @@ -63,7 +63,7 @@ function launch_server() { # Start the Backend Service docker exec $DOCUMENT_SUMMARY_CONTAINER_NAME \ - bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python app/server.py &" + bash -c "export HF_TOKEN=$HF_TOKEN;nohup python app/server.py &" sleep 1m } diff --git a/DocSum/docker/gaudi/README.md b/DocSum/docker/gaudi/README.md index cbe7e3b0c..3a1635426 100644 --- a/DocSum/docker/gaudi/README.md +++ b/DocSum/docker/gaudi/README.md @@ -62,7 +62,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" diff --git a/DocSum/docker/gaudi/docker_compose.yaml b/DocSum/docker/gaudi/docker_compose.yaml index 6640583cf..37681a7be 100644 --- a/DocSum/docker/gaudi/docker_compose.yaml +++ b/DocSum/docker/gaudi/docker_compose.yaml @@ -24,7 +24,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} volumes: - "./data:/data" runtime: habana @@ -44,7 +44,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/DocSum/docker/xeon/README.md b/DocSum/docker/xeon/README.md index b4ace4c80..8be1d4a6f 100644 --- a/DocSum/docker/xeon/README.md +++ b/DocSum/docker/xeon/README.md @@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" diff --git a/DocSum/docker/xeon/docker_compose.yaml b/DocSum/docker/xeon/docker_compose.yaml index 6b214e3c5..4bc253442 100644 --- a/DocSum/docker/xeon/docker_compose.yaml +++ b/DocSum/docker/xeon/docker_compose.yaml @@ -24,7 +24,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} volumes: - "./data:/data" shm_size: 1g @@ -41,7 +41,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/DocSum/docsum.yaml b/DocSum/docsum.yaml index e65bcf8ab..a632ee6b7 100644 --- 
a/DocSum/docsum.yaml +++ b/DocSum/docsum.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: ${UI_SERVICE_HOST_IP} ports: diff --git a/DocSum/tests/test_docsum_on_gaudi.sh b/DocSum/tests/test_docsum_on_gaudi.sh index a8686733a..552ea1dd1 100644 --- a/DocSum/tests/test_docsum_on_gaudi.sh +++ b/DocSum/tests/test_docsum_on_gaudi.sh @@ -31,7 +31,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" diff --git a/DocSum/tests/test_docsum_on_xeon.sh b/DocSum/tests/test_docsum_on_xeon.sh index 68ae5f3f4..5a4bad82e 100644 --- a/DocSum/tests/test_docsum_on_xeon.sh +++ b/DocSum/tests/test_docsum_on_xeon.sh @@ -29,7 +29,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" diff --git a/SearchQnA/README.md b/SearchQnA/README.md index 3b2b8f25f..e314a4235 100644 --- a/SearchQnA/README.md +++ b/SearchQnA/README.md @@ -37,7 +37,7 @@ bash launch_tgi_service.sh ```sh cd langchain/docker docker build . 
--build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache -docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HUGGINGFACEHUB_API_TOKEN= -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi +docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HF_TOKEN= -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi ``` Here is the explanation of some of the above parameters: @@ -45,7 +45,7 @@ Here is the explanation of some of the above parameters: - `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `:` - `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all) - `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials) -- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens) +- `HF_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens) - `-p 8085:8000`: This will map the 8000 port of the SearchQnA service inside the container to the 8085 port on the host 3. Quick test diff --git a/SearchQnA/tests/test_langchain_inference.sh b/SearchQnA/tests/test_langchain_inference.sh index 709f350b5..269d09b0a 100644 --- a/SearchQnA/tests/test_langchain_inference.sh +++ b/SearchQnA/tests/test_langchain_inference.sh @@ -48,7 +48,7 @@ function launch_langchain_service() { tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP') tgi_ip=$(eval echo '$'$tgi_ip_name) - docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HF_TOKEN=${HF_TOKEN} \ -p ${port}:8000 --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} sleep 2m diff --git a/Translation/README.md b/Translation/README.md index 111cab5a6..c625384e8 100644 --- a/Translation/README.md +++ b/Translation/README.md @@ -23,13 +23,13 @@ bash launch_tgi_service.sh ```sh cd langchain/docker bash build_docker.sh -docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash +docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash ``` **Note**: Set the following parameters before running the above command - `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `:`. 
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). +- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). - `SERVER_PORT`: The port of the Translation service on the host. 3. Quick Test diff --git a/Translation/tests/test_langchain_inference.sh b/Translation/tests/test_langchain_inference.sh index 211986da2..6f4cef4f1 100644 --- a/Translation/tests/test_langchain_inference.sh +++ b/Translation/tests/test_langchain_inference.sh @@ -46,7 +46,7 @@ function launch_langchain_service() { cd langchain/docker docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} - docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HF_TOKEN=${HF_TOKEN} \ -e SERVER_PORT=${port} -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} sleep 2m }
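Note: with this patch applied, every example reads the Hugging Face token from the `HF_TOKEN` environment variable instead of `HUGGINGFACEHUB_API_TOKEN` / `HUGGING_FACE_HUB_TOKEN`. A minimal sketch of the resulting setup flow is shown below; the compose path is illustrative (ChatQnA on Gaudi, taken from this patch) and the token value is a placeholder, so substitute the directory for the example and hardware you are actually deploying.

```bash
# Create a read token at https://huggingface.co/settings/tokens, then export it
# under the unified name this patch standardizes on:
export HF_TOKEN=<your-hf-read-token>

# The compose files now forward HF_TOKEN into the serving containers,
# e.g. for ChatQnA on Gaudi (illustrative path from this patch):
cd GenAIExamples/ChatQnA/docker/gaudi
docker compose -f docker_compose.yaml up -d
```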