From 9d3bc0e00c949973c4accde5af98f003e682e09b Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Thu, 30 May 2024 16:04:59 +0800 Subject: [PATCH] Fix huggingface hub token environment variable (#214) Signed-off-by: lvliang-intel --- .github/workflows/AudioQnA.yml | 6 +++--- .github/workflows/E2E_test_with_compose.yml | 2 +- .github/workflows/SearchQnA.yml | 2 +- .github/workflows/Translation.yml | 2 +- .github/workflows/VisualQnA.yml | 2 +- AudioQnA/README.md | 10 +++++----- AudioQnA/langchain/docker/docker-compose.yml | 2 +- AudioQnA/serving/tgi_gaudi/README.md | 4 ++-- AudioQnA/tests/test_langchain_inference.sh | 2 +- ChatQnA/chatqna.yaml | 2 +- ChatQnA/deprecated/README.md | 10 +++++----- ChatQnA/deprecated/deployment/nginx/.env | 2 +- .../langchain/docker/docker-compose-qdrant.yml | 2 +- ChatQnA/deprecated/langchain/docker/docker-compose.yml | 2 +- .../deprecated/langchain/test/end_to_end_rag_test.py | 2 +- ChatQnA/deprecated/serving/tgi_gaudi/README.md | 4 ++-- ChatQnA/deprecated/tests/test_langchain_inference.sh | 2 +- ChatQnA/docker/gaudi/README.md | 2 +- ChatQnA/docker/gaudi/docker_compose.yaml | 4 ++-- ChatQnA/docker/xeon/README.md | 2 +- ChatQnA/docker/xeon/docker_compose.yaml | 4 ++-- ChatQnA/kubernetes/manifests/README.md | 2 +- ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml | 2 +- ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml | 2 +- ChatQnA/tests/test_chatqna_on_gaudi.sh | 2 +- ChatQnA/tests/test_chatqna_on_xeon.sh | 2 +- CodeGen/codegen.yaml | 4 ++-- CodeGen/deprecated/README.md | 4 ++-- CodeGen/deprecated/tests/test_codegen_inference.sh | 2 +- CodeGen/docker/gaudi/README.md | 2 +- CodeGen/docker/gaudi/docker_compose.yaml | 2 +- CodeGen/docker/xeon/README.md | 2 +- CodeGen/docker/xeon/docker_compose.yaml | 4 ++-- CodeGen/kubernetes/manifests/README.md | 10 +++++----- CodeGen/kubernetes/manifests/gaudi/codegen.yaml | 2 +- CodeGen/kubernetes/manifests/xeon/codegen.yaml | 2 +- CodeGen/tests/test_codegen_on_gaudi.sh | 2 +- CodeGen/tests/test_codegen_on_xeon.sh | 2 +- CodeTrans/codetrans.yaml | 4 ++-- CodeTrans/deprecated/README.md | 4 ++-- CodeTrans/docker/gaudi/README.md | 2 +- CodeTrans/docker/gaudi/docker_compose.yaml | 2 +- CodeTrans/docker/xeon/README.md | 2 +- CodeTrans/docker/xeon/docker_compose.yaml | 2 +- CodeTrans/tests/test_codetrans_on_gaudi.sh | 2 +- CodeTrans/tests/test_codetrans_on_xeon.sh | 2 +- DocSum/deprecated/README.md | 8 ++++---- DocSum/deprecated/tests/test_langchain_inference.sh | 2 +- DocSum/docker/gaudi/README.md | 2 +- DocSum/docker/gaudi/docker_compose.yaml | 4 ++-- DocSum/docker/xeon/README.md | 2 +- DocSum/docker/xeon/docker_compose.yaml | 4 ++-- DocSum/docsum.yaml | 4 ++-- DocSum/tests/test_docsum_on_gaudi.sh | 2 +- DocSum/tests/test_docsum_on_xeon.sh | 2 +- SearchQnA/README.md | 4 ++-- SearchQnA/tests/test_langchain_inference.sh | 2 +- Translation/README.md | 4 ++-- Translation/tests/test_langchain_inference.sh | 2 +- 59 files changed, 90 insertions(+), 90 deletions(-) diff --git a/.github/workflows/AudioQnA.yml b/.github/workflows/AudioQnA.yml index c02370e83..007853585 100644 --- a/.github/workflows/AudioQnA.yml +++ b/.github/workflows/AudioQnA.yml @@ -37,21 +37,21 @@ jobs: - name: Run Test ASR env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/AudioQnA/tests bash test_asr.sh - name: Run Test TTS env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace 
}}/AudioQnA/tests bash test_tts.sh - name: Run Test LLM engine env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/AudioQnA/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/.github/workflows/E2E_test_with_compose.yml b/.github/workflows/E2E_test_with_compose.yml index 8be309367..e9bd065ae 100644 --- a/.github/workflows/E2E_test_with_compose.yml +++ b/.github/workflows/E2E_test_with_compose.yml @@ -76,7 +76,7 @@ jobs: - name: Run test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} example: ${{ matrix.example }} hardware: ${{ matrix.hardware }} run: | diff --git a/.github/workflows/SearchQnA.yml b/.github/workflows/SearchQnA.yml index aaeaf962d..dffdb7bd2 100644 --- a/.github/workflows/SearchQnA.yml +++ b/.github/workflows/SearchQnA.yml @@ -36,7 +36,7 @@ jobs: ref: "refs/pull/${{ github.event.number }}/merge" - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }} diff --git a/.github/workflows/Translation.yml b/.github/workflows/Translation.yml index c4b867dbc..b54a58348 100644 --- a/.github/workflows/Translation.yml +++ b/.github/workflows/Translation.yml @@ -37,7 +37,7 @@ jobs: - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/Translation/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/.github/workflows/VisualQnA.yml b/.github/workflows/VisualQnA.yml index da0194829..5e68a57ea 100644 --- a/.github/workflows/VisualQnA.yml +++ b/.github/workflows/VisualQnA.yml @@ -37,7 +37,7 @@ jobs: - name: Run Test env: - HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | cd ${{ github.workspace }}/VisualQnA/tests bash test_${{ matrix.job_name }}_inference.sh diff --git a/AudioQnA/README.md b/AudioQnA/README.md index 9d9d56f1f..722a9fe9b 100644 --- a/AudioQnA/README.md +++ b/AudioQnA/README.md @@ -83,12 +83,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -147,7 +147,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y ## Launch Redis and LangChain Backend Service -Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` +Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` ```bash cd langchain/docker @@ -180,7 +180,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi ```bash volume=$PWD/data model_id="meta-llama/LlamaGuard-7b" -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id +docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088" ``` diff --git a/AudioQnA/langchain/docker/docker-compose.yml b/AudioQnA/langchain/docker/docker-compose.yml index ac8e34742..daa3c6cee 100644 --- a/AudioQnA/langchain/docker/docker-compose.yml +++ b/AudioQnA/langchain/docker/docker-compose.yml @@ -28,7 +28,7 @@ services: container_name: qna-rag-redis-server environment: - https_proxy=${https_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - "REDIS_PORT=6379" - "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "REDIS_SCHEMA=schema_dim_768.yml" diff --git a/AudioQnA/serving/tgi_gaudi/README.md b/AudioQnA/serving/tgi_gaudi/README.md index c9a8d510e..ecbc10c40 100644 --- a/AudioQnA/serving/tgi_gaudi/README.md +++ b/AudioQnA/serving/tgi_gaudi/README.md @@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba ## Docker command for 70B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 ``` ## Docker command for 13B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 
-e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model ``` diff --git a/AudioQnA/tests/test_langchain_inference.sh b/AudioQnA/tests/test_langchain_inference.sh index 960fc75fc..57303e7c7 100644 --- a/AudioQnA/tests/test_langchain_inference.sh +++ b/AudioQnA/tests/test_langchain_inference.sh @@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() { function launch_redis_and_langchain_service() { cd $WORKPATH - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} local port=8890 sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py docker compose -f langchain/docker/docker-compose.yml up -d --build diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml index 276effa8c..bd5698bc8 100644 --- a/ChatQnA/chatqna.yaml +++ b/ChatQnA/chatqna.yaml @@ -66,7 +66,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} diff --git a/ChatQnA/deprecated/README.md b/ChatQnA/deprecated/README.md index 985f7cd29..40a4b0605 100644 --- a/ChatQnA/deprecated/README.md +++ b/ChatQnA/deprecated/README.md @@ -51,12 +51,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -115,7 +115,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y ## Launch Vector Database and LangChain Backend Service -Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` +Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml` By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead. 
@@ -153,7 +153,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi ```bash volume=$PWD/data model_id="meta-llama/LlamaGuard-7b" -docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id +docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN= -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088" ``` diff --git a/ChatQnA/deprecated/deployment/nginx/.env b/ChatQnA/deprecated/deployment/nginx/.env index bc3da51d3..b7d5bbf15 100644 --- a/ChatQnA/deprecated/deployment/nginx/.env +++ b/ChatQnA/deprecated/deployment/nginx/.env @@ -1,4 +1,4 @@ -HUGGING_FACE_HUB_TOKEN= +HF_TOKEN= volume=./data model=meta-llama/Llama-2-13b-chat-hf MAX_TOTAL_TOKENS=2000 diff --git a/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml b/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml index 53c4f8ce1..3ba901130 100644 --- a/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml +++ b/ChatQnA/deprecated/langchain/docker/docker-compose-qdrant.yml @@ -30,7 +30,7 @@ services: container_name: qna-rag-qdrant-server environment: - https_proxy=${https_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - "EMBED_MODEL=BAAI/bge-base-en-v1.5" - "VECTOR_DATABASE=QDRANT" - "TGI_LLM_ENDPOINT=http://localhost:8080" diff --git a/ChatQnA/deprecated/langchain/docker/docker-compose.yml b/ChatQnA/deprecated/langchain/docker/docker-compose.yml index e12e7557d..6593ef8d3 100644 --- a/ChatQnA/deprecated/langchain/docker/docker-compose.yml +++ b/ChatQnA/deprecated/langchain/docker/docker-compose.yml @@ -38,7 +38,7 @@ services: - socks_proxy=${socks_proxy} - FTP_PROXY=${FTP_PROXY} - ftp_proxy=${ftp_proxy} - - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + - HF_TOKEN=${HF_TOKEN} - CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} - "REDIS_PORT=6379" - "EMBED_MODEL=BAAI/bge-base-en-v1.5" diff --git a/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py b/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py index bfaff3124..e4d2d28f6 100644 --- a/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py +++ b/ChatQnA/deprecated/langchain/test/end_to_end_rag_test.py @@ -242,7 +242,7 @@ if __name__ == "__main__": tokenizer = AutoTokenizer.from_pretrained(args.model_name) os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com" os.environ["LANGCHAIN_API_KEY"] = args.langchain_token - os.environ["HUGGINGFACEHUB_API_TOKEN"] = args.huggingface_token + os.environ["HF_TOKEN"] = args.huggingface_token chain = buildchain(args) run_test(args, chain) diff --git a/ChatQnA/deprecated/serving/tgi_gaudi/README.md b/ChatQnA/deprecated/serving/tgi_gaudi/README.md index c9a8d510e..ecbc10c40 100644 --- a/ChatQnA/deprecated/serving/tgi_gaudi/README.md +++ b/ChatQnA/deprecated/serving/tgi_gaudi/README.md @@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba ## Docker command for 70B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e 
HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4 ``` ## Docker command for 13B model ```bash -docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model +docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model ``` diff --git a/ChatQnA/deprecated/tests/test_langchain_inference.sh b/ChatQnA/deprecated/tests/test_langchain_inference.sh index 1f520a445..ea8fde32d 100644 --- a/ChatQnA/deprecated/tests/test_langchain_inference.sh +++ b/ChatQnA/deprecated/tests/test_langchain_inference.sh @@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() { function launch_redis_and_langchain_service() { cd $WORKPATH - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} local port=8890 sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py docker compose -f langchain/docker/docker-compose.yml up -d --build diff --git a/ChatQnA/docker/gaudi/README.md b/ChatQnA/docker/gaudi/README.md index 43ad533d4..4bb405919 100644 --- a/ChatQnA/docker/gaudi/README.md +++ b/ChatQnA/docker/gaudi/README.md @@ -100,7 +100,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export REDIS_URL="redis://${host_ip}:6379" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml index f7fd02d13..b142124fe 100644 --- a/ChatQnA/docker/gaudi/docker_compose.yaml +++ b/ChatQnA/docker/gaudi/docker_compose.yaml @@ -107,7 +107,7 @@ services: http_proxy: ${http_proxy} https_proxy: 
${https_proxy} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-reranking-service" @@ -141,7 +141,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/ChatQnA/docker/xeon/README.md b/ChatQnA/docker/xeon/README.md index 5ba2f1707..5949e43c9 100644 --- a/ChatQnA/docker/xeon/README.md +++ b/ChatQnA/docker/xeon/README.md @@ -158,7 +158,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" export REDIS_URL="redis://${host_ip}:6379" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} diff --git a/ChatQnA/docker/xeon/docker_compose.yaml b/ChatQnA/docker/xeon/docker_compose.yaml index af4629443..be512b492 100644 --- a/ChatQnA/docker/xeon/docker_compose.yaml +++ b/ChatQnA/docker/xeon/docker_compose.yaml @@ -103,7 +103,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-reranking-service" @@ -132,7 +132,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/ChatQnA/kubernetes/manifests/README.md b/ChatQnA/kubernetes/manifests/README.md index 997238738..32541a5c4 100644 --- a/ChatQnA/kubernetes/manifests/README.md +++ b/ChatQnA/kubernetes/manifests/README.md @@ -26,7 +26,7 @@ For Gaudi: > [NOTE] -- Be sure to modify HUGGINGFACEHUB_API_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml +- Be sure to modify HF_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml - Be sure the node has path /mnt/models to store all the models ### Deploy diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml index dbd67872c..f0902a4eb 100644 --- a/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml +++ b/ChatQnA/kubernetes/manifests/qna_configmap_gaudi.yaml @@ -14,7 +14,7 @@ data: TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009" REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml b/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml index b5dd22303..6c6a17225 100644 --- a/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml +++ 
b/ChatQnA/kubernetes/manifests/qna_configmap_xeon.yaml @@ -14,7 +14,7 @@ data: TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009" REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379" INDEX_NAME: "rag-redis" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} EMBEDDING_SERVICE_HOST_IP: embedding-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc RERANK_SERVICE_HOST_IP: reranking-svc diff --git a/ChatQnA/tests/test_chatqna_on_gaudi.sh b/ChatQnA/tests/test_chatqna_on_gaudi.sh index f2ae74535..d14e368ab 100644 --- a/ChatQnA/tests/test_chatqna_on_gaudi.sh +++ b/ChatQnA/tests/test_chatqna_on_gaudi.sh @@ -47,7 +47,7 @@ function start_services() { export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export REDIS_URL="redis://${ip_address}:6379" export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/ChatQnA/tests/test_chatqna_on_xeon.sh b/ChatQnA/tests/test_chatqna_on_xeon.sh index 3d8bfb4ab..0c6036cb0 100644 --- a/ChatQnA/tests/test_chatqna_on_xeon.sh +++ b/ChatQnA/tests/test_chatqna_on_xeon.sh @@ -39,7 +39,7 @@ function start_services() { export TGI_LLM_ENDPOINT="http://${ip_address}:9009" export REDIS_URL="redis://${ip_address}:6379" export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export EMBEDDING_SERVICE_HOST_IP=${ip_address} export RETRIEVER_SERVICE_HOST_IP=${ip_address} diff --git a/CodeGen/codegen.yaml b/CodeGen/codegen.yaml index d5349c2c2..d572ee638 100644 --- a/CodeGen/codegen.yaml +++ b/CodeGen/codegen.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: ${UI_SERVICE_HOST_IP} ports: diff --git a/CodeGen/deprecated/README.md b/CodeGen/deprecated/README.md index d79bcc19d..2d2050664 100644 --- a/CodeGen/deprecated/README.md +++ b/CodeGen/deprecated/README.md @@ -86,10 +86,10 @@ docker run -it -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --net=h Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service: -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= nohup python server.py & ``` diff --git a/CodeGen/deprecated/tests/test_codegen_inference.sh b/CodeGen/deprecated/tests/test_codegen_inference.sh index 5a7c2a59d..0cb1a1c7e 100644 --- a/CodeGen/deprecated/tests/test_codegen_inference.sh +++ b/CodeGen/deprecated/tests/test_codegen_inference.sh @@ -63,7 +63,7 @@ function launch_server() { # Start the Backend Service docker exec $COPILOT_CONTAINER_NAME \ - bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python server.py &" + bash -c "export HF_TOKEN=$HF_TOKEN;nohup python server.py &" sleep 1m } diff --git a/CodeGen/docker/gaudi/README.md b/CodeGen/docker/gaudi/README.md index 0d6931f6c..574298d66 100644 --- a/CodeGen/docker/gaudi/README.md +++ b/CodeGen/docker/gaudi/README.md @@ -55,7 +55,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen" diff --git a/CodeGen/docker/gaudi/docker_compose.yaml b/CodeGen/docker/gaudi/docker_compose.yaml index 9ce810956..223fec5c5 100644 --- a/CodeGen/docker/gaudi/docker_compose.yaml +++ b/CodeGen/docker/gaudi/docker_compose.yaml @@ -44,7 +44,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeGen/docker/xeon/README.md b/CodeGen/docker/xeon/README.md index 80d3156d5..03de03850 100644 --- a/CodeGen/docker/xeon/README.md +++ b/CodeGen/docker/xeon/README.md @@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen" diff --git a/CodeGen/docker/xeon/docker_compose.yaml b/CodeGen/docker/xeon/docker_compose.yaml index d47f37a7b..f3cd67be0 100644 --- a/CodeGen/docker/xeon/docker_compose.yaml +++ b/CodeGen/docker/xeon/docker_compose.yaml @@ -26,7 +26,7 @@ services: environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} command: --model-id ${LLM_MODEL_ID} llm: image: opea/llm-tgi:latest @@ -40,7 +40,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeGen/kubernetes/manifests/README.md b/CodeGen/kubernetes/manifests/README.md index b7ca064e9..352ab1baf 100644 --- a/CodeGen/kubernetes/manifests/README.md +++ b/CodeGen/kubernetes/manifests/README.md @@ -2,15 +2,15 @@ > [NOTE] > The following values must be set before you can deploy: -> HUGGINGFACEHUB_API_TOKEN +> HF_TOKEN > You can also 
customize the "MODEL_ID" and "model-volume" ## Deploy On Xeon ``` cd GenAIExamples/CodeGen/kubernetes/manifests/xeon -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +export HF_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml kubectl apply -f codegen.yaml ``` @@ -18,8 +18,8 @@ kubectl apply -f codegen.yaml ``` cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml +export HF_TOKEN="YourOwnToken" +sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml kubectl apply -f codegen.yaml ``` diff --git a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml index e92c64a74..88231e110 100644 --- a/CodeGen/kubernetes/manifests/gaudi/codegen.yaml +++ b/CodeGen/kubernetes/manifests/gaudi/codegen.yaml @@ -143,7 +143,7 @@ spec: env: - name: TGI_LLM_ENDPOINT value: "http://codegen-tgi:80" - - name: HUGGINGFACEHUB_API_TOKEN + - name: HF_TOKEN value: "insert-your-huggingface-token-here" - name: http_proxy value: diff --git a/CodeGen/kubernetes/manifests/xeon/codegen.yaml b/CodeGen/kubernetes/manifests/xeon/codegen.yaml index 1887741cf..803dd4879 100644 --- a/CodeGen/kubernetes/manifests/xeon/codegen.yaml +++ b/CodeGen/kubernetes/manifests/xeon/codegen.yaml @@ -141,7 +141,7 @@ spec: env: - name: TGI_LLM_ENDPOINT value: "http://codegen-tgi:80" - - name: HUGGINGFACEHUB_API_TOKEN + - name: HF_TOKEN value: "insert-your-huggingface-token-here" - name: http_proxy value: diff --git a/CodeGen/tests/test_codegen_on_gaudi.sh b/CodeGen/tests/test_codegen_on_gaudi.sh index 12af7a393..5d4232ec9 100644 --- a/CodeGen/tests/test_codegen_on_gaudi.sh +++ b/CodeGen/tests/test_codegen_on_gaudi.sh @@ -41,7 +41,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen" diff --git a/CodeGen/tests/test_codegen_on_xeon.sh b/CodeGen/tests/test_codegen_on_xeon.sh index b96d4395e..418277335 100644 --- a/CodeGen/tests/test_codegen_on_xeon.sh +++ b/CodeGen/tests/test_codegen_on_xeon.sh @@ -29,7 +29,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8028" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen" diff --git a/CodeTrans/codetrans.yaml b/CodeTrans/codetrans.yaml index b244b8c48..4a6035e15 100644 --- a/CodeTrans/codetrans.yaml +++ b/CodeTrans/codetrans.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: 
${UI_SERVICE_HOST_IP} ports: diff --git a/CodeTrans/deprecated/README.md b/CodeTrans/deprecated/README.md index 90b6197fa..4e0ef4e8c 100644 --- a/CodeTrans/deprecated/README.md +++ b/CodeTrans/deprecated/README.md @@ -23,13 +23,13 @@ bash launch_tgi_service.sh ```sh cd langchain/docker bash build_docker.sh -docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash +docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HF_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash ``` Here is the explanation of some of the above parameters: - `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `:`. -- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). +- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). - `SERVER_PORT`: The port of the CodeTranslation service on the host. 3. Quick test diff --git a/CodeTrans/docker/gaudi/README.md b/CodeTrans/docker/gaudi/README.md index a28320a1f..9498bb0f5 100755 --- a/CodeTrans/docker/gaudi/README.md +++ b/CodeTrans/docker/gaudi/README.md @@ -51,7 +51,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans" diff --git a/CodeTrans/docker/gaudi/docker_compose.yaml b/CodeTrans/docker/gaudi/docker_compose.yaml index eca8724da..69b1422f9 100644 --- a/CodeTrans/docker/gaudi/docker_compose.yaml +++ b/CodeTrans/docker/gaudi/docker_compose.yaml @@ -42,7 +42,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeTrans/docker/xeon/README.md b/CodeTrans/docker/xeon/README.md index f8ae59d7e..167ce1018 100755 --- a/CodeTrans/docker/xeon/README.md +++ b/CodeTrans/docker/xeon/README.md @@ -59,7 +59,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans" diff --git a/CodeTrans/docker/xeon/docker_compose.yaml b/CodeTrans/docker/xeon/docker_compose.yaml index febb41231..81f24f989 100644 --- a/CodeTrans/docker/xeon/docker_compose.yaml +++ b/CodeTrans/docker/xeon/docker_compose.yaml @@ -37,7 +37,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: 
${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/CodeTrans/tests/test_codetrans_on_gaudi.sh b/CodeTrans/tests/test_codetrans_on_gaudi.sh index 6e4773fac..dee814196 100644 --- a/CodeTrans/tests/test_codetrans_on_gaudi.sh +++ b/CodeTrans/tests/test_codetrans_on_gaudi.sh @@ -33,7 +33,7 @@ function start_services() { export https_proxy=${http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans" diff --git a/CodeTrans/tests/test_codetrans_on_xeon.sh b/CodeTrans/tests/test_codetrans_on_xeon.sh index c6bbe09b6..b54fc3aa9 100644 --- a/CodeTrans/tests/test_codetrans_on_xeon.sh +++ b/CodeTrans/tests/test_codetrans_on_xeon.sh @@ -30,7 +30,7 @@ function start_services() { export https_proxy=${http_proxy} export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans" diff --git a/DocSum/deprecated/README.md b/DocSum/deprecated/README.md index fff815a94..19cbd1994 100644 --- a/DocSum/deprecated/README.md +++ b/DocSum/deprecated/README.md @@ -29,12 +29,12 @@ bash ./serving/tgi_gaudi/build_docker.sh bash ./serving/tgi_gaudi/launch_tgi_service.sh ``` -For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. +For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\ to the docker run command above with a valid Hugging Face Hub read token. -Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token. +Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= ``` ### Launch a local server instance on 8 Gaudi cards: @@ -80,7 +80,7 @@ docker run -it --net=host --ipc=host -e http_proxy=${http_proxy} -e https_proxy= Make sure TGI-Gaudi service is running. 
Launch the backend service: ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= nohup python app/server.py & ``` diff --git a/DocSum/deprecated/tests/test_langchain_inference.sh b/DocSum/deprecated/tests/test_langchain_inference.sh index ccb1b8886..b65d21fbe 100644 --- a/DocSum/deprecated/tests/test_langchain_inference.sh +++ b/DocSum/deprecated/tests/test_langchain_inference.sh @@ -63,7 +63,7 @@ function launch_server() { # Start the Backend Service docker exec $DOCUMENT_SUMMARY_CONTAINER_NAME \ - bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python app/server.py &" + bash -c "export HF_TOKEN=$HF_TOKEN;nohup python app/server.py &" sleep 1m } diff --git a/DocSum/docker/gaudi/README.md b/DocSum/docker/gaudi/README.md index cbe7e3b0c..3a1635426 100644 --- a/DocSum/docker/gaudi/README.md +++ b/DocSum/docker/gaudi/README.md @@ -62,7 +62,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" diff --git a/DocSum/docker/gaudi/docker_compose.yaml b/DocSum/docker/gaudi/docker_compose.yaml index 6640583cf..37681a7be 100644 --- a/DocSum/docker/gaudi/docker_compose.yaml +++ b/DocSum/docker/gaudi/docker_compose.yaml @@ -24,7 +24,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} volumes: - "./data:/data" runtime: habana @@ -44,7 +44,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/DocSum/docker/xeon/README.md b/DocSum/docker/xeon/README.md index b4ace4c80..8be1d4a6f 100644 --- a/DocSum/docker/xeon/README.md +++ b/DocSum/docker/xeon/README.md @@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${your_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum" diff --git a/DocSum/docker/xeon/docker_compose.yaml b/DocSum/docker/xeon/docker_compose.yaml index 6b214e3c5..4bc253442 100644 --- a/DocSum/docker/xeon/docker_compose.yaml +++ b/DocSum/docker/xeon/docker_compose.yaml @@ -24,7 +24,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} volumes: - "./data:/data" shm_size: 1g @@ -41,7 +41,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} LANGCHAIN_PROJECT: "opea-llm-service" diff --git a/DocSum/docsum.yaml b/DocSum/docsum.yaml index e65bcf8ab..a632ee6b7 100644 --- 
a/DocSum/docsum.yaml +++ b/DocSum/docsum.yaml @@ -24,7 +24,7 @@ opea_micro_services: - SYS_NICE ipc: host environment: - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none model-id: ${LLM_MODEL_ID} @@ -35,7 +35,7 @@ opea_micro_services: endpoint: /v1/chat/completions environment: TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} ui: host: ${UI_SERVICE_HOST_IP} ports: diff --git a/DocSum/tests/test_docsum_on_gaudi.sh b/DocSum/tests/test_docsum_on_gaudi.sh index a8686733a..552ea1dd1 100644 --- a/DocSum/tests/test_docsum_on_gaudi.sh +++ b/DocSum/tests/test_docsum_on_gaudi.sh @@ -31,7 +31,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" diff --git a/DocSum/tests/test_docsum_on_xeon.sh b/DocSum/tests/test_docsum_on_xeon.sh index 68ae5f3f4..5a4bad82e 100644 --- a/DocSum/tests/test_docsum_on_xeon.sh +++ b/DocSum/tests/test_docsum_on_xeon.sh @@ -29,7 +29,7 @@ function start_services() { export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum" diff --git a/SearchQnA/README.md b/SearchQnA/README.md index 3b2b8f25f..e314a4235 100644 --- a/SearchQnA/README.md +++ b/SearchQnA/README.md @@ -37,7 +37,7 @@ bash launch_tgi_service.sh ```sh cd langchain/docker docker build . 
--build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache -docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HUGGINGFACEHUB_API_TOKEN= -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi +docker run -e TGI_ENDPOINT= -e GOOGLE_CSE_ID= -e GOOGLE_API_KEY= -e HF_TOKEN= -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi ``` Here is the explanation of some of the above parameters: @@ -45,7 +45,7 @@ Here is the explanation of some of the above parameters: - `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `:` - `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all) - `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials) -- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens) +- `HF_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens) - `-p 8085:8000`: This will map the 8000 port of the SearchQnA service inside the container to the 8085 port on the host 3. Quick test diff --git a/SearchQnA/tests/test_langchain_inference.sh b/SearchQnA/tests/test_langchain_inference.sh index 709f350b5..269d09b0a 100644 --- a/SearchQnA/tests/test_langchain_inference.sh +++ b/SearchQnA/tests/test_langchain_inference.sh @@ -48,7 +48,7 @@ function launch_langchain_service() { tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP') tgi_ip=$(eval echo '$'$tgi_ip_name) - docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HF_TOKEN=${HF_TOKEN} \ -p ${port}:8000 --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} sleep 2m diff --git a/Translation/README.md b/Translation/README.md index 111cab5a6..c625384e8 100644 --- a/Translation/README.md +++ b/Translation/README.md @@ -23,13 +23,13 @@ bash launch_tgi_service.sh ```sh cd langchain/docker bash build_docker.sh -docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash +docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash ``` **Note**: Set the following parameters before running the above command - `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `:`. 
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). +- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens). - `SERVER_PORT`: The port of the Translation service on the host. 3. Quick Test diff --git a/Translation/tests/test_langchain_inference.sh b/Translation/tests/test_langchain_inference.sh index 211986da2..6f4cef4f1 100644 --- a/Translation/tests/test_langchain_inference.sh +++ b/Translation/tests/test_langchain_inference.sh @@ -46,7 +46,7 @@ function launch_langchain_service() { cd langchain/docker docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} - docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ + docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HF_TOKEN=${HF_TOKEN} \ -e SERVER_PORT=${port} -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME} sleep 2m }
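Note: with this patch applied, every example reads the Hugging Face token from the `HF_TOKEN` environment variable instead of `HUGGINGFACEHUB_API_TOKEN` / `HUGGING_FACE_HUB_TOKEN`. A minimal sketch of the resulting setup flow is shown below; the compose path is illustrative (ChatQnA on Gaudi, taken from this patch) and the token value is a placeholder, so substitute the directory for the example and hardware you are actually deploying.

```bash
# Create a read token at https://huggingface.co/settings/tokens, then export it
# under the unified name this patch standardizes on:
export HF_TOKEN=<your-hf-read-token>

# The compose files now forward HF_TOKEN into the serving containers,
# e.g. for ChatQnA on Gaudi (illustrative path from this patch):
cd GenAIExamples/ChatQnA/docker/gaudi
docker compose -f docker_compose.yaml up -d
```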