Fix huggingface hub token environment variable (#214)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
.github/workflows/AudioQnA.yml (vendored), 6 lines changed
@@ -37,21 +37,21 @@ jobs:
      - name: Run Test ASR
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_asr.sh

      - name: Run Test TTS
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_tts.sh

      - name: Run Test LLM engine
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_${{ matrix.job_name }}_inference.sh
.github/workflows/E2E_test_with_compose.yml (vendored), 2 lines changed
@@ -76,7 +76,7 @@ jobs:
      - name: Run test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          hardware: ${{ matrix.hardware }}
        run: |
.github/workflows/SearchQnA.yml (vendored), 2 lines changed
@@ -36,7 +36,7 @@ jobs:
          ref: "refs/pull/${{ github.event.number }}/merge"
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }}
.github/workflows/Translation.yml (vendored), 2 lines changed
@@ -37,7 +37,7 @@ jobs:
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/Translation/tests
          bash test_${{ matrix.job_name }}_inference.sh
.github/workflows/VisualQnA.yml (vendored), 2 lines changed
@@ -37,7 +37,7 @@ jobs:
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/VisualQnA/tests
          bash test_${{ matrix.job_name }}_inference.sh
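
The pattern repeats across all five workflow files: the secret reference moves from `HUGGINGFACEHUB_API_TOKEN` to `HF_TOKEN` while the test scripts themselves are unchanged. To reproduce one of these CI steps locally, the same variable has to be set by hand. A minimal sketch, with a placeholder token and an assumed matrix job name:

```bash
# Sketch: local re-run of the CI step above. <token> and the job name "llm"
# are placeholders; the real job names come from the workflow matrix.
export HF_TOKEN=<token>
cd VisualQnA/tests
bash test_llm_inference.sh   # stands in for test_${{ matrix.job_name }}_inference.sh
```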
@@ -83,12 +83,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
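
For reference, a sketch of the full gated-model invocation this hunk describes, folding the `-e HF_TOKEN` flag into the `docker run` command. The model id is an assumed example; the image name and the remaining flags follow the `tgi_gaudi` commands used elsewhere in this commit:

```bash
# Sketch only: pass a Hugging Face read token into the TGI Gaudi container
# so it can pull a gated model. The model id below is illustrative.
export HF_TOKEN=<token>
docker run -p 8080:80 -v $PWD/data:/data --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
  -e HF_TOKEN=$HF_TOKEN \
  --cap-add=sys_nice --ipc=host \
  tgi_gaudi --model-id meta-llama/Llama-2-7b-chat-hf
```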
@@ -147,7 +147,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y

## Launch Redis and LangChain Backend Service

-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`

```bash
cd langchain/docker
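
Because the compose file now reads `HF_TOKEN` (see the `- HF_TOKEN=${HF_TOKEN}` entries later in this commit), exporting the variable in the shell before bringing the stack up is sufficient. A minimal sketch:

```bash
# Sketch: docker compose substitutes ${HF_TOKEN} from the host environment.
export HF_TOKEN=<your-hf-token>
cd langchain/docker
docker compose -f docker-compose.yml up -d --build
```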
@@ -180,7 +180,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
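
Once the Llama Guard container is up, it can be smoke-tested like any other TGI instance. A sketch against TGI's standard `/generate` route, assuming the endpoint exported above:

```bash
# Sketch: TGI serves POST /generate; the prompt is only an example.
curl ${SAFETY_GUARD_ENDPOINT}/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "How do I bake a cake?", "parameters": {"max_new_tokens": 32}}'
```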
@@ -28,7 +28,7 @@ services:
    container_name: qna-rag-redis-server
    environment:
      - https_proxy=${https_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - "REDIS_PORT=6379"
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "REDIS_SCHEMA=schema_dim_768.yml"
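
A quick way to confirm the renamed variable actually reaches the container is to render the compose file with the token set; `docker compose config` prints the configuration after environment substitution. A sketch:

```bash
# Sketch: verify that ${HF_TOKEN} is substituted before starting the service.
export HF_TOKEN=<your-hf-token>
docker compose config | grep HF_TOKEN
```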
@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```

## Docker command for 13B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```
@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {

function launch_redis_and_langchain_service() {
    cd $WORKPATH
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    local port=8890
    sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
    docker compose -f langchain/docker/docker-compose.yml up -d --build
@@ -66,7 +66,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -51,12 +51,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
@@ -115,7 +115,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y

## Launch Vector Database and LangChain Backend Service

-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`

By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
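
The Redis/Qdrant choice mentioned above is a one-flag switch on the compose command; `HF_TOKEN` is exported the same way for both. A sketch:

```bash
# Sketch: same token, different vector store backend.
export HF_TOKEN=<your-hf-token>
docker compose -f docker-compose.yml up -d          # Redis (default)
docker compose -f docker-compose-qdrant.yml up -d   # Qdrant instead
```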
@@ -153,7 +153,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
@@ -1,4 +1,4 @@
-HUGGING_FACE_HUB_TOKEN=<your-hf-token>
+HF_TOKEN=<your-hf-token>
volume=./data
model=meta-llama/Llama-2-13b-chat-hf
MAX_TOTAL_TOKENS=2000
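
This hunk renames the variable in what appears to be a plain KEY=value variables file. Assuming the launch script sources it, the rename only matters to whatever reads `HF_TOKEN` afterwards; a hedged sketch of consuming such a file from a shell:

```bash
# Sketch, assuming the file above is saved as env.sh (the commit does not
# show its name) and contains only KEY=value lines.
set -a           # auto-export everything the sourced file defines
source ./env.sh
set +a
echo "token set: ${HF_TOKEN:+yes}"
```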
@@ -30,7 +30,7 @@ services:
    container_name: qna-rag-qdrant-server
    environment:
      - https_proxy=${https_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "VECTOR_DATABASE=QDRANT"
      - "TGI_LLM_ENDPOINT=http://localhost:8080"
@@ -38,7 +38,7 @@ services:
      - socks_proxy=${socks_proxy}
      - FTP_PROXY=${FTP_PROXY}
      - ftp_proxy=${ftp_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN}
      - "REDIS_PORT=6379"
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
@@ -242,7 +242,7 @@ if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_API_KEY"] = args.langchain_token
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = args.huggingface_token
+    os.environ["HF_TOKEN"] = args.huggingface_token

    chain = buildchain(args)
    run_test(args, chain)
@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```

## Docker command for 13B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```
@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {

function launch_redis_and_langchain_service() {
    cd $WORKPATH
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    local port=8890
    sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
    docker compose -f langchain/docker/docker-compose.yml up -d --build
@@ -100,7 +100,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
@@ -107,7 +107,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -141,7 +141,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -158,7 +158,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
@@ -103,7 +103,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -132,7 +132,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -26,7 +26,7 @@ For Gaudi:

> [NOTE]

-- Be sure to modify HUGGINGFACEHUB_API_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
+- Be sure to modify HF_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
- Be sure the node has path /mnt/models to store all the models

### Deploy
@@ -14,7 +14,7 @@ data:
  TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009"
  REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
  INDEX_NAME: "rag-redis"
-  HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  HF_TOKEN: ${HF_TOKEN}
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
@@ -14,7 +14,7 @@ data:
  TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009"
  REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
  INDEX_NAME: "rag-redis"
-  HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  HF_TOKEN: ${HF_TOKEN}
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
@@ -47,7 +47,7 @@ function start_services() {
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
@@ -39,7 +39,7 @@ function start_services() {
    export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -86,10 +86,10 @@ docker run -it -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --net=h

Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service:

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python server.py &
```
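
Before launching `server.py`, it can help to confirm that the TGI-Gaudi dependency is actually serving. A sketch, assuming TGI listens on port 8080 as in the launch commands shown earlier:

```bash
# Sketch: TGI exposes a /health route; the port is assumed from earlier steps.
curl -f http://localhost:8080/health && echo "TGI is up"
export HF_TOKEN=<token>
nohup python server.py &
```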
@@ -63,7 +63,7 @@ function launch_server() {

    # Start the Backend Service
    docker exec $COPILOT_CONTAINER_NAME \
-        bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python server.py &"
+        bash -c "export HF_TOKEN=$HF_TOKEN;nohup python server.py &"
    sleep 1m
}
@@ -55,7 +55,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"
@@ -44,7 +44,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"
@@ -26,7 +26,7 @@ services:
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/llm-tgi:latest
@@ -40,7 +40,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -2,15 +2,15 @@

> [NOTE]
> The following values must be set before you can deploy:
-> HUGGINGFACEHUB_API_TOKEN
+> HF_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"

## Deploy On Xeon

```
cd GenAIExamples/CodeGen/kubernetes/manifests/xeon
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+export HF_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
@@ -18,8 +18,8 @@ kubectl apply -f codegen.yaml

```
cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+export HF_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
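
After either deploy, a quick check that the `sed` substitution took and that the pods come up; a sketch using standard kubectl calls:

```bash
# Sketch: confirm the placeholder was replaced, then watch the rollout.
grep -n "HF_TOKEN" codegen.yaml   # should show your token, not the placeholder
kubectl get pods -w               # wait for the CodeGen pods to reach Running
```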
@@ -143,7 +143,7 @@ spec:
      env:
        - name: TGI_LLM_ENDPOINT
          value: "http://codegen-tgi:80"
-        - name: HUGGINGFACEHUB_API_TOKEN
+        - name: HF_TOKEN
          value: "insert-your-huggingface-token-here"
        - name: http_proxy
          value:
@@ -141,7 +141,7 @@ spec:
      env:
        - name: TGI_LLM_ENDPOINT
          value: "http://codegen-tgi:80"
-        - name: HUGGINGFACEHUB_API_TOKEN
+        - name: HF_TOKEN
          value: "insert-your-huggingface-token-here"
        - name: http_proxy
          value:
@@ -41,7 +41,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"
@@ -29,7 +29,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
+docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HF_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
```

Here is the explanation of some of the above parameters:

- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the CodeTranslation service on the host.

3. Quick test
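
Stepping back from the hunk: the renamed parameter and its neighbors are plain environment variables consumed by the `docker run` line above. A sketch of setting them first, with illustrative values only:

```bash
# Sketch: illustrative values; substitute your own host, port, and token.
export TGI_ENDPOINT=192.168.1.10:8080   # <ip of your machine>:<port of your TGI service>
export HUGGINGFACE_API_TOKEN=<token>    # passed through as -e HF_TOKEN=${HUGGINGFACE_API_TOKEN}
export SERVER_PORT=8000
```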
@@ -51,7 +51,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
@@ -42,7 +42,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -59,7 +59,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
@@ -37,7 +37,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -33,7 +33,7 @@ function start_services() {
    export https_proxy=${http_proxy}
    export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
@@ -30,7 +30,7 @@ function start_services() {
    export https_proxy=${http_proxy}
    export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
@@ -29,12 +29,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
@@ -80,7 +80,7 @@ docker run -it --net=host --ipc=host -e http_proxy=${http_proxy} -e https_proxy=
Make sure TGI-Gaudi service is running. Launch the backend service:

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python app/server.py &
```
@@ -63,7 +63,7 @@ function launch_server() {

    # Start the Backend Service
    docker exec $DOCUMENT_SUMMARY_CONTAINER_NAME \
-        bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python app/server.py &"
+        bash -c "export HF_TOKEN=$HF_TOKEN;nohup python app/server.py &"
    sleep 1m
}
@@ -62,7 +62,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
@@ -24,7 +24,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    volumes:
      - "./data:/data"
    runtime: habana
@@ -44,7 +44,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
@@ -24,7 +24,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    volumes:
      - "./data:/data"
    shm_size: 1g
@@ -41,7 +41,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -31,7 +31,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
@@ -29,7 +29,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
@@ -37,7 +37,7 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache
-docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HUGGINGFACEHUB_API_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
+docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HF_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
```

Here is the explanation of some of the above parameters:
@@ -45,7 +45,7 @@ Here is the explanation of some of the above parameters:
- `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`
- `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all)
- `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials)
-- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
+- `HF_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
- `-p 8085:8000`: This will map the 8000 port of the SearchQnA service inside the container to the 8085 port on the host

3. Quick test
@@ -48,7 +48,7 @@ function launch_langchain_service() {

    tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
    tgi_ip=$(eval echo '$'$tgi_ip_name)
-    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HF_TOKEN=${HF_TOKEN} \
        -p ${port}:8000 --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}

    sleep 2m
@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
+docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
```

**Note**: Set the following parameters before running the above command

- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the Translation service on the host.

3. Quick Test
@@ -46,7 +46,7 @@ function launch_langchain_service() {
    cd langchain/docker
    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}

-    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HF_TOKEN=${HF_TOKEN} \
        -e SERVER_PORT=${port} -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
    sleep 2m
}