Revert "HUGGINGFACEHUB_API_TOKEN environment is change to HF_TOKEN (#… (#1521)

Revert this PR since the test is not triggered properly due to the false merge of a WIP CI PR, 44a689b0bf, which block the CI test. This change will be submitted in another PR.
2025-02-11 18:36:12 +08:00
parent 47069ac70c
commit 81b02bb947
69 changed files with 113 additions and 263 deletions
--- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -105,7 +105,7 @@ export https_proxy=${your_http_proxy}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
-export HF_TOKEN=${your_hf_api_token}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export OLLAMA_HOST=${host_ip}
 export OLLAMA_MODEL="llama3.2"
 ```
@@ -116,7 +116,7 @@ export OLLAMA_MODEL="llama3.2"
 set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
 set RERANK_MODEL_ID=BAAI/bge-reranker-base
 set INDEX_NAME=rag-redis
-set HF_TOKEN=%your_hf_api_token%
+set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
 set OLLAMA_HOST=host.docker.internal
 set OLLAMA_MODEL="llama3.2"
 ```
--- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
    restart: unless-stopped
@@ -72,8 +70,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
--- a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
@@ -7,11 +7,15 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "${HF_TOKEN}" ]; then
-    echo "Error: HF_TOKEN is not set. Please set HF_TOKEN."
+if [ -z "${your_hf_api_token}" ]; then
+    echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
 fi

-export host_ip=$(hostname -I | awk '{print $1}')
+if [ -z "${host_ip}" ]; then
+    echo "Error: host_ip is not set. Please set host_ip first."
+fi
+
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export INDEX_NAME="rag-redis"
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -236,7 +236,7 @@ For users in China who are unable to download models directly from Huggingface,
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
    restart: unless-stopped
@@ -72,8 +70,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
--- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -7,11 +7,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi
-
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   ```

 2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -203,9 +203,9 @@ For users in China who are unable to download models directly from Huggingface,
   export HF_ENDPOINT="https://hf-mirror.com"
   model_name="meta-llama/Meta-Llama-3-8B-Instruct"
   # Start vLLM LLM Service
-   docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+   docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   # Start TGI LLM Service
-   docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
+   docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
   ```

 2. Offline
@@ -220,9 +220,9 @@ For users in China who are unable to download models directly from Huggingface,
     export HF_TOKEN=${your_hf_token}
     export model_path="/path/to/model"
     # Start vLLM LLM Service
-     docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
+     docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
     # Start TGI LLM Service
-     docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
+     docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
     ```

 ### Setup Environment Variables
@@ -232,7 +232,7 @@ For users in China who are unable to download models directly from Huggingface,
   ```bash
   # Example: host_ip="192.168.1.1"
   export host_ip="External_Public_IP"
-   export HF_TOKEN="Your_Huggingface_API_Token"
+   export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
   # Example: NGINX_PORT=80
   export NGINX_PORT=${your_nginx_port}
   ```
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -24,8 +24,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-gaudi-server
@@ -55,8 +54,7 @@ services:
      REDIS_HOST: redis-vector-db
      INDEX_NAME: ${INDEX_NAME}
      TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
-      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    restart: unless-stopped
  tei-reranking-service:
    image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -90,7 +88,7 @@ services:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -6,10 +6,6 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null

-if [ -z "$HF_TOKEN" ]; then
-    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
-    return -1
-fi

 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export RERANK_MODEL_ID="BAAI/bge-reranker-base"