Fix huggingface hub token environment variable (#214)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Authored by lvliang-intel on 2024-05-30 16:04:59 +08:00; committed by GitHub
parent f23acc0077
commit 9d3bc0e00c
59 changed files with 90 additions and 90 deletions
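Because the change is a mechanical rename across workflows, READMEs, compose files, manifests, and test scripts, most of these hunks could be reproduced with a repo-wide substitution. A minimal sketch (hypothetical invocation, shown only to illustrate the scope of the rename; the actual commit may have been prepared differently):

```bash
# Rename both spellings of the old variable to HF_TOKEN across the tree.
grep -rl -e 'HUGGINGFACEHUB_API_TOKEN' -e 'HUGGING_FACE_HUB_TOKEN' --exclude-dir=.git . \
  | xargs sed -i \
      -e 's/HUGGINGFACEHUB_API_TOKEN/HF_TOKEN/g' \
      -e 's/HUGGING_FACE_HUB_TOKEN/HF_TOKEN/g'
```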

View File

@@ -37,21 +37,21 @@ jobs:
- name: Run Test ASR
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.workspace }}/AudioQnA/tests
bash test_asr.sh
- name: Run Test TTS
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.workspace }}/AudioQnA/tests
bash test_tts.sh
- name: Run Test LLM engine
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.workspace }}/AudioQnA/tests
bash test_${{ matrix.job_name }}_inference.sh
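Each step exports the repository secret as `HF_TOKEN`, and the test scripts inherit it from the environment. Before relying on it in CI, a token can be sanity-checked against the Hub's `whoami` endpoint — a sketch, assuming a POSIX shell with `curl`:

```bash
# A valid token returns the account's JSON profile; an invalid one returns 401.
curl -s -H "Authorization: Bearer $HF_TOKEN" https://huggingface.co/api/whoami-v2
```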

View File

@@ -76,7 +76,7 @@ jobs:
- name: Run test
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
example: ${{ matrix.example }}
hardware: ${{ matrix.hardware }}
run: |
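Since the workflows now read `secrets.HF_TOKEN`, a secret under that name must also exist in the repository settings. With the GitHub CLI that could look like the following sketch (the repository slug is an assumption):

```bash
# Create or update the repository secret the workflows reference.
gh secret set HF_TOKEN --body "$HF_TOKEN" --repo opea-project/GenAIExamples
```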

View File

@@ -36,7 +36,7 @@ jobs:
ref: "refs/pull/${{ github.event.number }}/merge"
- name: Run Test
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }}

View File

@@ -37,7 +37,7 @@ jobs:
- name: Run Test
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.workspace }}/Translation/tests
bash test_${{ matrix.job_name }}_inference.sh

View File

@@ -37,7 +37,7 @@ jobs:
- name: Run Test
env:
-HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.workspace }}/VisualQnA/tests
bash test_${{ matrix.job_name }}_inference.sh

View File

@@ -83,12 +83,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```
-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export the `HF_TOKEN` environment variable with the token.
```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```
### Launch a local server instance on 8 Gaudi cards:
@@ -147,7 +147,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y
## Launch Redis and LangChain Backend Service
-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
```bash
cd langchain/docker
@@ -180,7 +180,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
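Once the Llama Guard container is serving, a quick request against TGI's `generate` route confirms the gated model was actually downloaded with the supplied token — a minimal sketch against the port mapped above:

```bash
curl -s http://localhost:8088/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "Hello", "parameters": {"max_new_tokens": 16}}'
```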

View File

@@ -28,7 +28,7 @@ services:
container_name: qna-rag-redis-server
environment:
- https_proxy=${https_proxy}
-- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+- HF_TOKEN=${HF_TOKEN}
- "REDIS_PORT=6379"
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "REDIS_SCHEMA=schema_dim_768.yml"

View File

@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model
```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```
## Docker command for 13B model
```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```
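In both commands `-e HF_TOKEN=$HF_TOKEN` forwards the host's variable into the container, so a single export covers either model size. A sketch of the variables the commands above expect (`$model` and `$volume` are presumably defined earlier in the README; the model id below is just an example of a gated model):

```bash
export HF_TOKEN=<token>
export volume=$PWD/data
export model=meta-llama/Llama-2-70b-chat-hf   # example gated model id
```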

View File

@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {
function launch_redis_and_langchain_service() {
cd $WORKPATH
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
local port=8890
sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
docker compose -f langchain/docker/docker-compose.yml up -d --build

View File

@@ -66,7 +66,7 @@ opea_micro_services:
- SYS_NICE
ipc: host
environment:
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${LLM_MODEL_ID}

View File

@@ -51,12 +51,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```
-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export the `HF_TOKEN` environment variable with the token.
```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```
### Launch a local server instance on 8 Gaudi cards:
@@ -115,7 +115,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y
## Launch Vector Database and LangChain Backend Service
-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
@@ -153,7 +153,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```

View File

@@ -1,4 +1,4 @@
-HUGGING_FACE_HUB_TOKEN=<your-hf-token>
+HF_TOKEN=<your-hf-token>
volume=./data
model=meta-llama/Llama-2-13b-chat-hf
MAX_TOTAL_TOKENS=2000

View File

@@ -30,7 +30,7 @@ services:
container_name: qna-rag-qdrant-server
environment:
- https_proxy=${https_proxy}
-- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+- HF_TOKEN=${HF_TOKEN}
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"
- "VECTOR_DATABASE=QDRANT"
- "TGI_LLM_ENDPOINT=http://localhost:8080"

View File

@@ -38,7 +38,7 @@ services:
- socks_proxy=${socks_proxy}
- FTP_PROXY=${FTP_PROXY}
- ftp_proxy=${ftp_proxy}
-- HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+- HF_TOKEN=${HF_TOKEN}
- CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN}
- "REDIS_PORT=6379"
- "EMBED_MODEL=BAAI/bge-base-en-v1.5"

View File

@@ -242,7 +242,7 @@ if __name__ == "__main__":
tokenizer = AutoTokenizer.from_pretrained(args.model_name)
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = args.langchain_token
-os.environ["HUGGINGFACEHUB_API_TOKEN"] = args.huggingface_token
+os.environ["HF_TOKEN"] = args.huggingface_token
chain = buildchain(args)
run_test(args, chain)
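Setting `os.environ["HF_TOKEN"]` works because the `huggingface_hub` client reads that variable when fetching models. Outside of Python, the equivalent is a one-time CLI login — a sketch:

```bash
# Persists the token so subsequent downloads of gated models authenticate.
huggingface-cli login --token "$HF_TOKEN"
```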

View File

@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model
```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```
## Docker command for 13B model
```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```

View File

@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {
function launch_redis_and_langchain_service() {
cd $WORKPATH
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
local port=8890
sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
docker compose -f langchain/docker/docker-compose.yml up -d --build

View File

@@ -100,7 +100,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}

View File

@@ -107,7 +107,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -141,7 +141,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -158,7 +158,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}

View File

@@ -103,7 +103,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -132,7 +132,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -26,7 +26,7 @@ For Gaudi:
> [NOTE]
-- Be sure to modify HUGGINGFACEHUB_API_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
+- Be sure to modify HF_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
- Be sure the node has path /mnt/models to store all the models
### Deploy

View File

@@ -14,7 +14,7 @@ data:
TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009"
REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
INDEX_NAME: "rag-redis"
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
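Note that Kubernetes does not expand `${HF_TOKEN}` inside a ConfigMap; these manifests are templates that need substitution before `kubectl apply`. One hedged way to do it, assuming `envsubst` is available and using the configmap filename as spelled in the README above:

```bash
export HF_TOKEN=<token>
envsubst < qna_configmap_guadi.yaml | kubectl apply -f -
```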

View File

@@ -14,7 +14,7 @@ data:
TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009"
REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
INDEX_NAME: "rag-redis"
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc

View File

@@ -47,7 +47,7 @@ function start_services() {
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}

View File

@@ -39,7 +39,7 @@ function start_services() {
export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}

View File

@@ -24,7 +24,7 @@ opea_micro_services:
- SYS_NICE
ipc: host
environment:
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
endpoint: /v1/chat/completions
environment:
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
ui:
host: ${UI_SERVICE_HOST_IP}
ports:

View File

@@ -86,10 +86,10 @@ docker run -it -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --net=h
Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service:
-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export the `HF_TOKEN` environment variable with the token.
```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python server.py &
```

View File

@@ -63,7 +63,7 @@ function launch_server() {
# Start the Backend Service
docker exec $COPILOT_CONTAINER_NAME \
bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python server.py &"
bash -c "export HF_TOKEN=$HF_TOKEN;nohup python server.py &"
sleep 1m
}

View File

@@ -55,7 +55,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"

View File

@@ -44,7 +44,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"

View File

@@ -26,7 +26,7 @@ services:
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
command: --model-id ${LLM_MODEL_ID}
llm:
image: opea/llm-tgi:latest
@@ -40,7 +40,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -2,15 +2,15 @@
> [NOTE]
> The following values must be set before you can deploy:
-> HUGGINGFACEHUB_API_TOKEN
+> HF_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"
## Deploy On Xeon
```
cd GenAIExamples/CodeGen/kubernetes/manifests/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
export HF_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
@@ -18,8 +18,8 @@ kubectl apply -f codegen.yaml
```
cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
export HF_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
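After `kubectl apply`, the token is baked into the manifest as a literal value. To confirm a running pod actually received it, something like the sketch below works (the deployment name `codegen-tgi` is an assumption inferred from the service endpoint in these manifests):

```bash
kubectl exec deploy/codegen-tgi -- printenv HF_TOKEN
```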

View File

@@ -143,7 +143,7 @@ spec:
env:
- name: TGI_LLM_ENDPOINT
value: "http://codegen-tgi:80"
-- name: HUGGINGFACEHUB_API_TOKEN
+- name: HF_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:

View File

@@ -141,7 +141,7 @@ spec:
env:
- name: TGI_LLM_ENDPOINT
value: "http://codegen-tgi:80"
-- name: HUGGINGFACEHUB_API_TOKEN
+- name: HF_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:

View File

@@ -41,7 +41,7 @@ function start_services() {
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"

View File

@@ -29,7 +29,7 @@ function start_services() {
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"

View File

@@ -24,7 +24,7 @@ opea_micro_services:
- SYS_NICE
ipc: host
environment:
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
endpoint: /v1/chat/completions
environment:
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
ui:
host: ${UI_SERVICE_HOST_IP}
ports:

View File

@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
+docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
```
Here is the explanation of some of the above parameters:
- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the CodeTranslation service on the host.
3. Quick test

View File

@@ -51,7 +51,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"

View File

@@ -42,7 +42,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -59,7 +59,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"

View File

@@ -37,7 +37,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -33,7 +33,7 @@ function start_services() {
export https_proxy=${http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"

View File

@@ -30,7 +30,7 @@ function start_services() {
export https_proxy=${http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"

View File

@@ -29,12 +29,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```
-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export the `HF_TOKEN` environment variable with the token.
```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```
### Launch a local server instance on 8 Gaudi cards:
@@ -80,7 +80,7 @@ docker run -it --net=host --ipc=host -e http_proxy=${http_proxy} -e https_proxy=
Make sure TGI-Gaudi service is running. Launch the backend service:
```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python app/server.py &
```

View File

@@ -63,7 +63,7 @@ function launch_server() {
# Start the Backend Service
docker exec $DOCUMENT_SUMMARY_CONTAINER_NAME \
bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python app/server.py &"
bash -c "export HF_TOKEN=$HF_TOKEN;nohup python app/server.py &"
sleep 1m
}

View File

@@ -62,7 +62,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

View File

@@ -24,7 +24,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
volumes:
- "./data:/data"
runtime: habana
@@ -44,7 +44,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"

View File

@@ -24,7 +24,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
volumes:
- "./data:/data"
shm_size: 1g
@@ -41,7 +41,7 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"

View File

@@ -24,7 +24,7 @@ opea_micro_services:
- SYS_NICE
ipc: host
environment:
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
endpoint: /v1/chat/completions
environment:
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+HF_TOKEN: ${HF_TOKEN}
ui:
host: ${UI_SERVICE_HOST_IP}
ports:

View File

@@ -31,7 +31,7 @@ function start_services() {
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"

View File

@@ -29,7 +29,7 @@ function start_services() {
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export HF_TOKEN=${HF_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"

View File

@@ -37,7 +37,7 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache
-docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HUGGINGFACEHUB_API_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
+docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HF_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
```
Here is the explanation of some of the above parameters:
@@ -45,7 +45,7 @@ Here is the explanation of some of the above parameters:
- `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`
- `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all)
- `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials)
-- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
+- `HF_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
- `-p 8085:8000`: This will map the 8000 port of the SearchQnA service inside the container to the 8085 port on the host
3. Quick test

View File

@@ -48,7 +48,7 @@ function launch_langchain_service() {
tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
tgi_ip=$(eval echo '$'$tgi_ip_name)
-docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HF_TOKEN=${HF_TOKEN} \
-p ${port}:8000 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
sleep 2m

View File

@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
+docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
```
**Note**: Set the following parameters before running the above command
- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the Translation service on the host.
3. Quick Test

View File

@@ -46,7 +46,7 @@ function launch_langchain_service() {
cd langchain/docker
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
-docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HF_TOKEN=${HF_TOKEN} \
-e SERVER_PORT=${port} -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
sleep 2m
}