Fix huggingface hub token environment variable (#214)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
.github/workflows/AudioQnA.yml (vendored), 6 lines changed
@@ -37,21 +37,21 @@ jobs:
      - name: Run Test ASR
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_asr.sh

      - name: Run Test TTS
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_tts.sh

      - name: Run Test LLM engine
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/AudioQnA/tests
          bash test_${{ matrix.job_name }}_inference.sh
.github/workflows/E2E_test_with_compose.yml (vendored), 2 lines changed
@@ -76,7 +76,7 @@ jobs:
      - name: Run test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          example: ${{ matrix.example }}
          hardware: ${{ matrix.hardware }}
        run: |
.github/workflows/SearchQnA.yml (vendored), 2 lines changed
@@ -36,7 +36,7 @@ jobs:
          ref: "refs/pull/${{ github.event.number }}/merge"
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
          AISE_GAUDI_00_IP: ${{ secrets.AISE_GAUDI_00_IP }}
.github/workflows/Translation.yml (vendored), 2 lines changed
@@ -37,7 +37,7 @@ jobs:
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/Translation/tests
          bash test_${{ matrix.job_name }}_inference.sh
.github/workflows/VisualQnA.yml (vendored), 2 lines changed
@@ -37,7 +37,7 @@ jobs:
      - name: Run Test
        env:
-          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd ${{ github.workspace }}/VisualQnA/tests
          bash test_${{ matrix.job_name }}_inference.sh
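
The pattern repeats across all five workflow files: the secret reference moves from `HUGGINGFACEHUB_API_TOKEN` to `HF_TOKEN` while the test scripts themselves are unchanged. To reproduce one of these CI steps locally, the same variable has to be set by hand. A minimal sketch, with a placeholder token and an assumed matrix job name:

```bash
# Sketch: local re-run of the CI step above. <token> and the job name "llm"
# are placeholders; the real job names come from the workflow matrix.
export HF_TOKEN=<token>
cd VisualQnA/tests
bash test_llm_inference.sh   # stands in for test_${{ matrix.job_name }}_inference.sh
```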
@@ -83,12 +83,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
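
For reference, a sketch of the full gated-model invocation this hunk describes, folding the `-e HF_TOKEN` flag into the `docker run` command. The model id is an assumed example; the image name and the remaining flags follow the `tgi_gaudi` commands used elsewhere in this commit:

```bash
# Sketch only: pass a Hugging Face read token into the TGI Gaudi container
# so it can pull a gated model. The model id below is illustrative.
export HF_TOKEN=<token>
docker run -p 8080:80 -v $PWD/data:/data --runtime=habana \
  -e HABANA_VISIBLE_DEVICES=all \
  -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
  -e HF_TOKEN=$HF_TOKEN \
  --cap-add=sys_nice --ipc=host \
  tgi_gaudi --model-id meta-llama/Llama-2-7b-chat-hf
```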
@@ -147,7 +147,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y

## Launch Redis and LangChain Backend Service

-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`

```bash
cd langchain/docker
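
Because the compose file now reads `HF_TOKEN` (see the `- HF_TOKEN=${HF_TOKEN}` entries later in this commit), exporting the variable in the shell before bringing the stack up is sufficient. A minimal sketch:

```bash
# Sketch: docker compose substitutes ${HF_TOKEN} from the host environment.
export HF_TOKEN=<your-hf-token>
cd langchain/docker
docker compose -f docker-compose.yml up -d --build
```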
@@ -180,7 +180,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
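
Once the Llama Guard container is up, it can be smoke-tested like any other TGI instance. A sketch against TGI's standard `/generate` route, assuming the endpoint exported above:

```bash
# Sketch: TGI serves POST /generate; the prompt is only an example.
curl ${SAFETY_GUARD_ENDPOINT}/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "How do I bake a cake?", "parameters": {"max_new_tokens": 32}}'
```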
@@ -28,7 +28,7 @@ services:
    container_name: qna-rag-redis-server
    environment:
      - https_proxy=${https_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - "REDIS_PORT=6379"
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "REDIS_SCHEMA=schema_dim_768.yml"
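
A quick way to confirm the renamed variable actually reaches the container is to render the compose file with the token set; `docker compose config` prints the configuration after environment substitution. A sketch:

```bash
# Sketch: verify that ${HF_TOKEN} is substituted before starting the service.
export HF_TOKEN=<your-hf-token>
docker compose config | grep HF_TOKEN
```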
@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```

## Docker command for 13B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```
@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {

function launch_redis_and_langchain_service() {
    cd $WORKPATH
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    local port=8890
    sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
    docker compose -f langchain/docker/docker-compose.yml up -d --build
@@ -66,7 +66,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -51,12 +51,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
@@ -115,7 +115,7 @@ Note: If you want to integrate the TEI service into the LangChain application, y

## Launch Vector Database and LangChain Backend Service

-Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
+Update the `HF_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`

By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
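
The Redis/Qdrant choice mentioned above is a one-flag switch on the compose command; `HF_TOKEN` is exported the same way for both. A sketch:

```bash
# Sketch: same token, different vector store backend.
export HF_TOKEN=<your-hf-token>
docker compose -f docker-compose.yml up -d          # Redis (default)
docker compose -f docker-compose-qdrant.yml up -d   # Qdrant instead
```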
@@ -153,7 +153,7 @@ We offer content moderation support utilizing Meta's [Llama Guard](https://huggi
```bash
volume=$PWD/data
model_id="meta-llama/LlamaGuard-7b"
-docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HUGGING_FACE_HUB_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
+docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HF_TOKEN=<your HuggingFace token> -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy tgi_gaudi --model-id $model_id
export SAFETY_GUARD_ENDPOINT="http://xxx.xxx.xxx.xxx:8088"
```
@@ -1,4 +1,4 @@
-HUGGING_FACE_HUB_TOKEN=<your-hf-token>
+HF_TOKEN=<your-hf-token>
volume=./data
model=meta-llama/Llama-2-13b-chat-hf
MAX_TOTAL_TOKENS=2000
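
This hunk renames the variable in what appears to be a plain KEY=value variables file. Assuming the launch script sources it, the rename only matters to whatever reads `HF_TOKEN` afterwards; a hedged sketch of consuming such a file from a shell:

```bash
# Sketch, assuming the file above is saved as env.sh (the commit does not
# show its name) and contains only KEY=value lines.
set -a           # auto-export everything the sourced file defines
source ./env.sh
set +a
echo "token set: ${HF_TOKEN:+yes}"
```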
@@ -30,7 +30,7 @@ services:
    container_name: qna-rag-qdrant-server
    environment:
      - https_proxy=${https_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "VECTOR_DATABASE=QDRANT"
      - "TGI_LLM_ENDPOINT=http://localhost:8080"
@@ -38,7 +38,7 @@ services:
      - socks_proxy=${socks_proxy}
      - FTP_PROXY=${FTP_PROXY}
      - ftp_proxy=${ftp_proxy}
-      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - HF_TOKEN=${HF_TOKEN}
      - CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN}
      - "REDIS_PORT=6379"
      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
@@ -242,7 +242,7 @@ if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_API_KEY"] = args.langchain_token
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = args.huggingface_token
+    os.environ["HF_TOKEN"] = args.huggingface_token

    chain = buildchain(args)
    run_test(args, chain)
@@ -79,11 +79,11 @@ For the System Management Interface Tool please check [hl-smi](https://docs.haba
## Docker command for 70B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES="6,7,4,5" -e HABANA_VISIBLE_MODULES="0,1,2,3" -e BATCH_BUCKET_SIZE=22 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=5102 -e MAX_BATCH_TOTAL_TOKENS=32256 -e MAX_INPUT_LENGTH=1024 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_WAITING_TOKENS=5 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model --sharded true --num-shard 4
```

## Docker command for 13B model

```bash
-docker run -p 8080:80 -v $volume:/data --runtime=habana -e HUGGING_FACE_HUB_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
+docker run -p 8080:80 -v $volume:/data --runtime=habana -e HF_TOKEN=$HF_TOKEN -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e PAD_SEQUENCE_TO_MULTIPLE_OF=128 -e HABANA_VISIBLE_DEVICES="4" -e BATCH_BUCKET_SIZE=16 -e PREFILL_BATCH_BUCKET_SIZE=1 -e MAX_BATCH_PREFILL_TOKENS=4096 -e MAX_BATCH_TOTAL_TOKENS=18432 -e PAD_SEQUENCE_TO_MULTIPLE_OF=1024 -e MAX_INPUT_LENGTH=1024 -e MAX_TOTAL_TOKENS=1152 -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host tgi_gaudi --model-id $model
```
@@ -52,7 +52,7 @@ function launch_tgi_gaudi_service() {

function launch_redis_and_langchain_service() {
    cd $WORKPATH
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    local port=8890
    sed -i "s/port=8000/port=$port/g" langchain/docker/qna-app/app/server.py
    docker compose -f langchain/docker/docker-compose.yml up -d --build
@@ -100,7 +100,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
@@ -107,7 +107,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -141,7 +141,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -158,7 +158,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
@@ -103,7 +103,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-reranking-service"
@@ -132,7 +132,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -26,7 +26,7 @@ For Gaudi:

> [NOTE]

-- Be sure to modify HUGGINGFACEHUB_API_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
+- Be sure to modify HF_TOKEN and other important values in qna_configmap_guadi.yaml and qna_configmap_xeon.yaml
- Be sure the node has path /mnt/models to store all the models

### Deploy
@@ -14,7 +14,7 @@ data:
  TGI_LLM_ENDPOINT: "http://tgi-gaudi-svc.default.svc.cluster.local:9009"
  REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
  INDEX_NAME: "rag-redis"
-  HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  HF_TOKEN: ${HF_TOKEN}
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
@@ -14,7 +14,7 @@ data:
  TGI_LLM_ENDPOINT: "http://tgi-svc.default.svc.cluster.local:9009"
  REDIS_URL: "redis://redis-vector-db.default.svc.cluster.local:6379"
  INDEX_NAME: "rag-redis"
-  HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+  HF_TOKEN: ${HF_TOKEN}
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
@@ -47,7 +47,7 @@ function start_services() {
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
@@ -39,7 +39,7 @@ function start_services() {
    export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
    export REDIS_URL="redis://${ip_address}:6379"
    export INDEX_NAME="rag-redis"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export EMBEDDING_SERVICE_HOST_IP=${ip_address}
    export RETRIEVER_SERVICE_HOST_IP=${ip_address}
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -86,10 +86,10 @@ docker run -it -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --net=h

Make sure TGI-Gaudi service is running and also make sure data is populated into Redis. Launch the backend service:

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python server.py &
```
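
Before launching `server.py`, it can help to confirm that the TGI-Gaudi dependency is actually serving. A sketch, assuming TGI listens on port 8080 as in the launch commands shown earlier:

```bash
# Sketch: TGI exposes a /health route; the port is assumed from earlier steps.
curl -f http://localhost:8080/health && echo "TGI is up"
export HF_TOKEN=<token>
nohup python server.py &
```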
@@ -63,7 +63,7 @@ function launch_server() {

    # Start the Backend Service
    docker exec $COPILOT_CONTAINER_NAME \
-        bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python server.py &"
+        bash -c "export HF_TOKEN=$HF_TOKEN;nohup python server.py &"
    sleep 1m
}
@@ -55,7 +55,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"
@@ -44,7 +44,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:6666/v1/codegen"
@@ -26,7 +26,7 @@ services:
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/llm-tgi:latest
@@ -40,7 +40,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -2,15 +2,15 @@

> [NOTE]
> The following values must be set before you can deploy:
-> HUGGINGFACEHUB_API_TOKEN
+> HF_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"

## Deploy On Xeon

```
cd GenAIExamples/CodeGen/kubernetes/manifests/xeon
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+export HF_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
@@ -18,8 +18,8 @@ kubectl apply -f codegen.yaml

```
cd GenAIExamples/CodeGen/kubernetes/manifests/gaudi
-export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
+export HF_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HF_TOKEN}/g" codegen.yaml
kubectl apply -f codegen.yaml
```
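
After either deploy, a quick check that the `sed` substitution took and that the pods come up; a sketch using standard kubectl calls:

```bash
# Sketch: confirm the placeholder was replaced, then watch the rollout.
grep -n "HF_TOKEN" codegen.yaml   # should show your token, not the placeholder
kubectl get pods -w               # wait for the CodeGen pods to reach Running
```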
@@ -143,7 +143,7 @@ spec:
      env:
        - name: TGI_LLM_ENDPOINT
          value: "http://codegen-tgi:80"
-        - name: HUGGINGFACEHUB_API_TOKEN
+        - name: HF_TOKEN
          value: "insert-your-huggingface-token-here"
        - name: http_proxy
          value:
@@ -141,7 +141,7 @@ spec:
      env:
        - name: TGI_LLM_ENDPOINT
          value: "http://codegen-tgi:80"
-        - name: HUGGINGFACEHUB_API_TOKEN
+        - name: HF_TOKEN
          value: "insert-your-huggingface-token-here"
        - name: http_proxy
          value:
@@ -41,7 +41,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"
@@ -29,7 +29,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8028"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:6666/v1/codegen"
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
+docker run -it --name code_trans_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI ENDPOINT} -e HF_TOKEN=${HUGGINGFACE_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} intel/gen-ai-examples:code-translation bash
```

Here is the explanation of some of the above parameters:

- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the CodeTranslation service on the host.

3. Quick test
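
Stepping back from the hunk: the renamed parameter and its neighbors are plain environment variables consumed by the `docker run` line above. A sketch of setting them first, with illustrative values only:

```bash
# Sketch: illustrative values; substitute your own host, port, and token.
export TGI_ENDPOINT=192.168.1.10:8080   # <ip of your machine>:<port of your TGI service>
export HUGGINGFACE_API_TOKEN=<token>    # passed through as -e HF_TOKEN=${HUGGINGFACE_API_TOKEN}
export SERVER_PORT=8000
```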
@@ -51,7 +51,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
@@ -42,7 +42,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -59,7 +59,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7777/v1/codetrans"
@@ -37,7 +37,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -33,7 +33,7 @@ function start_services() {
    export https_proxy=${http_proxy}
    export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
@@ -30,7 +30,7 @@ function start_services() {
    export https_proxy=${http_proxy}
    export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:7777/v1/codetrans"
@@ -29,12 +29,12 @@ bash ./serving/tgi_gaudi/build_docker.sh
bash ./serving/tgi_gaudi/launch_tgi_service.sh
```

-For gated models such as `LLAMA-2`, you will have to pass -e HUGGING_FACE_HUB_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.
+For gated models such as `LLAMA-2`, you will have to pass -e HF_TOKEN=\<token\> to the docker run command above with a valid Hugging Face Hub read token.

-Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HUGGINGFACEHUB_API_TOKEN` environment with the token.
+Please follow this link [huggingface token](https://huggingface.co/docs/hub/security-tokens) to get the access token and export `HF_TOKEN` environment with the token.

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
```

### Launch a local server instance on 8 Gaudi cards:
@@ -80,7 +80,7 @@ docker run -it --net=host --ipc=host -e http_proxy=${http_proxy} -e https_proxy=
Make sure TGI-Gaudi service is running. Launch the backend service:

```bash
-export HUGGINGFACEHUB_API_TOKEN=<token>
+export HF_TOKEN=<token>
nohup python app/server.py &
```
@@ -63,7 +63,7 @@ function launch_server() {

    # Start the Backend Service
    docker exec $DOCUMENT_SUMMARY_CONTAINER_NAME \
-        bash -c "export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN;nohup python app/server.py &"
+        bash -c "export HF_TOKEN=$HF_TOKEN;nohup python app/server.py &"
    sleep 1m
}
@@ -62,7 +62,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
@@ -24,7 +24,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    volumes:
      - "./data:/data"
    runtime: habana
@@ -44,7 +44,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -63,7 +63,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
-export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
@@ -24,7 +24,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
    volumes:
      - "./data:/data"
    shm_size: 1g
@@ -41,7 +41,7 @@ services:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
      LANGCHAIN_PROJECT: "opea-llm-service"
@@ -24,7 +24,7 @@ opea_micro_services:
      - SYS_NICE
    ipc: host
    environment:
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      model-id: ${LLM_MODEL_ID}
@@ -35,7 +35,7 @@ opea_micro_services:
    endpoint: /v1/chat/completions
    environment:
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
  ui:
    host: ${UI_SERVICE_HOST_IP}
    ports:
@@ -31,7 +31,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
@@ -29,7 +29,7 @@ function start_services() {

    export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
    export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
-    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export HF_TOKEN=${HF_TOKEN}
    export MEGA_SERVICE_HOST_IP=${ip_address}
    export LLM_SERVICE_HOST_IP=${ip_address}
    export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/docsum"
@@ -37,7 +37,7 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:searchqna-gaudi --no-cache
-docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HUGGINGFACEHUB_API_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
+docker run -e TGI_ENDPOINT=<TGI ENDPOINT> -e GOOGLE_CSE_ID=<GOOGLE CSE ID> -e GOOGLE_API_KEY=<GOOGLE API KEY> -e HF_TOKEN=<HUGGINGFACE API TOKEN> -p 8085:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:searchqna-gaudi
```

Here is the explanation of some of the above parameters:
@@ -45,7 +45,7 @@ Here is the explanation of some of the above parameters:
- `TGI_ENDPOINT`: the endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`
- `GOOGLE_CSE_ID`: your CSE ID for Google Search Engine, usually generated [here](https://programmablesearchengine.google.com/controlpanel/all)
- `GOOGLE_API_KEY`: your API key for Google Search Engine, usually generated [here](https://console.cloud.google.com/apis/credentials)
-- `HUGGINGFACEHUB_API_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
+- `HF_TOKEN`: your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens)
- `-p 8085:8000`: This will map the 8000 port of the SearchQnA service inside the container to the 8085 port on the host

3. Quick test
@@ -48,7 +48,7 @@ function launch_langchain_service() {

    tgi_ip_name=$(echo $(hostname) | tr '[a-z]-' '[A-Z]_')_$(echo 'IP')
    tgi_ip=$(eval echo '$'$tgi_ip_name)
-    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} -e TGI_ENDPOINT=http://${tgi_ip}:8870 -e GOOGLE_CSE_ID=${GOOGLE_CSE_ID} -e GOOGLE_API_KEY=${GOOGLE_API_KEY} -e HF_TOKEN=${HF_TOKEN} \
        -p ${port}:8000 --runtime=habana -e HABANA_VISIBE_DEVILCES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}

    sleep 2m
@@ -23,13 +23,13 @@ bash launch_tgi_service.sh
```sh
cd langchain/docker
bash build_docker.sh
-docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
+docker run -it --name translation_server --net=host --ipc=host -e TGI_ENDPOINT=${TGI_ENDPOINT} -e HF_TOKEN=${HF_TOKEN} -e SERVER_PORT=8000 -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} translation:latest bash
```

**Note**: Set the following parameters before running the above command

- `TGI_ENDPOINT`: The endpoint of your TGI service, usually equal to `<ip of your machine>:<port of your TGI service>`.
-- `HUGGINGFACEHUB_API_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
+- `HF_TOKEN`: Your HuggingFace hub API token, usually generated [here](https://huggingface.co/settings/tokens).
- `SERVER_PORT`: The port of the Translation service on the host.

3. Quick Test
@@ -46,7 +46,7 @@ function launch_langchain_service() {
    cd langchain/docker
    docker build . --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${http_proxy} -t intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}

-    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
+    docker run -d --name=${LANGCHAIN_CONTAINER_NAME} --net=host -e TGI_ENDPOINT=http://localhost:8870 -e HF_TOKEN=${HF_TOKEN} \
        -e SERVER_PORT=${port} -e http_proxy=${http_proxy} -e https_proxy=${https_proxy} --ipc=host intel/gen-ai-examples:${LANGCHAIN_CONTAINER_NAME}
    sleep 2m
}