From aa314f6757c503f0b10a6481b1e2eb00db13fa6e Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Mon, 11 Nov 2024 13:53:06 +0800
Subject: [PATCH] [Readme] Update ChatQnA Readme for LLM Endpoint (#1086)

Signed-off-by: letonghan
---
 ChatQnA/docker_compose/intel/cpu/xeon/README.md |  9 ++++-----
 .../intel/cpu/xeon/README_qdrant.md             |  4 ++--
 .../docker_compose/intel/hpu/gaudi/README.md    | 17 ++++++-----------
 ChatQnA/docker_compose/nvidia/gpu/README.md     |  4 ++--
 4 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 494ba6a52..990cb3537 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -26,7 +26,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
 export http_proxy="Your_HTTP_Proxy"
 export https_proxy="Your_HTTPs_Proxy"
 # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
-# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
 export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
 ```

@@ -324,17 +323,17 @@ For details on how to verify the correctness of the response, refer to [how-to-v

 ```bash
 # TGI service
-curl http://${host_ip}:9009/generate \
+curl http://${host_ip}:9009/v1/chat/completions \
   -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
   -H 'Content-Type: application/json'
 ```

 ```bash
 # vLLM Service
-curl http://${host_ip}:9009/v1/completions \
+curl http://${host_ip}:9009/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
 ```

 5. MegaService
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
index 1adfe8cf1..2f9fa1b82 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md
@@ -252,9 +252,9 @@ For details on how to verify the correctness of the response, refer to [how-to-v
 Then try the `cURL` command below to validate TGI.

 ```bash
-curl http://${host_ip}:6042/generate \
+curl http://${host_ip}:6042/v1/chat/completions \
   -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
   -H 'Content-Type: application/json'
 ```

diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
index a922ec031..02620ea7b 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -326,23 +326,18 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
 Then try the `cURL` command below to validate services.

 ```bash
-#TGI Service
-curl http://${host_ip}:8005/generate \
+# TGI service
+curl http://${host_ip}:9009/v1/chat/completions \
   -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
+  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
   -H 'Content-Type: application/json'
 ```

 ```bash
-#vLLM Service
-curl http://${host_ip}:8007/v1/completions \
+# vLLM Service
+curl http://${host_ip}:9009/v1/chat/completions \
   -H "Content-Type: application/json" \
-  -d '{
-    "model": "${LLM_MODEL_ID}",
-    "prompt": "What is Deep Learning?",
-    "max_tokens": 32,
-    "temperature": 0
-  }'
+  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
 ```

 5. MegaService
diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md
index 24eb39f98..fc647a555 100644
--- a/ChatQnA/docker_compose/nvidia/gpu/README.md
+++ b/ChatQnA/docker_compose/nvidia/gpu/README.md
@@ -238,9 +238,9 @@ docker compose up -d
 Then try the `cURL` command below to validate TGI.

 ```bash
-curl http://${host_ip}:8008/generate \
+curl http://${host_ip}:9009/v1/chat/completions \
   -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
   -H 'Content-Type: application/json'
 ```
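
All four READMEs now exercise the same OpenAI-compatible `/v1/chat/completions` route for both TGI and vLLM. As a quick end-to-end sanity check of the updated endpoint, the assistant's reply can be pulled out of the JSON response with `jq`. This is a minimal sketch, assuming the serving container is reachable at `${host_ip}:9009` and that `LLM_MODEL_ID` has been exported as the READMEs describe:

```bash
# Query the OpenAI-compatible endpoint and print only the generated reply.
# Assumes host_ip and LLM_MODEL_ID are already exported, e.g.:
#   export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
curl -s http://${host_ip}:9009/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
  | jq -r '.choices[0].message.content'
```

Note the `"'"${LLM_MODEL_ID}"'"` splice used here and in the Gaudi commands above: the variable sits outside the single-quoted JSON so the shell expands it, while the inner double quotes keep the payload valid JSON. A bare `${LLM_MODEL_ID}` inside single quotes would be sent literally and rejected by the server.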