Align parameters for "max_tokens", "repetition_penalty", "presence_penalty", "frequency_penalty" (#726)

Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Author: XinyaoWa
Date: 2024-09-19 14:15:25 +08:00
Committed by: GitHub
Parent: 372d78c2ac
Commit: 2f03a3a894
24 changed files with 111 additions and 73 deletions

View File

@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
   -H 'Content-Type: application/json'
 # speecht5 service

View File

@@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
   -H 'Content-Type: application/json'
 # speecht5 service

View File

@@ -34,7 +34,7 @@ function validate_audioqa() {
     export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
     echo "$CLIENT_POD"
     accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
-    byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
+    byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
     echo "$byte_str" > $LOG_PATH/curl_audioqa.log
     if [ -z "$byte_str" ]; then
         echo "audioqa failed, please check the logs in ${LOG_PATH}!"

View File

@@ -34,7 +34,7 @@ function validate_audioqa() {
     export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
     echo "$CLIENT_POD"
     accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
-    byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
+    byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
     echo "$byte_str" > $LOG_PATH/curl_audioqa.log
     if [ -z "$byte_str" ]; then
         echo "audioqa failed, please check the logs in ${LOG_PATH}!"

View File

@@ -41,7 +41,7 @@ test_cases:
     run_test: false
     service_name: "llm-svc" # Replace with your service name
     parameters:
-      max_new_tokens: 128
+      max_tokens: 128
       temperature: 0.01
       top_k: 10
       top_p: 0.95

View File

@@ -69,10 +69,12 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     next_inputs = {}
     next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
     next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
-    next_inputs["max_tokens"] = llm_parameters_dict["max_new_tokens"]
+    next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
     next_inputs["top_p"] = llm_parameters_dict["top_p"]
     next_inputs["stream"] = inputs["streaming"]
-    next_inputs["frequency_penalty"] = inputs["repetition_penalty"]
+    next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
+    next_inputs["presence_penalty"] = inputs["presence_penalty"]
+    next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
     next_inputs["temperature"] = inputs["temperature"]
     inputs = next_inputs
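Taken out of its pipeline context, the aligned mapping amounts to the standalone sketch below. The function name is ours; `inputs` and `llm_parameters_dict` follow the diff's usage:

```python
def align_to_openai_format(inputs: dict, llm_parameters_dict: dict) -> dict:
    """Sketch of the alignment above: map gateway parameters onto
    OpenAI-style chat-completion fields, one penalty per field."""
    return {
        "model": "tgi",  # fake model name, kept only to unify the request format
        "messages": [{"role": "user", "content": inputs["inputs"]}],
        "max_tokens": llm_parameters_dict["max_tokens"],  # was max_new_tokens
        "top_p": llm_parameters_dict["top_p"],
        "stream": inputs["streaming"],
        # Before this change, repetition_penalty was reused as frequency_penalty;
        # now each penalty passes through under its own name.
        "frequency_penalty": inputs["frequency_penalty"],
        "presence_penalty": inputs["presence_penalty"],
        "repetition_penalty": inputs["repetition_penalty"],
        "temperature": inputs["temperature"],
    }
```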

View File

@@ -229,7 +229,7 @@ OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -438,18 +438,31 @@ docker compose -f compose_vllm.yaml up -d
 This service depends on the LLM backend service above. It can take a long time to become ready on the first startup; please wait for the backend services to finish starting.
 ```bash
+# TGI service
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
+For parameters in TGI mode, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except that "max_new_tokens" is renamed to "max_tokens").
+```bash
+# vLLM Service
+curl http://${host_ip}:9000/v1/chat/completions \
+  -X POST \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
+  -H 'Content-Type: application/json'
+```
+For parameters in vLLM mode, please refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html).
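The same two requests can be issued from Python. A rough sketch with `requests`; the `host_ip` value is a placeholder for your deployment's address:

```python
import requests

host_ip = "localhost"  # placeholder; substitute your deployment's host IP
url = f"http://{host_ip}:9000/v1/chat/completions"

# TGI-style parameters (HuggingFace text_generation naming, with max_tokens)
tgi_payload = {
    "query": "What is Deep Learning?", "max_tokens": 17, "top_k": 10,
    "top_p": 0.95, "typical_p": 0.95, "temperature": 0.01,
    "repetition_penalty": 1.03, "streaming": False,
}

# vLLM-style parameters (OpenAI-compatible naming)
vllm_payload = {
    "query": "What is Deep Learning?", "max_tokens": 17, "top_p": 1,
    "temperature": 0.7, "frequency_penalty": 0, "presence_penalty": 0,
    "streaming": False,
}

for payload in (tgi_payload, vllm_payload):
    resp = requests.post(url, json=payload, timeout=120)
    print(resp.status_code, resp.text[:200])
```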
 8. MegaService
 ```bash
 curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
      "messages": "What is the revenue of Nike in 2023?"
      }'
 ```
 9. Dataprep Microservice (Optional)

View File

@@ -304,7 +304,7 @@ docker compose -f compose_qdrant.yaml up -d
 ```bash
 curl http://${host_ip}:6047/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -442,18 +442,41 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
 7. LLM Microservice
 ```bash
-curl http://${host_ip}:9000/v1/chat/completions \
+# TGI service
+curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```
+For parameters in TGI mode, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except that "max_new_tokens" is renamed to "max_tokens").
+```bash
+# vLLM Service
+curl http://${host_ip}:9000/v1/chat/completions \
+  -X POST \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
+  -H 'Content-Type: application/json'
+```
+For parameters in vLLM mode, please refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html).
+```bash
+# vLLM-on-Ray Service
+curl http://${host_ip}:9000/v1/chat/completions \
+  -X POST \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \
+  -H 'Content-Type: application/json'
+```
+For parameters in vLLM-on-Ray mode, please refer to the [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html).
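For quick reference, here is an illustrative summary of which sampling parameters each backend mode of the LLM microservice accepts, derived only from the three curl examples above (not an exhaustive API reference):

```python
# Parameter sets per backend mode, as shown in the validation commands above.
SUPPORTED_PARAMS = {
    "tgi": ["max_tokens", "top_k", "top_p", "typical_p",
            "temperature", "repetition_penalty", "streaming"],
    "vllm": ["max_tokens", "top_p", "temperature",
             "frequency_penalty", "presence_penalty", "streaming"],
    "vllm-on-ray": ["max_tokens", "presence_penalty", "streaming"],
}
```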
 8. MegaService
 ```bash
 curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
      "messages": "What is the revenue of Nike in 2023?"
      }'
 ```
 9. Dataprep Microservice (Optional)

View File

@@ -278,7 +278,7 @@ and the log shows model warm up, please wait for a while and try it later.
 ```
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -280,7 +280,7 @@ docker compose up -d
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions \
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -132,7 +132,7 @@ Two ways of consuming CodeGen Service:
 http_proxy=""
 curl http://${host_ip}:8028/generate \
   -X POST \
-  -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' \
+  -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -138,7 +138,7 @@ docker compose up -d
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -119,7 +119,7 @@ docker compose up -d
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -34,7 +34,7 @@ function validate_codegen() {
     export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
     echo "$CLIENT_POD"
     accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
-    kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
+    kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
     exit_code=$?
     if [ $exit_code -ne 0 ]; then
         echo "chatqna failed, please check the logs in ${LOG_PATH}!"

View File

@@ -34,7 +34,7 @@ function validate_codegen() {
     export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
     echo "$CLIENT_POD"
     accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
-    kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
+    kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
     exit_code=$?
     if [ $exit_code -ne 0 ]; then
         echo "chatqna failed, please check the logs in ${LOG_PATH}!"

View File

@@ -127,7 +127,7 @@ By default, the UI runs on port 5173 internally.
 http_proxy=""
 curl http://${host_ip}:8008/generate \
   -X POST \
-  -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_tokens":17, "do_sample": true}}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -149,7 +149,7 @@ Two ways of consuming Document Summarization Service:
 http_proxy=""
 curl http://${host_ip}:8008/generate \
   -X POST \
-  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -d '{"inputs":"What is Deep Learning?","parameters":{"max_tokens":17, "do_sample": true}}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -271,7 +271,7 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -140,7 +140,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -150,7 +150,7 @@ curl http://${host_ip}:3006/generate \
 # llm microservice
 curl http://${host_ip}:3007/v1/chat/completions\
   -X POST \
-  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
   -H 'Content-Type: application/json'
 ```

View File

@@ -138,28 +138,28 @@ Follow the instructions to validate MicroServices.
 2. MegaService
 ```bash
 curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": "What'\''s in this image?"
            },
            {
              "type": "image_url",
              "image_url": {
                "url": "https://www.ilankelman.org/stopsigns/australia.jpg"
              }
            }
          ]
        }
      ],
      "max_tokens": 300
    }'
 ```
 ## 🚀 Launch the UI

View File

@@ -95,28 +95,28 @@ Follow the instructions to validate MicroServices.
 2. MegaService
 ```bash
 curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{
      "messages": [
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": "What'\''s in this image?"
            },
            {
              "type": "image_url",
              "image_url": {
                "url": "https://www.ilankelman.org/stopsigns/australia.jpg"
              }
            }
          ]
        }
      ],
      "max_tokens": 300
    }'
 ```
 ## 🚀 Launch the UI