Add new example of Translation for GenAIExample (#455)
* Add Translation GMC pipeline Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
This commit is contained in:
@@ -14,6 +14,8 @@ In the below example we illustrate on Xeon.
|
||||
export APP_NAMESPACE=CT
|
||||
kubectl create ns $APP_NAMESPACE
|
||||
sed -i "s|namespace: searchqa|namespace: $APP_NAMESPACE|g" ./searchQnA_xeon.yaml
|
||||
sed -i "s|insert-your-google-api-key-here|$GOOGLE_API_KEY|g" ./searchQnA_xeon.yaml
|
||||
sed -i "s|insert-your-google-cse-id-here|$GOOGLE_CSE_ID|g" ./searchQnA_xeon.yaml
|
||||
kubectl apply -f ./searchQnA_xeon.yaml
|
||||
```
|
||||
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
<h1 align="center" id="title">Deploy Translation in a Kubernetes Cluster</h1>
|
||||
|
||||
This document outlines the deployment process for a Translation application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
|
||||
|
||||
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
|
||||
|
||||
If you have only Intel Xeon machines, you can use the `translation_xeon.yaml` file; if you have a Gaudi cluster, you can use `translation_gaudi.yaml` instead.
|
||||
In the below example we illustrate on Xeon.
|
||||
|
||||
## Deploy the Translation application
|
||||
|
||||
1. Create the desired namespace if it does not already exist and deploy the application
|
||||
```bash
|
||||
export APP_NAMESPACE=ct  # Kubernetes namespace names must be lowercase (RFC 1123 label)
|
||||
kubectl create ns $APP_NAMESPACE
|
||||
sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_xeon.yaml
|
||||
kubectl apply -f ./translation_xeon.yaml
|
||||
```
|
||||
|
||||
2. Check if the application is up and ready
|
||||
```bash
|
||||
kubectl get pods -n $APP_NAMESPACE
|
||||
```
|
||||
|
||||
3. Deploy a client pod for testing
|
||||
```bash
|
||||
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
|
||||
```
|
||||
|
||||
4. Check that client pod is ready
|
||||
```bash
|
||||
kubectl get pods -n $APP_NAMESPACE
|
||||
```
|
||||
|
||||
5. Send request to application
|
||||
```bash
|
||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")
|
||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > gmc_translation.log
|
||||
```
|
||||
|
||||
34
Translation/kubernetes/translation_gaudi.yaml
Normal file
34
Translation/kubernetes/translation_gaudi.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: gmc.opea.io/v1alpha3
|
||||
kind: GMConnector
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: gmconnector
|
||||
app.kubernetes.io/managed-by: kustomize
|
||||
gmc/platform: gaudi
|
||||
name: translation
|
||||
namespace: translation
|
||||
spec:
|
||||
routerConfig:
|
||||
name: router
|
||||
serviceName: router-service
|
||||
nodes:
|
||||
root:
|
||||
routerType: Sequence
|
||||
steps:
|
||||
- name: Llm
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: llm-service
|
||||
config:
|
||||
endpoint: /v1/chat/completions
|
||||
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||
- name: TgiGaudi
|
||||
internalService:
|
||||
serviceName: tgi-gaudi-svc
|
||||
config:
|
||||
MODEL_ID: haoranxu/ALMA-13B
|
||||
endpoint: /generate
|
||||
isDownstreamService: true
|
||||
34
Translation/kubernetes/translation_xeon.yaml
Normal file
34
Translation/kubernetes/translation_xeon.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: gmc.opea.io/v1alpha3
|
||||
kind: GMConnector
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: gmconnector
|
||||
app.kubernetes.io/managed-by: kustomize
|
||||
gmc/platform: xeon
|
||||
name: translation
|
||||
namespace: translation
|
||||
spec:
|
||||
routerConfig:
|
||||
name: router
|
||||
serviceName: router-service
|
||||
nodes:
|
||||
root:
|
||||
routerType: Sequence
|
||||
steps:
|
||||
- name: Llm
|
||||
data: $response
|
||||
internalService:
|
||||
serviceName: llm-service
|
||||
config:
|
||||
endpoint: /v1/chat/completions
|
||||
TGI_LLM_ENDPOINT: tgi-service
|
||||
- name: Tgi
|
||||
internalService:
|
||||
serviceName: tgi-service
|
||||
config:
|
||||
MODEL_ID: haoranxu/ALMA-13B
|
||||
endpoint: /generate
|
||||
isDownstreamService: true
|
||||
127
Translation/tests/test_gmc_on_gaudi.sh
Executable file
127
Translation/tests/test_gmc_on_gaudi.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Trace every command (-x) and abort on the first failing one (-e).
set -xe

# Account running the test; the paths below are derived from it.
USER_ID=$(whoami)
# Directory where the validation step stores the response log.
LOG_PATH="/home/${USER_ID}/logs"
# Hugging Face hub cache location (not referenced elsewhere in this script).
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Optional image repository override taken from the environment; empty by default.
IMAGE_REPO="${IMAGE_REPO:-}"
|
||||
|
||||
#######################################
# Deploy the Translation GMC pipeline for Gaudi and wait for its router.
# Must be run from the directory containing translation_gaudi.yaml.
# Globals:   APP_NAMESPACE (read) - target namespace, must be set
# Arguments: none
# Outputs:   progress messages and the pod listing on stdout
# Returns:   exits non-zero if APP_NAMESPACE is unset or the router never
#            becomes ready (via wait_until_pod_ready)
#######################################
function install_translation() {
    # Fail early with a clear message instead of producing "namespace: " in the manifest.
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"

    kubectl create ns "$APP_NAMESPACE"
    # Point the manifest at the requested namespace (edits the file in place).
    sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_gaudi.yaml
    kubectl apply -f ./translation_gaudi.yaml

    # Wait until the router service is ready
    echo "Waiting for the translation router service to be ready..."
    wait_until_pod_ready "translation router" "$APP_NAMESPACE" "router-service"

    # Print the pod table as-is so it stays readable in the CI log.
    kubectl get pods -n "$APP_NAMESPACE"
}
|
||||
|
||||
#######################################
# Send one translation request through the GMC router and check the reply.
# Globals:   APP_NAMESPACE (read), LOG_PATH (read),
#            CLIENT_POD (exported), accessUrl (written)
# Arguments: none
# Outputs:   progress messages on stdout; the raw response is stored in
#            $LOG_PATH/gmc_translation.log
# Returns:   exits 1 if the request fails or the response does not contain
#            the literal "[DONE]" marker
#######################################
function validate_translation() {
    local log_file="$LOG_PATH/gmc_translation.log"
    local client_pod

    # deploy client pod for testing
    kubectl create deployment client-test -n "$APP_NAMESPACE" --image=python:3.8.13 -- sleep infinity

    # wait for client pod ready
    wait_until_pod_ready "client-test" "$APP_NAMESPACE" "client-test"
    # Fixed grace period before the request is sent.
    # NOTE(review): presumably this lets the model-serving pods finish loading - confirm.
    sleep 60

    kubectl get pods -n "$APP_NAMESPACE"

    # Resolve the client pod and the pipeline URL. Assignment is kept separate
    # from `export` so a failing kubectl is not masked by export's exit status.
    client_pod=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath={.items..metadata.name})
    export CLIENT_POD="$client_pod"
    echo "$CLIENT_POD"
    accessUrl=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")

    # The redirect below fails if the log directory is missing.
    mkdir -p "$LOG_PATH"

    # send request to translation
    # Checked inside `if` so that, under `set -e`, a failing request reaches
    # the error message instead of aborting the script silently.
    if ! kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > "$log_file"; then
        echo "translation failed, please check the logs in ${LOG_PATH}!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    # -F matches the literal text "[DONE]"; as a regex it would be a bracket
    # expression matching any single D, O, N or E.
    if grep -qF "[DONE]" "$log_file"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        if [[ -f "$log_file" ]]; then
            cat "$log_file"
        fi
        exit 1
    fi
}
|
||||
|
||||
#######################################
# Poll until the selected pod reports Ready, or give up after 30 retries.
# Arguments: $1 - human-readable name used in log messages
#            $2 - namespace to look in
#            $3 - value passed to is_pod_ready as the app selector
# Outputs:   progress messages and pod listings on stdout
# Returns:   0 once ready; exits 1 (after printing the GMC controller logs)
#            when the retry budget is exhausted
#######################################
function wait_until_pod_ready() {
    local description=$1
    local namespace=$2
    local app_label=$3
    local -r max_retries=30
    local retry_count=0

    echo "Waiting for the $description to be ready..."
    while ! is_pod_ready "$namespace" "$app_label"; do
        if (( retry_count >= max_retries )); then
            echo "$description is not ready after waiting for a significant amount of time"
            # Best-effort diagnostics: a failure here must not skip the exit below.
            get_gmc_controller_logs || true
            exit 1
        fi
        echo "$description is not ready yet. Retrying in 10 seconds..."
        sleep 10
        # Diagnostic listing only; a transient kubectl error should not abort the wait.
        kubectl get pods -n "$namespace" || true
        retry_count=$((retry_count + 1))
    done
}
|
||||
|
||||
#######################################
# Report whether the selected pod has the Ready condition set to "True".
# Arguments: $1 - namespace
#            $2 - app label value; the special value "gmc-controller"
#                 queries the namespace without a label selector
# Returns:   0 if the queried status is exactly "True", 1 otherwise
#            (including when the kubectl query itself fails)
#######################################
function is_pod_ready() {
    local pod_status

    if [[ "$2" == "gmc-controller" ]]; then
        pod_status=$(kubectl get pods -n "$1" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') || return 1
    else
        pod_status=$(kubectl get pods -n "$1" -l "app=$2" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') || return 1
    fi

    [[ "$pod_status" == "True" ]]
}
|
||||
|
||||
#######################################
# Print the logs of the GMC controller pod for troubleshooting.
# Globals:   SYSTEM_NAMESPACE (read) - namespace searched for the controller
# Arguments: none
# Outputs:   status messages and the pod logs on stdout
# Returns:   1 if no pod with label control-plane=gmc-controller is found,
#            otherwise the exit status of `kubectl logs`
#######################################
function get_gmc_controller_logs() {
    local pod_name

    # Fetch the name of the pod labelled control-plane=gmc-controller.
    # The `.items[0]` lookup fails when no pod matches; fall back to an empty
    # name so the message below is reached even under `set -e`.
    pod_name=$(kubectl get pods -n "$SYSTEM_NAMESPACE" -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') || pod_name=""

    # Check if the pod name was found
    if [[ -z "$pod_name" ]]; then
        echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
        return 1
    fi

    # Get the logs of the found pod
    echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
    kubectl logs "$pod_name" -n "$SYSTEM_NAMESPACE"
}
|
||||
|
||||
# Entry point: run the step named by the first command-line argument.
#   install_Translation   - deploy the pipeline (runs inside Translation/kubernetes)
#   validate_Translation  - send a test request and check the response
# A missing or unknown step name exits 1 so a mistyped CI step cannot pass silently.
if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    install_Translation)
        pushd Translation/kubernetes
        install_translation
        popd
        ;;
    validate_Translation)
        pushd Translation/kubernetes
        validate_translation
        popd
        ;;
    *)
        echo "Unknown function: $1"
        exit 1
        ;;
esac
|
||||
127
Translation/tests/test_gmc_on_xeon.sh
Executable file
127
Translation/tests/test_gmc_on_xeon.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Trace every command (-x) and abort on the first failing one (-e).
set -xe

# Account running the test; the paths below are derived from it.
USER_ID=$(whoami)
# Directory where the validation step stores the response log.
LOG_PATH="/home/${USER_ID}/logs"
# Hugging Face hub cache location (not referenced elsewhere in this script).
MOUNT_DIR="/home/${USER_ID}/.cache/huggingface/hub"
# Optional image repository override taken from the environment; empty by default.
IMAGE_REPO="${IMAGE_REPO:-}"
|
||||
|
||||
#######################################
# Deploy the Translation GMC pipeline for Xeon and wait for its router.
# Must be run from the directory containing translation_xeon.yaml.
# Globals:   APP_NAMESPACE (read) - target namespace, must be set
# Arguments: none
# Outputs:   progress messages and the pod listing on stdout
# Returns:   exits non-zero if APP_NAMESPACE is unset or the router never
#            becomes ready (via wait_until_pod_ready)
#######################################
function install_translation() {
    # Fail early with a clear message instead of producing "namespace: " in the manifest.
    : "${APP_NAMESPACE:?APP_NAMESPACE must be set}"

    kubectl create ns "$APP_NAMESPACE"
    # Point the manifest at the requested namespace (edits the file in place).
    sed -i "s|namespace: translation|namespace: $APP_NAMESPACE|g" ./translation_xeon.yaml
    kubectl apply -f ./translation_xeon.yaml

    # Wait until the router service is ready
    echo "Waiting for the translation router service to be ready..."
    wait_until_pod_ready "translation router" "$APP_NAMESPACE" "router-service"

    # Print the pod table as-is so it stays readable in the CI log.
    kubectl get pods -n "$APP_NAMESPACE"
}
|
||||
|
||||
#######################################
# Send one translation request through the GMC router and check the reply.
# Globals:   APP_NAMESPACE (read), LOG_PATH (read),
#            CLIENT_POD (exported), accessUrl (written)
# Arguments: none
# Outputs:   progress messages on stdout; the raw response is stored in
#            $LOG_PATH/gmc_translation.log
# Returns:   exits 1 if the request fails or the response does not contain
#            the literal "[DONE]" marker
#######################################
function validate_translation() {
    local log_file="$LOG_PATH/gmc_translation.log"
    local client_pod

    # deploy client pod for testing
    kubectl create deployment client-test -n "$APP_NAMESPACE" --image=python:3.8.13 -- sleep infinity

    # wait for client pod ready
    wait_until_pod_ready "client-test" "$APP_NAMESPACE" "client-test"
    # Fixed grace period before the request is sent.
    # NOTE(review): presumably this lets the model-serving pods finish loading - confirm.
    sleep 60

    kubectl get pods -n "$APP_NAMESPACE"

    # Resolve the client pod and the pipeline URL. Assignment is kept separate
    # from `export` so a failing kubectl is not masked by export's exit status.
    client_pod=$(kubectl get pod -n "$APP_NAMESPACE" -l app=client-test -o jsonpath={.items..metadata.name})
    export CLIENT_POD="$client_pod"
    echo "$CLIENT_POD"
    accessUrl=$(kubectl get gmc -n "$APP_NAMESPACE" -o jsonpath="{.items[?(@.metadata.name=='translation')].status.accessUrl}")

    # The redirect below fails if the log directory is missing.
    mkdir -p "$LOG_PATH"

    # send request to translation
    # Checked inside `if` so that, under `set -e`, a failing request reaches
    # the error message instead of aborting the script silently.
    if ! kubectl exec "$CLIENT_POD" -n "$APP_NAMESPACE" -- curl "$accessUrl" -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > "$log_file"; then
        echo "translation failed, please check the logs in ${LOG_PATH}!"
        exit 1
    fi

    echo "Checking response results, make sure the output is reasonable. "
    # -F matches the literal text "[DONE]"; as a regex it would be a bracket
    # expression matching any single D, O, N or E.
    if grep -qF "[DONE]" "$log_file"; then
        echo "Response check succeed!"
    else
        echo "Response check failed, please check the logs in artifacts!"
        if [[ -f "$log_file" ]]; then
            cat "$log_file"
        fi
        exit 1
    fi
}
|
||||
|
||||
#######################################
# Poll until the selected pod reports Ready, or give up after 30 retries.
# Arguments: $1 - human-readable name used in log messages
#            $2 - namespace to look in
#            $3 - value passed to is_pod_ready as the app selector
# Outputs:   progress messages and pod listings on stdout
# Returns:   0 once ready; exits 1 (after printing the GMC controller logs)
#            when the retry budget is exhausted
#######################################
function wait_until_pod_ready() {
    local description=$1
    local namespace=$2
    local app_label=$3
    local -r max_retries=30
    local retry_count=0

    echo "Waiting for the $description to be ready..."
    while ! is_pod_ready "$namespace" "$app_label"; do
        if (( retry_count >= max_retries )); then
            echo "$description is not ready after waiting for a significant amount of time"
            # Best-effort diagnostics: a failure here must not skip the exit below.
            get_gmc_controller_logs || true
            exit 1
        fi
        echo "$description is not ready yet. Retrying in 10 seconds..."
        sleep 10
        # Diagnostic listing only; a transient kubectl error should not abort the wait.
        kubectl get pods -n "$namespace" || true
        retry_count=$((retry_count + 1))
    done
}
|
||||
|
||||
#######################################
# Report whether the selected pod has the Ready condition set to "True".
# Arguments: $1 - namespace
#            $2 - app label value; the special value "gmc-controller"
#                 queries the namespace without a label selector
# Returns:   0 if the queried status is exactly "True", 1 otherwise
#            (including when the kubectl query itself fails)
#######################################
function is_pod_ready() {
    local pod_status

    if [[ "$2" == "gmc-controller" ]]; then
        pod_status=$(kubectl get pods -n "$1" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') || return 1
    else
        pod_status=$(kubectl get pods -n "$1" -l "app=$2" -o jsonpath='{.items[].status.conditions[?(@.type=="Ready")].status}') || return 1
    fi

    [[ "$pod_status" == "True" ]]
}
|
||||
|
||||
#######################################
# Print the logs of the GMC controller pod for troubleshooting.
# Globals:   SYSTEM_NAMESPACE (read) - namespace searched for the controller
# Arguments: none
# Outputs:   status messages and the pod logs on stdout
# Returns:   1 if no pod with label control-plane=gmc-controller is found,
#            otherwise the exit status of `kubectl logs`
#######################################
function get_gmc_controller_logs() {
    local pod_name

    # Fetch the name of the pod labelled control-plane=gmc-controller.
    # The `.items[0]` lookup fails when no pod matches; fall back to an empty
    # name so the message below is reached even under `set -e`.
    pod_name=$(kubectl get pods -n "$SYSTEM_NAMESPACE" -l control-plane=gmc-controller -o jsonpath='{.items[0].metadata.name}') || pod_name=""

    # Check if the pod name was found
    if [[ -z "$pod_name" ]]; then
        echo "No pod found with app-name gmc-controller in namespace $SYSTEM_NAMESPACE"
        return 1
    fi

    # Get the logs of the found pod
    echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
    kubectl logs "$pod_name" -n "$SYSTEM_NAMESPACE"
}
|
||||
|
||||
# Entry point: run the step named by the first command-line argument.
#   install_Translation   - deploy the pipeline (runs inside Translation/kubernetes)
#   validate_Translation  - send a test request and check the response
# A missing or unknown step name exits 1 so a mistyped CI step cannot pass silently.
if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    install_Translation)
        pushd Translation/kubernetes
        install_translation
        popd
        ;;
    validate_Translation)
        pushd Translation/kubernetes
        validate_translation
        popd
        ;;
    *)
        echo "Unknown function: $1"
        exit 1
        ;;
esac
|
||||
Reference in New Issue
Block a user