Revert the LLM model for Kubernetes GMC (#675)
* Revert the LLM model to meta-llama/CodeLlama-7b-hf
Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
This commit is contained in:
@@ -29,6 +29,6 @@ spec:
|
|||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-service
|
serviceName: tgi-service
|
||||||
config:
|
config:
|
||||||
MODEL_ID: HuggingFaceH4/mistral-7b-grok
|
MODEL_ID: meta-llama/CodeLlama-7b-hf
|
||||||
endpoint: /generate
|
endpoint: /generate
|
||||||
isDownstreamService: true
|
isDownstreamService: true
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ function validate_codegen() {
|
|||||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||||
echo "$CLIENT_POD"
|
echo "$CLIENT_POD"
|
||||||
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
||||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||||
exit_code=$?
|
exit_code=$?
|
||||||
if [ $exit_code -ne 0 ]; then
|
if [ $exit_code -ne 0 ]; then
|
||||||
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
|
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ function validate_codegen() {
|
|||||||
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||||
echo "$CLIENT_POD"
|
echo "$CLIENT_POD"
|
||||||
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
||||||
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||||
exit_code=$?
|
exit_code=$?
|
||||||
if [ $exit_code -ne 0 ]; then
|
if [ $exit_code -ne 0 ]; then
|
||||||
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
|
echo "chatqna failed, please check the logs in ${LOG_PATH}!"
|
||||||
|
|||||||
Reference in New Issue
Block a user