Sync value files from GenAIInfra (#1428)
All Gaudi values files were updated with extra flags. Added Helm support for two new examples: Text2Image and SearchQnA. Minor fix for llm-uservice.

Signed-off-by: Dolpher Du <dolpher.du@intel.com>
@@ -4,35 +4,13 @@
 # Accelerate inferencing in heaviest components to improve performance
 # by overriding their subchart values
 
-tgi:
+vllm:
   enabled: true
-  accelDevice: "gaudi"
   image:
-    repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
-  resources:
-    limits:
-      habana.ai/gaudi: 4
-  MAX_INPUT_LENGTH: "4096"
-  MAX_TOTAL_TOKENS: "8192"
-  CUDA_GRAPHS: ""
-  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
-  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
-  ENABLE_HPU_GRAPH: "true"
-  LIMIT_HPU_GRAPH: "true"
-  USE_FLASH_ATTENTION: "true"
-  FLASH_ATTENTION_RECOMPUTE: "true"
-  extraCmdArgs: ["--sharded","true","--num-shard","4"]
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-    failureThreshold: 120
+    repository: opea/vllm-gaudi
+supervisor:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+ragagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
+sqlagent:
+  llm_endpoint_url: http://{{ .Release.Name }}-vllm
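The hunk above switches the agent pipeline from a dedicated, 4-card sharded TGI to a single shared vLLM service that the supervisor, ragagent, and sqlagent subcharts all reach through llm_endpoint_url. A minimal sketch of checking that wiring after rendering the chart; the chart path ./agentqna and release name agents are illustrative assumptions, not taken from this commit:

    # Render the chart with the Gaudi values file and list the endpoints the
    # agents will call (chart path and release name are assumed here).
    helm template agents ./agentqna -f gaudi-values.yaml | grep llm_endpoint_url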
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -33,11 +33,15 @@ tgi:
     failureThreshold: 120
 
 whisper:
+  image:
+    repository: opea/whisper-gaudi
   resources:
     limits:
       habana.ai/gaudi: 1
 
 speecht5:
+  image:
+    repository: opea/speecht5-gaudi
   resources:
     limits:
       habana.ai/gaudi: 1
ChatQnA/kubernetes/helm/cpu-tgi-values.yaml (new file, 112 lines)
@@ -0,0 +1,112 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Override CPU resource request and probe timing values in specific subcharts
+#
+# RESOURCES
+#
+# Resource request matching actual resource usage (with enough slack)
+# is important when service is scaled up, so that right amount of pods
+# get scheduled to right nodes.
+#
+# Because resource usage depends on the used devices, model, data type
+# and SW versions, and this top-level chart has overrides for them,
+# resource requests need to be specified here too.
+#
+# To test service without resource request, use "resources: {}".
+#
+# PROBES
+#
+# Inferencing pods startup / warmup takes *much* longer on CPUs than
+# with acceleration devices, and their responses are also slower,
+# especially when node is running several instances of these services.
+#
+# Kubernetes restarting pod before its startup finishes, or not
+# sending it queries because it's not in ready state due to slow
+# readiness responses, does really NOT help in getting faster responses.
+#
+# => probe timings need to be increased when running on CPU.
+
+vllm:
+  enabled: false
+tgi:
+  enabled: true
+  # TODO: add Helm value also for TGI data type option:
+  # https://github.com/opea-project/GenAIExamples/issues/330
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
+
+  # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3 @ 32-bit:
+  #resources:
+  #  limits:
+  #    cpu: 8
+  #    memory: 70Gi
+  #  requests:
+  #    cpu: 6
+  #    memory: 65Gi
+
+  livenessProbe:
+    initialDelaySeconds: 8
+    periodSeconds: 8
+    failureThreshold: 24
+    timeoutSeconds: 4
+  readinessProbe:
+    initialDelaySeconds: 16
+    periodSeconds: 8
+    timeoutSeconds: 4
+  startupProbe:
+    initialDelaySeconds: 10
+    periodSeconds: 5
+    failureThreshold: 180
+    timeoutSeconds: 2
+
+teirerank:
+  RERANK_MODEL_ID: "BAAI/bge-reranker-base"
+
+  # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
+  resources:
+    limits:
+      cpu: 4
+      memory: 30Gi
+    requests:
+      cpu: 2
+      memory: 25Gi
+
+  livenessProbe:
+    initialDelaySeconds: 8
+    periodSeconds: 8
+    failureThreshold: 24
+    timeoutSeconds: 4
+  readinessProbe:
+    initialDelaySeconds: 8
+    periodSeconds: 8
+    timeoutSeconds: 4
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 120
+
+tei:
+  EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
+
+  # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
+  resources:
+    limits:
+      cpu: 4
+      memory: 4Gi
+    requests:
+      cpu: 2
+      memory: 3Gi
+
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 24
+    timeoutSeconds: 2
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 2
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 120
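The probe numbers above encode the comment block's reasoning: for TGI on CPU the startup budget is initialDelaySeconds + failureThreshold × periodSeconds = 10 + 180 × 5 = 910 seconds, roughly 15 minutes of warmup before Kubernetes gives up, versus the much tighter budgets used on accelerators. A hedged install sketch; the chart path and namespace are assumptions, only the values file path comes from this commit:

    # Deploy ChatQnA on CPU with TGI as the LLM backend (chart path assumed).
    helm install chatqna ./chatqna \
        --namespace chatqna --create-namespace \
        -f ChatQnA/kubernetes/helm/cpu-tgi-values.yaml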
@@ -1,109 +1,5 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-# Override CPU resource request and probe timing values in specific subcharts
-#
-# RESOURCES
-#
-# Resource request matching actual resource usage (with enough slack)
-# is important when service is scaled up, so that right amount of pods
-# get scheduled to right nodes.
-#
-# Because resource usage depends on the used devices, model, data type
-# and SW versions, and this top-level chart has overrides for them,
-# resource requests need to be specified here too.
-#
-# To test service without resource request, use "resources: {}".
-#
-# PROBES
-#
-# Inferencing pods startup / warmup takes *much* longer on CPUs than
-# with acceleration devices, and their responses are also slower,
-# especially when node is running several instances of these services.
-#
-# Kubernetes restarting pod before its startup finishes, or not
-# sending it queries because it's not in ready state due to slow
-# readiness responses, does really NOT help in getting faster responses.
-#
-# => probe timings need to be increased when running on CPU.
-
-tgi:
-  # TODO: add Helm value also for TGI data type option:
-  # https://github.com/opea-project/GenAIExamples/issues/330
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-
-  # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3 @ 32-bit:
-  resources:
-    limits:
-      cpu: 8
-      memory: 70Gi
-    requests:
-      cpu: 6
-      memory: 65Gi
-
-  livenessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    failureThreshold: 24
-    timeoutSeconds: 4
-  readinessProbe:
-    initialDelaySeconds: 16
-    periodSeconds: 8
-    timeoutSeconds: 4
-  startupProbe:
-    initialDelaySeconds: 10
-    periodSeconds: 5
-    failureThreshold: 180
-    timeoutSeconds: 2
-
-teirerank:
-  RERANK_MODEL_ID: "BAAI/bge-reranker-base"
-
-  # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
-  resources:
-    limits:
-      cpu: 4
-      memory: 30Gi
-    requests:
-      cpu: 2
-      memory: 25Gi
-
-  livenessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    failureThreshold: 24
-    timeoutSeconds: 4
-  readinessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    timeoutSeconds: 4
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 120
-
-tei:
-  EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
-
-  # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
-  resources:
-    limits:
-      cpu: 4
-      memory: 4Gi
-    requests:
-      cpu: 2
-      memory: 3Gi
-
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 24
-    timeoutSeconds: 2
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 2
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 120
+image:
+  repository: opea/chatqna
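The CPU tuning deleted here is not lost: it moved into the new cpu-tgi-values.yaml added above, leaving this file as a thin image override. Since Helm merges later -f files over earlier ones, the two can still be layered; the slimmed file's name is not shown in this excerpt, so cpu-values.yaml below is an assumption, as is the chart path:

    # Layer the slim base values with the CPU TGI tuning (names assumed).
    helm install chatqna ./chatqna -f cpu-values.yaml -f cpu-tgi-values.yaml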
@@ -4,12 +4,15 @@
 # Accelerate inferencing in heaviest components to improve performance
 # by overriding their subchart values
 
+vllm:
+  enabled: false
 # TGI: largest bottleneck for ChatQnA
 tgi:
+  enabled: true
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -6,9 +6,9 @@
 
 tgi:
   enabled: false
 
 vllm:
   enabled: true
+  shmSize: 1Gi
   accelDevice: "gaudi"
   image:
     repository: opea/vllm-gaudi
@@ -19,7 +19,7 @@ vllm:
     initialDelaySeconds: 5
     periodSeconds: 5
     timeoutSeconds: 1
-    failureThreshold: 120
+    failureThreshold: 180
   readinessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
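With periodSeconds still 5, the change above raises the vLLM startup window from 120 × 5 = 600 seconds to 180 × 5 = 900 seconds, consistent with the roughly five-minute (or longer) Gaudi warmup that the test scripts later in this commit sleep for.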
@@ -39,7 +39,6 @@ vllm:
     "--max-seq_len-to-capture", "2048"
   ]
 
-
 # Reranking: second largest bottleneck when reranking is in use
 # (i.e. query context docs have been uploaded with data-prep)
 #
@@ -44,44 +44,13 @@ teirerank:
   readinessProbe:
     timeoutSeconds: 1
 
-tgi:
-  accelDevice: "gaudi"
-  image:
-    repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
-  resources:
-    limits:
-      habana.ai/gaudi: 1
-  # higher limits are needed with extra input tokens added by rerank
-  MAX_INPUT_LENGTH: "2048"
-  MAX_TOTAL_TOKENS: "4096"
-  CUDA_GRAPHS: ""
-  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
-  ENABLE_HPU_GRAPH: "true"
-  LIMIT_HPU_GRAPH: "true"
-  USE_FLASH_ATTENTION: "true"
-  FLASH_ATTENTION_RECOMPUTE: "true"
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-    failureThreshold: 120
-
 tgi-guardrails:
   enabled: true
   accelDevice: "gaudi"
   LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -106,3 +75,38 @@ tgi-guardrails:
     periodSeconds: 5
     timeoutSeconds: 1
     failureThreshold: 120
+
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+  shmSize: 1Gi
+  accelDevice: "gaudi"
+  image:
+    repository: opea/vllm-gaudi
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 180
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+
+  extraCmdArgs: [
+    "--tensor-parallel-size", "1",
+    "--block-size", "128",
+    "--max-num-seqs", "256",
+    "--max-seq_len-to-capture", "2048"
+  ]
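With vLLM enabled behind the guardrails pipeline, its health can be spot-checked with the same OpenAI-style request the deleted manifest tests used. A sketch, assuming the service is named chatqna-vllm and listens on port 80 in namespace chatqna (none of which is confirmed by this diff):

    # Port-forward the assumed vLLM service and send one chat completion.
    kubectl -n chatqna port-forward svc/chatqna-vllm 8000:80 &
    curl http://localhost:8000/v1/chat/completions -X POST \
        -H 'Content-Type: application/json' \
        -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'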
@@ -1,14 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-image:
-  repository: opea/chatqna-guardrails
-
-# guardrails related config
-guardrails-usvc:
-  enabled: true
-  # SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
-  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
-tgi-guardrails:
-  enabled: true
-  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
@@ -1,25 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# To override values in subchart tgi
-tgi:
-  accelDevice: "nvidia"
-  image:
-    repository: ghcr.io/huggingface/text-generation-inference
-    tag: "2.2.0"
-  resources:
-    limits:
-      nvidia.com/gpu: 1
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-    failureThreshold: 120
@@ -1,117 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-ROLLOUT_TIMEOUT_SECONDS="1800s"
-KUBECTL_TIMEOUT_SECONDS="60s"
-
-function validate_chatqna() {
-    local ns=$1
-    local log=$2
-    max_retry=20
-    # make sure microservice retriever-usvc is ready
-    # try to curl retriever-svc for max_retry times
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-retriever-usvc" $ns)
-        curl http://$endpoint_url/v1/retrieval -X POST \
-            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice retriever failed, exit with error."
-        return 1
-    fi
-    # make sure microservice tgi-svc is ready
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-tgi" $ns)
-        curl http://$endpoint_url/generate -X POST \
-            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice tgi failed, exit with error."
-        return 1
-    fi
-
-    # check megaservice works
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$log.log
-    endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna" $ns)
-    curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice failed, please check the logs in $LOGFILE!"
-        return ${exit_code}
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] &&
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-    if [[ $status == false ]]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        return 1
-    else
-        echo "Response check succeed!"
-    fi
-    return 0
-}
-
-function install_chatqna() {
-    echo "Testing manifests chatqna_guardrails"
-    local ns=$1
-    bash ChatQnA/tests/common/_test_manifest_utils.sh _cleanup_ns $ns
-    pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest
-    kubectl create namespace $ns
-    # install guardrails
-    kubectl apply -f chatqna-guardrails.yaml -n $ns
-    # Sleep enough time for chatqna_guardrails to be ready
-    sleep 60
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_ChatQnA)
-        pushd ChatQnA/tests/common
-        bash _test_manifest_utils.sh init_ChatQnA
-        popd
-        ;;
-    install_ChatQnA)
-        NAMESPACE=$2
-        install_chatqna $NAMESPACE
-        popd
-        ;;
-    validate_ChatQnA)
-        NAMESPACE=$2
-        SERVICE_NAME=chatqna-guardrails
-        validate_chatqna $NAMESPACE chatqna-guardrails
-        ret=$?
-        if [ $ret -ne 0 ]; then
-            exit $ret
-        fi
-        ;;
-
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,117 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-ROLLOUT_TIMEOUT_SECONDS="1800s"
-KUBECTL_TIMEOUT_SECONDS="60s"
-
-function validate_chatqna() {
-    local ns=$1
-    local log=$2
-    max_retry=10
-    # make sure microservice retriever-usvc is ready
-    # try to curl retriever-svc for max_retry times
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-retriever-usvc" $ns)
-        curl http://$endpoint_url/v1/retrieval -X POST \
-            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice retriever failed, exit with error."
-        return 1
-    fi
-    # make sure microservice tgi-svc is ready
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-tgi" $ns)
-        curl http://$endpoint_url/generate -X POST \
-            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice tgi failed, exit with error."
-        return 1
-    fi
-
-    # check megaservice works
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$log.log
-    endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna" $ns)
-    curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice failed, please check the logs in $LOGFILE!"
-        return ${exit_code}
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] &&
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        return 1
-    else
-        echo "Response check succeed!"
-    fi
-    return 0
-}
-
-function install_chatqna() {
-    echo "Testing manifests chatqna_guardrails"
-    local ns=$1
-    bash ChatQnA/tests/common/_test_manifest_utils.sh _cleanup_ns $ns
-    pushd ChatQnA/kubernetes/intel/cpu/xeon/manifest
-    kubectl create namespace $ns
-    # install guardrail
-    kubectl apply -f chatqna-guardrails.yaml -n $ns
-    # Sleep enough time for chatqna_guardrails to be ready
-    sleep 60
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_ChatQnA)
-        pushd ChatQnA/tests/common
-        bash _test_manifest_utils.sh init_ChatQnA
-        popd
-        ;;
-    install_ChatQnA)
-        NAMESPACE=$2
-        install_chatqna $NAMESPACE
-        popd
-        ;;
-    validate_ChatQnA)
-        NAMESPACE=$2
-        SERVICE_NAME=chatqna-guardrails
-        validate_chatqna $NAMESPACE chatqna-guardrails
-        ret=$?
-        if [ $ret -ne 0 ]; then
-            exit $ret
-        fi
-        ;;
-
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,113 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-ROLLOUT_TIMEOUT_SECONDS="1800s"
-KUBECTL_TIMEOUT_SECONDS="60s"
-
-function install_chatqna {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f chatqna.yaml -n $NAMESPACE
-    # Sleep enough time for retreiver-usvc to be ready
-    sleep 60
-}
-
-function validate_chatqna() {
-    local ns=$1
-    local log=$2
-    max_retry=20
-    # make sure microservice retriever-usvc is ready
-    # try to curl retriever-svc for max_retry times
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-retriever-usvc" $ns)
-        curl http://$endpoint_url/v1/retrieval -X POST \
-            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice retriever failed, exit with error."
-        return 1
-    fi
-    # make sure microservice tgi-svc is ready
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-tgi" $ns)
-        curl http://$endpoint_url/generate -X POST \
-            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice tgi failed, exit with error."
-        return 1
-    fi
-
-    # check megaservice works
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$log.log
-    endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna" $ns)
-    curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice failed, please check the logs in $LOGFILE!"
-        return ${exit_code}
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] &&
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        return 1
-    else
-        echo "Response check succeed!"
-    fi
-    return 0
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_ChatQnA)
-        pushd ChatQnA/tests/common
-        bash _test_manifest_utils.sh init_ChatQnA
-        popd
-        ;;
-    install_ChatQnA)
-        pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest
-        NAMESPACE=$2
-        install_chatqna
-        popd
-        ;;
-    validate_ChatQnA)
-        NAMESPACE=$2
-        SERVICE_NAME=chatqna
-        validate_chatqna $NAMESPACE chatqna
-        ret=$?
-        if [ $ret -ne 0 ]; then
-            exit $ret
-        fi
-        ;;
-
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,112 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-ROLLOUT_TIMEOUT_SECONDS="1800s"
-KUBECTL_TIMEOUT_SECONDS="60s"
-
-function install_chatqna {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f chatqna.yaml -n $NAMESPACE
-    # Sleep enough time for retreiver-usvc to be ready
-    sleep 60
-}
-
-function validate_chatqna() {
-    local ns=$1
-    local log=$2
-    max_retry=10
-    # make sure microservice retriever-usvc is ready
-    # try to curl retriever-svc for max_retry times
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-retriever-usvc" $ns)
-        curl http://$endpoint_url/v1/retrieval -X POST \
-            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice retriever failed, exit with error."
-        return 1
-    fi
-    # make sure microservice tgi-svc is ready
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-tgi" $ns)
-        curl http://$endpoint_url/generate -X POST \
-            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice tgi failed, exit with error."
-        return 1
-    fi
-
-    # check megaservice works
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$log.log
-    endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna" $ns)
-    curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice failed, please check the logs in $LOGFILE!"
-        return ${exit_code}
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] &&
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        return 1
-    else
-        echo "Response check succeed!"
-    fi
-    return 0
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_ChatQnA)
-        pushd ChatQnA/tests/common
-        bash _test_manifest_utils.sh init_ChatQnA
-        popd
-        ;;
-    install_ChatQnA)
-        pushd ChatQnA/kubernetes/intel/cpu/xeon/manifest
-        NAMESPACE=$2
-        install_chatqna
-        popd
-        ;;
-    validate_ChatQnA)
-        NAMESPACE=$2
-        SERVICE_NAME=chatqna
-        validate_chatqna $NAMESPACE chatqna
-        ret=$?
-        if [ $ret -ne 0 ]; then
-            exit $ret
-        fi
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,118 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-ROLLOUT_TIMEOUT_SECONDS="1800s"
-KUBECTL_TIMEOUT_SECONDS="60s"
-
-function validate_chatqna() {
-    local ns=$1
-    local log=$2
-    max_retry=20
-    # make sure microservice retriever-usvc is ready
-    # try to curl retriever-svc for max_retry times
-    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-retriever-usvc" $ns)
-        curl http://$endpoint_url/v1/retrieval -X POST \
-            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice retriever failed, exit with error."
-        return 1
-    fi
-
-    # make sure microservice vllm-svc is ready
-    for ((i=1; i<=max_retry; i++))
-    do
-        endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna-vllm" $ns)
-        curl http://$endpoint_url/v1/chat/completions -X POST \
-            -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' \
-            -H 'Content-Type: application/json' && break
-        sleep 30
-    done
-    # if i is bigger than max_retry, then exit with error
-    if [ $i -gt $max_retry ]; then
-        echo "Microservice vllm failed, exit with error."
-        return 1
-    fi
-
-    # check megaservice works
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$log.log
-    endpoint_url=$(bash ChatQnA/tests/common/_test_manifest_utils.sh get_end_point "chatqna" $ns)
-    curl http://$endpoint_url/v1/chatqna -H "Content-Type: application/json" -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": "What is the revenue of Nike in 2023?"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice failed, please check the logs in $LOGFILE!"
-        return ${exit_code}
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] &&
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        return 1
-    else
-        echo "Response check succeed!"
-    fi
-    return 0
-}
-
-function install_chatqna() {
-    echo "Testing manifests chatqna_vllm"
-    local ns=$1
-    bash ChatQnA/tests/common/_test_manifest_utils.sh _cleanup_ns $ns
-    kubectl create namespace $ns
-    # install guardrail
-    pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest
-    kubectl apply -f chatqna-vllm.yaml -n $ns
-    # Sleep enough time for chatqna_vllm to be ready, vllm warmup takes about 5 minutes
-    sleep 280
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_ChatQnA)
-        pushd ChatQnA/tests/common
-        bash _test_manifest_utils.sh init_ChatQnA
-        popd
-        ;;
-    install_ChatQnA)
-        NAMESPACE=$2
-        install_chatqna $NAMESPACE
-        popd
-        ;;
-    validate_ChatQnA)
-        NAMESPACE=$2
-        SERVICE_NAME=chatqna-vllm
-        validate_chatqna $NAMESPACE chatqna-vllm
-        ret=$?
-        if [ $ret -ne 0 ]; then
-            exit $ret
-        fi
-        ;;
-
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -6,13 +6,18 @@ tgi:
   LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
   MAX_INPUT_LENGTH: "1024"
   MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
    periodSeconds: 5
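The five added keys are passed to the TGI Gaudi container as environment variables, the same set the ChatQnA and CodeTrans values gain elsewhere in this commit. A hedged way to confirm they reach the rendered manifest; the chart path and release name below are illustrative:

    # Render the CodeGen chart and check the HPU flags appear in the pod env.
    helm template codegen ./codegen -f gaudi-values.yaml \
        | grep -E 'ENABLE_HPU_GRAPH|USE_FLASH_ATTENTION|FLASH_ATTENTION_RECOMPUTE'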
@@ -1,85 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-function init_codegen() {
-    # executed under path manifest/codegen/xeon
-    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
-    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    # replace microservice image tag
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
-    # set huggingface token
-    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-}
-
-function install_codegen {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f codegen.yaml -n $NAMESPACE
-}
-
-function validate_codegen() {
-    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    echo "try to curl http://${ip_address}:${port}/v1/codegen..."
-
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/codegen -H "Content-Type: application/json" \
-        -d '{"messages": "def print_hello_world():"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice codegen failed, please check the logs in $LOGFILE!"
-        exit 1
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] && \
-        [[ $(grep -c "print" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
-    fi
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_CodeGen)
-        pushd CodeGen/kubernetes/intel/hpu/gaudi/manifest
-        init_codegen
-        popd
-        ;;
-    install_CodeGen)
-        pushd CodeGen/kubernetes/intel/hpu/gaudi/manifest
-        NAMESPACE=$2
-        install_codegen
-        popd
-        ;;
-    validate_CodeGen)
-        NAMESPACE=$2
-        SERVICE_NAME=codegen
-        validate_codegen
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,85 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-function init_codegen() {
-    # executed under path manifest/codegen/xeon
-    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
-    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    # replace microservice image tag
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
-    # set huggingface token
-    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-}
-
-function install_codegen {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f codegen.yaml -n $NAMESPACE
-}
-
-function validate_codegen() {
-    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    echo "try to curl http://${ip_address}:${port}/v1/codegen..."
-
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/codegen -H "Content-Type: application/json" \
-        -d '{"messages": "def print_hello_world():"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice codegen failed, please check the logs in $LOGFILE!"
-        exit 1
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] && \
-        [[ $(grep -c "print" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
-    fi
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_CodeGen)
-        pushd CodeGen/kubernetes/intel/cpu/xeon/manifest
-        init_codegen
-        popd
-        ;;
-    install_CodeGen)
-        pushd CodeGen/kubernetes/intel/cpu/xeon/manifest
-        NAMESPACE=$2
-        install_codegen
-        popd
-        ;;
-    validate_CodeGen)
-        NAMESPACE=$2
-        SERVICE_NAME=codegen
-        validate_codegen
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -5,13 +5,18 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
  resources:
     limits:
       habana.ai/gaudi: 1
   MAX_INPUT_LENGTH: "1024"
   MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
@@ -1,86 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-function init_codetrans() {
-    # executed under path manifest/codetrans/xeon
-    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
-    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    # replace microservice image tag
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
-    # set huggingface token
-    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-}
-
-function install_codetrans {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f codetrans.yaml -n $NAMESPACE
-}
-
-function validate_codetrans() {
-    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    echo "try to curl http://${ip_address}:${port}/v1/codetrans..."
-
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/codetrans \
-        -H 'Content-Type: application/json' \
-        -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice codetrans failed, please check the logs in $LOGFILE!"
-        exit 1
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] && \
-        [[ $(grep -c "print" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
-    fi
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_CodeTrans)
-        pushd CodeTrans/kubernetes/intel/hpu/gaudi/manifest
-        init_codetrans
-        popd
-        ;;
-    install_CodeTrans)
-        pushd CodeTrans/kubernetes/intel/hpu/gaudi/manifest
-        NAMESPACE=$2
-        install_codetrans
-        popd
-        ;;
-    validate_CodeTrans)
-        NAMESPACE=$2
-        SERVICE_NAME=codetrans
-        validate_codetrans
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,86 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-function init_codetrans() {
-    # executed under path manifest/codetrans/xeon
-    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
-    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    # replace microservice image tag
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
-    # set huggingface token
-    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-}
-
-function install_codetrans {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f codetrans.yaml -n $NAMESPACE
-}
-
-function validate_codetrans() {
-    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    echo "try to curl http://${ip_address}:${port}/v1/codetrans..."
-
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/codetrans \
-        -H 'Content-Type: application/json' \
-        -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice codetrans failed, please check the logs in $LOGFILE!"
-        exit 1
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] && \
-        [[ $(grep -c "print" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
-    fi
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_CodeTrans)
-        pushd CodeTrans/kubernetes/intel/cpu/xeon/manifest
-        init_codetrans
-        popd
-        ;;
-    install_CodeTrans)
-        pushd CodeTrans/kubernetes/intel/cpu/xeon/manifest
-        NAMESPACE=$2
-        install_codetrans
-        popd
-        ;;
-    validate_CodeTrans)
-        NAMESPACE=$2
-        SERVICE_NAME=codetrans
-        validate_codetrans
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -2,4 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 tgi:
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  enabled: true
+vllm:
+  enabled: false
@@ -1,16 +1,21 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+vllm:
+  enabled: false
+
+llm-uservice:
+  DOCSUM_BACKEND: "TGI"
+
 tgi:
+  enabled: true
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: ""
   ENABLE_HPU_GRAPH: true
   LIMIT_HPU_GRAPH: true
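DOCSUM_BACKEND tells llm-uservice which serving engine sits behind it, mirroring the tgi/vllm enabled toggles above. A sketch of flipping the file's default to vLLM at install time; the chart path and the accepted value string for the vLLM backend are assumptions, not shown in this diff:

    # Override the backend selection on the command line (names assumed).
    helm upgrade --install docsum ./docsum -f gaudi-values.yaml \
        --set tgi.enabled=false --set vllm.enabled=true \
        --set llm-uservice.DOCSUM_BACKEND=vLLM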
@@ -1,87 +0,0 @@
-#!/bin/bash
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-set -xe
-USER_ID=$(whoami)
-LOG_PATH=/home/$(whoami)/logs
-MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
-IMAGE_REPO=${IMAGE_REPO:-opea}
-IMAGE_TAG=${IMAGE_TAG:-latest}
-
-function init_docsum() {
-    # executed under path manifest/docsum/xeon
-    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
-    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
-    # replace microservice image tag
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
-    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
-    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
-    # set huggingface token
-    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
-}
-
-function install_docsum {
-    echo "namespace is $NAMESPACE"
-    kubectl apply -f docsum.yaml -n $NAMESPACE
-}
-
-function validate_docsum() {
-    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
-    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
-    echo "try to curl http://${ip_address}:${port}/v1/docsum..."
-
-    # generate a random logfile name to avoid conflict among multiple runners
-    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
-    # Curl the Mega Service
-    curl http://${ip_address}:${port}/v1/docsum \
-        -H 'Content-Type: multipart/form-data' \
-        -F 'type=text' \
-        -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." > $LOGFILE
-    exit_code=$?
-    if [ $exit_code -ne 0 ]; then
-        echo "Megaservice docsum failed, please check the logs in $LOGFILE!"
-        exit 1
-    fi
-
-    echo "Checking response results, make sure the output is reasonable. "
-    local status=false
-    if [[ -f $LOGFILE ]] && \
-        [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
-        status=true
-    fi
-
-    if [ $status == false ]; then
-        echo "Response check failed, please check the logs in artifacts!"
-        exit 1
-    else
-        echo "Response check succeed!"
-    fi
-}
-
-if [ $# -eq 0 ]; then
-    echo "Usage: $0 <function_name>"
-    exit 1
-fi
-
-case "$1" in
-    init_DocSum)
-        pushd DocSum/kubernetes/intel/hpu/gaudi/manifest
-        init_docsum
-        popd
-        ;;
-    install_DocSum)
-        pushd DocSum/kubernetes/intel/hpu/gaudi/manifest
-        NAMESPACE=$2
-        install_docsum
-        popd
-        ;;
-    validate_DocSum)
-        NAMESPACE=$2
-        SERVICE_NAME=docsum
-        validate_docsum
-        ;;
-    *)
-        echo "Unknown function: $1"
-        ;;
-esac
@@ -1,87 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

function init_docsum() {
    # executed under path manifest/docsum/xeon
    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
    # replace microservice image tag
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function install_docsum {
    echo "namespace is $NAMESPACE"
    kubectl apply -f docsum.yaml -n $NAMESPACE
}

function validate_docsum() {
    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
    echo "try to curl http://${ip_address}:${port}/v1/docsum..."

    # generate a random logfile name to avoid conflict among multiple runners
    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
    # Curl the Mega Service
    curl http://${ip_address}:${port}/v1/docsum \
        -H 'Content-Type: multipart/form-data' \
        -F 'type=text' \
        -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." > $LOGFILE
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
        echo "Megaservice docsum failed, please check the logs in $LOGFILE!"
        exit 1
    fi

    echo "Checking response results; make sure the output is reasonable."
    local status=false
    if [[ -f $LOGFILE ]] && \
       [[ $(grep -c "\[DONE\]" $LOGFILE) != 0 ]]; then
        status=true
    fi

    if [ $status == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
        exit 1
    else
        echo "Response check succeeded!"
    fi
}

if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_DocSum)
        pushd DocSum/kubernetes/intel/cpu/xeon/manifest
        init_docsum
        popd
        ;;
    install_DocSum)
        pushd DocSum/kubernetes/intel/cpu/xeon/manifest
        NAMESPACE=$2
        install_docsum
        popd
        ;;
    validate_DocSum)
        NAMESPACE=$2
        SERVICE_NAME=docsum
        validate_docsum
        ;;
    *)
        echo "Unknown function: $1"
        ;;
esac
@@ -5,13 +5,25 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "4096"
-  MAX_TOTAL_TOKENS: "8192"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
   CUDA_GRAPHS: "0"
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
+  PREFILL_BATCH_BUCKET_SIZE: 1
+  BATCH_BUCKET_SIZE: 8
+  extraCmdArgs:
+    - "--max-batch-total-tokens"
+    - "65536"
+    - "--max-batch-prefill-tokens"
+    - "4096"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5
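A quick sanity check on these numbers (simple arithmetic, not part of the commit): with --max-batch-total-tokens at 65536 and MAX_TOTAL_TOKENS at 2048 per sequence, the server can hold at most 65536 / 2048 = 32 full-length sequences in flight, while BATCH_BUCKET_SIZE 8 pads decode batches up to multiples of 8 to keep HPU graph shapes stable.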
18  SearchQnA/kubernetes/helm/README.md  Normal file
@@ -0,0 +1,18 @@
# Deploy SearchQnA on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Xeon

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```

## Deploy on Gaudi

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install searchqna oci://ghcr.io/opea-project/charts/searchqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
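Once the release is up, a minimal smoke test could look like the sketch below; the service name, port 8888, and the /v1/searchqna route follow the usual OPEA megaservice conventions and are assumptions here, so confirm them with `kubectl get svc` first:

```
kubectl get pods                          # wait until all pods are Running/Ready
kubectl port-forward svc/searchqna 8888:8888 &
curl http://localhost:8888/v1/searchqna \
  -H 'Content-Type: application/json' \
  -d '{"messages": "What is OPEA?"}'
```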
7  SearchQnA/kubernetes/helm/cpu-values.yaml  Normal file
@@ -0,0 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
llm-uservice:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
50  SearchQnA/kubernetes/helm/gaudi-values.yaml  Normal file
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

tgi:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.3.1"
  resources:
    limits:
      habana.ai/gaudi: 1
  MAX_INPUT_LENGTH: "2048"
  MAX_TOTAL_TOKENS: "4096"
  CUDA_GRAPHS: ""
  HF_HUB_DISABLE_PROGRESS_BARS: 1
  HF_HUB_ENABLE_HF_TRANSFER: 0
  ENABLE_HPU_GRAPH: true
  LIMIT_HPU_GRAPH: true
  USE_FLASH_ATTENTION: true
  FLASH_ATTENTION_RECOMPUTE: true
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120

tei:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    tag: "1.5.0"
  OMPI_MCA_btl_vader_single_copy_mechanism: none
  MAX_WARMUP_SEQUENCE_LENGTH: 512
  securityContext:
    readOnlyRootFilesystem: false
  resources:
    limits:
      habana.ai/gaudi: 1
  livenessProbe:
    timeoutSeconds: 1
  readinessProbe:
    timeoutSeconds: 1
18  Text2Image/kubernetes/helm/README.md  Normal file
@@ -0,0 +1,18 @@
# Deploy txt2img on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Xeon

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install txt2img oci://ghcr.io/opea-project/charts/txt2img --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```

## Deploy on Gaudi

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install txt2img oci://ghcr.io/opea-project/charts/txt2img --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
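A quick post-install check, using the release name from the commands above (the pod name prefix is an assumption; the first startup is slow because the model is pulled on demand):

```
helm status txt2img
kubectl get pods | grep text2image   # wait for Running/Ready
```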
6  Text2Image/kubernetes/helm/cpu-values.yaml  Normal file
@@ -0,0 +1,6 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

text2image:
  image:
    repository: opea/text2image
30  Text2Image/kubernetes/helm/gaudi-values.yaml  Normal file
@@ -0,0 +1,30 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

text2image:
  accelDevice: "gaudi"
  image:
    repository: opea/text2image-gaudi
  resources:
    limits:
      habana.ai/gaudi: 1
      # The following hugepage-related settings are for the default MODEL stable-diffusion-v1-5/stable-diffusion-v1-5.
      # Users should change the resource limits for other models.
      hugepages-2Mi: 256Mi
  volumes:
    - name: hugepage-2mi
      emptyDir:
        medium: HugePages-2Mi
  volumeMounts:
    - name: hugepage-2mi
      mountPath: /hugepages-2Mi
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
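The hugepages-2Mi limit only schedules if the node actually exposes 2Mi hugepages. A standard way to check, and to reserve pages if none are available (node name and page count are illustrative; 256Mi at 2Mi per page works out to 128 pages):

```
kubectl describe node <node-name> | grep -i hugepages-2mi
# if Allocatable shows 0, reserve pages on the node; a kubelet restart
# may be needed before the new allocatable value is reported:
#   sudo sysctl vm.nr_hugepages=128
```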
@@ -1,86 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

function init_translation() {
    # executed under path manifest/translation/gaudi
    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
    # replace microservice image tag
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function install_translation {
    echo "namespace is $NAMESPACE"
    kubectl apply -f translation.yaml -n $NAMESPACE
    sleep 50s
}

function validate_translation() {
    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
    echo "try to curl http://${ip_address}:${port}/v1/translation..."

    # generate a random logfile name to avoid conflict among multiple runners
    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
    # Curl the Mega Service
    curl http://${ip_address}:${port}/v1/translation \
        -H 'Content-Type: application/json' \
        -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > $LOGFILE
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
        echo "Megaservice translation failed, please check the logs in $LOGFILE!"
        exit 1
    fi

    echo "Checking response results; make sure the output is reasonable."
    local status=false
    if [[ -f $LOGFILE ]] && \
       [[ $(grep -c "translation" $LOGFILE) != 0 ]]; then
        status=true
    fi

    if [ $status == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
    else
        echo "Response check succeeded!"
    fi
}

if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_Translation)
        pushd Translation/kubernetes/intel/hpu/gaudi/manifest
        init_translation
        popd
        ;;
    install_Translation)
        pushd Translation/kubernetes/intel/hpu/gaudi/manifest
        NAMESPACE=$2
        install_translation
        popd
        ;;
    validate_Translation)
        NAMESPACE=$2
        SERVICE_NAME=translation
        validate_translation
        ;;
    *)
        echo "Unknown function: $1"
        ;;
esac
@@ -1,85 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe
USER_ID=$(whoami)
LOG_PATH=/home/$(whoami)/logs
MOUNT_DIR=/home/$USER_ID/.cache/huggingface/hub
IMAGE_REPO=${IMAGE_REPO:-opea}
IMAGE_TAG=${IMAGE_TAG:-latest}

function init_translation() {
    # executed under path manifest/translation/xeon
    # replace the mount dir "path: /mnt/model" with "path: $CHART_MOUNT"
    find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
    # replace microservice image tag
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/\(.*\):latest#image: \"opea/\1:${IMAGE_TAG}#g" {} \;
    # replace the repository "image: opea/*" with "image: $IMAGE_REPO/"
    find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}/#g" {} \;
    # set huggingface token
    find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function install_translation {
    echo "namespace is $NAMESPACE"
    kubectl apply -f translation.yaml -n $NAMESPACE
}

function validate_translation() {
    ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
    port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
    echo "try to curl http://${ip_address}:${port}/v1/translation..."

    # generate a random logfile name to avoid conflict among multiple runners
    LOGFILE=$LOG_PATH/curlmega_$NAMESPACE.log
    # Curl the Mega Service
    curl http://${ip_address}:${port}/v1/translation \
        -H 'Content-Type: application/json' \
        -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' > $LOGFILE
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
        echo "Megaservice translation failed, please check the logs in $LOGFILE!"
        exit 1
    fi

    echo "Checking response results; make sure the output is reasonable."
    local status=false
    if [[ -f $LOGFILE ]] && \
       [[ $(grep -c "translation" $LOGFILE) != 0 ]]; then
        status=true
    fi

    if [ $status == false ]; then
        echo "Response check failed, please check the logs in artifacts!"
    else
        echo "Response check succeeded!"
    fi
}

if [ $# -eq 0 ]; then
    echo "Usage: $0 <function_name>"
    exit 1
fi

case "$1" in
    init_Translation)
        pushd Translation/kubernetes/intel/cpu/xeon/manifest
        init_translation
        popd
        ;;
    install_Translation)
        pushd Translation/kubernetes/intel/cpu/xeon/manifest
        NAMESPACE=$2
        install_translation
        popd
        ;;
    validate_Translation)
        NAMESPACE=$2
        SERVICE_NAME=translation
        validate_translation
        ;;
    *)
        echo "Unknown function: $1"
        ;;
esac
@@ -9,13 +9,18 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.6"
+    tag: "2.3.1"
   resources:
     limits:
       habana.ai/gaudi: 1
   MAX_INPUT_LENGTH: "4096"
   MAX_TOTAL_TOKENS: "8192"
   CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
   livenessProbe:
     initialDelaySeconds: 5
     periodSeconds: 5