Update all examples yaml files of GMC in GenAIExample (#436)
* Update all examples yaml files of GMC in GenAIExample. Signed-off-by: zhlsunshine <huailong.zhang@intel.com>
This commit is contained in:
@@ -23,6 +23,7 @@ spec:
|
|||||||
serviceName: embedding-svc
|
serviceName: embedding-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/embeddings
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
|
||||||
- name: TeiEmbeddingGaudi
|
- name: TeiEmbeddingGaudi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tei-embedding-gaudi-svc
|
serviceName: tei-embedding-gaudi-svc
|
||||||
@@ -33,6 +34,8 @@ spec:
|
|||||||
serviceName: retriever-svc
|
serviceName: retriever-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/retrieval
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
|
||||||
- name: VectorDB
|
- name: VectorDB
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: redis-vector-db
|
serviceName: redis-vector-db
|
||||||
@@ -43,6 +46,7 @@ spec:
|
|||||||
serviceName: reranking-svc
|
serviceName: reranking-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/reranking
|
endpoint: /v1/reranking
|
||||||
|
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||||
- name: TeiReranking
|
- name: TeiReranking
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tei-reranking-svc
|
serviceName: tei-reranking-svc
|
||||||
@@ -55,6 +59,7 @@ spec:
|
|||||||
serviceName: llm-svc
|
serviceName: llm-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||||
- name: TgiGaudi
|
- name: TgiGaudi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-gaudi-svc
|
serviceName: tgi-gaudi-svc
|
||||||
|
|||||||
124
ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
Normal file
124
ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: gmc.opea.io/v1alpha3
|
||||||
|
kind: GMConnector
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: gmconnector
|
||||||
|
app.kubernetes.io/managed-by: kustomize
|
||||||
|
gmc/platform: gaudi
|
||||||
|
name: switch
|
||||||
|
namespace: switch
|
||||||
|
spec:
|
||||||
|
routerConfig:
|
||||||
|
name: router
|
||||||
|
serviceName: router-service
|
||||||
|
nodes:
|
||||||
|
root:
|
||||||
|
routerType: Sequence
|
||||||
|
steps:
|
||||||
|
- name: Embedding
|
||||||
|
nodeName: node1
|
||||||
|
- name: Reranking
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: reranking-svc
|
||||||
|
config:
|
||||||
|
endpoint: /v1/reranking
|
||||||
|
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||||
|
- name: TeiReranking
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-reranking-svc
|
||||||
|
config:
|
||||||
|
endpoint: /rerank
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: Llm
|
||||||
|
data: $response
|
||||||
|
nodeName: node2
|
||||||
|
node1:
|
||||||
|
routerType: Switch
|
||||||
|
steps:
|
||||||
|
- name: Embedding
|
||||||
|
condition: embedding-model-id==large
|
||||||
|
internalService:
|
||||||
|
serviceName: embedding-svc-large
|
||||||
|
config:
|
||||||
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
|
||||||
|
- name: Embedding
|
||||||
|
condition: embedding-model-id==small
|
||||||
|
internalService:
|
||||||
|
serviceName: embedding-svc-small
|
||||||
|
config:
|
||||||
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
|
||||||
|
- name: TeiEmbeddingGaudi
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-embedding-gaudi-svc-bge15
|
||||||
|
config:
|
||||||
|
MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: TeiEmbeddingGaudi
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-embedding-gaudi-svc-bge-small
|
||||||
|
config:
|
||||||
|
MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: Retriever
|
||||||
|
condition: embedding-model-id==large
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: retriever-svc-large
|
||||||
|
config:
|
||||||
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db-large
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
|
||||||
|
- name: Retriever
|
||||||
|
condition: embedding-model-id==small
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: retriever-svc-small
|
||||||
|
config:
|
||||||
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db-small
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
|
||||||
|
- name: VectorDB
|
||||||
|
internalService:
|
||||||
|
serviceName: redis-vector-db-large
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: VectorDB
|
||||||
|
internalService:
|
||||||
|
serviceName: redis-vector-db-small
|
||||||
|
isDownstreamService: true
|
||||||
|
node2:
|
||||||
|
routerType: Switch
|
||||||
|
steps:
|
||||||
|
- name: Llm
|
||||||
|
condition: model-id==intel
|
||||||
|
internalService:
|
||||||
|
serviceName: llm-svc-intel
|
||||||
|
config:
|
||||||
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
|
||||||
|
- name: Llm
|
||||||
|
condition: model-id==llama
|
||||||
|
internalService:
|
||||||
|
serviceName: llm-svc-llama
|
||||||
|
config:
|
||||||
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
|
||||||
|
- name: TgiGaudi
|
||||||
|
internalService:
|
||||||
|
serviceName: tgi-gaudi-service-intel
|
||||||
|
config:
|
||||||
|
endpoint: /generate
|
||||||
|
MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: TgiGaudi
|
||||||
|
internalService:
|
||||||
|
serviceName: tgi-gaudi-service-llama
|
||||||
|
config:
|
||||||
|
endpoint: /generate
|
||||||
|
MODEL_ID: openlm-research/open_llama_3b
|
||||||
|
isDownstreamService: true
|
||||||
124
ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
Normal file
124
ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
apiVersion: gmc.opea.io/v1alpha3
|
||||||
|
kind: GMConnector
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: gmconnector
|
||||||
|
app.kubernetes.io/managed-by: kustomize
|
||||||
|
gmc/platform: xeon
|
||||||
|
name: switch
|
||||||
|
namespace: switch
|
||||||
|
spec:
|
||||||
|
routerConfig:
|
||||||
|
name: router
|
||||||
|
serviceName: router-service
|
||||||
|
nodes:
|
||||||
|
root:
|
||||||
|
routerType: Sequence
|
||||||
|
steps:
|
||||||
|
- name: Embedding
|
||||||
|
nodeName: node1
|
||||||
|
- name: Reranking
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: reranking-svc
|
||||||
|
config:
|
||||||
|
endpoint: /v1/reranking
|
||||||
|
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||||
|
- name: TeiReranking
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-reranking-svc
|
||||||
|
config:
|
||||||
|
endpoint: /rerank
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: Llm
|
||||||
|
data: $response
|
||||||
|
nodeName: node2
|
||||||
|
node1:
|
||||||
|
routerType: Switch
|
||||||
|
steps:
|
||||||
|
- name: Embedding
|
||||||
|
condition: embedding-model-id==large
|
||||||
|
internalService:
|
||||||
|
serviceName: embedding-svc-large
|
||||||
|
config:
|
||||||
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
|
||||||
|
- name: Embedding
|
||||||
|
condition: embedding-model-id==small
|
||||||
|
internalService:
|
||||||
|
serviceName: embedding-svc-small
|
||||||
|
config:
|
||||||
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
|
||||||
|
- name: TeiEmbedding
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-embedding-svc-bge15
|
||||||
|
config:
|
||||||
|
MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: TeiEmbedding
|
||||||
|
internalService:
|
||||||
|
serviceName: tei-embedding-svc-bge-small
|
||||||
|
config:
|
||||||
|
MODEL_ID: BAAI/bge-base-en-v1.5
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: Retriever
|
||||||
|
condition: embedding-model-id==large
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: retriever-svc-large
|
||||||
|
config:
|
||||||
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db-large
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
|
||||||
|
- name: Retriever
|
||||||
|
condition: embedding-model-id==small
|
||||||
|
data: $response
|
||||||
|
internalService:
|
||||||
|
serviceName: retriever-svc-small
|
||||||
|
config:
|
||||||
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db-small
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
|
||||||
|
- name: VectorDB
|
||||||
|
internalService:
|
||||||
|
serviceName: redis-vector-db-large
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: VectorDB
|
||||||
|
internalService:
|
||||||
|
serviceName: redis-vector-db-small
|
||||||
|
isDownstreamService: true
|
||||||
|
node2:
|
||||||
|
routerType: Switch
|
||||||
|
steps:
|
||||||
|
- name: Llm
|
||||||
|
condition: model-id==intel
|
||||||
|
internalService:
|
||||||
|
serviceName: llm-svc-intel
|
||||||
|
config:
|
||||||
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-service-intel
|
||||||
|
- name: Llm
|
||||||
|
condition: model-id==llama
|
||||||
|
internalService:
|
||||||
|
serviceName: llm-svc-llama
|
||||||
|
config:
|
||||||
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-service-llama
|
||||||
|
- name: Tgi
|
||||||
|
internalService:
|
||||||
|
serviceName: tgi-service-intel
|
||||||
|
config:
|
||||||
|
endpoint: /generate
|
||||||
|
MODEL_ID: Intel/neural-chat-7b-v3-3
|
||||||
|
isDownstreamService: true
|
||||||
|
- name: Tgi
|
||||||
|
internalService:
|
||||||
|
serviceName: tgi-service-llama
|
||||||
|
config:
|
||||||
|
endpoint: /generate
|
||||||
|
MODEL_ID: bigscience/bloom-560m
|
||||||
|
isDownstreamService: true
|
||||||
@@ -23,6 +23,7 @@ spec:
|
|||||||
serviceName: embedding-svc
|
serviceName: embedding-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/embeddings
|
endpoint: /v1/embeddings
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
|
||||||
- name: TeiEmbedding
|
- name: TeiEmbedding
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tei-embedding-svc
|
serviceName: tei-embedding-svc
|
||||||
@@ -33,6 +34,8 @@ spec:
|
|||||||
serviceName: retriever-svc
|
serviceName: retriever-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/retrieval
|
endpoint: /v1/retrieval
|
||||||
|
REDIS_URL: redis-vector-db
|
||||||
|
TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
|
||||||
- name: VectorDB
|
- name: VectorDB
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: redis-vector-db
|
serviceName: redis-vector-db
|
||||||
@@ -43,6 +46,7 @@ spec:
|
|||||||
serviceName: reranking-svc
|
serviceName: reranking-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/reranking
|
endpoint: /v1/reranking
|
||||||
|
TEI_RERANKING_ENDPOINT: tei-reranking-svc
|
||||||
- name: TeiReranking
|
- name: TeiReranking
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tei-reranking-svc
|
serviceName: tei-reranking-svc
|
||||||
@@ -55,6 +59,7 @@ spec:
|
|||||||
serviceName: llm-svc
|
serviceName: llm-svc
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-service-m
|
||||||
- name: Tgi
|
- name: Tgi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-service-m
|
serviceName: tgi-service-m
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ function validate_chatqna() {
|
|||||||
echo "Checking response results, make sure the output is reasonable. "
|
echo "Checking response results, make sure the output is reasonable. "
|
||||||
local status=false
|
local status=false
|
||||||
if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
|
if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
|
||||||
[[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
|
[[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
|
||||||
status=true
|
status=true
|
||||||
fi
|
fi
|
||||||
if [ $status == false ]; then
|
if [ $status == false ]; then
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ function validate_chatqna() {
|
|||||||
echo "Checking response results, make sure the output is reasonable. "
|
echo "Checking response results, make sure the output is reasonable. "
|
||||||
local status=false
|
local status=false
|
||||||
if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
|
if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
|
||||||
[[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
|
[[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
|
||||||
status=true
|
status=true
|
||||||
fi
|
fi
|
||||||
if [ $status == false ]; then
|
if [ $status == false ]; then
|
||||||
|
|||||||
40
CodeGen/kubernetes/README.md
Normal file
40
CodeGen/kubernetes/README.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
<h1 align="center" id="title">Deploy CodeGen in a Kubernetes Cluster</h1>
|
||||||
|
|
||||||
|
This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
|
||||||
|
|
||||||
|
Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install.
|
||||||
|
|
||||||
|
If you have only Intel Xeon machines, use the codegen_xeon.yaml file; if you have a Gaudi cluster, use codegen_gaudi.yaml instead.
|
||||||
|
The example below uses Xeon.
|
||||||
|
|
||||||
|
## Deploy the CodeGen application
|
||||||
|
|
||||||
|
1. Create the desired namespace if it does not already exist and deploy the application
|
||||||
|
```bash
|
||||||
|
export APP_NAMESPACE=ct
|
||||||
|
kubectl create ns $APP_NAMESPACE
|
||||||
|
sed -i "s|namespace: codegen|namespace: $APP_NAMESPACE|g" ./codegen_xeon.yaml
|
||||||
|
kubectl apply -f ./codegen_xeon.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Check if the application is up and ready
|
||||||
|
```bash
|
||||||
|
kubectl get pods -n $APP_NAMESPACE
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Deploy a client pod for testing
|
||||||
|
```bash
|
||||||
|
kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Check that the client pod is ready
|
||||||
|
```bash
|
||||||
|
kubectl get pods -n $APP_NAMESPACE
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Send a request to the application
|
||||||
|
```bash
|
||||||
|
export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
|
||||||
|
export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
|
||||||
|
kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log
|
||||||
|
```
|
||||||
@@ -24,10 +24,11 @@ spec:
|
|||||||
serviceName: llm-service
|
serviceName: llm-service
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||||
- name: TgiGaudi
|
- name: TgiGaudi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-gaudi-svc
|
serviceName: tgi-gaudi-svc
|
||||||
config:
|
config:
|
||||||
LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
|
MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
|
||||||
endpoint: /generate
|
endpoint: /generate
|
||||||
isDownstreamService: true
|
isDownstreamService: true
|
||||||
|
|||||||
@@ -24,10 +24,11 @@ spec:
|
|||||||
serviceName: llm-service
|
serviceName: llm-service
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-service
|
||||||
- name: Tgi
|
- name: Tgi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-service
|
serviceName: tgi-service
|
||||||
config:
|
config:
|
||||||
LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
|
MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
|
||||||
endpoint: /generate
|
endpoint: /generate
|
||||||
isDownstreamService: true
|
isDownstreamService: true
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ function validate_codegen() {
|
|||||||
echo "Checking response results, make sure the output is reasonable. "
|
echo "Checking response results, make sure the output is reasonable. "
|
||||||
local status=false
|
local status=false
|
||||||
if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
|
if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
|
||||||
[[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
|
[[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
|
||||||
status=true
|
status=true
|
||||||
fi
|
fi
|
||||||
if [ $status == false ]; then
|
if [ $status == false ]; then
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ function validate_codegen() {
|
|||||||
echo "Checking response results, make sure the output is reasonable. "
|
echo "Checking response results, make sure the output is reasonable. "
|
||||||
local status=false
|
local status=false
|
||||||
if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
|
if [[ -f $LOG_PATH/gmc_codegen.log ]] && \
|
||||||
[[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
|
[[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then
|
||||||
status=true
|
status=true
|
||||||
fi
|
fi
|
||||||
if [ $status == false ]; then
|
if [ $status == false ]; then
|
||||||
|
|||||||
@@ -24,10 +24,11 @@ spec:
|
|||||||
serviceName: codetrans-service
|
serviceName: codetrans-service
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||||
- name: TgiGaudi
|
- name: TgiGaudi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-gaudi-svc
|
serviceName: tgi-gaudi-svc
|
||||||
config:
|
config:
|
||||||
LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
|
MODEL_ID: HuggingFaceH4/mistral-7b-grok
|
||||||
endpoint: /generate
|
endpoint: /generate
|
||||||
isDownstreamService: true
|
isDownstreamService: true
|
||||||
|
|||||||
@@ -24,10 +24,11 @@ spec:
|
|||||||
serviceName: codetrans-service
|
serviceName: codetrans-service
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/completions
|
endpoint: /v1/chat/completions
|
||||||
|
TGI_LLM_ENDPOINT: tgi-service
|
||||||
- name: Tgi
|
- name: Tgi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-service
|
serviceName: tgi-service
|
||||||
config:
|
config:
|
||||||
LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok
|
MODEL_ID: HuggingFaceH4/mistral-7b-grok
|
||||||
endpoint: /generate
|
endpoint: /generate
|
||||||
isDownstreamService: true
|
isDownstreamService: true
|
||||||
|
|||||||
@@ -18,13 +18,14 @@ spec:
|
|||||||
root:
|
root:
|
||||||
routerType: Sequence
|
routerType: Sequence
|
||||||
steps:
|
steps:
|
||||||
- name: DocSumGaudi
|
- name: DocSum
|
||||||
data: $response
|
data: $response
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: docsum-llm-uservice
|
serviceName: docsum-llm-uservice
|
||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/docsum
|
endpoint: /v1/chat/docsum
|
||||||
PORT: "9009"
|
PORT: "9009"
|
||||||
|
TGI_LLM_ENDPOINT: tgi-gaudi-svc
|
||||||
- name: TgiGaudi
|
- name: TgiGaudi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-gaudi-svc
|
serviceName: tgi-gaudi-svc
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ spec:
|
|||||||
config:
|
config:
|
||||||
endpoint: /v1/chat/docsum
|
endpoint: /v1/chat/docsum
|
||||||
PORT: "9009"
|
PORT: "9009"
|
||||||
|
TGI_LLM_ENDPOINT: tgi-svc
|
||||||
- name: Tgi
|
- name: Tgi
|
||||||
internalService:
|
internalService:
|
||||||
serviceName: tgi-svc
|
serviceName: tgi-svc
|
||||||
|
|||||||
Reference in New Issue
Block a user