Update Kubernetes manifest files for CodeGen (#441)

Updated the Kubernetes manifest files for CodeGen to reflect the latest changes.
Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
Lianhao Lu
2024-07-23 14:07:42 +08:00
committed by GitHub
parent c9548d7921
commit 2f9397e012
5 changed files with 443 additions and 193 deletions

View File

@@ -3,7 +3,10 @@
> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
> You can also customize the "MODEL_ID" and "model-volume"
> You can also customize the "MODEL_ID" if needed.
> Make sure the directory `/mnt/opea-models` exists on the node where the CodeGen workload runs; it is used to cache the downloaded model. Otherwise, modify `codegen.yaml` so that the `model-volume` points to a directory that does exist on the node.
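A minimal way to prepare things before deploying, assuming you have shell access to the worker node and keep the default paths (the node name below is only an example), is:

```
# on the node that will run the CodeGen workload (hypothetical node name)
ssh user@worker-node-1 'sudo mkdir -p /mnt/opea-models && sudo chmod 775 /mnt/opea-models'

# fill in the required HuggingFace token before applying the manifest
export HUGGINGFACEHUB_API_TOKEN=<your-token>
sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" codegen.yaml
```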
## Deploy On Xeon
@@ -25,11 +28,15 @@ kubectl apply -f codegen.yaml
## Verify Services
Make sure all the pods are running, and restart the codegen-xxxx pod if necessary.
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGen service for access.
Open another terminal and run the following command to verify that the service is working:
```
kubectl get pods
curl http://codegen:7778/v1/codegen -H "Content-Type: application/json" -d '{
curl http://localhost:7778/v1/codegen -H "Content-Type: application/json" -d '{
"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."
}'
```
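
If the curl request fails, the pod logs usually show why; the deployment names below come from the manifests in this change:

```
kubectl logs deploy/codegen              # megaservice listening on 7778
kubectl logs deploy/codegen-llm-uservice # LLM microservice on 9000
kubectl logs deploy/codegen-tgi          # TGI server; model download errors show up here
```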

View File

@@ -1,35 +1,68 @@
---
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy:
https_proxy:
no_proxy:
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
---
# Source: codegen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy:
https_proxy:
no_proxy:
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
---
# Source: codegen/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
---
apiVersion: v1
kind: Service
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -45,12 +78,41 @@ spec:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
---
# Source: codegen/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
---
# Source: codegen/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.1.0
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -66,61 +128,16 @@ spec:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
spec:
securityContext: {}
containers:
- name: tgi
env:
- name: MODEL_ID
value: ise-uiuc/Magicoder-S-DS-6.7B
- name: PORT
value: "80"
securityContext: {}
image: "ghcr.io/huggingface/tgi-gaudi:1.2.1"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
ports:
- name: http
containerPort: 80
protocol: TCP
resources:
limits:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt
type: Directory
---
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -137,44 +154,113 @@ spec:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
spec:
securityContext: {}
securityContext:
{}
containers:
- name: codegen
env:
- name: TGI_LLM_ENDPOINT
value: "http://codegen-tgi:80"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
envFrom:
- configMapRef:
name: codegen-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://codegen-tgi:80
- curl
- http://codegen-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources: {}
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: codegen/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
resources:
limits:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
---
# Source: codegen/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.1.0
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -191,17 +277,52 @@ spec:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
spec:
securityContext: null
securityContext:
null
containers:
- name: codegen
env:
- name: LLM_SERVICE_HOST_IP
value: codegen-llm-uservice
securityContext: null
#- name: MEGA_SERVICE_PORT
# value: 7778
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codegen:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: codegen
containerPort: 7778
protocol: TCP
resources: null
# startupProbe:
# httpGet:
# host: codegen-llm-uservice
# port: 9000
# path: /
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: 7778
# readinessProbe:
# httpGet:
# path: /
# port: 7778
resources:
null
volumes:
- name: tmp
emptyDir: {}
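
Since the `codegen-tgi` deployment above requests one `habana.ai/gaudi` device and mounts `/mnt/opea-models` from the host, a quick sanity check before applying the manifest (a sketch, assuming the Habana device plugin is already installed; the node name is only an example) is:

```
# confirm the nodes advertise Gaudi devices and the model cache directory exists
kubectl describe nodes | grep habana.ai/gaudi
ssh user@worker-node-1 'ls -ld /mnt/opea-models'
```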

View File

@@ -1,35 +1,68 @@
---
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codegen-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy:
https_proxy:
no_proxy:
LANGCHAIN_TRACING_V2: "false"
LANGCHAIN_API_KEY: insert-your-langchain-key-here
LANGCHAIN_PROJECT: "opea-llm-uservice"
---
# Source: codegen/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codegen-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
PORT: "2080"
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
HF_TOKEN: "insert-your-huggingface-token-here"
MAX_INPUT_TOKENS: "1024"
MAX_TOTAL_TOKENS: "4096"
http_proxy:
https_proxy:
no_proxy:
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
---
# Source: codegen/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 80
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
---
apiVersion: v1
kind: Service
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -45,12 +78,41 @@ spec:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
---
# Source: codegen/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
---
# Source: codegen/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.1.0
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -66,59 +128,16 @@ spec:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
spec:
securityContext: {}
containers:
- name: tgi
env:
- name: MODEL_ID
value: ise-uiuc/Magicoder-S-DS-6.7B
- name: PORT
value: "80"
securityContext: {}
image: "ghcr.io/huggingface/text-generation-inference:1.4"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
ports:
- name: http
containerPort: 80
protocol: TCP
resources: {}
volumes:
- name: model-volume
hostPath:
path: /mnt
type: Directory
---
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -135,44 +154,112 @@ spec:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codegen
spec:
securityContext: {}
securityContext:
{}
containers:
- name: codegen
env:
- name: TGI_LLM_ENDPOINT
value: "http://codegen-tgi:80"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
value:
- name: no_proxy
value:
securityContext: {}
envFrom:
- configMapRef:
name: codegen-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
startupProbe:
exec:
command:
- curl
- http://codegen-tgi:80
- curl
- http://codegen-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources: {}
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: codegen/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codegen
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: codegen-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/text-generation-inference:1.4"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
resources:
{}
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
---
# Source: codegen/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codegen
labels:
helm.sh/chart: codegen-0.1.0
helm.sh/chart: codegen-0.8.0
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
app.kubernetes.io/version: "1.0.0"
@@ -189,17 +276,52 @@ spec:
app.kubernetes.io/name: codegen
app.kubernetes.io/instance: codegen
spec:
securityContext: null
securityContext:
null
containers:
- name: codegen
env:
- name: LLM_SERVICE_HOST_IP
value: codegen-llm-uservice
securityContext: null
#- name: MEGA_SERVICE_PORT
# value: 7778
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codegen:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: codegen
containerPort: 7778
protocol: TCP
resources: null
# startupProbe:
# httpGet:
# host: codegen-llm-uservice
# port: 9000
# path: /
# initialDelaySeconds: 5
# periodSeconds: 5
# failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: 7778
# readinessProbe:
# httpGet:
# path: /
# port: 7778
resources:
null
volumes:
- name: tmp
emptyDir: {}
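
The model served by TGI is taken from the `codegen-tgi-config` ConfigMap above, so switching models is a one-line edit before `kubectl apply`; a sketch using the previous default model only as an example:

```
sed -i 's|MODEL_ID: "meta-llama/CodeLlama-7b-hf"|MODEL_ID: "ise-uiuc/Magicoder-S-DS-6.7B"|' codegen.yaml
kubectl apply -f codegen.yaml
```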

View File

@@ -12,7 +12,7 @@ IMAGE_TAG=${IMAGE_TAG:-latest}
function init_codegen() {
# executed under path manifest/codegen/xeon
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
# replace megaservice image tag
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" {} \;
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
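
The rewrites above are plain `sed` substitutions over the rendered manifest, so the same customization can be done by hand; a sketch, assuming you are in the directory that contains `codegen.yaml`:

```
MOUNT_DIR=/data/opea-models
IMAGE_TAG=v0.8
sed -i "s#path: /mnt/opea-models#path: ${MOUNT_DIR}#g" codegen.yaml
sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" codegen.yaml
```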

View File

@@ -12,7 +12,7 @@ IMAGE_TAG=${IMAGE_TAG:-latest}
function init_codegen() {
# executed under path manifest/codegen/xeon
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
# replace megaservice image tag
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" {} \;
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"