Update Kubernetes manifest files for CodeGen (#441)
Updated Kubernetes manifest files for CodeGen to reflect latest changes: Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
This commit is contained in:
@@ -3,7 +3,10 @@
|
||||
> [NOTE]
|
||||
> The following values must be set before you can deploy:
|
||||
> HUGGINGFACEHUB_API_TOKEN
|
||||
> You can also customize the "MODEL_ID" and "model-volume"
|
||||
|
||||
> You can also customize the "MODEL_ID" if needed.
|
||||
|
||||
> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the CodeGen workload is running. Otherwise, you need to modify the `codegen.yaml` file to change the `model-volume` to a directory that exists on the node.
|
||||
|
||||
## Deploy On Xeon
|
||||
|
||||
@@ -25,11 +28,15 @@ kubectl apply -f codegen.yaml
|
||||
|
||||
## Verify Services
|
||||
|
||||
Make sure all the pods are running, and restart the codegen-xxxx pod if necessary.
|
||||
To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
|
||||
|
||||
Then run the command `kubectl port-forward svc/codegen 7778:7778` to expose the CodeGen service for access.
|
||||
|
||||
Open another terminal and run the following command to verify that the service is working:
|
||||
|
||||
```
|
||||
kubectl get pods
|
||||
curl http://codegen:7778/v1/codegen -H "Content-Type: application/json" -d '{
|
||||
curl http://localhost:7778/v1/codegen -H "Content-Type: application/json" -d '{
|
||||
"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."
|
||||
}'
|
||||
```
|
||||
|
||||
@@ -1,35 +1,68 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -45,12 +78,41 @@ spec:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.1.0
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -66,61 +128,16 @@ spec:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: ise-uiuc/Magicoder-S-DS-6.7B
|
||||
- name: PORT
|
||||
value: "80"
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:1.2.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -137,44 +154,113 @@ spec:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: {}
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codegen
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://codegen-tgi:80"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
|
||||
securityContext: {}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codegen-tgi:80
|
||||
- curl
|
||||
- http://codegen-tgi
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
habana.ai/gaudi: 1
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.1.0
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -191,17 +277,52 @@ spec:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: null
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: codegen
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: codegen-llm-uservice
|
||||
securityContext: null
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 7778
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: codegen
|
||||
containerPort: 7778
|
||||
protocol: TCP
|
||||
resources: null
|
||||
# startupProbe:
|
||||
# httpGet:
|
||||
# host: codegen-llm-uservice
|
||||
# port: 9000
|
||||
# path: /
|
||||
# initialDelaySeconds: 5
|
||||
# periodSeconds: 5
|
||||
# failureThreshold: 120
|
||||
# livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
# readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
|
||||
@@ -1,35 +1,68 @@
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
|
||||
# Source: codegen/charts/llm-uservice/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-llm-uservice-config
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
TGI_LLM_ENDPOINT: "http://codegen-tgi"
|
||||
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
LANGCHAIN_TRACING_V2: "false"
|
||||
LANGCHAIN_API_KEY: insert-your-langchain-key-here
|
||||
LANGCHAIN_PROJECT: "opea-llm-uservice"
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/configmap.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: codegen-tgi-config
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
data:
|
||||
MODEL_ID: "meta-llama/CodeLlama-7b-hf"
|
||||
PORT: "2080"
|
||||
HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here"
|
||||
HF_TOKEN: "insert-your-huggingface-token-here"
|
||||
MAX_INPUT_TOKENS: "1024"
|
||||
MAX_TOTAL_TOKENS: "4096"
|
||||
http_proxy:
|
||||
https_proxy:
|
||||
no_proxy:
|
||||
HABANA_LOGS: "/tmp/habana_logs"
|
||||
NUMBA_CACHE_DIR: "/tmp"
|
||||
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
|
||||
HF_HOME: "/tmp/.cache/huggingface"
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -45,12 +78,41 @@ spec:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 2080
|
||||
protocol: TCP
|
||||
name: tgi
|
||||
selector:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
# Source: codegen/templates/service.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.1.0
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -66,59 +128,16 @@ spec:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.1.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: {}
|
||||
containers:
|
||||
- name: tgi
|
||||
env:
|
||||
- name: MODEL_ID
|
||||
value: ise-uiuc/Magicoder-S-DS-6.7B
|
||||
- name: PORT
|
||||
value: "80"
|
||||
securityContext: {}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 80
|
||||
protocol: TCP
|
||||
resources: {}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt
|
||||
type: Directory
|
||||
---
|
||||
# Source: codegen/charts/llm-uservice/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-llm-uservice
|
||||
labels:
|
||||
helm.sh/chart: llm-uservice-0.1.0
|
||||
helm.sh/chart: llm-uservice-0.8.0
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -135,44 +154,112 @@ spec:
|
||||
app.kubernetes.io/name: llm-uservice
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: {}
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: codegen
|
||||
env:
|
||||
- name: TGI_LLM_ENDPOINT
|
||||
value: "http://codegen-tgi:80"
|
||||
- name: HUGGINGFACEHUB_API_TOKEN
|
||||
value: "insert-your-huggingface-token-here"
|
||||
- name: http_proxy
|
||||
value:
|
||||
- name: https_proxy
|
||||
value:
|
||||
- name: no_proxy
|
||||
value:
|
||||
|
||||
securityContext: {}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-llm-uservice-config
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/llm-tgi:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: llm-uservice
|
||||
containerPort: 9000
|
||||
protocol: TCP
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
startupProbe:
|
||||
exec:
|
||||
command:
|
||||
- curl
|
||||
- http://codegen-tgi:80
|
||||
- curl
|
||||
- http://codegen-tgi
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 120
|
||||
resources: {}
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/charts/tgi/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen-tgi
|
||||
labels:
|
||||
helm.sh/chart: tgi-0.8.0
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.4"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: tgi
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext:
|
||||
{}
|
||||
containers:
|
||||
- name: tgi
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: codegen-tgi-config
|
||||
securityContext:
|
||||
{}
|
||||
image: "ghcr.io/huggingface/text-generation-inference:1.4"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: model-volume
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 2080
|
||||
protocol: TCP
|
||||
resources:
|
||||
{}
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /mnt/opea-models
|
||||
type: Directory
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
---
|
||||
# Source: codegen/templates/deployment.yaml
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: codegen
|
||||
labels:
|
||||
helm.sh/chart: codegen-0.1.0
|
||||
helm.sh/chart: codegen-0.8.0
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
app.kubernetes.io/version: "1.0.0"
|
||||
@@ -189,17 +276,52 @@ spec:
|
||||
app.kubernetes.io/name: codegen
|
||||
app.kubernetes.io/instance: codegen
|
||||
spec:
|
||||
securityContext: null
|
||||
securityContext:
|
||||
null
|
||||
containers:
|
||||
- name: codegen
|
||||
env:
|
||||
- name: LLM_SERVICE_HOST_IP
|
||||
value: codegen-llm-uservice
|
||||
securityContext: null
|
||||
#- name: MEGA_SERVICE_PORT
|
||||
# value: 7778
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "opea/codegen:latest"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
ports:
|
||||
- name: codegen
|
||||
containerPort: 7778
|
||||
protocol: TCP
|
||||
resources: null
|
||||
# startupProbe:
|
||||
# httpGet:
|
||||
# host: codegen-llm-uservice
|
||||
# port: 9000
|
||||
# path: /
|
||||
# initialDelaySeconds: 5
|
||||
# periodSeconds: 5
|
||||
# failureThreshold: 120
|
||||
# livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
# readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: 7778
|
||||
resources:
|
||||
null
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
|
||||
@@ -12,7 +12,7 @@ IMAGE_TAG=${IMAGE_TAG:-latest}
|
||||
function init_codegen() {
|
||||
# executed under path manifest/codegen/xeon
|
||||
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
|
||||
# replace megaservice image tag
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" {} \;
|
||||
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
|
||||
|
||||
@@ -12,7 +12,7 @@ IMAGE_TAG=${IMAGE_TAG:-latest}
|
||||
function init_codegen() {
|
||||
# executed under path manifest/codegen/xeon
|
||||
# replace the mount dir "path: /mnt/opea-models" with "path: $MOUNT_DIR"
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
|
||||
# replace megaservice image tag
|
||||
find . -name '*.yaml' -type f -exec sed -i "s#image: opea/codegen:latest#image: opea/codegen:${IMAGE_TAG}#g" {} \;
|
||||
# replace the repository "image: opea/*" with "image: $IMAGE_REPO/opea/"
|
||||
|
||||
Reference in New Issue
Block a user