Files
GenAIExamples/CodeTrans/kubernetes/manifests/gaudi/codetrans.yaml
2024-08-29 17:32:30 +08:00

410 lines
10 KiB
YAML

---
# Source: codetrans/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codetrans-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://codetrans-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
---
# Source: codetrans/charts/tgi/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codetrans-tgi-config
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "HuggingFaceH4/mistral-7b-grok"
PORT: "2080"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
no_proxy: ""
HABANA_LOGS: "/tmp/habana_logs"
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
---
# Source: codetrans/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
---
# Source: codetrans/charts/tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
protocol: TCP
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
---
# Source: codetrans/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 7777
targetPort: 7777
protocol: TCP
name: codetrans
selector:
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
---
# Source: codetrans/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.8.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
containers:
- name: codetrans
envFrom:
- configMapRef:
name: codetrans-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: codetrans/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.8.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "2.1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
containers:
- name: tgi
envFrom:
- configMapRef:
name: codetrans-tgi-config
securityContext:
{}
image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /tmp
name: tmp
ports:
- name: http
containerPort: 2080
protocol: TCP
livenessProbe:
failureThreshold: 24
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
resources:
limits:
habana.ai/gaudi: 1
volumes:
- name: model-volume
hostPath:
path: /mnt/opea-models
type: Directory
- name: tmp
emptyDir: {}
---
# Source: codetrans/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codetrans
labels:
helm.sh/chart: codetrans-0.8.0
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "v0.8"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: codetrans
app.kubernetes.io/instance: codetrans
spec:
securityContext:
null
containers:
- name: codetrans
env:
- name: LLM_SERVICE_HOST_IP
value: codetrans-llm-uservice
#- name: MEGA_SERVICE_PORT
# value: 7777
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/codetrans:latest"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /tmp
name: tmp
ports:
- name: codetrans
containerPort: 7777
protocol: TCP
resources:
null
volumes:
- name: tmp
emptyDir: {}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: codetrans-nginx-config
data:
default.conf: |
server {
listen 80;
listen [::]:80;
location / {
root /usr/share/nginx/html;
index index.html index.htm;
}
location /v1/codetrans {
proxy_pass http://codetrans:7777;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
---
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: codetrans-nginx-deployment
spec:
replicas: 1
selector:
matchLabels:
app: nginx
template:
metadata:
labels:
app: nginx
spec:
containers:
- name: nginx
image: nginx:latest
ports:
- containerPort: 80
volumeMounts:
- name: nginx-config-volume
mountPath: /etc/nginx/conf.d/default.conf
subPath: default.conf
volumes:
- name: nginx-config-volume
configMap:
name: codetrans-nginx-config
---
kind: Service
apiVersion: v1
metadata:
name: nginx-svc
spec:
selector:
app: nginx
ports:
- protocol: TCP
port: 80
targetPort: 80
nodePort: 30789
type: NodePort