Files
GenAIExamples/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
Zhenzhong1 b61c2abd61 upload manifests (#454)
* uploadded manifests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-07-26 20:12:54 +08:00

68 lines
1.5 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 10
selector:
matchLabels:
app: embedding-dependency-deploy
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: embedding-dependency-deploy
spec:
containers:
- envFrom:
- configMapRef:
name: qna-config
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
name: embedding-dependency-deploy
args:
- --model-id
- $(EMBEDDING_MODEL_ID)
- --auto-truncate
volumeMounts:
- mountPath: /data
name: model-volume
- mountPath: /dev/shm
name: shm
ports:
- containerPort: 80
resources:
limits:
cpu: 24
memory: 4000Mi
requests:
cpu: 24
memory: 4000Mi
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
type: Directory
- name: shm
emptyDir:
medium: Memory
sizeLimit: 1Gi
---
kind: Service
apiVersion: v1
metadata:
name: embedding-dependency-svc
spec:
type: ClusterIP
selector:
app: embedding-dependency-deploy
ports:
- name: service
port: 6006
targetPort: 80