[ChatQnA]Update manifests (#716)

* update manifests for v0.9
This commit is contained in:
Zhenzhong1
2024-09-03 15:24:54 +08:00
committed by GitHub
parent 8c40204eda
commit 3563f5db6b
4 changed files with 7 additions and 7 deletions

View File

@@ -15,7 +15,7 @@ data:
 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
 REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
 INDEX_NAME: rag-redis
-HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN}
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 EMBEDDING_SERVICE_HOST_IP: embedding-svc
 RETRIEVER_SERVICE_HOST_IP: retriever-svc
 RERANK_SERVICE_HOST_IP: reranking-svc

View File

@@ -35,9 +35,9 @@ spec:
 - --model-id
 - $(LLM_MODEL_ID)
 - --max-input-length
-- '2048'
+- '1024'
 - --max-total-tokens
-- '4096'
+- '2048'
 - --max-batch-total-tokens
 - '65536'
 - --max-batch-prefill-tokens

View File

@@ -35,9 +35,9 @@ spec:
 - --model-id
 - $(LLM_MODEL_ID)
 - --max-input-length
-- '2048'
+- '1024'
 - --max-total-tokens
-- '4096'
+- '2048'
 - --max-batch-total-tokens
 - '65536'
 - --max-batch-prefill-tokens

View File

@@ -35,9 +35,9 @@ spec:
 - --model-id
 - $(LLM_MODEL_ID)
 - --max-input-length
-- '2048'
+- '1024'
 - --max-total-tokens
-- '4096'
+- '2048'
 - --max-batch-total-tokens
 - '65536'
 - --max-batch-prefill-tokens