Compare commits
11 Commits
Fix-sec
...
refactor_b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97d277cd1d | ||
|
|
3f918422c9 | ||
|
|
53e15bfb79 | ||
|
|
bbe649c44c | ||
|
|
6e26d4615a | ||
|
|
500fcdb975 | ||
|
|
4825420f04 | ||
|
|
78a1efd7f0 | ||
|
|
9b9314b062 | ||
|
|
8b85e8c793 | ||
|
|
eba1c300b3 |
90
ChatQnA/chatqna.yaml
Normal file
90
ChatQnA/chatqna.yaml
Normal file
@@ -0,0 +1,90 @@
|
||||
# Copyright (C) 2025 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
deploy:
|
||||
device: gaudi
|
||||
version: 1.1.0
|
||||
modelUseHostPath: /mnt/models
|
||||
HUGGINGFACEHUB_API_TOKEN: ""
|
||||
node: [1, 2, 4]
|
||||
namespace: "default"
|
||||
cards_per_node: 8
|
||||
|
||||
services:
|
||||
backend:
|
||||
instance_num: [2, 2, 4]
|
||||
cores_per_instance: ""
|
||||
memory_capacity: ""
|
||||
|
||||
teirerank:
|
||||
enabled: True
|
||||
model_id: ""
|
||||
instance_num: [1, 1, 1]
|
||||
cards_per_instance: 1
|
||||
|
||||
tei:
|
||||
model_id: ""
|
||||
instance_num: [1, 2, 4]
|
||||
cores_per_instance: ""
|
||||
memory_capacity: ""
|
||||
|
||||
llm:
|
||||
engine: tgi
|
||||
model_id: ""
|
||||
instance_num: [7, 15, 31]
|
||||
max_batch_size: [1, 2, 4, 8]
|
||||
max_input_length: ""
|
||||
max_total_tokens: ""
|
||||
max_batch_total_tokens: ""
|
||||
max_batch_prefill_tokens: ""
|
||||
cards_per_instance: 1
|
||||
|
||||
data-prep:
|
||||
instance_num: [1, 1, 1]
|
||||
cores_per_instance: ""
|
||||
memory_capacity: ""
|
||||
|
||||
retriever-usvc:
|
||||
instance_num: [2, 2, 4]
|
||||
cores_per_instance: ""
|
||||
memory_capacity: ""
|
||||
|
||||
redis-vector-db:
|
||||
instance_num: [1, 1, 1]
|
||||
cores_per_instance: ""
|
||||
memory_capacity: ""
|
||||
|
||||
chatqna-ui:
|
||||
instance_num: [1, 1, 1]
|
||||
|
||||
nginx:
|
||||
instance_num: [1, 1, 1]
|
||||
|
||||
benchmark:
|
||||
# HTTP request behavior related fields
|
||||
concurrency: [1, 2, 4]
|
||||
totoal_query_num: [2048, 4096]
|
||||
duration: [5, 10] # unit: minutes
|
||||
query_num_per_concurrency: [4, 8, 16]
|
||||
possion: True
|
||||
possion_arrival_rate: 1.0
|
||||
warmup_iterations: 10
|
||||
seed: 1024
|
||||
|
||||
# dataset related fields
|
||||
dataset: pub_med10 # [dummy_english, dummy_chinese, pub_med100] predefined keywords for supported datasets
|
||||
user_queries: [1, 2, 4]
|
||||
query_token_size: 128 # if specified, queries are sent with this fixed token size
|
||||
|
||||
# advanced settings in each component which will impact performance.
|
||||
dataprep: # not targeted this time
|
||||
chunk_size: [1024]
|
||||
chunk_overlap: [1000]
|
||||
retriever: # not targeted this time
|
||||
algo: IVF
|
||||
fetch_k: 2
|
||||
k: 1
|
||||
rerank:
|
||||
top_n: 2
|
||||
llm:
|
||||
max_token_size: 128 # specify the output token size
|
||||
1134
deploy_and_benchmark.py
Normal file
1134
deploy_and_benchmark.py
Normal file
File diff suppressed because it is too large
Load Diff
9
requirements.txt
Normal file
9
requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
kubernetes
|
||||
locust
|
||||
numpy
|
||||
opea-eval
|
||||
pytest
|
||||
pyyaml
|
||||
requests
|
||||
sseclient-py
|
||||
transformers
|
||||
Reference in New Issue
Block a user