Compare commits
11 Commits
main
...
refactor_b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97d277cd1d | ||
|
|
3f918422c9 | ||
|
|
53e15bfb79 | ||
|
|
bbe649c44c | ||
|
|
6e26d4615a | ||
|
|
500fcdb975 | ||
|
|
4825420f04 | ||
|
|
78a1efd7f0 | ||
|
|
9b9314b062 | ||
|
|
8b85e8c793 | ||
|
|
eba1c300b3 |
90
ChatQnA/chatqna.yaml
Normal file
90
ChatQnA/chatqna.yaml
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# Copyright (C) 2025 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
device: gaudi
|
||||||
|
version: 1.1.0
|
||||||
|
modelUseHostPath: /mnt/models
|
||||||
|
HUGGINGFACEHUB_API_TOKEN: ""
|
||||||
|
node: [1, 2, 4]
|
||||||
|
namespace: "default"
|
||||||
|
cards_per_node: 8
|
||||||
|
|
||||||
|
services:
|
||||||
|
backend:
|
||||||
|
instance_num: [2, 2, 4]
|
||||||
|
cores_per_instance: ""
|
||||||
|
memory_capacity: ""
|
||||||
|
|
||||||
|
teirerank:
|
||||||
|
enabled: True
|
||||||
|
model_id: ""
|
||||||
|
instance_num: [1, 1, 1]
|
||||||
|
cards_per_instance: 1
|
||||||
|
|
||||||
|
tei:
|
||||||
|
model_id: ""
|
||||||
|
instance_num: [1, 2, 4]
|
||||||
|
cores_per_instance: ""
|
||||||
|
memory_capacity: ""
|
||||||
|
|
||||||
|
llm:
|
||||||
|
engine: tgi
|
||||||
|
model_id: ""
|
||||||
|
instance_num: [7, 15, 31]
|
||||||
|
max_batch_size: [1, 2, 4, 8]
|
||||||
|
max_input_length: ""
|
||||||
|
max_total_tokens: ""
|
||||||
|
max_batch_total_tokens: ""
|
||||||
|
max_batch_prefill_tokens: ""
|
||||||
|
cards_per_instance: 1
|
||||||
|
|
||||||
|
data-prep:
|
||||||
|
instance_num: [1, 1, 1]
|
||||||
|
cores_per_instance: ""
|
||||||
|
memory_capacity: ""
|
||||||
|
|
||||||
|
retriever-usvc:
|
||||||
|
instance_num: [2, 2, 4]
|
||||||
|
cores_per_instance: ""
|
||||||
|
memory_capacity: ""
|
||||||
|
|
||||||
|
redis-vector-db:
|
||||||
|
instance_num: [1, 1, 1]
|
||||||
|
cores_per_instance: ""
|
||||||
|
memory_capacity: ""
|
||||||
|
|
||||||
|
chatqna-ui:
|
||||||
|
instance_num: [1, 1, 1]
|
||||||
|
|
||||||
|
nginx:
|
||||||
|
instance_num: [1, 1, 1]
|
||||||
|
|
||||||
|
benchmark:
|
||||||
|
# http request behavior related fields
|
||||||
|
concurrency: [1, 2, 4]
|
||||||
|
totoal_query_num: [2048, 4096]
|
||||||
|
duration: [5, 10] # unit: minutes
|
||||||
|
query_num_per_concurrency: [4, 8, 16]
|
||||||
|
possion: True
|
||||||
|
possion_arrival_rate: 1.0
|
||||||
|
warmup_iterations: 10
|
||||||
|
seed: 1024
|
||||||
|
|
||||||
|
# dataset related fields
|
||||||
|
dataset: pub_med10 # [dummy_english, dummy_chinese, pub_med100] predefined keywords for supported datasets
|
||||||
|
user_queries: [1, 2, 4]
|
||||||
|
query_token_size: 128 # if specified, queries are sent with this fixed token size
|
||||||
|
|
||||||
|
# advanced settings in each component that will impact performance.
|
||||||
|
dataprep: # not targeted this time
|
||||||
|
chunk_size: [1024]
|
||||||
|
chunk_overlap: [1000]
|
||||||
|
retriever: # not targeted this time
|
||||||
|
algo: IVF
|
||||||
|
fetch_k: 2
|
||||||
|
k: 1
|
||||||
|
rerank:
|
||||||
|
top_n: 2
|
||||||
|
llm:
|
||||||
|
max_token_size: 128 # specify the output token size
|
||||||
1134
deploy_and_benchmark.py
Normal file
1134
deploy_and_benchmark.py
Normal file
File diff suppressed because it is too large
Load Diff
9
requirements.txt
Normal file
9
requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
kubernetes
|
||||||
|
locust
|
||||||
|
numpy
|
||||||
|
opea-eval
|
||||||
|
pytest
|
||||||
|
pyyaml
|
||||||
|
requests
|
||||||
|
sseclient-py
|
||||||
|
transformers
|
||||||
Reference in New Issue
Block a user