# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Overall configuration settings for the test suite.
#
# Values written as ${NAME} are placeholders. NOTE(review): they are presumably
# substituted from the environment before this file is parsed (confirm against
# the launcher). They are intentionally left unquoted so that a substituted
# number or boolean keeps its native YAML type; test_output_dir is the only
# placeholder that is quoted, i.e. always a string.
test_suite_config:
  # The specific test cases being tested, e.g., chatqna, codegen, codetrans,
  # faqgen, audioqna, visualqna
  examples: ["chatqna"]
  # Default is "k8s", can also be "docker"
  deployment_type: ${DEPLOYMENT_TYPE}
  # Leave as None for k8s, specify for Docker
  service_ip: ${SERVICE_IP}
  # Leave as None for k8s, specify for Docker
  service_port: ${SERVICE_PORT}
  # Number of test requests for warm-up
  warm_ups: ${WARMUP}
  # The max total run time for the test suite
  run_time: 60m
  # The seed for all RNGs (explicit null: no seed is configured)
  seed: null
  # Number of test requests at each concurrency level
  user_queries: ${USER_QUERIES}
  # Number of seconds to wait for a simulated user to complete any executing
  # task before exiting. 120 sec by default.
  query_timeout: 120
  # Use random prompts if true, fixed prompts if false
  random_prompt: false
  # Collect service metrics if true, do not collect service metrics if false
  collect_service_metric: false
  # Generate data visualization if true, do not generate data visualization
  # if false
  data_visualization: false
  # The LLM model used for the test
  llm_model: "Intel/neural-chat-7b-v3-3"
  # The directory to store the test output
  test_output_dir: "${TEST_OUTPUT_DIR}"
  # Tenant concurrency pattern
  load_shape:
    # poisson or constant (locust default load shape)
    name: ${LOAD_SHAPE}
    # Loadshape-specific parameters
    params:
      # Constant load shape specific parameters, activate only if
      # load_shape.name is constant
      constant:
        # If user_queries is specified, concurrent_level is target number of
        # requests per user. If not, it is the number of simulated users
        concurrent_level: ${CONCURRENT_LEVEL}
      # Poisson load shape specific parameters, activate only if
      # load_shape.name is poisson
      poisson:
        # Request arrival rate
        arrival_rate: ${ARRIVAL_RATE}

# Per-example test cases. Each entry under an example names one service, says
# whether it is exercised (run_test) and, for some services, carries request
# parameters. In this file only the end-to-end (e2e) case is enabled.
test_cases:
  chatqna:
    embedding:
      run_test: false
      service_name: "chatqna-embedding-usvc"  # Replace with your service name
    embedserve:
      run_test: false
      service_name: "chatqna-tei"  # Replace with your service name
    retriever:
      run_test: false
      service_name: "chatqna-retriever-usvc"  # Replace with your service name
      parameters:
        search_type: "similarity"
        k: 1
        fetch_k: 20
        lambda_mult: 0.5
        score_threshold: 0.2
    reranking:
      run_test: false
      service_name: "chatqna-reranking-usvc"  # Replace with your service name
      parameters:
        top_n: 1
    rerankserve:
      run_test: false
      service_name: "chatqna-teirerank"  # Replace with your service name
    llm:
      run_test: false
      service_name: "chatqna-llm-uservice"  # Replace with your service name
      parameters:
        max_tokens: 128
        temperature: 0.01
        top_k: 10
        top_p: 0.95
        repetition_penalty: 1.03
        stream: true
    llmserve:
      run_test: false
      service_name: "chatqna-tgi"  # Replace with your service name
    e2e:
      run_test: true
      service_name: "chatqna"  # Replace with your service name
      # NOTE(review): placed directly under e2e because no `parameters` key
      # precedes it in this entry - confirm against the consumer's schema.
      k: 1