# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Overall configuration settings for the test suite
test_suite_config:
  # The specific test cases being tested, e.g., chatqna, codegen, codetrans,
  # faqgen, audioqna, visualqna
  examples: ["codetrans"]
  deployment_type: "k8s"  # Default is "k8s", can also be "docker"
  # NOTE(review): a plain `None` is loaded by YAML as the string "None", not
  # as null - confirm how the consumer treats it before changing these values.
  service_ip: None  # Leave as None for k8s, specify for Docker
  service_port: None  # Leave as None for k8s, specify for Docker
  warm_ups: 0  # Number of test requests for warm-up
  run_time: 60m  # The max total run time for the test suite
  seed: null  # The seed for all RNGs (explicit null == previously empty value)
  # Number of test requests at each concurrency level
  user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
  # Number of seconds to wait for a simulated user to complete any executing
  # task before exiting. 120 sec by default.
  query_timeout: 120
  random_prompt: false  # Use random prompts if true, fixed prompts if false
  # Collect service metrics if true, do not collect service metrics if false
  collect_service_metric: false
  # Generate data visualization if true, do not generate it if false
  data_visualization: false
  llm_model: "HuggingFaceH4/mistral-7b-grok"  # The LLM model used for the test
  # The directory to store the test output
  test_output_dir: "/home/sdp/benchmark_output"
  load_shape:  # Tenant concurrency pattern
    name: constant  # poisson or constant (locust default load shape)
    params:  # Loadshape-specific parameters
      # Constant load shape specific parameters, activate only if
      # load_shape.name is constant
      constant:
        # If user_queries is specified, concurrent_level is target number of
        # requests per user. If not, it is the number of simulated users
        concurrent_level: 4
        # Request arrival rate. If set, concurrent_level will be overridden,
        # constant load will be generated based on arrival-rate
        # arrival_rate: 1.0
      # Poisson load shape specific parameters, activate only if
      # load_shape.name is poisson
      poisson:
        arrival_rate: 1.0  # Request arrival rate
  # Fill the user-defined namespace. Otherwise, it will be default.
  namespace: ""

# Per-example test cases. Each entry below carries a run_test flag and the
# service_name to target; the llm entry also carries request parameters.
test_cases:
  codetrans:
    llm:
      run_test: true
      service_name: "llm-svc"  # Replace with your service name
      parameters:
        model_name: "HuggingFaceH4/mistral-7b-grok"
        max_new_tokens: 128
        temperature: 0.01
        top_k: 10
        top_p: 0.95
        repetition_penalty: 1.03
        stream: true
    llmserve:
      run_test: true
      service_name: "codetrans-llm-svc"  # Replace with your service name
    e2e:
      run_test: true
      service_name: "codetrans-backend-server-svc"  # Replace with your service name