Files
GenAIExamples/CodeTrans/benchmark/performance/benchmark.yaml
2025-01-06 13:25:55 +08:00

48 lines
2.7 KiB
YAML

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
test_suite_config: # Overall configuration settings for the test suite
examples: ["codetrans"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
deployment_type: "k8s" # Default is "k8s", can also be "docker"
service_ip: None # Leave as None for k8s, specify for Docker
service_port: None # Leave as None for k8s, specify for Docker
warm_ups: 0 # Number of test requests for warm-up
run_time: 60m # The max total run time for the test suite
seed: # The seed for all RNGs
user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # Number of test requests at each concurrency level
query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by defeult.
random_prompt: false # Use random prompts if true, fixed prompts if false
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
llm_model: "HuggingFaceH4/mistral-7b-grok" # The LLM model used for the test
test_output_dir: "/home/sdp/benchmark_output" # The directory to store the test output
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate
namespace: "" # Fill the user-defined namespace. Otherwise, it will be default.
test_cases:
codetrans:
llm:
run_test: true
service_name: "llm-svc" # Replace with your service name
parameters:
model_name: "HuggingFaceH4/mistral-7b-grok"
max_new_tokens: 128
temperature: 0.01
top_k: 10
top_p: 0.95
repetition_penalty: 1.03
stream: true
llmserve:
run_test: true
service_name: "codetrans-llm-svc" # Replace with your service name
e2e:
run_test: true
service_name: "codetrans-backend-server-svc" # Replace with your service name