microsoft · LiliDeng · Oct 28, 2025 · Copilot · Dec 10, 2025 · Copilot
@@ -959,6 +959,95 @@ Example of log_agent notifier:
 The AI analysis results are stored in the test result message's ``analysis["AI"]``
 field and can be consumed by other notifiers like HTML or custom reporting systems.
 
+perfevaluation
+^^^^^^^^^^^^^^
+
+Evaluates performance test results against predefined criteria and optionally fails tests when targets are not met.
+
+**Basic Usage:**
+
+.. code:: yaml
+
+   notifier:
+     - type: perfevaluation
+       criteria_file: "perf_criteria.yml"
+       output_file: "results.json"
+       fail_test_on_performance_failure: true
+
+**Parameters:**
+
+criteria_file
+'''''''''''''
+type: str, optional, default: ``"*_criteria.yml"``
+
+Path or glob pattern to YAML files containing performance criteria.
+
+criteria
+''''''''
+type: dict, optional, default: None
+
+Direct criteria definition in runbook. Takes priority over criteria_file.
+
+output_file
+'''''''''''
+type: str, optional, default: None
+
+Output path for detailed evaluation results in JSON format.
+
+fail_test_on_performance_failure
+''''''''''''''''''''''''''''''''
+type: bool, optional, default: False
+
+Mark tests as failed when performance criteria are not met.
+
+**YAML Criteria Format:**
+
+Hierarchical format with groups and conditions:
+
+.. code:: yaml
+
+   # Global settings
+   statistics_times: 3
+   error_threshold: 0.1
+   statistics_type: average
+
+   groups:
+     - name: "Storage Performance"
+       conditions:
+         - name: "test_case"
+           type: "metadata"
+           value: "*fio*"
+         - name: "vm_size"
+           type: "information"
+           value: "Standard_D*"
+
+       metrics:
+         - name: "IOPS_Read"
+           min_value: 1000
+           target_value: 5000
+           error_threshold: 0.10
+
+**Metric Properties:**
+
+- ``min_value``: Minimum acceptable value
+- ``max_value``: Maximum acceptable value
+- ``target_value``: Expected target value
+- ``error_threshold``: Acceptable deviation from target (as decimal, e.g., 0.10 = 10%)
+
+**Pattern Matching:**
+
+Uses fnmatch patterns:
+
+- ``Standard_D*``: All D-series VMs
+- ``*fio*``: Test cases containing "fio"
+- ``Standard_L??s_v2``: L-series v2 sizes; each ``?`` matches exactly one character (e.g., ``Standard_L64s_v2``)
+
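+**Conditions:**
+
+Each condition has a ``name``, a ``type`` and a ``value`` pattern, for example:
+
+- ``test_case`` (``type: metadata``): Match test case name
+- ``vm_size`` (``type: information``): Match VM size
+- All conditions must match (AND logic)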
+
 environment
 ~~~~~~~~~~~
 

@@ -15,6 +15,7 @@
 import lisa.notifiers.file  # noqa: F401
 import lisa.notifiers.junit  # noqa: F401
 import lisa.notifiers.perfdump  # noqa: F401
+import lisa.notifiers.perfevaluation.perfevaluation  # noqa: F401
 import lisa.notifiers.text_result  # noqa: F401
 import lisa.runners.lisa_runner  # noqa: F401
 import lisa.sut_orchestrator.ready  # noqa: F401

@@ -0,0 +1,14 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Performance Evaluation Notifier Package
+
+This package provides performance evaluation capabilities for LISA tests,
+including criteria validation and test result modification based on
+performance metrics.
+"""
+
+__all__ = ["PerfEvaluation", "PerfEvaluationSchema", "MetricCriteria"]
+
+from .perfevaluation import MetricCriteria, PerfEvaluation, PerfEvaluationSchema
@@ -0,0 +1,180 @@
+# Performance evaluation criteria for NVMe storage tests
+# Global configuration
+statistics_times: 3  # Default: run each test 3 times to calculate statistics
+error_threshold: 0.1  # Default: 10% tolerance
+statistics_type: average  # Default statistics method
+
+# Test suite groups
+groups:
+  - name: "NVMe Performance - L64s_v2 Specific"
+    description: "Performance criteria for Standard_L64s_v2 VM"
+    error_threshold: 0.20
+    statistics_type: average
+    statistics_times: 1
+
+    conditions:
+      - name: "test_case"
+        type: "metadata"
+        value: "perf_nvme"
+      - name: "vm_size"
+        type: "information"
+        value: "Standard_L64s_v2"
+
+    metrics:
+      # 32 cores, 8 disks performance criteria for L64s_v2
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 800000.0
+        target_value: 1033000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 750000.0
+        target_value: 950000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 500000.0
+        target_value: 1200000.0
+        error_threshold: 0.30
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 150000.0
+        target_value: 650000.0
+        error_threshold: 0.30
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_latency"
+        max_value: 50.0
+        target_value: 10
+        error_threshold: 0.30
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_latency"
+        max_value: 60.0
+        target_value: 10
+        error_threshold: 0.30
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_latency"
+        max_value: 80.0
+        target_value: 25.0
+        error_threshold: 0.30
+
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_latency"
+        max_value: 100.0
+        target_value: 80.0
+        error_threshold: 0.30
+
+      # Higher queue depth tests
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 1200000.0
+        target_value: 1500000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 1000000.0
+        target_value: 1300000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 800000.0
+        target_value: 1100000.0
+        error_threshold: 0.30
+
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 700000.0
+        target_value: 1000000.0
+        error_threshold: 0.30
+
+      # Highest queue depth tests
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 1800000.0
+        target_value: 2200000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 1600000.0
+        target_value: 2000000.0
+        error_threshold: 0.25
+
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 1200000.0
+        target_value: 1600000.0
+        error_threshold: 0.30
+
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 1000000.0
+        target_value: 1400000.0
+        error_threshold: 0.30
+
+  - name: "TCP NTTTCP SRIOV Performance - D2ads_v5 Specific"
+    description: "Performance criteria for Standard_D2ads_v5 VM - TCP NTTTCP SRIOV"
+    error_threshold: 0.20
+    statistics_type: average
+    statistics_times: 1
+
+    conditions:
+      - name: "test_case"
+        type: "metadata"
+        value: "perf_tcp_ntttcp_sriov"
+      - name: "vm_size"
+        type: "information"
+        value: "Standard_D2ads_v5"
+
+    metrics:
+      - name: "buffer_size_conn_1"
+        min_value: 0.0
+        target_value: 65536
+        error_threshold: 0.30
+
+      - name: "client_mtu_conn_1"
+        min_value: 1400
+        target_value: 1500
+        error_threshold: 0.05
+
+      - name: "connections_created_time_conn_1"
+        max_value: 5.0
+        target_value: 1.0
+        error_threshold: 0.50
+
+      - name: "latency_us_conn_1"
+        max_value: 500.0
+        target_value: 100.0
+        error_threshold: 0.50
+
+      - name: "pkts_interrupts_conn_1"
+        min_value: 0
+        target_value: 100000
+        error_threshold: 0.50
+
+      - name: "receiver_cycles_per_byte_conn_1"
+        max_value: 100.0
+        target_value: 20.0
+        error_threshold: 0.50
+
+      - name: "retrans_segments_conn_1"
+        max_value: 1000
+        target_value: 0
+        error_threshold: 2.0
+
+      - name: "rx_packets_conn_1"
+        min_value: 100000
+        target_value: 1000000
+        error_threshold: 0.50
+
+      - name: "sender_cycles_per_byte_conn_1"
+        max_value: 100.0
+        target_value: 20.0
+        error_threshold: 0.50
+
+      - name: "server_mtu_conn_1"
+        min_value: 1400
+        target_value: 1500
+        error_threshold: 0.05
+
+      - name: "throughput_in_gbps_conn_1"
+        min_value: 0.5
+        target_value: 2.0
+        error_threshold: 0.30
+
+      - name: "tx_packets_conn_1"
+        min_value: 100000
+        target_value: 1000000
+        error_threshold: 0.50