Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion backends/arm/scripts/parse_test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,16 @@
ALL_EDGE_OPS = SAMPLE_INPUT.keys() | CUSTOM_EDGE_OPS

# Add all targets and TOSA profiles we support here.
TARGETS = ["tosa_FP", "tosa_INT", "u55_INT", "u85_INT", "vgf_INT", "vgf_FP"]
TARGETS = [
"tosa_FP",
"tosa_INT",
"u55_INT",
"u85_INT",
"vgf_INT",
"vgf_FP",
"vgf_quant",
"vgf_no_quant",
]


def get_op_name_map():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ class TestCLIPTextModelWithProjection:
"torch.ops.higher_order.executorch_call_delegate": 2,
}

ops_after_partitioner_vgf_quantize = ops_after_partitioner_FP
ops_after_partitioner_vgf_no_quantize = ops_after_partitioner_FP

def _prepare_inputs(
self,
batch_size=12,
Expand Down Expand Up @@ -119,7 +122,7 @@ def test_CLIPTextModelWithProjection_tosa_INT():


@common.SkipIfNoModelConverter
def test_CLIPTextModelWithProjection_vgf_FP():
def test_CLIPTextModelWithProjection_vgf_no_quant():
text_encoder_model, text_encoder_model_inputs = (
TestCLIPTextModelWithProjection().prepare_model_and_inputs()
)
Expand All @@ -129,23 +132,24 @@ def test_CLIPTextModelWithProjection_vgf_FP():
text_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
atol=4, # TODO: Investigate numerical issue: MAX Diff ~50%
atol=4,
transform_passes=[
ConvertInt64ConstOpsToInt32Pass(),
ConvertInt64OutputOpsToInt32Pass(),
InsertInt32CastsAfterInt64PlaceholdersPass(),
],
quantize=False,
)
pipeline.change_args(
"check_count.exir", TestCLIPTextModelWithProjection.ops_after_partitioner_FP
"check_count.exir",
TestCLIPTextModelWithProjection.ops_after_partitioner_vgf_no_quantize,
)
pipeline.run()


@common.SkipIfNoModelConverter
def test_CLIPTextModelWithProjection_vgf_INT():
def test_CLIPTextModelWithProjection_vgf_quant():
text_encoder_model, text_encoder_model_inputs = (
TestCLIPTextModelWithProjection().prepare_model_and_inputs()
)
Expand All @@ -155,12 +159,12 @@ def test_CLIPTextModelWithProjection_vgf_INT():
text_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
atol=0.8,
quantize=True,
)
pipeline.change_args(
"check_count.exir",
TestCLIPTextModelWithProjection.ops_after_partitioner_INT,
TestCLIPTextModelWithProjection.ops_after_partitioner_vgf_quantize,
)
pipeline.run()
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class TestSD3Transformer2DModel:
"executorch_exir_dialects_edge__ops_aten_permute_copy_default": 1,
}

ops_after_partitioner_vgf_quantize = ops_after_partitioner_FP
ops_after_partitioner_vgf_no_quantize = ops_after_partitioner_FP

def _prepare_inputs(
self,
batch_size=2,
Expand Down Expand Up @@ -141,7 +144,7 @@ def test_SD3Transformer2DModel_tosa_INT():


@common.SkipIfNoModelConverter
def test_SD3Transformer2DModel_vgf_FP():
def test_SD3Transformer2DModel_vgf_no_quant():
sd35_transformer2D_model, sd35_transformer2D_model_inputs = (
TestSD3Transformer2DModel().prepare_model_and_inputs()
)
Expand All @@ -151,19 +154,20 @@ def test_SD3Transformer2DModel_vgf_FP():
sd35_transformer2D_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
atol=4.0,
quantize=False,
)
pipeline.change_args(
"check_count.exir", TestSD3Transformer2DModel.ops_after_partitioner_FP
"check_count.exir",
TestSD3Transformer2DModel.ops_after_partitioner_vgf_no_quantize,
)
pipeline.run()


@common.SkipIfNoModelConverter
def test_SD3Transformer2DModel_vgf_INT():
def test_SD3Transformer2DModel_vgf_quant():
sd35_transformer2D_model, sd35_transformer2D_model_inputs = (
TestSD3Transformer2DModel().prepare_model_and_inputs()
)
Expand All @@ -173,13 +177,14 @@ def test_SD3Transformer2DModel_vgf_INT():
sd35_transformer2D_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
qtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
rtol=1.0,
qtol=1.0,
rtol=1.0, # TODO: MLETORCH-875: Reduce tolerance of SD3Transformer2DModel with FP and INT
atol=4.0,
quantize=True,
)
pipeline.change_args(
"check_count.exir", TestSD3Transformer2DModel.ops_after_partitioner_INT
"check_count.exir",
TestSD3Transformer2DModel.ops_after_partitioner_vgf_quantize,
)
pipeline.run()
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ class TestT5EncoderModel:
"torch.ops.higher_order.executorch_call_delegate": 3,
}

ops_after_partitioner_vgf_quantize = {
"executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1,
"torch.ops.higher_order.executorch_call_delegate": 1,
}

ops_after_partitioner_vgf_no_quantize = ops_after_partitioner_vgf_quantize

def _prepare_inputs(
self,
batch_size=12,
Expand Down Expand Up @@ -110,7 +117,7 @@ def test_T5EncoderModel_tosa_INT():


@common.SkipIfNoModelConverter
def test_T5EncoderModel_vgf_FP():
def test_T5EncoderModel_vgf_no_quant():
t5_encoder_model, t5_encoder_model_inputs = (
TestT5EncoderModel().prepare_model_and_inputs()
)
Expand All @@ -120,22 +127,22 @@ def test_T5EncoderModel_vgf_FP():
t5_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
transform_passes=[
ConvertInt64ConstOpsToInt32Pass(),
ConvertInt64OutputOpsToInt32Pass(),
InsertInt32CastsAfterInt64PlaceholdersPass(),
],
quantize=False,
)
pipeline.change_args(
"check_count.exir", TestT5EncoderModel.ops_after_partitioner_FP
"check_count.exir", TestT5EncoderModel.ops_after_partitioner_vgf_no_quantize
)
pipeline.run()


@common.SkipIfNoModelConverter
def test_T5EncoderModel_vgf_INT():
def test_T5EncoderModel_vgf_quant():
t5_encoder_model, t5_encoder_model_inputs = (
TestT5EncoderModel().prepare_model_and_inputs()
)
Expand All @@ -145,10 +152,10 @@ def test_T5EncoderModel_vgf_INT():
t5_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
quantize=True,
)
pipeline.change_args(
"check_count.exir", TestT5EncoderModel.ops_after_partitioner_INT
"check_count.exir", TestT5EncoderModel.ops_after_partitioner_vgf_quantize
)
pipeline.run()
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_AutoencoderKL_tosa_INT():


@common.SkipIfNoModelConverter
def test_AutoencoderKL_vgf_FP():
def test_AutoencoderKL_vgf_no_quant():
auto_encoder_model, auto_encoder_model_inputs = (
TestAutoencoderKL().prepare_model_and_inputs()
)
Expand All @@ -94,14 +94,14 @@ def test_AutoencoderKL_vgf_FP():
auto_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
quantize=False,
)
pipeline.run()


@common.SkipIfNoModelConverter
def test_AutoencoderKL_vgf_INT():
def test_AutoencoderKL_vgf_quant():
auto_encoder_model, auto_encoder_model_inputs = (
TestAutoencoderKL().prepare_model_and_inputs()
)
Expand All @@ -111,8 +111,8 @@ def test_AutoencoderKL_vgf_INT():
auto_encoder_model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
atol=1.0, # TODO: MLETORCH-990 Reduce tolerance of vae(AutoencoderKL) with INT
quantize=True,
)
pipeline.run()
8 changes: 4 additions & 4 deletions backends/arm/test/models/test_conformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,14 @@ def test_conformer_u85_INT():


@common.SkipIfNoModelConverter
def test_conformer_vgf_INT():
def test_conformer_vgf_quant():
pipeline = VgfPipeline[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
quantize=True,
)
pipeline.pop_stage("check_count.exir")
pipeline.change_args(
Expand All @@ -152,13 +152,13 @@ def test_conformer_vgf_INT():


@common.SkipIfNoModelConverter
def test_conformer_vgf_FP():
def test_conformer_vgf_no_quant():
pipeline = VgfPipeline[input_t](
TestConformer.conformer,
TestConformer.model_example_inputs,
aten_op=TestConformer.aten_ops,
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
quantize=False,
)
pipeline.run()
8 changes: 4 additions & 4 deletions backends/arm/test/models/test_deit_tiny_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,28 +93,28 @@ def test_deit_tiny_u85_INT():


@common.SkipIfNoModelConverter
def test_deit_tiny_vgf_INT():
def test_deit_tiny_vgf_quant():
pipeline = VgfPipeline[input_t](
deit_tiny,
model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
atol=1.5,
qtol=1,
quantize=True,
)
pipeline.run()


@common.SkipIfNoModelConverter
def test_deit_tiny_vgf_FP():
def test_deit_tiny_vgf_no_quant():
pipeline = VgfPipeline[input_t](
deit_tiny,
model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
quantize=False,
)
pipeline.run()
10 changes: 5 additions & 5 deletions backends/arm/test/models/test_dl3_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,15 @@ def test_dl3_u85_INT():


@common.SkipIfNoModelConverter
def test_dl3_vgf_INT():
def test_dl3_vgf_quant():
pipeline = VgfPipeline[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
run_on_vulkan_runtime=True, # TODO: run on vulkan runtime
run_on_vulkan_runtime=True,
quantize=True,
)
pipeline.change_args(
"run_method_and_compare_outputs", rtol=0.1, atol=0.1
Expand All @@ -106,13 +106,13 @@ def test_dl3_vgf_INT():


@common.SkipIfNoModelConverter
def test_dl3_vgf_FP():
def test_dl3_vgf_no_quant():
pipeline = VgfPipeline[input_t](
TestDl3.dl3,
TestDl3.model_example_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
quantize=False,
)
pipeline.run()
8 changes: 4 additions & 4 deletions backends/arm/test/models/test_inception_v3_arm.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,28 +93,28 @@ def test_ic3_u85_BI():
@pytest.mark.slow
@pytest.mark.skip(reason="Takes too long to run on CI")
@common.SkipIfNoModelConverter
def test_ic3_vgf_FP():
def test_ic3_vgf_no_quant():
pipeline = VgfPipeline[input_t](
ic3,
model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+FP",
use_to_edge_transform_and_lower=True,
quantize=False,
)
pipeline.run()


@pytest.mark.slow
@pytest.mark.skip(reason="Takes too long to run on CI")
@common.SkipIfNoModelConverter
def test_ic3_vgf_INT():
def test_ic3_vgf_quant():
pipeline = VgfPipeline[input_t](
ic3,
model_inputs,
aten_op=[],
exir_op=[],
tosa_version="TOSA-1.0+INT",
use_to_edge_transform_and_lower=True,
quantize=True,
)
pipeline.run()
Loading
Loading