NVIDIA · haoxiz-nvidia · Jun 13, 2026 · coderabbitai · Jun 22, 2026 · coderabbitai
@@ -27,6 +27,7 @@
 from transformers import AutoConfig, AutoTokenizer
 
 from modelopt.onnx.quantization.int4 import quantize as quantize_int4
+from modelopt.onnx.quantization.ort_utils import register_abi_ep
 
 logging.getLogger().setLevel(logging.INFO)
 
@@ -72,7 +73,7 @@ def make_input_shapes_profile_for_ep_list(ep_list, model_name_or_path):
     # Using empty shapes_profile for non-NvTensorRtRtx EPs.
     input_shapes_profile_sequence = []
     for ep in ep_list:
-        if ep == "NvTensorRtRtx":
+        if ep in {"NvTensorRtRtx", "NvTensorRtRtx-abi"}:
             min_shapes, max_shapes, opt_shapes = get_input_shapes_profile(model_name_or_path)
             input_shapes_profile = {
                 "nv_profile_min_shapes": min_shapes,
@@ -304,14 +305,15 @@ def get_calib_inputs(
 
 def parse_calibration_eps(value):
     """Parse and validate the calibration_eps input."""
+    # Returns the comma-separated names as a list of whitespace-stripped strings, or
+    # raises argparse.ArgumentTypeError for the first name that is not a valid choice.
-    valid_choices = {"cuda", "cpu", "dml", "NvTensorRtRtx"}
+    valid_choices = {"cuda", "cpu", "dml", "NvTensorRtRtx", "NvTensorRtRtx-abi"}
     # Split the input by commas and remove any surrounding whitespace
     eps = [item.strip() for item in value.split(",")]
     # Validate each calibration endpoint
     for ep in eps:
         if ep not in valid_choices:
+            # The choices listed in the message are written out by hand, so they must
+            # be kept in sync with valid_choices above.
             raise argparse.ArgumentTypeError(
-                f"Invalid calibration endpoint: '{ep}'. Choose from 'cuda', 'cpu', 'dml', 'NvTensorRtRtx'."
+                f"Invalid calibration endpoint: '{ep}'. Choose from 'cuda', 'cpu', 'dml', "
+                "'NvTensorRtRtx', 'NvTensorRtRtx-abi'."
             )
     return eps
 
@@ -413,8 +415,13 @@ def main(args):
         args.trust_remote_code,
     )
 
+    if "NvTensorRtRtx-abi" in args.calibration_eps:
+        register_abi_ep(args.abi_ep_path)
+
     input_shapes_profile_data = None
-    if "NvTensorRtRtx" in args.calibration_eps and (args.algo not in ["rtn", "rtn_dq"]):
+    if any(ep in args.calibration_eps for ep in {"NvTensorRtRtx", "NvTensorRtRtx-abi"}) and (
+        args.algo not in ["rtn", "rtn_dq"]
+    ):
         # NvTensorRtRtx EP uses (min, max, opt) profile for dynamic shapes in the model's inputs.
         input_shapes_profile_data = make_input_shapes_profile_for_ep_list(
             args.calibration_eps, args.model_name
@@ -607,7 +614,16 @@ def main(args):
         "--calibration_eps",
         type=parse_calibration_eps,  # Use the custom parser
         default=["cuda", "cpu"],  # Default as a list
-        help="Comma-separated list of calibration endpoints. Choose from 'cuda', 'cpu', 'dml', 'NvTensorRtRtx'.",
+        help=(
+            "Comma-separated list of calibration endpoints. Choose from 'cuda', 'cpu', 'dml', "
+            "'NvTensorRtRtx', 'NvTensorRtRtx-abi'."
+        ),
+    )
+    parser.add_argument(
+        "--abi_ep_path",
+        type=str,
+        default=None,
+        help="Path to an external NvTensorRtRtx ABI execution-provider library.",
     )
     parser.add_argument(
         "--trust_remote_code",

@@ -62,6 +62,12 @@ def get_parser() -> argparse.ArgumentParser:
     argparser.add_argument(
         "--onnx_path", required=True, type=str, help="Input onnx model without Q/DQ nodes."
     )
+    argparser.add_argument(
+        "--abi_ep_path",
+        required=False,
+        type=str,
+        help="Path to an external NvTensorRtRtx ABI execution-provider library.",
+    )
     argparser.add_argument(
         "--quantize_mode",
         type=str,
@@ -110,7 +116,8 @@ def get_parser() -> argparse.ArgumentParser:
         nargs="+",
         help=(
             "Priority order for the execution providers (EP) to calibrate the model. "
-            "Any subset of ['trt', 'cuda:x', dml:x, 'cpu'], where 'x' is the device id."
+            "Any subset of ['trt', 'cuda:x', dml:x, 'cpu', 'NvTensorRtRtx', "
+            "'NvTensorRtRtx-abi'], where 'x' is the device id."
             "If a custom op is detected in the model, 'trt' will automatically be added to the EP list."
         ),
     )
@@ -507,6 +514,7 @@ def main():
         autotune_warmup_runs=args.autotune_warmup_runs,
         autotune_timing_runs=args.autotune_timing_runs,
         autotune_trtexec_args=args.autotune_trtexec_args,
+        abi_ep_path=args.abi_ep_path,
     )
 
 

@@ -314,6 +314,24 @@ def _check_for_nv_tensorrt_rtx_libs():
     return found
 
 
+# Absolute paths of ABI EP libraries this module has already registered with ONNX
+# Runtime in the current process. Used to keep registration idempotent.
+_REGISTERED_NV_TENSORRT_RTX_ABI_LIBS = set()
+
+
+def _check_for_nv_tensorrt_rtx_abi_libs(ep_path: str):
+    """Validate the NvTensorRtRtx ABI EP library path and register it with ONNX Runtime.
+
+    The library is registered under the name ``NvTensorRTRTXExecutionProvider``.
+    Calling this function again with the same library path is a no-op, so callers
+    that run quantization more than once in a process do not re-register the library.
+
+    Args:
+        ep_path: Filesystem path of the external EP library. Must be non-empty and
+            point to an existing file.
+
+    Returns:
+        True once the library is registered (or was registered by an earlier call).
+
+    Raises:
+        FileNotFoundError: If ``ep_path`` is empty or does not point to a file.
+    """
+    logger.info("Checking for NvTensorRtRtx ABI EP library")
+    if not ep_path:
+        raise FileNotFoundError("Need to provide abi_ep_path to use NvTensorRtRtx-abi")
+    if not os.path.isfile(ep_path):
+        raise FileNotFoundError(f"NvTensorRtRtx ABI EP library not found: {ep_path}")
+
+    # Key on the absolute path so "./lib.so" and "/abs/lib.so" count as the same library.
+    resolved_path = os.path.abspath(ep_path)
+    if resolved_path in _REGISTERED_NV_TENSORRT_RTX_ABI_LIBS:
+        logger.debug("NvTensorRtRtx ABI EP library already registered, skipping")
+        return True
+
+    # NOTE(review): registering a second library under an already-used registration
+    # name is expected to be rejected by ONNX Runtime -- confirm against the ORT docs.
+    ort.register_execution_provider_library("NvTensorRTRTXExecutionProvider", ep_path)
+    _REGISTERED_NV_TENSORRT_RTX_ABI_LIBS.add(resolved_path)
+    return True
+
+
+def register_abi_ep(abi_ep_path: str | None):
+    """Register an external NvTensorRtRtx ABI execution-provider library.
+
+    Args:
+        abi_ep_path: Filesystem path of the EP library. ``None`` is forwarded as an
+            empty string to the path check, which rejects it with ``FileNotFoundError``.
+    """
+    library_path = abi_ep_path if abi_ep_path else ""
+    _check_for_nv_tensorrt_rtx_abi_libs(library_path)
+    logger.debug("Registered NvTensorRtRtx ABI EP")
+
+
 def _prepare_ep_list(calibration_eps: list[str]):
     """Prepares the EP list for ORT from the given user input."""
     logger.debug(f"Preparing execution providers list from: {calibration_eps}")
@@ -334,6 +352,9 @@ def _prepare_ep_list(calibration_eps: list[str]):
         elif "cpu" in ep:
             providers.append("CPUExecutionProvider")
             logger.debug("Added CPU EP")
+        elif "NvTensorRtRtx-abi" in ep:
+            providers.append("NvTensorRTRTXExecutionProvider")
+            logger.debug("Added NvTensorRtRtx ABI EP")
-        elif "NvTensorRtRtx-abi" in ep:
-            providers.append("NvTensorRTRTXExecutionProvider")
-            logger.debug("Added NvTensorRtRtx ABI EP")
+        elif ep == "NvTensorRtRtx-abi":
+            providers.append("NvTensorRTRTXExecutionProvider")
+            logger.debug("Added NvTensorRtRtx ABI EP")
-        elif "NvTensorRtRtx-abi" in ep:
-            providers.append("NvTensorRTRTXExecutionProvider")
-            logger.debug("Added NvTensorRtRtx ABI EP")
+        elif ep == "NvTensorRtRtx-abi":
+            providers.append("NvTensorRTRTXExecutionProvider")
+            logger.debug("Added NvTensorRtRtx ABI EP")
         elif "NvTensorRtRtx" in ep:
             try:
                 _check_for_nv_tensorrt_rtx_libs()

@@ -63,7 +63,7 @@
 )
 from modelopt.onnx.quantization.int4 import quantize as quantize_int4
 from modelopt.onnx.quantization.int8 import quantize as quantize_int8
-from modelopt.onnx.quantization.ort_utils import update_trt_ep_support
+from modelopt.onnx.quantization.ort_utils import register_abi_ep, update_trt_ep_support
 from modelopt.onnx.quantization.qdq_utils import (
     qdq_to_dq,
     remove_graph_input_q,
@@ -358,6 +358,7 @@ def quantize(
     simplify: bool = False,
     calibrate_per_node: bool = False,
     input_shapes_profile: Sequence[dict[str, str]] | None = None,
+    abi_ep_path: str | None = None,
     direct_io_types: bool = False,
     opset: int | None = None,
     autotune: bool = False,
@@ -491,6 +492,9 @@ def quantize(
             If None of the calibration_eps require any such shapes profile for model inputs, then nothing needs to be
             set for this "input_shapes_profile" parameter.
             Default value is None.
+        abi_ep_path:
+            Path to an external NvTensorRtRtx ABI execution-provider library. Required when
+            ``NvTensorRtRtx-abi`` is present in ``calibration_eps``.
         direct_io_types:
             If True, modify the I/O types in the quantized ONNX model to be lower precision whenever possible.
             If False, keep the I/O types in the quantized ONNX model the same as in the given ONNX model.
@@ -547,6 +551,9 @@ def quantize(
             "Per node calibration is only supported for int8 and fp8 quantization modes"
         )
 
+    if "NvTensorRtRtx-abi" in calibration_eps:
+        register_abi_ep(abi_ep_path)
+
     # quantize_static creates a shape-inferred copy at the input model's directory
     # Needs to check if we have write permission to this directory
     assert onnx_path.endswith((".onnx", ".pb"))