Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions examples/windows/onnx_ptq/genai_llm/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from transformers import AutoConfig, AutoTokenizer

from modelopt.onnx.quantization.int4 import quantize as quantize_int4
from modelopt.onnx.quantization.ort_utils import register_abi_ep

logging.getLogger().setLevel(logging.INFO)

Expand Down Expand Up @@ -72,7 +73,7 @@ def make_input_shapes_profile_for_ep_list(ep_list, model_name_or_path):
# Using empty shapes_profile for non-NvTensorRtRtx EPs.
input_shapes_profile_sequence = []
for ep in ep_list:
if ep == "NvTensorRtRtx":
if ep in {"NvTensorRtRtx", "NvTensorRtRtx-abi"}:
min_shapes, max_shapes, opt_shapes = get_input_shapes_profile(model_name_or_path)
input_shapes_profile = {
"nv_profile_min_shapes": min_shapes,
Expand Down Expand Up @@ -304,14 +305,15 @@ def get_calib_inputs(

def parse_calibration_eps(value):
    """Parse and validate the ``--calibration_eps`` command-line value.

    Args:
        value: Comma-separated execution-provider names, e.g. ``"cuda, cpu"``.
            Whitespace around each name is ignored.

    Returns:
        The list of stripped provider names, in the order they were given.

    Raises:
        argparse.ArgumentTypeError: If any name is not one of the supported choices.
    """
    # Single source of truth for the accepted names (includes the ABI variant).
    valid_choices = {"cuda", "cpu", "dml", "NvTensorRtRtx", "NvTensorRtRtx-abi"}
    # Split the input by commas and remove any surrounding whitespace
    eps = [item.strip() for item in value.split(",")]
    # Validate each calibration endpoint
    for ep in eps:
        if ep not in valid_choices:
            # Exactly one message; adjacent literals would otherwise be concatenated.
            raise argparse.ArgumentTypeError(
                f"Invalid calibration endpoint: '{ep}'. Choose from 'cuda', 'cpu', 'dml', "
                "'NvTensorRtRtx', 'NvTensorRtRtx-abi'."
            )
    return eps

Expand Down Expand Up @@ -413,8 +415,13 @@ def main(args):
args.trust_remote_code,
)

if "NvTensorRtRtx-abi" in args.calibration_eps:
register_abi_ep(args.abi_ep_path)

input_shapes_profile_data = None
if "NvTensorRtRtx" in args.calibration_eps and (args.algo not in ["rtn", "rtn_dq"]):
if any(ep in args.calibration_eps for ep in {"NvTensorRtRtx", "NvTensorRtRtx-abi"}) and (
args.algo not in ["rtn", "rtn_dq"]
):
# NvTensorRtRtx EP uses (min, max, opt) profile for dynamic shapes in the model's inputs.
input_shapes_profile_data = make_input_shapes_profile_for_ep_list(
args.calibration_eps, args.model_name
Expand Down Expand Up @@ -607,7 +614,16 @@ def main(args):
"--calibration_eps",
type=parse_calibration_eps, # Use the custom parser
default=["cuda", "cpu"], # Default as a list
help="Comma-separated list of calibration endpoints. Choose from 'cuda', 'cpu', 'dml', 'NvTensorRtRtx'.",
help=(
"Comma-separated list of calibration endpoints. Choose from 'cuda', 'cpu', 'dml', "
"'NvTensorRtRtx', 'NvTensorRtRtx-abi'."
),
)
parser.add_argument(
"--abi_ep_path",
type=str,
default=None,
help="Path to an external NvTensorRtRtx ABI execution-provider library.",
)
parser.add_argument(
"--trust_remote_code",
Expand Down
10 changes: 9 additions & 1 deletion modelopt/onnx/quantization/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def get_parser() -> argparse.ArgumentParser:
argparser.add_argument(
"--onnx_path", required=True, type=str, help="Input onnx model without Q/DQ nodes."
)
argparser.add_argument(
"--abi_ep_path",
required=False,
type=str,
help="Path to an external NvTensorRtRtx ABI execution-provider library.",
)
argparser.add_argument(
"--quantize_mode",
type=str,
Expand Down Expand Up @@ -110,7 +116,8 @@ def get_parser() -> argparse.ArgumentParser:
nargs="+",
help=(
"Priority order for the execution providers (EP) to calibrate the model. "
"Any subset of ['trt', 'cuda:x', dml:x, 'cpu'], where 'x' is the device id."
"Any subset of ['trt', 'cuda:x', dml:x, 'cpu', 'NvTensorRtRtx', "
"'NvTensorRtRtx-abi'], where 'x' is the device id."
Comment on lines +119 to +120

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Fix malformed --calibration_eps help text token.

Line 119 shows dml:x without quotes while all other choices are quoted. This looks like a typo in the user-facing guidance.

Suggested fix
-            "Any subset of ['trt', 'cuda:x', dml:x, 'cpu', 'NvTensorRtRtx', "
+            "Any subset of ['trt', 'cuda:x', 'dml:x', 'cpu', 'NvTensorRtRtx', "
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@modelopt/onnx/quantization/__main__.py` around lines 119 - 120, The help text
string for the calibration_eps parameter contains a malformed token where
`dml:x` is missing quotes while all other device options like `'trt'`,
`'cuda:x'`, `'cpu'`, `'NvTensorRtRtx'`, and `'NvTensorRtRtx-abi'` are quoted.
Add quotes around `dml:x` to make it `'dml:x'` to maintain consistent formatting
in the user-facing help message.

"If a custom op is detected in the model, 'trt' will automatically be added to the EP list."
),
)
Expand Down Expand Up @@ -507,6 +514,7 @@ def main():
autotune_warmup_runs=args.autotune_warmup_runs,
autotune_timing_runs=args.autotune_timing_runs,
autotune_trtexec_args=args.autotune_trtexec_args,
abi_ep_path=args.abi_ep_path,
)


Expand Down
21 changes: 21 additions & 0 deletions modelopt/onnx/quantization/ort_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,24 @@ def _check_for_nv_tensorrt_rtx_libs():
return found


def _check_for_nv_tensorrt_rtx_abi_libs(ep_path: str):
    """Validate ``ep_path`` and register it with ORT as the NvTensorRtRtx ABI EP library.

    Args:
        ep_path: Filesystem path to the external execution-provider library.

    Returns:
        ``True`` once the library has been handed to ORT for registration.

    Raises:
        FileNotFoundError: If ``ep_path`` is empty or does not point to an existing file.
    """
    logger.info("Checking for NvTensorRtRtx ABI EP library")

    # Work out the failure reason first so there is a single raise site.
    problem = None
    if not ep_path:
        problem = "Need to provide abi_ep_path to use NvTensorRtRtx-abi"
    elif not os.path.isfile(ep_path):
        problem = f"NvTensorRtRtx ABI EP library not found: {ep_path}"
    if problem is not None:
        raise FileNotFoundError(problem)

    # NOTE(review): this hands a caller-supplied library path to ORT, which loads it
    # into the current process; only existence is checked here, so the path must come
    # from a trusted source.
    registration_name = "NvTensorRTRTXExecutionProvider"
    ort.register_execution_provider_library(registration_name, ep_path)
    return True
Comment on lines +317 to +325

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the file and the exact code at the specified lines
cd modelopt && cat -n onnx/quantization/ort_utils.py | sed -n '310,330p'

Repository: NVIDIA/Model-Optimizer

Length of output: 1205


🏁 Script executed:

# Check how ep_path is passed and where _check_for_nv_tensorrt_rtx_abi_libs is called
rg "_check_for_nv_tensorrt_rtx_abi_libs" -B 3 -A 3 modelopt/

Repository: NVIDIA/Model-Optimizer

Length of output: 1109


🏁 Script executed:

# Check the function signatures that call this function to understand the trust boundary
rg "abi_ep_path" -B 5 -A 5 modelopt/ | head -100

Repository: NVIDIA/Model-Optimizer

Length of output: 6303


🏁 Script executed:

# Check if there's any documentation or config validation for abi_ep_path
rg "register_execution_provider_library" -B 5 -A 5 modelopt/

Repository: NVIDIA/Model-Optimizer

Length of output: 965


Add explicit trust boundary for ABI EP library loading.

Line 324 loads a user-supplied shared library path into the process via register_execution_provider_library. As documented in SECURITY.md, user-provided artifact paths (including the external abi_ep_path DLL) must be treated as untrusted. Security-sensitive dynamic component loading requires an explicit trust contract—either a trusted-source gate with signature verification or an inline comment documenting why the path is safe (e.g., confirming it is internally-generated and not user-supplied)—plus security codeowners review.

Currently the code only validates file existence, with no trust boundary or safety justification. This creates an RCE vector if a malicious DLL is placed at the user-supplied path.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@modelopt/onnx/quantization/ort_utils.py` around lines 317 - 325, The
_check_for_nv_tensorrt_rtx_abi_libs function accepts a user-supplied ep_path
parameter and loads it as a shared library via
ort.register_execution_provider_library without establishing a trust boundary.
Add either explicit trust validation (such as signature verification) or an
inline comment documenting why the library path is safe (for example, confirming
it is internally-generated and not directly user-supplied), then request
security team review as indicated in SECURITY.md for dynamic component loading
from user-provided artifact paths.

Source: Coding guidelines



def register_abi_ep(abi_ep_path: str | None):
    """Register an external NvTensorRtRtx ABI execution-provider library.

    Args:
        abi_ep_path: Path to the ABI EP library. ``None`` is forwarded as an empty
            string, which the underlying check rejects with ``FileNotFoundError``.
    """
    # Normalise a missing path to "" so the checker receives a plain string.
    library_path = abi_ep_path if abi_ep_path else ""
    _check_for_nv_tensorrt_rtx_abi_libs(library_path)
    logger.debug("Registered NvTensorRtRtx ABI EP")


def _prepare_ep_list(calibration_eps: list[str]):
"""Prepares the EP list for ORT from the given user input."""
logger.debug(f"Preparing execution providers list from: {calibration_eps}")
Expand All @@ -334,6 +352,9 @@ def _prepare_ep_list(calibration_eps: list[str]):
elif "cpu" in ep:
providers.append("CPUExecutionProvider")
logger.debug("Added CPU EP")
elif "NvTensorRtRtx-abi" in ep:
providers.append("NvTensorRTRTXExecutionProvider")
logger.debug("Added NvTensorRtRtx ABI EP")
Comment on lines +355 to +357

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Use exact matching for NvTensorRtRtx-abi EP selection.

Using substring matching here accepts malformed values (for example, NvTensorRtRtx-abi:0), but ABI registration is triggered via exact membership in quantize(). That creates a mismatch where provider selection can proceed without registration and fail later at runtime.

Suggested fix
-        elif "NvTensorRtRtx-abi" in ep:
+        elif ep == "NvTensorRtRtx-abi":
             providers.append("NvTensorRTRTXExecutionProvider")
             logger.debug("Added NvTensorRtRtx ABI EP")
-        elif "NvTensorRtRtx" in ep:
+        elif ep == "NvTensorRtRtx":
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
elif "NvTensorRtRtx-abi" in ep:
providers.append("NvTensorRTRTXExecutionProvider")
logger.debug("Added NvTensorRtRtx ABI EP")
elif ep == "NvTensorRtRtx-abi":
providers.append("NvTensorRTRTXExecutionProvider")
logger.debug("Added NvTensorRtRtx ABI EP")
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@modelopt/onnx/quantization/ort_utils.py` around lines 355 - 357, Replace the
substring matching condition in the elif statement that checks for
NvTensorRtRtx-abi with an exact equality comparison. Change the condition from
using the 'in' operator to using the equality operator (==) to ensure that only
the exact provider string "NvTensorRtRtx-abi" matches, not variations or
malformed values like "NvTensorRtRtx-abi:0". This ensures consistency with how
the ABI registration is triggered in the quantize() function through exact
membership checking.

elif "NvTensorRtRtx" in ep:
try:
_check_for_nv_tensorrt_rtx_libs()
Expand Down
9 changes: 8 additions & 1 deletion modelopt/onnx/quantization/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
)
from modelopt.onnx.quantization.int4 import quantize as quantize_int4
from modelopt.onnx.quantization.int8 import quantize as quantize_int8
from modelopt.onnx.quantization.ort_utils import update_trt_ep_support
from modelopt.onnx.quantization.ort_utils import register_abi_ep, update_trt_ep_support
from modelopt.onnx.quantization.qdq_utils import (
qdq_to_dq,
remove_graph_input_q,
Expand Down Expand Up @@ -358,6 +358,7 @@ def quantize(
simplify: bool = False,
calibrate_per_node: bool = False,
input_shapes_profile: Sequence[dict[str, str]] | None = None,
abi_ep_path: str | None = None,
direct_io_types: bool = False,
opset: int | None = None,
autotune: bool = False,
Expand Down Expand Up @@ -491,6 +492,9 @@ def quantize(
If None of the calibration_eps require any such shapes profile for model inputs, then nothing needs to be
set for this "input_shapes_profile" parameter.
Default value is None.
abi_ep_path:
Path to an external NvTensorRtRtx ABI execution-provider library. Required when
``NvTensorRtRtx-abi`` is present in ``calibration_eps``.
Comment on lines +495 to +497

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Keep calibration_eps documentation aligned with the new ABI EP option.

The new abi_ep_path docs are added, but the allowed calibration_eps list in this same docstring still omits NvTensorRtRtx-abi. That makes the public API docs internally inconsistent.

As per coding guidelines, public APIs should be clearly documented and kept accurate.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@modelopt/onnx/quantization/quantize.py` around lines 495 - 497, The
abi_ep_path parameter documentation references NvTensorRtRtx-abi as a valid
value in calibration_eps, but the calibration_eps parameter documentation in the
same docstring does not list NvTensorRtRtx-abi in its allowed values. Update the
calibration_eps documentation to include NvTensorRtRtx-abi in the list of
supported execution provider values to maintain consistency with the abi_ep_path
documentation.

Source: Coding guidelines

direct_io_types:
If True, modify the I/O types in the quantized ONNX model to be lower precision whenever possible.
If False, keep the I/O types in the quantized ONNX model the same as in the given ONNX model.
Expand Down Expand Up @@ -547,6 +551,9 @@ def quantize(
"Per node calibration is only supported for int8 and fp8 quantization modes"
)

if "NvTensorRtRtx-abi" in calibration_eps:
register_abi_ep(abi_ep_path)

# quantize_static creates a shape-inferred copy at the input model's directory
# Needs to check if we have write permission to this directory
assert onnx_path.endswith((".onnx", ".pb"))
Expand Down
Loading