444 changes: 444 additions & 0 deletions tests/pytorch/test_mxfp8_2d_quantize.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions transformer_engine/common/cast/dispatch/quantize.cuh
@@ -85,7 +85,7 @@ void quantize_fwd_helper(const NVTETensor input, NVTETensor output,
Tensor *dummy_workspace_tensor = nullptr;
mxfp8::quantize</*IS_DBIAS=*/false, /*IS_DACT=*/false, IS_ACT, ParamOP, OP>(
*input_tensor, dummy_input_tensor, noop_tensor, output_tensor, dummy_dbias_tensor,
dummy_workspace_tensor, stream);
dummy_workspace_tensor, quant_config_cpp.mxfp8_2d_quantization, stream);
break;
}
case NVTE_NVFP4_1D_SCALING: {
@@ -223,7 +223,7 @@ void quantize_bwd_helper(const NVTETensor grad, const NVTETensor input, NVTETens
case NVTE_MXFP8_1D_SCALING: {
mxfp8::quantize<IS_DBIAS, IS_DACT, /*IS_ACT=*/false, ParamOP, OP>(
*grad_tensor, input_tensor, noop_tensor, output_tensor, dbias_tensor, workspace_tensor,
stream);
quant_config_cpp.mxfp8_2d_quantization, stream);
break;
}
case NVTE_NVFP4_1D_SCALING: {
154 changes: 104 additions & 50 deletions transformer_engine/common/cast/mxfp8/quantize_mxfp8.cuh
@@ -45,7 +45,7 @@ constexpr size_t THREADS_PER_BANK = TOTAL_BANKS_WIDTH / SCALE_DIM_X; // 4 = 128
template <bool IS_DBIAS, bool IS_DACT, bool IS_ACT, typename ParamOP,
float (*OP)(float, const ParamOP &), typename IType, typename OType, bool ROWWISE_SCALING,
bool COLWISE_SCALING, bool WITH_GEMM_SWIZZLED_SCALES, size_t CHUNK_DIM_Y,
size_t CHUNK_DIM_X, size_t THREADS_PER_CHUNK>
size_t CHUNK_DIM_X, size_t THREADS_PER_CHUNK, bool kIs2DBlockScaling>
__global__ void __launch_bounds__(THREADS_PER_CHUNK)
quantize_mxfp8_kernel(const __grid_constant__ CUtensorMap tensor_map_input,
const __grid_constant__ CUtensorMap tensor_map_act_input,
@@ -163,6 +163,10 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
#pragma nv_diag_suppress static_var_with_dynamic_init
__shared__ alignas(8) uint64_t mbar[STAGES];

// Shared memory to pass 2D block scales from colwise to rowwise pass
// THREADS_X = number of 32x32 blocks in X direction
__shared__ e8m0_t block_scales_2d[THREADS_X];

initialize_barriers<STAGES, THREADS_PER_CHUNK>(mbar, is_master_thread);

int parity = 0;
@@ -264,6 +268,13 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
}
}

if constexpr (kIs2DBlockScaling) {
#pragma unroll
for (int i = 16; i > 0; i /= 2) {
thread_amax = fmaxf(thread_amax, __shfl_xor_sync(0xffffffff, thread_amax, i));
}
}

// 2. Compute E8M0 scaling factor
const e8m0_t biased_exponent =
ptx::float_to_e8m0(thread_amax * Quantized_Limits<OType>::max_norm_rcp);
@@ -278,6 +289,14 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
}
scales_colwise[scale_idx] = biased_exponent;

// In 2D mode, save scale to shared memory for rowwise pass
// Each warp (processing one 32x32 block) writes one scale via lane 0
if constexpr (kIs2DBlockScaling && ROWWISE_SCALING) {
if (thread_lane == 0) {
block_scales_2d[threadIdx.x / THREADS_PER_WARP] = biased_exponent;
}
}

const float block_scale_inverse = ptx::exp2f_rcp(biased_exponent);
const ptx::floatx2 block_scale_inverse_2x = {block_scale_inverse, block_scale_inverse};

@@ -300,7 +319,9 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
if constexpr (ROWWISE_SCALING) {
const size_t shmem_offset_base_rowwise =
buff * BUFF_DIM + thread_offset_Y_rowwise * BUFF_DIM_X;
thread_amax = 0.0f;
if constexpr (!kIs2DBlockScaling) {
thread_amax = 0.0f;
}
float in_compute_rowwise[SCALE_DIM_X];
Vec<IType, PACK_SIZE> in_cached[WAVES];

@@ -317,13 +338,17 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
const size_t shmem_offset_rowwise = shmem_offset_base_rowwise + swizzled_thread_idx;
// Load elements
in_IType[w].load_from(&in_sh[shmem_offset_rowwise]);
if constexpr (!kIs2DBlockScaling) {
#pragma unroll
for (int e = 0; e < PACK_SIZE / 2; ++e) {
ptx::abs_max_2x(thread_amax_2x, thread_amax_2x, in_IType[w].data.elt[e]);
for (int e = 0; e < PACK_SIZE / 2; ++e) {
ptx::abs_max_2x(thread_amax_2x, thread_amax_2x, in_IType[w].data.elt[e]);
}
}
}
thread_amax =
static_cast<float>(__hmax(__habs(thread_amax_2x.x), __habs(thread_amax_2x.y)));
if constexpr (!kIs2DBlockScaling) {
thread_amax =
static_cast<float>(__hmax(__habs(thread_amax_2x.x), __habs(thread_amax_2x.y)));
}
} else if constexpr (IS_CACHED_ACT_OP) {
// ensures that all writes to cache made in the section above are visible to all threads
__syncthreads();
@@ -342,25 +367,29 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
in_cached[w].load_from(&cached_act_sh[shmem_offset_rowwise]);
// Since the TMA data-alignment requirement is 16B (i.e. cols % 8 == 0 for BF16 elements),
// a single check in the column direction is sufficient to ensure the entire wave is inside the boundaries
if (!out_of_bounds) {
if constexpr (std::is_same_v<IType, float>) {
if constexpr (!kIs2DBlockScaling) {
if (!out_of_bounds) {
if constexpr (std::is_same_v<IType, float>) {
#pragma unroll
for (int e = 0; e < PACK_SIZE; ++e) {
thread_amax = fmaxf(thread_amax, fabsf(in_cached[w].data.elt[e]));
}
} else {
for (int e = 0; e < PACK_SIZE; ++e) {
thread_amax = fmaxf(thread_amax, fabsf(in_cached[w].data.elt[e]));
}
} else {
#pragma unroll
for (int e = 0; e < PACK_SIZE; e += 2) {
const IType2 in_cached_2x = {in_cached[w].data.elt[e],
in_cached[w].data.elt[e + 1]};
ptx::abs_max_2x(thread_amax_2x, thread_amax_2x, in_cached_2x);
for (int e = 0; e < PACK_SIZE; e += 2) {
const IType2 in_cached_2x = {in_cached[w].data.elt[e],
in_cached[w].data.elt[e + 1]};
ptx::abs_max_2x(thread_amax_2x, thread_amax_2x, in_cached_2x);
}
}
}
}
}
if constexpr (!std::is_same_v<IType, float>) {
thread_amax =
static_cast<float>(__hmax(__habs(thread_amax_2x.x), __habs(thread_amax_2x.y)));
if constexpr (!kIs2DBlockScaling) {
if constexpr (!std::is_same_v<IType, float>) {
thread_amax =
static_cast<float>(__hmax(__habs(thread_amax_2x.x), __habs(thread_amax_2x.y)));
}
}
} else {
#pragma unroll
@@ -397,26 +426,41 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
if constexpr (!std::is_same_v<IType, float>) {
elt = static_cast<float>(static_cast<IType>(elt));
}
if constexpr (COMPUTE_ACTIVATIONS) {
const bool row_out_of_bounds_rowwise = (row_base_rowwise + stage_offset_Y >= rows);
const bool swizzled_col_out_of_bounds =
(block_offset_X + swizzled_thread_idx >= cols);
const bool out_of_bounds = (row_out_of_bounds_rowwise || swizzled_col_out_of_bounds);
if (!out_of_bounds) {
if constexpr (!kIs2DBlockScaling) {
if constexpr (COMPUTE_ACTIVATIONS) {
const bool row_out_of_bounds_rowwise = (row_base_rowwise + stage_offset_Y >= rows);
const bool swizzled_col_out_of_bounds =
(block_offset_X + swizzled_thread_idx >= cols);
const bool out_of_bounds =
(row_out_of_bounds_rowwise || swizzled_col_out_of_bounds);
if (!out_of_bounds) {
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
} else {
// If no activation, elt is 0 so we can safely do this
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
} else {
// If no activation, elt is 0 so we can safely do this
thread_amax = fmaxf(thread_amax, fabsf(elt));
}
in_compute_rowwise[j] = elt;
}
}
}

// 2. Compute E8M0 scaling factor
const e8m0_t biased_exponent =
ptx::float_to_e8m0(thread_amax * Quantized_Limits<OType>::max_norm_rcp);
e8m0_t biased_exponent;
if constexpr (kIs2DBlockScaling && COLWISE_SCALING) {
// In 2D mode with both scaling directions, use scale from colwise pass
// Sync to ensure colwise writes to block_scales_2d are visible across warps
__syncthreads();
e8m0_t scale_from_shmem;
if (thread_lane < THREADS_X) {
scale_from_shmem = block_scales_2d[thread_lane];
}
// Broadcast: each thread gets scale from lane matching its tid_X_rowwise
biased_exponent = __shfl_sync(0xffffffff, scale_from_shmem, tid_X_rowwise);
Review comment on lines +455 to +460 (Contributor):

scale_from_shmem is potentially uninitialized for threads where thread_lane >= THREADS_X. While __shfl_sync only reads from lanes specified by tid_X_rowwise (which should be < THREADS_X), it's safer to initialize this variable.

Suggested change:
-  e8m0_t scale_from_shmem;
+  e8m0_t scale_from_shmem = 0;
   if (thread_lane < THREADS_X) {
     scale_from_shmem = block_scales_2d[thread_lane];
   }
   // Broadcast: each thread gets scale from lane matching its tid_X_rowwise
   biased_exponent = __shfl_sync(0xffffffff, scale_from_shmem, tid_X_rowwise);

} else {
biased_exponent = ptx::float_to_e8m0(thread_amax * Quantized_Limits<OType>::max_norm_rcp);
}
const int stage_scales_offset_Y = scales_offset_Y_rowwise + stage_offset_Y;
const int stage_scales_offset_X = scales_offset_X_rowwise;
size_t scale_idx;
@@ -556,7 +600,8 @@ __global__ void __launch_bounds__(THREADS_PER_CHUNK)
template <bool IS_DBIAS, bool IS_DACT, bool IS_ACT, typename ParamOP,
float (*OP)(float, const ParamOP &)>
void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop, // TODO (ksivamani)
Tensor *output, Tensor *dbias, Tensor *workspace, cudaStream_t stream) {
Tensor *output, Tensor *dbias, Tensor *workspace, const bool use_2d_quantization,
cudaStream_t stream) {
using namespace quantize_kernel;
checkCuDriverContext(stream);

@@ -642,7 +687,7 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,
with_gemm_swizzled_scales, WITH_GEMM_SWIZZLED_SCALES,

if (specialized::hasSpec<IS_DBIAS, IS_DACT, IS_ACT, IType, OType>() &&
!WITH_GEMM_SWIZZLED_SCALES) {
!WITH_GEMM_SWIZZLED_SCALES && !use_2d_quantization) {
switch (scaling_type) {
case ScalingType::ROWWISE: {
using traits = specialized::CastTraits<IType, OType, true, false>;
@@ -774,11 +819,14 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,
}
}

if (use_2d_quantization) { scaling_type = ScalingType::BIDIMENSIONAL; }

switch (scaling_type) {
case ScalingType::ROWWISE: {
auto kernel = quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType,
OType, true, false, WITH_GEMM_SWIZZLED_SCALES,
CHUNK_DIM_Y, CHUNK_DIM_X, THREADS_PER_CHUNK>;
auto kernel =
quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType, OType,
true, false, WITH_GEMM_SWIZZLED_SCALES, CHUNK_DIM_Y,
CHUNK_DIM_X, THREADS_PER_CHUNK, false>;
NVTE_CHECK_CUDA(cudaFuncSetAttribute(
kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, dshmem_size));

@@ -791,9 +839,10 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,
break;
}
case ScalingType::COLWISE: {
auto kernel = quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType,
OType, false, true, WITH_GEMM_SWIZZLED_SCALES,
CHUNK_DIM_Y, CHUNK_DIM_X, THREADS_PER_CHUNK>;
auto kernel =
quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType, OType,
false, true, WITH_GEMM_SWIZZLED_SCALES, CHUNK_DIM_Y,
CHUNK_DIM_X, THREADS_PER_CHUNK, false>;
NVTE_CHECK_CUDA(cudaFuncSetAttribute(
kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, dshmem_size));

@@ -806,18 +855,23 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,
break;
}
case ScalingType::BIDIMENSIONAL: {
auto kernel = quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType,
OType, true, true, WITH_GEMM_SWIZZLED_SCALES,
CHUNK_DIM_Y, CHUNK_DIM_X, THREADS_PER_CHUNK>;
NVTE_CHECK_CUDA(cudaFuncSetAttribute(
kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, dshmem_size));

kernel<<<grid, block_size, dshmem_size, stream>>>(
tensor_map_input, tensor_map_act_input, tensor_map_output_rowwise,
tensor_map_output_colwise, scales_rowwise_ptr, scales_colwise_ptr, noop_ptr,
workspace_ptr, amax_ptr, rows, cols, scale_stride_rowwise,
scale_stride_colwise);
NVTE_CHECK_CUDA(cudaGetLastError());
TRANSFORMER_ENGINE_SWITCH_CONDITION(
use_2d_quantization, kIs2DBlockScaling,

auto kernel =
quantize_mxfp8_kernel<IS_DBIAS, IS_DACT, IS_ACT, ParamOP, OP, IType,
OType, true, true, WITH_GEMM_SWIZZLED_SCALES,
CHUNK_DIM_Y, CHUNK_DIM_X, THREADS_PER_CHUNK,
kIs2DBlockScaling>;
NVTE_CHECK_CUDA(cudaFuncSetAttribute(
kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, dshmem_size));

kernel<<<grid, block_size, dshmem_size, stream>>>(
tensor_map_input, tensor_map_act_input, tensor_map_output_rowwise,
tensor_map_output_colwise, scales_rowwise_ptr, scales_colwise_ptr,
noop_ptr, workspace_ptr, amax_ptr, rows, cols, scale_stride_rowwise,
scale_stride_colwise);
NVTE_CHECK_CUDA(cudaGetLastError()););
break;
}
}
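
For orientation: in 2D mode the colwise pass reduces each thread's amax across the warp so that every 32x32 block yields a single amax, converts it to an E8M0 scale, and stashes that scale in block_scales_2d shared memory; the rowwise pass then reuses the same scale instead of deriving one from a 1x32 row segment. Below is a minimal NumPy sketch of those numerics for reference only — the helper names, the rounding inside e8m0_scale, and the omission of the final FP8-E4M3 rounding are assumptions for illustration, not the kernel's actual code path.

import numpy as np

E4M3_MAX = 448.0  # max normal value of FP8 E4M3

def e8m0_scale(amax: float) -> float:
    # Power-of-two scale derived from a block amax (sketch of float_to_e8m0 semantics;
    # the device code's rounding/bias handling may differ).
    if amax == 0.0:
        return 1.0
    return float(2.0 ** np.ceil(np.log2(amax / E4M3_MAX)))

def quantize_mxfp8_2d(x: np.ndarray, block: int = 32):
    # One shared E8M0 scale per block x block tile, reused by both the
    # rowwise and colwise outputs (which therefore agree up to layout).
    rows, cols = x.shape
    assert rows % block == 0 and cols % block == 0
    scales = np.empty((rows // block, cols // block), dtype=np.float32)
    q = np.empty_like(x, dtype=np.float32)
    for bi in range(rows // block):
        for bj in range(cols // block):
            tile = x[bi * block:(bi + 1) * block, bj * block:(bj + 1) * block]
            s = e8m0_scale(float(np.abs(tile).max()))
            scales[bi, bj] = s
            # The real kernel would additionally round tile / s to FP8 E4M3 here.
            q[bi * block:(bi + 1) * block, bj * block:(bj + 1) * block] = np.clip(
                tile / s, -E4M3_MAX, E4M3_MAX)
    return q, scales
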
4 changes: 3 additions & 1 deletion transformer_engine/common/common.h
@@ -411,6 +411,7 @@ struct QuantizationConfig {
bool nvfp4_2d_quantization = false;
bool stochastic_rounding = false;
bool use_fast_math = false;
bool mxfp8_2d_quantization = false;

static constexpr size_t attr_sizes[] = {
sizeof(uint8_t), // force_pow_2_scales
@@ -420,7 +421,8 @@ struct QuantizationConfig {
sizeof(NVTETensor), // rng_seed and offset
sizeof(uint8_t), // nvfp4_2d_quantization
sizeof(uint8_t), // stochastic_rounding
sizeof(uint8_t) // use_fast_math
sizeof(uint8_t), // use_fast_math
sizeof(uint8_t) // mxfp8_2d_quantization
};
};

transformer_engine/common/include/transformer_engine/transformer_engine.h
@@ -370,6 +370,8 @@ enum NVTEQuantizationConfigAttribute {
* inconsistently between kernels.
*/
kNVTEQuantizationConfigUseFastMath = 7,
/*! Whether to use 2D block scaling for MXFP8 */
kNVTEQuantizationConfigMXFP82DQuantization = 8,
kNVTEQuantizationConfigNumAttributes
};

Expand Down Expand Up @@ -1046,6 +1048,13 @@ class QuantizationConfigWrapper {
sizeof(val));
}

/*! \brief Set whether to use 2D block scaling for MXFP8 */
void set_mxfp8_2d_quantization(bool mxfp8_2d_quantization) {
const auto val = static_cast<uint8_t>(mxfp8_2d_quantization);
nvte_set_quantization_config_attribute(config_, kNVTEQuantizationConfigMXFP82DQuantization,
&val, sizeof(val));
}

private:
/*! \brief Wrapped NVTEQuantizationConfig. */
NVTEQuantizationConfig config_ = nullptr;
19 changes: 17 additions & 2 deletions transformer_engine/common/recipe/__init__.py
@@ -65,21 +65,25 @@ class QParams:
amax_epsilon: optional minimum value of abs max
random_hadamard_transform: whether to use random hadamard transform
stochastic_rounding: whether to use stochastic rounding
fp4_2d_quantization: whether to use 2D block scaling for NVFP4
mxfp8_2d_quantization: whether to use 2D block scaling for MXFP8
"""

power_2_scale: bool = False
amax_epsilon: float = 0.0
random_hadamard_transform: bool = False
stochastic_rounding: bool = False
fp4_2d_quantization: bool = False
mxfp8_2d_quantization: bool = False

def __repr__(self) -> str:
return (
f"Qparams(\npower_2_scale={self.power_2_scale},\n"
f"amax_epsilon={self.amax_epsilon},\n"
f"random_hadamard_transform={self.random_hadamard_transform},\n"
f"stochastic_rounding={self.stochastic_rounding},\n"
f"fp4_2d_quantization={self.fp4_2d_quantization}\n)"
f"fp4_2d_quantization={self.fp4_2d_quantization},\n"
f"mxfp8_2d_quantization={self.mxfp8_2d_quantization}\n)"
)


@@ -284,8 +288,13 @@ class MXFP8BlockScaling(Recipe):
fp8_format : {Format.E4M3, Format.HYBRID}, default = Format.E4M3
Controls the FP8 data format used during forward and backward
pass.
enable_2d_quantization : bool, default = False
If set to `True`, 2D block scaling is used for weight tensors.
"""

# Configuration envvars
enable_2d_quantization: bool = os.getenv("NVTE_MXFP8_ENABLE_2D_QUANTIZATION", "0") == "1"

margin: int = 0
fp8_format: Format = Format.E4M3
fp8_dpa: bool = False
@@ -294,11 +303,17 @@ class MXFP8BlockScaling(Recipe):
def __post_init__(self) -> None:
assert self.fp8_format != Format.E5M2, "Pure E5M2 training is not supported."

# Quantization params (same pattern as NVFP4BlockScaling)
self.fp8_quant_fwd_inp = QParams(mxfp8_2d_quantization=False)
self.fp8_quant_fwd_weight = QParams(mxfp8_2d_quantization=self.enable_2d_quantization)
self.fp8_quant_bwd_grad = QParams(mxfp8_2d_quantization=False)

def __repr__(self) -> str:
return (
f"recipe_type={self.__class__.__name__}, "
f"margin={self.margin}, "
f"format={str(self.fp8_format).split('.')[1]}"
f"format={str(self.fp8_format).split('.')[1]}, "
f"enable_2d_quantization={self.enable_2d_quantization}"
)


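
As a usage sketch (assuming the NVTE_MXFP8_ENABLE_2D_QUANTIZATION switch added above and the existing fp8_autocast / Linear APIs; MXFP8 also requires hardware support), enabling 2D weight quantization from Python might look like the following. The environment variable must be set before transformer_engine is imported, since enable_2d_quantization is read at class-definition time.

import os
os.environ["NVTE_MXFP8_ENABLE_2D_QUANTIZATION"] = "1"  # must precede the imports below

import torch
import transformer_engine.pytorch as te
from transformer_engine.common.recipe import MXFP8BlockScaling

recipe = MXFP8BlockScaling()  # weights get QParams(mxfp8_2d_quantization=True)

layer = te.Linear(1024, 1024, params_dtype=torch.bfloat16)
x = torch.randn(32, 1024, device="cuda", dtype=torch.bfloat16)
with te.fp8_autocast(enabled=True, fp8_recipe=recipe):
    y = layer(x)
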