From 3d463b3d7948520cca82afe67df7d67c4163b897 Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Mon, 26 Jan 2026 20:37:28 +0000 Subject: [PATCH 01/12] weights for dense --- hls4ml/backends/oneapi/passes/core_templates.py | 15 +++++++++++---- hls4ml/templates/oneapi/firmware/myproject.cpp | 5 ++++- hls4ml/templates/oneapi/firmware/myproject.h | 3 +++ .../oneapi/firmware/nnet_utils/nnet_dense.h | 7 +++---- hls4ml/writer/oneapi_writer.py | 8 ++++++++ 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 9602b2d0fc..64a4c7097a 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -6,6 +6,7 @@ # Dense templates dense_config_template = """struct config{index} : nnet::dense_config {{ + static constexpr unsigned n_in = {n_in}; static constexpr unsigned n_out = {n_out}; static constexpr unsigned io_type = nnet::{iotype}; @@ -30,13 +31,16 @@ typedef {weight_t.name} weight_t; typedef {index_t.name} index_t; + static constexpr weight_t weights = {weights}; + static constexpr bias_t biases = {biases}; + template using product = nnet::product::{product_type}; }};\n""" -dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});' +dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output});' dense_task_sequence_template = 'task_sequence> {name};' -dense_stream_function_template = '{name}.async({w}, {b});' +dense_stream_function_template = '{name}.async();' dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_stream.h'] @@ -53,6 +57,9 @@ def format(self, node): node.get_input_variable().type.precision, node.get_weights('weight').type.precision ) + params['weights'] = node.get_weights('weight').name + params['biases'] = node.get_weights('bias').name + return self.template.format(**params) @@ -63,8 +70,8 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) - params['w'] = node.get_weights('weight').name - params['b'] = node.get_weights('bias').name + #params['w'] = node.get_weights('weight').name + #params['b'] = node.get_weights('bias').name return self.template.format(**params) diff --git a/hls4ml/templates/oneapi/firmware/myproject.cpp b/hls4ml/templates/oneapi/firmware/myproject.cpp index 06e7d3fe37..da9439f74a 100644 --- a/hls4ml/templates/oneapi/firmware/myproject.cpp +++ b/hls4ml/templates/oneapi/firmware/myproject.cpp @@ -1,9 +1,12 @@ #include "myproject.h" -#include "parameters.h" #include // hls-fpga-machine-learning insert weights + +#include "parameters.h" + + // The inter-task pipes need to be declared in the global scope // hls-fpga-machine-learning insert inter-task pipes diff --git a/hls4ml/templates/oneapi/firmware/myproject.h b/hls4ml/templates/oneapi/firmware/myproject.h index 082ae5dc8c..8f313ea30f 100644 --- a/hls4ml/templates/oneapi/firmware/myproject.h +++ b/hls4ml/templates/oneapi/firmware/myproject.h @@ -3,6 +3,9 @@ #include "defines.h" +// hls-fpga-machine-learning insert weights + + // This file defines the interface to the kernel // currently this is fixed diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h index dc76189083..2b65eef42b 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h @@ -152,12 +152,11 @@ void dense_rf_lt(const data_T &data, res_T &res, const typename CONFIG_T::weight } } template -void dense_resource(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, - const typename CONFIG_T::bias_t &biases) { +void dense_resource(const data_T &data, res_T &res) { if (CONFIG_T::reuse_factor <= CONFIG_T::n_in) { - dense_rf_lt(data, res, weights, biases); + dense_rf_lt(data, res, CONFIG_T::weights, CONFIG_T::biases); } else { - dense_rf_gt(data, res, weights, biases); + dense_rf_gt(data, res, CONFIG_T::weights, CONFIG_T::biases); } } } // namespace nnet diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index 3c0a778c50..b42ff2990f 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -242,6 +242,14 @@ def write_project_header(self, model): for out in model_outputs: newline += out.declare_cpp() + # Insert weights + elif '// hls-fpga-machine-learning insert weights' in line: + newline = line + for layer in model.get_layers(): + for w in layer.get_weights(): + #if w not in model_brams: + newline += f'#include "weights/{w.name}.h"\n' + # Simply copy line, if no inserts are required else: newline = line From d67857369385d066b7cdaad49077069b3bf9473c Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Tue, 27 Jan 2026 18:58:42 +0000 Subject: [PATCH 02/12] hgq2 homogeneous quant fix --- hls4ml/converters/keras_v3/hgq2/_base.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hls4ml/converters/keras_v3/hgq2/_base.py b/hls4ml/converters/keras_v3/hgq2/_base.py index 4a6d0a22c2..f7b4c9ddd3 100644 --- a/hls4ml/converters/keras_v3/hgq2/_base.py +++ b/hls4ml/converters/keras_v3/hgq2/_base.py @@ -30,15 +30,19 @@ def extract_fixed_quantizer_config(q, tensor: 'KerasTensor', is_input: bool) -> k, B, I = ops.convert_to_numpy(k), ops.convert_to_numpy(B), ops.convert_to_numpy(I) # noqa: E741 I = np.where(B > 0, I, 0) # noqa: E741 # type: ignore - k = np.broadcast_to(k.astype(np.int16), (1,) + shape) # type: ignore - B = np.broadcast_to(B.astype(np.int16), (1,) + shape) # type: ignore - I = np.broadcast_to(I.astype(np.int16), (1,) + shape) # noqa: E741 + if np.size(k) != 1: + k = np.broadcast_to(k.astype(np.int16), (1,) + shape) # type: ignore + B = np.broadcast_to(B.astype(np.int16), (1,) + shape) # type: ignore + I = np.broadcast_to(I.astype(np.int16), (1,) + shape) # noqa: E741 + else: + k = np.ravel(k).astype(np.int16) + B = np.ravel(B).astype(np.int16) + I = np.ravel(I).astype(np.int16) # noqa: E741 overflow_mode: str = internal_q.overflow_mode round_mode: str = internal_q.round_mode if round_mode.startswith('S_'): round_mode = round_mode[2:] - fusible = np.unique(k).size == 1 and np.unique(B).size == 1 and np.unique(I).size == 1 input_keras_tensor_names = tensor.name if is_input else f'{tensor.name}_q' output_keras_tensor_names = f'{tensor.name}_q' if is_input else tensor.name @@ -48,7 +52,7 @@ def extract_fixed_quantizer_config(q, tensor: 'KerasTensor', is_input: bool) -> 'mask_kbi': (k, B, I), 'SAT': overflow_mode, 'RND': round_mode, - 'fusible': fusible, + 'fusible': None, 'input_keras_tensor_names': [input_keras_tensor_names], 'output_keras_tensor_names': [output_keras_tensor_names], 'overrides': {}, From 59bd96f0c5e9c8e95538a9e96e0233c2d70695ba Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Mon, 9 Feb 2026 16:31:00 +0000 Subject: [PATCH 03/12] Changes required for oneAPI MHA --- hls4ml/backends/oneapi/oneapi_backend.py | 8 - .../backends/oneapi/passes/core_templates.py | 88 ++++++++++- .../keras_v3/hgq2/multi_head_attention.py | 4 +- .../firmware/nnet_utils/nnet_activation.h | 82 +++++++--- .../oneapi/firmware/nnet_utils/nnet_dense.h | 7 +- hls4ml/writer/oneapi_writer.py | 149 ++++++++++-------- 6 files changed, 233 insertions(+), 105 deletions(-) diff --git a/hls4ml/backends/oneapi/oneapi_backend.py b/hls4ml/backends/oneapi/oneapi_backend.py index 0c11c16d09..94f26c9f1c 100644 --- a/hls4ml/backends/oneapi/oneapi_backend.py +++ b/hls4ml/backends/oneapi/oneapi_backend.py @@ -19,7 +19,6 @@ Embedding, Layer, SimpleRNN, - Softmax, ) from hls4ml.model.optimizer import get_backend_passes, layer_optimizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType @@ -257,13 +256,6 @@ def init_activation(self, layer): if layer.get_attr('recurrent_activation') == 'tanh': layer.set_attr('recurrent_activation', 'dense_tanh') - @layer_optimizer(Softmax) - def init_softmax(self, layer): - if layer.model.config.get_config_value('IOType') == 'io_parallel': - assert len(layer.get_input_variable().shape) == 1, ( - 'Softmax with io_parallel strategy cannot be used on multidimensional tensors.' - ) - @layer_optimizer(Embedding) def init_embed(self, layer): if layer.attributes['n_in'] is None: diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 64a4c7097a..5a2d765e8f 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -38,7 +38,7 @@ using product = nnet::product::{product_type}; }};\n""" -dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output});' +dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});' dense_task_sequence_template = 'task_sequence> {name};' dense_stream_function_template = '{name}.async();' dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_stream.h'] @@ -70,8 +70,8 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) - #params['w'] = node.get_weights('weight').name - #params['b'] = node.get_weights('bias').name + params['w'] = node.get_weights('weight').name + params['b'] = node.get_weights('bias').name return self.template.format(**params) @@ -199,7 +199,7 @@ def format(self, node): static constexpr unsigned reuse_factor = {reuse}; }};\n""" -softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ +softmax_config_template_qkeras = """struct {type}_config{index} : nnet::activ_config {{ static constexpr unsigned n_in = {n_in}; static constexpr unsigned table_size = {table_size}; static constexpr unsigned io_type = nnet::{iotype}; @@ -209,6 +209,26 @@ def format(self, node): typedef {inv_table_t.name} inv_table_t; }};\n""" +softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ + static const unsigned n_in = {n_in}; + static const unsigned n_slice = {n_slice}; + static const unsigned n_outer = {n_outer}; + static const unsigned n_inner = {n_inner}; + static const unsigned parallelization_factor = {parallelization_factor}; + static const unsigned exp_table_size = {exp_table_size}; + static const unsigned inv_table_size = {inv_table_size}; + static const unsigned io_type = nnet::{iotype}; + static const unsigned reuse_factor = {reuse}; + static const unsigned axis = {axis}; + static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; + static constexpr float exp_scale = {exp_scale}; + typedef {exp_table_t.name} exp_table_t; + typedef {inv_table_t.name} inv_table_t; + typedef {accum_t.name} accum_t; + typedef {inv_inp_t.name} inv_inp_t; + typedef {inp_norm_t_str} inp_norm_t; +}};\n""" + activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});' param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});' @@ -260,10 +280,68 @@ def __init__(self): super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__ self.template = softmax_config_template + def format(self, node): + from math import ceil, log2 + + params = self._default_config_params(node) + params['type'] = node.get_attr('activation') + params.setdefault('exp_table_size', params['table_size']) + params.setdefault('inv_table_size', params['table_size']) + params.setdefault('n_inner', 1) + params.setdefault('n_outer', 1) + params.setdefault('exp_scale', 1.0) + params.setdefault('parallelization_factor', -1) + + n_slice = params['n_in'] // params['n_inner'] // params['n_outer'] # type: ignore + params['n_slice'] = n_slice + + if params['accum_t'].name == 'model_default_t': # type: ignore + scale = ceil(log2(n_slice)) + exp_table_t = node.attributes['exp_table_t'].precision + signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer + params['accum_t_str'] = f'ac_{"" if signed else "u"}fixed<{width + scale}, {integers + scale}>' + else: + params['accum_t_str'] = params['accum_t'].name # type: ignore + if params['inv_inp_t'].name == 'model_default_t': # type: ignore + params['inv_inp_t'] = params['exp_table_t'] + + if params['implementation'] == 'stable': + if 'inp_norm_t' not in params: + # Only used in stable (max-normalized) implementation + input_t = node.get_input_variable().type.precision + width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841 + width, iwidth = width - signed, iwidth - signed + if signed: + # Fix table size if too large + exp_table_size = params['inv_table_size'] + params['exp_table_size'] = str(min(int(exp_table_size), 2**width)) + params['inp_norm_t_str'] = f'ac_ufixed<{width}, {iwidth}>' + else: + params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore + else: + params['inp_norm_t_str'] = 'ac_fixed<1,0>' + + return self.template.format(**params) + + +class SoftmaxFunctionTemplate(FunctionCallTemplate): + def __init__(self): + super().__init__(Softmax, include_header=activ_include_list) + self.template = activ_function_template + + def format(self, node): + params = self._default_function_params(node) + use_multidim = node.get_attr('n_inner', 1) > 1 or node.get_attr('n_outer', 1) > 1 + use_multidim = use_multidim and node.model.config.get_config_value('IOType') == 'io_parallel' + params['activation'] = 'softmax' if not use_multidim else 'softmax_multidim' + params['config'] = f'softmax_config{node.index}' + + return self.template.format(**params) + class ActivationFunctionTemplate(FunctionCallTemplate): def __init__(self): - super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list) + super().__init__((Activation, HardActivation), include_header=activ_include_list) self.template = activ_function_template def format(self, node): diff --git a/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py b/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py index 24bd87d3e9..d5c1eda7b9 100644 --- a/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py +++ b/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py @@ -15,7 +15,7 @@ @register class QMultiHeadAttentionHandler(QLayerHandler): - handles = ('hgq.layers.multi_head_attention.QMultiHeadAttention',) + handles = ('hgq.layers.attn.mha.QMultiHeadAttention',) def handle( self, @@ -129,7 +129,7 @@ def _handle(self, layer, tensor_q, tensor_O, node_index, tensor_k, tensor_v): @register class QLinformerAttentionHandler(QMultiHeadAttentionHandler): - handles = ('hgq.layers.linformer_attention.QLinformerAttention',) + handles = ('hgq.layers.attn.linformer.QLinformerAttention',) def handle( self, diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h index f118ecb05c..c2353c34a8 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h @@ -100,15 +100,8 @@ template void sigmoid(const data_ enum class softmax_implementation { latency = 0, legacy = 1, stable = 2, argmax = 3 }; template inline unsigned softmax_stable_idx_from_real_val(const data_T x) { - // Number of address bits for table - static constexpr int N = ceillog2::val; - - // Slice the top N bits of the input - [[intel::fpga_register]] ac_int y = x.template slc(x.width - N - 1); - // If x is the most negative value, the slice will be 0, so we need to set the 0-th bit to ensure correctness - if (x != 0 && y == 0) - y[0] = 1; - return y.to_uint(); + // Extract the lower 'width' bits of x + return x.template slc(0).to_uint(); } template inline unsigned softmax_latency_idx_from_real_val(const data_T x) { @@ -121,7 +114,6 @@ template inline unsigned softmax_latency_idx_f } template void softmax_stable(const data_T &data, res_T &res) { -// Look-up tables #include "activation_tables/exp_table.tb" #include "activation_tables/invert_table.tb" @@ -130,29 +122,34 @@ template void softmax_stable(cons [[intel::fpga_register]] auto x_max = reduce>(data.data(), op_max); - // For the diffs, use the same type as the input but force rounding and saturation - [[intel::fpga_register]] ac_fixed - d_xi_xmax[CONFIG_T::n_in]; + // Normalize inputs: d = x_max - x + [[intel::fpga_register]] typename CONFIG_T::inp_norm_t d_xi_xmax[CONFIG_T::n_in]; #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { - d_xi_xmax[i] = data[i] - x_max; + // HGQ stable: d = x_max - data + d_xi_xmax[i] = x_max - data[i]; } - // Calculate all the e^x's + // Exponentials [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in]; #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { - exp_res[i] = exp_table[softmax_stable_idx_from_real_val(d_xi_xmax[i])]; + unsigned idx = softmax_stable_idx_from_real_val(d_xi_xmax[i]); + exp_res[i] = exp_table[idx]; } - // Explicitly sum previously calculated exponentials with an adder tree - Op_add op_add; - [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = - reduce>(exp_res, op_add); + // Sum of Exponentials + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::accum_t exp_sum = + reduce>(exp_res, op_add); - // Multiply previously calculated exponetials with the reciprocal of the sum - [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = - invert_table[softmax_stable_idx_from_real_val(exp_sum)]; + // Reciprocal of Sum + typename CONFIG_T::inv_inp_t exp_sum_cast = exp_sum; + unsigned inv_idx = softmax_stable_idx_from_real_val(exp_sum_cast); + + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = invert_table[inv_idx]; + + // Final Multiplication #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { res[i] = exp_res[i] * inv_exp_sum; @@ -265,6 +262,45 @@ template inline void softmax(cons } } +// ************************************************* +// Multidimensional Softmax +// ************************************************* + +// Helper to remap the config for the core softmax function +template struct softmax_multidim_slice_config : CONFIG_T { + static constexpr unsigned n_in = CONFIG_T::n_slice; +}; + +template inline void softmax_multidim(const data_T &data, res_T &res) { + using buffer_data_t = std::array; + using buffer_res_t = std::array; + using slice_config = softmax_multidim_slice_config; + + #pragma unroll + for (unsigned i = 0; i < CONFIG_T::n_outer; i++) { + #pragma unroll + for (unsigned k = 0; k < CONFIG_T::n_inner; k++) { + + [[intel::fpga_register]] buffer_data_t buffer_in; + [[intel::fpga_register]] buffer_res_t buffer_out; + + // Gather Phase + #pragma unroll + for (unsigned j = 0; j < CONFIG_T::n_slice; j++) { + unsigned idx = (i * CONFIG_T::n_slice * CONFIG_T::n_inner) + (j * CONFIG_T::n_inner) + k; + buffer_in[j] = data[idx]; + } + + nnet::softmax(buffer_in, buffer_out); + + #pragma unroll + for (unsigned j = 0; j < CONFIG_T::n_slice; j++) { + unsigned idx = (i * CONFIG_T::n_slice * CONFIG_T::n_inner) + (j * CONFIG_T::n_inner) + k; + res[idx] = buffer_out[j]; + } + } + } +} // ************************************************* // TanH Activation // ************************************************* diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h index 2b65eef42b..dc76189083 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_dense.h @@ -152,11 +152,12 @@ void dense_rf_lt(const data_T &data, res_T &res, const typename CONFIG_T::weight } } template -void dense_resource(const data_T &data, res_T &res) { +void dense_resource(const data_T &data, res_T &res, const typename CONFIG_T::weight_t &weights, + const typename CONFIG_T::bias_t &biases) { if (CONFIG_T::reuse_factor <= CONFIG_T::n_in) { - dense_rf_lt(data, res, CONFIG_T::weights, CONFIG_T::biases); + dense_rf_lt(data, res, weights, biases); } else { - dense_rf_gt(data, res, CONFIG_T::weights, CONFIG_T::biases); + dense_rf_gt(data, res, weights, biases); } } } // namespace nnet diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index b42ff2990f..007b645cb0 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -242,13 +242,13 @@ def write_project_header(self, model): for out in model_outputs: newline += out.declare_cpp() - # Insert weights + # Insert weights elif '// hls-fpga-machine-learning insert weights' in line: newline = line for layer in model.get_layers(): for w in layer.get_weights(): - #if w not in model_brams: - newline += f'#include "weights/{w.name}.h"\n' + # if w not in model_brams: + newline += f'#include "weights/{w.name}.h"\n' # Simply copy line, if no inserts are required else: @@ -557,16 +557,16 @@ def write_nnet_utils(self, model): dstpath = f'{model.config.get_output_dir()}/src/firmware/{dst}' copyfile(srcpath, dstpath) - def __get_table_size(self, model, activation): + def __get_table_size(self, model, activation, table_name='table_size'): for layer in model.get_layers(): if ( layer.get_attr('activation') == activation or layer.get_attr('recurrent_activation') == activation - ) and layer.get_attr('table_size') is not None: - return int(layer.get_attr('table_size')) + ) and layer.get_attr(table_name) is not None: + return int(layer.get_attr(table_name)) return 1024 - def __get_table_header(self, table_name, table_size): - table_header = f'static const typename CONFIG_T::table_t {table_name}[{table_size}] = {{' + def __get_table_header(self, table_name, table_size, table_type='table_t'): + table_header = f'static const typename CONFIG_T::{table_type} {table_name}[{table_size}] = {{' return table_header def __write_elu_table(self, model, path): @@ -695,46 +695,58 @@ def __write_selu_table(self, model, path): h_file.write('};\n') h_file.close() + def __get_table_precision(self, model, activation, table_name='table_precision'): + for layer in model.get_layers(): + if layer.get_attr('activation') == activation and layer.get_attr(table_name) is not None: + precision = layer.get_attr(table_name) + return precision.precision + + return None # fp_bits, fp_integer, fp_signed + def __write_exp_table(self, model, path): table_name = 'exp_table' - table_size = self.__get_table_size(model, 'softmax') + table_size = self.__get_table_size(model, 'softmax', table_name='exp_table_size') h_file = open(f'{path}/{table_name}.tb', 'w') - h_file.write(self.__get_table_header(table_name, table_size)) + h_file.write(self.__get_table_header(table_name, table_size, table_type='exp_table_t')) # Default fixed point precision # 6 bits for integer part, 10 bits for decimal - total, 16 - fp_bits = 16 - fp_integer = 6 - fp_signed = True + precision = self.__get_table_precision(model, 'softmax', table_name='inp_norm_t') + + if precision is None: + fp_bits = 16 + fp_integer = 6 + fp_signed = True + + for layer in model.get_layers(): + if layer.name == 'softmax': + ac_type = layer.get_input_variable().type + if ac_type is not None: + try: + fp_bits = ac_type.precision.integer + ac_type.precision.fractional + fp_integer = ac_type.precision.integer + fp_signed = ac_type.precision.signed + except Exception: + # FixedPrecisionType wasn't correctly stored in layer attributes, use default values + pass + if fp_signed is False: + raise Exception('Softmax types need to be signed') - # Exp table should use the same precision as exp_table, as seen in Vivado code - # init_exp_table(exp_table); - for layer in model.get_layers(): - if layer.name == 'softmax': - ac_type = layer.get_input_variable().type - if ac_type is not None: - try: - fp_bits = ac_type.precision.integer + ac_type.precision.fractional - fp_integer = ac_type.precision.integer - fp_signed = ac_type.precision.signed - except Exception: - # FixedPrecisionType wasn't correctly stored in layer attributes, use default values - pass - if fp_signed is False: - raise Exception('Softmax types need to be signed') + else: + fp_bits = precision.width + fp_integer = precision.integer + fp_signed = precision.signed + f_bits = fp_bits - fp_integer sep = '' - N = ceil_log2(table_size) for i in range(table_size): - f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed) - b = uint_to_binary(i, N) - if i == 0: - b.insert(0, 0) - else: - b.insert(0, 1) - f.set_msb_bits(b) - real_val = f.exp_float() + # Index represents the raw bit pattern of the input + real_val_in = i * (2.0 ** (-f_bits)) + + # Calculate exp(-x) for the stable implementation + real_val = np.exp(-real_val_in) + h_file.write(sep + str(real_val)) sep = ', ' @@ -743,41 +755,50 @@ def __write_exp_table(self, model, path): def __write_invert_table(self, model, path): table_name = 'invert_table' - table_size = self.__get_table_size(model, 'softmax') + table_size = self.__get_table_size(model, 'softmax', table_name='inv_table_size') h_file = open(f'{path}/{table_name}.tb', 'w') - h_file.write(self.__get_table_header(table_name, table_size)) - + h_file.write(self.__get_table_header(table_name, table_size, table_type='inv_table_t')) # Default fixed point precision, in case values from layer attributes cannot be extracted # 8 bits for integer part, 10 bits for decimal - total, 18 - fp_bits = 18 - fp_integer = 8 - fp_signed = True - # Invert table should use the same precision as exp_table, as seen in Vivado code - # init_invert_table(invert_table); - for layer in model.get_layers(): - if layer.name == 'softmax': - ac_type = layer.get_attr('exp_table_t') - if ac_type is not None: - try: - fp_bits = ac_type.precision.integer + ac_type.precision.fractional - fp_integer = ac_type.precision.integer - fp_signed = ac_type.precision.signed - except Exception: - # FixedPrecisionType wasn't correctly stored in layer attributes, use default values - pass - if fp_signed is False: - raise Exception('Softmax types need to be signed') + precision = self.__get_table_precision(model, 'softmax', table_name='inv_inp_t') + + if precision is None: + fp_bits = 18 + fp_integer = 8 + fp_signed = True + + for layer in model.get_layers(): + if layer.name == 'softmax': + ac_type = layer.get_attr('exp_table_t') + if ac_type is not None: + try: + fp_bits = ac_type.precision.integer + ac_type.precision.fractional + fp_integer = ac_type.precision.integer + fp_signed = ac_type.precision.signed + except Exception: + # FixedPrecisionType wasn't correctly stored in layer attributes, use default values + pass + if fp_signed is False: + raise Exception('Softmax types need to be signed') + + else: + fp_bits = precision.width + fp_integer = precision.integer + fp_signed = precision.signed + f_bits = fp_bits - fp_integer sep = '' - N = ceil_log2(table_size) for i in range(table_size): - f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed) - b = uint_to_binary(i, N) - b.insert(0, 0) - f.set_msb_bits(b) - real_val = f.inv_float() + # Index represents the raw bit pattern of the input + real_val_in = i * (2.0 ** (-f_bits)) + + if real_val_in == 0: + real_val = 999.0 + else: + real_val = 1.0 / real_val_in + h_file.write(sep + str(real_val)) sep = ', ' From dbb207b7a5c1f343d8100bba9645340a2098730c Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Mon, 9 Feb 2026 16:33:38 +0000 Subject: [PATCH 04/12] Original weight implementation --- .../backends/oneapi/passes/core_templates.py | 91 +------------------ 1 file changed, 3 insertions(+), 88 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 5a2d765e8f..9602b2d0fc 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -6,7 +6,6 @@ # Dense templates dense_config_template = """struct config{index} : nnet::dense_config {{ - static constexpr unsigned n_in = {n_in}; static constexpr unsigned n_out = {n_out}; static constexpr unsigned io_type = nnet::{iotype}; @@ -31,16 +30,13 @@ typedef {weight_t.name} weight_t; typedef {index_t.name} index_t; - static constexpr weight_t weights = {weights}; - static constexpr bias_t biases = {biases}; - template using product = nnet::product::{product_type}; }};\n""" dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});' dense_task_sequence_template = 'task_sequence> {name};' -dense_stream_function_template = '{name}.async();' +dense_stream_function_template = '{name}.async({w}, {b});' dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_stream.h'] @@ -57,9 +53,6 @@ def format(self, node): node.get_input_variable().type.precision, node.get_weights('weight').type.precision ) - params['weights'] = node.get_weights('weight').name - params['biases'] = node.get_weights('bias').name - return self.template.format(**params) @@ -199,7 +192,7 @@ def format(self, node): static constexpr unsigned reuse_factor = {reuse}; }};\n""" -softmax_config_template_qkeras = """struct {type}_config{index} : nnet::activ_config {{ +softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ static constexpr unsigned n_in = {n_in}; static constexpr unsigned table_size = {table_size}; static constexpr unsigned io_type = nnet::{iotype}; @@ -209,26 +202,6 @@ def format(self, node): typedef {inv_table_t.name} inv_table_t; }};\n""" -softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ - static const unsigned n_in = {n_in}; - static const unsigned n_slice = {n_slice}; - static const unsigned n_outer = {n_outer}; - static const unsigned n_inner = {n_inner}; - static const unsigned parallelization_factor = {parallelization_factor}; - static const unsigned exp_table_size = {exp_table_size}; - static const unsigned inv_table_size = {inv_table_size}; - static const unsigned io_type = nnet::{iotype}; - static const unsigned reuse_factor = {reuse}; - static const unsigned axis = {axis}; - static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; - static constexpr float exp_scale = {exp_scale}; - typedef {exp_table_t.name} exp_table_t; - typedef {inv_table_t.name} inv_table_t; - typedef {accum_t.name} accum_t; - typedef {inv_inp_t.name} inv_inp_t; - typedef {inp_norm_t_str} inp_norm_t; -}};\n""" - activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});' param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});' @@ -280,68 +253,10 @@ def __init__(self): super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__ self.template = softmax_config_template - def format(self, node): - from math import ceil, log2 - - params = self._default_config_params(node) - params['type'] = node.get_attr('activation') - params.setdefault('exp_table_size', params['table_size']) - params.setdefault('inv_table_size', params['table_size']) - params.setdefault('n_inner', 1) - params.setdefault('n_outer', 1) - params.setdefault('exp_scale', 1.0) - params.setdefault('parallelization_factor', -1) - - n_slice = params['n_in'] // params['n_inner'] // params['n_outer'] # type: ignore - params['n_slice'] = n_slice - - if params['accum_t'].name == 'model_default_t': # type: ignore - scale = ceil(log2(n_slice)) - exp_table_t = node.attributes['exp_table_t'].precision - signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer - params['accum_t_str'] = f'ac_{"" if signed else "u"}fixed<{width + scale}, {integers + scale}>' - else: - params['accum_t_str'] = params['accum_t'].name # type: ignore - if params['inv_inp_t'].name == 'model_default_t': # type: ignore - params['inv_inp_t'] = params['exp_table_t'] - - if params['implementation'] == 'stable': - if 'inp_norm_t' not in params: - # Only used in stable (max-normalized) implementation - input_t = node.get_input_variable().type.precision - width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841 - width, iwidth = width - signed, iwidth - signed - if signed: - # Fix table size if too large - exp_table_size = params['inv_table_size'] - params['exp_table_size'] = str(min(int(exp_table_size), 2**width)) - params['inp_norm_t_str'] = f'ac_ufixed<{width}, {iwidth}>' - else: - params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore - else: - params['inp_norm_t_str'] = 'ac_fixed<1,0>' - - return self.template.format(**params) - - -class SoftmaxFunctionTemplate(FunctionCallTemplate): - def __init__(self): - super().__init__(Softmax, include_header=activ_include_list) - self.template = activ_function_template - - def format(self, node): - params = self._default_function_params(node) - use_multidim = node.get_attr('n_inner', 1) > 1 or node.get_attr('n_outer', 1) > 1 - use_multidim = use_multidim and node.model.config.get_config_value('IOType') == 'io_parallel' - params['activation'] = 'softmax' if not use_multidim else 'softmax_multidim' - params['config'] = f'softmax_config{node.index}' - - return self.template.format(**params) - class ActivationFunctionTemplate(FunctionCallTemplate): def __init__(self): - super().__init__((Activation, HardActivation), include_header=activ_include_list) + super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list) self.template = activ_function_template def format(self, node): From 51efff0c34744ab2fa70d7e3a52fdbf196ffcf0a Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Mon, 9 Feb 2026 16:51:19 +0000 Subject: [PATCH 05/12] Restore oneAPI weight placement --- hls4ml/templates/oneapi/firmware/myproject.cpp | 5 +---- hls4ml/templates/oneapi/firmware/myproject.h | 3 --- hls4ml/writer/oneapi_writer.py | 7 ------- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/hls4ml/templates/oneapi/firmware/myproject.cpp b/hls4ml/templates/oneapi/firmware/myproject.cpp index da9439f74a..06e7d3fe37 100644 --- a/hls4ml/templates/oneapi/firmware/myproject.cpp +++ b/hls4ml/templates/oneapi/firmware/myproject.cpp @@ -1,12 +1,9 @@ #include "myproject.h" +#include "parameters.h" #include // hls-fpga-machine-learning insert weights - -#include "parameters.h" - - // The inter-task pipes need to be declared in the global scope // hls-fpga-machine-learning insert inter-task pipes diff --git a/hls4ml/templates/oneapi/firmware/myproject.h b/hls4ml/templates/oneapi/firmware/myproject.h index 8f313ea30f..082ae5dc8c 100644 --- a/hls4ml/templates/oneapi/firmware/myproject.h +++ b/hls4ml/templates/oneapi/firmware/myproject.h @@ -3,9 +3,6 @@ #include "defines.h" -// hls-fpga-machine-learning insert weights - - // This file defines the interface to the kernel // currently this is fixed diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index 007b645cb0..8ef2b0b0a1 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -242,13 +242,6 @@ def write_project_header(self, model): for out in model_outputs: newline += out.declare_cpp() - # Insert weights - elif '// hls-fpga-machine-learning insert weights' in line: - newline = line - for layer in model.get_layers(): - for w in layer.get_weights(): - # if w not in model_brams: - newline += f'#include "weights/{w.name}.h"\n' # Simply copy line, if no inserts are required else: From 6067bea99e35fd0bb3b2d89323e721e3916b0960 Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Mon, 9 Feb 2026 16:52:42 +0000 Subject: [PATCH 06/12] pre-commit --- hls4ml/writer/oneapi_writer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index 8ef2b0b0a1..b945f3faf9 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -242,7 +242,6 @@ def write_project_header(self, model): for out in model_outputs: newline += out.declare_cpp() - # Simply copy line, if no inserts are required else: newline = line From 16ca197d57e0d72485265cf25e170ee7fc576280 Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Tue, 24 Feb 2026 16:40:32 +0000 Subject: [PATCH 07/12] softmax multidim templates --- .../backends/oneapi/passes/core_templates.py | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 9602b2d0fc..8205fb1e17 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -1,3 +1,5 @@ +from math import ceil, log2 + from hls4ml.backends.backend import get_backend from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate @@ -192,14 +194,24 @@ def format(self, node): static constexpr unsigned reuse_factor = {reuse}; }};\n""" + softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ - static constexpr unsigned n_in = {n_in}; - static constexpr unsigned table_size = {table_size}; - static constexpr unsigned io_type = nnet::{iotype}; - static constexpr unsigned reuse_factor = {reuse}; - static constexpr nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; + static const unsigned n_in = {n_in}; + static const unsigned n_slice = {n_slice}; + static const unsigned n_outer = {n_outer}; + static const unsigned n_inner = {n_inner}; + static const unsigned parallelization_factor = {parallelization_factor}; + static const unsigned exp_table_size = {exp_table_size}; + static const unsigned inv_table_size = {inv_table_size}; + static const unsigned io_type = nnet::{iotype}; + static const unsigned reuse_factor = {reuse}; + static const unsigned axis = {axis}; + static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; + static constexpr float exp_scale = {exp_scale}; typedef {exp_table_t.name} exp_table_t; typedef {inv_table_t.name} inv_table_t; + //typedef {accum_t.name} accum_t; + //typedef {inp_norm_t_str} inp_norm_t; }};\n""" activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});' @@ -253,6 +265,48 @@ def __init__(self): super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__ self.template = softmax_config_template + def format(self, node): + params = self._default_config_params(node) + params['type'] = node.get_attr('activation') + params.setdefault('exp_table_size', params['table_size']) + params.setdefault('inv_table_size', params['table_size']) + params.setdefault('n_inner', 1) + params.setdefault('n_outer', 1) + params.setdefault('exp_scale', 1.0) + params.setdefault('parallelization_factor', -1) + + n_slice = params['n_in'] // params['n_inner'] // params['n_outer'] # type: ignore + params['n_slice'] = n_slice + + if params['accum_t'].name == 'model_default_t': # type: ignore + scale = ceil(log2(n_slice)) + exp_table_t = node.attributes['exp_table_t'].precision + signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer + params['accum_t_str'] = f'ac_fixed<{width + scale}, {integers + scale}, {"true" if signed else "false"}>' + else: + params['accum_t_str'] = params['accum_t'].name # type: ignore + if params['inv_inp_t'].name == 'model_default_t': # type: ignore + params['inv_inp_t'] = params['exp_table_t'] + + if params['implementation'] == 'stable': + if 'inp_norm_t' not in params: + # Only used in stable (max-normalized) implementation + input_t = node.get_input_variable().type.precision + width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841 + width, iwidth = width - signed, iwidth - signed + if signed: + # Fix table size if too large + exp_table_size = params['inv_table_size'] + params['exp_table_size'] = str(min(int(exp_table_size), 2**width)) + params['inp_norm_t_str'] = f'ac_fixed<{width}, {iwidth}, false>' + else: + params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore + else: + params['inp_norm_t_str'] = 'ac_fixed<2,0>' + + return self.template.format(**params) + + class ActivationFunctionTemplate(FunctionCallTemplate): def __init__(self): @@ -262,7 +316,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() - params['config'] = f'{node.get_attr("activation")}_config{node.index}' + params['config'] = f"{node.get_attr('activation')}_config{node.index}" return self.template.format(**params) @@ -276,7 +330,7 @@ def format(self, node): params = self._default_function_params(node) params['activation'] = node._get_act_function_name() params['param'] = node.get_attr('activ_param', 1.0) - params['config'] = f'{node.get_attr("activation")}_config{node.index}' + params['config'] = f"{node.get_attr('activation')}_config{node.index}" return self.template.format(**params) @@ -290,7 +344,7 @@ def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() params['param'] = node.get_weights('param').name - params['config'] = f'{node.get_attr("activation")}_config{node.index}' + params['config'] = f"{node.get_attr('activation')}_config{node.index}" return self.template.format(**params) @@ -303,7 +357,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() - params['config'] = f'{node.get_attr("activation")}_config{node.index}' + params['config'] = f"{node.get_attr('activation')}_config{node.index}" return self.template.format(**params) @@ -315,7 +369,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node._get_act_function_name() - params['config'] = f'{node.get_attr("activation")}_config{node.index}' + params['config'] = f"{node.get_attr('activation')}_config{node.index}" return self.template.format(**params) From 974e75a3962de9afa9b832d9c6d97edf85659a4f Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Tue, 24 Feb 2026 16:43:13 +0000 Subject: [PATCH 08/12] pre-commit --- hls4ml/backends/oneapi/passes/core_templates.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 8205fb1e17..4fae515efc 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -307,7 +307,6 @@ def format(self, node): return self.template.format(**params) - class ActivationFunctionTemplate(FunctionCallTemplate): def __init__(self): super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list) @@ -316,7 +315,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() - params['config'] = f"{node.get_attr('activation')}_config{node.index}" + params['config'] = f'{node.get_attr("activation")}_config{node.index}' return self.template.format(**params) @@ -330,7 +329,7 @@ def format(self, node): params = self._default_function_params(node) params['activation'] = node._get_act_function_name() params['param'] = node.get_attr('activ_param', 1.0) - params['config'] = f"{node.get_attr('activation')}_config{node.index}" + params['config'] = f'{node.get_attr("activation")}_config{node.index}' return self.template.format(**params) @@ -344,7 +343,7 @@ def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() params['param'] = node.get_weights('param').name - params['config'] = f"{node.get_attr('activation')}_config{node.index}" + params['config'] = f'{node.get_attr("activation")}_config{node.index}' return self.template.format(**params) @@ -357,7 +356,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node.get_attr('activation').lower() - params['config'] = f"{node.get_attr('activation')}_config{node.index}" + params['config'] = f'{node.get_attr("activation")}_config{node.index}' return self.template.format(**params) @@ -369,7 +368,7 @@ def __init__(self): def format(self, node): params = self._default_function_params(node) params['activation'] = node._get_act_function_name() - params['config'] = f"{node.get_attr('activation')}_config{node.index}" + params['config'] = f'{node.get_attr("activation")}_config{node.index}' return self.template.format(**params) From 060c398933705518e10007b80a1d5dd4bc1a5b10 Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Tue, 24 Feb 2026 17:07:48 +0000 Subject: [PATCH 09/12] uncomment --- hls4ml/backends/oneapi/passes/core_templates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 4fae515efc..21dcc69490 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -210,8 +210,8 @@ def format(self, node): static constexpr float exp_scale = {exp_scale}; typedef {exp_table_t.name} exp_table_t; typedef {inv_table_t.name} inv_table_t; - //typedef {accum_t.name} accum_t; - //typedef {inp_norm_t_str} inp_norm_t; + typedef {accum_t.name} accum_t; + typedef {inp_norm_t_str} inp_norm_t; }};\n""" activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});' From 772b93ad31d680e2df79ee756dbef06a5e783284 Mon Sep 17 00:00:00 2001 From: laurilaatu Date: Wed, 25 Feb 2026 17:16:51 +0000 Subject: [PATCH 10/12] int_inp_t to config --- hls4ml/backends/oneapi/passes/core_templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index 21dcc69490..f8d1b573d5 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -211,6 +211,7 @@ def format(self, node): typedef {exp_table_t.name} exp_table_t; typedef {inv_table_t.name} inv_table_t; typedef {accum_t.name} accum_t; + typedef {inv_inp_t.name} inv_inp_t; typedef {inp_norm_t_str} inp_norm_t; }};\n""" From c3a45848a04dd98c0124b7ec8ae33545ab28e680 Mon Sep 17 00:00:00 2001 From: bugracyln Date: Mon, 13 Apr 2026 02:26:17 +0100 Subject: [PATCH 11/12] softmax fixed --- .../backends/oneapi/passes/core_templates.py | 132 ++++++++------- .../firmware/nnet_utils/nnet_activation.h | 50 +++--- .../nnet_utils/nnet_activation_stream.h | 57 ++++--- hls4ml/templates/oneapi/firmware/parameters.h | 2 + hls4ml/writer/oneapi_writer.py | 156 ++++++++++-------- 5 files changed, 220 insertions(+), 177 deletions(-) diff --git a/hls4ml/backends/oneapi/passes/core_templates.py b/hls4ml/backends/oneapi/passes/core_templates.py index f8d1b573d5..c6050dfb57 100644 --- a/hls4ml/backends/oneapi/passes/core_templates.py +++ b/hls4ml/backends/oneapi/passes/core_templates.py @@ -1,9 +1,10 @@ -from math import ceil, log2 - from hls4ml.backends.backend import get_backend from hls4ml.backends.oneapi.oneapi_template import StreamFunctionCallTemplate, TaskSequenceTemplate from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate +from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax +from hls4ml.utils.fixed_point_utils import FixedPointEmulator, ceil_log2, uint_to_binary +import numpy as np # Dense templates @@ -194,25 +195,28 @@ def format(self, node): static constexpr unsigned reuse_factor = {reuse}; }};\n""" - softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{ - static const unsigned n_in = {n_in}; - static const unsigned n_slice = {n_slice}; - static const unsigned n_outer = {n_outer}; - static const unsigned n_inner = {n_inner}; - static const unsigned parallelization_factor = {parallelization_factor}; - static const unsigned exp_table_size = {exp_table_size}; - static const unsigned inv_table_size = {inv_table_size}; - static const unsigned io_type = nnet::{iotype}; - static const unsigned reuse_factor = {reuse}; - static const unsigned axis = {axis}; - static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; - static constexpr float exp_scale = {exp_scale}; + static constexpr unsigned n_in = {n_in}; + static constexpr unsigned exp_table_size = {exp_table_size}; + static constexpr unsigned inv_table_size = {inv_table_size}; + static constexpr unsigned io_type = nnet::{iotype}; + static constexpr unsigned reuse_factor = {reuse}; + static constexpr nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation}; typedef {exp_table_t.name} exp_table_t; - typedef {inv_table_t.name} inv_table_t; - typedef {accum_t.name} accum_t; + typedef {inv_table_t.name} inv_table_t;""" + +softmax_config_table_template = """ + + static constexpr const exp_table_t *exp_table = &{exp_table_name}[0]; + static constexpr const inv_table_t *invert_table = &{inv_table_name}[0]; +}};\n""" + +softmax_config_table_template_stable = """ typedef {inv_inp_t.name} inv_inp_t; - typedef {inp_norm_t_str} inp_norm_t; + typedef {inp_norm_t.name} inp_norm_t; + + static constexpr const exp_table_t *exp_table = &{exp_table_name}[0]; + static constexpr const inv_table_t *invert_table = &{inv_table_name}[0]; }};\n""" activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});' @@ -233,7 +237,58 @@ def __init__(self): def format(self, node): params = self._default_config_params(node) params['type'] = node.get_attr('activation') + + if params['type'] == 'softmax': + + if 'exp_table_size' in params: + params['exp_table_size'] //= 2 + else: + params['exp_table_size'] = 1024 + params['exp_table_t'].precision.width = ceil_log2(params['exp_table_size']) + params['exp_table_t'].precision.integer = 3 + params['exp_table_t'].precision.signed = False + + if 'inp_norm_t' not in params: + input_t = node.get_input_variable().type.precision + width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841 + width, iwidth = width - signed, iwidth - signed + import copy + params['inp_norm_t'] = copy.deepcopy(params['exp_table_t']) #assign type,later override + + #this checks if table sizes will be default, if it is just use the table size to derive precision + if 'inv_table_size' not in params: + params['inp_norm_t'].precision.width = params['exp_table_t'].precision.width + 1 + params['inp_norm_t'].precision.integer = params['exp_table_t'].precision.integer + 1 + params['inp_norm_t'].precision.signed = True + params['inp_norm_t'].name = f'{node.name}_inp_norm_t' + else: + params['inp_norm_t'].name = f'ac_fixed<{width},{iwidth},{'true' if signed else 'false'},AC_RND,AC_SAT_SYM>' + + node.set_attr('inp_norm_t', params['inp_norm_t']) + + if 'inv_table_size' in params: + params['inv_table_size'] //= 2 + else: + params['inv_table_size'] = 1024 + + params['inv_table_t'].precision.width = ceil_log2(params['inv_table_size']) + params['inv_table_t'].precision.integer = 3 + params['inv_table_t'].precision.signed = False + + params['inv_inp_t'].precision.width = params['inv_table_t'].precision.width + 1 + params['inv_inp_t'].precision.integer = params['inv_table_t'].precision.integer + 1 + params['inv_inp_t'].precision.signed = True + + + if params['implementation'] == 'stable': + self.template += softmax_config_table_template_stable + else: + self.template += softmax_config_table_template + + params['exp_table_name'] = node.name + '_exp_table' + params['inv_table_name'] = node.name + '_inv_table' + return self.template.format(**params) @@ -266,47 +321,6 @@ def __init__(self): super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__ self.template = softmax_config_template - def format(self, node): - params = self._default_config_params(node) - params['type'] = node.get_attr('activation') - params.setdefault('exp_table_size', params['table_size']) - params.setdefault('inv_table_size', params['table_size']) - params.setdefault('n_inner', 1) - params.setdefault('n_outer', 1) - params.setdefault('exp_scale', 1.0) - params.setdefault('parallelization_factor', -1) - - n_slice = params['n_in'] // params['n_inner'] // params['n_outer'] # type: ignore - params['n_slice'] = n_slice - - if params['accum_t'].name == 'model_default_t': # type: ignore - scale = ceil(log2(n_slice)) - exp_table_t = node.attributes['exp_table_t'].precision - signed, width, integers = exp_table_t.signed, exp_table_t.width, exp_table_t.integer - params['accum_t_str'] = f'ac_fixed<{width + scale}, {integers + scale}, {"true" if signed else "false"}>' - else: - params['accum_t_str'] = params['accum_t'].name # type: ignore - if params['inv_inp_t'].name == 'model_default_t': # type: ignore - params['inv_inp_t'] = params['exp_table_t'] - - if params['implementation'] == 'stable': - if 'inp_norm_t' not in params: - # Only used in stable (max-normalized) implementation - input_t = node.get_input_variable().type.precision - width, iwidth, signed = input_t.width, input_t.integer, input_t.signed # noqa: F841 - width, iwidth = width - signed, iwidth - signed - if signed: - # Fix table size if too large - exp_table_size = params['inv_table_size'] - params['exp_table_size'] = str(min(int(exp_table_size), 2**width)) - params['inp_norm_t_str'] = f'ac_fixed<{width}, {iwidth}, false>' - else: - params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore - else: - params['inp_norm_t_str'] = 'ac_fixed<2,0>' - - return self.template.format(**params) - class ActivationFunctionTemplate(FunctionCallTemplate): def __init__(self): diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h index c2353c34a8..385457204d 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation.h @@ -99,11 +99,21 @@ template void sigmoid(const data_ enum class softmax_implementation { latency = 0, legacy = 1, stable = 2, argmax = 3 }; -template inline unsigned softmax_stable_idx_from_real_val(const data_T x) { - // Extract the lower 'width' bits of x - return x.template slc(0).to_uint(); + +template inline unsigned softmax_stable_idx_from_real_val(const data_T x) { + // Number of address bits for table + static constexpr int N = ceillog2::val; + + // Slice the top N bits of the input + [[intel::fpga_register]] ac_int y = x.template slc(x.width - N - 1); + + // If x is the most negative value, the slice will be 0, so we need to set the 0-th bit to ensure correctness + if (x != 0 && y == 0) + y[0] = 1; + return y.to_uint(); } + template inline unsigned softmax_latency_idx_from_real_val(const data_T x) { // Number of address bits for table static constexpr int N = ceillog2::val; @@ -113,49 +123,45 @@ template inline unsigned softmax_latency_idx_f return y.to_uint(); } + template void softmax_stable(const data_T &data, res_T &res) { -#include "activation_tables/exp_table.tb" -#include "activation_tables/invert_table.tb" // Find maximum Op_max op_max; [[intel::fpga_register]] auto x_max = reduce>(data.data(), op_max); - // Normalize inputs: d = x_max - x - [[intel::fpga_register]] typename CONFIG_T::inp_norm_t d_xi_xmax[CONFIG_T::n_in]; + // For the diffs, use the same type as the input but force rounding and saturation + [[intel::fpga_register]] + typename CONFIG_T::inp_norm_t d_xi_xmax[CONFIG_T::n_in]; #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { - // HGQ stable: d = x_max - data - d_xi_xmax[i] = x_max - data[i]; + d_xi_xmax[i] = data[i] - x_max; } - // Exponentials + // Calculate all the e^x's [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in]; #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { - unsigned idx = softmax_stable_idx_from_real_val(d_xi_xmax[i]); - exp_res[i] = exp_table[idx]; + exp_res[i] = CONFIG_T::exp_table[softmax_stable_idx_from_real_val(d_xi_xmax[i])]; //input_t, CONFIG_T } - // Sum of Exponentials - Op_add op_add; - [[intel::fpga_register]] typename CONFIG_T::accum_t exp_sum = - reduce>(exp_res, op_add); - - // Reciprocal of Sum - typename CONFIG_T::inv_inp_t exp_sum_cast = exp_sum; - unsigned inv_idx = softmax_stable_idx_from_real_val(exp_sum_cast); + // Explicitly sum previously calculated exponentials with an adder tree + Op_add op_add; + [[intel::fpga_register]] typename CONFIG_T::inv_inp_t exp_sum = + reduce>(exp_res, op_add); - [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = invert_table[inv_idx]; + // Multiply previously calculated exponetials with the reciprocal of the sum + [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = + CONFIG_T::invert_table[softmax_stable_idx_from_real_val(exp_sum)]; - // Final Multiplication #pragma unroll for (unsigned i = 0; i < CONFIG_T::n_in; i++) { res[i] = exp_res[i] * inv_exp_sum; } } + // TODO - Improve accuracy template void softmax_latency(const data_T &data, res_T &res) { #include "activation_tables/exp_table_latency.tb" diff --git a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation_stream.h b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation_stream.h index e860c38988..d640f89f7e 100644 --- a/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation_stream.h +++ b/hls4ml/templates/oneapi/firmware/nnet_utils/nnet_activation_stream.h @@ -271,64 +271,63 @@ template void softsign_stre // ************************************************* template void softmax_stable_stream() { -#include "activation_tables/exp_table.tb" -#include "activation_tables/invert_table.tb" + + using input_arr_t = typename ExtractPipeType::value_type; + using input_t = typename ExtractPipeType::value_type::value_type; + constexpr unsigned input_arr_size = std::tuple_size{}; + constexpr unsigned multiplier_limit = - DIV_ROUNDUP(std::tuple_size::value_type>{}, CONFIG_T::reuse_factor); - constexpr unsigned pipeline = std::tuple_size::value_type>{} / multiplier_limit; + DIV_ROUNDUP(input_arr_size, CONFIG_T::reuse_factor); + constexpr unsigned pipeline = input_arr_size / multiplier_limit; - [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type - data_array[std::tuple_size::value_type>{}]; + + [[intel::fpga_register]] input_t data_array[input_arr_size]; SoftmaxArrayLoop: - [[intel::initiation_interval(pipeline)]] for (unsigned i = 0; - i < CONFIG_T::n_in / - std::tuple_size::value_type>{}; - i++) { + [[intel::initiation_interval(pipeline)]] + for (unsigned i = 0; i < CONFIG_T::n_in / input_arr_size; i++) { auto in_pack = data_pipe::read(); SoftmaxArrayPackLoop: #pragma unroll - for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + for (unsigned j = 0; j < input_arr_size; j++) { data_array[j] = in_pack[j]; } // Find the max and compute all delta(x_i, x_max) - Op_max::value_type::value_type> op_max; - [[intel::fpga_register]] typename ExtractPipeType::value_type::value_type x_max = - reduce::value_type::value_type, - std::tuple_size::value_type>{}, - Op_max::value_type::value_type>>(data_array, op_max); - - // For the diffs, use the same type as the input but force rounding and saturation - [[intel::fpga_register]] ac_fixed::value_type::value_type::width, - ExtractPipeType::value_type::value_type::i_width, true, AC_RND, AC_SAT> - d_xi_xmax[std::tuple_size::value_type>{}]; + Op_max op_max; + [[intel::fpga_register]] + input_t x_max = reduce>(data_array, op_max); + + [[intel::fpga_register]] + typename CONFIG_T::inp_norm_t d_xi_xmax[input_arr_size]; + #pragma unroll - for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + for (unsigned j = 0; j < input_arr_size; j++) { d_xi_xmax[j] = data_array[j] - x_max; } // Calculate all the e^x's [[intel::fpga_register]] - typename CONFIG_T::exp_table_t exp_res[std::tuple_size::value_type>{}]; + typename CONFIG_T::exp_table_t exp_res[input_arr_size]; + #pragma unroll - for (unsigned j = 0; j < std::tuple_size::value_type>{}; j++) { + for (unsigned j = 0; j < input_arr_size; j++) { exp_res[j] = - exp_table[softmax_stable_idx_from_real_val::value_type::value_type, - CONFIG_T>(d_xi_xmax[j])]; + CONFIG_T::exp_table[softmax_stable_idx_from_real_val(d_xi_xmax[j])]; } // Explicitly sum the results with an adder tree. // Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing Op_add op_add; - [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum = - reduce::value_type>{}, + [[intel::fpga_register]] typename CONFIG_T::inv_inp_t exp_sum = + reduce>(exp_res, op_add); [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = - invert_table[softmax_stable_idx_from_real_val(exp_sum)]; + CONFIG_T::invert_table[softmax_stable_idx_from_real_val(exp_sum)]; + typename ExtractPipeType::value_type out_pack; SoftmaxInvPackLoop: diff --git a/hls4ml/templates/oneapi/firmware/parameters.h b/hls4ml/templates/oneapi/firmware/parameters.h index 717059f1e8..ef4e5d26b9 100644 --- a/hls4ml/templates/oneapi/firmware/parameters.h +++ b/hls4ml/templates/oneapi/firmware/parameters.h @@ -6,6 +6,8 @@ #include "nnet_utils/nnet_code_gen.h" #include "nnet_utils/nnet_helpers.h" +// hls-fpga-machine-learning insert softmax tables + // hls-fpga-machine-learning insert includes // hls-fpga-machine-learning insert layer-config diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index b945f3faf9..7f95830c21 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -302,6 +302,14 @@ def write_parameters(self, model): config = layer.get_attr('config_cpp', None) if config: newline += config + '\n' + + elif '// hls-fpga-machine-learning insert softmax tables' in line: + newline = line + for layer in model.get_layers(): + if 'softmax' in layer.name: + newline += f'#include "nnet_utils/activation_tables/{layer.name}_exp_table.h"\n' + newline += f'#include "nnet_utils/activation_tables/{layer.name}_inv_table.h"\n' + else: newline = line fout.write(newline) @@ -695,25 +703,29 @@ def __get_table_precision(self, model, activation, table_name='table_precision') return None # fp_bits, fp_integer, fp_signed + def __write_exp_table(self, model, path): - table_name = 'exp_table' - table_size = self.__get_table_size(model, 'softmax', table_name='exp_table_size') - h_file = open(f'{path}/{table_name}.tb', 'w') - h_file.write(self.__get_table_header(table_name, table_size, table_type='exp_table_t')) + for layer in model.get_layers(): + + if 'softmax' in layer.name: + + table_name = layer.name + '_exp_table' + table_size = int(layer.get_attr('exp_table_size'))//2 if ( + layer.get_attr('activation') == 'softmax' or layer.get_attr('recurrent_activation') == 'softmax' + ) and layer.get_attr('exp_table_size') is not None else 1024 - # Default fixed point precision - # 6 bits for integer part, 10 bits for decimal - total, 16 - precision = self.__get_table_precision(model, 'softmax', table_name='inp_norm_t') + with open(f'{path}/{table_name}.h', 'w') as h_file: + + header_name = table_name + h_file.write(f'#ifndef {header_name.upper()}_H_\n') + h_file.write(f'#define {header_name.upper()}_H_\n\n') - if precision is None: - fp_bits = 16 - fp_integer = 6 - fp_signed = True + h_file.write(f'static constexpr {table_name}_t {table_name}[{table_size}] = {{') - for layer in model.get_layers(): - if layer.name == 'softmax': - ac_type = layer.get_input_variable().type + #ac_type = layer.get_input_variable().type + ac_type = layer.get_attr('inp_norm_t') + if ac_type is not None: try: fp_bits = ac_type.precision.integer + ac_type.precision.fractional @@ -721,49 +733,55 @@ def __write_exp_table(self, model, path): fp_signed = ac_type.precision.signed except Exception: # FixedPrecisionType wasn't correctly stored in layer attributes, use default values - pass + fp_bits = 16 + fp_integer = 6 + fp_signed = True + if fp_signed is False: raise Exception('Softmax types need to be signed') + + else: + fp_bits = 16 + fp_integer = 6 + fp_signed = True + + sep = '' + N = ceil_log2(table_size) + for i in range(table_size): + f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed) + b = uint_to_binary(i, N) + if i == 0: + b.insert(0, 0) + else: + b.insert(0, 1) + f.set_msb_bits(b) + real_val = f.exp_float() + h_file.write(sep + str(real_val)) + sep = ', ' + + h_file.write('};\n\n') + h_file.write('#endif') - else: - fp_bits = precision.width - fp_integer = precision.integer - fp_signed = precision.signed - - f_bits = fp_bits - fp_integer - sep = '' - for i in range(table_size): - # Index represents the raw bit pattern of the input - real_val_in = i * (2.0 ** (-f_bits)) - - # Calculate exp(-x) for the stable implementation - real_val = np.exp(-real_val_in) - - h_file.write(sep + str(real_val)) - sep = ', ' - - h_file.write('};\n') - h_file.close() def __write_invert_table(self, model, path): - table_name = 'invert_table' - table_size = self.__get_table_size(model, 'softmax', table_name='inv_table_size') + for layer in model.get_layers(): + if 'softmax' in layer.name: - h_file = open(f'{path}/{table_name}.tb', 'w') - h_file.write(self.__get_table_header(table_name, table_size, table_type='inv_table_t')) - # Default fixed point precision, in case values from layer attributes cannot be extracted - # 8 bits for integer part, 10 bits for decimal - total, 18 + table_name = layer.name + '_inv_table' + table_size = int(layer.get_attr('inv_table_size')) //2 if ( + layer.get_attr('activation') == 'softmax' or layer.get_attr('recurrent_activation') == 'softmax' + ) and layer.get_attr('inv_table_size') is not None else 1024 + + with open(f'{path}/{table_name}.h', 'w') as h_file: + + header_name = table_name + h_file.write(f'#ifndef {header_name.upper()}_H_\n') + h_file.write(f'#define {header_name.upper()}_H_\n\n') - precision = self.__get_table_precision(model, 'softmax', table_name='inv_inp_t') + h_file.write(f'static constexpr {table_name}_t {table_name}[{table_size}] = {{') - if precision is None: - fp_bits = 18 - fp_integer = 8 - fp_signed = True + ac_type = layer.get_attr('inv_inp_t') - for layer in model.get_layers(): - if layer.name == 'softmax': - ac_type = layer.get_attr('exp_table_t') if ac_type is not None: try: fp_bits = ac_type.precision.integer + ac_type.precision.fractional @@ -771,31 +789,32 @@ def __write_invert_table(self, model, path): fp_signed = ac_type.precision.signed except Exception: # FixedPrecisionType wasn't correctly stored in layer attributes, use default values - pass + fp_bits = 18 + fp_integer = 8 + fp_signed = True + if fp_signed is False: raise Exception('Softmax types need to be signed') - else: - fp_bits = precision.width - fp_integer = precision.integer - fp_signed = precision.signed - - f_bits = fp_bits - fp_integer - sep = '' - for i in range(table_size): - # Index represents the raw bit pattern of the input - real_val_in = i * (2.0 ** (-f_bits)) + else: + fp_bits = 18 + fp_integer = 8 + fp_signed = True - if real_val_in == 0: - real_val = 999.0 - else: - real_val = 1.0 / real_val_in + sep = '' + N = ceil_log2(table_size) + for i in range(table_size): + f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed) + b = uint_to_binary(i, N) + b.insert(0, 0) + f.set_msb_bits(b) + real_val = f.inv_float() + h_file.write(sep + str(real_val)) + sep = ', ' - h_file.write(sep + str(real_val)) - sep = ', ' + h_file.write('};\n\n') + h_file.write('#endif') - h_file.write('};\n') - h_file.close() def __write_exp_table_latency(self, model, path): table_name = 'exp_table_latency' @@ -1015,3 +1034,6 @@ def write_hls(self, model): self.write_generated_code(model) self.write_yml(model) self.write_tar(model) + + + From 31b7ad65eca392429b2da852097261acebf883be Mon Sep 17 00:00:00 2001 From: bugracyln Date: Tue, 14 Apr 2026 21:39:58 +0100 Subject: [PATCH 12/12] table generation cleanup --- hls4ml/writer/oneapi_writer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hls4ml/writer/oneapi_writer.py b/hls4ml/writer/oneapi_writer.py index 7f95830c21..320afa74db 100644 --- a/hls4ml/writer/oneapi_writer.py +++ b/hls4ml/writer/oneapi_writer.py @@ -722,8 +722,7 @@ def __write_exp_table(self, model, path): h_file.write(f'#define {header_name.upper()}_H_\n\n') h_file.write(f'static constexpr {table_name}_t {table_name}[{table_size}] = {{') - - #ac_type = layer.get_input_variable().type + ac_type = layer.get_attr('inp_norm_t') if ac_type is not None: