Aurora LTS 2523.40#574
Open
michel2323 wants to merge 10 commits into
Open
Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.

diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl
index 8c62b75..108a001 100644
--- a/deps/generate_interfaces.jl
+++ b/deps/generate_interfaces.jl
@@ -337,34 +337,34 @@ function generate_headers(library::String, filename::Vector{String}, output::Str
end
end
- # Dedup: when two signatures map to the same C function name (because MKL
- # added an overload), keep the one with more parameters — typically the
- # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
- # Without this the generated onemkl.cpp has duplicate function definitions
- # and won't compile.
- _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:pos-1]))[end]))
- _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[pos+1:ep-1]) + 1)
- keep_idx = Dict{String,Int}()
- keep_pc = Dict{String,Int}()
- for (i, sig) in enumerate(signatures)
- (sig[2] in blacklist) && continue
- fn = _fn_name(sig[1])
- pc = _param_cnt(sig[1])
- if !haskey(keep_idx, fn) || pc > keep_pc[fn]
- keep_idx[fn] = i
- keep_pc[fn] = pc
+ # Dedup: when two signatures map to the same C function name (because MKL
+ # added an overload), keep the one with more parameters — typically the
+ # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
+ # Without this the generated onemkl.cpp has duplicate function definitions
+ # and won't compile.
+ _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:(pos - 1)]))[end]))
+ _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[(pos + 1):(ep - 1)]) + 1)
+ keep_idx = Dict{String, Int}()
+ keep_pc = Dict{String, Int}()
+ for (i, sig) in enumerate(signatures)
+ (sig[2] in blacklist) && continue
+ fn = _fn_name(sig[1])
+ pc = _param_cnt(sig[1])
+ if !haskey(keep_idx, fn) || pc > keep_pc[fn]
+ keep_idx[fn] = i
+ keep_pc[fn] = pc
+ end
end
- end
- keep_set = Set(values(keep_idx))
+ keep_set = Set(values(keep_idx))
path_oneapi_headers = joinpath(@__DIR__, output)
oneapi_headers = open(path_oneapi_headers, "w")
- for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
+ for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
# Blacklist
(name_routine in blacklist) && continue
- # Dedup
- (i in keep_set) || continue
+ # Dedup
+ (i in keep_set) || continue
# Pass scalars (e.g. alpha/beta inputs) as references instead of values
for type in ("short", "float", "double", "float _Complex", "double _Complex")
diff --git a/lib/level-zero/cmdlist.jl b/lib/level-zero/cmdlist.jl
index 24d1d3a..19c9aa3 100644
--- a/lib/level-zero/cmdlist.jl
+++ b/lib/level-zero/cmdlist.jl
@@ -57,7 +57,7 @@ end
# oversubscription matters more than speed.
const sync_each_submission = Ref{Bool}(false)
-function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence=nothing)
+function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence = nothing)
r = zeCommandQueueExecuteCommandLists(queue, length(lists), lists, something(fence, C_NULL))
sync_each_submission[] && synchronize(queue)
return r
diff --git a/lib/level-zero/oneL0.jl b/lib/level-zero/oneL0.jl
index e80c893..78a19a8 100644
--- a/lib/level-zero/oneL0.jl
+++ b/lib/level-zero/oneL0.jl
@@ -195,7 +195,7 @@ function __init__()
validation_layer[] = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false"))
parameter_validation[] = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false"))
- sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
+ return sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
end
end
diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl
index e1b7327..c3d9958 100644
--- a/lib/support/liboneapi_support.jl
+++ b/lib/support/liboneapi_support.jl
@@ -6429,194 +6429,240 @@ function onemklZunmqr_batch_scratchpad_size(device_queue, side, trans, m, n, k,
end
function onemklXsparse_init_matrix_handle(p_spmat)
- @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
+ return @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
end
function onemklXsparse_release_matrix_handle(device_queue, p_spmat)
@ccall liboneapi_support.onemklXsparse_release_matrix_handle(device_queue::syclQueue_t,
- p_spmat::Ptr{matrix_handle_t})::Cint
+ p_spmat::Ptr{matrix_handle_t}
+ )::Cint
end
-function onemklSsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklSsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{Cfloat})::Cint
end
-function onemklSsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklSsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklSsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{Cfloat})::Cint
end
-function onemklDsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklDsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{Cdouble})::Cint
end
-function onemklDsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklDsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklDsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{Cdouble})::Cint
end
-function onemklCsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklCsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{ComplexF32})::Cint
end
-function onemklCsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklCsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklCsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{ComplexF32})::Cint
end
-function onemklZsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklZsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{ComplexF64})::Cint
end
-function onemklZsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklZsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklZsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{ComplexF64})::Cint
end
-function onemklSsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklSsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklSsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklDsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklDsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklCsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklCsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklZsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklZsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklSsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklSsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklDsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklDsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklCsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklCsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklSsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6624,10 +6670,11 @@ function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{Cfloat})::Cint
end
-function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklSsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6635,10 +6682,11 @@ function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{Cfloat})::Cint
end
-function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklDsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6646,10 +6694,11 @@ function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{Cdouble})::Cint
end
-function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklDsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6657,10 +6706,11 @@ function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{Cdouble})::Cint
end
-function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklCsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6668,10 +6718,11 @@ function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{ComplexF32})::Cint
end
-function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklCsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6679,10 +6730,11 @@ function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{ComplexF32})::Cint
end
-function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklZsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6690,10 +6742,11 @@ function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{ComplexF64})::Cint
end
-function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklZsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6701,128 +6754,160 @@ function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{ComplexF64})::Cint
end
-function onemklSsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklSsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklDsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklCsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklZsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{ComplexF32})::Cint
+function onemklSsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklSsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklDsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklDsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklCsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklCsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
end
function onemklXsparse_init_matmat_descr(p_desc)
diff --git a/src/array.jl b/src/array.jl
index 0e79dde..6e62640 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -526,12 +526,12 @@ function Base.fill!(A::oneDenseArray{T}, val) where T
val = convert(T, val)
sizeof(T) == 0 && return A
- # execute! is async, so we need to allocate the pattern in USM memory and keep it alive
- # until the operation completes. The fill reads this host buffer on the GPU, so it must
- # be made resident on the device — a non-resident host buffer read by a kernel can take
- # a NotPresent pagefault on the LTS NEO stack (see `allocate(::HostBuffer, ...)`).
+ # execute! is async, so we need to allocate the pattern in USM memory and keep it alive
+ # until the operation completes. The fill reads this host buffer on the GPU, so it must
+ # be made resident on the device — a non-resident host buffer read by a kernel can take
+ # a NotPresent pagefault on the LTS NEO stack (see `allocate(::HostBuffer, ...)`).
buf = oneL0.host_alloc(context(A), sizeof(T), Base.datatype_alignment(T))
- oneL0.make_resident(context(A), device(), buf)
+ oneL0.make_resident(context(A), device(), buf)
unsafe_store!(convert(Ptr{T}, buf), val)
unsafe_fill!(context(A), device(), pointer(A), convert(ZePtr{T}, buf), length(A))
synchronize(global_queue(context(A), device()))
diff --git a/src/context.jl b/src/context.jl
index 9150b36..35417c3 100644
--- a/src/context.jl
+++ b/src/context.jl
@@ -231,8 +231,10 @@ function global_queue(ctx::ZeContext, dev::ZeDevice)
GC.enable_finalizers(false)
try
@lock queue_registry_lock begin
- push!(get!(Vector{Tuple{WeakRef,ZeCommandQueue}}, queue_registry, (ctx, dev)),
- (WeakRef(current_task()), queue))
+ push!(
+ get!(Vector{Tuple{WeakRef, ZeCommandQueue}}, queue_registry, (ctx, dev)),
+ (WeakRef(current_task()), queue)
+ )
end
finally
GC.enable_finalizers(true)
@@ -253,14 +255,14 @@ end
# task-local, so once their task is dead no new work can reach them, and the entry can
# be dropped (allowing the queue to be finalized) after a final synchronize.
const queue_registry_lock = ReentrantLock()
-const queue_registry = Dict{Tuple{ZeContext,ZeDevice},Vector{Tuple{WeakRef,ZeCommandQueue}}}()
+const queue_registry = Dict{Tuple{ZeContext, ZeDevice}, Vector{Tuple{WeakRef, ZeCommandQueue}}}()
# synchronize all known queues that target the given context (and device, if specified),
# i.e., all queues whose in-flight work could possibly reference an allocation that is
# about to be freed.
-function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice,Nothing})
+function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice, Nothing})
queues = ZeCommandQueue[]
- stale = Tuple{WeakRef,ZeCommandQueue}[]
+ stale = Tuple{WeakRef, ZeCommandQueue}[]
GC.enable_finalizers(false)
try
@lock queue_registry_lock begin
diff --git a/src/mapreduce.jl b/src/mapreduce.jl
index 28725c8..fc1030a 100644
--- a/src/mapreduce.jl
+++ b/src/mapreduce.jl
@@ -168,8 +168,9 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
if size(Rreduce, 1) == 1
items = clamp(length(Rother), 1, 256)
groups = min(cld(length(Rother), items), 1024)
- @oneapi items=items groups=groups coalesced_mapreduce_device(
- f, op, init, Rreduce, Rother, R′, A)
+ @oneapi items = items groups = groups coalesced_mapreduce_device(
+ f, op, init, Rreduce, Rother, R′, A
+ )
return R
end
diff --git a/src/utils.jl b/src/utils.jl
index e7d232e..ab2fe40 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -2,7 +2,8 @@
function versioninfo(io::IO=stdout)
if Sys.islinux()
println(io, "Binary dependencies:")
- for jll in [oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
+ for jll in [
+ oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll]
name = string(jll)
print(io, "- $(name[1:end-4]): $(Base.pkgversion(jll))") |
plan_fft and friends passed `pointer(lengths)`/`pointer(strides)` to the onemklDft* ccall wrappers. A raw Ptr does not root the vector, so under GC pressure it was collected mid-call and oneMKL read garbage through the dangling pointer, making multi-dimensional plan commits fail nondeterministically (invalid_descriptor_exception or SIGFPE, surfacing as DivideError in the fft testset). Pass the arrays themselves so the ccall roots them. Also create the plan's SYCL queue through the cached task-local sycl_queue() accessor like the other oneMKL wrappers, instead of fresh syclContext/syclQueue objects per plan whose finalizers tear down SYCL runtime state for the still-in-use underlying queue. Harden the free-path queue synchronization against finalized queues: ZeCommandQueue now nulls its handle on destroy, and synchronize_all_queues skips destroyed queues and keeps finalizers disabled until it is done synchronizing. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Two distinct GPU pagefault crashes on the Aurora LTS NEO stack (25.18), both surfacing as a banned context -> ZE_RESULT_ERROR_UNKNOWN at an innocent later op: 1. Host-buffer residency (the array-test crash). HostBuffer-backed oneArrays were never made resident on the device, unlike DeviceBuffer/SharedBuffer. A kernel reading a non-resident host buffer intermittently takes a NotPresent pagefault under GC/alloc churn. Isolated with a minimal read/noread/readsync loop: the fault is at the GPU *read*, not the free, and is not curable by any synchronization. Fix: make_resident in allocate(::HostBuffer) (src/pool.jl) and for fill!'s host pattern buffer (src/array.jl). 2. Device-buffer free under a dead task's queue. global_queue is task-local, so a test file's task can die with work still in flight; the queue was destroyed without draining and a WeakRef registry hid it from the pre-free synchronize, so a later free raced the in-flight kernel. Fix: queue finalizer drains before destroy (lib/level-zero/cmdqueue.jl); the queue registry holds strong queue refs keyed by a weak owning-task ref and retires drained dead-task queues (src/context.jl). Validated with single-process reproducers and the full array test file (30/30). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
…ound

Under heavy multi-process oversubscription of a single tile on the Aurora LTS NEO stack, a whole-queue zeCommandQueueSynchronize does not reliably retire the tail of an earlier, separately submitted command list, producing silent "dropped tail" corruption (e.g. the gpuarrays/broadcasting `A .* ET(10)` mismatch; see ISSUE_dropped_tail.md). Synchronizing after every submission eliminates it. This workaround is off by default (it costs roughly 3x in throughput) and is enabled with ONEAPI_SYNC_EACH_SUBMISSION=1; it is applied at the single submission chokepoint, execute!(queue, lists, fence).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
ONEAPI_TEST_SPREAD_GPUS=1 pins each test worker process to a distinct GPU via ZE_AFFINITY_MASK (claimed round-robin through an atomic mkdir counter, set before `using oneAPI` so the Level Zero driver picks it up at init). This spreads the suite across all tiles instead of oversubscribing device 0. device() is task-local and Malt runs each test in a fresh task, so a device! in init_worker_code would not stick — process-level pinning is the robust approach. Default (unset) keeps every worker on the first device, preserving single-tile oversubscription which is useful for surfacing contention bugs. Verified: 6 concurrent claimers -> 6 distinct device UUIDs; real harness with --jobs=4 spreads cleanly (SUCCESS). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
eec2785 to
991d29e
Compare
Codecov Report❌ Patch coverage is
Additional details and impacted files

```diff
@@            Coverage Diff             @@
##             main     #574      +/-   ##
==========================================
+ Coverage   79.67%   80.02%   +0.35%
==========================================
  Files          48       48
  Lines        3232     3284      +52
==========================================
+ Hits         2575     2628      +53
+ Misses        657      656       -1
```

☔ View full report in Codecov by Harness.

🚀 New features to boost your workflow:
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.