Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
d4fd805
Rename folder dpctl to dpctl_ext
vlad-perevezentsev Feb 2, 2026
c040713
Add simplify_iteration_space implementation to libtensor
vlad-perevezentsev Feb 5, 2026
14b466f
Extend codespell ignore list for libtensor
vlad-perevezentsev Feb 5, 2026
dcc421b
Add copy_and_cast kernels to libtensor
vlad-perevezentsev Feb 5, 2026
5a9c14c
Add copy_usm_ndarray_into_usm_ndarray implementation
vlad-perevezentsev Feb 5, 2026
4f63340
Add pybind11 bindings for dpctl_ext.tensor._tensor_impl
vlad-perevezentsev Feb 5, 2026
634579c
Add CMake build files for dpctl_ext
vlad-perevezentsev Feb 5, 2026
79d40f2
Add empty __init__ to dpctl_ext/
vlad-perevezentsev Feb 5, 2026
7949c17
Enable _same_logical_tensors in _tensor_impl
vlad-perevezentsev Feb 5, 2026
29d6c02
Add device_support_queries to enable default device types
vlad-perevezentsev Feb 5, 2026
936e719
Enable building and packaging of dpctl_ext
vlad-perevezentsev Feb 5, 2026
cd85f1e
Use _tensor_impl from dpctl_ext.tensor in dpnp
vlad-perevezentsev Feb 5, 2026
0c6780a
Move put() and take() to dpctl_ext/tensor
vlad-perevezentsev Feb 5, 2026
87e5482
Use put/take from dpctl_ext.tensor in dpnp
vlad-perevezentsev Feb 5, 2026
b537f30
Move full() to dpctl_ext/tensor
vlad-perevezentsev Feb 5, 2026
d50f263
Use full and _full_usm_ndarray from dpctl_ext in dpnp
vlad-perevezentsev Feb 6, 2026
f189dc5
Update .gitignore to ignore .so files in dpctl_ext
vlad-perevezentsev Feb 6, 2026
f9a1817
Move _zeros_usm_ndarray to dpctl_ext
vlad-perevezentsev Feb 6, 2026
4b8505a
Use _zeros_usm_ndarray from dpctl_ext in dpnp_fill.py
vlad-perevezentsev Feb 6, 2026
61106b2
Move linear-sequence implementations to dpctl_ext/tensor
vlad-perevezentsev Feb 6, 2026
a030579
Use _tensor_impl from dpctl_ext in dpnp_utils_fft.py
vlad-perevezentsev Feb 6, 2026
a1d6fa3
Move tril()/triu() to dpctl_ext/tensor
vlad-perevezentsev Feb 6, 2026
f1d6e56
Use tril/triu/_tril from dpctl_ext.tensor in dpnp
vlad-perevezentsev Feb 6, 2026
6680790
Disable pylint no-name-in-module for dpctl_ext
vlad-perevezentsev Feb 9, 2026
263b717
Add TODO comments
vlad-perevezentsev Feb 12, 2026
4130c1b
Use default_device_complex_type from dpctl_ext on test_array_api_info.py
vlad-perevezentsev Feb 12, 2026
17ca9ab
Remove unused build_dpctl_ext function
vlad-perevezentsev Feb 12, 2026
79cb2a4
Apply remarks for CMake files
vlad-perevezentsev Feb 12, 2026
4bf080e
Apply remarks for c++ files
vlad-perevezentsev Feb 12, 2026
cfa6cd6
Remove linear-sequence implementations
vlad-perevezentsev Feb 16, 2026
e0e50ac
Merge move_tensor_impl_ext into move_tensor_impl_ext_part_2
vlad-perevezentsev Feb 16, 2026
087a2ec
Use _tensor_impl from dpctl_ext in dpnp
vlad-perevezentsev Feb 16, 2026
f4492fb
Add missing include
vlad-perevezentsev Feb 16, 2026
b367c9f
Use nested namespace syntax
vlad-perevezentsev Feb 16, 2026
3113716
Add missing include complex
vlad-perevezentsev Feb 17, 2026
978afee
Add missing memory and queue checks
vlad-perevezentsev Feb 17, 2026
fec84ec
Move ti._copy_numpy_ndarray_into_usm_ndarray()
vlad-perevezentsev Feb 17, 2026
497e810
Move asnumpy(), from_numpy(), to_numpy() to dpctl_ext/tensor
vlad-perevezentsev Feb 17, 2026
3be4e14
Update dpnp.asnumpy to use dpctl_ext functions
vlad-perevezentsev Feb 17, 2026
1d88365
Move copy(), astype() to dpctl_ext/tensor
vlad-perevezentsev Feb 17, 2026
fd18db0
Reuse astype(), copy() from dpctl_ext
vlad-perevezentsev Feb 17, 2026
0976171
Move _copy_usm_ndarray_for_reshape
vlad-perevezentsev Feb 17, 2026
318692e
Move reshape() to dpctl_ext/tensor
vlad-perevezentsev Feb 17, 2026
3c0c113
Reuse reshape from dpctl_ext in dpnp
vlad-perevezentsev Feb 17, 2026
30f2c53
Move _copy_usm_ndarray_for_roll
vlad-perevezentsev Feb 17, 2026
85c29da
Move roll() to dpctl_ext/tensor
vlad-perevezentsev Feb 17, 2026
6e8d857
Update dpnp.roll to use dpctl_ext
vlad-perevezentsev Feb 17, 2026
19e93b9
Update .gitignore to ignore .so files in dpctl_ext
vlad-perevezentsev Feb 18, 2026
b111e49
Remove unused includes in tensor_ctors.cpp
vlad-perevezentsev Feb 18, 2026
c082224
Use Python::Module for dpctl_ext static lib to avoid libpython depend…
vlad-perevezentsev Feb 18, 2026
9e7deb3
Merge move_tensor_impl_ext into move_tensor_impl_ext_part_2
vlad-perevezentsev Feb 18, 2026
1a736f7
Merge include-dpctl-tensor into move_tensor_impl_ext_part_2
vlad-perevezentsev Feb 19, 2026
8bde614
Merge move_tensor_impl_ext_part_2 into move_tensor_impl_copy
vlad-perevezentsev Feb 19, 2026
c3bc8ba
Move ti.mask_positions() and ti._cumsum_1d()
vlad-perevezentsev Feb 19, 2026
69b36b2
Move ti._extract(), ti._place(), ti._nonzero()
vlad-perevezentsev Feb 19, 2026
70a0fc6
Move place() to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
afa5411
Move extract() to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
7feb4ee
Move nonzero() to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
f63f2f0
Move put_along_axis to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
6fecefe
Move take_along_axis() to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
b60d095
Move ti._eye() to dpctl_ext/tensor/libtensor
vlad-perevezentsev Feb 19, 2026
a1eea4e
Move eye() to dpctl_ext/tensor and reuse it in dpnp
vlad-perevezentsev Feb 19, 2026
134bb35
Update imports in dpnp/dpnp_iface_indexing.py
vlad-perevezentsev Feb 20, 2026
991693f
Merge include-dpctl-tensor into move_tensor_impl_accum_bool_ind_eye
vlad-perevezentsev Mar 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dpctl_ext/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@ set(_static_lib_sources
)
set(_tensor_impl_sources
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/tensor_ctors.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/accumulators.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/accumulators.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_and_cast_usm_to_usm.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_as_contig.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_reshape.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/copy_for_roll.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/integer_advanced_indexing.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_advanced_indexing.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/eye_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_advanced_indexing.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/eye_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/zeros_ctor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/triul_ctor.cpp
Expand Down
12 changes: 12 additions & 0 deletions dpctl_ext/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,19 @@
to_numpy,
)
from dpctl_ext.tensor._ctors import (
eye,
full,
tril,
triu,
)
from dpctl_ext.tensor._indexing_functions import (
extract,
nonzero,
place,
put,
put_along_axis,
take,
take_along_axis,
)
from dpctl_ext.tensor._manipulation_functions import (
roll,
Expand All @@ -52,12 +58,18 @@
"asnumpy",
"astype",
"copy",
"extract",
"eye",
"from_numpy",
"full",
"nonzero",
"place",
"put",
"put_along_axis",
"reshape",
"roll",
"take",
"take_along_axis",
"to_numpy",
"tril",
"triu",
Expand Down
306 changes: 306 additions & 0 deletions dpctl_ext/tensor/_copy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
# *****************************************************************************

import builtins
import operator
from numbers import Integral

import dpctl
import dpctl.memory as dpm
Expand All @@ -39,8 +41,11 @@

# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
import dpctl_ext.tensor as dpt_ext
import dpctl_ext.tensor._tensor_impl as ti

from ._numpy_helper import normalize_axis_index

__doc__ = (
"Implementation module for copy- and cast- operations on "
":class:`dpctl.tensor.usm_ndarray`."
Expand Down Expand Up @@ -130,6 +135,307 @@ def _copy_from_numpy_into(dst, np_ary):
)


def _extract_impl(ary, ary_mask, axis=0):
    """
    Extract elements of `ary` by applying mask starting from slot
    dimension `axis`.

    Parameters:
        ary (dpctl.tensor.usm_ndarray):
            Input array to extract elements from.
        ary_mask (dpctl.tensor.usm_ndarray or numpy.ndarray):
            Mask array; may span one or more consecutive dimensions of
            `ary` starting at `axis`. A NumPy mask is first copied into
            USM memory on `ary`'s queue.
        axis (int):
            First dimension of `ary` the mask applies to (negative
            values are normalized against `ary.ndim`).

    Returns:
        usm_ndarray: new array holding the selected elements; its shape
        is `ary.shape` with the masked dimensions collapsed into a
        single dimension of length equal to the number of selected
        positions.

    Raises:
        TypeError: if `ary` or `ary_mask` has an unsupported type.
        dpctl.utils.ExecutionPlacementError: if `ary` and a usm_ndarray
            mask are associated with different SYCL queues.
        ValueError: if `axis` plus the mask rank exceeds `ary.ndim`.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    if isinstance(ary_mask, dpt.usm_ndarray):
        # both inputs are device arrays: they must share an execution queue
        dst_usm_type = dpctl.utils.get_coerced_usm_type(
            (ary.usm_type, ary_mask.usm_type)
        )
        exec_q = dpctl.utils.get_execution_queue(
            (ary.sycl_queue, ary_mask.sycl_queue)
        )
        if exec_q is None:
            raise dpctl.utils.ExecutionPlacementError(
                "arrays have different associated queues. "
                "Use `y.to_device(x.device)` to migrate."
            )
    elif isinstance(ary_mask, np.ndarray):
        # host mask: adopt `ary`'s queue/usm type and copy mask to device
        dst_usm_type = ary.usm_type
        exec_q = ary.sycl_queue
        ary_mask = dpt.asarray(
            ary_mask, usm_type=dst_usm_type, sycl_queue=exec_q
        )
    else:
        raise TypeError(
            "Expecting type dpctl.tensor.usm_ndarray or numpy.ndarray, got "
            f"{type(ary_mask)}"
        )
    ary_nd = ary.ndim
    pp = normalize_axis_index(operator.index(axis), ary_nd)
    mask_nd = ary_mask.ndim
    # the mask's dimensions must fit inside ary's dimensions at offset pp
    if pp < 0 or pp + mask_nd > ary_nd:
        raise ValueError(
            "Parameter p is inconsistent with input array dimensions"
        )
    mask_nelems = ary_mask.size
    # int32 counters suffice while the mask has fewer than 2**31-1 elements
    cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64
    cumsum = dpt.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device)
    exec_q = cumsum.sycl_queue
    _manager = dpctl.utils.SequentialOrderManager[exec_q]
    dep_evs = _manager.submitted_events
    # host-side count of selected positions; used below to size the output
    mask_count = ti.mask_positions(
        ary_mask, cumsum, sycl_queue=exec_q, depends=dep_evs
    )
    # masked dims collapse to a single dimension of length mask_count
    dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
    dst = dpt.empty(
        dst_shape, dtype=ary.dtype, usm_type=dst_usm_type, device=ary.device
    )
    if dst.size == 0:
        # nothing selected; skip kernel submission
        return dst
    hev, ev = ti._extract(
        src=ary,
        cumsum=cumsum,
        axis_start=pp,
        axis_end=pp + mask_nd,
        dst=dst,
        sycl_queue=exec_q,
        depends=dep_evs,
    )
    _manager.add_event_pair(hev, ev)
    return dst


def _get_indices_queue_usm_type(inds, queue, usm_type):
    """
    Validate that each element of `inds` is a NumPy ndarray, a
    usm_ndarray of integral dtype, or a Python integer; at least one
    element must be an array.

    The queue and usm type of every usm_ndarray index are combined with
    the provided `queue` and `usm_type` to compute a common execution
    queue (may be ``None`` if the queues differ) and a coerced usm type.

    Returns:
        tuple: (execution queue or None, coerced usm type)

    Raises:
        IndexError: if an array index has a non-integral dtype.
        TypeError: if an element is neither array nor integer, or no
            element is an array.
    """
    collected_queues = [queue]
    collected_usm_types = [usm_type]
    seen_array = False
    for item in inds:
        if isinstance(item, (np.ndarray, dpt.usm_ndarray)):
            seen_array = True
            if item.dtype.kind not in "ui":
                raise IndexError(
                    "arrays used as indices must be of integer (or boolean) "
                    "type"
                )
            if isinstance(item, dpt.usm_ndarray):
                collected_queues.append(item.sycl_queue)
                collected_usm_types.append(item.usm_type)
        elif not isinstance(item, Integral):
            raise TypeError(
                "all elements of `ind` expected to be usm_ndarrays, "
                f"NumPy arrays, or integers, found {type(item)}"
            )
    if not seen_array:
        raise TypeError(
            "at least one element of `inds` expected to be an array"
        )
    coerced_usm_type = dpctl.utils.get_coerced_usm_type(collected_usm_types)
    common_queue = dpctl.utils.get_execution_queue(collected_queues)
    return common_queue, coerced_usm_type


def _nonzero_impl(ary):
    """
    Compute indices of non-zero elements of `ary`.

    Parameters:
        ary (dpctl.tensor.usm_ndarray):
            Input array.

    Returns:
        tuple: `ary.ndim` one-dimensional usm_ndarray index arrays, one
        per dimension, each of length equal to the number of non-zero
        elements.

    Raises:
        TypeError: if `ary` is not a usm_ndarray.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    exec_q = ary.sycl_queue
    usm_type = ary.usm_type
    mask_nelems = ary.size
    # int32 counters suffice while the array has fewer than 2**31-1 elements
    cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64
    cumsum = dpt.empty(
        mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C"
    )
    _manager = dpctl.utils.SequentialOrderManager[exec_q]
    dep_evs = _manager.submitted_events
    # host-side count of non-zero elements; sizes the index matrix below
    mask_count = ti.mask_positions(
        ary, cumsum, sycl_queue=exec_q, depends=dep_evs
    )
    indexes_dt = ti.default_device_index_type(exec_q.sycl_device)
    # one row of indices per dimension of `ary`
    indexes = dpt.empty(
        (ary.ndim, mask_count),
        dtype=indexes_dt,
        usm_type=usm_type,
        sycl_queue=exec_q,
        order="C",
    )
    hev, nz_ev = ti._nonzero(cumsum, indexes, ary.shape, exec_q)
    # split the (ndim, mask_count) matrix into per-dimension 1-D views
    res = tuple(indexes[i, :] for i in range(ary.ndim))
    _manager.add_event_pair(hev, nz_ev)
    return res


def _prepare_indices_arrays(inds, q, usm_type):
    """
    Normalize a mix of usm_ndarray and Python int scalar indices into
    broadcast-compatible arrays of a common integral dtype.

    Python scalar integers are promoted to arrays allocated on queue `q`
    with the given `usm_type` (the queue is assumed to be common to the
    arrays already in `inds`). All arrays are then cast to the common
    result dtype and broadcast to a common shape.

    Raises:
        ValueError: if the common dtype of the indices is not integral.
    """
    # scalar integers -> arrays on the common queue
    arrays = tuple(
        ind
        if isinstance(ind, dpt.usm_ndarray)
        else dpt.asarray(ind, usm_type=usm_type, sycl_queue=q)
        for ind in inds
    )

    # determine the common integral dtype, rejecting unsafe promotions
    common_dt = dpt.result_type(*arrays)
    if common_dt.kind not in "ui":
        raise ValueError(
            "cannot safely promote indices to an integer data type"
        )
    arrays = tuple(
        ind if ind.dtype == common_dt else dpt.astype(ind, common_dt)
        for ind in arrays
    )

    # broadcast all index arrays to a common shape
    return dpt.broadcast_arrays(*arrays)


def _put_multi_index(ary, inds, p, vals, mode=0):
    """
    Write `vals` into `ary` at positions selected by index arrays
    `inds` applied along consecutive dimensions starting at axis `p`.

    Parameters:
        ary (dpctl.tensor.usm_ndarray):
            Destination array, modified in place.
        inds:
            Index array (usm_ndarray/ndarray/int) or list/tuple thereof;
            one entry per indexed dimension.
        p (int):
            First dimension of `ary` the indices apply to.
        vals:
            Values to write; converted/cast to `ary.dtype` and broadcast
            to the expected shape.
        mode (int):
            Out-of-bounds index handling flag, 0 or 1; semantics defined
            by ti._put (presumably wrap vs. clip — confirm against the
            extension).

    Returns:
        None.

    Raises:
        TypeError, ValueError, IndexError: on invalid arguments.
        dpctl.utils.ExecutionPlacementError: if a common execution
            queue cannot be determined.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    ary_nd = ary.ndim
    p = normalize_axis_index(operator.index(p), ary_nd)
    mode = operator.index(mode)
    if mode not in [0, 1]:
        raise ValueError(
            "Invalid value for mode keyword, only 0 or 1 is supported"
        )
    if not isinstance(inds, (list, tuple)):
        inds = (inds,)

    # validate indices and derive a candidate queue/usm type from them
    exec_q, coerced_usm_type = _get_indices_queue_usm_type(
        inds, ary.sycl_queue, ary.usm_type
    )

    if exec_q is not None:
        if not isinstance(vals, dpt.usm_ndarray):
            # host values: materialize them on the common queue
            vals = dpt.asarray(
                vals,
                dtype=ary.dtype,
                usm_type=coerced_usm_type,
                sycl_queue=exec_q,
            )
        else:
            # device values: fold their queue/usm type into the common ones
            exec_q = dpctl.utils.get_execution_queue((exec_q, vals.sycl_queue))
            coerced_usm_type = dpctl.utils.get_coerced_usm_type(
                (
                    coerced_usm_type,
                    vals.usm_type,
                )
            )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Can not automatically determine where to allocate the "
            "result or performance execution. "
            "Use `usm_ndarray.to_device` method to migrate data to "
            "be associated with the same queue."
        )

    # promote scalars to arrays, unify dtype, broadcast to common shape
    inds = _prepare_indices_arrays(inds, exec_q, coerced_usm_type)

    ind0 = inds[0]
    ary_sh = ary.shape
    p_end = p + len(inds)
    if 0 in ary_sh[p:p_end] and ind0.size != 0:
        raise IndexError(
            "cannot put into non-empty indices along an empty axis"
        )
    # indexed dims of ary are replaced by the (broadcast) index shape
    expected_vals_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:]
    if vals.dtype == ary.dtype:
        rhs = vals
    else:
        rhs = dpt_ext.astype(vals, ary.dtype)
    rhs = dpt.broadcast_to(rhs, expected_vals_shape)
    # order the put kernel after all previously submitted work on this queue
    _manager = dpctl.utils.SequentialOrderManager[exec_q]
    dep_ev = _manager.submitted_events
    hev, put_ev = ti._put(
        dst=ary,
        ind=inds,
        val=rhs,
        axis_start=p,
        mode=mode,
        sycl_queue=exec_q,
        depends=dep_ev,
    )
    _manager.add_event_pair(hev, put_ev)
    return


def _take_multi_index(ary, inds, p, mode=0):
    """
    Gather elements of `ary` selected by index arrays `inds` applied
    along consecutive dimensions starting at axis `p`.

    Parameters:
        ary (dpctl.tensor.usm_ndarray):
            Source array.
        inds:
            Index array (usm_ndarray/ndarray/int) or list/tuple thereof;
            one entry per indexed dimension.
        p (int):
            First dimension of `ary` the indices apply to.
        mode (int):
            Out-of-bounds index handling flag, 0 or 1; semantics defined
            by ti._take (presumably wrap vs. clip — confirm against the
            extension).

    Returns:
        usm_ndarray: new array of shape
        ``ary.shape[:p] + inds[0].shape + ary.shape[p + len(inds):]``.

    Raises:
        TypeError, ValueError, IndexError: on invalid arguments.
        dpctl.utils.ExecutionPlacementError: if a common execution
            queue cannot be determined.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    ary_nd = ary.ndim
    p = normalize_axis_index(operator.index(p), ary_nd)
    mode = operator.index(mode)
    if mode not in [0, 1]:
        raise ValueError(
            "Invalid value for mode keyword, only 0 or 1 is supported"
        )
    if not isinstance(inds, (list, tuple)):
        inds = (inds,)

    # validate indices and derive a common queue/usm type for the result
    exec_q, res_usm_type = _get_indices_queue_usm_type(
        inds, ary.sycl_queue, ary.usm_type
    )
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Can not automatically determine where to allocate the "
            "result or performance execution. "
            "Use `usm_ndarray.to_device` method to migrate data to "
            "be associated with the same queue."
        )

    # promote scalars to arrays, unify dtype, broadcast to common shape
    inds = _prepare_indices_arrays(inds, exec_q, res_usm_type)

    ind0 = inds[0]
    ary_sh = ary.shape
    p_end = p + len(inds)
    if 0 in ary_sh[p:p_end] and ind0.size != 0:
        raise IndexError("cannot take non-empty indices from an empty axis")
    # indexed dims of ary are replaced by the (broadcast) index shape
    res_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:]
    res = dpt.empty(
        res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q
    )
    # order the take kernel after all previously submitted work on this queue
    _manager = dpctl.utils.SequentialOrderManager[exec_q]
    dep_ev = _manager.submitted_events
    hev, take_ev = ti._take(
        src=ary,
        ind=inds,
        dst=res,
        axis_start=p,
        mode=mode,
        sycl_queue=exec_q,
        depends=dep_ev,
    )
    _manager.add_event_pair(hev, take_ev)
    return res


def from_numpy(np_ary, /, *, device=None, usm_type="device", sycl_queue=None):
"""
from_numpy(arg, device=None, usm_type="device", sycl_queue=None)
Expand Down
Loading
Loading