DPNP C++ backend kernel library 0.18.0dev0
Data Parallel Extension for NumPy*
common.hpp
//*****************************************************************************
// Copyright (c) 2023-2025, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <oneapi/mkl.hpp>
#include <sycl/sycl.hpp>

#include <dpctl4pybind11.hpp>
#include <pybind11/pybind11.h>

// dpctl tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/type_dispatch.hpp"

#include "dpnp_utils.hpp"

static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED,
              "OneMKL does not meet minimum version requirement");

namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;

namespace dpnp::extensions::vm::py_internal
{
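/**
 * @brief Checks whether a unary elementwise function can be offloaded to
 * its OneMKL VM implementation.
 *
 * Returns true only when the source/destination types match the registered
 * dispatch data, the device and queues are suitable, and the arrays are
 * non-empty, same-shaped, non-overlapping, and C-contiguous; otherwise the
 * caller is expected to fall back to a generic implementation.
 */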
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_unary_ufunc(sycl::queue &exec_q,
                              const dpctl::tensor::usm_ndarray &src,
                              const dpctl::tensor::usm_ndarray &dst,
                              const output_typesT &output_type_vec,
                              const contig_dispatchT &contig_dispatch_vector)
{
    // check type_nums
    int src_typenum = src.get_typenum();
    int dst_typenum = dst.get_typenum();

    auto array_types = td_ns::usm_ndarray_types();
    int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int func_output_typeid = output_type_vec[src_typeid];
    if (dst_typeid != func_output_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on the host if the device has no
    // fp64 support, so don't use them in that case
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src_shape = src.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<size_t>(src_shape[i]);
        shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // if nelems is zero, return false
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    auto dst_offsets = dst.get_minmax_offsets();
    {
        size_t range =
            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src, dst)) {
        return false;
    }

    // support only contiguous inputs
    bool is_src_c_contig = src.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig = (is_src_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // MKL function is not defined for the type
    if (contig_dispatch_vector[src_typeid] == nullptr) {
        return false;
    }
    return true;
}

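/**
 * @brief Checks whether a binary elementwise function can be offloaded to
 * its OneMKL VM implementation.
 *
 * Mirrors the unary check above, additionally requiring that both inputs
 * have the same type and the same shape as the destination.
 */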
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_binary_ufunc(sycl::queue &exec_q,
                               const dpctl::tensor::usm_ndarray &src1,
                               const dpctl::tensor::usm_ndarray &src2,
                               const dpctl::tensor::usm_ndarray &dst,
                               const output_typesT &output_type_table,
                               const contig_dispatchT &contig_dispatch_table)
{
    // check type_nums
    int src1_typenum = src1.get_typenum();
    int src2_typenum = src2.get_typenum();
    int dst_typenum = dst.get_typenum();

    auto array_types = td_ns::usm_ndarray_types();
    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int output_typeid = output_type_table[src1_typeid][src2_typeid];
    if (output_typeid != dst_typeid) {
        return false;
    }

    // types must be the same
    if (src1_typeid != src2_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on the host if the device has no
    // fp64 support, so don't use them in that case
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src1_shape = src1.get_shape_raw();
    const py::ssize_t *src2_shape = src2.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<size_t>(src1_shape[i]);
        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] &&
                                        src2_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // if nelems is zero, return false
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    auto dst_offsets = dst.get_minmax_offsets();
    {
        size_t range =
            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src1, dst) || overlap(src2, dst)) {
        return false;
    }

    // support only contiguous inputs
    bool is_src1_c_contig = src1.is_c_contiguous();
    bool is_src2_c_contig = src2.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig =
        (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // MKL function is not defined for the type (the typeids are equal at
    // this point, so indexing the table on the diagonal is well-defined)
    if (contig_dispatch_table[src1_typeid][src2_typeid] == nullptr) {
        return false;
    }
    return true;
}

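/**
 * @brief A macro used to define the factories and the
 * populate_dispatch_vectors() helper of a unary VM ufunc: a vector of
 * output type ids and a vector of contiguous implementation pointers.
 */
// For illustration (hypothetical ufunc name): an extension implementing
// "sin" would define OutputType<T> and sin_contig_impl<T>, declare
// output_typeid_vector and contig_dispatch_vector, and then expand
// MACRO_POPULATE_DISPATCH_VECTORS(sin).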
#define MACRO_POPULATE_DISPATCH_VECTORS(__name__)                              \
    template <typename fnT, typename T>                                        \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type,   \
                                         void>) {                              \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T>;                              \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T>                                        \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T>::value_type;                     \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_vectors(void)                                \
    {                                                                          \
        py_internal::init_ufunc_dispatch_vector<int, TypeMapFactory>(          \
            output_typeid_vector);                                             \
        py_internal::init_ufunc_dispatch_vector<unary_contig_impl_fn_ptr_t,    \
                                                ContigFactory>(                \
            contig_dispatch_vector);                                           \
    };

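/**
 * @brief A macro used to define the factories and the
 * populate_dispatch_tables() helper of a binary VM ufunc: a table of
 * output type ids and a table of contiguous implementation pointers.
 */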
#define MACRO_POPULATE_DISPATCH_TABLES(__name__)                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<                                      \
                              typename OutputType<T1, T2>::value_type, void>)  \
            {                                                                  \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T1, T2>;                         \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T1, T2>::value_type;                \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_tables(void)                                 \
    {                                                                          \
        py_internal::init_ufunc_dispatch_table<int, TypeMapFactory>(           \
            output_typeid_vector);                                             \
        py_internal::init_ufunc_dispatch_table<binary_contig_impl_fn_ptr_t,    \
                                               ContigFactory>(                 \
            contig_dispatch_vector);                                           \
    };

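/**
 * @brief Populates a dispatch vector indexed by type id, using @p factoryT
 * for every type known to the dpctl type-dispatch machinery.
 */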
template <typename dispatchT,
          template <typename fnT, typename T>
          typename factoryT,
          int _num_types = td_ns::num_types>
void init_ufunc_dispatch_vector(dispatchT dispatch_vector[])
{
    td_ns::DispatchVectorBuilder<dispatchT, factoryT, _num_types> dvb;
    dvb.populate_dispatch_vector(dispatch_vector);
}

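/**
 * @brief Populates a dispatch table indexed by a pair of type ids, using
 * @p factoryT for every pair of types known to the dpctl type-dispatch
 * machinery.
 */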
template <typename dispatchT,
          template <typename fnT, typename D, typename S>
          typename factoryT,
          int _num_types = td_ns::num_types>
void init_ufunc_dispatch_table(dispatchT dispatch_table[][_num_types])
{
    td_ns::DispatchTableBuilder<dispatchT, factoryT, _num_types> dtb;
    dtb.populate_dispatch_table(dispatch_table);
}
} // namespace dpnp::extensions::vm::py_internal