common.hpp — DPNP C++ backend kernel library 0.19.0dev3
Data Parallel Extension for NumPy*
1//*****************************************************************************
2// Copyright (c) 2023-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <type_traits>
29
30#include <oneapi/mkl.hpp>
31#include <sycl/sycl.hpp>
32
33#include <dpctl4pybind11.hpp>
34#include <pybind11/pybind11.h>
35
36// dpctl tensor headers
37#include "utils/memory_overlap.hpp"
38#include "utils/type_dispatch.hpp"
39
40#include "dpnp_utils.hpp"
41
// Fail the build early if the OneMKL headers are older than the minimum
// version (2023.2.0) required by the VM extension kernels below.
static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED,
              "OneMKL does not meet minimum version requirement");
44
45namespace py = pybind11;
46namespace td_ns = dpctl::tensor::type_dispatch;
47
48namespace dpnp::extensions::vm::py_internal
49{
50template <typename output_typesT, typename contig_dispatchT>
51bool need_to_call_unary_ufunc(sycl::queue &exec_q,
52 const dpctl::tensor::usm_ndarray &src,
53 const dpctl::tensor::usm_ndarray &dst,
54 const output_typesT &output_type_vec,
55 const contig_dispatchT &contig_dispatch_vector)
56{
57 // check type_nums
58 int src_typenum = src.get_typenum();
59 int dst_typenum = dst.get_typenum();
60
61 auto array_types = td_ns::usm_ndarray_types();
62 int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
63 int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
64
65 // check that types are supported
66 int func_output_typeid = output_type_vec[src_typeid];
67 if (dst_typeid != func_output_typeid) {
68 return false;
69 }
70
71 // OneMKL VM functions perform a copy on host if no double type support
72 if (!exec_q.get_device().has(sycl::aspect::fp64)) {
73 return false;
74 }
75
76 // check that queues are compatible
77 if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
78 return false;
79 }
80
81 // dimensions must be the same
82 int dst_nd = dst.get_ndim();
83 if (dst_nd != src.get_ndim()) {
84 return false;
85 }
86 else if (dst_nd == 0) {
87 // don't call OneMKL for 0d arrays
88 return false;
89 }
90
91 // shapes must be the same
92 const py::ssize_t *src_shape = src.get_shape_raw();
93 const py::ssize_t *dst_shape = dst.get_shape_raw();
94 bool shapes_equal(true);
95 size_t src_nelems(1);
96
97 for (int i = 0; i < dst_nd; ++i) {
98 src_nelems *= static_cast<size_t>(src_shape[i]);
99 shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
100 }
101 if (!shapes_equal) {
102 return false;
103 }
104
105 // if nelems is zero, return false
106 if (src_nelems == 0) {
107 return false;
108 }
109
110 // ensure that output is ample enough to accommodate all elements
111 auto dst_offsets = dst.get_minmax_offsets();
112 // destination must be ample enough to accommodate all elements
113 {
114 size_t range =
115 static_cast<size_t>(dst_offsets.second - dst_offsets.first);
116 if (range + 1 < src_nelems) {
117 return false;
118 }
119 }
120
121 // check memory overlap
122 auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
123 if (overlap(src, dst)) {
124 return false;
125 }
126
127 // support only contiguous inputs
128 bool is_src_c_contig = src.is_c_contiguous();
129 bool is_dst_c_contig = dst.is_c_contiguous();
130
131 bool all_c_contig = (is_src_c_contig && is_dst_c_contig);
132 if (!all_c_contig) {
133 return false;
134 }
135
136 // MKL function is not defined for the type
137 if (contig_dispatch_vector[src_typeid] == nullptr) {
138 return false;
139 }
140 return true;
141}
142
143template <typename output_typesT, typename contig_dispatchT>
144bool need_to_call_binary_ufunc(sycl::queue &exec_q,
145 const dpctl::tensor::usm_ndarray &src1,
146 const dpctl::tensor::usm_ndarray &src2,
147 const dpctl::tensor::usm_ndarray &dst,
148 const output_typesT &output_type_table,
149 const contig_dispatchT &contig_dispatch_table)
150{
151 // check type_nums
152 int src1_typenum = src1.get_typenum();
153 int src2_typenum = src2.get_typenum();
154 int dst_typenum = dst.get_typenum();
155
156 auto array_types = td_ns::usm_ndarray_types();
157 int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
158 int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
159 int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
160
161 // check that types are supported
162 int output_typeid = output_type_table[src1_typeid][src2_typeid];
163 if (output_typeid != dst_typeid) {
164 return false;
165 }
166
167 // types must be the same
168 if (src1_typeid != src2_typeid) {
169 return false;
170 }
171
172 // OneMKL VM functions perform a copy on host if no double type support
173 if (!exec_q.get_device().has(sycl::aspect::fp64)) {
174 return false;
175 }
176
177 // check that queues are compatible
178 if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
179 return false;
180 }
181
182 // dimensions must be the same
183 int dst_nd = dst.get_ndim();
184 if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) {
185 return false;
186 }
187 else if (dst_nd == 0) {
188 // don't call OneMKL for 0d arrays
189 return false;
190 }
191
192 // shapes must be the same
193 const py::ssize_t *src1_shape = src1.get_shape_raw();
194 const py::ssize_t *src2_shape = src2.get_shape_raw();
195 const py::ssize_t *dst_shape = dst.get_shape_raw();
196 bool shapes_equal(true);
197 size_t src_nelems(1);
198
199 for (int i = 0; i < dst_nd; ++i) {
200 src_nelems *= static_cast<size_t>(src1_shape[i]);
201 shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] &&
202 src2_shape[i] == dst_shape[i]);
203 }
204 if (!shapes_equal) {
205 return false;
206 }
207
208 // if nelems is zero, return false
209 if (src_nelems == 0) {
210 return false;
211 }
212
213 // ensure that output is ample enough to accommodate all elements
214 auto dst_offsets = dst.get_minmax_offsets();
215 // destination must be ample enough to accommodate all elements
216 {
217 size_t range =
218 static_cast<size_t>(dst_offsets.second - dst_offsets.first);
219 if (range + 1 < src_nelems) {
220 return false;
221 }
222 }
223
224 // check memory overlap
225 auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
226 if (overlap(src1, dst) || overlap(src2, dst)) {
227 return false;
228 }
229
230 // support only contiguous inputs
231 bool is_src1_c_contig = src1.is_c_contiguous();
232 bool is_src2_c_contig = src2.is_c_contiguous();
233 bool is_dst_c_contig = dst.is_c_contiguous();
234
235 bool all_c_contig =
236 (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
237 if (!all_c_contig) {
238 return false;
239 }
240
241 // MKL function is not defined for the type
242 if (contig_dispatch_table[src1_typeid] == nullptr) {
243 return false;
244 }
245 return true;
246}
247
// Expands, for the unary VM function __name__, to the boilerplate needed in
// each per-function translation unit:
//  - ContigFactory: yields the contiguous kernel pointer
//    __name__##_contig_impl<T>, or nullptr when OutputType<T>::value_type is
//    void (i.e. the type is unsupported);
//  - TypeMapFactory: maps an input type id to the kernel's output type id via
//    td_ns::GetTypeid (only instantiable with fnT == int);
//  - populate_dispatch_vectors(): fills the TU-local output_typeid_vector and
//    contig_dispatch_vector using the two factories above.
#define MACRO_POPULATE_DISPATCH_VECTORS(__name__)                              \
    template <typename fnT, typename T>                                        \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type,   \
                                         void>) {                              \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T>;                              \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T>                                        \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T>::value_type;                     \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_vectors(void)                                \
    {                                                                          \
        py_internal::init_ufunc_dispatch_vector<int, TypeMapFactory>(          \
            output_typeid_vector);                                             \
        py_internal::init_ufunc_dispatch_vector<unary_contig_impl_fn_ptr_t,    \
                                                ContigFactory>(                \
            contig_dispatch_vector);                                           \
    };
287
// Binary counterpart of MACRO_POPULATE_DISPATCH_VECTORS. Expands, for the
// binary VM function __name__, to:
//  - ContigFactory: yields the contiguous kernel pointer
//    __name__##_contig_impl<T1, T2>, or nullptr when
//    OutputType<T1, T2>::value_type is void (unsupported type pair);
//  - TypeMapFactory: maps an input type-id pair to the kernel's output type
//    id via td_ns::GetTypeid (only instantiable with fnT == int);
//  - populate_dispatch_tables(): fills the TU-local output_typeid_vector and
//    contig_dispatch_vector 2-D tables using the two factories above.
#define MACRO_POPULATE_DISPATCH_TABLES(__name__)                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<                                      \
                              typename OutputType<T1, T2>::value_type, void>)  \
            {                                                                  \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T1, T2>;                         \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T1, T2>::value_type;                \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_tables(void)                                 \
    {                                                                          \
        py_internal::init_ufunc_dispatch_table<int, TypeMapFactory>(           \
            output_typeid_vector);                                             \
        py_internal::init_ufunc_dispatch_table<binary_contig_impl_fn_ptr_t,    \
                                               ContigFactory>(                 \
            contig_dispatch_vector);                                           \
    };
328
329template <typename dispatchT,
330 template <typename fnT, typename T>
331 typename factoryT,
332 int _num_types = td_ns::num_types>
333void init_ufunc_dispatch_vector(dispatchT dispatch_vector[])
334{
335 td_ns::DispatchVectorBuilder<dispatchT, factoryT, _num_types> dvb;
336 dvb.populate_dispatch_vector(dispatch_vector);
337}
338
339template <typename dispatchT,
340 template <typename fnT, typename D, typename S>
341 typename factoryT,
342 int _num_types = td_ns::num_types>
343void init_ufunc_dispatch_table(dispatchT dispatch_table[][_num_types])
344{
345 td_ns::DispatchTableBuilder<dispatchT, factoryT, _num_types> dtb;
346 dtb.populate_dispatch_table(dispatch_table);
347}
348} // namespace dpnp::extensions::vm::py_internal