// DPNP C++ backend kernel library 0.19.0dev6
// Data Parallel Extension for NumPy*
// common.hpp
1//*****************************************************************************
2// Copyright (c) 2023-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <type_traits>
29
30#include <oneapi/mkl.hpp>
31#include <sycl/sycl.hpp>
32
33#include <dpctl4pybind11.hpp>
34#include <pybind11/pybind11.h>
35
36// utils extension header
37#include "ext/common.hpp"
38
39// dpctl tensor headers
40#include "utils/memory_overlap.hpp"
41#include "utils/type_dispatch.hpp"
42
49#ifndef __INTEL_MKL_2023_2_0_VERSION_REQUIRED
50#define __INTEL_MKL_2023_2_0_VERSION_REQUIRED 20230002L
51#endif
52
53static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED,
54 "OneMKL does not meet minimum version requirement");
55
56namespace ext_ns = ext::common;
57namespace py = pybind11;
58namespace td_ns = dpctl::tensor::type_dispatch;
59
60namespace dpnp::extensions::vm::py_internal
61{
62template <typename output_typesT, typename contig_dispatchT>
63bool need_to_call_unary_ufunc(sycl::queue &exec_q,
64 const dpctl::tensor::usm_ndarray &src,
65 const dpctl::tensor::usm_ndarray &dst,
66 const output_typesT &output_type_vec,
67 const contig_dispatchT &contig_dispatch_vector)
68{
69 // check type_nums
70 int src_typenum = src.get_typenum();
71 int dst_typenum = dst.get_typenum();
72
73 auto array_types = td_ns::usm_ndarray_types();
74 int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
75 int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
76
77 // check that types are supported
78 int func_output_typeid = output_type_vec[src_typeid];
79 if (dst_typeid != func_output_typeid) {
80 return false;
81 }
82
83 // OneMKL VM functions perform a copy on host if no double type support
84 if (!exec_q.get_device().has(sycl::aspect::fp64)) {
85 return false;
86 }
87
88 // check that queues are compatible
89 if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
90 return false;
91 }
92
93 // dimensions must be the same
94 int dst_nd = dst.get_ndim();
95 if (dst_nd != src.get_ndim()) {
96 return false;
97 }
98 else if (dst_nd == 0) {
99 // don't call OneMKL for 0d arrays
100 return false;
101 }
102
103 // shapes must be the same
104 const py::ssize_t *src_shape = src.get_shape_raw();
105 const py::ssize_t *dst_shape = dst.get_shape_raw();
106 bool shapes_equal(true);
107 size_t src_nelems(1);
108
109 for (int i = 0; i < dst_nd; ++i) {
110 src_nelems *= static_cast<size_t>(src_shape[i]);
111 shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
112 }
113 if (!shapes_equal) {
114 return false;
115 }
116
117 // if nelems is zero, return false
118 if (src_nelems == 0) {
119 return false;
120 }
121
122 // ensure that output is ample enough to accommodate all elements
123 auto dst_offsets = dst.get_minmax_offsets();
124 // destination must be ample enough to accommodate all elements
125 {
126 size_t range =
127 static_cast<size_t>(dst_offsets.second - dst_offsets.first);
128 if (range + 1 < src_nelems) {
129 return false;
130 }
131 }
132
133 // check memory overlap
134 auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
135 if (overlap(src, dst)) {
136 return false;
137 }
138
139 // support only contiguous inputs
140 bool is_src_c_contig = src.is_c_contiguous();
141 bool is_dst_c_contig = dst.is_c_contiguous();
142
143 bool all_c_contig = (is_src_c_contig && is_dst_c_contig);
144 if (!all_c_contig) {
145 return false;
146 }
147
148 // MKL function is not defined for the type
149 if (contig_dispatch_vector[src_typeid] == nullptr) {
150 return false;
151 }
152 return true;
153}
154
155template <typename output_typesT, typename contig_dispatchT>
156bool need_to_call_binary_ufunc(sycl::queue &exec_q,
157 const dpctl::tensor::usm_ndarray &src1,
158 const dpctl::tensor::usm_ndarray &src2,
159 const dpctl::tensor::usm_ndarray &dst,
160 const output_typesT &output_type_table,
161 const contig_dispatchT &contig_dispatch_table)
162{
163 // check type_nums
164 int src1_typenum = src1.get_typenum();
165 int src2_typenum = src2.get_typenum();
166 int dst_typenum = dst.get_typenum();
167
168 auto array_types = td_ns::usm_ndarray_types();
169 int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
170 int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
171 int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);
172
173 // check that types are supported
174 int output_typeid = output_type_table[src1_typeid][src2_typeid];
175 if (output_typeid != dst_typeid) {
176 return false;
177 }
178
179 // types must be the same
180 if (src1_typeid != src2_typeid) {
181 return false;
182 }
183
184 // OneMKL VM functions perform a copy on host if no double type support
185 if (!exec_q.get_device().has(sycl::aspect::fp64)) {
186 return false;
187 }
188
189 // check that queues are compatible
190 if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
191 return false;
192 }
193
194 // dimensions must be the same
195 int dst_nd = dst.get_ndim();
196 if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) {
197 return false;
198 }
199 else if (dst_nd == 0) {
200 // don't call OneMKL for 0d arrays
201 return false;
202 }
203
204 // shapes must be the same
205 const py::ssize_t *src1_shape = src1.get_shape_raw();
206 const py::ssize_t *src2_shape = src2.get_shape_raw();
207 const py::ssize_t *dst_shape = dst.get_shape_raw();
208 bool shapes_equal(true);
209 size_t src_nelems(1);
210
211 for (int i = 0; i < dst_nd; ++i) {
212 src_nelems *= static_cast<size_t>(src1_shape[i]);
213 shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] &&
214 src2_shape[i] == dst_shape[i]);
215 }
216 if (!shapes_equal) {
217 return false;
218 }
219
220 // if nelems is zero, return false
221 if (src_nelems == 0) {
222 return false;
223 }
224
225 // ensure that output is ample enough to accommodate all elements
226 auto dst_offsets = dst.get_minmax_offsets();
227 // destination must be ample enough to accommodate all elements
228 {
229 size_t range =
230 static_cast<size_t>(dst_offsets.second - dst_offsets.first);
231 if (range + 1 < src_nelems) {
232 return false;
233 }
234 }
235
236 // check memory overlap
237 auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
238 if (overlap(src1, dst) || overlap(src2, dst)) {
239 return false;
240 }
241
242 // support only contiguous inputs
243 bool is_src1_c_contig = src1.is_c_contiguous();
244 bool is_src2_c_contig = src2.is_c_contiguous();
245 bool is_dst_c_contig = dst.is_c_contiguous();
246
247 bool all_c_contig =
248 (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
249 if (!all_c_contig) {
250 return false;
251 }
252
253 // MKL function is not defined for the type
254 if (contig_dispatch_table[src1_typeid] == nullptr) {
255 return false;
256 }
257 return true;
258}
259
/// @brief Expands to the dispatch machinery for a unary VM function
/// named @p __name__.
///
/// Defines, at the expansion site:
///  - ContigFactory: yields `__name__##_contig_impl<T>`, or nullptr when
///    `OutputType<T>::value_type` is void (type unsupported by the function);
///  - TypeMapFactory: yields the dpctl type id of `OutputType<T>::value_type`;
///  - populate_dispatch_vectors(): fills the expansion site's
///    `output_typeid_vector` and `contig_dispatch_vector`.
///
/// NOTE(review): the trailing `;` previously emitted after the function body
/// expanded to a stray empty declaration; it has been dropped.
#define MACRO_POPULATE_DISPATCH_VECTORS(__name__)                              \
    template <typename fnT, typename T>                                        \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type,   \
                                         void>) {                              \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T>;                              \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T>                                        \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T>::value_type;                     \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_vectors(void)                                \
    {                                                                          \
        ext_ns::init_dispatch_vector<int, TypeMapFactory>(                     \
            output_typeid_vector);                                             \
        ext_ns::init_dispatch_vector<unary_contig_impl_fn_ptr_t,               \
                                     ContigFactory>(contig_dispatch_vector);   \
    }
298
/// @brief Expands to the dispatch machinery for a binary VM function
/// named @p __name__.
///
/// Defines, at the expansion site:
///  - ContigFactory: yields `__name__##_contig_impl<T1, T2>`, or nullptr when
///    `OutputType<T1, T2>::value_type` is void (type pair unsupported);
///  - TypeMapFactory: yields the dpctl type id of
///    `OutputType<T1, T2>::value_type`;
///  - populate_dispatch_tables(): fills the expansion site's
///    `output_typeid_vector` and `contig_dispatch_vector`.
///
/// NOTE(review): the trailing `;` previously emitted after the function body
/// expanded to a stray empty declaration; it has been dropped.
#define MACRO_POPULATE_DISPATCH_TABLES(__name__)                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<                                      \
                              typename OutputType<T1, T2>::value_type, void>)  \
            {                                                                  \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T1, T2>;                         \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T1, typename T2>                          \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()             \
        {                                                                      \
            using rT = typename OutputType<T1, T2>::value_type;                \
            return td_ns::GetTypeid<rT>{}.get();                               \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_tables(void)                                 \
    {                                                                          \
        ext_ns::init_dispatch_table<int, TypeMapFactory>(                      \
            output_typeid_vector);                                             \
        ext_ns::init_dispatch_table<binary_contig_impl_fn_ptr_t,               \
                                    ContigFactory>(contig_dispatch_vector);    \
    }
338} // namespace dpnp::extensions::vm::py_internal