DPNP C++ backend kernel library 0.20.0dev0
Data Parallel Extension for NumPy*
common.hpp
//*****************************************************************************
// Copyright (c) 2023, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// - Neither the name of the copyright holder nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <cstddef>
#include <type_traits>
#include <utility>
#include <vector>

#include <oneapi/mkl.hpp>
#include <sycl/sycl.hpp>

#include <dpctl4pybind11.hpp>
#include <pybind11/pybind11.h>

// utils extension header
#include "ext/common.hpp"

// dpctl tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/type_dispatch.hpp"

#ifndef __INTEL_MKL_2023_2_0_VERSION_REQUIRED
#define __INTEL_MKL_2023_2_0_VERSION_REQUIRED 20230002L
#endif

static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED,
              "OneMKL does not meet minimum version requirement");

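// Note: INTEL_MKL_VERSION encodes a release as <year><4-digit update>, so
// 20230002L corresponds to the oneMKL 2023.2.0 release named by the guard
// macro above; any older toolchain fails the static_assert at compile time.
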
namespace ext_ns = ext::common;
namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;

namespace dpnp::extensions::vm::py_internal
{
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_unary_ufunc(sycl::queue &exec_q,
                              const dpctl::tensor::usm_ndarray &src,
                              const dpctl::tensor::usm_ndarray &dst,
                              const output_typesT &output_type_vec,
                              const contig_dispatchT &contig_dispatch_vector)
{
    // check type_nums
    int src_typenum = src.get_typenum();
    int dst_typenum = dst.get_typenum();

    const auto &array_types = td_ns::usm_ndarray_types();
    int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int func_output_typeid = output_type_vec[src_typeid];
    if (dst_typeid != func_output_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device lacks
    // double precision support, so avoid calling them in that case
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src_shape = src.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    std::size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<std::size_t>(src_shape[i]);
        shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // there is nothing to compute for empty arrays
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    {
        auto dst_offsets = dst.get_minmax_offsets();
        std::size_t range =
            static_cast<std::size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src, dst)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src_c_contig = src.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig = (is_src_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // the MKL function is not defined for this type
    if (contig_dispatch_vector[src_typeid] == nullptr) {
        return false;
    }
    return true;
}

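// A minimal usage sketch (illustrative, not part of the upstream API): the
// pybind11 binding of a VM-backed unary function is expected to consult this
// helper and route to OneMKL only when it returns true. The names `mkl_impl`
// and `fallback_impl` are hypothetical stand-ins for the extension's
// contiguous OneMKL path and its generic strided fallback; the dispatch
// structures follow the naming used by MACRO_POPULATE_DISPATCH_VECTORS below.
//
//     if (need_to_call_unary_ufunc(exec_q, src, dst, output_typeid_vector,
//                                  contig_dispatch_vector)) {
//         return mkl_impl(exec_q, src, dst, depends);
//     }
//     return fallback_impl(exec_q, src, dst, depends);
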
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_unary_two_outputs_ufunc(
    sycl::queue &exec_q,
    const dpctl::tensor::usm_ndarray &src,
    const dpctl::tensor::usm_ndarray &dst1,
    const dpctl::tensor::usm_ndarray &dst2,
    const output_typesT &output_type_vec,
    const contig_dispatchT &contig_dispatch_vector)
{
    // check type_nums
    int src_typenum = src.get_typenum();
    int dst1_typenum = dst1.get_typenum();
    int dst2_typenum = dst2.get_typenum();

    const auto &array_types = td_ns::usm_ndarray_types();
    int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
    int dst1_typeid = array_types.typenum_to_lookup_id(dst1_typenum);
    int dst2_typeid = array_types.typenum_to_lookup_id(dst2_typenum);

    std::pair<int, int> func_output_typeids = output_type_vec[src_typeid];

    // check that types are supported
    if (dst1_typeid != func_output_typeids.first ||
        dst2_typeid != func_output_typeids.second)
    {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device lacks
    // double precision support, so avoid calling them in that case
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst1, dst2})) {
        return false;
    }

    // dimensions must be the same
    int src_nd = src.get_ndim();
    int dst1_nd = dst1.get_ndim();
    int dst2_nd = dst2.get_ndim();
    if (src_nd != dst1_nd || src_nd != dst2_nd) {
        return false;
    }
    else if (dst1_nd == 0 || dst2_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src_shape = src.get_shape_raw();
    const py::ssize_t *dst1_shape = dst1.get_shape_raw();
    const py::ssize_t *dst2_shape = dst2.get_shape_raw();
    bool shapes_equal(true);
    std::size_t src_nelems(1);

    for (int i = 0; i < src_nd; ++i) {
        src_nelems *= static_cast<std::size_t>(src_shape[i]);
        shapes_equal = shapes_equal && (src_shape[i] == dst1_shape[i]) &&
                       (src_shape[i] == dst2_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // there is nothing to compute for empty arrays
    if (src_nelems == 0) {
        return false;
    }

    // destinations must be ample enough to accommodate all elements
    {
        auto dst1_offsets = dst1.get_minmax_offsets();
        auto dst2_offsets = dst2.get_minmax_offsets();
        std::size_t range1 = static_cast<std::size_t>(dst1_offsets.second -
                                                      dst1_offsets.first);
        std::size_t range2 = static_cast<std::size_t>(dst2_offsets.second -
                                                      dst2_offsets.first);
        if ((range1 + 1 < src_nelems) || (range2 + 1 < src_nelems)) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src, dst1) || overlap(src, dst2) || overlap(dst1, dst2)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src_c_contig = src.is_c_contiguous();
    bool is_dst1_c_contig = dst1.is_c_contiguous();
    bool is_dst2_c_contig = dst2.is_c_contiguous();

    bool all_c_contig =
        (is_src_c_contig && is_dst1_c_contig && is_dst2_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // the MKL function is not defined for this type
    if (contig_dispatch_vector[src_typeid] == nullptr) {
        return false;
    }
    return true;
}

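// The same gating pattern applies to two-output functions (for example a
// modf-like operation writing fractional and integral parts); a hedged
// sketch with the same hypothetical `mkl_impl`/`fallback_impl` names:
//
//     if (need_to_call_unary_two_outputs_ufunc(exec_q, src, dst1, dst2,
//                                              output_typeid_vector,
//                                              contig_dispatch_vector)) {
//         return mkl_impl(exec_q, src, dst1, dst2, depends);
//     }
//     return fallback_impl(exec_q, src, dst1, dst2, depends);
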
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_binary_ufunc(sycl::queue &exec_q,
                               const dpctl::tensor::usm_ndarray &src1,
                               const dpctl::tensor::usm_ndarray &src2,
                               const dpctl::tensor::usm_ndarray &dst,
                               const output_typesT &output_type_table,
                               const contig_dispatchT &contig_dispatch_table)
{
    // check type_nums
    int src1_typenum = src1.get_typenum();
    int src2_typenum = src2.get_typenum();
    int dst_typenum = dst.get_typenum();

    const auto &array_types = td_ns::usm_ndarray_types();
    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int output_typeid = output_type_table[src1_typeid][src2_typeid];
    if (output_typeid != dst_typeid) {
        return false;
    }

    // both input types must be the same
    if (src1_typeid != src2_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device lacks
    // double precision support, so avoid calling them in that case
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src1_shape = src1.get_shape_raw();
    const py::ssize_t *src2_shape = src2.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    std::size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<std::size_t>(src1_shape[i]);
        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] &&
                                        src2_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // there is nothing to compute for empty arrays
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    {
        auto dst_offsets = dst.get_minmax_offsets();
        std::size_t range =
            static_cast<std::size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src1, dst) || overlap(src2, dst)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src1_c_contig = src1.is_c_contiguous();
    bool is_src2_c_contig = src2.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig =
        (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // the MKL function is not defined for this type combination
    if (contig_dispatch_table[src1_typeid][src2_typeid] == nullptr) {
        return false;
    }
    return true;
}

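// Binary functions follow the same pattern, except that the output type is
// looked up through a 2-D table indexed by both input type ids. A hedged
// sketch (hypothetical `mkl_impl`/`fallback_impl` names again):
//
//     if (need_to_call_binary_ufunc(exec_q, src1, src2, dst,
//                                   output_typeid_vector,
//                                   contig_dispatch_vector)) {
//         return mkl_impl(exec_q, src1, src2, dst, depends);
//     }
//     return fallback_impl(exec_q, src1, src2, dst, depends);
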
#define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \
    template <typename fnT, typename T> \
    struct ContigFactory \
    { \
        fnT get() \
        { \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type, \
                                         void>) { \
                return nullptr; \
            } \
            else { \
                return __name__##_contig_impl<T>; \
            } \
        } \
    }; \
 \
    template <typename fnT, typename T> \
    struct TypeMapFactory \
    { \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
        { \
            using rT = typename OutputType<T>::value_type; \
            return td_ns::GetTypeid<rT>{}.get(); \
        } \
    }; \
 \
    static void populate_dispatch_vectors(void) \
    { \
        ext_ns::init_dispatch_vector<int, TypeMapFactory>( \
            output_typeid_vector); \
        ext_ns::init_dispatch_vector<unary_contig_impl_fn_ptr_t, \
                                     ContigFactory>(contig_dispatch_vector); \
    };

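// A hedged sketch of how an implementation file might consume the macro
// above for a hypothetical `sin` function. The OutputType mapping, the
// `sin_contig_impl` declaration, and the function-pointer signature are
// illustrative assumptions; only the names the macro expects are shown.
//
//     template <typename T>
//     struct OutputType
//     {
//         // supported only for real floating-point types in this sketch
//         using value_type = std::conditional_t<
//             std::is_same_v<T, float> || std::is_same_v<T, double>, T, void>;
//     };
//
//     using unary_contig_impl_fn_ptr_t =
//         sycl::event (*)(sycl::queue &, std::size_t, const char *, char *,
//                         const std::vector<sycl::event> &);
//
//     template <typename T>
//     sycl::event sin_contig_impl(sycl::queue &, std::size_t, const char *,
//                                 char *, const std::vector<sycl::event> &);
//
//     static int output_typeid_vector[td_ns::num_types];
//     static unary_contig_impl_fn_ptr_t
//         contig_dispatch_vector[td_ns::num_types];
//
//     MACRO_POPULATE_DISPATCH_VECTORS(sin);
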
#define MACRO_POPULATE_DISPATCH_2OUTS_VECTORS(__name__) \
    template <typename fnT, typename T> \
    struct ContigFactory \
    { \
        fnT get() \
        { \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type1, \
                                         void> || \
                          std::is_same_v<typename OutputType<T>::value_type2, \
                                         void>) \
            { \
                return nullptr; \
            } \
            else { \
                return __name__##_contig_impl<T>; \
            } \
        } \
    }; \
 \
    template <typename fnT, typename T> \
    struct TypeMapFactory \
    { \
        std::enable_if_t<std::is_same<fnT, std::pair<int, int>>::value, \
                         std::pair<int, int>> \
        get() \
        { \
            using rT1 = typename OutputType<T>::value_type1; \
            using rT2 = typename OutputType<T>::value_type2; \
            return std::make_pair(td_ns::GetTypeid<rT1>{}.get(), \
                                  td_ns::GetTypeid<rT2>{}.get()); \
        } \
    }; \
 \
    static void populate_dispatch_vectors(void) \
    { \
        ext_ns::init_dispatch_vector<std::pair<int, int>, TypeMapFactory>( \
            output_typeid_vector); \
        ext_ns::init_dispatch_vector<unary_two_outputs_contig_impl_fn_ptr_t, \
                                     ContigFactory>(contig_dispatch_vector); \
    };

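// The two-output variant differs only in that OutputType<T> must expose
// value_type1/value_type2 and the type map stores std::pair<int, int>. A
// hedged sketch of the expected OutputType shape, assuming a modf-like op:
//
//     template <typename T>
//     struct OutputType
//     {
//         using value_type1 = T; // e.g. fractional part
//         using value_type2 = T; // e.g. integral part
//     };
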
#define MACRO_POPULATE_DISPATCH_TABLES(__name__) \
    template <typename fnT, typename T1, typename T2> \
    struct ContigFactory \
    { \
        fnT get() \
        { \
            if constexpr (std::is_same_v< \
                              typename OutputType<T1, T2>::value_type, void>) \
            { \
                return nullptr; \
            } \
            else { \
                return __name__##_contig_impl<T1, T2>; \
            } \
        } \
    }; \
 \
    template <typename fnT, typename T1, typename T2> \
    struct TypeMapFactory \
    { \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
        { \
            using rT = typename OutputType<T1, T2>::value_type; \
            return td_ns::GetTypeid<rT>{}.get(); \
        } \
    }; \
 \
    static void populate_dispatch_tables(void) \
    { \
        ext_ns::init_dispatch_table<int, TypeMapFactory>( \
            output_typeid_vector); \
        ext_ns::init_dispatch_table<binary_contig_impl_fn_ptr_t, \
                                    ContigFactory>(contig_dispatch_vector); \
    };
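
// For binary functions the factories are indexed by both input types and the
// populated structures are 2-D tables. A hedged sketch of the names a
// hypothetical `add` extension would supply before invoking the macro; the
// function-pointer signature is an illustrative assumption:
//
//     template <typename T1, typename T2>
//     struct OutputType; // maps (T1, T2) -> value_type, void if unsupported
//
//     using binary_contig_impl_fn_ptr_t =
//         sycl::event (*)(sycl::queue &, std::size_t, const char *,
//                         const char *, char *,
//                         const std::vector<sycl::event> &);
//
//     static int output_typeid_vector[td_ns::num_types][td_ns::num_types];
//     static binary_contig_impl_fn_ptr_t
//         contig_dispatch_vector[td_ns::num_types][td_ns::num_types];
//
//     MACRO_POPULATE_DISPATCH_TABLES(add);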
} // namespace dpnp::extensions::vm::py_internal