DPNP C++ backend kernel library 0.20.0dev0
Data Parallel Extension for NumPy*
common.hpp
//*****************************************************************************
// Copyright (c) 2023, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// - Neither the name of the copyright holder nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <type_traits>

#include <oneapi/mkl.hpp>
#include <sycl/sycl.hpp>

#include <dpctl4pybind11.hpp>
#include <pybind11/pybind11.h>

// utils extension header
#include "ext/common.hpp"

// dpctl tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/type_dispatch.hpp"

#ifndef __INTEL_MKL_2023_2_0_VERSION_REQUIRED
#define __INTEL_MKL_2023_2_0_VERSION_REQUIRED 20230002L
#endif

static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED,
              "OneMKL does not meet minimum version requirement");

namespace ext_ns = ext::common;
namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;

namespace dpnp::extensions::vm::py_internal
{
template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_unary_ufunc(sycl::queue &exec_q,
                              const dpctl::tensor::usm_ndarray &src,
                              const dpctl::tensor::usm_ndarray &dst,
                              const output_typesT &output_type_vec,
                              const contig_dispatchT &contig_dispatch_vector)
{
    // check type_nums
    int src_typenum = src.get_typenum();
    int dst_typenum = dst.get_typenum();

    auto array_types = td_ns::usm_ndarray_types();
    int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int func_output_typeid = output_type_vec[src_typeid];
    if (dst_typeid != func_output_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device has no
    // double type support, so fall back to the generic implementation
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src_shape = src.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<size_t>(src_shape[i]);
        shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // if nelems is zero, return false
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    auto dst_offsets = dst.get_minmax_offsets();
    {
        size_t range =
            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src, dst)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src_c_contig = src.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig = (is_src_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // MKL function is not defined for the type
    if (contig_dispatch_vector[src_typeid] == nullptr) {
        return false;
    }
    return true;
}
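
// A minimal caller sketch (illustrative only): a Python-facing wrapper is
// expected to consult this predicate first and fall back to the generic
// dpctl elementwise kernel when it returns false. `output_typeid_vector`
// and `contig_dispatch_vector` are whatever containers the including
// extension source defines (see the macros below); the snippet assumes
// nothing beyond that.
//
//     int src_typeid = td_ns::usm_ndarray_types().typenum_to_lookup_id(
//         src.get_typenum());
//     if (need_to_call_unary_ufunc(exec_q, src, dst, output_typeid_vector,
//                                  contig_dispatch_vector))
//     {
//         auto fn = contig_dispatch_vector[src_typeid];
//         // submit the contiguous OneMKL VM kernel via fn(...)
//     }
//     else {
//         // fall back to the generic elementwise implementation
//     }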

template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_unary_two_outputs_ufunc(
    sycl::queue &exec_q,
    const dpctl::tensor::usm_ndarray &src,
    const dpctl::tensor::usm_ndarray &dst1,
    const dpctl::tensor::usm_ndarray &dst2,
    const output_typesT &output_type_vec,
    const contig_dispatchT &contig_dispatch_vector)
{
    // check type_nums
    int src_typenum = src.get_typenum();
    int dst1_typenum = dst1.get_typenum();
    int dst2_typenum = dst2.get_typenum();

    const auto &array_types = td_ns::usm_ndarray_types();
    int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
    int dst1_typeid = array_types.typenum_to_lookup_id(dst1_typenum);
    int dst2_typeid = array_types.typenum_to_lookup_id(dst2_typenum);

    std::pair<int, int> func_output_typeids = output_type_vec[src_typeid];

    // check that types are supported
    if (dst1_typeid != func_output_typeids.first ||
        dst2_typeid != func_output_typeids.second)
    {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device has no
    // double type support, so fall back to the generic implementation
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst1, dst2})) {
        return false;
    }

    // dimensions must be the same
    int src_nd = src.get_ndim();
    int dst1_nd = dst1.get_ndim();
    int dst2_nd = dst2.get_ndim();
    if (src_nd != dst1_nd || src_nd != dst2_nd) {
        return false;
    }
    else if (dst1_nd == 0 || dst2_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src_shape = src.get_shape_raw();
    const py::ssize_t *dst1_shape = dst1.get_shape_raw();
    const py::ssize_t *dst2_shape = dst2.get_shape_raw();
    bool shapes_equal(true);
    size_t src_nelems(1);

    for (int i = 0; i < src_nd; ++i) {
        src_nelems *= static_cast<size_t>(src_shape[i]);
        shapes_equal = shapes_equal && (src_shape[i] == dst1_shape[i]) &&
                       (src_shape[i] == dst2_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // if nelems is zero, return false
    if (src_nelems == 0) {
        return false;
    }

    // destinations must be ample enough to accommodate all elements
    auto dst1_offsets = dst1.get_minmax_offsets();
    auto dst2_offsets = dst2.get_minmax_offsets();
    {
        size_t range1 =
            static_cast<size_t>(dst1_offsets.second - dst1_offsets.first);
        size_t range2 =
            static_cast<size_t>(dst2_offsets.second - dst2_offsets.first);
        if ((range1 + 1 < src_nelems) || (range2 + 1 < src_nelems)) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src, dst1) || overlap(src, dst2) || overlap(dst1, dst2)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src_c_contig = src.is_c_contiguous();
    bool is_dst1_c_contig = dst1.is_c_contiguous();
    bool is_dst2_c_contig = dst2.is_c_contiguous();

    bool all_c_contig =
        (is_src_c_contig && is_dst1_c_contig && is_dst2_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // MKL function is not defined for the type
    if (contig_dispatch_vector[src_typeid] == nullptr) {
        return false;
    }
    return true;
}
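
// Usage mirrors the single-output predicate above, except that the type map
// holds std::pair<int, int> entries (one type id per output). A sketch for a
// sincos-style function writing two destination arrays (names illustrative):
//
//     if (need_to_call_unary_two_outputs_ufunc(exec_q, src, dst_sin, dst_cos,
//                                              output_typeid_vector,
//                                              contig_dispatch_vector))
//     {
//         // submit the contiguous OneMKL VM kernel producing both outputs
//     }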

template <typename output_typesT, typename contig_dispatchT>
bool need_to_call_binary_ufunc(sycl::queue &exec_q,
                               const dpctl::tensor::usm_ndarray &src1,
                               const dpctl::tensor::usm_ndarray &src2,
                               const dpctl::tensor::usm_ndarray &dst,
                               const output_typesT &output_type_table,
                               const contig_dispatchT &contig_dispatch_table)
{
    // check type_nums
    int src1_typenum = src1.get_typenum();
    int src2_typenum = src2.get_typenum();
    int dst_typenum = dst.get_typenum();

    auto array_types = td_ns::usm_ndarray_types();
    int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum);
    int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum);
    int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

    // check that types are supported
    int output_typeid = output_type_table[src1_typeid][src2_typeid];
    if (output_typeid != dst_typeid) {
        return false;
    }

    // input types must be the same
    if (src1_typeid != src2_typeid) {
        return false;
    }

    // OneMKL VM functions perform a copy on host if the device has no
    // double type support, so fall back to the generic implementation
    if (!exec_q.get_device().has(sycl::aspect::fp64)) {
        return false;
    }

    // check that queues are compatible
    if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
        return false;
    }

    // dimensions must be the same
    int dst_nd = dst.get_ndim();
    if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) {
        return false;
    }
    else if (dst_nd == 0) {
        // don't call OneMKL for 0d arrays
        return false;
    }

    // shapes must be the same
    const py::ssize_t *src1_shape = src1.get_shape_raw();
    const py::ssize_t *src2_shape = src2.get_shape_raw();
    const py::ssize_t *dst_shape = dst.get_shape_raw();
    bool shapes_equal(true);
    size_t src_nelems(1);

    for (int i = 0; i < dst_nd; ++i) {
        src_nelems *= static_cast<size_t>(src1_shape[i]);
        shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] &&
                                        src2_shape[i] == dst_shape[i]);
    }
    if (!shapes_equal) {
        return false;
    }

    // if nelems is zero, return false
    if (src_nelems == 0) {
        return false;
    }

    // destination must be ample enough to accommodate all elements
    auto dst_offsets = dst.get_minmax_offsets();
    {
        size_t range =
            static_cast<size_t>(dst_offsets.second - dst_offsets.first);
        if (range + 1 < src_nelems) {
            return false;
        }
    }

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(src1, dst) || overlap(src2, dst)) {
        return false;
    }

    // support only contiguous arrays
    bool is_src1_c_contig = src1.is_c_contiguous();
    bool is_src2_c_contig = src2.is_c_contiguous();
    bool is_dst_c_contig = dst.is_c_contiguous();

    bool all_c_contig =
        (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig);
    if (!all_c_contig) {
        return false;
    }

    // MKL function is not defined for the type
    if (contig_dispatch_table[src1_typeid] == nullptr) {
        return false;
    }
    return true;
}
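
// Binary sketch (names illustrative). Because the predicate requires
// src1_typeid == src2_typeid, only the diagonal of the 2D output-type table
// is ever reachable, which is why the contiguous dispatch lookup above is
// indexed by a single type id:
//
//     if (need_to_call_binary_ufunc(exec_q, src1, src2, dst,
//                                   output_typeid_table,
//                                   contig_dispatch_table))
//     {
//         // submit the contiguous OneMKL VM kernel (e.g., add, pow, atan2)
//     }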

#define MACRO_POPULATE_DISPATCH_VECTORS(__name__)                             \
    template <typename fnT, typename T>                                       \
    struct ContigFactory                                                      \
    {                                                                         \
        fnT get()                                                             \
        {                                                                     \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type,  \
                                         void>) {                             \
                return nullptr;                                               \
            }                                                                 \
            else {                                                            \
                return __name__##_contig_impl<T>;                             \
            }                                                                 \
        }                                                                     \
    };                                                                        \
                                                                              \
    template <typename fnT, typename T>                                       \
    struct TypeMapFactory                                                     \
    {                                                                         \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()            \
        {                                                                     \
            using rT = typename OutputType<T>::value_type;                    \
            return td_ns::GetTypeid<rT>{}.get();                              \
        }                                                                     \
    };                                                                        \
                                                                              \
    static void populate_dispatch_vectors(void)                               \
    {                                                                         \
        ext_ns::init_dispatch_vector<int, TypeMapFactory>(                    \
            output_typeid_vector);                                            \
        ext_ns::init_dispatch_vector<unary_contig_impl_fn_ptr_t,              \
                                     ContigFactory>(contig_dispatch_vector);  \
    };
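
// Expected expansion site (a sketch; `sin`, the `impl` namespace, and the
// surrounding definitions are example names). Each VM extension source is
// assumed to define OutputType<T>, __name__##_contig_impl<T>, the fn-pointer
// alias, and the two storage vectors before expanding the macro:
//
//     namespace impl
//     {
//     ... // OutputType, sin_contig_impl, unary_contig_impl_fn_ptr_t,
//         // output_typeid_vector, contig_dispatch_vector
//     MACRO_POPULATE_DISPATCH_VECTORS(sin);
//     } // namespace impl
//
// A single call to impl::populate_dispatch_vectors() at module load then
// fills both lookup vectors indexed by dpctl type id.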

#define MACRO_POPULATE_DISPATCH_2OUTS_VECTORS(__name__)                       \
    template <typename fnT, typename T>                                       \
    struct ContigFactory                                                      \
    {                                                                         \
        fnT get()                                                             \
        {                                                                     \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type1, \
                                         void> ||                             \
                          std::is_same_v<typename OutputType<T>::value_type2, \
                                         void>)                               \
            {                                                                 \
                fnT fn = nullptr;                                             \
                return fn;                                                    \
            }                                                                 \
            else {                                                            \
                fnT fn = __name__##_contig_impl<T>;                           \
                return fn;                                                    \
            }                                                                 \
        }                                                                     \
    };                                                                        \
                                                                              \
    template <typename fnT, typename T>                                       \
    struct TypeMapFactory                                                     \
    {                                                                         \
        std::enable_if_t<std::is_same<fnT, std::pair<int, int>>::value,       \
                         std::pair<int, int>>                                 \
        get()                                                                 \
        {                                                                     \
            using rT1 = typename OutputType<T>::value_type1;                  \
            using rT2 = typename OutputType<T>::value_type2;                  \
            return std::make_pair(td_ns::GetTypeid<rT1>{}.get(),              \
                                  td_ns::GetTypeid<rT2>{}.get());             \
        }                                                                     \
    };                                                                        \
                                                                              \
    static void populate_dispatch_vectors(void)                               \
    {                                                                         \
        ext_ns::init_dispatch_vector<std::pair<int, int>, TypeMapFactory>(    \
            output_typeid_vector);                                            \
        ext_ns::init_dispatch_vector<unary_two_outputs_contig_impl_fn_ptr_t,  \
                                     ContigFactory>(contig_dispatch_vector);  \
    };
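
// The two-output variant expects OutputType<T> to expose value_type1 and
// value_type2 instead of a single value_type. A hypothetical trait shape
// (an assumption, shown here for a float-only sincos-style function):
//
//     template <typename T>
//     struct OutputType
//     {
//         using value_type1 =
//             std::conditional_t<std::is_same_v<T, float>, float, void>;
//         using value_type2 = value_type1;
//     };
//
// Mapping unsupported input types to void makes ContigFactory yield a null
// function pointer, which need_to_call_unary_two_outputs_ufunc() treats as
// "not implemented".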

#define MACRO_POPULATE_DISPATCH_TABLES(__name__)                              \
    template <typename fnT, typename T1, typename T2>                         \
    struct ContigFactory                                                      \
    {                                                                         \
        fnT get()                                                             \
        {                                                                     \
            if constexpr (std::is_same_v<                                     \
                              typename OutputType<T1, T2>::value_type, void>) \
            {                                                                 \
                return nullptr;                                               \
            }                                                                 \
            else {                                                            \
                return __name__##_contig_impl<T1, T2>;                        \
            }                                                                 \
        }                                                                     \
    };                                                                        \
                                                                              \
    template <typename fnT, typename T1, typename T2>                         \
    struct TypeMapFactory                                                     \
    {                                                                         \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get()            \
        {                                                                     \
            using rT = typename OutputType<T1, T2>::value_type;               \
            return td_ns::GetTypeid<rT>{}.get();                              \
        }                                                                     \
    };                                                                        \
                                                                              \
    static void populate_dispatch_tables(void)                                \
    {                                                                         \
        ext_ns::init_dispatch_table<int, TypeMapFactory>(                     \
            output_typeid_vector);                                            \
        ext_ns::init_dispatch_table<binary_contig_impl_fn_ptr_t,              \
                                    ContigFactory>(contig_dispatch_vector);   \
    };
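
// Binary counterpart of the sketches above: with OutputType<T1, T2>,
// add_contig_impl<T1, T2>, and binary_contig_impl_fn_ptr_t in scope (example
// names), an extension source would expand
//
//     MACRO_POPULATE_DISPATCH_TABLES(add);
//
// and call populate_dispatch_tables() once at module load. Note that here
// output_typeid_vector and contig_dispatch_vector are two-dimensional
// tables, despite the names, and are the containers consumed by
// need_to_call_binary_ufunc().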
} // namespace dpnp::extensions::vm::py_internal