DPNP C++ backend kernel library 0.18.0dev1
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
dot_common.hpp
1//*****************************************************************************
2// Copyright (c) 2024-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <oneapi/mkl.hpp>
29#include <pybind11/pybind11.h>
30
31// dpctl tensor headers
32#include "utils/memory_overlap.hpp"
33#include "utils/output_validation.hpp"
34#include "utils/type_dispatch.hpp"
35#include "utils/type_utils.hpp"
36
37#include "types_matrix.hpp"
38
39namespace dpnp::extensions::blas::dot
40{
41typedef sycl::event (*dot_impl_fn_ptr_t)(sycl::queue &,
42 const std::int64_t,
43 const char *,
44 const std::int64_t,
45 const char *,
46 const std::int64_t,
47 char *,
48 const std::vector<sycl::event> &);
49
50namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
51namespace py = pybind11;
52
53std::pair<sycl::event, sycl::event>
54 dot_func(sycl::queue &exec_q,
55 const dpctl::tensor::usm_ndarray &vectorX,
56 const dpctl::tensor::usm_ndarray &vectorY,
57 const dpctl::tensor::usm_ndarray &result,
58 const std::vector<sycl::event> &depends,
59 const dot_impl_fn_ptr_t *dot_dispatch_vector)
60{
61 const int vectorX_nd = vectorX.get_ndim();
62 const int vectorY_nd = vectorY.get_ndim();
63 const int result_nd = result.get_ndim();
64
65 if ((vectorX_nd != 1)) {
66 throw py::value_error(
67 "The first input array has ndim=" + std::to_string(vectorX_nd) +
68 ", but a 1-dimensional array is expected.");
69 }
70
71 if ((vectorY_nd != 1)) {
72 throw py::value_error(
73 "The second input array has ndim=" + std::to_string(vectorY_nd) +
74 ", but a 1-dimensional array is expected.");
75 }
76
77 if ((result_nd != 0)) {
78 throw py::value_error(
79 "The output array has ndim=" + std::to_string(result_nd) +
80 ", but a 0-dimensional array is expected.");
81 }
82
83 auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
84 if (overlap(vectorX, result)) {
85 throw py::value_error(
86 "The first input array and output array are overlapping "
87 "segments of memory");
88 }
89 if (overlap(vectorY, result)) {
90 throw py::value_error(
91 "The second input array and output array are overlapping "
92 "segments of memory");
93 }
94
95 if (!dpctl::utils::queues_are_compatible(
96 exec_q,
97 {vectorX.get_queue(), vectorY.get_queue(), result.get_queue()}))
98 {
99 throw py::value_error(
100 "USM allocations are not compatible with the execution queue.");
101 }
102
103 const int src_nelems = 1;
104 dpctl::tensor::validation::CheckWritable::throw_if_not_writable(result);
105 dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(result,
106 src_nelems);
107
108 const py::ssize_t x_size = vectorX.get_size();
109 const py::ssize_t y_size = vectorY.get_size();
110 const std::int64_t n = x_size;
111 if (x_size != y_size) {
112 throw py::value_error("The size of the first input array must be "
113 "equal to the size of the second input array.");
114 }
115
116 const int vectorX_typenum = vectorX.get_typenum();
117 const int vectorY_typenum = vectorY.get_typenum();
118 const int result_typenum = result.get_typenum();
119
120 if (result_typenum != vectorX_typenum || result_typenum != vectorY_typenum)
121 {
122 throw py::value_error("Given arrays must be of the same type.");
123 }
124
125 auto array_types = dpctl_td_ns::usm_ndarray_types();
126 const int type_id = array_types.typenum_to_lookup_id(vectorX_typenum);
127
128 dot_impl_fn_ptr_t dot_fn = dot_dispatch_vector[type_id];
129 if (dot_fn == nullptr) {
130 throw py::value_error(
131 "Types of input vectors and result array are mismatched.");
132 }
133
134 char *x_typeless_ptr = vectorX.get_data();
135 char *y_typeless_ptr = vectorY.get_data();
136 char *r_typeless_ptr = result.get_data();
137
138 const std::vector<py::ssize_t> x_stride = vectorX.get_strides_vector();
139 const std::vector<py::ssize_t> y_stride = vectorY.get_strides_vector();
140 const int x_elemsize = vectorX.get_elemsize();
141 const int y_elemsize = vectorY.get_elemsize();
142
143 const std::int64_t incx = x_stride[0];
144 const std::int64_t incy = y_stride[0];
145 // In OneMKL, the pointer should always point out to the first element of
146 // the array and OneMKL handle the rest depending on the sign of stride.
147 // In OneMKL, when the stride is positive, the data is read in order and
148 // when it is negative, the data is read in reverse order while pointer
149 // always point to the first element
150 // When the stride is negative, the pointer of the array coming from dpnp
151 // points to the last element. So, we need to adjust the pointer
152 if (incx < 0) {
153 x_typeless_ptr -= (n - 1) * std::abs(incx) * x_elemsize;
154 }
155 if (incy < 0) {
156 y_typeless_ptr -= (n - 1) * std::abs(incy) * y_elemsize;
157 }
158
159 sycl::event dot_ev = dot_fn(exec_q, n, x_typeless_ptr, incx, y_typeless_ptr,
160 incy, r_typeless_ptr, depends);
161
162 sycl::event args_ev = dpctl::utils::keep_args_alive(
163 exec_q, {vectorX, vectorY, result}, {dot_ev});
164
165 return std::make_pair(args_ev, dot_ev);
166}
167
168template <template <typename fnT, typename T> typename factoryT>
169void init_dot_dispatch_vector(dot_impl_fn_ptr_t dot_dispatch_vector[])
170{
171 dpctl_td_ns::DispatchVectorBuilder<dot_impl_fn_ptr_t, factoryT,
172 dpctl_td_ns::num_types>
173 contig;
174 contig.populate_dispatch_vector(dot_dispatch_vector);
175}
176} // namespace dpnp::extensions::blas::dot