DPNP C++ backend kernel library 0.18.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
populate.hpp
1//*****************************************************************************
2// Copyright (c) 2024-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
32#define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \
33 template <typename T1, typename T2, unsigned int vec_sz, \
34 unsigned int n_vecs> \
35 class __name__##_contig_kernel; \
36 \
37 template <typename argTy> \
38 sycl::event __name__##_contig_impl( \
39 sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, \
40 const std::vector<sycl::event> &depends = {}) \
41 { \
42 return ew_cmn_ns::unary_contig_impl<argTy, OutputType, ContigFunctor, \
43 __name__##_contig_kernel>( \
44 exec_q, nelems, arg_p, res_p, depends); \
45 } \
46 \
47 template <typename fnT, typename T> \
48 struct ContigFactory \
49 { \
50 fnT get() \
51 { \
52 if constexpr (std::is_same_v<typename OutputType<T>::value_type, \
53 void>) { \
54 fnT fn = nullptr; \
55 return fn; \
56 } \
57 else { \
58 fnT fn = __name__##_contig_impl<T>; \
59 return fn; \
60 } \
61 } \
62 }; \
63 \
64 template <typename fnT, typename T> \
65 struct TypeMapFactory \
66 { \
67 std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
68 { \
69 using rT = typename OutputType<T>::value_type; \
70 return td_ns::GetTypeid<rT>{}.get(); \
71 } \
72 }; \
73 \
74 template <typename T1, typename T2, typename T3> \
75 class __name__##_strided_kernel; \
76 \
77 template <typename argTy> \
78 sycl::event __name__##_strided_impl( \
79 sycl::queue &exec_q, size_t nelems, int nd, \
80 const py::ssize_t *shape_and_strides, const char *arg_p, \
81 py::ssize_t arg_offset, char *res_p, py::ssize_t res_offset, \
82 const std::vector<sycl::event> &depends, \
83 const std::vector<sycl::event> &additional_depends) \
84 { \
85 return ew_cmn_ns::unary_strided_impl< \
86 argTy, OutputType, StridedFunctor, __name__##_strided_kernel>( \
87 exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, \
88 res_offset, depends, additional_depends); \
89 } \
90 \
91 template <typename fnT, typename T> \
92 struct StridedFactory \
93 { \
94 fnT get() \
95 { \
96 if constexpr (std::is_same_v<typename OutputType<T>::value_type, \
97 void>) { \
98 fnT fn = nullptr; \
99 return fn; \
100 } \
101 else { \
102 fnT fn = __name__##_strided_impl<T>; \
103 return fn; \
104 } \
105 } \
106 }; \
107 \
108 void populate_##__name__##_dispatch_vectors(void) \
109 { \
110 td_ns::DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, \
111 ContigFactory, td_ns::num_types> \
112 dvb1; \
113 dvb1.populate_dispatch_vector(__name__##_contig_dispatch_vector); \
114 \
115 td_ns::DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, \
116 StridedFactory, td_ns::num_types> \
117 dvb2; \
118 dvb2.populate_dispatch_vector(__name__##_strided_dispatch_vector); \
119 \
120 td_ns::DispatchVectorBuilder<int, TypeMapFactory, td_ns::num_types> \
121 dvb3; \
122 dvb3.populate_dispatch_vector(__name__##_output_typeid_vector); \
123 };
124
129#define MACRO_POPULATE_DISPATCH_TABLES(__name__) \
130 template <typename argT1, typename argT2, typename resT, \
131 unsigned int vec_sz, unsigned int n_vecs> \
132 class __name__##_contig_kernel; \
133 \
134 template <typename argTy1, typename argTy2> \
135 sycl::event __name__##_contig_impl( \
136 sycl::queue &exec_q, size_t nelems, const char *arg1_p, \
137 py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \
138 char *res_p, py::ssize_t res_offset, \
139 const std::vector<sycl::event> &depends = {}) \
140 { \
141 return ew_cmn_ns::binary_contig_impl<argTy1, argTy2, OutputType, \
142 ContigFunctor, \
143 __name__##_contig_kernel>( \
144 exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, \
145 res_offset, depends); \
146 } \
147 \
148 template <typename fnT, typename T1, typename T2> \
149 struct ContigFactory \
150 { \
151 fnT get() \
152 { \
153 if constexpr (std::is_same_v< \
154 typename OutputType<T1, T2>::value_type, void>) \
155 { \
156 \
157 fnT fn = nullptr; \
158 return fn; \
159 } \
160 else { \
161 fnT fn = __name__##_contig_impl<T1, T2>; \
162 return fn; \
163 } \
164 } \
165 }; \
166 \
167 template <typename fnT, typename T1, typename T2> \
168 struct TypeMapFactory \
169 { \
170 std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
171 { \
172 using rT = typename OutputType<T1, T2>::value_type; \
173 return td_ns::GetTypeid<rT>{}.get(); \
174 } \
175 }; \
176 \
177 template <typename T1, typename T2, typename resT, typename IndexerT> \
178 class __name__##_strided_kernel; \
179 \
180 template <typename argTy1, typename argTy2> \
181 sycl::event __name__##_strided_impl( \
182 sycl::queue &exec_q, size_t nelems, int nd, \
183 const py::ssize_t *shape_and_strides, const char *arg1_p, \
184 py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \
185 char *res_p, py::ssize_t res_offset, \
186 const std::vector<sycl::event> &depends, \
187 const std::vector<sycl::event> &additional_depends) \
188 { \
189 return ew_cmn_ns::binary_strided_impl<argTy1, argTy2, OutputType, \
190 StridedFunctor, \
191 __name__##_strided_kernel>( \
192 exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, \
193 arg2_p, arg2_offset, res_p, res_offset, depends, \
194 additional_depends); \
195 } \
196 \
197 template <typename fnT, typename T1, typename T2> \
198 struct StridedFactory \
199 { \
200 fnT get() \
201 { \
202 if constexpr (std::is_same_v< \
203 typename OutputType<T1, T2>::value_type, void>) \
204 { \
205 fnT fn = nullptr; \
206 return fn; \
207 } \
208 else { \
209 fnT fn = __name__##_strided_impl<T1, T2>; \
210 return fn; \
211 } \
212 } \
213 }; \
214 \
215 void populate_##__name__##_dispatch_tables(void) \
216 { \
217 td_ns::DispatchTableBuilder<binary_contig_impl_fn_ptr_t, \
218 ContigFactory, td_ns::num_types> \
219 dvb1; \
220 dvb1.populate_dispatch_table(__name__##_contig_dispatch_table); \
221 \
222 td_ns::DispatchTableBuilder<binary_strided_impl_fn_ptr_t, \
223 StridedFactory, td_ns::num_types> \
224 dvb2; \
225 dvb2.populate_dispatch_table(__name__##_strided_dispatch_table); \
226 \
227 td_ns::DispatchTableBuilder<int, TypeMapFactory, td_ns::num_types> \
228 dvb3; \
229 dvb3.populate_dispatch_table(__name__##_output_typeid_table); \
230 };