DPNP C++ backend kernel library
0.18.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
populate.hpp
1
//*****************************************************************************
2
// Copyright (c) 2024-2025, Intel Corporation
3
// All rights reserved.
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are met:
7
// - Redistributions of source code must retain the above copyright notice,
8
// this list of conditions and the following disclaimer.
9
// - Redistributions in binary form must reproduce the above copyright notice,
10
// this list of conditions and the following disclaimer in the documentation
11
// and/or other materials provided with the distribution.
12
//
13
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23
// THE POSSIBILITY OF SUCH DAMAGE.
24
//*****************************************************************************
25
26
#pragma once
27
32
/**
 * @brief Generates the dispatch plumbing for a unary element-wise function.
 *
 * Expanding `MACRO_POPULATE_DISPATCH_VECTORS(foo)` emits, in the current
 * scope:
 *  - forward declarations of `foo_contig_kernel` and `foo_strided_kernel`;
 *  - `foo_contig_impl<argTy>` and `foo_strided_impl<argTy>`, which forward
 *    their arguments to `ew_cmn_ns::unary_contig_impl` and
 *    `ew_cmn_ns::unary_strided_impl`;
 *  - the factories `ContigFactory`, `StridedFactory` and `TypeMapFactory`;
 *  - `void populate_foo_dispatch_vectors(void)`, which fills
 *    `foo_contig_dispatch_vector`, `foo_strided_dispatch_vector` and
 *    `foo_output_typeid_vector`.
 *
 * The expansion site must already provide `OutputType`, `ContigFunctor`,
 * `StridedFunctor`, the three vectors listed above, the function pointer
 * types `unary_contig_impl_fn_ptr_t` / `unary_strided_impl_fn_ptr_t`, and the
 * `sycl`, `py`, `td_ns` and `ew_cmn_ns` names.
 *
 * @note The factory names are not derived from the macro argument, so the
 *       macro can be expanded at most once per scope.
 * @note The expansion ends with the closing brace of the populate function;
 *       a `;` written after the invocation is harmless.
 *
 * @param func_name Prefix pasted into every generated identifier.
 */
#define MACRO_POPULATE_DISPATCH_VECTORS(func_name) \
    /* Declared but never defined here; only used as a template-template */ \
    /* argument of unary_contig_impl (presumably the SYCL kernel name). */ \
    template <typename T1, typename T2, unsigned int vec_sz, \
              unsigned int n_vecs> \
    class func_name##_contig_kernel; \
    \
    template <typename argTy> \
    sycl::event func_name##_contig_impl( \
        sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, \
        const std::vector<sycl::event> &depends = {}) \
    { \
        return ew_cmn_ns::unary_contig_impl<argTy, OutputType, ContigFunctor, \
                                            func_name##_contig_kernel>( \
            exec_q, nelems, arg_p, res_p, depends); \
    } \
    \
    /* Yields the contiguous implementation for T, or nullptr when */ \
    /* OutputType<T>::value_type is void. */ \
    template <typename fnT, typename T> \
    struct ContigFactory \
    { \
        fnT get() \
        { \
            using resTy = typename OutputType<T>::value_type; \
            if constexpr (std::is_same_v<resTy, void>) { \
                return nullptr; \
            } \
            else { \
                return func_name##_contig_impl<T>; \
            } \
        } \
    }; \
    \
    /* Yields the type id of OutputType<T>::value_type. get() is not a */ \
    /* template, so the enable_if_t return type makes any fnT other than */ \
    /* int ill-formed as soon as the factory is instantiated. */ \
    template <typename fnT, typename T> \
    struct TypeMapFactory \
    { \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
        { \
            using rT = typename OutputType<T>::value_type; \
            return td_ns::GetTypeid<rT>{}.get(); \
        } \
    }; \
    \
    /* Declared but never defined here; only used as a template-template */ \
    /* argument of unary_strided_impl. */ \
    template <typename T1, typename T2, typename T3> \
    class func_name##_strided_kernel; \
    \
    template <typename argTy> \
    sycl::event func_name##_strided_impl( \
        sycl::queue &exec_q, size_t nelems, int nd, \
        const py::ssize_t *shape_and_strides, const char *arg_p, \
        py::ssize_t arg_offset, char *res_p, py::ssize_t res_offset, \
        const std::vector<sycl::event> &depends, \
        const std::vector<sycl::event> &additional_depends) \
    { \
        return ew_cmn_ns::unary_strided_impl< \
            argTy, OutputType, StridedFunctor, func_name##_strided_kernel>( \
            exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, \
            res_offset, depends, additional_depends); \
    } \
    \
    /* Yields the strided implementation for T, or nullptr when */ \
    /* OutputType<T>::value_type is void. */ \
    template <typename fnT, typename T> \
    struct StridedFactory \
    { \
        fnT get() \
        { \
            using resTy = typename OutputType<T>::value_type; \
            if constexpr (std::is_same_v<resTy, void>) { \
                return nullptr; \
            } \
            else { \
                return func_name##_strided_impl<T>; \
            } \
        } \
    }; \
    \
    void populate_##func_name##_dispatch_vectors(void) \
    { \
        td_ns::DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, \
                                     ContigFactory, td_ns::num_types> \
            contig_builder; \
        contig_builder.populate_dispatch_vector( \
            func_name##_contig_dispatch_vector); \
        \
        td_ns::DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, \
                                     StridedFactory, td_ns::num_types> \
            strided_builder; \
        strided_builder.populate_dispatch_vector( \
            func_name##_strided_dispatch_vector); \
        \
        td_ns::DispatchVectorBuilder<int, TypeMapFactory, td_ns::num_types> \
            typeid_builder; \
        typeid_builder.populate_dispatch_vector( \
            func_name##_output_typeid_vector); \
    }
124
129
/**
 * @brief Generates the dispatch plumbing for a binary element-wise function.
 *
 * Expanding `MACRO_POPULATE_DISPATCH_TABLES(foo)` emits, in the current
 * scope:
 *  - forward declarations of `foo_contig_kernel` and `foo_strided_kernel`;
 *  - `foo_contig_impl<argTy1, argTy2>` and
 *    `foo_strided_impl<argTy1, argTy2>`, which forward their arguments to
 *    `ew_cmn_ns::binary_contig_impl` and `ew_cmn_ns::binary_strided_impl`;
 *  - the factories `ContigFactory`, `StridedFactory` and `TypeMapFactory`;
 *  - `void populate_foo_dispatch_tables(void)`, which fills
 *    `foo_contig_dispatch_table`, `foo_strided_dispatch_table` and
 *    `foo_output_typeid_table`.
 *
 * The expansion site must already provide `OutputType`, `ContigFunctor`,
 * `StridedFunctor`, the three tables listed above, the function pointer
 * types `binary_contig_impl_fn_ptr_t` / `binary_strided_impl_fn_ptr_t`, and
 * the `sycl`, `py`, `td_ns` and `ew_cmn_ns` names.
 *
 * @note The factory names are not derived from the macro argument, so the
 *       macro can be expanded at most once per scope.
 * @note The expansion ends with the closing brace of the populate function;
 *       a `;` written after the invocation is harmless.
 *
 * @param func_name Prefix pasted into every generated identifier.
 */
#define MACRO_POPULATE_DISPATCH_TABLES(func_name) \
    /* Declared but never defined here; only used as a template-template */ \
    /* argument of binary_contig_impl (presumably the SYCL kernel name). */ \
    template <typename argT1, typename argT2, typename resT, \
              unsigned int vec_sz, unsigned int n_vecs> \
    class func_name##_contig_kernel; \
    \
    template <typename argTy1, typename argTy2> \
    sycl::event func_name##_contig_impl( \
        sycl::queue &exec_q, size_t nelems, const char *arg1_p, \
        py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \
        char *res_p, py::ssize_t res_offset, \
        const std::vector<sycl::event> &depends = {}) \
    { \
        return ew_cmn_ns::binary_contig_impl<argTy1, argTy2, OutputType, \
                                             ContigFunctor, \
                                             func_name##_contig_kernel>( \
            exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, \
            res_offset, depends); \
    } \
    \
    /* Yields the contiguous implementation for (T1, T2), or nullptr when */ \
    /* OutputType<T1, T2>::value_type is void. */ \
    template <typename fnT, typename T1, typename T2> \
    struct ContigFactory \
    { \
        fnT get() \
        { \
            using resTy = typename OutputType<T1, T2>::value_type; \
            if constexpr (std::is_same_v<resTy, void>) { \
                return nullptr; \
            } \
            else { \
                return func_name##_contig_impl<T1, T2>; \
            } \
        } \
    }; \
    \
    /* Yields the type id of OutputType<T1, T2>::value_type. get() is not */ \
    /* a template, so the enable_if_t return type makes any fnT other */ \
    /* than int ill-formed as soon as the factory is instantiated. */ \
    template <typename fnT, typename T1, typename T2> \
    struct TypeMapFactory \
    { \
        std::enable_if_t<std::is_same<fnT, int>::value, int> get() \
        { \
            using rT = typename OutputType<T1, T2>::value_type; \
            return td_ns::GetTypeid<rT>{}.get(); \
        } \
    }; \
    \
    /* Declared but never defined here; only used as a template-template */ \
    /* argument of binary_strided_impl. */ \
    template <typename T1, typename T2, typename resT, typename IndexerT> \
    class func_name##_strided_kernel; \
    \
    template <typename argTy1, typename argTy2> \
    sycl::event func_name##_strided_impl( \
        sycl::queue &exec_q, size_t nelems, int nd, \
        const py::ssize_t *shape_and_strides, const char *arg1_p, \
        py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \
        char *res_p, py::ssize_t res_offset, \
        const std::vector<sycl::event> &depends, \
        const std::vector<sycl::event> &additional_depends) \
    { \
        return ew_cmn_ns::binary_strided_impl<argTy1, argTy2, OutputType, \
                                              StridedFunctor, \
                                              func_name##_strided_kernel>( \
            exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, \
            arg2_p, arg2_offset, res_p, res_offset, depends, \
            additional_depends); \
    } \
    \
    /* Yields the strided implementation for (T1, T2), or nullptr when */ \
    /* OutputType<T1, T2>::value_type is void. */ \
    template <typename fnT, typename T1, typename T2> \
    struct StridedFactory \
    { \
        fnT get() \
        { \
            using resTy = typename OutputType<T1, T2>::value_type; \
            if constexpr (std::is_same_v<resTy, void>) { \
                return nullptr; \
            } \
            else { \
                return func_name##_strided_impl<T1, T2>; \
            } \
        } \
    }; \
    \
    void populate_##func_name##_dispatch_tables(void) \
    { \
        td_ns::DispatchTableBuilder<binary_contig_impl_fn_ptr_t, \
                                    ContigFactory, td_ns::num_types> \
            contig_builder; \
        contig_builder.populate_dispatch_table( \
            func_name##_contig_dispatch_table); \
        \
        td_ns::DispatchTableBuilder<binary_strided_impl_fn_ptr_t, \
                                    StridedFactory, td_ns::num_types> \
            strided_builder; \
        strided_builder.populate_dispatch_table( \
            func_name##_strided_dispatch_table); \
        \
        td_ns::DispatchTableBuilder<int, TypeMapFactory, td_ns::num_types> \
            typeid_builder; \
        typeid_builder.populate_dispatch_table( \
            func_name##_output_typeid_table); \
    }
extensions
ufunc
elementwise_functions
populate.hpp
Generated by
1.12.0