DPNP C++ backend kernel library 0.20.0dev4
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
sliding_window1d.hpp
1//*****************************************************************************
2// Copyright (c) 2024, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// - Neither the name of the copyright holder nor the names of its contributors
13// may be used to endorse or promote products derived from this software
14// without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26// THE POSSIBILITY OF SUCH DAMAGE.
27//*****************************************************************************
28
29#pragma once
30
31#include <cstddef>
32#include <cstdint>
33#include <type_traits>
34
35#include <sycl/sycl.hpp>
36
37#include "dpctl4pybind11.hpp"
38
39#include "kernels/statistics/sliding_window1d.hpp"
40
41namespace statistics::sliding_window1d
42{
43using dpctl::tensor::usm_ndarray;
44
45template <typename T, std::uint32_t Size>
47{
48public:
49 using ncT = typename std::remove_const_t<T>;
50 using SizeT = decltype(Size);
51 static constexpr SizeT _size = Size;
52
53 _RegistryDataStorage(const sycl::nd_item<1> &item)
54 : sbgroup(item.get_sub_group())
55 {
56 }
57
58 template <typename yT>
59 T &operator[](const yT &idx)
60 {
61 static_assert(std::is_integral_v<yT>,
62 "idx must be of an integral type");
63 return data[idx];
64 }
65
66 template <typename yT>
67 const T &operator[](const yT &idx) const
68 {
69 static_assert(std::is_integral_v<yT>,
70 "idx must be of an integral type");
71 return data[idx];
72 }
73
74 T &value()
75 {
76 static_assert(Size == 1,
77 "Size is not equal to 1. Use value(idx) instead");
78 return data[0];
79 }
80
81 const T &value() const
82 {
83 static_assert(Size == 1,
84 "Size is not equal to 1. Use value(idx) instead");
85 return data[0];
86 }
87
88 template <typename yT, typename xT>
89 T broadcast(const yT &y, const xT &x) const
90 {
91 static_assert(std::is_integral_v<std::remove_reference_t<yT>>,
92 "y must be of an integral type");
93 static_assert(std::is_integral_v<std::remove_reference_t<xT>>,
94 "x must be of an integral type");
95
96 return sycl::select_from_group(sbgroup, data[y], x);
97 }
98
99 template <typename iT>
100 T broadcast(const iT &idx) const
101 {
102 if constexpr (Size == 1) {
103 return broadcast(0, idx);
104 }
105 else {
106 return broadcast(idx / size_x(), idx % size_x());
107 }
108 }
109
110 template <typename yT, typename xT>
111 T shift_left(const yT &y, const xT &x) const
112 {
113 static_assert(std::is_integral_v<yT>, "y must be of an integral type");
114 static_assert(std::is_integral_v<xT>, "x must be of an integral type");
115
116 return sycl::shift_group_left(sbgroup, data[y], x);
117 }
118
119 template <typename yT, typename xT>
120 T shift_right(const yT &y, const xT &x) const
121 {
122 static_assert(std::is_integral_v<yT>, "y must be of an integral type");
123 static_assert(std::is_integral_v<xT>, "x must be of an integral type");
124
125 return sycl::shift_group_right(sbgroup, data[y], x);
126 }
127
128 constexpr SizeT size_y() const { return _size; }
129
130 SizeT size_x() const { return sbgroup.get_max_local_range()[0]; }
131
132 SizeT total_size() const { return size_x() * size_y(); }
133
134 ncT *ptr() { return data; }
135
136 SizeT x() const { return sbgroup.get_local_linear_id(); }
137
138protected:
139 const sycl::sub_group sbgroup;
140 ncT data[Size];
141};
142
143template <typename T, std::uint32_t Size = 1>
144struct RegistryData : public _RegistryDataStorage<T, Size>
145{
146 using SizeT = typename _RegistryDataStorage<T, Size>::SizeT;
147
148 using _RegistryDataStorage<T, Size>::_RegistryDataStorage;
149
150 template <typename LaneIdT,
151 typename Condition,
152 typename = std::enable_if_t<
153 std::is_invocable_r_v<bool, Condition, SizeT>>>
154 void fill_lane(const LaneIdT &lane_id, const T &value, Condition &&mask)
155 {
156 static_assert(std::is_integral_v<LaneIdT>,
157 "lane_id must be of an integral type");
158 if (mask(this->x())) {
159 this->data[lane_id] = value;
160 }
161 }
162
163 template <typename LaneIdT>
164 void fill_lane(const LaneIdT &lane_id, const T &value, const bool &mask)
165 {
166 fill_lane(lane_id, value, [mask](auto &&) { return mask; });
167 }
168
169 template <typename LaneIdT>
170 void fill_lane(const LaneIdT &lane_id, const T &value)
171 {
172 fill_lane(lane_id, value, true);
173 }
174
175 template <typename Condition,
176 typename = std::enable_if_t<
177 std::is_invocable_r_v<bool, Condition, SizeT, SizeT>>>
178 void fill(const T &value, Condition &&mask)
179 {
180 for (SizeT i = 0; i < Size; ++i) {
181 fill_lane(i, value, mask(i, this->x()));
182 }
183 }
184
185 void fill(const T &value)
186 {
187 fill(value, [](auto &&, auto &&) { return true; });
188 }
189
190 template <typename LaneIdT,
191 typename Condition,
192 typename = std::enable_if_t<
193 std::is_invocable_r_v<bool, Condition, const T *const>>>
194 T *load_lane(const LaneIdT &lane_id,
195 const T *const data,
196 Condition &&mask,
197 const T &default_v)
198 {
199 static_assert(std::is_integral_v<LaneIdT>,
200 "lane_id must be of an integral type");
201 this->data[lane_id] = mask(data) ? data[0] : default_v;
202
203 return data + this->size_x();
204 }
205
206 template <typename LaneIdT>
207 T *load_lane(const LaneIdT &laned_id,
208 const T *const data,
209 const bool &mask,
210 const T &default_v)
211 {
212 return load_lane(
213 laned_id, data, [mask](auto &&) { return mask; }, default_v);
214 }
215
216 template <typename LaneIdT>
217 T *load_lane(const LaneIdT &laned_id, const T *const data)
218 {
219 constexpr T default_v = 0;
220 return load_lane(laned_id, data, true, default_v);
221 }
222
223 template <typename yStrideT,
224 typename Condition,
225 typename = std::enable_if_t<
226 std::is_invocable_r_v<bool, Condition, const T *const>>>
227 T *load(const T *const data,
228 const yStrideT &y_stride,
229 Condition &&mask,
230 const T &default_v)
231 {
232 auto *it = data;
233 for (SizeT i = 0; i < Size; ++i) {
234 load_lane(i, it, mask, default_v);
235 it += y_stride;
236 }
237
238 return it;
239 }
240
241 template <typename yStrideT>
242 T *load(const T *const data,
243 const yStrideT &y_stride,
244 const bool &mask,
245 const T &default_v)
246 {
247 return load(
248 data, y_stride, [mask](auto &&) { return mask; }, default_v);
249 }
250
251 template <typename Condition,
252 typename = std::enable_if_t<
253 std::is_invocable_r_v<bool, Condition, const T *const>>>
254 T *load(const T *const data, Condition &&mask, const T &default_v)
255 {
256 return load(data, this->size_x(), mask, default_v);
257 }
258
259 T *load(const T *const data, const bool &mask, const T &default_v)
260 {
261 return load(data, [mask](auto &&) { return mask; }, default_v);
262 }
263
264 T *load(const T *const data)
265 {
266 constexpr T default_v = 0;
267 return load(data, true, default_v);
268 }
269
270 template <typename LaneIdT,
271 typename Condition,
272 typename = std::enable_if_t<
273 std::is_invocable_r_v<bool, Condition, const T *const>>>
274 T *store_lane(const LaneIdT &lane_id, T *const data, Condition &&mask)
275 {
276 static_assert(std::is_integral_v<LaneIdT>,
277 "lane_id must be of an integral type");
278
279 if (mask(data)) {
280 data[0] = this->data[lane_id];
281 }
282
283 return data + this->size_x();
284 }
285
286 template <typename LaneIdT>
287 T *store_lane(const LaneIdT &lane_id, T *const data, const bool &mask)
288 {
289 return store_lane(lane_id, data, [mask](auto &&) { return mask; });
290 }
291
292 template <typename LaneIdT>
293 T *store_lane(const LaneIdT &lane_id, T *const data)
294 {
295 return store_lane(lane_id, data, true);
296 }
297
298 template <typename yStrideT,
299 typename Condition,
300 typename = std::enable_if_t<
301 std::is_invocable_r_v<bool, Condition, const T *const>>>
302 T *store(T *const data, const yStrideT &y_stride, Condition &&condition)
303 {
304 auto *it = data;
305 for (SizeT i = 0; i < Size; ++i) {
306 store_lane(i, it, condition);
307 it += y_stride;
308 }
309
310 return it;
311 }
312
313 template <typename yStrideT>
314 T *store(T *const data, const yStrideT &y_stride, const bool &mask)
315 {
316 return store(data, y_stride, [mask](auto &&) { return mask; });
317 }
318
319 template <typename Condition,
320 typename = std::enable_if_t<
321 std::is_invocable_r_v<bool, Condition, const T *const>>>
322 T *store(T *const data, Condition &&condition)
323 {
324 return store(data, this->size_x(), condition);
325 }
326
327 T *store(T *const data, const bool &mask)
328 {
329 return store(data, [mask](auto &&) { return mask; });
330 }
331
332 T *store(T *const data) { return store(data, true); }
333};
334
335template <typename T, std::uint32_t Size>
336struct RegistryWindow : public RegistryData<T, Size>
337{
338 using SizeT = typename RegistryData<T, Size>::SizeT;
339
340 using RegistryData<T, Size>::RegistryData;
341
342 template <typename shT>
343 void advance_left(const shT &shift, const T &fill_value)
344 {
345 static_assert(std::is_integral_v<shT>,
346 "shift must be of an integral type");
347
348 std::uint32_t shift_r = this->size_x() - shift;
349 for (SizeT i = 0; i < Size; ++i) {
350 this->data[i] = this->shift_left(i, shift);
351 auto border =
352 i < Size - 1 ? this->shift_right(i + 1, shift_r) : fill_value;
353 if (this->x() >= shift_r) {
354 this->data[i] = border;
355 }
356 }
357 }
358
359 void advance_left(const T &fill_value) { advance_left(1, fill_value); }
360
361 void advance_left()
362 {
363 constexpr T fill_value = 0;
364 advance_left(fill_value);
365 }
366};
367
368template <typename T, typename SizeT = std::size_t>
369class Span
370{
371public:
372 using value_type = T;
373 using size_type = SizeT;
374
375 Span(T *const data, const SizeT size) : data_(data), size_(size) {}
376
377 T *begin() const { return data(); }
378
379 T *end() const { return data() + size(); }
380
381 SizeT size() const { return size_; }
382
383 T *data() const { return data_; }
384
385protected:
386 T *const data_;
387 const SizeT size_;
388};
389
390template <typename T, typename SizeT = std::size_t>
391Span<T, SizeT> make_span(T *const data, const SizeT size)
392{
393 return Span<T, SizeT>(data, size);
394}
395
396template <typename T, typename SizeT = std::size_t>
397class PaddedSpan : public Span<T, SizeT>
398{
399public:
400 using value_type = T;
401 using size_type = SizeT;
402
403 PaddedSpan(T *const data, const SizeT size, const SizeT pad)
404 : Span<T, SizeT>(data, size), pad_(pad)
405 {
406 }
407
408 T *padded_begin() const { return this->begin() - pad(); }
409
410 SizeT pad() const { return pad_; }
411
412protected:
413 const SizeT pad_;
414};
415
416template <typename T, typename SizeT = std::size_t>
418 make_padded_span(T *const data, const SizeT size, const SizeT offset)
419{
420 return PaddedSpan<T, SizeT>(data, size, offset);
421}
422
423template <std::uint32_t WorkPI,
424 typename T,
425 typename SizeT,
426 typename Op,
427 typename Red>
428void submit_sliding_window1d(const PaddedSpan<const T, SizeT> &a,
429 const Span<const T, SizeT> &v,
430 const Op &op,
431 const Red &red,
432 Span<T, SizeT> &out,
433 sycl::nd_range<1> nd_range,
434 sycl::handler &cgh)
435{
436 using SlidingWindow1dKernel =
438 WorkPI, PaddedSpan<const T, SizeT>, Span<const T, SizeT>, Op, Red,
439 Span<T, SizeT>, RegistryData, RegistryWindow>;
440
441 cgh.parallel_for<SlidingWindow1dKernel>(
442 nd_range, SlidingWindow1dKernel(a, v, op, red, out));
443}
444
445template <std::uint32_t WorkPI,
446 typename T,
447 typename SizeT,
448 typename Op,
449 typename Red>
450void submit_sliding_window1d_small_kernel(const PaddedSpan<const T, SizeT> &a,
451 const Span<const T, SizeT> &v,
452 const Op &op,
453 const Red &red,
454 Span<T, SizeT> &out,
455 sycl::nd_range<1> nd_range,
456 sycl::handler &cgh)
457{
458 using SlidingWindow1dSmallKernel =
460 WorkPI, PaddedSpan<const T, SizeT>, Span<const T, SizeT>, Op, Red,
461 Span<T, SizeT>, RegistryData, RegistryWindow>;
462
463 cgh.parallel_for<SlidingWindow1dSmallKernel>(
464 nd_range, SlidingWindow1dSmallKernel(a, v, op, red, out));
465}
466
467void validate(const usm_ndarray &a,
468 const usm_ndarray &v,
469 const usm_ndarray &out,
470 const std::size_t l_pad,
471 const std::size_t r_pad);
472} // namespace statistics::sliding_window1d