DPNP C++ backend kernel library 0.20.0dev4
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
histogram_common.hpp
1//*****************************************************************************
2// Copyright (c) 2024, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// - Neither the name of the copyright holder nor the names of its contributors
13// may be used to endorse or promote products derived from this software
14// without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26// THE POSSIBILITY OF SUCH DAMAGE.
27//*****************************************************************************
28
29#pragma once
30
31#include <cstddef>
32#include <cstdint>
33#include <optional>
34#include <type_traits>
35
36#include <sycl/sycl.hpp>
37
38#include "dpctl4pybind11.hpp"
39
40#include "ext/common.hpp"
41#include "kernels/statistics/histogram.hpp"
42
43namespace statistics::histogram
44{
45using dpctl::tensor::usm_ndarray;
46
50
51template <typename T, int Dims>
53{
54 static constexpr bool const sync_after_init = true;
55 using Shape = sycl::range<Dims>;
56 using value_type = T;
57 using pointer_type = value_type *;
58 static constexpr auto dims = Dims;
59
60 using ncT = typename std::remove_const<value_type>::type;
61 using LocalData = sycl::local_accessor<ncT, Dims>;
62
63 CachedData(T *global_data, Shape shape, sycl::handler &cgh)
64 {
65 this->global_data = global_data;
66 local_data = LocalData(shape, cgh);
67 }
68
69 T *get_ptr() const { return &local_data[0]; }
70
71 template <int _Dims>
72 void init(const sycl::nd_item<_Dims> &item) const
73 {
74 std::uint32_t llid = item.get_local_linear_id();
75 auto local_ptr = &local_data[0];
76 std::uint32_t size = local_data.size();
77 auto group = item.get_group();
78 std::uint32_t local_size = group.get_local_linear_range();
79
80 for (std::uint32_t i = llid; i < size; i += local_size) {
81 local_ptr[i] = global_data[i];
82 }
83 }
84
85 std::size_t size() const { return local_data.size(); }
86
87 T &operator[](const sycl::id<Dims> &id) const { return local_data[id]; }
88
89 template <typename = std::enable_if_t<Dims == 1>>
90 T &operator[](const std::size_t id) const
91 {
92 return local_data[id];
93 }
94
95private:
96 LocalData local_data;
97 value_type *global_data = nullptr;
98};
99
100template <typename T, int Dims>
102{
103 static constexpr bool const sync_after_init = false;
104 using Shape = sycl::range<Dims>;
105 using value_type = T;
106 using pointer_type = value_type *;
107 static constexpr auto dims = Dims;
108
109 UncachedData(T *global_data, const Shape &shape, sycl::handler &)
110 {
111 this->global_data = global_data;
112 _shape = shape;
113 }
114
115 T *get_ptr() const { return global_data; }
116
117 template <int _Dims>
118 void init(const sycl::nd_item<_Dims> &) const
119 {
120 }
121
122 std::size_t size() const { return _shape.size(); }
123
124 T &operator[](const sycl::id<Dims> &id) const { return global_data[id]; }
125
126 template <typename = std::enable_if_t<Dims == 1>>
127 T &operator[](const std::size_t id) const
128 {
129 return global_data[id];
130 }
131
132private:
133 T *global_data = nullptr;
134 Shape _shape;
135};
136
137template <typename T>
139{
140 using type = T;
141};
142
143template <>
144struct HistLocalType<std::uint64_t>
145{
146 using type = std::uint32_t;
147};
148
149template <>
150struct HistLocalType<std::int64_t>
151{
152 using type = std::int32_t;
153};
154
155template <typename T, typename localT = typename HistLocalType<T>::type>
157{
158 static constexpr bool const sync_after_init = true;
159 static constexpr bool const sync_before_finalize = true;
160
161 using LocalHist = sycl::local_accessor<localT, 2>;
162
163 HistWithLocalCopies(T *global_data,
164 std::size_t bins_count,
165 std::int32_t copies_count,
166 sycl::handler &cgh)
167 {
168 local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
169 global_hist = global_data;
170 }
171
172 template <int _Dims>
173 void init(const sycl::nd_item<_Dims> &item, localT val = 0) const
174 {
175 std::uint32_t llid = item.get_local_linear_id();
176 auto *local_ptr = &local_hist[0][0];
177 std::uint32_t size = local_hist.size();
178 auto group = item.get_group();
179 std::uint32_t local_size = group.get_local_linear_range();
180
181 for (std::uint32_t i = llid; i < size; i += local_size) {
182 local_ptr[i] = val;
183 }
184 }
185
186 template <int _Dims>
187 void add(const sycl::nd_item<_Dims> &item,
188 std::int32_t bin,
189 localT value) const
190 {
191 std::int32_t llid = item.get_local_linear_id();
192 std::int32_t local_hist_count = local_hist.get_range().get(0);
193 std::int32_t local_copy_id =
194 local_hist_count == 1 ? 0 : llid % local_hist_count;
195
196 AtomicOp<localT, sycl::memory_order::relaxed,
197 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
198 [bin],
199 value);
200 }
201
202 template <int _Dims>
203 void finalize(const sycl::nd_item<_Dims> &item) const
204 {
205 std::uint32_t llid = item.get_local_linear_id();
206 std::uint32_t bins_count = local_hist.get_range().get(1);
207 std::uint32_t local_hist_count = local_hist.get_range().get(0);
208 auto group = item.get_group();
209 std::uint32_t local_size = group.get_local_linear_range();
210
211 for (std::uint32_t i = llid; i < bins_count; i += local_size) {
212 auto value = local_hist[0][i];
213 for (std::uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
214 value += local_hist[lhc][i];
215 }
216 if (value != T(0)) {
217 AtomicOp<T, sycl::memory_order::relaxed,
218 sycl::memory_scope::device>::add(global_hist[i],
219 value);
220 }
221 }
222 }
223
224 std::uint32_t size() const { return local_hist.size(); }
225
226private:
227 LocalHist local_hist;
228 T *global_hist = nullptr;
229};
230
231template <typename T>
233{
234 static constexpr bool const sync_after_init = false;
235 static constexpr bool const sync_before_finalize = false;
236
237 HistGlobalMemory(T *global_data) { global_hist = global_data; }
238
239 template <int _Dims>
240 void init(const sycl::nd_item<_Dims> &) const
241 {
242 }
243
244 template <int _Dims>
245 void add(const sycl::nd_item<_Dims> &, std::int32_t bin, T value) const
246 {
247 AtomicOp<T, sycl::memory_order::relaxed,
248 sycl::memory_scope::device>::add(global_hist[bin], value);
249 }
250
251 template <int _Dims>
252 void finalize(const sycl::nd_item<_Dims> &) const
253 {
254 }
255
256private:
257 T *global_hist = nullptr;
258};
259
260template <typename T = std::uint32_t>
262{
263 constexpr T get(std::size_t) const { return 1; }
264};
265
266template <typename T>
268{
269 Weights(T *weights) { data = weights; }
270
271 T get(std::size_t id) const { return data[id]; }
272
273private:
274 T *data = nullptr;
275};
276
277template <typename dT>
278bool check_in_bounds(const dT &val, const dT &min, const dT &max)
279{
280 Less<dT> _less;
281 return !_less(val, min) && !_less(max, val) && !IsNan<dT>::isnan(val);
282}
283
284template <typename T, typename HistImpl, typename Edges, typename Weights>
285void submit_histogram(const T *in,
286 const std::size_t size,
287 const std::size_t dims,
288 const std::uint32_t WorkPI,
289 const HistImpl &hist,
290 const Edges &edges,
291 const Weights &weights,
292 sycl::nd_range<1> nd_range,
293 sycl::handler &cgh)
294{
295 using HistogramKernel =
297
298 cgh.parallel_for<HistogramKernel>(
299 nd_range,
300 HistogramKernel(in, size, dims, WorkPI, hist, edges, weights));
301}
302
303void validate(const usm_ndarray &sample,
304 const std::optional<const dpctl::tensor::usm_ndarray> &bins,
305 const std::optional<const dpctl::tensor::usm_ndarray> &weights,
306 const usm_ndarray &histogram);
307
308std::uint32_t get_local_hist_copies_count(std::uint32_t loc_mem_size_in_items,
309 std::uint32_t local_size,
310 std::uint32_t hist_size_in_items);
311
312} // namespace statistics::histogram