DPNP C++ backend kernel library 0.18.0dev1
Data Parallel Extension for NumPy*
histogram_common.hpp
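Common building blocks for the statistics histogram kernels: data-caching policies (CachedData, UncachedData), histogram accumulation policies (HistWithLocalCopies, HistGlobalMemory), weight policies (NoWeights, Weights), and the submit_histogram helper that composes them into a SYCL kernel.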
//*****************************************************************************
// Copyright (c) 2024-2025, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <sycl/sycl.hpp>

#include "ext/common.hpp"

namespace dpctl::tensor
{
class usm_ndarray;
}

using dpctl::tensor::usm_ndarray;

using ext::common::AtomicOp;
using ext::common::IsNan;
using ext::common::Less;

namespace statistics::histogram
{
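
// CachedData: owns a work-group-local (SLM) copy of a block of global data.
// init() loads the block cooperatively across the work-group, and
// sync_after_init == true asks the kernel to place a group barrier before the
// copy is first read.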
template <typename T, int Dims>
struct CachedData
{
    static constexpr bool const sync_after_init = true;
    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    using ncT = typename std::remove_const<value_type>::type;
    using LocalData = sycl::local_accessor<ncT, Dims>;

    CachedData(T *global_data, Shape shape, sycl::handler &cgh)
    {
        this->global_data = global_data;
        local_data = LocalData(shape, cgh);
    }

    T *get_ptr() const
    {
        return &local_data[0];
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto local_ptr = &local_data[0];
        uint32_t size = local_data.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = global_data[i];
        }
    }

    size_t size() const
    {
        return local_data.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return local_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return local_data[id];
    }

private:
    LocalData local_data;
    value_type *global_data = nullptr;
};
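
// UncachedData: same interface as CachedData, but get_ptr() and operator[]
// read straight from global memory; init() is a no-op, so no barrier is
// requested.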
template <typename T, int Dims>
struct UncachedData
{
    static constexpr bool const sync_after_init = false;
    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    UncachedData(T *global_data, const Shape &shape, sycl::handler &)
    {
        this->global_data = global_data;
        _shape = shape;
    }

    T *get_ptr() const
    {
        return global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    size_t size() const
    {
        return _shape.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return global_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return global_data[id];
    }

private:
    T *global_data = nullptr;
    Shape _shape;
};
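
// HistLocalType maps a histogram counter type to the (possibly narrower)
// type used for its SLM copies: 64-bit counters are accumulated as 32-bit
// values in local memory and widened only when merged into the global
// histogram.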
template <typename T>
struct HistLocalType
{
    using type = T;
};

template <>
struct HistLocalType<uint64_t>
{
    using type = uint32_t;
};

template <>
struct HistLocalType<int64_t>
{
    using type = int32_t;
};
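
// HistWithLocalCopies: privatized histogram. It keeps copies_count
// independent copies of the bins in SLM to spread atomic traffic; add()
// selects a copy from the work-item's local id, and finalize() sums the
// copies and merges the non-zero bins into the global histogram with
// device-scope atomics.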
template <typename T, typename localT = typename HistLocalType<T>::type>
struct HistWithLocalCopies
{
    static constexpr bool const sync_after_init = true;
    static constexpr bool const sync_before_finalize = true;

    using LocalHist = sycl::local_accessor<localT, 2>;

    HistWithLocalCopies(T *global_data,
                        size_t bins_count,
                        int32_t copies_count,
                        sycl::handler &cgh)
    {
        local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
        global_hist = global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item, localT val = 0) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto *local_ptr = &local_hist[0][0];
        uint32_t size = local_hist.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = val;
        }
    }

    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const
    {
        int32_t llid = item.get_local_linear_id();
        int32_t local_hist_count = local_hist.get_range().get(0);
        int32_t local_copy_id =
            local_hist_count == 1 ? 0 : llid % local_hist_count;

        AtomicOp<localT, sycl::memory_order::relaxed,
                 sycl::memory_scope::work_group>::
            add(local_hist[local_copy_id][bin], value);
    }

    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        uint32_t bins_count = local_hist.get_range().get(1);
        uint32_t local_hist_count = local_hist.get_range().get(0);
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < bins_count; i += local_size) {
            auto value = local_hist[0][i];
            for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
                value += local_hist[lhc][i];
            }
            if (value != T(0)) {
                AtomicOp<T, sycl::memory_order::relaxed,
                         sycl::memory_scope::device>::add(global_hist[i],
                                                          value);
            }
        }
    }

    uint32_t size() const
    {
        return local_hist.size();
    }

private:
    LocalHist local_hist;
    T *global_hist = nullptr;
};
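
// HistGlobalMemory accumulates directly in global memory (for histograms too
// large for SLM): every add() is a relaxed device-scope atomic on the global
// bins; init() and finalize() are no-ops, so no barriers are requested.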
template <typename T>
struct HistGlobalMemory
{
    static constexpr bool const sync_after_init = false;
    static constexpr bool const sync_before_finalize = false;

    HistGlobalMemory(T *global_data)
    {
        global_hist = global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const
    {
        AtomicOp<T, sycl::memory_order::relaxed,
                 sycl::memory_scope::device>::add(global_hist[bin], value);
    }

    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &) const
    {
    }

private:
    T *global_hist = nullptr;
};
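
// NoWeights: weight policy for unweighted histograms; every sample counts
// as 1.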
template <typename T = uint32_t>
struct NoWeights
{
    constexpr T get(size_t) const
    {
        return 1;
    }
};
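
// Weights: weight policy that reads a per-sample weight from global memory.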
template <typename T>
struct Weights
{
    Weights(T *weights)
    {
        data = weights;
    }

    T get(size_t id) const
    {
        return data[id];
    }

private:
    T *data = nullptr;
};
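
// Returns true when min <= val <= max and val is not NaN; Less and IsNan
// are the comparison helpers from ext/common.hpp.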
template <typename dT>
bool check_in_bounds(const dT &val, const dT &min, const dT &max)
{
    Less<dT> _less;
    return !_less(val, min) && !_less(max, val) && !IsNan<dT>::isnan(val);
}
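
// submit_histogram enqueues the histogram kernel: each work-item visits
// WorkPI samples strided by the work-group size, bins the in-bounds ones via
// the Edges policy, and accumulates their weights through the HistImpl
// policy; group barriers are emitted only where the policy types request
// them (sync_after_init / sync_before_finalize).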
template <typename T, typename HistImpl, typename Edges, typename Weights>
class histogram_kernel;

template <typename T, typename HistImpl, typename Edges, typename Weights>
void submit_histogram(const T *in,
                      const size_t size,
                      const size_t dims,
                      const uint32_t WorkPI,
                      const HistImpl &hist,
                      const Edges &edges,
                      const Weights &weights,
                      sycl::nd_range<1> nd_range,
                      sycl::handler &cgh)
{
    cgh.parallel_for<histogram_kernel<T, HistImpl, Edges, Weights>>(
        nd_range, [=](sycl::nd_item<1> item) {
            auto id = item.get_group_linear_id();
            auto lid = item.get_local_linear_id();
            auto group = item.get_group();
            auto local_size = item.get_local_range(0);

            hist.init(item);
            edges.init(item);

            if constexpr (HistImpl::sync_after_init || Edges::sync_after_init)
            {
                sycl::group_barrier(group, sycl::memory_scope::work_group);
            }

            auto bounds = edges.get_bounds();

            for (uint32_t i = 0; i < WorkPI; ++i) {
                auto data_idx = id * WorkPI * local_size + i * local_size + lid;
                if (data_idx < size) {
                    auto *d = &in[data_idx * dims];

                    if (edges.in_bounds(d, bounds)) {
                        auto bin = edges.get_bin(item, d, bounds);
                        auto weight = weights.get(data_idx);
                        hist.add(item, bin, weight);
                    }
                }
            }

            if constexpr (HistImpl::sync_before_finalize) {
                sycl::group_barrier(group, sycl::memory_scope::work_group);
            }

            hist.finalize(item);
        });
}
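
// Host-side validation of the sample, bins, weights, and histogram arrays
// before a kernel is submitted.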
void validate(const usm_ndarray &sample,
              const std::optional<const dpctl::tensor::usm_ndarray> &bins,
              const std::optional<const dpctl::tensor::usm_ndarray> &weights,
              const usm_ndarray &histogram);
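
// Picks how many local (SLM) histogram copies to allocate, given the
// available local memory budget, the work-group size, and the size of one
// histogram copy (all measured in items rather than bytes).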
uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items,
                                     uint32_t local_size,
                                     uint32_t hist_size_in_items);

} // namespace statistics::histogram
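
To show how these pieces are meant to compose, here is a minimal host-side sketch. It is not part of this header: UniformEdges is a hypothetical equal-width Edges policy written for illustration (the real edge implementations live elsewhere in this extension), and it assumes in and out are caller-allocated USM device pointers with out zero-initialized.

#include <algorithm>
#include <cstdint>
#include <utility>

#include <sycl/sycl.hpp>

// Hypothetical Edges policy: "bins" equal-width bins spanning [min, max].
template <typename T>
struct UniformEdges
{
    static constexpr bool sync_after_init = false;

    UniformEdges(T min, T max, uint32_t bins)
        : min_(min), max_(max), bins_(bins)
    {
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    std::pair<T, T> get_bounds() const
    {
        return {min_, max_};
    }

    bool in_bounds(const T *v, const std::pair<T, T> &b) const
    {
        return statistics::histogram::check_in_bounds(*v, b.first, b.second);
    }

    template <int _Dims>
    uint32_t get_bin(const sycl::nd_item<_Dims> &,
                     const T *v,
                     const std::pair<T, T> &b) const
    {
        auto bin = uint32_t((*v - b.first) * bins_ / (b.second - b.first));
        return std::min(bin, bins_ - 1); // clamp v == max into the last bin
    }

private:
    T min_, max_;
    uint32_t bins_;
};

// One possible submission: a 1-D float histogram with uint64_t counts and a
// single SLM copy (in practice get_local_hist_copies_count picks the count).
void histogram_sketch(sycl::queue &q, const float *in, uint64_t *out,
                      size_t size, uint32_t bins)
{
    using namespace statistics::histogram;

    constexpr uint32_t WorkPI = 4; // samples processed per work-item
    const uint32_t local_size = 256;
    const size_t groups =
        (size + WorkPI * local_size - 1) / (WorkPI * local_size);
    sycl::nd_range<1> ndr(groups * local_size, local_size);

    q.submit([&](sycl::handler &cgh) {
        HistWithLocalCopies<uint64_t> hist(out, bins, 1, cgh);
        UniformEdges<float> edges(0.f, 1.f, bins);
        NoWeights<> weights;
        submit_histogram(in, size, 1, WorkPI, hist, edges, weights, ndr, cgh);
    });
}

WorkPI and local_size are tuning knobs: the grid is sized so that each work-group covers WorkPI * local_size samples, which matches the data_idx computation inside submit_histogram.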