DPNP C++ backend kernel library 0.20.0dev0
Data Parallel Extension for NumPy*
histogram_common.hpp
//*****************************************************************************
// Copyright (c) 2024, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// - Neither the name of the copyright holder nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <cstdint>
#include <optional>
#include <type_traits>

#include <sycl/sycl.hpp>

#include "ext/common.hpp"
34
35namespace dpctl::tensor
36{
37class usm_ndarray;
38}
39
using dpctl::tensor::usm_ndarray;

using ext::common::AtomicOp;
using ext::common::IsNan;
using ext::common::Less;

namespace statistics::histogram
{

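// Caches a block of global memory in local (shared) memory. init() is called
// cooperatively by all work-items of a group; sync_after_init tells the
// kernel to place a group barrier before the cached data is first read.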
template <typename T, int Dims>
struct CachedData
{
    static constexpr bool const sync_after_init = true;
    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    using ncT = typename std::remove_const<value_type>::type;
    using LocalData = sycl::local_accessor<ncT, Dims>;

    CachedData(T *global_data, Shape shape, sycl::handler &cgh)
    {
        this->global_data = global_data;
        local_data = LocalData(shape, cgh);
    }

    T *get_ptr() const
    {
        return &local_data[0];
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto local_ptr = &local_data[0];
        uint32_t size = local_data.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = global_data[i];
        }
    }

    size_t size() const
    {
        return local_data.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return local_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return local_data[id];
    }

private:
    LocalData local_data;
    value_type *global_data = nullptr;
};

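// Same interface as CachedData, but reads directly from global memory and
// therefore needs no copy step and no initialization barrier.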
template <typename T, int Dims>
struct UncachedData
{
    static constexpr bool const sync_after_init = false;
    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    UncachedData(T *global_data, const Shape &shape, sycl::handler &)
    {
        this->global_data = global_data;
        _shape = shape;
    }

    T *get_ptr() const
    {
        return global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    size_t size() const
    {
        return _shape.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return global_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return global_data[id];
    }

private:
    T *global_data = nullptr;
    Shape _shape;
};

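// Maps a histogram counter type to the type used for the per-group local
// copies: 64-bit counters are narrowed to 32 bits, which keeps local-memory
// atomics cheap; partial sums are widened back when flushed to global memory.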
template <typename T>
struct HistLocalType
{
    using type = T;
};

template <>
struct HistLocalType<uint64_t>
{
    using type = uint32_t;
};

template <>
struct HistLocalType<int64_t>
{
    using type = int32_t;
};

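// Accumulates the histogram into one or more local-memory copies to reduce
// contention on atomics; finalize() sums the copies and adds every non-zero
// bin to the global histogram with a device-scope atomic.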
template <typename T, typename localT = typename HistLocalType<T>::type>
struct HistWithLocalCopies
{
    static constexpr bool const sync_after_init = true;
    static constexpr bool const sync_before_finalize = true;

    using LocalHist = sycl::local_accessor<localT, 2>;

    HistWithLocalCopies(T *global_data,
                        size_t bins_count,
                        int32_t copies_count,
                        sycl::handler &cgh)
    {
        local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
        global_hist = global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item, localT val = 0) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto *local_ptr = &local_hist[0][0];
        uint32_t size = local_hist.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = val;
        }
    }

    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const
    {
        int32_t llid = item.get_local_linear_id();
        int32_t local_hist_count = local_hist.get_range().get(0);
        int32_t local_copy_id =
            local_hist_count == 1 ? 0 : llid % local_hist_count;

        AtomicOp<localT, sycl::memory_order::relaxed,
                 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
                                                                [bin],
                                                      value);
    }

    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        uint32_t bins_count = local_hist.get_range().get(1);
        uint32_t local_hist_count = local_hist.get_range().get(0);
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < bins_count; i += local_size) {
            auto value = local_hist[0][i];
            for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
                value += local_hist[lhc][i];
            }
            if (value != T(0)) {
                AtomicOp<T, sycl::memory_order::relaxed,
                         sycl::memory_scope::device>::add(global_hist[i],
                                                          value);
            }
        }
    }

    uint32_t size() const
    {
        return local_hist.size();
    }

private:
    LocalHist local_hist;
    T *global_hist = nullptr;
};

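// Accumulates directly into the global histogram with device-scope atomics;
// keeps no local state, so no barriers are required.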
template <typename T>
struct HistGlobalMemory
{
    static constexpr bool const sync_after_init = false;
    static constexpr bool const sync_before_finalize = false;

    HistGlobalMemory(T *global_data)
    {
        global_hist = global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const
    {
        AtomicOp<T, sycl::memory_order::relaxed,
                 sycl::memory_scope::device>::add(global_hist[bin], value);
    }

    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &) const
    {
    }

private:
    T *global_hist = nullptr;
};

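// Weight provider used when no weights array is given: every sample counts
// with a constant weight of 1.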
template <typename T = uint32_t>
struct NoWeights
{
    constexpr T get(size_t) const
    {
        return 1;
    }
};

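// Weight provider reading a per-sample weight from a user-supplied array.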
template <typename T>
struct Weights
{
    Weights(T *weights)
    {
        data = weights;
    }

    T get(size_t id) const
    {
        return data[id];
    }

private:
    T *data = nullptr;
};

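// True when val lies within [min, max] and is not NaN.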
template <typename dT>
bool check_in_bounds(const dT &val, const dT &min, const dT &max)
{
    Less<dT> _less;
    return !_less(val, min) && !_less(max, val) && !IsNan<dT>::isnan(val);
}

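// Submission helper: each work-item processes WorkPI samples with a
// group-strided index, bins in-bounds values through Edges, and accumulates
// them through HistImpl, honoring each implementation's barrier flags.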
template <typename T, typename HistImpl, typename Edges, typename Weights>
class histogram_kernel;

template <typename T, typename HistImpl, typename Edges, typename Weights>
void submit_histogram(const T *in,
                      const size_t size,
                      const size_t dims,
                      const uint32_t WorkPI,
                      const HistImpl &hist,
                      const Edges &edges,
                      const Weights &weights,
                      sycl::nd_range<1> nd_range,
                      sycl::handler &cgh)
{
    cgh.parallel_for<histogram_kernel<T, HistImpl, Edges, Weights>>(
        nd_range, [=](sycl::nd_item<1> item) {
            auto id = item.get_group_linear_id();
            auto lid = item.get_local_linear_id();
            auto group = item.get_group();
            auto local_size = item.get_local_range(0);

            hist.init(item);
            edges.init(item);

            if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) {
                sycl::group_barrier(group, sycl::memory_scope::work_group);
            }

            auto bounds = edges.get_bounds();

            for (uint32_t i = 0; i < WorkPI; ++i) {
                auto data_idx = id * WorkPI * local_size + i * local_size + lid;
                if (data_idx < size) {
                    auto *d = &in[data_idx * dims];

                    if (edges.in_bounds(d, bounds)) {
                        auto bin = edges.get_bin(item, d, bounds);
                        auto weight = weights.get(data_idx);
                        hist.add(item, bin, weight);
                    }
                }
            }

            if constexpr (HistImpl::sync_before_finalize) {
                sycl::group_barrier(group, sycl::memory_scope::work_group);
            }

            hist.finalize(item);
        });
}

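// Validates the sample, bins, weights and output histogram arrays before a
// histogram kernel is submitted.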
void validate(const usm_ndarray &sample,
              const std::optional<const dpctl::tensor::usm_ndarray> &bins,
              const std::optional<const dpctl::tensor::usm_ndarray> &weights,
              const usm_ndarray &histogram);

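// Chooses how many local histogram copies to allocate given the available
// local memory (in items) and the work-group size.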
uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items,
                                     uint32_t local_size,
                                     uint32_t hist_size_in_items);

} // namespace statistics::histogram
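
For orientation, the sketch below shows one way these pieces could be wired
together. It is illustrative only and not part of the header: UniformEdges and
run_histogram are hypothetical names invented for this example, and the
extension's real edge helpers (which must provide the same
sync_after_init/init/get_bounds/in_bounds/get_bin protocol consumed by
submit_histogram) are defined elsewhere.

// Illustrative sketch, assuming histogram_common.hpp is included.
// UniformEdges is a hypothetical minimal edge helper for equally spaced
// bins over [lo, hi]; it is not part of the library.
struct UniformEdges
{
    static constexpr bool sync_after_init = false;

    struct Bounds
    {
        double lo, hi;
    };

    double lo, hi;
    uint32_t nbins;

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    Bounds get_bounds() const
    {
        return {lo, hi};
    }

    bool in_bounds(const double *v, const Bounds &b) const
    {
        return statistics::histogram::check_in_bounds(*v, b.lo, b.hi);
    }

    template <int _Dims>
    int32_t get_bin(const sycl::nd_item<_Dims> &,
                    const double *v,
                    const Bounds &b) const
    {
        // Scale into [0, nbins) and clamp the right edge into the last bin.
        auto bin = uint32_t(nbins * (*v - b.lo) / (b.hi - b.lo));
        return sycl::min(bin, nbins - 1);
    }
};

// Hypothetical caller: histograms n double samples into nbins uint64_t
// counters; data and hist are assumed to be USM allocations bound to q.
void run_histogram(sycl::queue &q, const double *data, size_t n,
                   uint64_t *hist, uint32_t nbins, double lo, double hi)
{
    using statistics::histogram::HistWithLocalCopies;
    using statistics::histogram::NoWeights;
    using statistics::histogram::submit_histogram;

    constexpr uint32_t WorkPI = 4;   // samples processed per work-item
    const uint32_t local_size = 256; // work-group size
    const size_t per_group = size_t(WorkPI) * local_size;
    const size_t n_groups = (n + per_group - 1) / per_group;
    sycl::nd_range<1> range(n_groups * local_size, local_size);

    q.submit([&](sycl::handler &cgh) {
        // Several local copies of the histogram spread atomic contention.
        HistWithLocalCopies<uint64_t> hist_impl(hist, nbins, 4, cgh);
        UniformEdges edges{lo, hi, nbins};
        NoWeights<> weights; // every sample counts with weight 1

        submit_histogram(data, n, 1, WorkPI, hist_impl, edges, weights,
                         range, cgh);
    });
    q.wait();
}

A real caller would size the number of local copies from the device's
available local memory via get_local_hist_copies_count rather than
hardcoding it as this sketch does.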