// DPNP C++ backend kernel library 0.20.0dev4
// Data Parallel Extension for NumPy*
// histogram_common.hpp
//*****************************************************************************
// Copyright (c) 2024, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// - Neither the name of the copyright holder nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <cstddef>
#include <cstdint>
#include <optional>
#include <type_traits>

#include <sycl/sycl.hpp>

#include "ext/common.hpp"

namespace dpctl::tensor
{
class usm_ndarray;
}

using dpctl::tensor::usm_ndarray;

// Helpers from ext/common.hpp used unqualified throughout this header.
using ext::common::AtomicOp;
using ext::common::IsNan;
using ext::common::Less;

46namespace statistics::histogram
47{
48
49template <typename T, int Dims>
51{
52 static constexpr bool const sync_after_init = true;
53 using Shape = sycl::range<Dims>;
54 using value_type = T;
55 using pointer_type = value_type *;
56 static constexpr auto dims = Dims;
57
58 using ncT = typename std::remove_const<value_type>::type;
59 using LocalData = sycl::local_accessor<ncT, Dims>;
60
61 CachedData(T *global_data, Shape shape, sycl::handler &cgh)
62 {
63 this->global_data = global_data;
64 local_data = LocalData(shape, cgh);
65 }
66
67 T *get_ptr() const { return &local_data[0]; }
68
69 template <int _Dims>
70 void init(const sycl::nd_item<_Dims> &item) const
71 {
72 uint32_t llid = item.get_local_linear_id();
73 auto local_ptr = &local_data[0];
74 uint32_t size = local_data.size();
75 auto group = item.get_group();
76 uint32_t local_size = group.get_local_linear_range();
77
78 for (uint32_t i = llid; i < size; i += local_size) {
79 local_ptr[i] = global_data[i];
80 }
81 }
82
83 size_t size() const { return local_data.size(); }
84
85 T &operator[](const sycl::id<Dims> &id) const { return local_data[id]; }
86
87 template <typename = std::enable_if_t<Dims == 1>>
88 T &operator[](const size_t id) const
89 {
90 return local_data[id];
91 }
92
93private:
94 LocalData local_data;
95 value_type *global_data = nullptr;
96};
97
98template <typename T, int Dims>
100{
101 static constexpr bool const sync_after_init = false;
102 using Shape = sycl::range<Dims>;
103 using value_type = T;
104 using pointer_type = value_type *;
105 static constexpr auto dims = Dims;
106
107 UncachedData(T *global_data, const Shape &shape, sycl::handler &)
108 {
109 this->global_data = global_data;
110 _shape = shape;
111 }
112
113 T *get_ptr() const { return global_data; }
114
115 template <int _Dims>
116 void init(const sycl::nd_item<_Dims> &) const
117 {
118 }
119
120 size_t size() const { return _shape.size(); }
121
122 T &operator[](const sycl::id<Dims> &id) const { return global_data[id]; }
123
124 template <typename = std::enable_if_t<Dims == 1>>
125 T &operator[](const size_t id) const
126 {
127 return global_data[id];
128 }
129
130private:
131 T *global_data = nullptr;
132 Shape _shape;
133};
134
135template <typename T>
137{
138 using type = T;
139};
140
141template <>
142struct HistLocalType<uint64_t>
143{
144 using type = uint32_t;
145};
146
147template <>
148struct HistLocalType<int64_t>
149{
150 using type = int32_t;
151};
152
153template <typename T, typename localT = typename HistLocalType<T>::type>
155{
156 static constexpr bool const sync_after_init = true;
157 static constexpr bool const sync_before_finalize = true;
158
159 using LocalHist = sycl::local_accessor<localT, 2>;
160
161 HistWithLocalCopies(T *global_data,
162 size_t bins_count,
163 int32_t copies_count,
164 sycl::handler &cgh)
165 {
166 local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
167 global_hist = global_data;
168 }
169
170 template <int _Dims>
171 void init(const sycl::nd_item<_Dims> &item, localT val = 0) const
172 {
173 uint32_t llid = item.get_local_linear_id();
174 auto *local_ptr = &local_hist[0][0];
175 uint32_t size = local_hist.size();
176 auto group = item.get_group();
177 uint32_t local_size = group.get_local_linear_range();
178
179 for (uint32_t i = llid; i < size; i += local_size) {
180 local_ptr[i] = val;
181 }
182 }
183
184 template <int _Dims>
185 void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const
186 {
187 int32_t llid = item.get_local_linear_id();
188 int32_t local_hist_count = local_hist.get_range().get(0);
189 int32_t local_copy_id =
190 local_hist_count == 1 ? 0 : llid % local_hist_count;
191
192 AtomicOp<localT, sycl::memory_order::relaxed,
193 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
194 [bin],
195 value);
196 }
197
198 template <int _Dims>
199 void finalize(const sycl::nd_item<_Dims> &item) const
200 {
201 uint32_t llid = item.get_local_linear_id();
202 uint32_t bins_count = local_hist.get_range().get(1);
203 uint32_t local_hist_count = local_hist.get_range().get(0);
204 auto group = item.get_group();
205 uint32_t local_size = group.get_local_linear_range();
206
207 for (uint32_t i = llid; i < bins_count; i += local_size) {
208 auto value = local_hist[0][i];
209 for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
210 value += local_hist[lhc][i];
211 }
212 if (value != T(0)) {
213 AtomicOp<T, sycl::memory_order::relaxed,
214 sycl::memory_scope::device>::add(global_hist[i],
215 value);
216 }
217 }
218 }
219
220 uint32_t size() const { return local_hist.size(); }
221
222private:
223 LocalHist local_hist;
224 T *global_hist = nullptr;
225};
226
227template <typename T>
229{
230 static constexpr bool const sync_after_init = false;
231 static constexpr bool const sync_before_finalize = false;
232
233 HistGlobalMemory(T *global_data) { global_hist = global_data; }
234
235 template <int _Dims>
236 void init(const sycl::nd_item<_Dims> &) const
237 {
238 }
239
240 template <int _Dims>
241 void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const
242 {
243 AtomicOp<T, sycl::memory_order::relaxed,
244 sycl::memory_scope::device>::add(global_hist[bin], value);
245 }
246
247 template <int _Dims>
248 void finalize(const sycl::nd_item<_Dims> &) const
249 {
250 }
251
252private:
253 T *global_hist = nullptr;
254};
255
256template <typename T = uint32_t>
258{
259 constexpr T get(size_t) const { return 1; }
260};
261
262template <typename T>
264{
265 Weights(T *weights) { data = weights; }
266
267 T get(size_t id) const { return data[id]; }
268
269private:
270 T *data = nullptr;
271};
272
273template <typename dT>
274bool check_in_bounds(const dT &val, const dT &min, const dT &max)
275{
276 Less<dT> _less;
277 return !_less(val, min) && !_less(max, val) && !IsNan<dT>::isnan(val);
278}
279
280template <typename T, typename HistImpl, typename Edges, typename Weights>
282
283template <typename T, typename HistImpl, typename Edges, typename Weights>
284void submit_histogram(const T *in,
285 const size_t size,
286 const size_t dims,
287 const uint32_t WorkPI,
288 const HistImpl &hist,
289 const Edges &edges,
290 const Weights &weights,
291 sycl::nd_range<1> nd_range,
292 sycl::handler &cgh)
293{
295 nd_range, [=](sycl::nd_item<1> item) {
296 auto id = item.get_group_linear_id();
297 auto lid = item.get_local_linear_id();
298 auto group = item.get_group();
299 auto local_size = item.get_local_range(0);
300
301 hist.init(item);
302 edges.init(item);
303
304 if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) {
305 sycl::group_barrier(group, sycl::memory_scope::work_group);
306 }
307
308 auto bounds = edges.get_bounds();
309
310 for (uint32_t i = 0; i < WorkPI; ++i) {
311 auto data_idx = id * WorkPI * local_size + i * local_size + lid;
312 if (data_idx < size) {
313 auto *d = &in[data_idx * dims];
314
315 if (edges.in_bounds(d, bounds)) {
316 auto bin = edges.get_bin(item, d, bounds);
317 auto weight = weights.get(data_idx);
318 hist.add(item, bin, weight);
319 }
320 }
321 }
322
323 if constexpr (HistImpl::sync_before_finalize) {
324 sycl::group_barrier(group, sycl::memory_scope::work_group);
325 }
326
327 hist.finalize(item);
328 });
329}
330
331void validate(const usm_ndarray &sample,
332 const std::optional<const dpctl::tensor::usm_ndarray> &bins,
333 const std::optional<const dpctl::tensor::usm_ndarray> &weights,
334 const usm_ndarray &histogram);
335
336uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items,
337 uint32_t local_size,
338 uint32_t hist_size_in_items);
339
340} // namespace statistics::histogram