54 static constexpr bool const sync_after_init =
true;
55 using Shape = sycl::range<Dims>;
57 using pointer_type = value_type *;
58 static constexpr auto dims = Dims;
60 using ncT =
typename std::remove_const<value_type>::type;
61 using LocalData = sycl::local_accessor<ncT, Dims>;
63 CachedData(T *global_data, Shape shape, sycl::handler &cgh)
65 this->global_data = global_data;
66 local_data = LocalData(shape, cgh);
69 T *get_ptr()
const {
return &local_data[0]; }
72 void init(
const sycl::nd_item<_Dims> &item)
const
74 std::uint32_t llid = item.get_local_linear_id();
75 auto local_ptr = &local_data[0];
76 std::uint32_t size = local_data.size();
77 auto group = item.get_group();
78 std::uint32_t local_size = group.get_local_linear_range();
80 for (std::uint32_t i = llid; i < size; i += local_size) {
81 local_ptr[i] = global_data[i];
85 std::size_t size()
const {
return local_data.size(); }
87 T &operator[](
const sycl::id<Dims> &
id)
const {
return local_data[id]; }
89 template <
typename = std::enable_if_t<Dims == 1>>
90 T &operator[](
const std::size_t
id)
const
92 return local_data[id];
97 value_type *global_data =
nullptr;
103 static constexpr bool const sync_after_init =
false;
104 using Shape = sycl::range<Dims>;
105 using value_type = T;
106 using pointer_type = value_type *;
107 static constexpr auto dims = Dims;
109 UncachedData(T *global_data,
const Shape &shape, sycl::handler &)
111 this->global_data = global_data;
115 T *get_ptr()
const {
return global_data; }
118 void init(
const sycl::nd_item<_Dims> &)
const
122 std::size_t size()
const {
return _shape.size(); }
124 T &operator[](
const sycl::id<Dims> &
id)
const {
return global_data[id]; }
126 template <
typename = std::enable_if_t<Dims == 1>>
127 T &operator[](
const std::size_t
id)
const
129 return global_data[id];
133 T *global_data =
nullptr;
158 static constexpr bool const sync_after_init =
true;
159 static constexpr bool const sync_before_finalize =
true;
161 using LocalHist = sycl::local_accessor<localT, 2>;
164 std::size_t bins_count,
165 std::int32_t copies_count,
168 local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
169 global_hist = global_data;
173 void init(
const sycl::nd_item<_Dims> &item, localT val = 0)
const
175 std::uint32_t llid = item.get_local_linear_id();
176 auto *local_ptr = &local_hist[0][0];
177 std::uint32_t size = local_hist.size();
178 auto group = item.get_group();
179 std::uint32_t local_size = group.get_local_linear_range();
181 for (std::uint32_t i = llid; i < size; i += local_size) {
187 void add(
const sycl::nd_item<_Dims> &item,
191 std::int32_t llid = item.get_local_linear_id();
192 std::int32_t local_hist_count = local_hist.get_range().get(0);
193 std::int32_t local_copy_id =
194 local_hist_count == 1 ? 0 : llid % local_hist_count;
196 AtomicOp<localT, sycl::memory_order::relaxed,
197 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
203 void finalize(
const sycl::nd_item<_Dims> &item)
const
205 std::uint32_t llid = item.get_local_linear_id();
206 std::uint32_t bins_count = local_hist.get_range().get(1);
207 std::uint32_t local_hist_count = local_hist.get_range().get(0);
208 auto group = item.get_group();
209 std::uint32_t local_size = group.get_local_linear_range();
211 for (std::uint32_t i = llid; i < bins_count; i += local_size) {
212 auto value = local_hist[0][i];
213 for (std::uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
214 value += local_hist[lhc][i];
217 AtomicOp<T, sycl::memory_order::relaxed,
218 sycl::memory_scope::device>::add(global_hist[i],
224 std::uint32_t size()
const {
return local_hist.size(); }
227 LocalHist local_hist;
228 T *global_hist =
nullptr;
234 static constexpr bool const sync_after_init =
false;
235 static constexpr bool const sync_before_finalize =
false;
240 void init(
const sycl::nd_item<_Dims> &)
const
245 void add(
const sycl::nd_item<_Dims> &, std::int32_t bin, T value)
const
247 AtomicOp<T, sycl::memory_order::relaxed,
248 sycl::memory_scope::device>::add(global_hist[bin], value);
252 void finalize(
const sycl::nd_item<_Dims> &)
const
257 T *global_hist =
nullptr;
300 HistogramKernel(in, size, dims, WorkPI, hist, edges, weights));