// Accessor policy that stages global data into work-group local memory.
// sync_after_init is true because every work-item must wait for the local
// copy to be fully populated before reading it.
template <typename T, int Dims>
struct CachedData
{
    static constexpr bool const sync_after_init = true;

    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    using ncT = typename std::remove_const<value_type>::type;
    using LocalData = sycl::local_accessor<ncT, Dims>;

    CachedData(T *global_data, Shape shape, sycl::handler &cgh)
    {
        this->global_data = global_data;
        local_data = LocalData(shape, cgh);
    }

    T *get_ptr() const
    {
        return &local_data[0];
    }

    // Cooperatively copies the global buffer into local memory: each
    // work-item strides over the elements with the work-group size.
    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto local_ptr = &local_data[0];
        uint32_t size = local_data.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = global_data[i];
        }
    }

    size_t size() const
    {
        return local_data.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return local_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return local_data[id];
    }

private:
    LocalData local_data;
    value_type *global_data = nullptr;
};
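// Usage sketch (illustrative assumption, not part of the original header):
// CachedData is meant to be built inside a command group, where the
// sycl::handler needed by the local_accessor is available, and initialized
// cooperatively at the start of the kernel. `q`, `data_ptr`, `n` and
// `nd_range` are placeholder names.
//
//   q.submit([&](sycl::handler &cgh) {
//       CachedData<const double, 1> cached(data_ptr, sycl::range<1>(n), cgh);
//       cgh.parallel_for(nd_range, [=](sycl::nd_item<1> item) {
//           cached.init(item);
//           sycl::group_barrier(item.get_group()); // sync_after_init == true
//           auto x = cached[item.get_local_id(0)];
//           // ... use x ...
//       });
//   });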
// Accessor policy that reads directly from global memory; no local staging
// copy is made, so no barrier is needed after init().
template <typename T, int Dims>
struct UncachedData
{
    static constexpr bool const sync_after_init = false;

    using Shape = sycl::range<Dims>;
    using value_type = T;
    using pointer_type = value_type *;
    static constexpr auto dims = Dims;

    UncachedData(T *global_data, const Shape &shape, sycl::handler &)
    {
        this->global_data = global_data;
        _shape = shape;
    }

    T *get_ptr() const
    {
        return global_data;
    }

    // Nothing to stage: the data is accessed in place.
    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    size_t size() const
    {
        return _shape.size();
    }

    T &operator[](const sycl::id<Dims> &id) const
    {
        return global_data[id];
    }

    template <typename = std::enable_if_t<Dims == 1>>
    T &operator[](const size_t id) const
    {
        return global_data[id];
    }

private:
    T *global_data = nullptr;
    Shape _shape;
};
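// Illustrative helper (an assumption, not part of the original interface):
// CachedData and UncachedData expose the same members, so kernels can be
// written generically against either policy. The name `init_data_storage`
// is made up for this sketch.
template <typename DataStorage, int _Dims>
void init_data_storage(const DataStorage &data,
                       const sycl::nd_item<_Dims> &item)
{
    data.init(item);
    // CachedData requires a work-group barrier before the local copy is read;
    // UncachedData reports sync_after_init == false, so the barrier is skipped.
    if constexpr (DataStorage::sync_after_init) {
        sycl::group_barrier(item.get_group());
    }
}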
// Histogram policy that keeps one or more per-work-group copies of the
// histogram in local memory and merges them into the global histogram in
// finalize(). Multiple local copies reduce contention on local atomics.
template <typename T, typename localT>
struct HistWithLocalCopies
{
    static constexpr bool const sync_after_init = true;
    static constexpr bool const sync_before_finalize = true;

    using LocalHist = sycl::local_accessor<localT, 2>;

    HistWithLocalCopies(T *global_data,
                        size_t bins_count,
                        int32_t copies_count,
                        sycl::handler &cgh)
    {
        local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
        global_hist = global_data;
    }

    // Fills every local copy with `val` (0 by default), striding the
    // work-items over the whole local allocation.
    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &item, localT val = 0) const
    {
        uint32_t llid = item.get_local_linear_id();
        auto *local_ptr = &local_hist[0][0];
        uint32_t size = local_hist.size();
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < size; i += local_size) {
            local_ptr[i] = val;
        }
    }

    // Adds `value` to `bin` in the local copy assigned to this work-item,
    // using a work-group-scope atomic.
    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const
    {
        int32_t llid = item.get_local_linear_id();
        int32_t local_hist_count = local_hist.get_range().get(0);
        int32_t local_copy_id =
            local_hist_count == 1 ? 0 : llid % local_hist_count;

        AtomicOp<localT, sycl::memory_order::relaxed,
                 sycl::memory_scope::work_group>::add(
            local_hist[local_copy_id][bin], value);
    }

    // Sums the local copies bin by bin and flushes the totals to the
    // global histogram with device-scope atomics.
    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &item) const
    {
        uint32_t llid = item.get_local_linear_id();
        uint32_t bins_count = local_hist.get_range().get(1);
        uint32_t local_hist_count = local_hist.get_range().get(0);
        auto group = item.get_group();
        uint32_t local_size = group.get_local_linear_range();

        for (uint32_t i = llid; i < bins_count; i += local_size) {
            auto value = local_hist[0][i];
            for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
                value += local_hist[lhc][i];
            }
            AtomicOp<T, sycl::memory_order::relaxed,
                     sycl::memory_scope::device>::add(global_hist[i], value);
        }
    }

    uint32_t size() const
    {
        return local_hist.size();
    }

private:
    LocalHist local_hist;
    T *global_hist = nullptr;
};
// Histogram policy that accumulates directly into the global histogram with
// device-scope atomics; no local staging, so neither barrier is required.
template <typename T>
struct HistGlobalMemory
{
    static constexpr bool const sync_after_init = false;
    static constexpr bool const sync_before_finalize = false;

    HistGlobalMemory(T *global_data)
    {
        global_hist = global_data;
    }

    template <int _Dims>
    void init(const sycl::nd_item<_Dims> &) const
    {
    }

    template <int _Dims>
    void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const
    {
        AtomicOp<T, sycl::memory_order::relaxed,
                 sycl::memory_scope::device>::add(global_hist[bin], value);
    }

    template <int _Dims>
    void finalize(const sycl::nd_item<_Dims> &) const
    {
    }

private:
    T *global_hist = nullptr;
};
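// Illustrative driver (an assumption, not part of the original interface):
// the call sequence a kernel is expected to follow for either histogram
// policy. The policy's boolean flags decide whether work-group barriers are
// needed around the accumulation phase. The name `accumulate_one` and the
// `bin`/`weight` parameters are made up for this sketch.
template <typename HistStorage, typename T, int _Dims>
void accumulate_one(const HistStorage &hist,
                    const sycl::nd_item<_Dims> &item,
                    int32_t bin,
                    T weight)
{
    hist.init(item);
    if constexpr (HistStorage::sync_after_init) {
        sycl::group_barrier(item.get_group());
    }

    hist.add(item, bin, weight);

    if constexpr (HistStorage::sync_before_finalize) {
        sycl::group_barrier(item.get_group());
    }
    hist.finalize(item);
}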