52 static constexpr bool const sync_after_init =
true;
53 using Shape = sycl::range<Dims>;
55 using pointer_type = value_type *;
56 static constexpr auto dims = Dims;
58 using ncT =
typename std::remove_const<value_type>::type;
59 using LocalData = sycl::local_accessor<ncT, Dims>;
61 CachedData(T *global_data, Shape shape, sycl::handler &cgh)
63 this->global_data = global_data;
64 local_data = LocalData(shape, cgh);
69 return &local_data[0];
73 void init(
const sycl::nd_item<_Dims> &item)
const
75 uint32_t llid = item.get_local_linear_id();
76 auto local_ptr = &local_data[0];
77 uint32_t size = local_data.size();
78 auto group = item.get_group();
79 uint32_t local_size = group.get_local_linear_range();
81 for (uint32_t i = llid; i < size; i += local_size) {
82 local_ptr[i] = global_data[i];
88 return local_data.size();
91 T &operator[](
const sycl::id<Dims> &
id)
const
93 return local_data[id];
96 template <
typename = std::enable_if_t<Dims == 1>>
97 T &operator[](
const size_t id)
const
99 return local_data[id];
103 LocalData local_data;
104 value_type *global_data =
nullptr;
110 static constexpr bool const sync_after_init =
false;
111 using Shape = sycl::range<Dims>;
112 using value_type = T;
113 using pointer_type = value_type *;
114 static constexpr auto dims = Dims;
116 UncachedData(T *global_data,
const Shape &shape, sycl::handler &)
118 this->global_data = global_data;
128 void init(
const sycl::nd_item<_Dims> &)
const
134 return _shape.size();
137 T &operator[](
const sycl::id<Dims> &
id)
const
139 return global_data[id];
142 template <
typename = std::enable_if_t<Dims == 1>>
143 T &operator[](
const size_t id)
const
145 return global_data[id];
149 T *global_data =
nullptr;
174 static constexpr bool const sync_after_init =
true;
175 static constexpr bool const sync_before_finalize =
true;
177 using LocalHist = sycl::local_accessor<localT, 2>;
181 int32_t copies_count,
184 local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
185 global_hist = global_data;
189 void init(
const sycl::nd_item<_Dims> &item, localT val = 0)
const
191 uint32_t llid = item.get_local_linear_id();
192 auto *local_ptr = &local_hist[0][0];
193 uint32_t size = local_hist.size();
194 auto group = item.get_group();
195 uint32_t local_size = group.get_local_linear_range();
197 for (uint32_t i = llid; i < size; i += local_size) {
203 void add(
const sycl::nd_item<_Dims> &item, int32_t bin, localT value)
const
205 int32_t llid = item.get_local_linear_id();
206 int32_t local_hist_count = local_hist.get_range().get(0);
207 int32_t local_copy_id =
208 local_hist_count == 1 ? 0 : llid % local_hist_count;
210 AtomicOp<localT, sycl::memory_order::relaxed,
211 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
217 void finalize(
const sycl::nd_item<_Dims> &item)
const
219 uint32_t llid = item.get_local_linear_id();
220 uint32_t bins_count = local_hist.get_range().get(1);
221 uint32_t local_hist_count = local_hist.get_range().get(0);
222 auto group = item.get_group();
223 uint32_t local_size = group.get_local_linear_range();
225 for (uint32_t i = llid; i < bins_count; i += local_size) {
226 auto value = local_hist[0][i];
227 for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
228 value += local_hist[lhc][i];
231 AtomicOp<T, sycl::memory_order::relaxed,
232 sycl::memory_scope::device>::add(global_hist[i],
238 uint32_t size()
const
240 return local_hist.size();
244 LocalHist local_hist;
245 T *global_hist =
nullptr;
251 static constexpr bool const sync_after_init =
false;
252 static constexpr bool const sync_before_finalize =
false;
256 global_hist = global_data;
260 void init(
const sycl::nd_item<_Dims> &)
const
265 void add(
const sycl::nd_item<_Dims> &, int32_t bin, T value)
const
267 AtomicOp<T, sycl::memory_order::relaxed,
268 sycl::memory_scope::device>::add(global_hist[bin], value);
272 void finalize(
const sycl::nd_item<_Dims> &)
const
277 T *global_hist =
nullptr;