52    static constexpr bool const sync_after_init = 
true;
 
   53    using Shape = sycl::range<Dims>;
 
   55    using pointer_type = value_type *;
 
   56    static constexpr auto dims = Dims;
 
   58    using ncT = 
typename std::remove_const<value_type>::type;
 
   59    using LocalData = sycl::local_accessor<ncT, Dims>;
 
   61    CachedData(T *global_data, Shape shape, sycl::handler &cgh)
 
   63        this->global_data = global_data;
 
   64        local_data = LocalData(shape, cgh);
 
   69        return &local_data[0];
 
   73    void init(
const sycl::nd_item<_Dims> &item)
 const 
   75        uint32_t llid = item.get_local_linear_id();
 
   76        auto local_ptr = &local_data[0];
 
   77        uint32_t size = local_data.size();
 
   78        auto group = item.get_group();
 
   79        uint32_t local_size = group.get_local_linear_range();
 
   81        for (uint32_t i = llid; i < size; i += local_size) {
 
   82            local_ptr[i] = global_data[i];
 
   88        return local_data.size();
 
   91    T &operator[](
const sycl::id<Dims> &
id)
 const 
   93        return local_data[id];
 
   96    template <
typename = std::enable_if_t<Dims == 1>>
 
   97    T &operator[](
const size_t id)
 const 
   99        return local_data[id];
 
  103    LocalData local_data;
 
  104    value_type *global_data = 
nullptr;
 
 
  110    static constexpr bool const sync_after_init = 
false;
 
  111    using Shape = sycl::range<Dims>;
 
  112    using value_type = T;
 
  113    using pointer_type = value_type *;
 
  114    static constexpr auto dims = Dims;
 
  116    UncachedData(T *global_data, 
const Shape &shape, sycl::handler &)
 
  118        this->global_data = global_data;
 
  128    void init(
const sycl::nd_item<_Dims> &)
 const 
  134        return _shape.size();
 
  137    T &operator[](
const sycl::id<Dims> &
id)
 const 
  139        return global_data[id];
 
  142    template <
typename = std::enable_if_t<Dims == 1>>
 
  143    T &operator[](
const size_t id)
 const 
  145        return global_data[id];
 
  149    T *global_data = 
nullptr;
 
 
  174    static constexpr bool const sync_after_init = 
true;
 
  175    static constexpr bool const sync_before_finalize = 
true;
 
  177    using LocalHist = sycl::local_accessor<localT, 2>;
 
  181                        int32_t copies_count,
 
  184        local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
 
  185        global_hist = global_data;
 
  189    void init(
const sycl::nd_item<_Dims> &item, localT val = 0)
 const 
  191        uint32_t llid = item.get_local_linear_id();
 
  192        auto *local_ptr = &local_hist[0][0];
 
  193        uint32_t size = local_hist.size();
 
  194        auto group = item.get_group();
 
  195        uint32_t local_size = group.get_local_linear_range();
 
  197        for (uint32_t i = llid; i < size; i += local_size) {
 
  203    void add(
const sycl::nd_item<_Dims> &item, int32_t bin, localT value)
 const 
  205        int32_t llid = item.get_local_linear_id();
 
  206        int32_t local_hist_count = local_hist.get_range().get(0);
 
  207        int32_t local_copy_id =
 
  208            local_hist_count == 1 ? 0 : llid % local_hist_count;
 
  210        AtomicOp<localT, sycl::memory_order::relaxed,
 
  211                 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
 
  217    void finalize(
const sycl::nd_item<_Dims> &item)
 const 
  219        uint32_t llid = item.get_local_linear_id();
 
  220        uint32_t bins_count = local_hist.get_range().get(1);
 
  221        uint32_t local_hist_count = local_hist.get_range().get(0);
 
  222        auto group = item.get_group();
 
  223        uint32_t local_size = group.get_local_linear_range();
 
  225        for (uint32_t i = llid; i < bins_count; i += local_size) {
 
  226            auto value = local_hist[0][i];
 
  227            for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
 
  228                value += local_hist[lhc][i];
 
  231                AtomicOp<T, sycl::memory_order::relaxed,
 
  232                         sycl::memory_scope::device>::add(global_hist[i],
 
  238    uint32_t size()
 const 
  240        return local_hist.size();
 
  244    LocalHist local_hist;
 
  245    T *global_hist = 
nullptr;
 
 
  251    static constexpr bool const sync_after_init = 
false;
 
  252    static constexpr bool const sync_before_finalize = 
false;
 
  256        global_hist = global_data;
 
  260    void init(
const sycl::nd_item<_Dims> &)
 const 
  265    void add(
const sycl::nd_item<_Dims> &, int32_t bin, T value)
 const 
  267        AtomicOp<T, sycl::memory_order::relaxed,
 
  268                 sycl::memory_scope::device>::add(global_hist[bin], value);
 
  272    void finalize(
const sycl::nd_item<_Dims> &)
 const 
  277    T *global_hist = 
nullptr;