49    static constexpr bool const sync_after_init = 
true;
 
   50    using Shape = sycl::range<Dims>;
 
   52    using pointer_type = value_type *;
 
   53    static constexpr auto dims = Dims;
 
   55    using ncT = 
typename std::remove_const<value_type>::type;
 
   56    using LocalData = sycl::local_accessor<ncT, Dims>;
 
   58    CachedData(T *global_data, Shape shape, sycl::handler &cgh)
 
   60        this->global_data = global_data;
 
   61        local_data = LocalData(shape, cgh);
 
   66        return &local_data[0];
 
   70    void init(
const sycl::nd_item<_Dims> &item)
 const 
   72        uint32_t llid = item.get_local_linear_id();
 
   73        auto local_ptr = &local_data[0];
 
   74        uint32_t size = local_data.size();
 
   75        auto group = item.get_group();
 
   76        uint32_t local_size = group.get_local_linear_range();
 
   78        for (uint32_t i = llid; i < size; i += local_size) {
 
   79            local_ptr[i] = global_data[i];
 
   85        return local_data.size();
 
   88    T &operator[](
const sycl::id<Dims> &
id)
 const 
   90        return local_data[id];
 
   93    template <
typename = std::enable_if_t<Dims == 1>>
 
   94    T &operator[](
const size_t id)
 const 
   96        return local_data[id];
 
  100    LocalData local_data;
 
  101    value_type *global_data = 
nullptr;
 
 
  107    static constexpr bool const sync_after_init = 
false;
 
  108    using Shape = sycl::range<Dims>;
 
  109    using value_type = T;
 
  110    using pointer_type = value_type *;
 
  111    static constexpr auto dims = Dims;
 
  113    UncachedData(T *global_data, 
const Shape &shape, sycl::handler &)
 
  115        this->global_data = global_data;
 
  125    void init(
const sycl::nd_item<_Dims> &)
 const 
  131        return _shape.size();
 
  134    T &operator[](
const sycl::id<Dims> &
id)
 const 
  136        return global_data[id];
 
  139    template <
typename = std::enable_if_t<Dims == 1>>
 
  140    T &operator[](
const size_t id)
 const 
  142        return global_data[id];
 
  146    T *global_data = 
nullptr;
 
 
  171    static constexpr bool const sync_after_init = 
true;
 
  172    static constexpr bool const sync_before_finalize = 
true;
 
  174    using LocalHist = sycl::local_accessor<localT, 2>;
 
  178                        int32_t copies_count,
 
  181        local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh);
 
  182        global_hist = global_data;
 
  186    void init(
const sycl::nd_item<_Dims> &item, localT val = 0)
 const 
  188        uint32_t llid = item.get_local_linear_id();
 
  189        auto *local_ptr = &local_hist[0][0];
 
  190        uint32_t size = local_hist.size();
 
  191        auto group = item.get_group();
 
  192        uint32_t local_size = group.get_local_linear_range();
 
  194        for (uint32_t i = llid; i < size; i += local_size) {
 
  200    void add(
const sycl::nd_item<_Dims> &item, int32_t bin, localT value)
 const 
  202        int32_t llid = item.get_local_linear_id();
 
  203        int32_t local_hist_count = local_hist.get_range().get(0);
 
  204        int32_t local_copy_id =
 
  205            local_hist_count == 1 ? 0 : llid % local_hist_count;
 
  207        AtomicOp<localT, sycl::memory_order::relaxed,
 
  208                 sycl::memory_scope::work_group>::add(local_hist[local_copy_id]
 
  214    void finalize(
const sycl::nd_item<_Dims> &item)
 const 
  216        uint32_t llid = item.get_local_linear_id();
 
  217        uint32_t bins_count = local_hist.get_range().get(1);
 
  218        uint32_t local_hist_count = local_hist.get_range().get(0);
 
  219        auto group = item.get_group();
 
  220        uint32_t local_size = group.get_local_linear_range();
 
  222        for (uint32_t i = llid; i < bins_count; i += local_size) {
 
  223            auto value = local_hist[0][i];
 
  224            for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) {
 
  225                value += local_hist[lhc][i];
 
  228                AtomicOp<T, sycl::memory_order::relaxed,
 
  229                         sycl::memory_scope::device>::add(global_hist[i],
 
  235    uint32_t size()
 const 
  237        return local_hist.size();
 
  241    LocalHist local_hist;
 
  242    T *global_hist = 
nullptr;
 
 
  248    static constexpr bool const sync_after_init = 
false;
 
  249    static constexpr bool const sync_before_finalize = 
false;
 
  253        global_hist = global_data;
 
  257    void init(
const sycl::nd_item<_Dims> &)
 const 
  262    void add(
const sycl::nd_item<_Dims> &, int32_t bin, T value)
 const 
  264        AtomicOp<T, sycl::memory_order::relaxed,
 
  265                 sycl::memory_scope::device>::add(global_hist[bin], value);
 
  269    void finalize(
const sycl::nd_item<_Dims> &)
 const 
  274    T *global_hist = 
nullptr;