49 using ncT =
typename std::remove_const_t<T>;
50 using SizeT =
decltype(Size);
51 static constexpr SizeT _size = Size;
54 : sbgroup(item.get_sub_group())
58 template <
typename yT>
59 T &operator[](
const yT &idx)
61 static_assert(std::is_integral_v<yT>,
62 "idx must be of an integral type");
66 template <
typename yT>
67 const T &operator[](
const yT &idx)
const
69 static_assert(std::is_integral_v<yT>,
70 "idx must be of an integral type");
76 static_assert(Size == 1,
77 "Size is not equal to 1. Use value(idx) instead");
81 const T &value()
const
83 static_assert(Size == 1,
84 "Size is not equal to 1. Use value(idx) instead");
88 template <
typename yT,
typename xT>
89 T broadcast(
const yT &y,
const xT &x)
const
91 static_assert(std::is_integral_v<std::remove_reference_t<yT>>,
92 "y must be of an integral type");
93 static_assert(std::is_integral_v<std::remove_reference_t<xT>>,
94 "x must be of an integral type");
96 return sycl::select_from_group(sbgroup, data[y], x);
99 template <
typename iT>
100 T broadcast(
const iT &idx)
const
102 if constexpr (Size == 1) {
103 return broadcast(0, idx);
106 return broadcast(idx / size_x(), idx % size_x());
110 template <
typename yT,
typename xT>
111 T shift_left(
const yT &y,
const xT &x)
const
113 static_assert(std::is_integral_v<yT>,
"y must be of an integral type");
114 static_assert(std::is_integral_v<xT>,
"x must be of an integral type");
116 return sycl::shift_group_left(sbgroup, data[y], x);
119 template <
typename yT,
typename xT>
120 T shift_right(
const yT &y,
const xT &x)
const
122 static_assert(std::is_integral_v<yT>,
"y must be of an integral type");
123 static_assert(std::is_integral_v<xT>,
"x must be of an integral type");
125 return sycl::shift_group_right(sbgroup, data[y], x);
128 constexpr SizeT size_y()
const {
return _size; }
130 SizeT size_x()
const {
return sbgroup.get_max_local_range()[0]; }
132 SizeT total_size()
const {
return size_x() * size_y(); }
134 ncT *ptr() {
return data; }
136 SizeT x()
const {
return sbgroup.get_local_linear_id(); }
139 const sycl::sub_group sbgroup;
146 using SizeT =
typename _RegistryDataStorage<T, Size>::SizeT;
150 template <
typename LaneIdT,
152 typename = std::enable_if_t<
153 std::is_invocable_r_v<bool, Condition, SizeT>>>
154 void fill_lane(
const LaneIdT &lane_id,
const T &value, Condition &&mask)
156 static_assert(std::is_integral_v<LaneIdT>,
157 "lane_id must be of an integral type");
158 if (mask(this->x())) {
159 this->data[lane_id] = value;
163 template <
typename LaneIdT>
164 void fill_lane(
const LaneIdT &lane_id,
const T &value,
const bool &mask)
166 fill_lane(lane_id, value, [mask](
auto &&) {
return mask; });
169 template <
typename LaneIdT>
170 void fill_lane(
const LaneIdT &lane_id,
const T &value)
172 fill_lane(lane_id, value,
true);
175 template <
typename Condition,
176 typename = std::enable_if_t<
177 std::is_invocable_r_v<bool, Condition, SizeT, SizeT>>>
178 void fill(
const T &value, Condition &&mask)
180 for (SizeT i = 0; i < Size; ++i) {
181 fill_lane(i, value, mask(i, this->x()));
185 void fill(
const T &value)
187 fill(value, [](
auto &&,
auto &&) {
return true; });
190 template <
typename LaneIdT,
192 typename = std::enable_if_t<
193 std::is_invocable_r_v<bool, Condition, const T *const>>>
194 T *load_lane(
const LaneIdT &lane_id,
199 static_assert(std::is_integral_v<LaneIdT>,
200 "lane_id must be of an integral type");
201 this->data[lane_id] = mask(data) ? data[0] : default_v;
203 return data + this->size_x();
206 template <
typename LaneIdT>
207 T *load_lane(
const LaneIdT &laned_id,
213 laned_id, data, [mask](
auto &&) {
return mask; }, default_v);
216 template <
typename LaneIdT>
217 T *load_lane(
const LaneIdT &laned_id,
const T *
const data)
219 constexpr T default_v = 0;
220 return load_lane(laned_id, data,
true, default_v);
223 template <
typename yStrideT,
225 typename = std::enable_if_t<
226 std::is_invocable_r_v<bool, Condition, const T *const>>>
227 T *load(
const T *
const data,
228 const yStrideT &y_stride,
233 for (SizeT i = 0; i < Size; ++i) {
234 load_lane(i, it, mask, default_v);
241 template <
typename yStr
ideT>
242 T *load(
const T *
const data,
243 const yStrideT &y_stride,
248 data, y_stride, [mask](
auto &&) {
return mask; }, default_v);
251 template <
typename Condition,
252 typename = std::enable_if_t<
253 std::is_invocable_r_v<bool, Condition, const T *const>>>
254 T *load(
const T *
const data, Condition &&mask,
const T &default_v)
256 return load(data, this->size_x(), mask, default_v);
259 T *load(
const T *
const data,
const bool &mask,
const T &default_v)
261 return load(data, [mask](
auto &&) {
return mask; }, default_v);
264 T *load(
const T *
const data)
266 constexpr T default_v = 0;
267 return load(data,
true, default_v);
270 template <
typename LaneIdT,
272 typename = std::enable_if_t<
273 std::is_invocable_r_v<bool, Condition, const T *const>>>
274 T *store_lane(
const LaneIdT &lane_id, T *
const data, Condition &&mask)
276 static_assert(std::is_integral_v<LaneIdT>,
277 "lane_id must be of an integral type");
280 data[0] = this->data[lane_id];
283 return data + this->size_x();
286 template <
typename LaneIdT>
287 T *store_lane(
const LaneIdT &lane_id, T *
const data,
const bool &mask)
289 return store_lane(lane_id, data, [mask](
auto &&) {
return mask; });
292 template <
typename LaneIdT>
293 T *store_lane(
const LaneIdT &lane_id, T *
const data)
295 return store_lane(lane_id, data,
true);
298 template <
typename yStrideT,
300 typename = std::enable_if_t<
301 std::is_invocable_r_v<bool, Condition, const T *const>>>
302 T *store(T *
const data,
const yStrideT &y_stride, Condition &&condition)
305 for (SizeT i = 0; i < Size; ++i) {
306 store_lane(i, it, condition);
313 template <
typename yStr
ideT>
314 T *store(T *
const data,
const yStrideT &y_stride,
const bool &mask)
316 return store(data, y_stride, [mask](
auto &&) {
return mask; });
319 template <
typename Condition,
320 typename = std::enable_if_t<
321 std::is_invocable_r_v<bool, Condition, const T *const>>>
322 T *store(T *
const data, Condition &&condition)
324 return store(data, this->size_x(), condition);
327 T *store(T *
const data,
const bool &mask)
329 return store(data, [mask](
auto &&) {
return mask; });
332 T *store(T *
const data) {
return store(data,
true); }
338 using SizeT =
typename RegistryData<T, Size>::SizeT;
342 template <
typename shT>
343 void advance_left(
const shT &shift,
const T &fill_value)
345 static_assert(std::is_integral_v<shT>,
346 "shift must be of an integral type");
348 std::uint32_t shift_r = this->size_x() - shift;
349 for (SizeT i = 0; i < Size; ++i) {
350 this->data[i] = this->shift_left(i, shift);
352 i < Size - 1 ? this->shift_right(i + 1, shift_r) : fill_value;
353 if (this->x() >= shift_r) {
354 this->data[i] = border;
359 void advance_left(
const T &fill_value) { advance_left(1, fill_value); }
363 constexpr T fill_value = 0;
364 advance_left(fill_value);
438 WorkPI, PaddedSpan<const T, SizeT>, Span<const T, SizeT>, Op, Red,
460 WorkPI, PaddedSpan<const T, SizeT>, Span<const T, SizeT>, Op, Red,