49 uint64_t *equal_count;
50 uint64_t *greater_equal_count;
55 less_count = sycl::malloc_device<uint64_t>(1, queue);
56 equal_count = sycl::malloc_device<uint64_t>(1, queue);
57 greater_equal_count = sycl::malloc_device<uint64_t>(1, queue);
58 nan_count = sycl::malloc_device<uint64_t>(1, queue);
// Releases the four counter allocations (less_count, equal_count,
// greater_equal_count, nan_count) by calling sycl::free on each with the
// supplied queue.
// NOTE(review): no pointer is reset after being freed in the visible lines,
// so a second call would presumably free the same pointers again — confirm
// that callers invoke this exactly once.
61 void cleanup(sycl::queue &queue)
63 sycl::free(less_count, queue);
64 sycl::free(equal_count, queue);
65 sycl::free(greater_equal_count, queue);
66 sycl::free(nan_count, queue);
// Constructs the partition state on `queue`.
// The `counters` and `iteration_counters` members are both constructed from
// `queue`; the body then allocates one device element each (via
// sycl::malloc_device) for the `stop`, `target_found` and `left` flags, the
// `pivot` value of type T, and the `num_elems` count.
// NOTE(review): how `_n` and `values_buff` are stored is not visible in this
// excerpt — check the full constructor.
// Nothing is initialized here; the allocated values are set by init().
87 State(sycl::queue &queue,
size_t _n, T *values_buff)
88 : counters(queue), iteration_counters(queue)
90 stop = sycl::malloc_device<bool>(1, queue);
91 target_found = sycl::malloc_device<bool>(1, queue);
92 left = sycl::malloc_device<bool>(1, queue);
94 pivot = sycl::malloc_device<T>(1, queue);
97 num_elems = sycl::malloc_device<size_t>(1, queue);
// Enqueues single-element fills on `queue` that put the state into its
// starting configuration:
//   counters.less_count          <- 0
//   counters.equal_count         <- 0
//   counters.greater_equal_count <- n
//   counters.nan_count           <- 0
//   num_elems                    <- 0
//   stop, target_found, left     <- false
//   pivot                        <- 0
// The first fill waits on `deps`; every later fill passes `{fill_e}` as its
// dependency list, so the fills form a chain.
// NOTE(review): the declaration of `fill_e` and the return statement are not
// visible in this excerpt — presumably the last fill event is returned so the
// caller can wait on the whole chain; confirm against the full source.
102 sycl::event init(sycl::queue &queue,
const std::vector<sycl::event> &deps)
105 queue.fill<uint64_t>(counters.less_count, 0, 1, deps);
106 fill_e = queue.fill<uint64_t>(counters.equal_count, 0, 1, {fill_e});
108 queue.fill<uint64_t>(counters.greater_equal_count, n, 1, {fill_e});
109 fill_e = queue.fill<uint64_t>(counters.nan_count, 0, 1, {fill_e});
110 fill_e = queue.fill<uint64_t>(num_elems, 0, 1, {fill_e});
111 fill_e = queue.fill<
bool>(stop,
false, 1, {fill_e});
112 fill_e = queue.fill<
bool>(target_found,
false, 1, {fill_e});
113 fill_e = queue.fill<
bool>(left,
false, 1, {fill_e});
114 fill_e = queue.fill<T>(pivot, 0, 1, {fill_e});
// Folds the per-iteration counters into the running totals in `counters`.
// The method is const: it writes through the pointer members (element 0 of
// each allocation), not to the members themselves.
// NOTE(review): the two adjustment groups below are alternatives in the
// original file, but the condition that selects between them is not visible
// in this excerpt — presumably it tests the `left` flag; confirm.
119 void update_counters()
const
// Group 1: move the iteration's greater_equal count out of less_count and
// into greater_equal_count.
122 counters.less_count[0] -= iteration_counters.greater_equal_count[0];
123 counters.greater_equal_count[0] +=
124 iteration_counters.greater_equal_count[0];
// Group 2: the mirror adjustment, using the iteration's less count.
127 counters.less_count[0] += iteration_counters.less_count[0];
128 counters.greater_equal_count[0] -= iteration_counters.less_count[0];
// equal_count is overwritten with the iteration value, while nan_count is
// accumulated.
130 counters.equal_count[0] = iteration_counters.equal_count[0];
131 counters.nan_count[0] += iteration_counters.nan_count[0];
// Sets element 0 of each of the four iteration counters (less, equal,
// greater_equal, nan) to zero. The method is const because it writes through
// the pointer members rather than modifying them.
// NOTE(review): these pointers appear to come from sycl::malloc_device, so
// the direct dereferences presumably require this to run in device code —
// confirm where it is called from.
134 void reset_iteration_counters()
const
136 iteration_counters.less_count[0] = 0;
137 iteration_counters.equal_count[0] = 0;
138 iteration_counters.greater_equal_count[0] = 0;
139 iteration_counters.nan_count[0] = 0;
// Releases the state's allocations: first delegates to cleanup() on both
// counter sets, then frees the `stop`, `target_found`, `left`, `num_elems`
// and `pivot` allocations with sycl::free on the supplied queue. These are
// the same five pointers the constructor allocates with sycl::malloc_device.
// NOTE(review): no pointer is reset in the visible lines, so this is
// presumably meant to be called exactly once — confirm.
142 void cleanup(sycl::queue &queue)
144 counters.cleanup(queue);
145 iteration_counters.cleanup(queue);
147 sycl::free(stop, queue);
148 sycl::free(target_found, queue);
149 sycl::free(left, queue);
151 sycl::free(num_elems, queue);
152 sycl::free(pivot, queue);
170 : iteration_counters(state.iteration_counters)
175 num_elems = state.num_elems;
// Enqueues single-element fills on `queue` that seed the iteration counters:
// less_count is set to n, and equal_count, greater_equal_count and nan_count
// are set to 0. The first fill waits on `deps`; the nan_count fill waits on
// `{fill_e}`.
// NOTE(review): the continuation lines of the equal_count and
// greater_equal_count fills, the declaration of `fill_e`, and the return
// statement are not visible in this excerpt — presumably the fills are
// chained through `fill_e` and the last event is returned; confirm.
181 sycl::event init(sycl::queue &queue,
const std::vector<sycl::event> &deps)
184 queue.fill<uint64_t>(iteration_counters.less_count, n, 1, deps);
185 fill_e = queue.fill<uint64_t>(iteration_counters.equal_count, 0, 1,
187 fill_e = queue.fill<uint64_t>(iteration_counters.greater_equal_count, 0,
190 queue.fill<uint64_t>(iteration_counters.nan_count, 0, 1, {fill_e});
216 return run_partition_one_pivot_gpu<T>(exec_q, in, out, state, deps,
223 return run_partition_one_pivot_cpu<T, WorkPI>(exec_q, in, out, state,