DPNP C++ backend kernel library 0.18.0dev0
Data Parallel Extension for NumPy*
dpnp_iterator.hpp
//*****************************************************************************
// Copyright (c) 2016-2025, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once
#ifndef DPNP_ITERATOR_H // Cython compatibility
#define DPNP_ITERATOR_H

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>

#include <dpnp_utils.hpp>

/// Iterator for DPNPC_id type.
template <typename _Tp>
class DPNP_USM_iterator final
{
public:
    using value_type = _Tp;
    using difference_type = std::ptrdiff_t;
    using iterator_category = std::random_access_iterator_tag;
    using pointer = value_type *;
    using reference = value_type &;
    using size_type = shape_elem_type;

    DPNP_USM_iterator(pointer __base_ptr,
                      size_type __id,
                      const size_type *__shape_stride = nullptr,
                      const size_type *__axes_stride = nullptr,
                      size_type __shape_size = 0)
        : base(__base_ptr), iter_id(__id), iteration_shape_size(__shape_size),
          iteration_shape_strides(__shape_stride),
          axes_shape_strides(__axes_stride)
    {
    }

    DPNP_USM_iterator() = delete;

    inline reference operator*() const
    {
        return *ptr();
    }

    inline pointer operator->() const
    {
        return ptr();
    }

    /// prefix increment
    inline DPNP_USM_iterator &operator++()
    {
        ++iter_id;

        return *this;
    }

    /// postfix increment
    inline DPNP_USM_iterator operator++(int)
    {
        DPNP_USM_iterator tmp = *this;
        ++(*this); // call prefix increment

        return tmp;
    }

    inline bool operator==(const DPNP_USM_iterator &__rhs) const
    {
        assert(base == __rhs.base); // iterators are incomparable if base
                                    // pointers are different
        return (iter_id == __rhs.iter_id);
    }

    inline bool operator!=(const DPNP_USM_iterator &__rhs) const
    {
        return !(*this == __rhs);
    }

    inline bool operator<(const DPNP_USM_iterator &__rhs) const
    {
        return iter_id < __rhs.iter_id;
    }

    // TODO need more operators

    // Random access iterator requirements
    inline reference operator[](size_type __n) const
    {
        return *ptr(__n);
    }

    inline difference_type operator-(const DPNP_USM_iterator &__rhs) const
    {
        difference_type diff =
            difference_type(iter_id) - difference_type(__rhs.iter_id);

        return diff;
    }

    /// Print this container in human readable form in error reporting.
    friend std::ostream &operator<<(std::ostream &__out,
                                    const DPNP_USM_iterator &__it)
    {
        const std::vector<size_type> it_strides(__it.iteration_shape_strides,
                                                __it.iteration_shape_strides +
                                                    __it.iteration_shape_size);
        const std::vector<size_type> it_axes_strides(
            __it.axes_shape_strides,
            __it.axes_shape_strides + __it.iteration_shape_size);

        __out << "DPNP_USM_iterator(base=" << __it.base;
        __out << ", iter_id=" << __it.iter_id;
        __out << ", iteration_shape_size=" << __it.iteration_shape_size;
        __out << ", iteration_shape_strides=" << it_strides;
        __out << ", axes_shape_strides=" << it_axes_strides;
        __out << ")";

        return __out;
    }

private:
    inline pointer ptr() const
    {
        return ptr(iter_id);
    }

    // Decompose the linear iteration ID into per-axis coordinates using
    // iteration_shape_strides, then map those coordinates onto the memory
    // layout of the source array via axes_shape_strides.
    inline pointer ptr(size_type iteration_id) const
    {
        size_type offset = 0;

        if (iteration_shape_size > 0) {
            long remainder = iteration_id;
            for (size_t it = 0; it < static_cast<size_t>(iteration_shape_size);
                 ++it) {
                const size_type axis_val = iteration_shape_strides[it];
                size_type xyz_id = remainder / axis_val;
                offset += (xyz_id * axes_shape_strides[it]);

                remainder = remainder % axis_val;
            }
        }
        else {
            offset = iteration_id; // no iteration shape: plain contiguous walk
        }

        return base + offset;
    }

    const pointer base = nullptr;
    size_type iter_id = size_type{};                    // logical ID over the iteration shape
    const size_type iteration_shape_size = size_type{}; // number of iterated dimensions
    const size_type *iteration_shape_strides = nullptr;
    const size_type *axes_shape_strides = nullptr;
};

/// Type to keep USM array pointers used in kernels.
template <typename _Tp>
class DPNPC_id final
{
public:
    using value_type = _Tp;
    using iterator = DPNP_USM_iterator<value_type>;
    using pointer = value_type *;
    using reference = value_type &;
    using size_type = shape_elem_type;

    DPNPC_id(DPCTLSyclQueueRef q_ref,
             pointer __ptr,
             const size_type *__shape,
             const size_type __shape_size)
    {
        queue_ref = q_ref;
        std::vector<size_type> shape(__shape, __shape + __shape_size);
        init_container(__ptr, shape);
    }

    DPNPC_id(DPCTLSyclQueueRef q_ref,
             pointer __ptr,
             const size_type *__shape,
             const size_type *__strides,
             const size_type __ndim)
    {
        queue_ref = q_ref;
        std::vector<size_type> shape(__shape, __shape + __ndim);
        std::vector<size_type> strides(__strides, __strides + __ndim);
        init_container(__ptr, shape, strides);
    }

    /// Main container for reduction iterator.
    DPNPC_id(DPCTLSyclQueueRef q_ref,
             pointer __ptr,
             const std::vector<size_type> &__shape)
    {
        queue_ref = q_ref;
        init_container(__ptr, __shape);
    }

    /// Main container for reduction/broadcasting iterator.
    DPNPC_id(pointer __ptr,
             const std::vector<size_type> &__shape,
             const std::vector<size_type> &__strides)
    {
        init_container(__ptr, __shape, __strides);
    }

    DPNPC_id() = delete;

    ~DPNPC_id()
    {
        free_memory();
    }

    /// this function returns the number of elements in the output
    inline size_type get_output_size() const
    {
        return output_size;
    }

    inline void broadcast_to_shape(const size_type *__shape,
                                   const size_type __shape_size)
    {
        std::vector<size_type> shape(__shape, __shape + __shape_size);
        broadcast_to_shape(shape);
    }

    /// Broadcast input data to specified shape.
    inline void broadcast_to_shape(const std::vector<size_type> &__shape)
    {
        if (axis_use) {
            return;
        }

        if (broadcastable(input_shape, input_shape_size, __shape)) {
            free_broadcast_axes_memory();
            free_output_memory();

            std::vector<size_type> valid_axes;
            broadcast_use = true;

            output_shape_size = __shape.size();
            const size_type output_shape_size_in_bytes =
                output_shape_size * sizeof(size_type);
            output_shape = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));

            for (int irit = input_shape_size - 1, orit = output_shape_size - 1;
                 orit >= 0; --irit, --orit)
            {
                output_shape[orit] = __shape[orit];

                // ex: input_shape = {7, 1, 5}, output_shape = {8, 7, 6, 5} =>
                // valid_axes = {0, 2}
                if (irit < 0 || input_shape[irit] != output_shape[orit]) {
                    valid_axes.insert(valid_axes.begin(), orit);
                }
            }

            broadcast_axes_size = valid_axes.size();
            const size_type broadcast_axes_size_in_bytes =
                broadcast_axes_size * sizeof(size_type);
            broadcast_axes = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, broadcast_axes_size_in_bytes));
            std::copy(valid_axes.begin(), valid_axes.end(), broadcast_axes);

            output_size =
                std::accumulate(output_shape, output_shape + output_shape_size,
                                size_type(1), std::multiplies<size_type>());

            output_shape_strides = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));
            get_shape_offsets_inkernel<size_type>(
                output_shape, output_shape_size, output_shape_strides);

            iteration_size = 1;
        }
    }

    /// Set axis for the data object to use in computation.
    inline void set_axis(shape_elem_type __axis)
    {
        set_axes({__axis});
    }

    inline void set_axes(const shape_elem_type *__axes, const size_t axes_ndim)
    {
        const std::vector<shape_elem_type> axes_vec(__axes, __axes + axes_ndim);
        set_axes(axes_vec);
    }

    /// Set axes for the data object to use in computation.
    inline void set_axes(const std::vector<shape_elem_type> &__axes)
    {
        if (broadcast_use) {
            return;
        }

        if (!__axes.empty() && input_shape_size) {
            free_axes_memory();
            free_iteration_memory();
            free_output_memory();

            axes = get_validated_axes(__axes, input_shape_size);
            axis_use = true;

            output_shape_size = input_shape_size - axes.size();
            const size_type output_shape_size_in_bytes =
                output_shape_size * sizeof(size_type);

            iteration_shape_size = axes.size();
            const size_type iteration_shape_size_in_bytes =
                iteration_shape_size * sizeof(size_type);
            std::vector<size_type> iteration_shape;

            // output shape keeps every input dimension that is not reduced
            output_shape = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));
            size_type *output_shape_it = output_shape;
            for (size_type i = 0; i < input_shape_size; ++i) {
                if (std::find(axes.begin(), axes.end(), i) == axes.end()) {
                    *output_shape_it = input_shape[i];
                    ++output_shape_it;
                }
            }

            output_size =
                std::accumulate(output_shape, output_shape + output_shape_size,
                                size_type(1), std::multiplies<size_type>());

            output_shape_strides = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, output_shape_size_in_bytes));
            get_shape_offsets_inkernel<size_type>(
                output_shape, output_shape_size, output_shape_strides);

            iteration_size = 1;
            iteration_shape.reserve(iteration_shape_size);
            for (const auto &axis : axes) {
                const size_type axis_dim = input_shape[axis];
                iteration_shape.push_back(axis_dim);
                iteration_size *= axis_dim;
            }

            iteration_shape_strides = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, iteration_shape_size_in_bytes));
            get_shape_offsets_inkernel<size_type>(iteration_shape.data(),
                                                  iteration_shape.size(),
                                                  iteration_shape_strides);

            axes_shape_strides = reinterpret_cast<size_type *>(
                dpnp_memory_alloc_c(queue_ref, iteration_shape_size_in_bytes));
            for (size_t i = 0; i < static_cast<size_t>(iteration_shape_size);
                 ++i) {
                axes_shape_strides[i] = input_shape_strides[axes[i]];
            }
        }
    }

    /// this function is designed for SYCL environment execution
    inline iterator begin(size_type output_global_id = 0) const
    {
        return iterator(data + get_input_begin_offset(output_global_id), 0,
                        iteration_shape_strides, axes_shape_strides,
                        iteration_shape_size);
    }

    /// this function is designed for SYCL environment execution
    inline iterator end(size_type output_global_id = 0) const
    {
        // TODO it is better to get begin() iterator as a parameter

        return iterator(data + get_input_begin_offset(output_global_id),
                        get_iteration_size(), iteration_shape_strides,
                        axes_shape_strides, iteration_shape_size);
    }

    /// this function is designed for SYCL environment execution
    inline reference operator[](size_type __n) const
    {
        if (broadcast_use) {
            return *begin(__n);
        }

        const iterator it = begin();
        return it[__n];
    }

private:
    void init_container(pointer __ptr, const std::vector<size_type> &__shape)
    {
        // TODO needs to address negative values in __shape with exception
        if ((__ptr == nullptr) && __shape.empty()) {
            return;
        }

        if (__ptr != nullptr) {
            data = __ptr;
            input_size = 1;  // means scalar at this stage
            output_size = 1; // if input size is not zero it means we have
                             // scalar as output
            iteration_size = 1;
        }

        if (!__shape.empty()) {
            input_size =
                std::accumulate(__shape.begin(), __shape.end(), size_type(1),
                                std::multiplies<size_type>());
            if (input_size == 0)
            { // shape might be shape[3, 4, 0, 6]. This means no input memory
              // and no output expected
                output_size = 0; // depends on axes. zero at this stage only
            }

            input_shape_size = __shape.size();
            input_shape = reinterpret_cast<size_type *>(dpnp_memory_alloc_c(
                queue_ref, input_shape_size * sizeof(size_type)));
            std::copy(__shape.begin(), __shape.end(), input_shape);

            input_shape_strides =
                reinterpret_cast<size_type *>(dpnp_memory_alloc_c(
                    queue_ref, input_shape_size * sizeof(size_type)));
            get_shape_offsets_inkernel<size_type>(input_shape, input_shape_size,
                                                  input_shape_strides);
        }
        iteration_size = input_size;
    }

    void init_container(pointer __ptr,
                        const std::vector<size_type> &__shape,
                        const std::vector<size_type> &__strides)
    {
        // TODO needs to address negative values in __shape with exception
        if ((__ptr == nullptr) && __shape.empty()) {
            return;
        }

        if (__ptr != nullptr) {
            data = __ptr;
            input_size = 1;  // means scalar at this stage
            output_size = 1; // if input size is not zero it means we have
                             // scalar as output
            iteration_size = 1;
        }

        if (!__shape.empty()) {
            input_size =
                std::accumulate(__shape.begin(), __shape.end(), size_type(1),
                                std::multiplies<size_type>());
            if (input_size == 0)
            { // shape might be shape[3, 4, 0, 6]. This means no input memory
              // and no output expected
                output_size = 0; // depends on axes. zero at this stage only
            }

            input_shape_size = __shape.size();
            input_shape = reinterpret_cast<size_type *>(dpnp_memory_alloc_c(
                queue_ref, input_shape_size * sizeof(size_type)));
            std::copy(__shape.begin(), __shape.end(), input_shape);

            input_shape_strides =
                reinterpret_cast<size_type *>(dpnp_memory_alloc_c(
                    queue_ref, input_shape_size * sizeof(size_type)));
            std::copy(__strides.begin(), __strides.end(), input_shape_strides);
        }
        iteration_size = input_size;
    }

    size_type get_input_begin_offset(size_type output_global_id) const
    {
        size_type input_global_id = 0;
        if (axis_use) {
            assert(output_global_id < output_size);

            for (size_t iit = 0, oit = 0;
                 iit < static_cast<size_t>(input_shape_size); ++iit)
            {
                if (std::find(axes.begin(), axes.end(), iit) == axes.end()) {
                    const size_type output_xyz_id = get_xyz_id_by_id_inkernel(
                        output_global_id, output_shape_strides,
                        output_shape_size, oit);
                    input_global_id +=
                        (output_xyz_id * input_shape_strides[iit]);
                    ++oit;
                }
            }
        }
        else if (broadcast_use) {
            assert(output_global_id < output_size);
            assert(input_shape_size <= output_shape_size);

            for (int irit = input_shape_size - 1, orit = output_shape_size - 1;
                 irit >= 0; --irit, --orit)
            {
                size_type *broadcast_axes_end =
                    broadcast_axes + broadcast_axes_size;
                if (std::find(broadcast_axes, broadcast_axes_end, orit) ==
                    broadcast_axes_end) {
                    const size_type output_xyz_id = get_xyz_id_by_id_inkernel(
                        output_global_id, output_shape_strides,
                        output_shape_size, orit);
                    input_global_id +=
                        (output_xyz_id * input_shape_strides[irit]);
                }
            }
        }

        return input_global_id;
    }

    size_type get_iteration_size() const
    {
        return iteration_size;
    }

    void free_axes_memory()
    {
        axes.clear();
        dpnp_memory_free_c(queue_ref, axes_shape_strides);
        axes_shape_strides = nullptr;
    }

    void free_broadcast_axes_memory()
    {
        broadcast_axes_size = size_type{};
        dpnp_memory_free_c(queue_ref, broadcast_axes);
        broadcast_axes = nullptr;
    }

    void free_input_memory()
    {
        input_size = size_type{};
        input_shape_size = size_type{};
        dpnp_memory_free_c(queue_ref, input_shape);
        dpnp_memory_free_c(queue_ref, input_shape_strides);
        input_shape = nullptr;
        input_shape_strides = nullptr;
    }

    void free_iteration_memory()
    {
        iteration_size = size_type{};
        iteration_shape_size = size_type{};
        dpnp_memory_free_c(queue_ref, iteration_shape_strides);
        iteration_shape_strides = nullptr;
    }

    void free_output_memory()
    {
        output_size = size_type{};
        output_shape_size = size_type{};
        dpnp_memory_free_c(queue_ref, output_shape);
        dpnp_memory_free_c(queue_ref, output_shape_strides);
        output_shape = nullptr;
        output_shape_strides = nullptr;
    }

    void free_memory()
    {
        free_axes_memory();
        free_broadcast_axes_memory();
        free_input_memory();
        free_iteration_memory();
        free_output_memory();
    }

    DPCTLSyclQueueRef queue_ref = nullptr;    // SYCL queue used for USM allocations
    pointer data = nullptr;                   // input array data pointer
    size_type input_size = size_type{};       // number of elements in the input
    size_type *input_shape = nullptr;
    size_type input_shape_size = size_type{};
    size_type *input_shape_strides = nullptr; // strides of the input array

    std::vector<size_type> axes;              // axes selected by set_axes()
    bool axis_use = false;

    size_type *broadcast_axes = nullptr;      // axes introduced by broadcast_to_shape()
    size_type broadcast_axes_size = size_type{};
    bool broadcast_use = false;

    size_type output_size = size_type{};      // number of elements in the output
    size_type *output_shape = nullptr;
    size_type output_shape_size = size_type{};
    size_type *output_shape_strides = nullptr;

    size_type iteration_size = size_type{};   // number of elements iterated per output element
    size_type iteration_shape_size = size_type{};
    size_type *iteration_shape_strides = nullptr;
    size_type *axes_shape_strides = nullptr;
};

#endif // DPNP_ITERATOR_H
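As a rough end-to-end illustration of how DPNPC_id and DPNP_USM_iterator fit together, here is a minimal host-side sketch of a sum reduction over one axis. It is not taken from the library; the queue handle, buffers, and shapes are hypothetical, and in practice the begin()/end() pair is usually driven from inside a SYCL kernel rather than a plain host loop.

// Hypothetical usage sketch: reduce a {2, 3} array along axis 1.
// Assumes q_ref is a valid DPCTLSyclQueueRef and in_data/out_data are
// USM allocations reachable from that queue; names are illustrative only.
void sum_over_last_axis(DPCTLSyclQueueRef q_ref, double *in_data, double *out_data)
{
    const std::vector<shape_elem_type> shape = {2, 3};

    DPNPC_id<double> input(q_ref, in_data, shape);
    input.set_axes({1}); // reduce along axis 1

    // One output element per non-reduced index combination.
    for (shape_elem_type out_id = 0; out_id < input.get_output_size(); ++out_id) {
        double acc = 0;
        for (auto it = input.begin(out_id); it != input.end(out_id); ++it) {
            acc += *it; // walks the elements that belong to this output slot
        }
        out_data[out_id] = acc;
    }
}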
Referenced declarations

DPNP_USM_iterator
    Iterator for DPNPC_id type.
DPNP_USM_iterator &operator++()
    prefix increment
DPNP_USM_iterator operator++(int)
    postfix increment
friend std::ostream &operator<<(std::ostream &__out, const DPNP_USM_iterator &__it)
    Print this container in human readable form in error reporting.

DPNPC_id
    Type to keep USM array pointers used in kernels.
DPNPC_id(DPCTLSyclQueueRef q_ref, pointer __ptr, const std::vector<size_type> &__shape)
    Main container for reduction iterator.
DPNPC_id(pointer __ptr, const std::vector<size_type> &__shape, const std::vector<size_type> &__strides)
    Main container for reduction/broadcasting iterator.
size_type get_output_size() const
    Returns the number of elements in the output.
void broadcast_to_shape(const std::vector<size_type> &__shape)
    Broadcast input data to specified shape.
void set_axis(shape_elem_type __axis)
    Set axis for the data object to use in computation.
void set_axes(const std::vector<shape_elem_type> &__axes)
    Set axes for the data object to use in computation.
iterator begin(size_type output_global_id = 0) const
    Designed for execution in a SYCL environment.
iterator end(size_type output_global_id = 0) const
    Designed for execution in a SYCL environment.
reference operator[](size_type __n) const
    Designed for execution in a SYCL environment.

char *dpnp_memory_alloc_c(DPCTLSyclQueueRef q_ref, size_t size_in_bytes)
    SYCL queue memory allocation.
void get_shape_offsets_inkernel(const _DataType *shape, size_t shape_size, _DataType *offsets)
    Shape offset calculation used in kernels.
_DataType get_xyz_id_by_id_inkernel(size_t global_id, const _DataType *offsets, size_t offsets_size, size_t axis)
    Calculate xyz id for a given axis from a linear index (a worked sketch follows this list).
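To make the index arithmetic behind DPNP_USM_iterator::ptr() and get_xyz_id_by_id_inkernel concrete, here is a small standalone sketch (not the library's implementation) of decomposing a linear index into per-axis coordinates with precomputed offsets: for a row-major shape {2, 3, 4} the offsets are {12, 4, 1}, so linear index 17 maps to coordinates (1, 1, 1).

// Standalone illustration of linear-index decomposition; types and names
// are simplified and do not come from the dpnp backend.
#include <vector>

std::vector<long> decompose(long linear_id, const std::vector<long> &offsets)
{
    std::vector<long> coords;
    long remainder = linear_id;
    for (const long offset : offsets) {
        coords.push_back(remainder / offset); // coordinate along this axis
        remainder %= offset;                  // remainder for the inner axes
    }
    return coords;
}

// decompose(17, {12, 4, 1}) returns {1, 1, 1} for a row-major shape {2, 3, 4}.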