DPNP C++ backend kernel library 0.18.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
common_helpers.hpp
1//*****************************************************************************
2// Copyright (c) 2023-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <stdexcept>
29
30#include <pybind11/pybind11.h>
31#include <sycl/sycl.hpp>
32
33#include <complex>
34#include <cstring>
35#include <stdexcept>
36
37// dpctl tensor headers
38#include "utils/sycl_alloc_utils.hpp"
39
40namespace dpnp::extensions::lapack::helper
41{
42namespace py = pybind11;
43
// Maps a (possibly complex) element type to its underlying real value type.
//
// Primary template: for real types the value type is the type itself.
// NOTE: the scraped source dropped the `struct value_type_of` declaration
// line of the primary template; it is restored here.
template <typename T>
struct value_type_of
{
    using type = T;
};

// Specialization for std::complex<T>: the value type is the real part's
// type T (e.g. value_type_of<std::complex<float>>::type is float).
template <typename T>
struct value_type_of<std::complex<T>>
{
    using type = T;
};
55
// Returns the smallest multiple of `mult` that is greater than or equal
// to `value` (i.e. rounds `value` up to the next `mult` boundary).
template <typename intT>
inline intT round_up_mult(intT value, intT mult)
{
    return ((value + mult - 1) / mult) * mult;
}
63
64// Checks if the shape array has any non-zero dimension.
65inline bool check_zeros_shape(int ndim, const py::ssize_t *shape)
66{
67 size_t src_nelems(1);
68
69 for (int i = 0; i < ndim; ++i) {
70 src_nelems *= static_cast<size_t>(shape[i]);
71 }
72 return src_nelems == 0;
73}
74
75// Allocate the memory for the pivot indices
76inline std::int64_t *alloc_ipiv(const std::int64_t n, sycl::queue &exec_q)
77{
78 std::int64_t *ipiv = nullptr;
79
80 try {
81 ipiv = sycl::malloc_device<std::int64_t>(n, exec_q);
82 if (!ipiv) {
83 throw std::runtime_error("Device allocation for ipiv failed");
84 }
85 } catch (sycl::exception const &e) {
86 if (ipiv != nullptr)
87 dpctl::tensor::alloc_utils::sycl_free_noexcept(ipiv, exec_q);
88 throw std::runtime_error(
89 std::string(
90 "Unexpected SYCL exception caught during ipiv allocation: ") +
91 e.what());
92 }
93
94 return ipiv;
95}
96
97// Allocate the total memory for the total pivot indices with proper alignment
98// for batch implementations
99template <typename T>
100inline std::int64_t *alloc_ipiv_batch(const std::int64_t n,
101 std::int64_t n_linear_streams,
102 sycl::queue &exec_q)
103{
104 // Get padding size to ensure memory allocations are aligned to 256 bytes
105 // for better performance
106 const std::int64_t padding = 256 / sizeof(T);
107
108 // Calculate the total size needed for the pivot indices array for all
109 // linear streams with proper alignment
110 size_t alloc_ipiv_size = round_up_mult(n_linear_streams * n, padding);
111
112 return alloc_ipiv(alloc_ipiv_size, exec_q);
113}
114
115// Allocate the memory for the scratchpad
116template <typename T>
117inline T *alloc_scratchpad(std::int64_t scratchpad_size, sycl::queue &exec_q)
118{
119 T *scratchpad = nullptr;
120
121 try {
122 if (scratchpad_size > 0) {
123 scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
124 if (!scratchpad) {
125 throw std::runtime_error(
126 "Device allocation for scratchpad failed");
127 }
128 }
129 } catch (sycl::exception const &e) {
130 if (scratchpad != nullptr) {
131 dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, exec_q);
132 }
133 throw std::runtime_error(std::string("Unexpected SYCL exception caught "
134 "during scratchpad allocation: ") +
135 e.what());
136 }
137
138 return scratchpad;
139}
140
141// Allocate the total scratchpad memory with proper alignment for batch
142// implementations
143template <typename T>
144inline T *alloc_scratchpad_batch(std::int64_t scratchpad_size,
145 std::int64_t n_linear_streams,
146 sycl::queue &exec_q)
147{
148 // Get padding size to ensure memory allocations are aligned to 256 bytes
149 // for better performance
150 const std::int64_t padding = 256 / sizeof(T);
151
152 // Calculate the total scratchpad memory size needed for all linear
153 // streams with proper alignment
154 const size_t alloc_scratch_size =
155 round_up_mult(n_linear_streams * scratchpad_size, padding);
156
157 return alloc_scratchpad<T>(alloc_scratch_size, exec_q);
158}
159} // namespace dpnp::extensions::lapack::helper