DPNP C++ backend kernel library 0.20.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
common_helpers.hpp
1//*****************************************************************************
2// Copyright (c) 2023, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// - Neither the name of the copyright holder nor the names of its contributors
13// may be used to endorse or promote products derived from this software
14// without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26// THE POSSIBILITY OF SUCH DAMAGE.
27//*****************************************************************************
28
29#pragma once
30
31#include <stdexcept>
32
33#include <pybind11/pybind11.h>
34#include <sycl/sycl.hpp>
35
36#include <complex>
37#include <cstring>
38#include <stdexcept>
39
40// dpctl tensor headers
41#include "utils/sycl_alloc_utils.hpp"
42
43namespace dpnp::extensions::lapack::helper
44{
45namespace py = pybind11;
46
// Type trait mapping a type `T` to its underlying value type.
// Primary template: every `T` that is not a `std::complex` maps to itself.
template <typename T>
struct value_type_of
{
    using type = T;
};

// Specialization for `std::complex<T>`: yields the component type `T`.
template <typename T>
struct value_type_of<std::complex<T>>
{
    using type = T;
};
58
// Returns `value` rounded up to a multiple of `mult`; for a non-negative
// `value` and a positive `mult` this is the smallest multiple of `mult` that
// is not less than `value`.
template <typename intT>
inline intT round_up_mult(intT value, intT mult)
{
    // Number of `mult`-sized chunks needed to cover `value` (ceiling division)
    const intT n_chunks = (value + (mult - 1)) / mult;
    return n_chunks * mult;
}
66
67// Checks if the shape array has any non-zero dimension.
68inline bool check_zeros_shape(int ndim, const py::ssize_t *shape)
69{
70 size_t src_nelems(1);
71
72 for (int i = 0; i < ndim; ++i) {
73 src_nelems *= static_cast<size_t>(shape[i]);
74 }
75 return src_nelems == 0;
76}
77
78// Allocate the memory for the pivot indices
79inline std::int64_t *alloc_ipiv(const std::int64_t n, sycl::queue &exec_q)
80{
81 std::int64_t *ipiv = nullptr;
82
83 try {
84 ipiv = sycl::malloc_device<std::int64_t>(n, exec_q);
85 if (!ipiv) {
86 throw std::runtime_error("Device allocation for ipiv failed");
87 }
88 } catch (sycl::exception const &e) {
89 if (ipiv != nullptr)
90 dpctl::tensor::alloc_utils::sycl_free_noexcept(ipiv, exec_q);
91 throw std::runtime_error(
92 std::string(
93 "Unexpected SYCL exception caught during ipiv allocation: ") +
94 e.what());
95 }
96
97 return ipiv;
98}
99
100// Allocate the total memory for the total pivot indices with proper alignment
101// for batch implementations
102template <typename T>
103inline std::int64_t *alloc_ipiv_batch(const std::int64_t n,
104 std::int64_t n_linear_streams,
105 sycl::queue &exec_q)
106{
107 // Get padding size to ensure memory allocations are aligned to 256 bytes
108 // for better performance
109 const std::int64_t padding = 256 / sizeof(T);
110
111 // Calculate the total size needed for the pivot indices array for all
112 // linear streams with proper alignment
113 size_t alloc_ipiv_size = round_up_mult(n_linear_streams * n, padding);
114
115 return alloc_ipiv(alloc_ipiv_size, exec_q);
116}
117
118// Allocate the memory for the scratchpad
119template <typename T>
120inline T *alloc_scratchpad(std::int64_t scratchpad_size, sycl::queue &exec_q)
121{
122 T *scratchpad = nullptr;
123
124 try {
125 if (scratchpad_size > 0) {
126 scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
127 if (!scratchpad) {
128 throw std::runtime_error(
129 "Device allocation for scratchpad failed");
130 }
131 }
132 } catch (sycl::exception const &e) {
133 if (scratchpad != nullptr) {
134 dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, exec_q);
135 }
136 throw std::runtime_error(std::string("Unexpected SYCL exception caught "
137 "during scratchpad allocation: ") +
138 e.what());
139 }
140
141 return scratchpad;
142}
143
144// Allocate the total scratchpad memory with proper alignment for batch
145// implementations
146template <typename T>
147inline T *alloc_scratchpad_batch(std::int64_t scratchpad_size,
148 std::int64_t n_linear_streams,
149 sycl::queue &exec_q)
150{
151 // Get padding size to ensure memory allocations are aligned to 256 bytes
152 // for better performance
153 const std::int64_t padding = 256 / sizeof(T);
154
155 // Calculate the total scratchpad memory size needed for all linear
156 // streams with proper alignment
157 const size_t alloc_scratch_size =
158 round_up_mult(n_linear_streams * scratchpad_size, padding);
159
160 return alloc_scratchpad<T>(alloc_scratch_size, exec_q);
161}
162} // namespace dpnp::extensions::lapack::helper