DPNP C++ backend kernel library 0.18.0dev2
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
i0.hpp
1//*****************************************************************************
2// Copyright (c) 2024-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <sycl/sycl.hpp>
29
34#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
35#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
36#endif
37
38// Include <sycl/ext/intel/math.hpp> only when targeting Intel devices.
39// This header relies on intel-specific types like _iml_half_internal,
40// which are not supported on non-intel backends (e.g., CUDA, AMD)
41#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER) && \
42 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
43#include <sycl/ext/intel/math.hpp>
44#endif
45
46namespace dpnp::kernels::i0
47{
52namespace impl
53{
61template <typename Tp>
62inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
63{
64 const Tp x2 = x / Tp(2);
65 const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));
66
67 const Tp xx4 = x2 * x2;
68 Tp Jn = Tp(1);
69 Tp term = Tp(1);
70 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
71
72 for (unsigned int i = 1; i < max_iter; ++i) {
73 term *= xx4 / (Tp(i) * Tp(i));
74 Jn += term;
75 if (sycl::fabs(term / Jn) < eps) {
76 break;
77 }
78 }
79 return fact * Jn;
80}
81
88template <typename Tp>
89inline Tp bessel_ik_0(Tp x)
90{
91 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
92 constexpr Tp fp_min = Tp(10) * eps;
93 constexpr int max_iter = 15000;
94 constexpr Tp x_min = Tp(2);
95
96 const Tp mu = Tp(0);
97 const Tp mu2 = mu * mu;
98 const Tp xi = Tp(1) / x;
99 const Tp xi2 = Tp(2) * xi;
100 Tp h = fp_min;
101
102 Tp b = Tp(0);
103 Tp d = Tp(0);
104 Tp c = h;
105 int i;
106 for (i = 1; i <= max_iter; ++i) {
107 b += xi2;
108 d = Tp(1) / (b + d);
109 c = b + Tp(1) / c;
110
111 const Tp del = c * d;
112 h *= del;
113 if (sycl::fabs(del - Tp(1)) < eps) {
114 break;
115 }
116 }
117 if (i > max_iter) {
118 // argument `x` is too large
119 return std::numeric_limits<Tp>::infinity();
120 }
121
122 Tp Inul = fp_min;
123 const Tp Inul1 = Inul;
124 const Tp Ipnul = h * Inul;
125
126 constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);
127 Tp f = Ipnul / Inul;
128 Tp Kmu, Knu1;
129 if (x < x_min) {
130 const Tp x2 = x / Tp(2);
131 const Tp pimu = pi * mu;
132 const Tp fact =
133 (sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));
134
135 Tp d = -sycl::log(x2);
136 Tp e = mu * d;
137 const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);
138
139 // compute the gamma functions required by the Temme series expansions
140 constexpr Tp gam1 =
141 -static_cast<Tp>(0.5772156649015328606065120900824024L);
142 const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));
143
144 Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);
145 Tp sum = ff;
146 e = sycl::exp(e);
147
148 Tp p = e / (Tp(2) * gam2);
149 Tp q = Tp(1) / (Tp(2) * e * gam2);
150 Tp c = Tp(1);
151 d = x2 * x2;
152 Tp sum1 = p;
153 int i;
154 for (i = 1; i <= max_iter; ++i) {
155 ff = (i * ff + p + q) / (i * i - mu2);
156 c *= d / i;
157 p /= i - mu;
158 q /= i + mu;
159 const Tp del = c * ff;
160 sum += del;
161 const Tp __del1 = c * (p - i * ff);
162 sum1 += __del1;
163 if (sycl::fabs(del) < eps * sycl::fabs(sum)) {
164 break;
165 }
166 }
167 if (i > max_iter) {
168 // Bessel k series failed to converge
169 return std::numeric_limits<Tp>::quiet_NaN();
170 }
171 Kmu = sum;
172 Knu1 = sum1 * xi2;
173 }
174 else {
175 Tp b = Tp(2) * (Tp(1) + x);
176 Tp d = Tp(1) / b;
177 Tp delh = d;
178 Tp h = delh;
179 Tp q1 = Tp(0);
180 Tp q2 = Tp(1);
181 Tp a1 = Tp(0.25L) - mu2;
182 Tp q = c = a1;
183 Tp a = -a1;
184 Tp s = Tp(1) + q * delh;
185 int i;
186 for (i = 2; i <= max_iter; ++i) {
187 a -= 2 * (i - 1);
188 c = -a * c / i;
189 const Tp qnew = (q1 - b * q2) / a;
190 q1 = q2;
191 q2 = qnew;
192 q += c * qnew;
193 b += Tp(2);
194 d = Tp(1) / (b + a * d);
195 delh = (b * d - Tp(1)) * delh;
196 h += delh;
197 const Tp dels = q * delh;
198 s += dels;
199 if (sycl::fabs(dels / s) < eps) {
200 break;
201 }
202 }
203 if (i > max_iter) {
204 // Steed's method failed
205 return std::numeric_limits<Tp>::quiet_NaN();
206 }
207 h = a1 * h;
208 Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;
209 Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;
210 }
211
212 Tp Kpmu = mu * xi * Kmu - Knu1;
213 Tp Inumu = xi / (f * Kmu - Kpmu);
214 return Inumu * Inul1 / Inul;
215}
216
223template <typename Tp>
224inline Tp cyl_bessel_i0(Tp x)
225{
226 if (sycl::isnan(x)) {
227 return std::numeric_limits<Tp>::quiet_NaN();
228 }
229
230 if (sycl::isinf(x)) {
231 // return +inf per any input infinity
232 return std::numeric_limits<Tp>::infinity();
233 }
234
235 if (x == Tp(0)) {
236 return Tp(1);
237 }
238
239 if (x * x < Tp(10)) {
240 return cyl_bessel_ij_0_series<Tp>(x, 200);
241 }
242 return bessel_ik_0(sycl::fabs(x));
243}
244} // namespace impl
245
246template <typename argT, typename resT>
248{
249 // is function constant for given argT
250 using is_constant = typename std::false_type;
251 // constant value, if constant
252 // constexpr resT constant_value = resT{};
253 // is function defined for sycl::vec
254 using supports_vec = typename std::false_type;
255 // do both argT and resT support subgroup store/load operation
256 using supports_sg_loadstore = typename std::true_type;
257
258 resT operator()(const argT &x) const
259 {
260#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER) && \
261 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
262 using sycl::ext::intel::math::cyl_bessel_i0;
263#else
265#endif
266
267 if constexpr (std::is_same_v<resT, sycl::half>) {
268 return static_cast<resT>(cyl_bessel_i0<float>(float(x)));
269 }
270 else {
271 return cyl_bessel_i0(x);
272 }
273 }
274};
275} // namespace dpnp::kernels::i0
Tp cyl_bessel_i0(Tp x)
Return the regular modified Bessel function of order 0.
Definition i0.hpp:224
Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
This routine returns the cylindrical Bessel functions of order 0 by series expansion.
Definition i0.hpp:62
Tp bessel_ik_0(Tp x)
Compute the modified Bessel functions.
Definition i0.hpp:89