DPNP C++ backend kernel library 0.18.0rc1
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
i0.hpp
1//*****************************************************************************
2// Copyright (c) 2024-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <sycl/sycl.hpp>
29
34#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
35#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
36#endif
37
43#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER)
44#define __SYCL_EXT_INTEL_MATH_SUPPORT
45#endif
46
47#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
48 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
49#include <sycl/ext/intel/math.hpp>
50#endif
51
53{
54#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
55 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
56using sycl::ext::intel::math::cyl_bessel_i0;
57
58#else
59
64namespace impl
65{
73template <typename Tp>
74inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
75{
76 const Tp x2 = x / Tp(2);
77 const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));
78
79 const Tp xx4 = x2 * x2;
80 Tp Jn = Tp(1);
81 Tp term = Tp(1);
82 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
83
84 for (unsigned int i = 1; i < max_iter; ++i) {
85 term *= xx4 / (Tp(i) * Tp(i));
86 Jn += term;
87 if (sycl::fabs(term / Jn) < eps) {
88 break;
89 }
90 }
91 return fact * Jn;
92}
93
100template <typename Tp>
101inline Tp bessel_ik_0(Tp x)
102{
103 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
104 constexpr Tp fp_min = Tp(10) * eps;
105 constexpr int max_iter = 15000;
106 constexpr Tp x_min = Tp(2);
107
108 const Tp mu = Tp(0);
109 const Tp mu2 = mu * mu;
110 const Tp xi = Tp(1) / x;
111 const Tp xi2 = Tp(2) * xi;
112 Tp h = fp_min;
113
114 Tp b = Tp(0);
115 Tp d = Tp(0);
116 Tp c = h;
117 int i;
118 for (i = 1; i <= max_iter; ++i) {
119 b += xi2;
120 d = Tp(1) / (b + d);
121 c = b + Tp(1) / c;
122
123 const Tp del = c * d;
124 h *= del;
125 if (sycl::fabs(del - Tp(1)) < eps) {
126 break;
127 }
128 }
129 if (i > max_iter) {
130 // argument `x` is too large
131 return std::numeric_limits<Tp>::infinity();
132 }
133
134 Tp Inul = fp_min;
135 const Tp Inul1 = Inul;
136 const Tp Ipnul = h * Inul;
137
138 constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);
139 Tp f = Ipnul / Inul;
140 Tp Kmu, Knu1;
141 if (x < x_min) {
142 const Tp x2 = x / Tp(2);
143 const Tp pimu = pi * mu;
144 const Tp fact =
145 (sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));
146
147 Tp d = -sycl::log(x2);
148 Tp e = mu * d;
149 const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);
150
151 // compute the gamma functions required by the Temme series expansions
152 constexpr Tp gam1 =
153 -static_cast<Tp>(0.5772156649015328606065120900824024L);
154 const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));
155
156 Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);
157 Tp sum = ff;
158 e = sycl::exp(e);
159
160 Tp p = e / (Tp(2) * gam2);
161 Tp q = Tp(1) / (Tp(2) * e * gam2);
162 Tp c = Tp(1);
163 d = x2 * x2;
164 Tp sum1 = p;
165 int i;
166 for (i = 1; i <= max_iter; ++i) {
167 ff = (i * ff + p + q) / (i * i - mu2);
168 c *= d / i;
169 p /= i - mu;
170 q /= i + mu;
171 const Tp del = c * ff;
172 sum += del;
173 const Tp __del1 = c * (p - i * ff);
174 sum1 += __del1;
175 if (sycl::fabs(del) < eps * sycl::fabs(sum)) {
176 break;
177 }
178 }
179 if (i > max_iter) {
180 // Bessel k series failed to converge
181 return std::numeric_limits<Tp>::quiet_NaN();
182 }
183 Kmu = sum;
184 Knu1 = sum1 * xi2;
185 }
186 else {
187 Tp b = Tp(2) * (Tp(1) + x);
188 Tp d = Tp(1) / b;
189 Tp delh = d;
190 Tp h = delh;
191 Tp q1 = Tp(0);
192 Tp q2 = Tp(1);
193 Tp a1 = Tp(0.25L) - mu2;
194 Tp q = c = a1;
195 Tp a = -a1;
196 Tp s = Tp(1) + q * delh;
197 int i;
198 for (i = 2; i <= max_iter; ++i) {
199 a -= 2 * (i - 1);
200 c = -a * c / i;
201 const Tp qnew = (q1 - b * q2) / a;
202 q1 = q2;
203 q2 = qnew;
204 q += c * qnew;
205 b += Tp(2);
206 d = Tp(1) / (b + a * d);
207 delh = (b * d - Tp(1)) * delh;
208 h += delh;
209 const Tp dels = q * delh;
210 s += dels;
211 if (sycl::fabs(dels / s) < eps) {
212 break;
213 }
214 }
215 if (i > max_iter) {
216 // Steed's method failed
217 return std::numeric_limits<Tp>::quiet_NaN();
218 }
219 h = a1 * h;
220 Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;
221 Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;
222 }
223
224 Tp Kpmu = mu * xi * Kmu - Knu1;
225 Tp Inumu = xi / (f * Kmu - Kpmu);
226 return Inumu * Inul1 / Inul;
227}
228
235template <typename Tp>
236inline Tp cyl_bessel_i0(Tp x)
237{
238 if (sycl::isnan(x)) {
239 return std::numeric_limits<Tp>::quiet_NaN();
240 }
241
242 if (sycl::isinf(x)) {
243 // return +inf per any input infinity
244 return std::numeric_limits<Tp>::infinity();
245 }
246
247 if (x == Tp(0)) {
248 return Tp(1);
249 }
250
251 if (x * x < Tp(10)) {
252 return cyl_bessel_ij_0_series<Tp>(x, 200);
253 }
254 return bessel_ik_0(sycl::fabs(x));
255}
256} // namespace impl
257
259
260#endif
261
262template <typename argT, typename resT>
264{
265 // is function constant for given argT
266 using is_constant = typename std::false_type;
267 // constant value, if constant
268 // constexpr resT constant_value = resT{};
269 // is function defined for sycl::vec
270 using supports_vec = typename std::false_type;
271 // do both argT and resT support subgroup store/load operation
272 using supports_sg_loadstore = typename std::true_type;
273
274 resT operator()(const argT &x) const
275 {
276 if constexpr (std::is_same_v<resT, sycl::half>) {
277 return static_cast<resT>(cyl_bessel_i0<float>(float(x)));
278 }
279 else {
280 return cyl_bessel_i0(x);
281 }
282 }
283};
284} // namespace dpnp::kernels::i0
Tp cyl_bessel_i0(Tp x)
Return the regular modified Bessel function of order 0.
Definition i0.hpp:236
Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
This routine returns the cylindrical Bessel functions of order 0 by series expansion.
Definition i0.hpp:74
Tp bessel_ik_0(Tp x)
Compute the modified Bessel functions.
Definition i0.hpp:101