DPNP C++ backend kernel library 0.18.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
i0.hpp
1//*****************************************************************************
2// Copyright (c) 2024-2025, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23// THE POSSIBILITY OF SUCH DAMAGE.
24//*****************************************************************************
25
26#pragma once
27
28#include <sycl/sycl.hpp>
29
34#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
35#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
36#endif
37
38#if __SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT
39#include <sycl/ext/intel/math.hpp>
40#endif
41
42namespace dpnp::kernels::i0
43{
48namespace impl
49{
57template <typename Tp>
58inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
59{
60 const Tp x2 = x / Tp(2);
61 const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));
62
63 const Tp xx4 = x2 * x2;
64 Tp Jn = Tp(1);
65 Tp term = Tp(1);
66 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
67
68 for (unsigned int i = 1; i < max_iter; ++i) {
69 term *= xx4 / (Tp(i) * Tp(i));
70 Jn += term;
71 if (sycl::fabs(term / Jn) < eps) {
72 break;
73 }
74 }
75 return fact * Jn;
76}
77
84template <typename Tp>
85inline Tp bessel_ik_0(Tp x)
86{
87 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
88 constexpr Tp fp_min = Tp(10) * eps;
89 constexpr int max_iter = 15000;
90 constexpr Tp x_min = Tp(2);
91
92 const Tp mu = Tp(0);
93 const Tp mu2 = mu * mu;
94 const Tp xi = Tp(1) / x;
95 const Tp xi2 = Tp(2) * xi;
96 Tp h = fp_min;
97
98 Tp b = Tp(0);
99 Tp d = Tp(0);
100 Tp c = h;
101 int i;
102 for (i = 1; i <= max_iter; ++i) {
103 b += xi2;
104 d = Tp(1) / (b + d);
105 c = b + Tp(1) / c;
106
107 const Tp del = c * d;
108 h *= del;
109 if (sycl::fabs(del - Tp(1)) < eps) {
110 break;
111 }
112 }
113 if (i > max_iter) {
114 // argument `x` is too large
115 return std::numeric_limits<Tp>::infinity();
116 }
117
118 Tp Inul = fp_min;
119 const Tp Inul1 = Inul;
120 const Tp Ipnul = h * Inul;
121
122 constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);
123 Tp f = Ipnul / Inul;
124 Tp Kmu, Knu1;
125 if (x < x_min) {
126 const Tp x2 = x / Tp(2);
127 const Tp pimu = pi * mu;
128 const Tp fact =
129 (sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));
130
131 Tp d = -sycl::log(x2);
132 Tp e = mu * d;
133 const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);
134
135 // compute the gamma functions required by the Temme series expansions
136 constexpr Tp gam1 =
137 -static_cast<Tp>(0.5772156649015328606065120900824024L);
138 const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));
139
140 Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);
141 Tp sum = ff;
142 e = sycl::exp(e);
143
144 Tp p = e / (Tp(2) * gam2);
145 Tp q = Tp(1) / (Tp(2) * e * gam2);
146 Tp c = Tp(1);
147 d = x2 * x2;
148 Tp sum1 = p;
149 int i;
150 for (i = 1; i <= max_iter; ++i) {
151 ff = (i * ff + p + q) / (i * i - mu2);
152 c *= d / i;
153 p /= i - mu;
154 q /= i + mu;
155 const Tp del = c * ff;
156 sum += del;
157 const Tp __del1 = c * (p - i * ff);
158 sum1 += __del1;
159 if (sycl::fabs(del) < eps * sycl::fabs(sum)) {
160 break;
161 }
162 }
163 if (i > max_iter) {
164 // Bessel k series failed to converge
165 return std::numeric_limits<Tp>::quiet_NaN();
166 }
167 Kmu = sum;
168 Knu1 = sum1 * xi2;
169 }
170 else {
171 Tp b = Tp(2) * (Tp(1) + x);
172 Tp d = Tp(1) / b;
173 Tp delh = d;
174 Tp h = delh;
175 Tp q1 = Tp(0);
176 Tp q2 = Tp(1);
177 Tp a1 = Tp(0.25L) - mu2;
178 Tp q = c = a1;
179 Tp a = -a1;
180 Tp s = Tp(1) + q * delh;
181 int i;
182 for (i = 2; i <= max_iter; ++i) {
183 a -= 2 * (i - 1);
184 c = -a * c / i;
185 const Tp qnew = (q1 - b * q2) / a;
186 q1 = q2;
187 q2 = qnew;
188 q += c * qnew;
189 b += Tp(2);
190 d = Tp(1) / (b + a * d);
191 delh = (b * d - Tp(1)) * delh;
192 h += delh;
193 const Tp dels = q * delh;
194 s += dels;
195 if (sycl::fabs(dels / s) < eps) {
196 break;
197 }
198 }
199 if (i > max_iter) {
200 // Steed's method failed
201 return std::numeric_limits<Tp>::quiet_NaN();
202 }
203 h = a1 * h;
204 Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;
205 Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;
206 }
207
208 Tp Kpmu = mu * xi * Kmu - Knu1;
209 Tp Inumu = xi / (f * Kmu - Kpmu);
210 return Inumu * Inul1 / Inul;
211}
212
219template <typename Tp>
220inline Tp cyl_bessel_i0(Tp x)
221{
222 if (sycl::isnan(x)) {
223 return std::numeric_limits<Tp>::quiet_NaN();
224 }
225
226 if (sycl::isinf(x)) {
227 // return +inf per any input infinity
228 return std::numeric_limits<Tp>::infinity();
229 }
230
231 if (x == Tp(0)) {
232 return Tp(1);
233 }
234
235 if (x * x < Tp(10)) {
236 return cyl_bessel_ij_0_series<Tp>(x, 200);
237 }
238 return bessel_ik_0(sycl::fabs(x));
239}
240} // namespace impl
241
242template <typename argT, typename resT>
244{
245 // is function constant for given argT
246 using is_constant = typename std::false_type;
247 // constant value, if constant
248 // constexpr resT constant_value = resT{};
249 // is function defined for sycl::vec
250 using supports_vec = typename std::false_type;
251 // do both argT and resT support subgroup store/load operation
252 using supports_sg_loadstore = typename std::true_type;
253
254 resT operator()(const argT &x) const
255 {
256#if __SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT
257 using sycl::ext::intel::math::cyl_bessel_i0;
258#else
260#endif
261
262 if constexpr (std::is_same_v<resT, sycl::half>) {
263 return static_cast<resT>(cyl_bessel_i0<float>(float(x)));
264 }
265 else {
266 return cyl_bessel_i0(x);
267 }
268 }
269};
270} // namespace dpnp::kernels::i0
Tp cyl_bessel_i0(Tp x)
Return the regular modified Bessel function of order 0.
Definition i0.hpp:220
Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
This routine returns the cylindrical Bessel functions of order 0 by series expansion.
Definition i0.hpp:58
Tp bessel_ik_0(Tp x)
Compute the modified Bessel functions.
Definition i0.hpp:85