DPNP C++ backend kernel library 0.20.0dev0
Data Parallel Extension for NumPy*
Loading...
Searching...
No Matches
i0.hpp
1//*****************************************************************************
2// Copyright (c) 2024, Intel Corporation
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7// - Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// - Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// - Neither the name of the copyright holder nor the names of its contributors
13// may be used to endorse or promote products derived from this software
14// without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26// THE POSSIBILITY OF SUCH DAMAGE.
27//*****************************************************************************
28
29#pragma once
30
31#include <sycl/sycl.hpp>
32
37#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
38#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
39#endif
40
46#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER)
47#define __SYCL_EXT_INTEL_MATH_SUPPORT
48#endif
49
50#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
51 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
52#include <sycl/ext/intel/math.hpp>
53#endif
54
56{
57#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
58 (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
59using sycl::ext::intel::math::cyl_bessel_i0;
60
61#else
62
67namespace impl
68{
76template <typename Tp>
77inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
78{
79 const Tp x2 = x / Tp(2);
80 const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));
81
82 const Tp xx4 = x2 * x2;
83 Tp Jn = Tp(1);
84 Tp term = Tp(1);
85 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
86
87 for (unsigned int i = 1; i < max_iter; ++i) {
88 term *= xx4 / (Tp(i) * Tp(i));
89 Jn += term;
90 if (sycl::fabs(term / Jn) < eps) {
91 break;
92 }
93 }
94 return fact * Jn;
95}
96
103template <typename Tp>
104inline Tp bessel_ik_0(Tp x)
105{
106 constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
107 constexpr Tp fp_min = Tp(10) * eps;
108 constexpr int max_iter = 15000;
109 constexpr Tp x_min = Tp(2);
110
111 const Tp mu = Tp(0);
112 const Tp mu2 = mu * mu;
113 const Tp xi = Tp(1) / x;
114 const Tp xi2 = Tp(2) * xi;
115 Tp h = fp_min;
116
117 Tp b = Tp(0);
118 Tp d = Tp(0);
119 Tp c = h;
120 int i;
121 for (i = 1; i <= max_iter; ++i) {
122 b += xi2;
123 d = Tp(1) / (b + d);
124 c = b + Tp(1) / c;
125
126 const Tp del = c * d;
127 h *= del;
128 if (sycl::fabs(del - Tp(1)) < eps) {
129 break;
130 }
131 }
132 if (i > max_iter) {
133 // argument `x` is too large
134 return std::numeric_limits<Tp>::infinity();
135 }
136
137 Tp Inul = fp_min;
138 const Tp Inul1 = Inul;
139 const Tp Ipnul = h * Inul;
140
141 constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);
142 Tp f = Ipnul / Inul;
143 Tp Kmu, Knu1;
144 if (x < x_min) {
145 const Tp x2 = x / Tp(2);
146 const Tp pimu = pi * mu;
147 const Tp fact =
148 (sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));
149
150 Tp d = -sycl::log(x2);
151 Tp e = mu * d;
152 const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);
153
154 // compute the gamma functions required by the Temme series expansions
155 constexpr Tp gam1 =
156 -static_cast<Tp>(0.5772156649015328606065120900824024L);
157 const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));
158
159 Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);
160 Tp sum = ff;
161 e = sycl::exp(e);
162
163 Tp p = e / (Tp(2) * gam2);
164 Tp q = Tp(1) / (Tp(2) * e * gam2);
165 Tp c = Tp(1);
166 d = x2 * x2;
167 Tp sum1 = p;
168 int i;
169 for (i = 1; i <= max_iter; ++i) {
170 ff = (i * ff + p + q) / (i * i - mu2);
171 c *= d / i;
172 p /= i - mu;
173 q /= i + mu;
174 const Tp del = c * ff;
175 sum += del;
176 const Tp __del1 = c * (p - i * ff);
177 sum1 += __del1;
178 if (sycl::fabs(del) < eps * sycl::fabs(sum)) {
179 break;
180 }
181 }
182 if (i > max_iter) {
183 // Bessel k series failed to converge
184 return std::numeric_limits<Tp>::quiet_NaN();
185 }
186 Kmu = sum;
187 Knu1 = sum1 * xi2;
188 }
189 else {
190 Tp b = Tp(2) * (Tp(1) + x);
191 Tp d = Tp(1) / b;
192 Tp delh = d;
193 Tp h = delh;
194 Tp q1 = Tp(0);
195 Tp q2 = Tp(1);
196 Tp a1 = Tp(0.25L) - mu2;
197 Tp q = c = a1;
198 Tp a = -a1;
199 Tp s = Tp(1) + q * delh;
200 int i;
201 for (i = 2; i <= max_iter; ++i) {
202 a -= 2 * (i - 1);
203 c = -a * c / i;
204 const Tp qnew = (q1 - b * q2) / a;
205 q1 = q2;
206 q2 = qnew;
207 q += c * qnew;
208 b += Tp(2);
209 d = Tp(1) / (b + a * d);
210 delh = (b * d - Tp(1)) * delh;
211 h += delh;
212 const Tp dels = q * delh;
213 s += dels;
214 if (sycl::fabs(dels / s) < eps) {
215 break;
216 }
217 }
218 if (i > max_iter) {
219 // Steed's method failed
220 return std::numeric_limits<Tp>::quiet_NaN();
221 }
222 h = a1 * h;
223 Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;
224 Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;
225 }
226
227 Tp Kpmu = mu * xi * Kmu - Knu1;
228 Tp Inumu = xi / (f * Kmu - Kpmu);
229 return Inumu * Inul1 / Inul;
230}
231
238template <typename Tp>
239inline Tp cyl_bessel_i0(Tp x)
240{
241 if (sycl::isnan(x)) {
242 return std::numeric_limits<Tp>::quiet_NaN();
243 }
244
245 if (sycl::isinf(x)) {
246 // return +inf per any input infinity
247 return std::numeric_limits<Tp>::infinity();
248 }
249
250 if (x == Tp(0)) {
251 return Tp(1);
252 }
253
254 if (x * x < Tp(10)) {
255 return cyl_bessel_ij_0_series<Tp>(x, 200);
256 }
257 return bessel_ik_0(sycl::fabs(x));
258}
259} // namespace impl
260
262
263#endif
264
265template <typename argT, typename resT>
267{
268 // is function constant for given argT
269 using is_constant = typename std::false_type;
270 // constant value, if constant
271 // constexpr resT constant_value = resT{};
272 // is function defined for sycl::vec
273 using supports_vec = typename std::false_type;
274 // do both argT and resT support subgroup store/load operation
275 using supports_sg_loadstore = typename std::true_type;
276
277 resT operator()(const argT &x) const
278 {
279 if constexpr (std::is_same_v<resT, sycl::half>) {
280 return static_cast<resT>(cyl_bessel_i0<float>(float(x)));
281 }
282 else {
283 return cyl_bessel_i0(x);
284 }
285 }
286};
287} // namespace dpnp::kernels::i0
Tp cyl_bessel_i0(Tp x)
Return the regular modified Bessel function of order 0.
Definition i0.hpp:239
Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
This routine returns the cylindrical Bessel functions of order 0 by series expansion.
Definition i0.hpp:77
Tp bessel_ik_0(Tp x)
Compute the modified Bessel functions.
Definition i0.hpp:104