1 // (C) Copyright Nick Thompson 2019.
2 // Use, modification and distribution are subject to the
3 // Boost Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 #ifndef BOOST_MATH_DIFFERENTIATION_LANCZOS_SMOOTHING_HPP
7 #define BOOST_MATH_DIFFERENTIATION_LANCZOS_SMOOTHING_HPP
8 #include <cmath> // for std::abs
9 #include <limits> // to nan initialize
10 #include <vector>
11 #include <string>
12 #include <stdexcept>
13 #include <boost/assert.hpp>
14
15 namespace boost::math::differentiation {
16
17 namespace detail {
18 template <typename Real>
19 class discrete_legendre {
20 public:
discrete_legendre(std::size_t n,Real x)21 explicit discrete_legendre(std::size_t n, Real x) : m_n{n}, m_r{2}, m_x{x},
22 m_qrm2{1}, m_qrm1{x},
23 m_qrm2p{0}, m_qrm1p{1},
24 m_qrm2pp{0}, m_qrm1pp{0}
25 {
26 using std::abs;
27 BOOST_ASSERT_MSG(abs(m_x) <= 1, "Three term recurrence is stable only for |x| <=1.");
28 // The integer n indexes a family of discrete Legendre polynomials indexed by k <= 2*n
29 }
30
norm_sq(int r) const31 Real norm_sq(int r) const
32 {
33 Real prod = Real(2) / Real(2 * r + 1);
34 for (int k = -r; k <= r; ++k) {
35 prod *= Real(2 * m_n + 1 + k) / Real(2 * m_n);
36 }
37 return prod;
38 }
39
next()40 Real next()
41 {
42 Real N = 2 * m_n + 1;
43 Real num = (m_r - 1) * (N * N - (m_r - 1) * (m_r - 1)) * m_qrm2;
44 Real tmp = (2 * m_r - 1) * m_x * m_qrm1 - num / Real(4 * m_n * m_n);
45 m_qrm2 = m_qrm1;
46 m_qrm1 = tmp / m_r;
47 ++m_r;
48 return m_qrm1;
49 }
50
next_prime()51 Real next_prime()
52 {
53 Real N = 2 * m_n + 1;
54 Real s = (m_r - 1) * (N * N - (m_r - 1) * (m_r - 1)) / Real(4 * m_n * m_n);
55 Real tmp1 = ((2 * m_r - 1) * m_x * m_qrm1 - s * m_qrm2) / m_r;
56 Real tmp2 = ((2 * m_r - 1) * (m_qrm1 + m_x * m_qrm1p) - s * m_qrm2p) / m_r;
57 m_qrm2 = m_qrm1;
58 m_qrm1 = tmp1;
59 m_qrm2p = m_qrm1p;
60 m_qrm1p = tmp2;
61 ++m_r;
62 return m_qrm1p;
63 }
64
next_dbl_prime()65 Real next_dbl_prime()
66 {
67 Real N = 2*m_n + 1;
68 Real trm1 = 2*m_r - 1;
69 Real s = (m_r - 1) * (N * N - (m_r - 1) * (m_r - 1)) / Real(4 * m_n * m_n);
70 Real rqrpp = 2*trm1*m_qrm1p + trm1*m_x*m_qrm1pp - s*m_qrm2pp;
71 Real tmp1 = ((2 * m_r - 1) * m_x * m_qrm1 - s * m_qrm2) / m_r;
72 Real tmp2 = ((2 * m_r - 1) * (m_qrm1 + m_x * m_qrm1p) - s * m_qrm2p) / m_r;
73 m_qrm2 = m_qrm1;
74 m_qrm1 = tmp1;
75 m_qrm2p = m_qrm1p;
76 m_qrm1p = tmp2;
77 m_qrm2pp = m_qrm1pp;
78 m_qrm1pp = rqrpp/m_r;
79 ++m_r;
80 return m_qrm1pp;
81 }
82
operator ()(Real x,std::size_t k)83 Real operator()(Real x, std::size_t k)
84 {
85 BOOST_ASSERT_MSG(k <= 2 * m_n, "r <= 2n is required.");
86 if (k == 0)
87 {
88 return 1;
89 }
90 if (k == 1)
91 {
92 return x;
93 }
94 Real qrm2 = 1;
95 Real qrm1 = x;
96 Real N = 2 * m_n + 1;
97 for (std::size_t r = 2; r <= k; ++r) {
98 Real num = (r - 1) * (N * N - (r - 1) * (r - 1)) * qrm2;
99 Real tmp = (2 * r - 1) * x * qrm1 - num / Real(4 * m_n * m_n);
100 qrm2 = qrm1;
101 qrm1 = tmp / r;
102 }
103 return qrm1;
104 }
105
prime(Real x,std::size_t k)106 Real prime(Real x, std::size_t k) {
107 BOOST_ASSERT_MSG(k <= 2 * m_n, "r <= 2n is required.");
108 if (k == 0) {
109 return 0;
110 }
111 if (k == 1) {
112 return 1;
113 }
114 Real qrm2 = 1;
115 Real qrm1 = x;
116 Real qrm2p = 0;
117 Real qrm1p = 1;
118 Real N = 2 * m_n + 1;
119 for (std::size_t r = 2; r <= k; ++r) {
120 Real s =
121 (r - 1) * (N * N - (r - 1) * (r - 1)) / Real(4 * m_n * m_n);
122 Real tmp1 = ((2 * r - 1) * x * qrm1 - s * qrm2) / r;
123 Real tmp2 = ((2 * r - 1) * (qrm1 + x * qrm1p) - s * qrm2p) / r;
124 qrm2 = qrm1;
125 qrm1 = tmp1;
126 qrm2p = qrm1p;
127 qrm1p = tmp2;
128 }
129 return qrm1p;
130 }
131
132 private:
133 std::size_t m_n;
134 std::size_t m_r;
135 Real m_x;
136 Real m_qrm2;
137 Real m_qrm1;
138 Real m_qrm2p;
139 Real m_qrm1p;
140 Real m_qrm2pp;
141 Real m_qrm1pp;
142 };
143
144 template <class Real>
interior_velocity_filter(std::size_t n,std::size_t p)145 std::vector<Real> interior_velocity_filter(std::size_t n, std::size_t p) {
146 auto dlp = discrete_legendre<Real>(n, 0);
147 std::vector<Real> coeffs(p+1);
148 coeffs[1] = 1/dlp.norm_sq(1);
149 for (std::size_t l = 3; l < p + 1; l += 2)
150 {
151 dlp.next_prime();
152 coeffs[l] = dlp.next_prime()/ dlp.norm_sq(l);
153 }
154
155 // We could make the filter length n, as f[0] = 0,
156 // but that'd make the indexing awkward when applying the filter.
157 std::vector<Real> f(n + 1);
158 // This value should never be read, but this is the correct value *if it is read*.
159 // Hmm, should it be a nan then? I'm not gonna agonize.
160 f[0] = 0;
161 for (std::size_t j = 1; j < f.size(); ++j)
162 {
163 Real arg = Real(j) / Real(n);
164 dlp = discrete_legendre<Real>(n, arg);
165 f[j] = coeffs[1]*arg;
166 for (std::size_t l = 3; l <= p; l += 2)
167 {
168 dlp.next();
169 f[j] += coeffs[l]*dlp.next();
170 }
171 f[j] /= (n * n);
172 }
173 return f;
174 }
175
176 template <class Real>
boundary_velocity_filter(std::size_t n,std::size_t p,int64_t s)177 std::vector<Real> boundary_velocity_filter(std::size_t n, std::size_t p, int64_t s)
178 {
179 std::vector<Real> coeffs(p+1, std::numeric_limits<Real>::quiet_NaN());
180 Real sn = Real(s) / Real(n);
181 auto dlp = discrete_legendre<Real>(n, sn);
182 coeffs[0] = 0;
183 coeffs[1] = 1/dlp.norm_sq(1);
184 for (std::size_t l = 2; l < p + 1; ++l)
185 {
186 // Calculation of the norms is common to all filters,
187 // so it seems like an obvious optimization target.
188 // I tried this: The spent in computing the norms time is not negligible,
189 // but still a small fraction of the total compute time.
190 // Hence I'm not refactoring out these norm calculations.
191 coeffs[l] = dlp.next_prime()/ dlp.norm_sq(l);
192 }
193
194 std::vector<Real> f(2*n + 1);
195 for (std::size_t k = 0; k < f.size(); ++k)
196 {
197 Real j = Real(k) - Real(n);
198 Real arg = j/Real(n);
199 dlp = discrete_legendre<Real>(n, arg);
200 f[k] = coeffs[1]*arg;
201 for (std::size_t l = 2; l <= p; ++l)
202 {
203 f[k] += coeffs[l]*dlp.next();
204 }
205 f[k] /= (n * n);
206 }
207 return f;
208 }
209
210 template <class Real>
acceleration_filter(std::size_t n,std::size_t p,int64_t s)211 std::vector<Real> acceleration_filter(std::size_t n, std::size_t p, int64_t s)
212 {
213 BOOST_ASSERT_MSG(p <= 2*n, "Approximation order must be <= 2*n");
214 BOOST_ASSERT_MSG(p > 2, "Approximation order must be > 2");
215
216 std::vector<Real> coeffs(p+1, std::numeric_limits<Real>::quiet_NaN());
217 Real sn = Real(s) / Real(n);
218 auto dlp = discrete_legendre<Real>(n, sn);
219 coeffs[0] = 0;
220 coeffs[1] = 0;
221 for (std::size_t l = 2; l < p + 1; ++l)
222 {
223 coeffs[l] = dlp.next_dbl_prime()/ dlp.norm_sq(l);
224 }
225
226 std::vector<Real> f(2*n + 1, 0);
227 for (std::size_t k = 0; k < f.size(); ++k)
228 {
229 Real j = Real(k) - Real(n);
230 Real arg = j/Real(n);
231 dlp = discrete_legendre<Real>(n, arg);
232 for (std::size_t l = 2; l <= p; ++l)
233 {
234 f[k] += coeffs[l]*dlp.next();
235 }
236 f[k] /= (n * n * n);
237 }
238 return f;
239 }
240
241
242 } // namespace detail
243
244 template <typename Real, std::size_t order = 1>
245 class discrete_lanczos_derivative {
246 public:
discrete_lanczos_derivative(Real const & spacing,std::size_t n=18,std::size_t approximation_order=3)247 discrete_lanczos_derivative(Real const & spacing,
248 std::size_t n = 18,
249 std::size_t approximation_order = 3)
250 : m_dt{spacing}
251 {
252 static_assert(!std::is_integral_v<Real>,
253 "Spacing must be a floating point type.");
254 BOOST_ASSERT_MSG(spacing > 0,
255 "Spacing between samples must be > 0.");
256
257 if constexpr (order == 1)
258 {
259 BOOST_ASSERT_MSG(approximation_order <= 2 * n,
260 "The approximation order must be <= 2n");
261 BOOST_ASSERT_MSG(approximation_order >= 2,
262 "The approximation order must be >= 2");
263
264 if constexpr (std::is_same_v<Real, float> || std::is_same_v<Real, double>)
265 {
266 auto interior = detail::interior_velocity_filter<long double>(n, approximation_order);
267 m_f.resize(interior.size());
268 for (std::size_t j = 0; j < interior.size(); ++j)
269 {
270 m_f[j] = static_cast<Real>(interior[j])/m_dt;
271 }
272 }
273 else
274 {
275 m_f = detail::interior_velocity_filter<Real>(n, approximation_order);
276 for (auto & x : m_f)
277 {
278 x /= m_dt;
279 }
280 }
281
282 m_boundary_filters.resize(n);
283 // This for loop is a natural candidate for parallelization.
284 // But does it matter? Probably not.
285 for (std::size_t i = 0; i < n; ++i)
286 {
287 if constexpr (std::is_same_v<Real, float> || std::is_same_v<Real, double>)
288 {
289 int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
290 auto bf = detail::boundary_velocity_filter<long double>(n, approximation_order, s);
291 m_boundary_filters[i].resize(bf.size());
292 for (std::size_t j = 0; j < bf.size(); ++j)
293 {
294 m_boundary_filters[i][j] = static_cast<Real>(bf[j])/m_dt;
295 }
296 }
297 else
298 {
299 int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
300 m_boundary_filters[i] = detail::boundary_velocity_filter<Real>(n, approximation_order, s);
301 for (auto & bf : m_boundary_filters[i])
302 {
303 bf /= m_dt;
304 }
305 }
306 }
307 }
308 else if constexpr (order == 2)
309 {
310 // High precision isn't warranted for small p; only for large p.
311 // (The computation appears stable for large n.)
312 // But given that the filters are reusable for many vectors,
313 // it's better to do a high precision computation and then cast back,
314 // since the resulting cost is a factor of 2, and the cost of the filters not working is hours of debugging.
315 if constexpr (std::is_same_v<Real, double> || std::is_same_v<Real, float>)
316 {
317 auto f = detail::acceleration_filter<long double>(n, approximation_order, 0);
318 m_f.resize(n+1);
319 for (std::size_t i = 0; i < m_f.size(); ++i)
320 {
321 m_f[i] = static_cast<Real>(f[i+n])/(m_dt*m_dt);
322 }
323 m_boundary_filters.resize(n);
324 for (std::size_t i = 0; i < n; ++i)
325 {
326 int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
327 auto bf = detail::acceleration_filter<long double>(n, approximation_order, s);
328 m_boundary_filters[i].resize(bf.size());
329 for (std::size_t j = 0; j < bf.size(); ++j)
330 {
331 m_boundary_filters[i][j] = static_cast<Real>(bf[j])/(m_dt*m_dt);
332 }
333 }
334 }
335 else
336 {
337 // Given that the purpose is denoising, for higher precision calculations,
338 // the default precision should be fine.
339 auto f = detail::acceleration_filter<Real>(n, approximation_order, 0);
340 m_f.resize(n+1);
341 for (std::size_t i = 0; i < m_f.size(); ++i)
342 {
343 m_f[i] = f[i+n]/(m_dt*m_dt);
344 }
345 m_boundary_filters.resize(n);
346 for (std::size_t i = 0; i < n; ++i)
347 {
348 int64_t s = static_cast<int64_t>(i) - static_cast<int64_t>(n);
349 m_boundary_filters[i] = detail::acceleration_filter<Real>(n, approximation_order, s);
350 for (auto & bf : m_boundary_filters[i])
351 {
352 bf /= (m_dt*m_dt);
353 }
354 }
355 }
356 }
357 else
358 {
359 BOOST_ASSERT_MSG(false, "Derivatives of order 3 and higher are not implemented.");
360 }
361 }
362
get_spacing() const363 Real get_spacing() const
364 {
365 return m_dt;
366 }
367
368 template<class RandomAccessContainer>
operator ()(RandomAccessContainer const & v,std::size_t i) const369 Real operator()(RandomAccessContainer const & v, std::size_t i) const
370 {
371 static_assert(std::is_same_v<typename RandomAccessContainer::value_type, Real>,
372 "The type of the values in the vector provided does not match the type in the filters.");
373
374 BOOST_ASSERT_MSG(std::size(v) >= m_boundary_filters[0].size(),
375 "Vector must be at least as long as the filter length");
376
377 if constexpr (order==1)
378 {
379 if (i >= m_f.size() - 1 && i <= std::size(v) - m_f.size())
380 {
381 // The filter has length >= 1:
382 Real dvdt = m_f[1] * (v[i + 1] - v[i - 1]);
383 for (std::size_t j = 2; j < m_f.size(); ++j)
384 {
385 dvdt += m_f[j] * (v[i + j] - v[i - j]);
386 }
387 return dvdt;
388 }
389
390 // m_f.size() = N+1
391 if (i < m_f.size() - 1)
392 {
393 auto &bf = m_boundary_filters[i];
394 Real dvdt = bf[0]*v[0];
395 for (std::size_t j = 1; j < bf.size(); ++j)
396 {
397 dvdt += bf[j] * v[j];
398 }
399 return dvdt;
400 }
401
402 if (i > std::size(v) - m_f.size() && i < std::size(v))
403 {
404 int k = std::size(v) - 1 - i;
405 auto &bf = m_boundary_filters[k];
406 Real dvdt = bf[0]*v[std::size(v)-1];
407 for (std::size_t j = 1; j < bf.size(); ++j)
408 {
409 dvdt += bf[j] * v[std::size(v) - 1 - j];
410 }
411 return -dvdt;
412 }
413 }
414 else if constexpr (order==2)
415 {
416 if (i >= m_f.size() - 1 && i <= std::size(v) - m_f.size())
417 {
418 Real d2vdt2 = m_f[0]*v[i];
419 for (std::size_t j = 1; j < m_f.size(); ++j)
420 {
421 d2vdt2 += m_f[j] * (v[i + j] + v[i - j]);
422 }
423 return d2vdt2;
424 }
425
426 // m_f.size() = N+1
427 if (i < m_f.size() - 1)
428 {
429 auto &bf = m_boundary_filters[i];
430 Real d2vdt2 = bf[0]*v[0];
431 for (std::size_t j = 1; j < bf.size(); ++j)
432 {
433 d2vdt2 += bf[j] * v[j];
434 }
435 return d2vdt2;
436 }
437
438 if (i > std::size(v) - m_f.size() && i < std::size(v))
439 {
440 int k = std::size(v) - 1 - i;
441 auto &bf = m_boundary_filters[k];
442 Real d2vdt2 = bf[0] * v[std::size(v) - 1];
443 for (std::size_t j = 1; j < bf.size(); ++j)
444 {
445 d2vdt2 += bf[j] * v[std::size(v) - 1 - j];
446 }
447 return d2vdt2;
448 }
449 }
450
451 // OOB access:
452 std::string msg = "Out of bounds access in Lanczos derivative.";
453 msg += "Input vector has length " + std::to_string(std::size(v)) + ", but user requested access at index " + std::to_string(i) + ".";
454 throw std::out_of_range(msg);
455 return std::numeric_limits<Real>::quiet_NaN();
456 }
457
458 template<class RandomAccessContainer>
operator ()(RandomAccessContainer const & v,RandomAccessContainer & w) const459 void operator()(RandomAccessContainer const & v, RandomAccessContainer & w) const
460 {
461 static_assert(std::is_same_v<typename RandomAccessContainer::value_type, Real>,
462 "The type of the values in the vector provided does not match the type in the filters.");
463 if (&w[0] == &v[0])
464 {
465 throw std::logic_error("This transform cannot be performed in-place.");
466 }
467
468 if (std::size(v) < m_boundary_filters[0].size())
469 {
470 std::string msg = "The input vector must be at least as long as the filter length. ";
471 msg += "The input vector has length = " + std::to_string(std::size(v)) + ", the filter has length " + std::to_string(m_boundary_filters[0].size());
472 throw std::length_error(msg);
473 }
474
475 if (std::size(w) < std::size(v))
476 {
477 std::string msg = "The output vector (containing the derivative) must be at least as long as the input vector.";
478 msg += "The output vector has length = " + std::to_string(std::size(w)) + ", the input vector has length " + std::to_string(std::size(v));
479 throw std::length_error(msg);
480 }
481
482 if constexpr (order==1)
483 {
484 for (std::size_t i = 0; i < m_f.size() - 1; ++i)
485 {
486 auto &bf = m_boundary_filters[i];
487 Real dvdt = bf[0] * v[0];
488 for (std::size_t j = 1; j < bf.size(); ++j)
489 {
490 dvdt += bf[j] * v[j];
491 }
492 w[i] = dvdt;
493 }
494
495 for(std::size_t i = m_f.size() - 1; i <= std::size(v) - m_f.size(); ++i)
496 {
497 Real dvdt = m_f[1] * (v[i + 1] - v[i - 1]);
498 for (std::size_t j = 2; j < m_f.size(); ++j)
499 {
500 dvdt += m_f[j] *(v[i + j] - v[i - j]);
501 }
502 w[i] = dvdt;
503 }
504
505
506 for(std::size_t i = std::size(v) - m_f.size() + 1; i < std::size(v); ++i)
507 {
508 int k = std::size(v) - 1 - i;
509 auto &f = m_boundary_filters[k];
510 Real dvdt = f[0] * v[std::size(v) - 1];;
511 for (std::size_t j = 1; j < f.size(); ++j)
512 {
513 dvdt += f[j] * v[std::size(v) - 1 - j];
514 }
515 w[i] = -dvdt;
516 }
517 }
518 else if constexpr (order==2)
519 {
520 // m_f.size() = N+1
521 for (std::size_t i = 0; i < m_f.size() - 1; ++i)
522 {
523 auto &bf = m_boundary_filters[i];
524 Real d2vdt2 = 0;
525 for (std::size_t j = 0; j < bf.size(); ++j)
526 {
527 d2vdt2 += bf[j] * v[j];
528 }
529 w[i] = d2vdt2;
530 }
531
532 for (std::size_t i = m_f.size() - 1; i <= std::size(v) - m_f.size(); ++i)
533 {
534 Real d2vdt2 = m_f[0]*v[i];
535 for (std::size_t j = 1; j < m_f.size(); ++j)
536 {
537 d2vdt2 += m_f[j] * (v[i + j] + v[i - j]);
538 }
539 w[i] = d2vdt2;
540 }
541
542 for (std::size_t i = std::size(v) - m_f.size() + 1; i < std::size(v); ++i)
543 {
544 int k = std::size(v) - 1 - i;
545 auto &bf = m_boundary_filters[k];
546 Real d2vdt2 = bf[0] * v[std::size(v) - 1];
547 for (std::size_t j = 1; j < bf.size(); ++j)
548 {
549 d2vdt2 += bf[j] * v[std::size(v) - 1 - j];
550 }
551 w[i] = d2vdt2;
552 }
553 }
554 }
555
556 template<class RandomAccessContainer>
operator ()(RandomAccessContainer const & v) const557 RandomAccessContainer operator()(RandomAccessContainer const & v) const
558 {
559 RandomAccessContainer w(std::size(v));
560 this->operator()(v, w);
561 return w;
562 }
563
564
565 // Don't copy; too big.
566 discrete_lanczos_derivative( const discrete_lanczos_derivative & ) = delete;
567 discrete_lanczos_derivative& operator=(const discrete_lanczos_derivative&) = delete;
568
569 // Allow moves:
570 discrete_lanczos_derivative(discrete_lanczos_derivative&&) = default;
571 discrete_lanczos_derivative& operator=(discrete_lanczos_derivative&&) = default;
572
573 private:
574 std::vector<Real> m_f;
575 std::vector<std::vector<Real>> m_boundary_filters;
576 Real m_dt;
577 };
578
579 } // namespaces
580 #endif
581