Eigen-unsupported  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
TensorFFT.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
11#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
12
13// IWYU pragma: private
14#include "./InternalHeaderCheck.h"
15
16namespace Eigen {
17
18template <bool IsReal>
19struct MakeComplex {
20 template <typename T>
21 EIGEN_DEVICE_FUNC T operator()(const T& val) const {
22 return val;
23 }
24};
25
26template <>
27struct MakeComplex<true> {
28 template <typename T>
29 EIGEN_DEVICE_FUNC internal::make_complex_t<T> operator()(const T& val) const {
30 return internal::make_complex_t<T>(val, T(0));
31 }
32};
33
34template <int ResultType>
35struct PartOf {
36 template <typename T>
37 T operator()(const T& val) const {
38 return val;
39 }
40};
41
42template <>
43struct PartOf<RealPart> {
44 template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
45 typename NumTraits<T>::Real operator()(const T& val) const {
46 return Eigen::numext::real(val);
47 }
48};
49
50template <>
51struct PartOf<ImagPart> {
52 template <typename T, typename EnableIf = std::enable_if_t<NumTraits<T>::IsComplex>>
53 typename NumTraits<T>::Real operator()(const T& val) const {
54 return Eigen::numext::imag(val);
55 }
56};
57
58namespace internal {
59template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
60struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
61 typedef traits<XprType> XprTraits;
62 typedef typename XprTraits::Scalar Scalar;
63 typedef typename NumTraits<Scalar>::Real RealScalar;
64 typedef make_complex_t<Scalar> ComplexScalar;
65 typedef typename XprTraits::Scalar InputScalar;
66 typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
67 OutputScalar;
68 typedef typename XprTraits::StorageKind StorageKind;
69 typedef typename XprTraits::Index Index;
70 typedef typename XprType::Nested Nested;
71 typedef std::remove_reference_t<Nested> Nested_;
72 static constexpr int NumDimensions = XprTraits::NumDimensions;
73 static constexpr int Layout = XprTraits::Layout;
74 typedef typename traits<XprType>::PointerType PointerType;
75};
76
77template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
78struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
79 typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
80};
81
82template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
83struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1,
84 typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
85 typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
86};
87
88} // end namespace internal
89
100template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
101class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
102 public:
103 typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
104 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
105 typedef internal::make_complex_t<Scalar> ComplexScalar;
106 typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
107 OutputScalar;
108 typedef OutputScalar CoeffReturnType;
109 typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
110 typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
111 typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;
112
113 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) : m_xpr(expr), m_fft(fft) {}
114
115 EIGEN_DEVICE_FUNC const FFT& fft() const { return m_fft; }
116
117 EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
118
119 protected:
120 typename XprType::Nested m_xpr;
121 const FFT m_fft;
122};
123
124// Eval as rvalue
125template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
126struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
128 typedef typename XprType::Index Index;
129 static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
130 typedef DSizes<Index, NumDims> Dimensions;
131 typedef typename XprType::Scalar Scalar;
132 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
133 typedef internal::make_complex_t<Scalar> ComplexScalar;
134 typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
135 typedef internal::traits<XprType> XprTraits;
136 typedef typename XprTraits::Scalar InputScalar;
137 typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
138 OutputScalar;
139 typedef OutputScalar CoeffReturnType;
140 typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
141 static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
142 typedef StorageMemory<CoeffReturnType, Device> Storage;
143 typedef typename Storage::Type EvaluatorPointerType;
144
145 static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
146 enum {
147 IsAligned = false,
148 PacketAccess = true,
149 BlockAccess = false,
150 PreferBlockAccess = false,
151 CoordAccess = false,
152 RawAccess = false
153 };
154
155 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
156 typedef internal::TensorBlockNotImplemented TensorBlock;
157 //===--------------------------------------------------------------------===//
158
159 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
160 : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
161 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
162 for (int i = 0; i < NumDims; ++i) {
163 eigen_assert(input_dims[i] > 0);
164 m_dimensions[i] = input_dims[i];
165 }
166
167 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
168 m_strides[0] = 1;
169 for (int i = 1; i < NumDims; ++i) {
170 m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
171 }
172 } else {
173 m_strides[NumDims - 1] = 1;
174 for (int i = NumDims - 2; i >= 0; --i) {
175 m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
176 }
177 }
178 m_size = m_dimensions.TotalSize();
179 }
180
181 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
182
183 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
184 m_impl.evalSubExprsIfNeeded(NULL);
185 if (data) {
186 evalToBuf(data);
187 return false;
188 } else {
189 m_data = (EvaluatorPointerType)m_device.get(
190 (CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
191 evalToBuf(m_data);
192 return true;
193 }
194 }
195
196 EIGEN_STRONG_INLINE void cleanup() {
197 if (m_data) {
198 m_device.deallocate(m_data);
199 m_data = NULL;
200 }
201 m_impl.cleanup();
202 }
203
204 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { return m_data[index]; }
205
206 template <int LoadMode>
207 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
208 return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
209 }
210
211 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
212 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
213 }
214
215 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
216
217 private:
218 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) {
219 const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
220 ComplexScalar* buf =
221 write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
222
223 for (Index i = 0; i < m_size; ++i) {
224 buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
225 }
226
227 for (size_t i = 0; i < m_fft.size(); ++i) {
228 Index dim = m_fft[i];
229 eigen_assert(dim >= 0 && dim < NumDims);
230 Index line_len = m_dimensions[dim];
231 eigen_assert(line_len >= 1);
232 ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
233 const bool is_power_of_two = isPowerOfTwo(line_len);
234 const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
235 const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
236
237 ComplexScalar* a =
238 is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
239 ComplexScalar* b =
240 is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
241 ComplexScalar* pos_j_base_powered =
242 is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
243 if (!is_power_of_two) {
244 // Compute twiddle factors
245 // t_n = exp(sqrt(-1) * pi * n^2 / line_len)
246 // for n = 0, 1,..., line_len-1.
247 // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
248
249 // The recurrence is correct in exact arithmetic, but causes
250 // numerical issues for large transforms, especially in
251 // single-precision floating point.
252 //
253 // pos_j_base_powered[0] = ComplexScalar(1, 0);
254 // if (line_len > 1) {
255 // const ComplexScalar pos_j_base = ComplexScalar(
256 // numext::cos(EIGEN_PI / line_len), numext::sin(EIGEN_PI / line_len));
257 // pos_j_base_powered[1] = pos_j_base;
258 // if (line_len > 2) {
259 // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
260 // for (int i = 2; i < line_len + 1; ++i) {
261 // pos_j_base_powered[i] = pos_j_base_powered[i - 1] *
262 // pos_j_base_powered[i - 1] /
263 // pos_j_base_powered[i - 2] *
264 // pos_j_base_sq;
265 // }
266 // }
267 // }
268 // TODO(rmlarsen): Find a way to use Eigen's vectorized sin
269 // and cosine functions here.
270 for (int j = 0; j < line_len + 1; ++j) {
271 double arg = ((EIGEN_PI * j) * j) / line_len;
272 std::complex<double> tmp(numext::cos(arg), numext::sin(arg));
273 pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp);
274 }
275 }
276
277 for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
278 const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
279
280 // get data into line_buf
281 const Index stride = m_strides[dim];
282 if (stride == 1) {
283 m_device.memcpy(line_buf, &buf[base_offset], line_len * sizeof(ComplexScalar));
284 } else {
285 Index offset = base_offset;
286 for (int j = 0; j < line_len; ++j, offset += stride) {
287 line_buf[j] = buf[offset];
288 }
289 }
290
291 // process the line
292 if (is_power_of_two) {
293 processDataLineCooleyTukey(line_buf, line_len, log_len);
294 } else {
295 processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
296 }
297
298 // write back
299 if (FFTDir == FFT_FORWARD && stride == 1) {
300 m_device.memcpy(&buf[base_offset], line_buf, line_len * sizeof(ComplexScalar));
301 } else {
302 Index offset = base_offset;
303 const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
304 for (int j = 0; j < line_len; ++j, offset += stride) {
305 buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
306 }
307 }
308 }
309 m_device.deallocate(line_buf);
310 if (!is_power_of_two) {
311 m_device.deallocate(a);
312 m_device.deallocate(b);
313 m_device.deallocate(pos_j_base_powered);
314 }
315 }
316
317 if (!write_to_out) {
318 for (Index i = 0; i < m_size; ++i) {
319 data[i] = PartOf<FFTResultType>()(buf[i]);
320 }
321 m_device.deallocate(buf);
322 }
323 }
324
325 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
326 eigen_assert(x > 0);
327 return !(x & (x - 1));
328 }
329
330 // The composite number for padding, used in Bluestein's FFT algorithm
331 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
332 Index i = 2;
333 while (i < 2 * n - 1) i *= 2;
334 return i;
335 }
336
337 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
338 Index log2m = 0;
339 while (m >>= 1) log2m++;
340 return log2m;
341 }
342
343 // Call Cooley Tukey algorithm directly, data length must be power of 2
344 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len,
345 Index log_len) {
346 eigen_assert(isPowerOfTwo(line_len));
347 scramble_FFT(line_buf, line_len);
348 compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
349 }
350
351 // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
352 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len,
353 Index good_composite, Index log_len,
354 ComplexScalar* a, ComplexScalar* b,
355 const ComplexScalar* pos_j_base_powered) {
356 Index n = line_len;
357 Index m = good_composite;
358 ComplexScalar* data = line_buf;
359
360 for (Index i = 0; i < n; ++i) {
361 if (FFTDir == FFT_FORWARD) {
362 a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
363 } else {
364 a[i] = data[i] * pos_j_base_powered[i];
365 }
366 }
367 for (Index i = n; i < m; ++i) {
368 a[i] = ComplexScalar(0, 0);
369 }
370
371 for (Index i = 0; i < n; ++i) {
372 if (FFTDir == FFT_FORWARD) {
373 b[i] = pos_j_base_powered[i];
374 } else {
375 b[i] = numext::conj(pos_j_base_powered[i]);
376 }
377 }
378 for (Index i = n; i < m - n; ++i) {
379 b[i] = ComplexScalar(0, 0);
380 }
381 for (Index i = m - n; i < m; ++i) {
382 if (FFTDir == FFT_FORWARD) {
383 b[i] = pos_j_base_powered[m - i];
384 } else {
385 b[i] = numext::conj(pos_j_base_powered[m - i]);
386 }
387 }
388
389 scramble_FFT(a, m);
390 compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
391
392 scramble_FFT(b, m);
393 compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
394
395 for (Index i = 0; i < m; ++i) {
396 a[i] *= b[i];
397 }
398
399 scramble_FFT(a, m);
400 compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
401
402 // Do the scaling after ifft
403 for (Index i = 0; i < m; ++i) {
404 a[i] /= m;
405 }
406
407 for (Index i = 0; i < n; ++i) {
408 if (FFTDir == FFT_FORWARD) {
409 data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
410 } else {
411 data[i] = a[i] * pos_j_base_powered[i];
412 }
413 }
414 }
415
416 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
417 eigen_assert(isPowerOfTwo(n));
418 Index j = 1;
419 for (Index i = 1; i < n; ++i) {
420 if (j > i) {
421 std::swap(data[j - 1], data[i - 1]);
422 }
423 Index m = n >> 1;
424 while (m >= 2 && j > m) {
425 j -= m;
426 m >>= 1;
427 }
428 j += m;
429 }
430 }
431
432 template <int Dir>
433 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
434 ComplexScalar tmp = data[1];
435 data[1] = data[0] - data[1];
436 data[0] += tmp;
437 }
438
439 template <int Dir>
440 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
441 ComplexScalar tmp[4];
442 tmp[0] = data[0] + data[1];
443 tmp[1] = data[0] - data[1];
444 tmp[2] = data[2] + data[3];
445 if (Dir == FFT_FORWARD) {
446 tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
447 } else {
448 tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
449 }
450 data[0] = tmp[0] + tmp[2];
451 data[1] = tmp[1] + tmp[3];
452 data[2] = tmp[0] - tmp[2];
453 data[3] = tmp[1] - tmp[3];
454 }
455
456 template <int Dir>
457 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
458 ComplexScalar tmp_1[8];
459 ComplexScalar tmp_2[8];
460
461 tmp_1[0] = data[0] + data[1];
462 tmp_1[1] = data[0] - data[1];
463 tmp_1[2] = data[2] + data[3];
464 if (Dir == FFT_FORWARD) {
465 tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
466 } else {
467 tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
468 }
469 tmp_1[4] = data[4] + data[5];
470 tmp_1[5] = data[4] - data[5];
471 tmp_1[6] = data[6] + data[7];
472 if (Dir == FFT_FORWARD) {
473 tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
474 } else {
475 tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
476 }
477 tmp_2[0] = tmp_1[0] + tmp_1[2];
478 tmp_2[1] = tmp_1[1] + tmp_1[3];
479 tmp_2[2] = tmp_1[0] - tmp_1[2];
480 tmp_2[3] = tmp_1[1] - tmp_1[3];
481 tmp_2[4] = tmp_1[4] + tmp_1[6];
482// SQRT2DIV2 = sqrt(2)/2
483#define SQRT2DIV2 0.7071067811865476
484 if (Dir == FFT_FORWARD) {
485 tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
486 tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
487 tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
488 } else {
489 tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
490 tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
491 tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
492 }
493 data[0] = tmp_2[0] + tmp_2[4];
494 data[1] = tmp_2[1] + tmp_2[5];
495 data[2] = tmp_2[2] + tmp_2[6];
496 data[3] = tmp_2[3] + tmp_2[7];
497 data[4] = tmp_2[0] - tmp_2[4];
498 data[5] = tmp_2[1] - tmp_2[5];
499 data[6] = tmp_2[2] - tmp_2[6];
500 data[7] = tmp_2[3] - tmp_2[7];
501 }
502
503 template <int Dir>
504 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(ComplexScalar* data, Index n, Index n_power_of_2) {
505 // Original code:
506 // RealScalar wtemp = std::sin(EIGEN_PI/n);
507 // RealScalar wpi = -std::sin(2 * EIGEN_PI/n);
508 const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
509 const RealScalar wpi =
510 (Dir == FFT_FORWARD) ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2] : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
511
512 const ComplexScalar wp(wtemp, wpi);
513 const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
514 const ComplexScalar wp_one_2 = wp_one * wp_one;
515 const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
516 const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
517 const Index n2 = n / 2;
518 ComplexScalar w(1.0, 0.0);
519 for (Index i = 0; i < n2; i += 4) {
520 ComplexScalar temp0(data[i + n2] * w);
521 ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
522 ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
523 ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
524 w = w * wp_one_4;
525
526 data[i + n2] = data[i] - temp0;
527 data[i] += temp0;
528
529 data[i + 1 + n2] = data[i + 1] - temp1;
530 data[i + 1] += temp1;
531
532 data[i + 2 + n2] = data[i + 2] - temp2;
533 data[i + 2] += temp2;
534
535 data[i + 3 + n2] = data[i + 3] - temp3;
536 data[i + 3] += temp3;
537 }
538 }
539
540 template <int Dir>
541 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, Index n, Index n_power_of_2) {
542 eigen_assert(isPowerOfTwo(n));
543 if (n > 8) {
544 compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
545 compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
546 butterfly_1D_merge<Dir>(data, n, n_power_of_2);
547 } else if (n == 8) {
548 butterfly_8<Dir>(data);
549 } else if (n == 4) {
550 butterfly_4<Dir>(data);
551 } else if (n == 2) {
552 butterfly_2<Dir>(data);
553 }
554 }
555
556 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
557 Index result = 0;
558
559 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
560 for (int i = NumDims - 1; i > omitted_dim; --i) {
561 const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
562 const Index idx = index / partial_m_stride;
563 index -= idx * partial_m_stride;
564 result += idx * m_strides[i];
565 }
566 result += index;
567 } else {
568 for (Index i = 0; i < omitted_dim; ++i) {
569 const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
570 const Index idx = index / partial_m_stride;
571 index -= idx * partial_m_stride;
572 result += idx * m_strides[i];
573 }
574 result += index;
575 }
576 // Value of index_coords[omitted_dim] is not determined to this step
577 return result;
578 }
579
580 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
581 Index result = base + offset * m_strides[omitted_dim];
582 return result;
583 }
584
585 protected:
586 Index m_size;
587 const FFT EIGEN_DEVICE_REF m_fft;
588 Dimensions m_dimensions;
589 array<Index, NumDims> m_strides;
590 TensorEvaluator<ArgType, Device> m_impl;
591 EvaluatorPointerType m_data;
592 const Device EIGEN_DEVICE_REF m_device;
593
594 // This will support a maximum FFT size of 2^32 for each dimension
595 // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(EIGEN_PI / std::pow(2,i)) ^ 2;
596 const RealScalar m_sin_PI_div_n_LUT[32] = {RealScalar(0.0),
597 RealScalar(-2),
598 RealScalar(-0.999999999999999),
599 RealScalar(-0.292893218813453),
600 RealScalar(-0.0761204674887130),
601 RealScalar(-0.0192147195967696),
602 RealScalar(-0.00481527332780311),
603 RealScalar(-0.00120454379482761),
604 RealScalar(-3.01181303795779e-04),
605 RealScalar(-7.52981608554592e-05),
606 RealScalar(-1.88247173988574e-05),
607 RealScalar(-4.70619042382852e-06),
608 RealScalar(-1.17654829809007e-06),
609 RealScalar(-2.94137117780840e-07),
610 RealScalar(-7.35342821488550e-08),
611 RealScalar(-1.83835707061916e-08),
612 RealScalar(-4.59589268710903e-09),
613 RealScalar(-1.14897317243732e-09),
614 RealScalar(-2.87243293150586e-10),
615 RealScalar(-7.18108232902250e-11),
616 RealScalar(-1.79527058227174e-11),
617 RealScalar(-4.48817645568941e-12),
618 RealScalar(-1.12204411392298e-12),
619 RealScalar(-2.80511028480785e-13),
620 RealScalar(-7.01277571201985e-14),
621 RealScalar(-1.75319392800498e-14),
622 RealScalar(-4.38298482001247e-15),
623 RealScalar(-1.09574620500312e-15),
624 RealScalar(-2.73936551250781e-16),
625 RealScalar(-6.84841378126949e-17),
626 RealScalar(-1.71210344531737e-17),
627 RealScalar(-4.28025861329343e-18)};
628
629 // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * EIGEN_PI / std::pow(2,i));
630 const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {RealScalar(0.0),
631 RealScalar(0.0),
632 RealScalar(-1.00000000000000e+00),
633 RealScalar(-7.07106781186547e-01),
634 RealScalar(-3.82683432365090e-01),
635 RealScalar(-1.95090322016128e-01),
636 RealScalar(-9.80171403295606e-02),
637 RealScalar(-4.90676743274180e-02),
638 RealScalar(-2.45412285229123e-02),
639 RealScalar(-1.22715382857199e-02),
640 RealScalar(-6.13588464915448e-03),
641 RealScalar(-3.06795676296598e-03),
642 RealScalar(-1.53398018628477e-03),
643 RealScalar(-7.66990318742704e-04),
644 RealScalar(-3.83495187571396e-04),
645 RealScalar(-1.91747597310703e-04),
646 RealScalar(-9.58737990959773e-05),
647 RealScalar(-4.79368996030669e-05),
648 RealScalar(-2.39684498084182e-05),
649 RealScalar(-1.19842249050697e-05),
650 RealScalar(-5.99211245264243e-06),
651 RealScalar(-2.99605622633466e-06),
652 RealScalar(-1.49802811316901e-06),
653 RealScalar(-7.49014056584716e-07),
654 RealScalar(-3.74507028292384e-07),
655 RealScalar(-1.87253514146195e-07),
656 RealScalar(-9.36267570730981e-08),
657 RealScalar(-4.68133785365491e-08),
658 RealScalar(-2.34066892682746e-08),
659 RealScalar(-1.17033446341373e-08),
660 RealScalar(-5.85167231706864e-09),
661 RealScalar(-2.92583615853432e-09)};
662};
663
664} // end namespace Eigen
665
666#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
The tensor base class.
Definition TensorForwardDeclarations.h:68
Tensor FFT class.
Definition TensorFFT.h:101
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
The tensor evaluator class.
Definition TensorEvaluator.h:30