TensorFFT.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

template <bool NeedUprade>
struct MakeComplex {
  template <typename T>
  EIGEN_DEVICE_FUNC T operator()(const T& val) const {
    return val;
  }
};

template <>
struct MakeComplex<true> {
  template <typename T>
  EIGEN_DEVICE_FUNC std::complex<T> operator()(const T& val) const {
    return std::complex<T>(val, 0);
  }
};

template <>
struct MakeComplex<false> {
  template <typename T>
  EIGEN_DEVICE_FUNC std::complex<T> operator()(const std::complex<T>& val) const {
    return val;
  }
};

template <int ResultType>
struct PartOf {
  template <typename T>
  T operator()(const T& val) const {
    return val;
  }
};

template <>
struct PartOf<RealPart> {
  template <typename T>
  T operator()(const std::complex<T>& val) const {
    return val.real();
  }
};

template <>
struct PartOf<ImagPart> {
  template <typename T>
  T operator()(const std::complex<T>& val) const {
    return val.imag();
  }
};

namespace internal {
template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
  typedef traits<XprType> XprTraits;
  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename XprTraits::Scalar InputScalar;
  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
      OutputScalar;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename traits<XprType>::PointerType PointerType;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1,
              typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
};

}  // end namespace internal

template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
      OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) : m_xpr(expr), m_fft(fft) {}

  EIGEN_DEVICE_FUNC const FFT& fft() const { return m_fft; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

 protected:
  typename XprType::Nested m_xpr;
  const FFT m_fft;
};

// Eval as rvalue
template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef internal::traits<XprType> XprTraits;
  typedef typename XprTraits::Scalar InputScalar;
  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>
      OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
  static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = true,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i];
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      }
    }
    m_size = m_dimensions.TotalSize();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (data) {
      evalToBuf(data);
      return false;
    } else {
      m_data = (EvaluatorPointerType)m_device.get(
          (CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
      evalToBuf(m_data);
      return true;
    }
  }

  EIGEN_STRONG_INLINE void cleanup() {
    if (m_data) {
      m_device.deallocate(m_data);
      m_data = NULL;
    }
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { return m_data[index]; }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

 private:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) {
    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
    ComplexScalar* buf =
        write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);

    for (Index i = 0; i < m_size; ++i) {
      buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
    }

    for (size_t i = 0; i < m_fft.size(); ++i) {
      Index dim = m_fft[i];
      eigen_assert(dim >= 0 && dim < NumDims);
      Index line_len = m_dimensions[dim];
      eigen_assert(line_len >= 1);
      ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
      const bool is_power_of_two = isPowerOfTwo(line_len);
      const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
      const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);

      ComplexScalar* a =
          is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* b =
          is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* pos_j_base_powered =
          is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
      if (!is_power_of_two) {
        // Compute twiddle factors
        // t_n = exp(sqrt(-1) * pi * n^2 / line_len)
        // for n = 0, 1,..., line_len-1.
        // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2

        // The recurrence is correct in exact arithmetic, but causes
        // numerical issues for large transforms, especially in
        // single-precision floating point.
        //
        // pos_j_base_powered[0] = ComplexScalar(1, 0);
        // if (line_len > 1) {
        //   const ComplexScalar pos_j_base = ComplexScalar(
        //       numext::cos(M_PI / line_len), numext::sin(M_PI / line_len));
        //   pos_j_base_powered[1] = pos_j_base;
        //   if (line_len > 2) {
        //     const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
        //     for (int i = 2; i < line_len + 1; ++i) {
        //       pos_j_base_powered[i] = pos_j_base_powered[i - 1] *
        //                               pos_j_base_powered[i - 1] /
        //                               pos_j_base_powered[i - 2] *
        //                               pos_j_base_sq;
        //     }
        //   }
        // }
        // TODO(rmlarsen): Find a way to use Eigen's vectorized sin
        // and cosine functions here.
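        //
        // For example, with line_len = 4 the loop below fills pos_j_base_powered with
        // exp(i*pi*{0, 1/4, 1, 9/4, 4}); this is the "chirp" sequence consumed by
        // processDataLineBluestein() further down.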
        for (int j = 0; j < line_len + 1; ++j) {
          double arg = ((EIGEN_PI * j) * j) / line_len;
          std::complex<double> tmp(numext::cos(arg), numext::sin(arg));
          pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp);
        }
      }

      for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
        const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);

        // get data into line_buf
        const Index stride = m_strides[dim];
        if (stride == 1) {
          m_device.memcpy(line_buf, &buf[base_offset], line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          for (int j = 0; j < line_len; ++j, offset += stride) {
            line_buf[j] = buf[offset];
          }
        }

        // process the line
        if (is_power_of_two) {
          processDataLineCooleyTukey(line_buf, line_len, log_len);
        } else {
          processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
        }

        // write back
        if (FFTDir == FFT_FORWARD && stride == 1) {
          m_device.memcpy(&buf[base_offset], line_buf, line_len * sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
          for (int j = 0; j < line_len; ++j, offset += stride) {
            buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
          }
        }
      }
      m_device.deallocate(line_buf);
      if (!is_power_of_two) {
        m_device.deallocate(a);
        m_device.deallocate(b);
        m_device.deallocate(pos_j_base_powered);
      }
    }

    if (!write_to_out) {
      for (Index i = 0; i < m_size; ++i) {
        data[i] = PartOf<FFTResultType>()(buf[i]);
      }
      m_device.deallocate(buf);
    }
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE bool isPowerOfTwo(Index x) {
    eigen_assert(x > 0);
    return !(x & (x - 1));
  }

  // The composite number for padding, used in Bluestein's FFT algorithm
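  // (In this implementation the padded length is simply the smallest power of two >= 2 * n - 1,
  //  e.g. n = 5 pads to 16, so the padded transforms can reuse the Cooley-Tukey kernel.)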
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index findGoodComposite(Index n) {
    Index i = 2;
    while (i < 2 * n - 1) i *= 2;
    return i;
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index getLog2(Index m) {
    Index log2m = 0;
    while (m >>= 1) log2m++;
    return log2m;
  }

  // Call Cooley Tukey algorithm directly, data length must be power of 2
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len,
                                                                        Index log_len) {
    eigen_assert(isPowerOfTwo(line_len));
    scramble_FFT(line_buf, line_len);
    compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
  }

  // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
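  // Outline of the steps below (forward direction; the inverse flips the conjugations):
  // multiply the length-n line by the conjugated chirp pos_j_base_powered, zero-pad to length m,
  // convolve with the chirp via two forward FFTs and one inverse FFT of length m, then multiply by
  // the conjugated chirp once more.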
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len,
                                                                      Index good_composite, Index log_len,
                                                                      ComplexScalar* a, ComplexScalar* b,
                                                                      const ComplexScalar* pos_j_base_powered) {
    Index n = line_len;
    Index m = good_composite;
    ComplexScalar* data = line_buf;

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        a[i] = data[i] * pos_j_base_powered[i];
      }
    }
    for (Index i = n; i < m; ++i) {
      a[i] = ComplexScalar(0, 0);
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[i]);
      }
    }
    for (Index i = n; i < m - n; ++i) {
      b[i] = ComplexScalar(0, 0);
    }
    for (Index i = m - n; i < m; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[m - i];
      } else {
        b[i] = numext::conj(pos_j_base_powered[m - i]);
      }
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);

    scramble_FFT(b, m);
    compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);

    for (Index i = 0; i < m; ++i) {
      a[i] *= b[i];
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);

    // Do the scaling after ifft
    for (Index i = 0; i < m; ++i) {
      a[i] /= m;
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
      } else {
        data[i] = a[i] * pos_j_base_powered[i];
      }
    }
  }

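  // In-place bit-reversal permutation: element i is swapped with the element whose index has the
  // reversed bit pattern (e.g. for n = 8, indices 1 (001) and 4 (100) are exchanged), so that the
  // in-place decimation-in-time butterflies that follow produce output in natural order.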
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void scramble_FFT(ComplexScalar* data, Index n) {
    eigen_assert(isPowerOfTwo(n));
    Index j = 1;
    for (Index i = 1; i < n; ++i) {
      if (j > i) {
        std::swap(data[j - 1], data[i - 1]);
      }
      Index m = n >> 1;
      while (m >= 2 && j > m) {
        j -= m;
        m >>= 1;
      }
      j += m;
    }
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
    ComplexScalar tmp = data[1];
    data[1] = data[0] - data[1];
    data[0] += tmp;
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
    ComplexScalar tmp[4];
    tmp[0] = data[0] + data[1];
    tmp[1] = data[0] - data[1];
    tmp[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
    } else {
      tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
    }
    data[0] = tmp[0] + tmp[2];
    data[1] = tmp[1] + tmp[3];
    data[2] = tmp[0] - tmp[2];
    data[3] = tmp[1] - tmp[3];
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
    ComplexScalar tmp_1[8];
    ComplexScalar tmp_2[8];

    tmp_1[0] = data[0] + data[1];
    tmp_1[1] = data[0] - data[1];
    tmp_1[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
    } else {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
    }
    tmp_1[4] = data[4] + data[5];
    tmp_1[5] = data[4] - data[5];
    tmp_1[6] = data[6] + data[7];
    if (Dir == FFT_FORWARD) {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
    } else {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
    }
    tmp_2[0] = tmp_1[0] + tmp_1[2];
    tmp_2[1] = tmp_1[1] + tmp_1[3];
    tmp_2[2] = tmp_1[0] - tmp_1[2];
    tmp_2[3] = tmp_1[1] - tmp_1[3];
    tmp_2[4] = tmp_1[4] + tmp_1[6];
// SQRT2DIV2 = sqrt(2)/2
#define SQRT2DIV2 0.7071067811865476
    if (Dir == FFT_FORWARD) {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
    } else {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
    }
    data[0] = tmp_2[0] + tmp_2[4];
    data[1] = tmp_2[1] + tmp_2[5];
    data[2] = tmp_2[2] + tmp_2[6];
    data[3] = tmp_2[3] + tmp_2[7];
    data[4] = tmp_2[0] - tmp_2[4];
    data[5] = tmp_2[1] - tmp_2[5];
    data[6] = tmp_2[2] - tmp_2[6];
    data[7] = tmp_2[3] - tmp_2[7];
  }

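  // Radix-2 decimation-in-time merge: combines two already-transformed halves of length n/2 into a
  // length-n transform, four butterflies per loop iteration. The twiddle step wp_one = 1 + wp equals
  // exp(-2*pi*i/n) in the forward direction (and its conjugate in the inverse direction), built from
  // the sine-based lookup tables at the bottom of this evaluator.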
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(ComplexScalar* data, Index n, Index n_power_of_2) {
    // Original code:
    // RealScalar wtemp = std::sin(M_PI/n);
    // RealScalar wpi = -std::sin(2 * M_PI/n);
    const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
    const RealScalar wpi =
        (Dir == FFT_FORWARD) ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2] : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];

    const ComplexScalar wp(wtemp, wpi);
    const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
    const ComplexScalar wp_one_2 = wp_one * wp_one;
    const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
    const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
    const Index n2 = n / 2;
    ComplexScalar w(1.0, 0.0);
    for (Index i = 0; i < n2; i += 4) {
      ComplexScalar temp0(data[i + n2] * w);
      ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
      ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
      ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
      w = w * wp_one_4;

      data[i + n2] = data[i] - temp0;
      data[i] += temp0;

      data[i + 1 + n2] = data[i + 1] - temp1;
      data[i + 1] += temp1;

      data[i + 2 + n2] = data[i + 2] - temp2;
      data[i + 2] += temp2;

      data[i + 3 + n2] = data[i + 3] - temp3;
      data[i + 3] += temp3;
    }
  }

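  // Recursive driver: split the length-n transform into two halves until n <= 8, handle the base
  // cases with the hard-coded radix-2/4/8 butterflies above, then merge with butterfly_1D_merge.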
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, Index n, Index n_power_of_2) {
    eigen_assert(isPowerOfTwo(n));
    if (n > 8) {
      compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
      compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
      butterfly_1D_merge<Dir>(data, n, n_power_of_2);
    } else if (n == 8) {
      butterfly_8<Dir>(data);
    } else if (n == 4) {
      butterfly_4<Dir>(data);
    } else if (n == 2) {
      butterfly_2<Dir>(data);
    }
  }

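  // Maps the index of a line (taken along dimension omitted_dim) to the flat offset of that line's
  // first element; the coordinate along omitted_dim itself is left at zero, and getIndexFromOffset
  // below adds it back in.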
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
    Index result = 0;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > omitted_dim; --i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    } else {
      for (Index i = 0; i < omitted_dim; ++i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    }
    // The value of index_coords[omitted_dim] is not determined at this step
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
    Index result = base + offset * m_strides[omitted_dim];
    return result;
  }

 protected:
  Index m_size;
  const FFT EIGEN_DEVICE_REF m_fft;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  EvaluatorPointerType m_data;
  const Device EIGEN_DEVICE_REF m_device;

  // This will support a maximum FFT size of 2^32 for each dimension
  // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
  const RealScalar m_sin_PI_div_n_LUT[32] = {RealScalar(0.0),
                                             RealScalar(-2),
                                             RealScalar(-0.999999999999999),
                                             RealScalar(-0.292893218813453),
                                             RealScalar(-0.0761204674887130),
                                             RealScalar(-0.0192147195967696),
                                             RealScalar(-0.00481527332780311),
                                             RealScalar(-0.00120454379482761),
                                             RealScalar(-3.01181303795779e-04),
                                             RealScalar(-7.52981608554592e-05),
                                             RealScalar(-1.88247173988574e-05),
                                             RealScalar(-4.70619042382852e-06),
                                             RealScalar(-1.17654829809007e-06),
                                             RealScalar(-2.94137117780840e-07),
                                             RealScalar(-7.35342821488550e-08),
                                             RealScalar(-1.83835707061916e-08),
                                             RealScalar(-4.59589268710903e-09),
                                             RealScalar(-1.14897317243732e-09),
                                             RealScalar(-2.87243293150586e-10),
                                             RealScalar(-7.18108232902250e-11),
                                             RealScalar(-1.79527058227174e-11),
                                             RealScalar(-4.48817645568941e-12),
                                             RealScalar(-1.12204411392298e-12),
                                             RealScalar(-2.80511028480785e-13),
                                             RealScalar(-7.01277571201985e-14),
                                             RealScalar(-1.75319392800498e-14),
                                             RealScalar(-4.38298482001247e-15),
                                             RealScalar(-1.09574620500312e-15),
                                             RealScalar(-2.73936551250781e-16),
                                             RealScalar(-6.84841378126949e-17),
                                             RealScalar(-1.71210344531737e-17),
                                             RealScalar(-4.28025861329343e-18)};

636 
637  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
638  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {RealScalar(0.0),
639  RealScalar(0.0),
640  RealScalar(-1.00000000000000e+00),
641  RealScalar(-7.07106781186547e-01),
642  RealScalar(-3.82683432365090e-01),
643  RealScalar(-1.95090322016128e-01),
644  RealScalar(-9.80171403295606e-02),
645  RealScalar(-4.90676743274180e-02),
646  RealScalar(-2.45412285229123e-02),
647  RealScalar(-1.22715382857199e-02),
648  RealScalar(-6.13588464915448e-03),
649  RealScalar(-3.06795676296598e-03),
650  RealScalar(-1.53398018628477e-03),
651  RealScalar(-7.66990318742704e-04),
652  RealScalar(-3.83495187571396e-04),
653  RealScalar(-1.91747597310703e-04),
654  RealScalar(-9.58737990959773e-05),
655  RealScalar(-4.79368996030669e-05),
656  RealScalar(-2.39684498084182e-05),
657  RealScalar(-1.19842249050697e-05),
658  RealScalar(-5.99211245264243e-06),
659  RealScalar(-2.99605622633466e-06),
660  RealScalar(-1.49802811316901e-06),
661  RealScalar(-7.49014056584716e-07),
662  RealScalar(-3.74507028292384e-07),
663  RealScalar(-1.87253514146195e-07),
664  RealScalar(-9.36267570730981e-08),
665  RealScalar(-4.68133785365491e-08),
666  RealScalar(-2.34066892682746e-08),
667  RealScalar(-1.17033446341373e-08),
668  RealScalar(-5.85167231706864e-09),
669  RealScalar(-2.92583615853432e-09)};
670 };

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
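
For orientation, here is a minimal sketch of how this expression is normally reached from user code: TensorBase::fft() wraps its argument in a TensorFFTOp, and the evaluator above then runs one 1-D transform per requested dimension. The tensor sizes, the dimension list, and the BothParts/RealPart choice below are illustrative assumptions, not taken from this file.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // A small real-valued 2-D tensor (sizes chosen arbitrarily for the example).
  Eigen::Tensor<float, 2> input(4, 8);
  input.setRandom();

  // Transform along both dimensions. BothParts keeps the complex spectrum, while
  // RealPart / ImagPart would yield a real tensor holding the corresponding component.
  Eigen::array<int, 2> dims{{0, 1}};
  Eigen::Tensor<std::complex<float>, 2> spectrum =
      input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(dims);

  // Round trip: the reverse direction divides each line by its length (the div_factor
  // scaling in evalToBuf), so this approximately recovers the input up to rounding.
  Eigen::Tensor<std::complex<float>, 2> roundtrip =
      spectrum.fft<Eigen::BothParts, Eigen::FFT_REVERSE>(dims);

  std::cout << input(0, 0) << " ~= " << roundtrip(0, 0).real() << std::endl;
  return 0;
}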