TensorMorphing.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"
namespace Eigen {

/** \class TensorReshaping
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor reshaping class.
 */
namespace internal {
template <typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<NewDimensions>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> {
  typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
};

template <typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1,
              typename eval<TensorReshapingOp<NewDimensions, XprType>>::type> {
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

} // end namespace internal
template <typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> {
 public:
  typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
  typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
      : m_xpr(expr), m_dims(dims) {}

  EIGEN_DEVICE_FUNC const NewDimensions& dimensions() const { return m_dims; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)

 protected:
  typename XprType::Nested m_xpr;
  const NewDimensions m_dims;
};
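
// Illustrative usage (a sketch, not part of this header's API surface):
// TensorReshapingOp is normally constructed through TensorBase::reshape().
// The total number of coefficients must be preserved, e.g.:
//
//   Eigen::Tensor<float, 2> a(4, 6);
//   a.setRandom();
//   Eigen::array<Eigen::Index, 3> new_dims{{2, 3, 4}};
//   Eigen::Tensor<float, 3> r = a.reshape(new_dims);  // 4*6 == 2*3*4 coefficients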

// Eval as rvalue
template <typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> {
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;

  static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
  static constexpr int NumInputDims =
      internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;

  enum ReshapingKind {
    // We do not use layout information to determine reshaping kind.
    // Depending on the layout `N` can be inner or outer dimension.
    OneByN = 0,  // expr.reshape(1, N)
    NByOne = 1,  // expr.reshape(N, 1)
    Runtime = 2  // Reshape dimensions are dynamic (specified at runtime).
  };

  // clang-format off
  static const ReshapingKind kind =
        (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
      : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
      : Runtime;
  // clang-format on
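
  // Note: `kind` can only resolve to OneByN/NByOne when the new dimensions are
  // known at compile time, e.g. via Eigen::IndexList. A sketch (the exact
  // IndexList spelling here is illustrative):
  //
  //   Eigen::IndexList<Eigen::type2index<1>, Eigen::Index> one_by_n;
  //   one_by_n.set(1, n);
  //   auto r = expr.reshape(one_by_n);  // kind == OneByN
  //
  // With plain DSizes/array dimensions, index_statically_eq() cannot prove
  // anything and `kind` falls back to Runtime.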

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    // For trivial reshapes with raw access to underlying data we will provide
    // zero overhead block access.
    // TODO(ezhulenev): Consider adding block access without raw access?
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess && NumInputDims > 0 && NumOutputDims > 0,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims, Layout, Index> TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions()) {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType data, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
#endif

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { return m_impl.evalSubExprsIfNeeded(data); }
  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_impl.coeff(index); }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  // required in block(OutputTensorBlock* output_block) const
  // For C++03 compatibility this must be defined outside the method
  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_impl.data() != NULL);
    eigen_assert((kind == Runtime) || (kind == OneByN && desc.dimensions()[0] == 1) ||
                 (kind == NByOne && desc.dimensions()[1] == 1));

    if (kind == OneByN || kind == NByOne) {
      // We can guarantee at compile time that block is just a contiguous slice
      // of the underlying expression memory buffer.
      return TensorBlock(internal::TensorBlockKind::kView, m_impl.data() + desc.offset(), desc.dimensions());
    } else {
      // This will do additional runtime checks, and in the end it might be also
      // a view, or it might be a block materialized in the temporary buffer.
      return TensorBlock::materialize(m_impl.data(), m_dimensions, desc, scratch);
    }
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return constCast(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};

// Eval as lvalue
template <typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>

{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(TensorBlockAssign::target(desc.dimensions(), internal::strides<Layout>(this->dimensions()),
                                                     this->m_impl.data(), desc.offset()),
                           block.expr());
  }
};
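
// Illustrative usage (a sketch): the lvalue evaluator above is what makes a
// reshape view assignable, e.g.
//
//   Eigen::Tensor<float, 3> b(2, 3, 4);
//   b.setZero();
//   Eigen::Tensor<float, 2> c(4, 6);
//   c.setRandom();
//   Eigen::array<Eigen::Index, 2> flat{{4, 6}};
//   b.reshape(flat) = c;  // writes through the reshape into b's buffer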

/** \class TensorSlicing
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor slicing class.
 */
namespace internal {
template <typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> {
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
};

template <typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1,
              typename eval<TensorSlicingOp<StartIndices, Sizes, XprType>>::type> {
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

} // end namespace internal

template <typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType>> {
 public:
  typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType>> Base;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices,
                                                        const Sizes& sizes)
      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

  EIGEN_DEVICE_FUNC const StartIndices& startIndices() const { return m_indices; }
  EIGEN_DEVICE_FUNC const Sizes& sizes() const { return m_sizes; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)

 protected:
  typename XprType::Nested m_xpr;
  const StartIndices m_indices;
  const Sizes m_sizes;
};
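
// Illustrative usage (a sketch): TensorSlicingOp is normally constructed
// through TensorBase::slice(), taking per-dimension start offsets and extents:
//
//   Eigen::Tensor<float, 2> a(4, 6);
//   a.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> s = a.slice(offsets, extents);  // 2x3 window at (1, 2)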

namespace internal {

// Fixme: figure out the exact threshold
template <typename Index, typename Device, bool BlockAccess>
struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) {}
  EIGEN_DEVICE_FUNC bool operator()(Index total, Index contiguous) const {
    const bool prefer_block_evaluation = BlockAccess && total > 32 * 1024;
    return !prefer_block_evaluation && contiguous > threshold_;
  }

 private:
  Index threshold_;
};
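
// For instance (illustrative numbers): on a 4-thread CPU device threshold_ is 8,
// so a slice whose contiguous inner run exceeds 8 coefficients is copied with
// memcpy, unless the expression supports block access and the total slice size
// exceeds 32*1024 coefficients, in which case block evaluation is preferred.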

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess>
struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) {}
  EIGEN_DEVICE_FUNC bool operator()(Index, Index contiguous) const { return contiguous > 4 * 1024 * 1024; }
};
#endif

// It is likewise very expensive to start the memcpy kernel on SYCL devices: we
// therefore only use it for large copies.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess>
struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) {}
  EIGEN_DEVICE_FUNC bool operator()(Index, Index contiguous) const { return contiguous > 4 * 1024 * 1024; }
};
#endif

} // namespace internal

// Eval as rvalue
template <typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> {
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    // FIXME: Temporary workaround for bug in slicing of bool tensors.
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
                  !internal::is_same<std::remove_const_t<Scalar>, bool>::value,
    PreferBlockAccess = true,
    CoordAccess = false,
    RawAccess = false
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  // Tensor slicing does not change the block type.
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) {
    m_is_identity = true;
    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
      if (m_impl.dimensions()[i] != op.sizes()[i] || op.startIndices()[i] != 0) {
        m_is_identity = false;
      }
    }

    // No strides for scalars.
    if (NumDims == 0) return;

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i - 1] * output_dims[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i + 1] * output_dims[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
      if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
        EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src + offset),
                          contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + packetSize - 1 < internal::array_prod(dimensions()));

    if (m_is_identity) {
      return m_impl.template packet<LoadMode>(index);
    }

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims - 1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims - 1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize - 1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize - 1; ++i) {
        values[i] = coeff(index + i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    TensorBlock block = m_impl.block(arg_desc, scratch);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    return block;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i + 1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i - 1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims - 1]);
    }
    return inputIndex;
  }
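
  // Worked example for srcCoeff above (illustrative): take a ColMajor 4x6
  // input sliced with offsets [1, 2] and extents [2, 3]. Then
  // m_outputStrides = [1, 2] and m_inputStrides = [1, 4]. For output index 4,
  // i.e. output coordinate (0, 2):
  //   i = 1: idx = 4 / 2 = 2, inputIndex += (2 + 2) * 4 = 16, index -> 0
  //   inner dim: inputIndex += 0 + 1 = 17
  // which is input coordinate (1, 4): the output coordinate shifted by the
  // slice offsets.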

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  bool m_is_identity;
  const StartIndices m_offsets;
};

// Eval as lvalue
template <typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> {
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    CoordAccess = false,
    RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    if (this->m_is_identity) {
      this->m_impl.template writePacket<StoreMode>(index, x);
      return;
    }

    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims - 1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims - 1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    } else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize - 1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize - 1; ++i) {
        this->coeffRef(index + i) = values[i];
      }
    }
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc& desc, const TensorBlock& block) {
    TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    this->m_impl.writeBlock(arg_desc, block);
  }
};
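
// Illustrative usage (a sketch): the lvalue evaluator makes slices assignable:
//
//   Eigen::Tensor<float, 2> a(4, 6);
//   a.setZero();
//   Eigen::Tensor<float, 2> patch(2, 3);
//   patch.setConstant(1.0f);
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   a.slice(offsets, extents) = patch;  // writes the 2x3 patch into a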

namespace internal {
template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> {
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
};

template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1,
              typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>>::type> {
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

} // end namespace internal

template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp
    : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> {
 public:
  typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> Base;
  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(const XprType& expr, const StartIndices& startIndices,
                                                                const StopIndices& stopIndices, const Strides& strides)
      : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), m_strides(strides) {}

  EIGEN_DEVICE_FUNC const StartIndices& startIndices() const { return m_startIndices; }
  EIGEN_DEVICE_FUNC const StartIndices& stopIndices() const { return m_stopIndices; }
  EIGEN_DEVICE_FUNC const StartIndices& strides() const { return m_strides; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)

 protected:
  typename XprType::Nested m_xpr;
  const StartIndices m_startIndices;
  const StopIndices m_stopIndices;
  const Strides m_strides;
};
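
// Illustrative usage (a sketch): TensorStridingSlicingOp is normally
// constructed through TensorBase::stridedSlice(). Negative strides walk
// backwards through the input, e.g.:
//
//   Eigen::Tensor<float, 1> a(10);
//   a.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{7}}, stop{{1}}, stride{{-2}};
//   Eigen::Tensor<float, 1> r = a.stridedSlice(start, stop, stride);
//   // r has size ceil((1 - 7) / -2) = 3 and holds a(7), a(5), a(3)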

// Eval as rvalue
template <typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> {
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef Strides Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by assert */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    const InputDimensions& input_dims = m_impl.dimensions();

    // compute output tensor shape
    m_is_identity = true;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
      } else {
        m_dimensions[i] = (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
      if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
        m_is_identity = false;
      }
    }

    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i - 1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i - 1] * output_dims[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims - 1] = m_strides[NumDims - 1];
      m_offsets[NumDims - 1] = startIndicesClamped[NumDims - 1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i + 1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i + 1] * output_dims[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { return NULL; }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef SYCL_DEVICE_ONLY
    return numext::maxi(min, numext::mini(max, value));
#else
    return cl::sycl::clamp(value, min, max);
#endif
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  bool m_is_identity;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  DSizes<Index, NumDims> m_startIndices;  // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets;  // offset in a flattened shape
  const Strides m_strides;
};

// Eval as lvalue
template <typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> {
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H