// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

/** \class TensorChippingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief A chip is a thin slice of a tensor, obtained by fixing one
  * dimension at a given offset; in a 2-d tensor it corresponds to a column
  * or a row.
  *
  */
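
// A minimal usage sketch (illustrative only; the tensor names are
// hypothetical, but Tensor::chip() is the public entry point for this op).
// Chipping fixes one dimension at a given offset and removes it:
//
//   Eigen::Tensor<float, 3> input(2, 3, 5);
//   input.setRandom();
//   // Fix dimension 1 at offset 2: the result has rank 2 and shape (2, 5).
//   Eigen::Tensor<float, 2> slice = input.chip(2, 1);
//   // Same chip with the dimension given at compile time, which allows
//   // DimensionId<DimId> below to carry no runtime state:
//   Eigen::Tensor<float, 2> slice2 = input.chip<1>(2);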

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};

} // end namespace internal
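
// DimensionId above encodes the chipped dimension either at compile time
// (DimId >= 0, no storage) or at run time (DimId == Dynamic, one stored
// index). A standalone sketch of the same idea, with illustrative names that
// are not part of Eigen:
//
//   template <int StaticDim>
//   struct Dim {                        // dimension fixed at compile time
//     int value() const { return StaticDim; }   // no storage needed
//   };
//   template <>
//   struct Dim<-1> {                    // dimension chosen at run time
//     explicit Dim(int d) : d_(d) {}    // one integer of storage
//     int value() const { return d_; }
//     int d_;
//   };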

template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
    typedef TensorBase<TensorChippingOp<DimId, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
        : m_xpr(expr), m_offset(offset), m_dim(dim) {
    }

    EIGEN_DEVICE_FUNC
    const Index offset() const { return m_offset; }
    EIGEN_DEVICE_FUNC
    const Index dim() const { return m_dim.actualDim(); }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};
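
// Because TensorChippingOp inherits the tensor assignment operators, a chip
// is also usable as an lvalue (see the second evaluator below). A small
// sketch with hypothetical tensor names:
//
//   Eigen::Tensor<float, 3> t(4, 5, 6);
//   Eigen::Tensor<float, 2> plane(4, 6);
//   t.setZero();
//   plane.setConstant(1.0f);
//   t.chip(2, 1) = plane;  // overwrite the slice at offset 2 along dim 1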

// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    // Chipping the outer-most dimension is a trivial operation, because we
    // can read and write directly from the underlying tensor using a single
    // offset.
    IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == 0),
    // Chipping the inner-most dimension.
    IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
    // Prefer block access if the underlying expression prefers it, otherwise
    // only if chipping is not trivial.
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
                        !IsOuterChipping,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorBlockDescriptor<NumInputDims, Index>
      ArgTensorBlockDesc;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }
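
  // A worked example of the stride bookkeeping above (illustrative numbers,
  // not part of the library): for a ColMajor input of dimensions (4, 5, 6)
  // chipped along dimension 1 at offset 2,
  //   m_stride      = 4           (product of the dimensions before dim 1)
  //   m_inputStride = 4 * 5 = 20  (m_stride times the chipped dimension)
  //   m_inputOffset = 4 * 2 = 8   (m_stride times the offset)
  // Output index 7 (coordinate (3, 1) of the 4x6 result) then maps through
  // srcCoeff() below to input index 7/4 * 20 + 8 + 7%4 = 31, which is input
  // coordinate (3, 2, 1).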

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // Crosses the stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i]
          = i < chip_dim ? desc.dimension(i)
          : i > chip_dim ? desc.dimension(i - 1)
          : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // Try to reuse the destination buffer for materializing the argument block.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
            = i < chip_dim ? desc.destination().strides()[i]
            : i > chip_dim ? desc.destination().strides()[i - 1]
            : 0;  // for dimensions of size `1` the stride is never used
      }

      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

    if (arg_block.data() != NULL) {
      // Forward the argument block buffer if possible.
      return TensorBlock(arg_block.kind(), arg_block.data(),
                         desc.dimensions());

    } else {
      // Assign the argument block expression to a buffer.

      // Prepare storage for the materialized chipping result.
      const typename TensorBlock::Storage block_storage =
          TensorBlock::prepareStorage(desc, scratch);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              arg_desc.dimensions(),
              internal::strides<Layout>(arg_desc.dimensions()),
              block_storage.data()),
          arg_block.expr());

      return block_storage.AsTensorMaterializedBlock();
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer
      // division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims-1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};
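
// Note on data() above: only an outer chip is a contiguous sub-range of the
// input, which is why data() forwards a raw pointer in that case alone.
// Illustrative sketch (hypothetical tensor): for a ColMajor Tensor<float, 3>
// of dimensions (4, 5, 6), t.chip(3, 2) covers the contiguous input range
// [60, 80), so data() can return base + 60; t.chip(0, 0) instead touches
// every 4th element and must return NULL.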

// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // Crosses the stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H