1 //=================================================================================================
2 /*!
3 // \file blaze/math/expressions/TSVecDMatMultExpr.h
4 // \brief Header file for the transpose sparse vector/dense matrix multiplication expression
5 //
6 // Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 // This file is part of the Blaze library. You can redistribute it and/or modify it under
9 // the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 // forms, with or without modification, are permitted provided that the following conditions
11 // are met:
12 //
13 // 1. Redistributions of source code must retain the above copyright notice, this list of
14 // conditions and the following disclaimer.
15 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
16 // of conditions and the following disclaimer in the documentation and/or other materials
17 // provided with the distribution.
18 // 3. Neither the names of the Blaze development group nor the names of its contributors
19 // may be used to endorse or promote products derived from this software without specific
20 // prior written permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 // SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 */
33 //=================================================================================================
34
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
37
38
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/constraints/DenseMatrix.h>
45 #include <blaze/math/constraints/DenseVector.h>
46 #include <blaze/math/constraints/MatMatMultExpr.h>
47 #include <blaze/math/constraints/RequiresEvaluation.h>
48 #include <blaze/math/constraints/RowMajorMatrix.h>
49 #include <blaze/math/constraints/RowVector.h>
50 #include <blaze/math/constraints/SparseVector.h>
51 #include <blaze/math/constraints/TVecMatMultExpr.h>
52 #include <blaze/math/constraints/Zero.h>
53 #include <blaze/math/Exception.h>
54 #include <blaze/math/expressions/Computation.h>
55 #include <blaze/math/expressions/DenseVector.h>
56 #include <blaze/math/expressions/Forward.h>
57 #include <blaze/math/expressions/MatMatMultExpr.h>
58 #include <blaze/math/expressions/TVecMatMultExpr.h>
59 #include <blaze/math/shims/PrevMultiple.h>
60 #include <blaze/math/shims/Reset.h>
61 #include <blaze/math/shims/Serial.h>
62 #include <blaze/math/SIMD.h>
63 #include <blaze/math/traits/MultTrait.h>
64 #include <blaze/math/typetraits/HasSIMDAdd.h>
65 #include <blaze/math/typetraits/HasSIMDMult.h>
66 #include <blaze/math/typetraits/IsAligned.h>
67 #include <blaze/math/typetraits/IsComputation.h>
68 #include <blaze/math/typetraits/IsDiagonal.h>
69 #include <blaze/math/typetraits/IsExpression.h>
70 #include <blaze/math/typetraits/IsLower.h>
71 #include <blaze/math/typetraits/IsPadded.h>
72 #include <blaze/math/typetraits/IsResizable.h>
73 #include <blaze/math/typetraits/IsSIMDCombinable.h>
74 #include <blaze/math/typetraits/IsStrictlyLower.h>
75 #include <blaze/math/typetraits/IsStrictlyUpper.h>
76 #include <blaze/math/typetraits/IsUpper.h>
77 #include <blaze/math/typetraits/IsZero.h>
78 #include <blaze/math/typetraits/RequiresEvaluation.h>
79 #include <blaze/math/views/Check.h>
80 #include <blaze/system/MacroDisable.h>
81 #include <blaze/system/Optimizations.h>
82 #include <blaze/system/Thresholds.h>
83 #include <blaze/util/Assert.h>
84 #include <blaze/util/EnableIf.h>
85 #include <blaze/util/FunctionTrace.h>
86 #include <blaze/util/MaybeUnused.h>
87 #include <blaze/util/mpl/If.h>
88 #include <blaze/util/Types.h>
89
90
91 namespace blaze {
92
93 //=================================================================================================
94 //
95 // CLASS TSVECDMATMULTEXPR
96 //
97 //=================================================================================================
98
99 //*************************************************************************************************
100 /*!\brief Expression object for transpose sparse vector-dense matrix multiplications.
101 // \ingroup dense_vector_expression
102 //
103 // The TSVecDMatMultExpr class represents the compile time expression for multiplications
104 // between transpose sparse vectors and row-major dense matrices.
105 */
106 template< typename VT // Type of the left-hand side sparse vector
107 , typename MT > // Type of the right-hand side dense matrix
108 class TSVecDMatMultExpr
109 : public TVecMatMultExpr< DenseVector< TSVecDMatMultExpr<VT,MT>, true > >
110 , private Computation
111 {
112 private:
113 //**Type definitions****************************************************************************
114 using VRT = ResultType_t<VT>; //!< Result type of the left-hand side sparse vector expression.
115 using MRT = ResultType_t<MT>; //!< Result type of the right-hand side dense matrix expression.
116 using VET = ElementType_t<VRT>; //!< Element type of the left-hand side sparse vector expression.
117 using MET = ElementType_t<MRT>; //!< Element type of the right-hand side dense matrix expression.
118 using VCT = CompositeType_t<VT>; //!< Composite type of the left-hand side sparse vector expression.
119 using MCT = CompositeType_t<MT>; //!< Composite type of the right-hand side dense matrix expression.
120 //**********************************************************************************************
121
122 //**********************************************************************************************
123 //! Compilation switch for the composite type of the left-hand side sparse vector expression.
124 static constexpr bool evaluateVector = ( IsComputation_v<VT> || RequiresEvaluation_v<VT> );
125 //**********************************************************************************************
126
127 //**********************************************************************************************
128 //! Compilation switch for the composite type of the right-hand side dense matrix expression.
129 static constexpr bool evaluateMatrix = RequiresEvaluation_v<MT>;
130 //**********************************************************************************************
131
132 //**********************************************************************************************
133 /*! \cond BLAZE_INTERNAL */
134 //! Helper variable template for the explicit application of the SFINAE principle.
135 /*! This variable template is a helper for the selection of the parallel evaluation strategy.
136 In case either the vector or the matrix operand requires an intermediate evaluation, the
137 variable will be set to 1, otherwise it will be 0. */
138 template< typename T1 >
139 static constexpr bool UseSMPAssign_v = ( evaluateVector || evaluateMatrix );
140 /*! \endcond */
141 //**********************************************************************************************
142
143 //**********************************************************************************************
144 /*! \cond BLAZE_INTERNAL */
145 //! Helper variable template for the explicit application of the SFINAE principle.
146 /*! In case the matrix type and the two involved vector types are suited for a vectorized
147 computation of the vector/matrix multiplication, the variable will be set to 1, otherwise
148 it will be 0. */
149 template< typename T1, typename T2, typename T3 >
150 static constexpr bool UseVectorizedKernel_v =
151 ( useOptimizedKernels &&
152 !IsDiagonal_v<T3> &&
153 T1::simdEnabled && T3::simdEnabled &&
154 IsSIMDCombinable_v< ElementType_t<T1>
155 , ElementType_t<T2>
156 , ElementType_t<T3> > &&
157 HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
158 HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
159 /*! \endcond */
160 //**********************************************************************************************
161
162 //**********************************************************************************************
163 /*! \cond BLAZE_INTERNAL */
164 //! Helper variable template for the explicit application of the SFINAE principle.
165 /*! In case a vectorized computation of the vector/matrix multiplication is not possible, but
166 a loop-unrolled computation is feasible, the variable will be set to 1, otherwise it will
167 be 0. */
168 template< typename T1, typename T2, typename T3 >
169 static constexpr bool UseOptimizedKernel_v =
170 ( useOptimizedKernels &&
171 !UseVectorizedKernel_v<T1,T2,T3> &&
172 !IsDiagonal_v<T3> &&
173 !IsResizable_v< ElementType_t<T1> > &&
174 !IsResizable_v<VET> );
175 /*! \endcond */
176 //**********************************************************************************************
177
178 //**********************************************************************************************
179 /*! \cond BLAZE_INTERNAL */
180 //! Helper variable template for the explicit application of the SFINAE principle.
181 /*! In case neither a vectorized nor optimized computation is possible, the variable will be
182 set to 1, otherwise it will be 0. */
183 template< typename T1, typename T2, typename T3 >
184 static constexpr bool UseDefaultKernel_v =
185 ( !UseVectorizedKernel_v<T1,T2,T3> && !UseOptimizedKernel_v<T1,T2,T3> );
186 /*! \endcond */
187 //**********************************************************************************************
188
189 public:
190 //**Type definitions****************************************************************************
191 //! Type of this TSVecDMatMultExpr instance.
192 using This = TSVecDMatMultExpr<VT,MT>;
193
194 //! Base type of this TSVecDMatMultExpr instance.
195 using BaseType = TVecMatMultExpr< DenseVector<This,true> >;
196
197 using ResultType = MultTrait_t<VRT,MRT>; //!< Result type for expression template evaluations.
198 using TransposeType = TransposeType_t<ResultType>; //!< Transpose type for expression template evaluations.
199 using ElementType = ElementType_t<ResultType>; //!< Resulting element type.
200 using SIMDType = SIMDTrait_t<ElementType>; //!< Resulting SIMD element type.
201 using ReturnType = const ElementType; //!< Return type for expression template evaluations.
202 using CompositeType = const ResultType; //!< Data type for composite expression templates.
203
204 //! Composite type of the left-hand side sparse vector expression.
205 using LeftOperand = If_t< IsExpression_v<VT>, const VT, const VT& >;
206
207 //! Composite type of the right-hand side sparse matrix expression.
208 using RightOperand = If_t< IsExpression_v<MT>, const MT, const MT& >;
209
210 //! Type for the assignment of the left-hand side sparse vector operand.
211 using LT = If_t< evaluateVector, const VRT, VCT >;
212
213 //! Type for the assignment of the right-hand side dense matrix operand.
214 using RT = If_t< evaluateMatrix, const MRT, MCT >;
215 //**********************************************************************************************
216
217 //**Compilation flags***************************************************************************
218 //! Compilation switch for the expression template evaluation strategy.
219 static constexpr bool simdEnabled =
220 ( !IsDiagonal_v<MT> &&
221 MT::simdEnabled &&
222 HasSIMDAdd_v<VET,MET> &&
223 HasSIMDMult_v<VET,MET> );
224
225 //! Compilation switch for the expression template assignment strategy.
226 static constexpr bool smpAssignable =
227 ( !evaluateVector && VT::smpAssignable && !evaluateMatrix && MT::smpAssignable );
228 //**********************************************************************************************
229
230 //**SIMD properties*****************************************************************************
231 //! The number of elements packed within a single SIMD element.
232 static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
233 //**********************************************************************************************
234
235 //**Constructor*********************************************************************************
236 /*!\brief Constructor for the TSVecDMatMultExpr class.
237 //
238 // \param vec The left-hand side sparse vector operand of the multiplication expression.
239 // \param mat The right-hand side dense matrix operand of the multiplication expression.
240 */
TSVecDMatMultExpr(const VT & vec,const MT & mat)241 inline TSVecDMatMultExpr( const VT& vec, const MT& mat ) noexcept
242 : vec_( vec ) // Left-hand side sparse vector of the multiplication expression
243 , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
244 {
245 BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
246 }
247 //**********************************************************************************************
248
249 //**Subscript operator**************************************************************************
250 /*!\brief Subscript operator for the direct access to the vector elements.
251 //
252 // \param index Access index. The index has to be in the range \f$[0..N-1]\f$.
253 // \return The resulting value.
254 */
255 inline ReturnType operator[]( size_t index ) const {
256 BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
257
258 if( IsDiagonal_v<MT> )
259 {
260 return vec_[index] * mat_(index,index);
261 }
262 else if( IsLower_v<MT> )
263 {
264 const size_t begin( IsStrictlyLower_v<MT> ? index+1UL : index );
265 const size_t n ( mat_.rows() - begin );
266 return subvector( vec_, begin, n, unchecked ) *
267 subvector( column( mat_, index, unchecked ), begin, n, unchecked );
268 }
269 else if( IsUpper_v<MT> )
270 {
271 const size_t n( IsStrictlyUpper_v<MT> ? index : index+1UL );
272 return subvector( vec_, 0UL, n, unchecked ) *
273 subvector( column( mat_, index, unchecked ), 0UL, n, unchecked );
274 }
275 else
276 {
277 return vec_ * column( mat_, index, unchecked );
278 }
279 }
280 //**********************************************************************************************
281
282 //**At function*********************************************************************************
283 /*!\brief Checked access to the vector elements.
284 //
285 // \param index Access index. The index has to be in the range \f$[0..N-1]\f$.
286 // \return The resulting value.
287 // \exception std::out_of_range Invalid vector access index.
288 */
at(size_t index)289 inline ReturnType at( size_t index ) const {
290 if( index >= mat_.columns() ) {
291 BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
292 }
293 return (*this)[index];
294 }
295 //**********************************************************************************************
296
297 //**Size function*******************************************************************************
298 /*!\brief Returns the current size/dimension of the vector.
299 //
300 // \return The size of the vector.
301 */
size()302 inline size_t size() const noexcept {
303 return mat_.columns();
304 }
305 //**********************************************************************************************
306
307 //**Left operand access*************************************************************************
308 /*!\brief Returns the left-hand side sparse vector operand.
309 //
310 // \return The left-hand side sparse vector operand.
311 */
leftOperand()312 inline LeftOperand leftOperand() const noexcept {
313 return vec_;
314 }
315 //**********************************************************************************************
316
317 //**Right operand access************************************************************************
318 /*!\brief Returns the right-hand side dense matrix operand.
319 //
320 // \return The right-hand side dense matrix operand.
321 */
rightOperand()322 inline RightOperand rightOperand() const noexcept {
323 return mat_;
324 }
325 //**********************************************************************************************
326
327 //**********************************************************************************************
328 /*!\brief Returns whether the expression can alias with the given address \a alias.
329 //
330 // \param alias The alias to be checked.
331 // \return \a true in case the expression can alias, \a false otherwise.
332 */
333 template< typename T >
canAlias(const T * alias)334 inline bool canAlias( const T* alias ) const noexcept {
335 return vec_.isAliased( alias ) || mat_.isAliased( alias );
336 }
337 //**********************************************************************************************
338
339 //**********************************************************************************************
340 /*!\brief Returns whether the expression is aliased with the given address \a alias.
341 //
342 // \param alias The alias to be checked.
343 // \return \a true in case an alias effect is detected, \a false otherwise.
344 */
345 template< typename T >
isAliased(const T * alias)346 inline bool isAliased( const T* alias ) const noexcept {
347 return vec_.isAliased( alias ) || mat_.isAliased( alias );
348 }
349 //**********************************************************************************************
350
351 //**********************************************************************************************
352 /*!\brief Returns whether the operands of the expression are properly aligned in memory.
353 //
354 // \return \a true in case the operands are aligned, \a false if not.
355 */
isAligned()356 inline bool isAligned() const noexcept {
357 return mat_.isAligned();
358 }
359 //**********************************************************************************************
360
361 //**********************************************************************************************
362 /*!\brief Returns whether the expression can be used in SMP assignments.
363 //
364 // \return \a true in case the expression can be used in SMP assignments, \a false if not.
365 */
canSMPAssign()366 inline bool canSMPAssign() const noexcept {
367 return ( size() > SMP_TSVECDMATMULT_THRESHOLD );
368 }
369 //**********************************************************************************************
370
371 private:
372 //**Member variables****************************************************************************
373 LeftOperand vec_; //!< Left-hand side sparse vector of the multiplication expression.
374 RightOperand mat_; //!< Right-hand side dense matrix of the multiplication expression.
375 //**********************************************************************************************
376
377 //**Assignment to dense vectors*****************************************************************
378 /*! \cond BLAZE_INTERNAL */
379 /*!\brief Assignment of a transpose sparse vector-dense matrix multiplication to a dense vector
380 // (\f$ \vec{y}^T=\vec{x}^T*A \f$).
381 // \ingroup dense_vector
382 //
383 // \param lhs The target left-hand side dense vector.
384 // \param rhs The right-hand side multiplication expression to be assigned.
385 // \return void
386 //
387 // This function implements the performance optimized assignment of a transpose sparse vector-
388 // dense matrix multiplication expression to a dense vector.
389 */
390 template< typename VT2 > // Type of the target dense vector
assign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)391 friend inline void assign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
392 {
393 BLAZE_FUNCTION_TRACE;
394
395 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
396
397 // Evaluation of the left-hand side sparse vector operand
398 LT x( serial( rhs.vec_ ) );
399 if( x.nonZeros() == 0UL ) {
400 reset( *lhs );
401 return;
402 }
403
404 // Evaluation of the right-hand side dense matrix operand
405 RT A( serial( rhs.mat_ ) );
406
407 // Checking the evaluated operands
408 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
409 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
410 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
411 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
412
413 // Performing the sparse vector-dense matrix multiplication
414 TSVecDMatMultExpr::selectAssignKernel( *lhs, x, A );
415 }
416 /*! \endcond */
417 //**********************************************************************************************
418
419 //**Default assignment to dense vectors*********************************************************
420 /*! \cond BLAZE_INTERNAL */
421 /*!\brief Default assignment of a transpose sparse vector-dense matrix multiplication
422 // (\f$ \vec{y}^T=\vec{x}^T*A \f$).
423 // \ingroup dense_vector
424 //
425 // \param y The target left-hand side dense vector.
426 // \param x The left-hand side sparse vector operand.
427 // \param A The right-hand side dense matrix operand.
428 // \return void
429 //
430 // This function implements the default assignment kernel for the transpose sparse vector-
431 // dense matrix multiplication.
432 */
433 template< typename VT1 // Type of the left-hand side target vector
434 , typename VT2 // Type of the left-hand side vector operand
435 , typename MT1 > // Type of the right-hand side matrix operand
436 static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
437 -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
438 {
439 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
440
441 const size_t N( A.columns() );
442
443 auto element( x.begin() );
444 const auto end( x.end() );
445
446 size_t last( 0UL );
447
448 if( IsUpper_v<MT1> ) {
449 const size_t jend( IsStrictlyUpper_v<MT1> ? element->index()+1UL : element->index() );
450 for( size_t j=0UL; j<jend; ++j )
451 reset( y[j] );
452 }
453
454 for( ; element!=end; ++element )
455 {
456 const size_t index( element->index() );
457
458 if( IsDiagonal_v<MT1> )
459 {
460 for( size_t j=last; j<index; ++j )
461 reset( y[j] );
462
463 y[index] = element->value() * A(index,index);
464 last = index + 1UL;
465 }
466 else
467 {
468 const size_t jbegin( ( IsUpper_v<MT1> )
469 ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
470 :( 0UL ) );
471 const size_t jend( ( IsLower_v<MT1> )
472 ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
473 :( N ) );
474 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
475
476 for( size_t j=jbegin; j<last; ++j ) {
477 y[j] += element->value() * A(index,j);
478 }
479 for( size_t j=last; j<jend; ++j ) {
480 y[j] = element->value() * A(index,j);
481 }
482
483 last = jend;
484 }
485 }
486
487 if( IsLower_v<MT1> ) {
488 for( size_t j=last; j<N; ++j )
489 reset( y[j] );
490 }
491 }
492 /*! \endcond */
493 //**********************************************************************************************
494
495 //**Optimized assignment to dense vectors*******************************************************
496 /*! \cond BLAZE_INTERNAL */
497 /*!\brief Optimized assignment of a transpose sparse vector-dense matrix multiplication
498 // (\f$ \vec{y}^T=\vec{x}^T*A \f$).
499 // \ingroup dense_vector
500 //
501 // \param y The target left-hand side dense vector.
502 // \param x The left-hand side sparse vector operand.
503 // \param A The right-hand side dense matrix operand.
504 // \return void
505 //
506 // This function implements the optimized assignment kernel for the transpose sparse vector-
507 // dense matrix multiplication.
508 */
509 template< typename VT1 // Type of the left-hand side target vector
510 , typename VT2 // Type of the left-hand side vector operand
511 , typename MT1 > // Type of the right-hand side matrix operand
512 static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
513 -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
514 {
515 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
516
517 const size_t N( A.columns() );
518
519 auto element( x.begin() );
520 const auto end( x.end() );
521
522 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
523 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
524
525 if( ipos > 3UL )
526 {
527 const size_t i1( element->index() );
528 const VET v1( element->value() );
529 ++element;
530 const size_t i2( element->index() );
531 const VET v2( element->value() );
532 ++element;
533 const size_t i3( element->index() );
534 const VET v3( element->value() );
535 ++element;
536 const size_t i4( element->index() );
537 const VET v4( element->value() );
538 ++element;
539
540 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
541
542 for( size_t j=0UL; j<N; ++j ) {
543 y[j] = v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
544 }
545 }
546 else
547 {
548 const size_t i1( element->index() );
549 const VET v1( element->value() );
550 ++element;
551
552 for( size_t j=0UL; j<N; ++j ) {
553 y[j] = v1 * A(i1,j);
554 }
555 }
556
557 for( size_t i=(ipos>3UL)?(4UL):(1UL); (i+4UL)<=ipos; i+=4UL )
558 {
559 const size_t i1( element->index() );
560 const VET v1( element->value() );
561 ++element;
562 const size_t i2( element->index() );
563 const VET v2( element->value() );
564 ++element;
565 const size_t i3( element->index() );
566 const VET v3( element->value() );
567 ++element;
568 const size_t i4( element->index() );
569 const VET v4( element->value() );
570 ++element;
571
572 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
573
574 const size_t jbegin( ( IsUpper_v<MT1> )
575 ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
576 :( 0UL ) );
577 const size_t jend( ( IsLower_v<MT1> )
578 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
579 :( N ) );
580 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
581
582 for( size_t j=jbegin; j<jend; ++j ) {
583 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
584 }
585 }
586 for( ; element!=end; ++element )
587 {
588 const size_t i1( element->index() );
589 const VET v1( element->value() );
590
591 const size_t jbegin( ( IsUpper_v<MT1> )
592 ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
593 :( 0UL ) );
594 const size_t jend( ( IsLower_v<MT1> )
595 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
596 :( N ) );
597 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
598
599 for( size_t j=jbegin; j<jend; ++j ) {
600 y[j] += v1 * A(i1,j);
601 }
602 }
603 }
604 /*! \endcond */
605 //**********************************************************************************************
606
607 //**Vectorized assignment to dense vectors******************************************************
608 /*! \cond BLAZE_INTERNAL */
609 /*!\brief Vectorized assignment of a transpose sparse vector-dense matrix multiplication
610 // (\f$ \vec{y}^T=\vec{x}^T*A \f$).
611 // \ingroup dense_vector
612 //
613 // \param y The target left-hand side dense vector.
614 // \param x The left-hand side sparse vector operand.
615 // \param A The right-hand side dense matrix operand.
616 // \return void
617 //
618 // This function implements the vectorized assignment kernel for the transpose sparse vector-
619 // dense matrix multiplication.
620 */
621 template< typename VT1 // Type of the left-hand side target vector
622 , typename VT2 // Type of the left-hand side vector operand
623 , typename MT1 > // Type of the right-hand side matrix operand
624 static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
625 -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
626 {
627 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
628
629 constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
630
631 const size_t N( A.columns() );
632
633 auto element( x.begin() );
634 const auto end( x.end() );
635
636 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
637 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
638
639 if( ipos > 3UL )
640 {
641 const size_t i1( element->index() );
642 const VET v1( element->value() );
643 ++element;
644 const size_t i2( element->index() );
645 const VET v2( element->value() );
646 ++element;
647 const size_t i3( element->index() );
648 const VET v3( element->value() );
649 ++element;
650 const size_t i4( element->index() );
651 const VET v4( element->value() );
652 ++element;
653
654 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
655
656 const SIMDType xmm1( set( v1 ) );
657 const SIMDType xmm2( set( v2 ) );
658 const SIMDType xmm3( set( v3 ) );
659 const SIMDType xmm4( set( v4 ) );
660
661 const size_t jpos( remainder ? prevMultiple( N, SIMDSIZE ) : N );
662 BLAZE_INTERNAL_ASSERT( jpos <= N, "Invalid end calculation" );
663
664 size_t j( 0UL );
665
666 for( ; j<jpos; j+=SIMDSIZE ) {
667 y.store( j, xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
668 }
669 for( ; remainder && j<N; ++j ) {
670 y[j] = v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
671 }
672 }
673 else
674 {
675 const size_t i1( element->index() );
676 const VET v1( element->value() );
677 ++element;
678
679 const SIMDType xmm1( set( v1 ) );
680
681 const size_t jpos( remainder ? prevMultiple( N, SIMDSIZE ) : N );
682 BLAZE_INTERNAL_ASSERT( jpos <= N, "Invalid end calculation" );
683
684 size_t j( 0UL );
685
686 for( ; j<jpos; j+=SIMDSIZE ) {
687 y.store( j, xmm1 * A.load(i1,j) );
688 }
689 for( ; remainder && j<N; ++j ) {
690 y[j] = v1 * A(i1,j);
691 }
692 }
693
694 for( size_t i=(ipos>3UL)?(4UL):(1UL); (i+4UL)<=ipos; i+=4UL )
695 {
696 const size_t i1( element->index() );
697 const VET v1( element->value() );
698 ++element;
699 const size_t i2( element->index() );
700 const VET v2( element->value() );
701 ++element;
702 const size_t i3( element->index() );
703 const VET v3( element->value() );
704 ++element;
705 const size_t i4( element->index() );
706 const VET v4( element->value() );
707 ++element;
708
709 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
710
711 const SIMDType xmm1( set( v1 ) );
712 const SIMDType xmm2( set( v2 ) );
713 const SIMDType xmm3( set( v3 ) );
714 const SIMDType xmm4( set( v4 ) );
715
716 const size_t jbegin( ( IsUpper_v<MT1> )
717 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
718 :( 0UL ) );
719 const size_t jend( ( IsLower_v<MT1> )
720 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
721 :( N ) );
722 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
723
724 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
725 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
726
727 size_t j( jbegin );
728
729 for( ; j<jpos; j+=SIMDSIZE ) {
730 y.store( j, y.load(j) + xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
731 }
732 for( ; remainder && j<jend; ++j ) {
733 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
734 }
735 }
736 for( ; element!=end; ++element )
737 {
738 const size_t i1( element->index() );
739 const VET v1( element->value() );
740
741 const SIMDType xmm1( set( v1 ) );
742
743 const size_t jbegin( ( IsUpper_v<MT1> )
744 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
745 :( 0UL ) );
746 const size_t jend( ( IsLower_v<MT1> )
747 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
748 :( N ) );
749 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
750
751 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
752 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
753
754 size_t j( jbegin );
755
756 for( ; j<jpos; j+=SIMDSIZE ) {
757 y.store( j, y.load(j) + xmm1 * A.load(i1,j) );
758 }
759 for( ; remainder && j<jend; ++j ) {
760 y[j] += v1 * A(i1,j);
761 }
762 }
763 }
764 /*! \endcond */
765 //**********************************************************************************************
766
767 //**Assignment to sparse vectors****************************************************************
768 /*! \cond BLAZE_INTERNAL */
769 /*!\brief Assignment of a transpose sparse vector-dense matrix multiplication to a sparse
770 // vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
771 // \ingroup dense_vector
772 //
773 // \param lhs The target left-hand side sparse vector.
774 // \param rhs The right-hand side multiplication expression to be assigned.
775 // \return void
776 //
777 // This function implements the performance optimized assignment of a transpose sparse vector-
778 // dense matrix multiplication expression to a sparse vector.
779 */
780 template< typename VT2 > // Type of the target sparse vector
assign(SparseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)781 friend inline void assign( SparseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
782 {
783 BLAZE_FUNCTION_TRACE;
784
785 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
786 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
787 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
788
789 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
790
791 const ResultType tmp( serial( rhs ) );
792 assign( *lhs, tmp );
793 }
794 /*! \endcond */
795 //**********************************************************************************************
796
797 //**Addition assignment to dense vectors********************************************************
798 /*!\brief Addition assignment of a transpose sparse vector-dense matrix multiplication to a
799 // dense vector (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
800 // \ingroup dense_vector
801 //
802 // \param lhs The target left-hand side dense vector.
803 // \param rhs The right-hand side multiplication expression to be added.
804 // \return void
805 //
806 // This function implements the performance optimized addition assignment of a transpose sparse
807 // vector-dense matrix multiplication expression to a dense vector.
808 */
809 template< typename VT2 > // Type of the target dense vector
addAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)810 friend inline void addAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
811 {
812 BLAZE_FUNCTION_TRACE;
813
814 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
815
816 // Evaluation of the left-hand side sparse vector operand
817 LT x( serial( rhs.vec_ ) );
818 if( x.nonZeros() == 0UL ) return;
819
820 // Evaluation of the right-hand side dense matrix operand
821 RT A( serial( rhs.mat_ ) );
822
823 // Checking the evaluated operands
824 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
825 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
826 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
827 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
828
829 // Performing the sparse vector-dense matrix multiplication
830 TSVecDMatMultExpr::selectAddAssignKernel( *lhs, x, A );
831 }
832 //**********************************************************************************************
833
834 //**Default addition assignment to dense vectors************************************************
835 /*! \cond BLAZE_INTERNAL */
836 /*!\brief Default addition assignment of a transpose sparse vector-dense matrix multiplication
837 // (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
838 // \ingroup dense_vector
839 //
840 // \param y The target left-hand side dense vector.
841 // \param x The left-hand side sparse vector operand.
842 // \param A The right-hand side dense matrix operand.
843 // \return void
844 //
845 // This function implements the default addition assignment kernel for the transpose sparse
846 // vector-dense matrix multiplication.
847 */
848 template< typename VT1 // Type of the left-hand side target vector
849 , typename VT2 // Type of the left-hand side vector operand
850 , typename MT1 > // Type of the right-hand side matrix operand
851 static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
852 -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
853 {
854 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
855
856 const size_t N( A.columns() );
857
858 auto element( x.begin() );
859 const auto end( x.end() );
860
861 for( ; element!=end; ++element )
862 {
863 const size_t index( element->index() );
864
865 if( IsDiagonal_v<MT1> )
866 {
867 y[index] += A(index,index) * element->value();
868 }
869 else
870 {
871 const size_t jbegin( ( IsUpper_v<MT1> )
872 ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
873 :( 0UL ) );
874 const size_t jend( ( IsLower_v<MT1> )
875 ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
876 :( N ) );
877 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
878
879 for( size_t j=jbegin; j<jend; ++j ) {
880 y[j] += element->value() * A(index,j);
881 }
882 }
883 }
884 }
885 /*! \endcond */
886 //**********************************************************************************************
887
888 //**Optimized addition assignment to dense vectors**********************************************
889 /*! \cond BLAZE_INTERNAL */
890 /*!\brief Optimized addition assignment of a transpose sparse vector-dense matrix multiplication
891 // (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
892 // \ingroup dense_vector
893 //
894 // \param y The target left-hand side dense vector.
895 // \param x The left-hand side sparse vector operand.
896 // \param A The right-hand side dense matrix operand.
897 // \return void
898 //
899 // This function implements the optimized addition assignment kernel for the transpose sparse
900 // vector-dense matrix multiplication.
901 */
902 template< typename VT1 // Type of the left-hand side target vector
903 , typename VT2 // Type of the left-hand side vector operand
904 , typename MT1 > // Type of the right-hand side matrix operand
905 static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
906 -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
907 {
908 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
909
910 const size_t N( A.columns() );
911
912 auto element( x.begin() );
913 const auto end( x.end() );
914
915 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
916 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
917
918 for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
919 {
920 const size_t i1( element->index() );
921 const VET v1( element->value() );
922 ++element;
923 const size_t i2( element->index() );
924 const VET v2( element->value() );
925 ++element;
926 const size_t i3( element->index() );
927 const VET v3( element->value() );
928 ++element;
929 const size_t i4( element->index() );
930 const VET v4( element->value() );
931 ++element;
932
933 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
934
935 const size_t jbegin( ( IsUpper_v<MT1> )
936 ?( IsStrictlyUpper_v<MT1> ? i+1UL : i1 )
937 :( 0UL ) );
938 const size_t jend( ( IsLower_v<MT1> )
939 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
940 :( N ) );
941 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
942
943 for( size_t j=jbegin; j<jend; ++j ) {
944 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
945 }
946 }
947 for( ; element!=end; ++element )
948 {
949 const size_t i1( element->index() );
950 const VET v1( element->value() );
951
952 const size_t jbegin( ( IsUpper_v<MT1> )
953 ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
954 :( 0UL ) );
955 const size_t jend( ( IsLower_v<MT1> )
956 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
957 :( N ) );
958 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
959
960 for( size_t j=jbegin; j<jend; ++j ) {
961 y[j] += v1 * A(i1,j);
962 }
963 }
964 }
965 /*! \endcond */
966 //**********************************************************************************************
967
968 //**Vectorized addition assignment to dense vectors*********************************************
969 /*! \cond BLAZE_INTERNAL */
970 /*!\brief Vectorized addition assignment of a transpose sparse vector-dense matrix multiplication
971 // (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
972 // \ingroup dense_vector
973 //
974 // \param y The target left-hand side dense vector.
975 // \param x The left-hand side sparse vector operand.
976 // \param A The right-hand side dense matrix operand.
977 // \return void
978 //
979 // This function implements the vectorized addition assignment kernel for the transpose sparse
980 // vector-dense matrix multiplication.
981 */
982 template< typename VT1 // Type of the left-hand side target vector
983 , typename VT2 // Type of the left-hand side vector operand
984 , typename MT1 > // Type of the right-hand side matrix operand
985 static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
986 -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
987 {
988 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
989
990 constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
991
992 const size_t N( A.columns() );
993
994 auto element( x.begin() );
995 const auto end( x.end() );
996
997 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
998 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
999
1000 for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1001 {
1002 const size_t i1( element->index() );
1003 const VET v1( element->value() );
1004 ++element;
1005 const size_t i2( element->index() );
1006 const VET v2( element->value() );
1007 ++element;
1008 const size_t i3( element->index() );
1009 const VET v3( element->value() );
1010 ++element;
1011 const size_t i4( element->index() );
1012 const VET v4( element->value() );
1013 ++element;
1014
1015 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1016
1017 const SIMDType xmm1( set( v1 ) );
1018 const SIMDType xmm2( set( v2 ) );
1019 const SIMDType xmm3( set( v3 ) );
1020 const SIMDType xmm4( set( v4 ) );
1021
1022 const size_t jbegin( ( IsUpper_v<MT1> )
1023 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1024 :( 0UL ) );
1025 const size_t jend( ( IsLower_v<MT1> )
1026 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1027 :( N ) );
1028 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1029
1030 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1031 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1032
1033 size_t j( jbegin );
1034
1035 for( ; j<jpos; j+=SIMDSIZE ) {
1036 y.store( j, y.load(j) + xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
1037 }
1038 for( ; remainder && j<jend; ++j ) {
1039 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1040 }
1041 }
1042 for( ; element!=end; ++element )
1043 {
1044 const size_t i1( element->index() );
1045 const VET v1( element->value() );
1046
1047 const SIMDType xmm1( set( v1 ) );
1048
1049 const size_t jbegin( ( IsUpper_v<MT1> )
1050 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1051 :( 0UL ) );
1052 const size_t jend( ( IsLower_v<MT1> )
1053 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1054 :( N ) );
1055 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1056
1057 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1058 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1059
1060 size_t j( jbegin );
1061
1062 for( ; j<jpos; j+=SIMDSIZE ) {
1063 y.store( j, y.load(j) + xmm1 * A.load(i1,j) );
1064 }
1065 for( ; remainder && j<jend; ++j ) {
1066 y[j] += v1 * A(i1,j);
1067 }
1068 }
1069 }
1070 /*! \endcond */
1071 //**********************************************************************************************
1072
1073 //**Addition assignment to sparse vectors*******************************************************
1074 // No special implementation for the addition assignment to sparse vectors.
1075 //**********************************************************************************************
1076
1077 //**Subtraction assignment to dense vectors*****************************************************
1078 /*!\brief Subtraction assignment of a transpose sparse vector-dense matrix multiplication to a
1079 // dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1080 // \ingroup dense_vector
1081 //
1082 // \param lhs The target left-hand side dense vector.
1083 // \param rhs The right-hand side multiplication expression to be subtracted.
1084 // \return void
1085 //
1086 // This function implements the performance optimized subtraction assignment of a transpose
1087 // sparse vector-dense matrix multiplication expression to a dense vector.
1088 */
1089 template< typename VT2 > // Type of the target dense vector
subAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1090 friend inline void subAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1091 {
1092 BLAZE_FUNCTION_TRACE;
1093
1094 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1095
1096 // Evaluation of the left-hand side sparse vector operand
1097 LT x( serial( rhs.vec_ ) );
1098 if( x.nonZeros() == 0UL ) return;
1099
1100 // Evaluation of the right-hand side dense matrix operand
1101 RT A( serial( rhs.mat_ ) );
1102
1103 // Checking the evaluated operands
1104 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1105 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1106 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1107 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
1108
1109 // Performing the sparse vector-dense matrix multiplication
1110 TSVecDMatMultExpr::selectSubAssignKernel( *lhs, x, A );
1111 }
1112 //**********************************************************************************************
1113
1114 //**Default subtraction assignment to dense vectors*********************************************
1115 /*! \cond BLAZE_INTERNAL */
1116 /*!\brief Default subtraction assignment of a transpose sparse vector-dense matrix
1117 // multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1118 // \ingroup dense_vector
1119 //
1120 // \param y The target left-hand side dense vector.
1121 // \param x The left-hand side sparse vector operand.
1122 // \param A The right-hand side dense matrix operand.
1123 // \return void
1124 //
1125 // This function implements the default subtraction assignment kernel for the transpose
1126 // sparse vector-dense matrix multiplication.
1127 */
1128 template< typename VT1 // Type of the left-hand side target vector
1129 , typename VT2 // Type of the left-hand side vector operand
1130 , typename MT1 > // Type of the right-hand side matrix operand
1131 static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1132 -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
1133 {
1134 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1135
1136 const size_t N( A.columns() );
1137
1138 auto element( x.begin() );
1139 const auto end( x.end() );
1140
1141 for( ; element!=end; ++element )
1142 {
1143 const size_t index( element->index() );
1144
1145 if( IsDiagonal_v<MT1> )
1146 {
1147 y[index] -= A(index,index) * element->value();
1148 }
1149 else
1150 {
1151 const size_t jbegin( ( IsUpper_v<MT1> )
1152 ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
1153 :( 0UL ) );
1154 const size_t jend( ( IsLower_v<MT1> )
1155 ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
1156 :( N ) );
1157 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1158
1159 for( size_t j=jbegin; j<jend; ++j ) {
1160 y[j] -= element->value() * A(index,j);
1161 }
1162 }
1163 }
1164 }
1165 /*! \endcond */
1166 //**********************************************************************************************
1167
1168 //**Optimized subtraction assignment to dense vectors*******************************************
1169 /*! \cond BLAZE_INTERNAL */
1170 /*!\brief Optimized subtraction assignment of a transpose sparse vector-dense matrix
1171 // multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1172 // \ingroup dense_vector
1173 //
1174 // \param y The target left-hand side dense vector.
1175 // \param x The left-hand side sparse vector operand.
1176 // \param A The right-hand side dense matrix operand.
1177 // \return void
1178 //
1179 // This function implements the optimized subtraction assignment kernel for the transpose
1180 // sparse vector-dense matrix multiplication.
1181 */
1182 template< typename VT1 // Type of the left-hand side target vector
1183 , typename VT2 // Type of the left-hand side vector operand
1184 , typename MT1 > // Type of the right-hand side matrix operand
1185 static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1186 -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
1187 {
1188 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1189
1190 const size_t N( A.columns() );
1191
1192 auto element( x.begin() );
1193 const auto end( x.end() );
1194
1195 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
1196 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
1197
1198 for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1199 {
1200 const size_t i1( element->index() );
1201 const VET v1( element->value() );
1202 ++element;
1203 const size_t i2( element->index() );
1204 const VET v2( element->value() );
1205 ++element;
1206 const size_t i3( element->index() );
1207 const VET v3( element->value() );
1208 ++element;
1209 const size_t i4( element->index() );
1210 const VET v4( element->value() );
1211 ++element;
1212
1213 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1214
1215 const size_t jbegin( ( IsUpper_v<MT1> )
1216 ?( IsStrictlyUpper_v<MT1> ? i+1UL : i1 )
1217 :( 0UL ) );
1218 const size_t jend( ( IsLower_v<MT1> )
1219 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1220 :( N ) );
1221 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1222
1223 for( size_t j=jbegin; j<jend; ++j ) {
1224 y[j] -= v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1225 }
1226 }
1227 for( ; element!=end; ++element )
1228 {
1229 const size_t i1( element->index() );
1230 const VET v1( element->value() );
1231
1232 const size_t jbegin( ( IsUpper_v<MT1> )
1233 ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
1234 :( 0UL ) );
1235 const size_t jend( ( IsLower_v<MT1> )
1236 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1237 :( N ) );
1238 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1239
1240 for( size_t j=jbegin; j<jend; ++j ) {
1241 y[j] -= v1 * A(i1,j);
1242 }
1243 }
1244 }
1245 /*! \endcond */
1246 //**********************************************************************************************
1247
1248 //**Vectorized subtraction assignment to dense vectors******************************************
1249 /*! \cond BLAZE_INTERNAL */
1250 /*!\brief Vectorized subtraction assignment of a transpose sparse vector-dense matrix
1251 // multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1252 // \ingroup dense_vector
1253 //
1254 // \param y The target left-hand side dense vector.
1255 // \param x The left-hand side sparse vector operand.
1256 // \param A The right-hand side dense matrix operand.
1257 // \return void
1258 //
1259 // This function implements the vectorized subtraction assignment kernel for the transpose
1260 // sparse vector-dense matrix multiplication.
1261 */
1262 template< typename VT1 // Type of the left-hand side target vector
1263 , typename VT2 // Type of the left-hand side vector operand
1264 , typename MT1 > // Type of the right-hand side matrix operand
1265 static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1266 -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
1267 {
1268 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1269
1270 constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
1271
1272 const size_t N( A.columns() );
1273
1274 auto element( x.begin() );
1275 const auto end( x.end() );
1276
1277 const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
1278 BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
1279
1280 for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1281 {
1282 const size_t i1( element->index() );
1283 const VET v1( element->value() );
1284 ++element;
1285 const size_t i2( element->index() );
1286 const VET v2( element->value() );
1287 ++element;
1288 const size_t i3( element->index() );
1289 const VET v3( element->value() );
1290 ++element;
1291 const size_t i4( element->index() );
1292 const VET v4( element->value() );
1293 ++element;
1294
1295 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1296
1297 const SIMDType xmm1( set( v1 ) );
1298 const SIMDType xmm2( set( v2 ) );
1299 const SIMDType xmm3( set( v3 ) );
1300 const SIMDType xmm4( set( v4 ) );
1301
1302 const size_t jbegin( ( IsUpper_v<MT1> )
1303 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1304 :( 0UL ) );
1305 const size_t jend( ( IsLower_v<MT1> )
1306 ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1307 :( N ) );
1308 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1309
1310 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1311 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1312
1313 size_t j( jbegin );
1314
1315 for( ; j<jpos; j+=SIMDSIZE ) {
1316 y.store( j, y.load(j) - xmm1 * A.load(i1,j) - xmm2 * A.load(i2,j) - xmm3 * A.load(i3,j) - xmm4 * A.load(i4,j) );
1317 }
1318 for( ; remainder && j<jend; ++j ) {
1319 y[j] -= v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1320 }
1321 }
1322 for( ; element!=end; ++element )
1323 {
1324 const size_t i1( element->index() );
1325 const VET v1( element->value() );
1326
1327 const SIMDType xmm1( set( v1 ) );
1328
1329 const size_t jbegin( ( IsUpper_v<MT1> )
1330 ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1331 :( 0UL ) );
1332 const size_t jend( ( IsLower_v<MT1> )
1333 ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1334 :( N ) );
1335 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1336
1337 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1338 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1339
1340 size_t j( jbegin );
1341
1342 for( ; j<jpos; j+=SIMDSIZE ) {
1343 y.store( j, y.load(j) - xmm1 * A.load(i1,j) );
1344 }
1345 for( ; remainder && j<jend; ++j ) {
1346 y[j] -= v1 * A(i1,j);
1347 }
1348 }
1349 }
1350 /*! \endcond */
1351 //**********************************************************************************************
1352
1353 //**Subtraction assignment to sparse vectors****************************************************
1354 // No special implementation for the subtraction assignment to sparse vectors.
1355 //**********************************************************************************************
1356
1357 //**Multiplication assignment to dense vectors**************************************************
1358 /*!\brief Multiplication assignment of a transpose sparse vector-dense matrix multiplication
1359 // to a dense vector (\f$ \vec{y}^T*=\vec{x}^T*A \f$).
1360 // \ingroup dense_vector
1361 //
1362 // \param lhs The target left-hand side dense vector.
1363 // \param rhs The right-hand side multiplication expression to be multiplied.
1364 // \return void
1365 //
1366 // This function implements the performance optimized multiplication assignment of a transpose
1367 // sparse vector-dense matrix multiplication expression to a dense vector.
1368 */
1369 template< typename VT2 > // Type of the target dense vector
multAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1370 friend inline void multAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1371 {
1372 BLAZE_FUNCTION_TRACE;
1373
1374 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1375 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1376 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1377
1378 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1379
1380 const ResultType tmp( serial( rhs ) );
1381 multAssign( *lhs, tmp );
1382 }
1383 //**********************************************************************************************
1384
1385 //**Multiplication assignment to sparse vectors*************************************************
1386 // No special implementation for the multiplication assignment to sparse vectors.
1387 //**********************************************************************************************
1388
1389 //**Division assignment to dense vectors********************************************************
1390 /*!\brief Division assignment of a transpose sparse vector-dense matrix multiplication to a
1391 // dense vector (\f$ \vec{y}^T/=\vec{x}^T*A \f$).
1392 // \ingroup dense_vector
1393 //
1394 // \param lhs The target left-hand side dense vector.
1395 // \param rhs The right-hand side multiplication expression divisor.
1396 // \return void
1397 //
1398 // This function implements the performance optimized division assignment of a transpose sparse
1399 // vector-dense matrix multiplication expression to a dense vector.
1400 */
1401 template< typename VT2 > // Type of the target dense vector
divAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1402 friend inline void divAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1403 {
1404 BLAZE_FUNCTION_TRACE;
1405
1406 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1407 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1408 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1409
1410 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1411
1412 const ResultType tmp( serial( rhs ) );
1413 divAssign( *lhs, tmp );
1414 }
1415 //**********************************************************************************************
1416
1417 //**Division assignment to sparse vectors*******************************************************
1418 // No special implementation for the division assignment to sparse vectors.
1419 //**********************************************************************************************
1420
1421 //**SMP assignment to dense vectors*************************************************************
1422 /*! \cond BLAZE_INTERNAL */
1423 /*!\brief SMP assignment of a transpose sparse vector-dense matrix multiplication to a dense
1424 // vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1425 // \ingroup dense_vector
1426 //
1427 // \param lhs The target left-hand side dense vector.
1428 // \param rhs The right-hand side multiplication expression to be assigned.
1429 // \return void
1430 //
1431 // This function implements the performance optimized SMP assignment of a transpose sparse
1432 // vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1433 // application of the SFINAE principle, this function can only be selected by the compiler
1434 // in case the expression specific parallel evaluation strategy is selected.
1435 */
1436 template< typename VT2 > // Type of the target dense vector
1437 friend inline auto smpAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1438 -> EnableIf_t< UseSMPAssign_v<VT2> >
1439 {
1440 BLAZE_FUNCTION_TRACE;
1441
1442 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1443
1444 // Evaluation of the left-hand side sparse vector operand
1445 LT x( rhs.vec_ );
1446 if( x.nonZeros() == 0UL ) {
1447 reset( *lhs );
1448 return;
1449 }
1450
1451 // Evaluation of the right-hand side dense matrix operand
1452 RT A( rhs.mat_ );
1453
1454 // Checking the evaluated operands
1455 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1456 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1457 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1458 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
1459
1460 // Performing the sparse vector-dense matrix multiplication
1461 smpAssign( *lhs, x * A );
1462 }
1463 /*! \endcond */
1464 //**********************************************************************************************
1465
1466 //**SMP assignment to sparse vectors************************************************************
1467 /*! \cond BLAZE_INTERNAL */
1468 /*!\brief SMP assignment of a transpose sparse vector-dense matrix multiplication to a sparse
1469 // vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1470 // \ingroup dense_vector
1471 //
1472 // \param lhs The target left-hand side sparse vector.
1473 // \param rhs The right-hand side multiplication expression to be assigned.
1474 // \return void
1475 //
1476 // This function implements the performance optimized SMP assignment of a transpose sparse
1477 // vector-dense matrix multiplication expression to a sparse vector. Due to the explicit
1478 // application of the SFINAE principle, this function can only be selected by the compiler
1479 // in case the expression specific parallel evaluation strategy is selected.
1480 */
1481 template< typename VT2 > // Type of the target sparse vector
1482 friend inline auto smpAssign( SparseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1483 -> EnableIf_t< UseSMPAssign_v<VT2> >
1484 {
1485 BLAZE_FUNCTION_TRACE;
1486
1487 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1488 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1489 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1490
1491 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1492
1493 const ResultType tmp( rhs );
1494 smpAssign( *lhs, tmp );
1495 }
1496 /*! \endcond */
1497 //**********************************************************************************************
1498
1499 //**SMP addition assignment to dense vectors****************************************************
1500 /*!\brief SMP addition assignment of a transpose sparse vector-dense matrix multiplication to
1501 // a dense vector (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
1502 // \ingroup dense_vector
1503 //
1504 // \param lhs The target left-hand side dense vector.
1505 // \param rhs The right-hand side multiplication expression to be added.
1506 // \return void
1507 //
1508 // This function implements the performance optimized SMP addition assignment of a transpose
1509 // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1510 // application of the SFINAE principle, this function can only be selected by the compiler
1511 // in case the expression specific parallel evaluation strategy is selected.
1512 */
1513 template< typename VT2 > // Type of the target dense vector
1514 friend inline auto smpAddAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1515 -> EnableIf_t< UseSMPAssign_v<VT2> >
1516 {
1517 BLAZE_FUNCTION_TRACE;
1518
1519 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1520
1521 // Evaluation of the left-hand side sparse vector operand
1522 LT x( rhs.vec_ );
1523 if( x.nonZeros() == 0UL ) return;
1524
1525 // Evaluation of the right-hand side dense matrix operand
1526 RT A( rhs.mat_ );
1527
1528 // Checking the evaluated operands
1529 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1530 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1531 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1532 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
1533
1534 // Performing the sparse vector-dense matrix multiplication
1535 smpAddAssign( *lhs, x * A );
1536 }
1537 //**********************************************************************************************
1538
1539 //**SMP addition assignment to sparse vectors***************************************************
1540 // No special implementation for the SMP addition assignment to sparse vectors.
1541 //**********************************************************************************************
1542
1543 //**SMP subtraction assignment to dense vectors*************************************************
1544 /*!\brief SMP subtraction assignment of a transpose sparse vector-dense matrix multiplication
1545 // to a dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1546 // \ingroup dense_vector
1547 //
1548 // \param lhs The target left-hand side dense vector.
1549 // \param rhs The right-hand side multiplication expression to be subtracted.
1550 // \return void
1551 //
1552 // This function implements the performance optimized SMP subtraction assignment of a transpose
1553 // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1554 // application of the SFINAE principle, this function can only be selected by the compiler
1555 // in case the expression specific parallel evaluation strategy is selected.
1556 */
1557 template< typename VT2 > // Type of the target dense vector
1558 friend inline auto smpSubAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1559 -> EnableIf_t< UseSMPAssign_v<VT2> >
1560 {
1561 BLAZE_FUNCTION_TRACE;
1562
1563 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1564
1565 // Evaluation of the left-hand side sparse vector operand
1566 LT x( rhs.vec_ );
1567 if( x.nonZeros() == 0UL ) return;
1568
1569 // Evaluation of the right-hand side dense matrix operand
1570 RT A( rhs.mat_ );
1571
1572 // Checking the evaluated operands
1573 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1574 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1575 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1576 BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size() , "Invalid vector size" );
1577
1578 // Performing the sparse vector-dense matrix multiplication
1579 smpSubAssign( *lhs, x * A );
1580 }
1581 //**********************************************************************************************
1582
1583 //**SMP subtraction assignment to sparse vectors************************************************
1584 // No special implementation for the SMP subtraction assignment to sparse vectors.
1585 //**********************************************************************************************
1586
1587 //**SMP multiplication assignment to dense vectors**********************************************
1588 /*!\brief SMP multiplication assignment of a transpose sparse vector-dense matrix multiplication
1589 // to a dense vector (\f$ \vec{y}^T*=\vec{x}^T*A \f$).
1590 // \ingroup dense_vector
1591 //
1592 // \param lhs The target left-hand side dense vector.
1593 // \param rhs The right-hand side multiplication expression to be multiplied.
1594 // \return void
1595 //
1596 // This function implements the performance optimized SMP multiplication assignment of a
1597 // transpose sparse vector-dense matrix multiplication expression to a dense vector. Due
1598 // to the explicit application of the SFINAE principle, this function can only be selected
1599 // by the compiler in case the expression specific parallel evaluation strategy is selected.
1600 */
1601 template< typename VT2 > // Type of the target dense vector
1602 friend inline auto smpMultAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1603 -> EnableIf_t< UseSMPAssign_v<VT2> >
1604 {
1605 BLAZE_FUNCTION_TRACE;
1606
1607 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1608 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1609 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1610
1611 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1612
1613 const ResultType tmp( rhs );
1614 smpMultAssign( *lhs, tmp );
1615 }
1616 //**********************************************************************************************
1617
1618 //**SMP multiplication assignment to sparse vectors*********************************************
1619 // No special implementation for the SMP multiplication assignment to sparse vectors.
1620 //**********************************************************************************************
1621
1622 //**SMP division assignment to dense vectors****************************************************
1623 /*!\brief SMP division assignment of a transpose sparse vector-dense matrix multiplication to
1624 // a dense vector (\f$ \vec{y}^T/=\vec{x}^T*A \f$).
1625 // \ingroup dense_vector
1626 //
1627 // \param lhs The target left-hand side dense vector.
1628 // \param rhs The right-hand side multiplication expression divisor.
1629 // \return void
1630 //
1631 // This function implements the performance optimized SMP division assignment of a transpose
1632 // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1633 // application of the SFINAE principle, this function can only be selected by the compiler in
1634 // case the expression specific parallel evaluation strategy is selected.
1635 */
1636 template< typename VT2 > // Type of the target dense vector
1637 friend inline auto smpDivAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1638 -> EnableIf_t< UseSMPAssign_v<VT2> >
1639 {
1640 BLAZE_FUNCTION_TRACE;
1641
1642 BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1643 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1644 BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1645
1646 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1647
1648 const ResultType tmp( rhs );
1649 smpDivAssign( *lhs, tmp );
1650 }
1651 //**********************************************************************************************
1652
1653 //**SMP division assignment to sparse vectors***************************************************
1654 // No special implementation for the SMP division assignment to sparse vectors.
1655 //**********************************************************************************************
1656
1657 //**Compile time checks*************************************************************************
1658 /*! \cond BLAZE_INTERNAL */
1659 BLAZE_CONSTRAINT_MUST_BE_SPARSE_VECTOR_TYPE( VT );
1660 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( VT );
1661 BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE( VT );
1662 BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE( MT );
1663 BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( MT );
1664 BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR( VT, MT );
1665 /*! \endcond */
1666 //**********************************************************************************************
1667 };
1668 //*************************************************************************************************
1669
1670
1671
1672
1673 //=================================================================================================
1674 //
1675 // GLOBAL BINARY ARITHMETIC OPERATORS
1676 //
1677 //=================================================================================================
1678
1679 //*************************************************************************************************
1680 /*! \cond BLAZE_INTERNAL */
/*!\brief Backend implementation of the multiplication of a transpose sparse vector
//        and a row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1683 // \ingroup dense_vector
1684 //
1685 // \param vec The left-hand side transpose sparse vector for the multiplication.
1686 // \param mat The right-hand side row-major dense matrix for the multiplication.
1687 // \return The resulting transpose vector.
1688 //
1689 // This function implements the performance optimized treatment of the multiplication of a
1690 // transpose sparse vector and a row-major dense matrix.
1691 */
1692 template< typename VT // Type of the left-hand side sparse vector
1693 , typename MT // Type of the right-hand side dense matrix
1694 , DisableIf_t< IsZero_v<VT> >* = nullptr >
1695 inline const TSVecDMatMultExpr<VT,MT>
tsvecdmatmult(const SparseVector<VT,true> & vec,const DenseMatrix<MT,false> & mat)1696 tsvecdmatmult( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1697 {
1698 BLAZE_FUNCTION_TRACE;
1699
1700 BLAZE_INTERNAL_ASSERT( (*vec).size() == (*mat).rows(), "Invalid vector and matrix sizes" );
1701
1702 return TSVecDMatMultExpr<VT,MT>( *vec, *mat );
1703 }
1704 /*! \endcond */
1705 //*************************************************************************************************
1706
1707
1708 //*************************************************************************************************
1709 /*! \cond BLAZE_INTERNAL */
/*!\brief Backend implementation of the multiplication of a transpose zero vector
//        and a row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1712 // \ingroup dense_vector
1713 //
1714 // \param vec The left-hand side transpose zero vector for the multiplication.
1715 // \param mat The right-hand side row-major dense matrix for the multiplication.
1716 // \return The resulting zero vector.
1717 //
1718 // This function implements the performance optimized treatment of the multiplication of a
1719 // transpose zero vector and a row-major dense matrix. It returns a zero vector.
1720 */
1721 template< typename VT // Type of the left-hand side sparse vector
1722 , typename MT // Type of the right-hand side dense matrix
1723 , EnableIf_t< IsZero_v<VT> >* = nullptr >
decltype(auto)1724 inline decltype(auto)
1725 tsvecdmatmult( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1726 {
1727 BLAZE_FUNCTION_TRACE;
1728
1729 MAYBE_UNUSED( vec );
1730
1731 BLAZE_INTERNAL_ASSERT( (*vec).size() == (*mat).rows(), "Invalid vector and matrix sizes" );
1732
1733 using ReturnType = const MultTrait_t< ResultType_t<VT>, ResultType_t<MT> >;
1734
1735 BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ReturnType );
1736 BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE( ReturnType );
1737
1738 return ReturnType( (*mat).columns() );
1739 }
1740 /*! \endcond */
1741 //*************************************************************************************************
1742
1743
1744 //*************************************************************************************************
1745 /*!\brief Multiplication operator for the multiplication of a transpose sparse vector and a
1746 // row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1747 // \ingroup dense_matrix
1748 //
1749 // \param vec The left-hand side transpose sparse vector for the multiplication.
1750 // \param mat The right-hand side row-major dense matrix for the multiplication.
1751 // \return The resulting transpose vector.
1752 // \exception std::invalid_argument Vector and matrix sizes do not match.
1753 //
1754 // This operator represents the multiplication between a transpose sparse vector and a row-major
1755 // dense matrix:
1756
1757 \code
1758 using blaze::rowVector;
1759 using blaze::rowMajor;
1760
1761 blaze::CompressedVector<double,rowVector> x, y;
1762 blaze::DynamicMatrix<double,rowMajor> A;
1763 // ... Resizing and initialization
1764 y = x * A;
1765 \endcode
1766
// The operator returns an expression representing a transpose vector of the higher-order
// element type of the two involved element types \a VT::ElementType and \a MT::ElementType.
// Both the sparse vector type \a VT and the dense matrix type \a MT as well as the two element
// types \a VT::ElementType and \a MT::ElementType have to be supported by the MultTrait class
// template.\n
1772 // In case the current size of the vector \a vec doesn't match the current number of rows of
1773 // the matrix \a mat, a \a std::invalid_argument is thrown.
1774 */
1775 template< typename VT // Type of the left-hand side sparse vector
1776 , typename MT > // Type of the right-hand side dense matrix
decltype(auto)1777 inline decltype(auto)
1778 operator*( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1779 {
1780 BLAZE_FUNCTION_TRACE;
1781
1782 BLAZE_CONSTRAINT_MUST_NOT_BE_MATMATMULTEXPR_TYPE( MT );
1783
1784 if( (*vec).size() != (*mat).rows() ) {
1785 BLAZE_THROW_INVALID_ARGUMENT( "Vector and matrix sizes do not match" );
1786 }
1787
1788 return tsvecdmatmult( *vec, *mat );
1789 }
1790 //*************************************************************************************************
1791
1792
1793
1794
1795 //=================================================================================================
1796 //
1797 // ISALIGNED SPECIALIZATIONS
1798 //
1799 //=================================================================================================
1800
1801 //*************************************************************************************************
1802 /*! \cond BLAZE_INTERNAL */
1803 template< typename VT, typename MT >
1804 struct IsAligned< TSVecDMatMultExpr<VT,MT> >
1805 : public IsAligned<MT>
1806 {};
1807 /*! \endcond */
1808 //*************************************************************************************************
1809
1810 } // namespace blaze
1811
1812 #endif
1813