1 //=================================================================================================
2 /*!
3 //  \file blaze/math/expressions/TSVecDMatMultExpr.h
4 //  \brief Header file for the transpose sparse vector/dense matrix multiplication expression
5 //
6 //  Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 //  This file is part of the Blaze library. You can redistribute it and/or modify it under
9 //  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 //  forms, with or without modification, are permitted provided that the following conditions
11 //  are met:
12 //
13 //  1. Redistributions of source code must retain the above copyright notice, this list of
14 //     conditions and the following disclaimer.
15 //  2. Redistributions in binary form must reproduce the above copyright notice, this list
16 //     of conditions and the following disclaimer in the documentation and/or other materials
17 //     provided with the distribution.
18 //  3. Neither the names of the Blaze development group nor the names of its contributors
19 //     may be used to endorse or promote products derived from this software without specific
20 //     prior written permission.
21 //
22 //  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 //  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 //  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 //  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 //  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 //  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 //  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 //  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 //  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 //  DAMAGE.
32 */
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/constraints/DenseMatrix.h>
45 #include <blaze/math/constraints/DenseVector.h>
46 #include <blaze/math/constraints/MatMatMultExpr.h>
47 #include <blaze/math/constraints/RequiresEvaluation.h>
48 #include <blaze/math/constraints/RowMajorMatrix.h>
49 #include <blaze/math/constraints/RowVector.h>
50 #include <blaze/math/constraints/SparseVector.h>
51 #include <blaze/math/constraints/TVecMatMultExpr.h>
52 #include <blaze/math/constraints/Zero.h>
53 #include <blaze/math/Exception.h>
54 #include <blaze/math/expressions/Computation.h>
55 #include <blaze/math/expressions/DenseVector.h>
56 #include <blaze/math/expressions/Forward.h>
57 #include <blaze/math/expressions/MatMatMultExpr.h>
58 #include <blaze/math/expressions/TVecMatMultExpr.h>
59 #include <blaze/math/shims/PrevMultiple.h>
60 #include <blaze/math/shims/Reset.h>
61 #include <blaze/math/shims/Serial.h>
62 #include <blaze/math/SIMD.h>
63 #include <blaze/math/traits/MultTrait.h>
64 #include <blaze/math/typetraits/HasSIMDAdd.h>
65 #include <blaze/math/typetraits/HasSIMDMult.h>
66 #include <blaze/math/typetraits/IsAligned.h>
67 #include <blaze/math/typetraits/IsComputation.h>
68 #include <blaze/math/typetraits/IsDiagonal.h>
69 #include <blaze/math/typetraits/IsExpression.h>
70 #include <blaze/math/typetraits/IsLower.h>
71 #include <blaze/math/typetraits/IsPadded.h>
72 #include <blaze/math/typetraits/IsResizable.h>
73 #include <blaze/math/typetraits/IsSIMDCombinable.h>
74 #include <blaze/math/typetraits/IsStrictlyLower.h>
75 #include <blaze/math/typetraits/IsStrictlyUpper.h>
76 #include <blaze/math/typetraits/IsUpper.h>
77 #include <blaze/math/typetraits/IsZero.h>
78 #include <blaze/math/typetraits/RequiresEvaluation.h>
79 #include <blaze/math/views/Check.h>
80 #include <blaze/system/MacroDisable.h>
81 #include <blaze/system/Optimizations.h>
82 #include <blaze/system/Thresholds.h>
83 #include <blaze/util/Assert.h>
84 #include <blaze/util/EnableIf.h>
85 #include <blaze/util/FunctionTrace.h>
86 #include <blaze/util/MaybeUnused.h>
87 #include <blaze/util/mpl/If.h>
88 #include <blaze/util/Types.h>
89 
90 
91 namespace blaze {
92 
93 //=================================================================================================
94 //
95 //  CLASS TSVECDMATMULTEXPR
96 //
97 //=================================================================================================
98 
99 //*************************************************************************************************
100 /*!\brief Expression object for transpose sparse vector-dense matrix multiplications.
101 // \ingroup dense_vector_expression
102 //
103 // The TSVecDMatMultExpr class represents the compile time expression for multiplications
104 // between transpose sparse vectors and row-major dense matrices.
105 */
106 template< typename VT    // Type of the left-hand side sparse vector
107         , typename MT >  // Type of the right-hand side dense matrix
108 class TSVecDMatMultExpr
109    : public TVecMatMultExpr< DenseVector< TSVecDMatMultExpr<VT,MT>, true > >
110    , private Computation
111 {
112  private:
113    //**Type definitions****************************************************************************
114    using VRT = ResultType_t<VT>;     //!< Result type of the left-hand side sparse vector expression.
115    using MRT = ResultType_t<MT>;     //!< Result type of the right-hand side dense matrix expression.
116    using VET = ElementType_t<VRT>;   //!< Element type of the left-hand side sparse vector expression.
117    using MET = ElementType_t<MRT>;   //!< Element type of the right-hand side dense matrix expression.
118    using VCT = CompositeType_t<VT>;  //!< Composite type of the left-hand side sparse vector expression.
119    using MCT = CompositeType_t<MT>;  //!< Composite type of the right-hand side dense matrix expression.
120    //**********************************************************************************************
121 
122    //**********************************************************************************************
123    //! Compilation switch for the composite type of the left-hand side sparse vector expression.
124    static constexpr bool evaluateVector = ( IsComputation_v<VT> || RequiresEvaluation_v<VT> );
125    //**********************************************************************************************
126 
127    //**********************************************************************************************
128    //! Compilation switch for the composite type of the right-hand side dense matrix expression.
129    static constexpr bool evaluateMatrix = RequiresEvaluation_v<MT>;
130    //**********************************************************************************************
131 
132    //**********************************************************************************************
133    /*! \cond BLAZE_INTERNAL */
134    //! Helper variable template for the explicit application of the SFINAE principle.
   /*! This variable template is a helper for the selection of the parallel evaluation strategy.
       In case either the vector or the matrix operand requires an intermediate evaluation, the
       variable will be set to \a true, otherwise it will be \a false. */
138    template< typename T1 >
139    static constexpr bool UseSMPAssign_v = ( evaluateVector || evaluateMatrix );
140    /*! \endcond */
141    //**********************************************************************************************
142 
143    //**********************************************************************************************
144    /*! \cond BLAZE_INTERNAL */
145    //! Helper variable template for the explicit application of the SFINAE principle.
   /*! In case the matrix type and the two involved vector types are suited for a vectorized
       computation of the vector/matrix multiplication, the variable will be set to \a true,
       otherwise it will be \a false. */
149    template< typename T1, typename T2, typename T3 >
150    static constexpr bool UseVectorizedKernel_v =
151       ( useOptimizedKernels &&
152         !IsDiagonal_v<T3> &&
153         T1::simdEnabled && T3::simdEnabled &&
154         IsSIMDCombinable_v< ElementType_t<T1>
155                           , ElementType_t<T2>
156                           , ElementType_t<T3> > &&
157         HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
158         HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
159    /*! \endcond */
160    //**********************************************************************************************
161 
162    //**********************************************************************************************
163    /*! \cond BLAZE_INTERNAL */
164    //! Helper variable template for the explicit application of the SFINAE principle.
   /*! In case a vectorized computation of the vector/matrix multiplication is not possible, but
       a loop-unrolled computation is feasible, the variable will be set to \a true, otherwise
       it will be \a false. */
168    template< typename T1, typename T2, typename T3 >
169    static constexpr bool UseOptimizedKernel_v =
170       ( useOptimizedKernels &&
171         !UseVectorizedKernel_v<T1,T2,T3> &&
172         !IsDiagonal_v<T3> &&
173         !IsResizable_v< ElementType_t<T1> > &&
174         !IsResizable_v<VET> );
175    /*! \endcond */
176    //**********************************************************************************************
177 
178    //**********************************************************************************************
179    /*! \cond BLAZE_INTERNAL */
180    //! Helper variable template for the explicit application of the SFINAE principle.
   /*! In case neither a vectorized nor optimized computation is possible, the variable will be
       set to \a true, otherwise it will be \a false. */
183    template< typename T1, typename T2, typename T3 >
184    static constexpr bool UseDefaultKernel_v =
185       ( !UseVectorizedKernel_v<T1,T2,T3> && !UseOptimizedKernel_v<T1,T2,T3> );
186    /*! \endcond */
187    //**********************************************************************************************
188 
189  public:
190    //**Type definitions****************************************************************************
191    //! Type of this TSVecDMatMultExpr instance.
192    using This = TSVecDMatMultExpr<VT,MT>;
193 
194    //! Base type of this TSVecDMatMultExpr instance.
195    using BaseType = TVecMatMultExpr< DenseVector<This,true> >;
196 
197    using ResultType    = MultTrait_t<VRT,MRT>;         //!< Result type for expression template evaluations.
198    using TransposeType = TransposeType_t<ResultType>;  //!< Transpose type for expression template evaluations.
199    using ElementType   = ElementType_t<ResultType>;    //!< Resulting element type.
200    using SIMDType      = SIMDTrait_t<ElementType>;     //!< Resulting SIMD element type.
201    using ReturnType    = const ElementType;            //!< Return type for expression template evaluations.
202    using CompositeType = const ResultType;             //!< Data type for composite expression templates.
203 
204    //! Composite type of the left-hand side sparse vector expression.
205    using LeftOperand = If_t< IsExpression_v<VT>, const VT, const VT& >;
206 
   //! Composite type of the right-hand side dense matrix expression.
208    using RightOperand = If_t< IsExpression_v<MT>, const MT, const MT& >;
209 
210    //! Type for the assignment of the left-hand side sparse vector operand.
211    using LT = If_t< evaluateVector, const VRT, VCT >;
212 
213    //! Type for the assignment of the right-hand side dense matrix operand.
214    using RT = If_t< evaluateMatrix, const MRT, MCT >;
215    //**********************************************************************************************
216 
217    //**Compilation flags***************************************************************************
218    //! Compilation switch for the expression template evaluation strategy.
219    static constexpr bool simdEnabled =
220       ( !IsDiagonal_v<MT> &&
221         MT::simdEnabled &&
222         HasSIMDAdd_v<VET,MET> &&
223         HasSIMDMult_v<VET,MET> );
224 
225    //! Compilation switch for the expression template assignment strategy.
226    static constexpr bool smpAssignable =
227       ( !evaluateVector && VT::smpAssignable && !evaluateMatrix && MT::smpAssignable );
228    //**********************************************************************************************
229 
230    //**SIMD properties*****************************************************************************
231    //! The number of elements packed within a single SIMD element.
232    static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
233    //**********************************************************************************************
234 
235    //**Constructor*********************************************************************************
236    /*!\brief Constructor for the TSVecDMatMultExpr class.
237    //
238    // \param vec The left-hand side sparse vector operand of the multiplication expression.
239    // \param mat The right-hand side dense matrix operand of the multiplication expression.
240    */
TSVecDMatMultExpr(const VT & vec,const MT & mat)241    inline TSVecDMatMultExpr( const VT& vec, const MT& mat ) noexcept
242       : vec_( vec )  // Left-hand side sparse vector of the multiplication expression
243       , mat_( mat )  // Right-hand side dense matrix of the multiplication expression
244    {
245       BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
246    }
247    //**********************************************************************************************
248 
249    //**Subscript operator**************************************************************************
250    /*!\brief Subscript operator for the direct access to the vector elements.
251    //
252    // \param index Access index. The index has to be in the range \f$[0..N-1]\f$.
253    // \return The resulting value.
254    */
255    inline ReturnType operator[]( size_t index ) const {
256       BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
257 
258       if( IsDiagonal_v<MT> )
259       {
260          return vec_[index] * mat_(index,index);
261       }
262       else if( IsLower_v<MT> )
263       {
264          const size_t begin( IsStrictlyLower_v<MT> ? index+1UL : index );
265          const size_t n    ( mat_.rows() - begin );
266          return subvector( vec_, begin, n, unchecked ) *
267                 subvector( column( mat_, index, unchecked ), begin, n, unchecked );
268       }
269       else if( IsUpper_v<MT> )
270       {
271          const size_t n( IsStrictlyUpper_v<MT> ? index : index+1UL );
272          return subvector( vec_, 0UL, n, unchecked ) *
273                 subvector( column( mat_, index, unchecked ), 0UL, n, unchecked );
274       }
275       else
276       {
277          return vec_ * column( mat_, index, unchecked );
278       }
279    }
280    //**********************************************************************************************
281 
282    //**At function*********************************************************************************
283    /*!\brief Checked access to the vector elements.
284    //
285    // \param index Access index. The index has to be in the range \f$[0..N-1]\f$.
286    // \return The resulting value.
287    // \exception std::out_of_range Invalid vector access index.
288    */
at(size_t index)289    inline ReturnType at( size_t index ) const {
290       if( index >= mat_.columns() ) {
291          BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
292       }
293       return (*this)[index];
294    }
295    //**********************************************************************************************
296 
297    //**Size function*******************************************************************************
298    /*!\brief Returns the current size/dimension of the vector.
299    //
300    // \return The size of the vector.
301    */
size()302    inline size_t size() const noexcept {
303       return mat_.columns();
304    }
305    //**********************************************************************************************
306 
307    //**Left operand access*************************************************************************
308    /*!\brief Returns the left-hand side sparse vector operand.
309    //
310    // \return The left-hand side sparse vector operand.
311    */
leftOperand()312    inline LeftOperand leftOperand() const noexcept {
313       return vec_;
314    }
315    //**********************************************************************************************
316 
317    //**Right operand access************************************************************************
318    /*!\brief Returns the right-hand side dense matrix operand.
319    //
320    // \return The right-hand side dense matrix operand.
321    */
rightOperand()322    inline RightOperand rightOperand() const noexcept {
323       return mat_;
324    }
325    //**********************************************************************************************
326 
327    //**********************************************************************************************
328    /*!\brief Returns whether the expression can alias with the given address \a alias.
329    //
330    // \param alias The alias to be checked.
331    // \return \a true in case the expression can alias, \a false otherwise.
332    */
333    template< typename T >
canAlias(const T * alias)334    inline bool canAlias( const T* alias ) const noexcept {
335       return vec_.isAliased( alias ) || mat_.isAliased( alias );
336    }
337    //**********************************************************************************************
338 
339    //**********************************************************************************************
340    /*!\brief Returns whether the expression is aliased with the given address \a alias.
341    //
342    // \param alias The alias to be checked.
343    // \return \a true in case an alias effect is detected, \a false otherwise.
344    */
345    template< typename T >
isAliased(const T * alias)346    inline bool isAliased( const T* alias ) const noexcept {
347       return vec_.isAliased( alias ) || mat_.isAliased( alias );
348    }
349    //**********************************************************************************************
350 
351    //**********************************************************************************************
352    /*!\brief Returns whether the operands of the expression are properly aligned in memory.
353    //
354    // \return \a true in case the operands are aligned, \a false if not.
355    */
isAligned()356    inline bool isAligned() const noexcept {
357       return mat_.isAligned();
358    }
359    //**********************************************************************************************
360 
361    //**********************************************************************************************
362    /*!\brief Returns whether the expression can be used in SMP assignments.
363    //
364    // \return \a true in case the expression can be used in SMP assignments, \a false if not.
365    */
canSMPAssign()366    inline bool canSMPAssign() const noexcept {
367       return ( size() > SMP_TSVECDMATMULT_THRESHOLD );
368    }
369    //**********************************************************************************************
370 
371  private:
372    //**Member variables****************************************************************************
373    LeftOperand  vec_;  //!< Left-hand side sparse vector of the multiplication expression.
374    RightOperand mat_;  //!< Right-hand side dense matrix of the multiplication expression.
375    //**********************************************************************************************
376 
377    //**Assignment to dense vectors*****************************************************************
378    /*! \cond BLAZE_INTERNAL */
379    /*!\brief Assignment of a transpose sparse vector-dense matrix multiplication to a dense vector
380    //        (\f$ \vec{y}^T=\vec{x}^T*A \f$).
381    // \ingroup dense_vector
382    //
383    // \param lhs The target left-hand side dense vector.
384    // \param rhs The right-hand side multiplication expression to be assigned.
385    // \return void
386    //
387    // This function implements the performance optimized assignment of a transpose sparse vector-
388    // dense matrix multiplication expression to a dense vector.
389    */
390    template< typename VT2 >  // Type of the target dense vector
assign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)391    friend inline void assign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
392    {
393       BLAZE_FUNCTION_TRACE;
394 
395       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
396 
397       // Evaluation of the left-hand side sparse vector operand
398       LT x( serial( rhs.vec_ ) );
399       if( x.nonZeros() == 0UL ) {
400          reset( *lhs );
401          return;
402       }
403 
404       // Evaluation of the right-hand side dense matrix operand
405       RT A( serial( rhs.mat_ ) );
406 
407       // Checking the evaluated operands
408       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
409       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
410       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
411       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
412 
413       // Performing the sparse vector-dense matrix multiplication
414       TSVecDMatMultExpr::selectAssignKernel( *lhs, x, A );
415    }
416    /*! \endcond */
417    //**********************************************************************************************
418 
419    //**Default assignment to dense vectors*********************************************************
420    /*! \cond BLAZE_INTERNAL */
421    /*!\brief Default assignment of a transpose sparse vector-dense matrix multiplication
422    //        (\f$ \vec{y}^T=\vec{x}^T*A \f$).
423    // \ingroup dense_vector
424    //
425    // \param y The target left-hand side dense vector.
426    // \param x The left-hand side sparse vector operand.
427    // \param A The right-hand side dense matrix operand.
428    // \return void
429    //
430    // This function implements the default assignment kernel for the transpose sparse vector-
431    // dense matrix multiplication.
432    */
433    template< typename VT1    // Type of the left-hand side target vector
434            , typename VT2    // Type of the left-hand side vector operand
435            , typename MT1 >  // Type of the right-hand side matrix operand
436    static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
437       -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
438    {
439       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
440 
441       const size_t N( A.columns() );
442 
443       auto element( x.begin() );
444       const auto end( x.end() );
445 
446       size_t last( 0UL );
447 
448       if( IsUpper_v<MT1> ) {
449          const size_t jend( IsStrictlyUpper_v<MT1> ? element->index()+1UL : element->index() );
450          for( size_t j=0UL; j<jend; ++j )
451             reset( y[j] );
452       }
453 
454       for( ; element!=end; ++element )
455       {
456          const size_t index( element->index() );
457 
458          if( IsDiagonal_v<MT1> )
459          {
460             for( size_t j=last; j<index; ++j )
461                reset( y[j] );
462 
463             y[index] = element->value() * A(index,index);
464             last = index + 1UL;
465          }
466          else
467          {
468             const size_t jbegin( ( IsUpper_v<MT1> )
469                                  ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
470                                  :( 0UL ) );
471             const size_t jend( ( IsLower_v<MT1> )
472                                ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
473                                :( N ) );
474             BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
475 
476             for( size_t j=jbegin; j<last; ++j ) {
477                y[j] += element->value() * A(index,j);
478             }
479             for( size_t j=last; j<jend; ++j ) {
480                y[j] = element->value() * A(index,j);
481             }
482 
483             last = jend;
484          }
485       }
486 
487       if( IsLower_v<MT1> ) {
488          for( size_t j=last; j<N; ++j )
489             reset( y[j] );
490       }
491    }
492    /*! \endcond */
493    //**********************************************************************************************
494 
495    //**Optimized assignment to dense vectors*******************************************************
496    /*! \cond BLAZE_INTERNAL */
497    /*!\brief Optimized assignment of a transpose sparse vector-dense matrix multiplication
498    //        (\f$ \vec{y}^T=\vec{x}^T*A \f$).
499    // \ingroup dense_vector
500    //
501    // \param y The target left-hand side dense vector.
502    // \param x The left-hand side sparse vector operand.
503    // \param A The right-hand side dense matrix operand.
504    // \return void
505    //
506    // This function implements the optimized assignment kernel for the transpose sparse vector-
507    // dense matrix multiplication.
508    */
509    template< typename VT1    // Type of the left-hand side target vector
510            , typename VT2    // Type of the left-hand side vector operand
511            , typename MT1 >  // Type of the right-hand side matrix operand
512    static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
513       -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
514    {
515       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
516 
517       const size_t N( A.columns() );
518 
519       auto element( x.begin() );
520       const auto end( x.end() );
521 
522       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
523       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
524 
525       if( ipos > 3UL )
526       {
527          const size_t i1( element->index() );
528          const VET    v1( element->value() );
529          ++element;
530          const size_t i2( element->index() );
531          const VET    v2( element->value() );
532          ++element;
533          const size_t i3( element->index() );
534          const VET    v3( element->value() );
535          ++element;
536          const size_t i4( element->index() );
537          const VET    v4( element->value() );
538          ++element;
539 
540          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
541 
542          for( size_t j=0UL; j<N; ++j ) {
543             y[j] = v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
544          }
545       }
546       else
547       {
548          const size_t i1( element->index() );
549          const VET    v1( element->value() );
550          ++element;
551 
552          for( size_t j=0UL; j<N; ++j ) {
553             y[j] = v1 * A(i1,j);
554          }
555       }
556 
557       for( size_t i=(ipos>3UL)?(4UL):(1UL); (i+4UL)<=ipos; i+=4UL )
558       {
559          const size_t i1( element->index() );
560          const VET    v1( element->value() );
561          ++element;
562          const size_t i2( element->index() );
563          const VET    v2( element->value() );
564          ++element;
565          const size_t i3( element->index() );
566          const VET    v3( element->value() );
567          ++element;
568          const size_t i4( element->index() );
569          const VET    v4( element->value() );
570          ++element;
571 
572          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
573 
574          const size_t jbegin( ( IsUpper_v<MT1> )
575                               ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
576                               :( 0UL ) );
577          const size_t jend( ( IsLower_v<MT1> )
578                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
579                             :( N ) );
580          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
581 
582          for( size_t j=jbegin; j<jend; ++j ) {
583             y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
584          }
585       }
586       for( ; element!=end; ++element )
587       {
588          const size_t i1( element->index() );
589          const VET    v1( element->value() );
590 
591          const size_t jbegin( ( IsUpper_v<MT1> )
592                               ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
593                               :( 0UL ) );
594          const size_t jend( ( IsLower_v<MT1> )
595                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
596                             :( N ) );
597          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
598 
599          for( size_t j=jbegin; j<jend; ++j ) {
600             y[j] += v1 * A(i1,j);
601          }
602       }
603    }
604    /*! \endcond */
605    //**********************************************************************************************
606 
607    //**Vectorized assignment to dense vectors******************************************************
608    /*! \cond BLAZE_INTERNAL */
609    /*!\brief Vectorized assignment of a transpose sparse vector-dense matrix multiplication
610    //        (\f$ \vec{y}^T=\vec{x}^T*A \f$).
611    // \ingroup dense_vector
612    //
613    // \param y The target left-hand side dense vector.
614    // \param x The left-hand side sparse vector operand.
615    // \param A The right-hand side dense matrix operand.
616    // \return void
617    //
618    // This function implements the vectorized assignment kernel for the transpose sparse vector-
619    // dense matrix multiplication.
620    */
621    template< typename VT1    // Type of the left-hand side target vector
622            , typename VT2    // Type of the left-hand side vector operand
623            , typename MT1 >  // Type of the right-hand side matrix operand
624    static inline auto selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
625       -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
626    {
627       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
628 
629       constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
630 
631       const size_t N( A.columns() );
632 
633       auto element( x.begin() );
634       const auto end( x.end() );
635 
636       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
637       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
638 
639       if( ipos > 3UL )
640       {
641          const size_t i1( element->index() );
642          const VET    v1( element->value() );
643          ++element;
644          const size_t i2( element->index() );
645          const VET    v2( element->value() );
646          ++element;
647          const size_t i3( element->index() );
648          const VET    v3( element->value() );
649          ++element;
650          const size_t i4( element->index() );
651          const VET    v4( element->value() );
652          ++element;
653 
654          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
655 
656          const SIMDType xmm1( set( v1 ) );
657          const SIMDType xmm2( set( v2 ) );
658          const SIMDType xmm3( set( v3 ) );
659          const SIMDType xmm4( set( v4 ) );
660 
661          const size_t jpos( remainder ? prevMultiple( N, SIMDSIZE ) : N );
662          BLAZE_INTERNAL_ASSERT( jpos <= N, "Invalid end calculation" );
663 
664          size_t j( 0UL );
665 
666          for( ; j<jpos; j+=SIMDSIZE ) {
667             y.store( j, xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
668          }
669          for( ; remainder && j<N; ++j ) {
670             y[j] = v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
671          }
672       }
673       else
674       {
675          const size_t i1( element->index() );
676          const VET    v1( element->value() );
677          ++element;
678 
679          const SIMDType xmm1( set( v1 ) );
680 
681          const size_t jpos( remainder ? prevMultiple( N, SIMDSIZE ) : N );
682          BLAZE_INTERNAL_ASSERT( jpos <= N, "Invalid end calculation" );
683 
684          size_t j( 0UL );
685 
686          for( ; j<jpos; j+=SIMDSIZE ) {
687             y.store( j, xmm1 * A.load(i1,j) );
688          }
689          for( ; remainder && j<N; ++j ) {
690             y[j] = v1 * A(i1,j);
691          }
692       }
693 
694       for( size_t i=(ipos>3UL)?(4UL):(1UL); (i+4UL)<=ipos; i+=4UL )
695       {
696          const size_t i1( element->index() );
697          const VET    v1( element->value() );
698          ++element;
699          const size_t i2( element->index() );
700          const VET    v2( element->value() );
701          ++element;
702          const size_t i3( element->index() );
703          const VET    v3( element->value() );
704          ++element;
705          const size_t i4( element->index() );
706          const VET    v4( element->value() );
707          ++element;
708 
709          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
710 
711          const SIMDType xmm1( set( v1 ) );
712          const SIMDType xmm2( set( v2 ) );
713          const SIMDType xmm3( set( v3 ) );
714          const SIMDType xmm4( set( v4 ) );
715 
716          const size_t jbegin( ( IsUpper_v<MT1> )
717                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
718                               :( 0UL ) );
719          const size_t jend( ( IsLower_v<MT1> )
720                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
721                             :( N ) );
722          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
723 
724          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
725          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
726 
727          size_t j( jbegin );
728 
729          for( ; j<jpos; j+=SIMDSIZE ) {
730             y.store( j, y.load(j) + xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
731          }
732          for( ; remainder && j<jend; ++j ) {
733             y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
734          }
735       }
736       for( ; element!=end; ++element )
737       {
738          const size_t i1( element->index() );
739          const VET    v1( element->value() );
740 
741          const SIMDType xmm1( set( v1 ) );
742 
743          const size_t jbegin( ( IsUpper_v<MT1> )
744                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
745                               :( 0UL ) );
746          const size_t jend( ( IsLower_v<MT1> )
747                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
748                             :( N ) );
749          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
750 
751          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
752          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
753 
754          size_t j( jbegin );
755 
756          for( ; j<jpos; j+=SIMDSIZE ) {
757             y.store( j, y.load(j) + xmm1 * A.load(i1,j) );
758          }
759          for( ; remainder && j<jend; ++j ) {
760             y[j] += v1 * A(i1,j);
761          }
762       }
763    }
764    /*! \endcond */
765    //**********************************************************************************************
766 
767    //**Assignment to sparse vectors****************************************************************
768    /*! \cond BLAZE_INTERNAL */
769    /*!\brief Assignment of a transpose sparse vector-dense matrix multiplication to a sparse
770    //        vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
771    // \ingroup dense_vector
772    //
773    // \param lhs The target left-hand side sparse vector.
774    // \param rhs The right-hand side multiplication expression to be assigned.
775    // \return void
776    //
777    // This function implements the performance optimized assignment of a transpose sparse vector-
778    // dense matrix multiplication expression to a sparse vector.
779    */
780    template< typename VT2 >  // Type of the target sparse vector
assign(SparseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)781    friend inline void assign( SparseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
782    {
783       BLAZE_FUNCTION_TRACE;
784 
785       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
786       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
787       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
788 
789       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
790 
791       const ResultType tmp( serial( rhs ) );
792       assign( *lhs, tmp );
793    }
794    /*! \endcond */
795    //**********************************************************************************************
796 
797    //**Addition assignment to dense vectors********************************************************
798    /*!\brief Addition assignment of a transpose sparse vector-dense matrix multiplication to a
799    //        dense vector (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
800    // \ingroup dense_vector
801    //
802    // \param lhs The target left-hand side dense vector.
803    // \param rhs The right-hand side multiplication expression to be added.
804    // \return void
805    //
806    // This function implements the performance optimized addition assignment of a transpose sparse
807    // vector-dense matrix multiplication expression to a dense vector.
808    */
809    template< typename VT2 >  // Type of the target dense vector
addAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)810    friend inline void addAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
811    {
812       BLAZE_FUNCTION_TRACE;
813 
814       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
815 
816       // Evaluation of the left-hand side sparse vector operand
817       LT x( serial( rhs.vec_ ) );
818       if( x.nonZeros() == 0UL ) return;
819 
820       // Evaluation of the right-hand side dense matrix operand
821       RT A( serial( rhs.mat_ ) );
822 
823       // Checking the evaluated operands
824       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
825       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
826       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
827       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
828 
829       // Performing the sparse vector-dense matrix multiplication
830       TSVecDMatMultExpr::selectAddAssignKernel( *lhs, x, A );
831    }
832    //**********************************************************************************************
833 
834    //**Default addition assignment to dense vectors************************************************
835    /*! \cond BLAZE_INTERNAL */
836    /*!\brief Default addition assignment of a transpose sparse vector-dense matrix multiplication
837    //        (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
838    // \ingroup dense_vector
839    //
840    // \param y The target left-hand side dense vector.
841    // \param x The left-hand side sparse vector operand.
842    // \param A The right-hand side dense matrix operand.
843    // \return void
844    //
845    // This function implements the default addition assignment kernel for the transpose sparse
846    // vector-dense matrix multiplication.
847    */
848    template< typename VT1    // Type of the left-hand side target vector
849            , typename VT2    // Type of the left-hand side vector operand
850            , typename MT1 >  // Type of the right-hand side matrix operand
851    static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
852       -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
853    {
854       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
855 
856       const size_t N( A.columns() );
857 
858       auto element( x.begin() );
859       const auto end( x.end() );
860 
861       for( ; element!=end; ++element )
862       {
863          const size_t index( element->index() );
864 
865          if( IsDiagonal_v<MT1> )
866          {
867             y[index] += A(index,index) * element->value();
868          }
869          else
870          {
871             const size_t jbegin( ( IsUpper_v<MT1> )
872                                  ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
873                                  :( 0UL ) );
874             const size_t jend( ( IsLower_v<MT1> )
875                                ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
876                                :( N ) );
877             BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
878 
879             for( size_t j=jbegin; j<jend; ++j ) {
880                y[j] += element->value() * A(index,j);
881             }
882          }
883       }
884    }
885    /*! \endcond */
886    //**********************************************************************************************
887 
888    //**Optimized addition assignment to dense vectors**********************************************
889    /*! \cond BLAZE_INTERNAL */
890    /*!\brief Optimized addition assignment of a transpose sparse vector-dense matrix multiplication
891    //        (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
892    // \ingroup dense_vector
893    //
894    // \param y The target left-hand side dense vector.
895    // \param x The left-hand side sparse vector operand.
896    // \param A The right-hand side dense matrix operand.
897    // \return void
898    //
899    // This function implements the optimized addition assignment kernel for the transpose sparse
900    // vector-dense matrix multiplication.
901    */
902    template< typename VT1    // Type of the left-hand side target vector
903            , typename VT2    // Type of the left-hand side vector operand
904            , typename MT1 >  // Type of the right-hand side matrix operand
905    static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
906       -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
907    {
908       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
909 
910       const size_t N( A.columns() );
911 
912       auto element( x.begin() );
913       const auto end( x.end() );
914 
915       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
916       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
917 
918       for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
919       {
920          const size_t i1( element->index() );
921          const VET    v1( element->value() );
922          ++element;
923          const size_t i2( element->index() );
924          const VET    v2( element->value() );
925          ++element;
926          const size_t i3( element->index() );
927          const VET    v3( element->value() );
928          ++element;
929          const size_t i4( element->index() );
930          const VET    v4( element->value() );
931          ++element;
932 
933          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
934 
935          const size_t jbegin( ( IsUpper_v<MT1> )
936                               ?( IsStrictlyUpper_v<MT1> ? i+1UL : i1 )
937                               :( 0UL ) );
938          const size_t jend( ( IsLower_v<MT1> )
939                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
940                             :( N ) );
941          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
942 
943          for( size_t j=jbegin; j<jend; ++j ) {
944             y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
945          }
946       }
947       for( ; element!=end; ++element )
948       {
949          const size_t i1( element->index() );
950          const VET    v1( element->value() );
951 
952          const size_t jbegin( ( IsUpper_v<MT1> )
953                               ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
954                               :( 0UL ) );
955          const size_t jend( ( IsLower_v<MT1> )
956                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
957                             :( N ) );
958          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
959 
960          for( size_t j=jbegin; j<jend; ++j ) {
961             y[j] += v1 * A(i1,j);
962          }
963       }
964    }
965    /*! \endcond */
966    //**********************************************************************************************
967 
968    //**Vectorized addition assignment to dense vectors*********************************************
969    /*! \cond BLAZE_INTERNAL */
970    /*!\brief Vectorized addition assignment of a transpose sparse vector-dense matrix multiplication
971    //        (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
972    // \ingroup dense_vector
973    //
974    // \param y The target left-hand side dense vector.
975    // \param x The left-hand side sparse vector operand.
976    // \param A The right-hand side dense matrix operand.
977    // \return void
978    //
979    // This function implements the vectorized addition assignment kernel for the transpose sparse
980    // vector-dense matrix multiplication.
981    */
982    template< typename VT1    // Type of the left-hand side target vector
983            , typename VT2    // Type of the left-hand side vector operand
984            , typename MT1 >  // Type of the right-hand side matrix operand
985    static inline auto selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
986       -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
987    {
988       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
989 
990       constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
991 
992       const size_t N( A.columns() );
993 
994       auto element( x.begin() );
995       const auto end( x.end() );
996 
997       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
998       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
999 
1000       for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1001       {
1002          const size_t i1( element->index() );
1003          const VET    v1( element->value() );
1004          ++element;
1005          const size_t i2( element->index() );
1006          const VET    v2( element->value() );
1007          ++element;
1008          const size_t i3( element->index() );
1009          const VET    v3( element->value() );
1010          ++element;
1011          const size_t i4( element->index() );
1012          const VET    v4( element->value() );
1013          ++element;
1014 
1015          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1016 
1017          const SIMDType xmm1( set( v1 ) );
1018          const SIMDType xmm2( set( v2 ) );
1019          const SIMDType xmm3( set( v3 ) );
1020          const SIMDType xmm4( set( v4 ) );
1021 
1022          const size_t jbegin( ( IsUpper_v<MT1> )
1023                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1024                               :( 0UL ) );
1025          const size_t jend( ( IsLower_v<MT1> )
1026                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1027                             :( N ) );
1028          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1029 
1030          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1031          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1032 
1033          size_t j( jbegin );
1034 
1035          for( ; j<jpos; j+=SIMDSIZE ) {
1036             y.store( j, y.load(j) + xmm1 * A.load(i1,j) + xmm2 * A.load(i2,j) + xmm3 * A.load(i3,j) + xmm4 * A.load(i4,j) );
1037          }
1038          for( ; remainder && j<jend; ++j ) {
1039             y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1040          }
1041       }
1042       for( ; element!=end; ++element )
1043       {
1044          const size_t i1( element->index() );
1045          const VET    v1( element->value() );
1046 
1047          const SIMDType xmm1( set( v1 ) );
1048 
1049          const size_t jbegin( ( IsUpper_v<MT1> )
1050                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1051                               :( 0UL ) );
1052          const size_t jend( ( IsLower_v<MT1> )
1053                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1054                             :( N ) );
1055          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1056 
1057          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1058          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1059 
1060          size_t j( jbegin );
1061 
1062          for( ; j<jpos; j+=SIMDSIZE ) {
1063             y.store( j, y.load(j) + xmm1 * A.load(i1,j) );
1064          }
1065          for( ; remainder && j<jend; ++j ) {
1066             y[j] += v1 * A(i1,j);
1067          }
1068       }
1069    }
1070    /*! \endcond */
1071    //**********************************************************************************************
1072 
1073    //**Addition assignment to sparse vectors*******************************************************
1074    // No special implementation for the addition assignment to sparse vectors.
1075    //**********************************************************************************************
1076 
1077    //**Subtraction assignment to dense vectors*****************************************************
1078    /*!\brief Subtraction assignment of a transpose sparse vector-dense matrix multiplication to a
1079    //        dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1080    // \ingroup dense_vector
1081    //
1082    // \param lhs The target left-hand side dense vector.
1083    // \param rhs The right-hand side multiplication expression to be subtracted.
1084    // \return void
1085    //
1086    // This function implements the performance optimized subtraction assignment of a transpose
1087    // sparse vector-dense matrix multiplication expression to a dense vector.
1088    */
1089    template< typename VT2 >  // Type of the target dense vector
subAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1090    friend inline void subAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1091    {
1092       BLAZE_FUNCTION_TRACE;
1093 
1094       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1095 
1096       // Evaluation of the left-hand side sparse vector operand
1097       LT x( serial( rhs.vec_ ) );
1098       if( x.nonZeros() == 0UL ) return;
1099 
1100       // Evaluation of the right-hand side dense matrix operand
1101       RT A( serial( rhs.mat_ ) );
1102 
1103       // Checking the evaluated operands
1104       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
1105       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
1106       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1107       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
1108 
1109       // Performing the sparse vector-dense matrix multiplication
1110       TSVecDMatMultExpr::selectSubAssignKernel( *lhs, x, A );
1111    }
1112    //**********************************************************************************************
1113 
1114    //**Default subtraction assignment to dense vectors*********************************************
1115    /*! \cond BLAZE_INTERNAL */
1116    /*!\brief Default subtraction assignment of a transpose sparse vector-dense matrix
1117    //        multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1118    // \ingroup dense_vector
1119    //
1120    // \param y The target left-hand side dense vector.
1121    // \param x The left-hand side sparse vector operand.
1122    // \param A The right-hand side dense matrix operand.
1123    // \return void
1124    //
1125    // This function implements the default subtraction assignment kernel for the transpose
1126    // sparse vector-dense matrix multiplication.
1127    */
1128    template< typename VT1    // Type of the left-hand side target vector
1129            , typename VT2    // Type of the left-hand side vector operand
1130            , typename MT1 >  // Type of the right-hand side matrix operand
1131    static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1132       -> EnableIf_t< UseDefaultKernel_v<VT1,VT2,MT1> >
1133    {
1134       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1135 
1136       const size_t N( A.columns() );
1137 
1138       auto element( x.begin() );
1139       const auto end( x.end() );
1140 
1141       for( ; element!=end; ++element )
1142       {
1143          const size_t index( element->index() );
1144 
1145          if( IsDiagonal_v<MT1> )
1146          {
1147             y[index] -= A(index,index) * element->value();
1148          }
1149          else
1150          {
1151             const size_t jbegin( ( IsUpper_v<MT1> )
1152                                  ?( IsStrictlyUpper_v<MT1> ? index+1UL : index )
1153                                  :( 0UL ) );
1154             const size_t jend( ( IsLower_v<MT1> )
1155                                ?( IsStrictlyLower_v<MT1> ? index : index+1UL )
1156                                :( N ) );
1157             BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1158 
1159             for( size_t j=jbegin; j<jend; ++j ) {
1160                y[j] -= element->value() * A(index,j);
1161             }
1162          }
1163       }
1164    }
1165    /*! \endcond */
1166    //**********************************************************************************************
1167 
1168    //**Optimized subtraction assignment to dense vectors*******************************************
1169    /*! \cond BLAZE_INTERNAL */
1170    /*!\brief Optimized subtraction assignment of a transpose sparse vector-dense matrix
1171    //        multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1172    // \ingroup dense_vector
1173    //
1174    // \param y The target left-hand side dense vector.
1175    // \param x The left-hand side sparse vector operand.
1176    // \param A The right-hand side dense matrix operand.
1177    // \return void
1178    //
1179    // This function implements the optimized subtraction assignment kernel for the transpose
1180    // sparse vector-dense matrix multiplication.
1181    */
1182    template< typename VT1    // Type of the left-hand side target vector
1183            , typename VT2    // Type of the left-hand side vector operand
1184            , typename MT1 >  // Type of the right-hand side matrix operand
1185    static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1186       -> EnableIf_t< UseOptimizedKernel_v<VT1,VT2,MT1> >
1187    {
1188       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1189 
1190       const size_t N( A.columns() );
1191 
1192       auto element( x.begin() );
1193       const auto end( x.end() );
1194 
1195       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
1196       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
1197 
1198       for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1199       {
1200          const size_t i1( element->index() );
1201          const VET    v1( element->value() );
1202          ++element;
1203          const size_t i2( element->index() );
1204          const VET    v2( element->value() );
1205          ++element;
1206          const size_t i3( element->index() );
1207          const VET    v3( element->value() );
1208          ++element;
1209          const size_t i4( element->index() );
1210          const VET    v4( element->value() );
1211          ++element;
1212 
1213          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1214 
1215          const size_t jbegin( ( IsUpper_v<MT1> )
1216                               ?( IsStrictlyUpper_v<MT1> ? i+1UL : i1 )
1217                               :( 0UL ) );
1218          const size_t jend( ( IsLower_v<MT1> )
1219                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1220                             :( N ) );
1221          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1222 
1223          for( size_t j=jbegin; j<jend; ++j ) {
1224             y[j] -= v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1225          }
1226       }
1227       for( ; element!=end; ++element )
1228       {
1229          const size_t i1( element->index() );
1230          const VET    v1( element->value() );
1231 
1232          const size_t jbegin( ( IsUpper_v<MT1> )
1233                               ?( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 )
1234                               :( 0UL ) );
1235          const size_t jend( ( IsLower_v<MT1> )
1236                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1237                             :( N ) );
1238          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1239 
1240          for( size_t j=jbegin; j<jend; ++j ) {
1241             y[j] -= v1 * A(i1,j);
1242          }
1243       }
1244    }
1245    /*! \endcond */
1246    //**********************************************************************************************
1247 
1248    //**Vectorized subtraction assignment to dense vectors******************************************
1249    /*! \cond BLAZE_INTERNAL */
1250    /*!\brief Vectorized subtraction assignment of a transpose sparse vector-dense matrix
1251    //        multiplication (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1252    // \ingroup dense_vector
1253    //
1254    // \param y The target left-hand side dense vector.
1255    // \param x The left-hand side sparse vector operand.
1256    // \param A The right-hand side dense matrix operand.
1257    // \return void
1258    //
1259    // This function implements the vectorized subtraction assignment kernel for the transpose
1260    // sparse vector-dense matrix multiplication.
1261    */
1262    template< typename VT1    // Type of the left-hand side target vector
1263            , typename VT2    // Type of the left-hand side vector operand
1264            , typename MT1 >  // Type of the right-hand side matrix operand
1265    static inline auto selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1266       -> EnableIf_t< UseVectorizedKernel_v<VT1,VT2,MT1> >
1267    {
1268       BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1269 
1270       constexpr bool remainder( !IsPadded_v<VT1> || !IsPadded_v<MT1> );
1271 
1272       const size_t N( A.columns() );
1273 
1274       auto element( x.begin() );
1275       const auto end( x.end() );
1276 
1277       const size_t ipos( prevMultiple( x.nonZeros(), 4UL ) );
1278       BLAZE_INTERNAL_ASSERT( ipos <= x.nonZeros(), "Invalid end calculation" );
1279 
1280       for( size_t i=0UL; (i+4UL)<=ipos; i+=4UL )
1281       {
1282          const size_t i1( element->index() );
1283          const VET    v1( element->value() );
1284          ++element;
1285          const size_t i2( element->index() );
1286          const VET    v2( element->value() );
1287          ++element;
1288          const size_t i3( element->index() );
1289          const VET    v3( element->value() );
1290          ++element;
1291          const size_t i4( element->index() );
1292          const VET    v4( element->value() );
1293          ++element;
1294 
1295          BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse vector index detected" );
1296 
1297          const SIMDType xmm1( set( v1 ) );
1298          const SIMDType xmm2( set( v2 ) );
1299          const SIMDType xmm3( set( v3 ) );
1300          const SIMDType xmm4( set( v4 ) );
1301 
1302          const size_t jbegin( ( IsUpper_v<MT1> )
1303                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1304                               :( 0UL ) );
1305          const size_t jend( ( IsLower_v<MT1> )
1306                             ?( IsStrictlyLower_v<MT1> ? i4 : i4+1UL )
1307                             :( N ) );
1308          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1309 
1310          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1311          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1312 
1313          size_t j( jbegin );
1314 
1315          for( ; j<jpos; j+=SIMDSIZE ) {
1316             y.store( j, y.load(j) - xmm1 * A.load(i1,j) - xmm2 * A.load(i2,j) - xmm3 * A.load(i3,j) - xmm4 * A.load(i4,j) );
1317          }
1318          for( ; remainder && j<jend; ++j ) {
1319             y[j] -= v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
1320          }
1321       }
1322       for( ; element!=end; ++element )
1323       {
1324          const size_t i1( element->index() );
1325          const VET    v1( element->value() );
1326 
1327          const SIMDType xmm1( set( v1 ) );
1328 
1329          const size_t jbegin( ( IsUpper_v<MT1> )
1330                               ?( prevMultiple( ( IsStrictlyUpper_v<MT1> ? i1+1UL : i1 ), SIMDSIZE ) )
1331                               :( 0UL ) );
1332          const size_t jend( ( IsLower_v<MT1> )
1333                             ?( IsStrictlyLower_v<MT1> ? i1 : i1+1UL )
1334                             :( N ) );
1335          BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1336 
1337          const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1338          BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1339 
1340          size_t j( jbegin );
1341 
1342          for( ; j<jpos; j+=SIMDSIZE ) {
1343             y.store( j, y.load(j) - xmm1 * A.load(i1,j) );
1344          }
1345          for( ; remainder && j<jend; ++j ) {
1346             y[j] -= v1 * A(i1,j);
1347          }
1348       }
1349    }
1350    /*! \endcond */
1351    //**********************************************************************************************
1352 
1353    //**Subtraction assignment to sparse vectors****************************************************
1354    // No special implementation for the subtraction assignment to sparse vectors.
1355    //**********************************************************************************************
1356 
1357    //**Multiplication assignment to dense vectors**************************************************
1358    /*!\brief Multiplication assignment of a transpose sparse vector-dense matrix multiplication
1359    //        to a dense vector (\f$ \vec{y}^T*=\vec{x}^T*A \f$).
1360    // \ingroup dense_vector
1361    //
1362    // \param lhs The target left-hand side dense vector.
1363    // \param rhs The right-hand side multiplication expression to be multiplied.
1364    // \return void
1365    //
1366    // This function implements the performance optimized multiplication assignment of a transpose
1367    // sparse vector-dense matrix multiplication expression to a dense vector.
1368    */
1369    template< typename VT2 >  // Type of the target dense vector
multAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1370    friend inline void multAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1371    {
1372       BLAZE_FUNCTION_TRACE;
1373 
1374       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1375       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1376       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1377 
1378       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1379 
1380       const ResultType tmp( serial( rhs ) );
1381       multAssign( *lhs, tmp );
1382    }
1383    //**********************************************************************************************
1384 
1385    //**Multiplication assignment to sparse vectors*************************************************
1386    // No special implementation for the multiplication assignment to sparse vectors.
1387    //**********************************************************************************************
1388 
1389    //**Division assignment to dense vectors********************************************************
1390    /*!\brief Division assignment of a transpose sparse vector-dense matrix multiplication to a
1391    //        dense vector (\f$ \vec{y}^T/=\vec{x}^T*A \f$).
1392    // \ingroup dense_vector
1393    //
1394    // \param lhs The target left-hand side dense vector.
1395    // \param rhs The right-hand side multiplication expression divisor.
1396    // \return void
1397    //
1398    // This function implements the performance optimized division assignment of a transpose sparse
1399    // vector-dense matrix multiplication expression to a dense vector.
1400    */
1401    template< typename VT2 >  // Type of the target dense vector
divAssign(DenseVector<VT2,true> & lhs,const TSVecDMatMultExpr & rhs)1402    friend inline void divAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1403    {
1404       BLAZE_FUNCTION_TRACE;
1405 
1406       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1407       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1408       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1409 
1410       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1411 
1412       const ResultType tmp( serial( rhs ) );
1413       divAssign( *lhs, tmp );
1414    }
1415    //**********************************************************************************************
1416 
1417    //**Division assignment to sparse vectors*******************************************************
1418    // No special implementation for the division assignment to sparse vectors.
1419    //**********************************************************************************************
1420 
1421    //**SMP assignment to dense vectors*************************************************************
1422    /*! \cond BLAZE_INTERNAL */
1423    /*!\brief SMP assignment of a transpose sparse vector-dense matrix multiplication to a dense
1424    //        vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1425    // \ingroup dense_vector
1426    //
1427    // \param lhs The target left-hand side dense vector.
1428    // \param rhs The right-hand side multiplication expression to be assigned.
1429    // \return void
1430    //
1431    // This function implements the performance optimized SMP assignment of a transpose sparse
1432    // vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1433    // application of the SFINAE principle, this function can only be selected by the compiler
1434    // in case the expression specific parallel evaluation strategy is selected.
1435    */
1436    template< typename VT2 >  // Type of the target dense vector
1437    friend inline auto smpAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1438       -> EnableIf_t< UseSMPAssign_v<VT2> >
1439    {
1440       BLAZE_FUNCTION_TRACE;
1441 
1442       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1443 
1444       // Evaluation of the left-hand side sparse vector operand
1445       LT x( rhs.vec_ );
1446       if( x.nonZeros() == 0UL ) {
1447          reset( *lhs );
1448          return;
1449       }
1450 
1451       // Evaluation of the right-hand side dense matrix operand
1452       RT A( rhs.mat_ );
1453 
1454       // Checking the evaluated operands
1455       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
1456       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
1457       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1458       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
1459 
1460       // Performing the sparse vector-dense matrix multiplication
1461       smpAssign( *lhs, x * A );
1462    }
1463    /*! \endcond */
1464    //**********************************************************************************************
1465 
1466    //**SMP assignment to sparse vectors************************************************************
1467    /*! \cond BLAZE_INTERNAL */
1468    /*!\brief SMP assignment of a transpose sparse vector-dense matrix multiplication to a sparse
1469    //        vector (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1470    // \ingroup dense_vector
1471    //
1472    // \param lhs The target left-hand side sparse vector.
1473    // \param rhs The right-hand side multiplication expression to be assigned.
1474    // \return void
1475    //
1476    // This function implements the performance optimized SMP assignment of a transpose sparse
1477    // vector-dense matrix multiplication expression to a sparse vector. Due to the explicit
1478    // application of the SFINAE principle, this function can only be selected by the compiler
1479    // in case the expression specific parallel evaluation strategy is selected.
1480    */
1481    template< typename VT2 >  // Type of the target sparse vector
1482    friend inline auto smpAssign( SparseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1483       -> EnableIf_t< UseSMPAssign_v<VT2> >
1484    {
1485       BLAZE_FUNCTION_TRACE;
1486 
1487       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1488       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1489       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1490 
1491       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1492 
1493       const ResultType tmp( rhs );
1494       smpAssign( *lhs, tmp );
1495    }
1496    /*! \endcond */
1497    //**********************************************************************************************
1498 
1499    //**SMP addition assignment to dense vectors****************************************************
1500    /*!\brief SMP addition assignment of a transpose sparse vector-dense matrix multiplication to
1501    //        a dense vector (\f$ \vec{y}^T+=\vec{x}^T*A \f$).
1502    // \ingroup dense_vector
1503    //
1504    // \param lhs The target left-hand side dense vector.
1505    // \param rhs The right-hand side multiplication expression to be added.
1506    // \return void
1507    //
1508    // This function implements the performance optimized SMP addition assignment of a transpose
1509    // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1510    // application of the SFINAE principle, this function can only be selected by the compiler
1511    // in case the expression specific parallel evaluation strategy is selected.
1512    */
1513    template< typename VT2 >  // Type of the target dense vector
1514    friend inline auto smpAddAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1515       -> EnableIf_t< UseSMPAssign_v<VT2> >
1516    {
1517       BLAZE_FUNCTION_TRACE;
1518 
1519       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1520 
1521       // Evaluation of the left-hand side sparse vector operand
1522       LT x( rhs.vec_ );
1523       if( x.nonZeros() == 0UL ) return;
1524 
1525       // Evaluation of the right-hand side dense matrix operand
1526       RT A( rhs.mat_ );
1527 
1528       // Checking the evaluated operands
1529       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
1530       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
1531       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1532       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
1533 
1534       // Performing the sparse vector-dense matrix multiplication
1535       smpAddAssign( *lhs, x * A );
1536    }
1537    //**********************************************************************************************
1538 
1539    //**SMP addition assignment to sparse vectors***************************************************
1540    // No special implementation for the SMP addition assignment to sparse vectors.
1541    //**********************************************************************************************
1542 
1543    //**SMP subtraction assignment to dense vectors*************************************************
1544    /*!\brief SMP subtraction assignment of a transpose sparse vector-dense matrix multiplication
1545    //        to a dense vector (\f$ \vec{y}^T-=\vec{x}^T*A \f$).
1546    // \ingroup dense_vector
1547    //
1548    // \param lhs The target left-hand side dense vector.
1549    // \param rhs The right-hand side multiplication expression to be subtracted.
1550    // \return void
1551    //
1552    // This function implements the performance optimized SMP subtraction assignment of a transpose
1553    // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1554    // application of the SFINAE principle, this function can only be selected by the compiler
1555    // in case the expression specific parallel evaluation strategy is selected.
1556    */
1557    template< typename VT2 >  // Type of the target dense vector
1558    friend inline auto smpSubAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1559       -> EnableIf_t< UseSMPAssign_v<VT2> >
1560    {
1561       BLAZE_FUNCTION_TRACE;
1562 
1563       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1564 
1565       // Evaluation of the left-hand side sparse vector operand
1566       LT x( rhs.vec_ );
1567       if( x.nonZeros() == 0UL ) return;
1568 
1569       // Evaluation of the right-hand side dense matrix operand
1570       RT A( rhs.mat_ );
1571 
1572       // Checking the evaluated operands
1573       BLAZE_INTERNAL_ASSERT( x.size()    == rhs.vec_.size()   , "Invalid vector size"       );
1574       BLAZE_INTERNAL_ASSERT( A.rows()    == rhs.mat_.rows()   , "Invalid number of rows"    );
1575       BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1576       BLAZE_INTERNAL_ASSERT( A.columns() == (*lhs).size()     , "Invalid vector size"       );
1577 
1578       // Performing the sparse vector-dense matrix multiplication
1579       smpSubAssign( *lhs, x * A );
1580    }
1581    //**********************************************************************************************
1582 
1583    //**SMP subtraction assignment to sparse vectors************************************************
1584    // No special implementation for the SMP subtraction assignment to sparse vectors.
1585    //**********************************************************************************************
1586 
1587    //**SMP multiplication assignment to dense vectors**********************************************
1588    /*!\brief SMP multiplication assignment of a transpose sparse vector-dense matrix multiplication
1589    //        to a dense vector (\f$ \vec{y}^T*=\vec{x}^T*A \f$).
1590    // \ingroup dense_vector
1591    //
1592    // \param lhs The target left-hand side dense vector.
1593    // \param rhs The right-hand side multiplication expression to be multiplied.
1594    // \return void
1595    //
1596    // This function implements the performance optimized SMP multiplication assignment of a
1597    // transpose sparse vector-dense matrix multiplication expression to a dense vector. Due
1598    // to the explicit application of the SFINAE principle, this function can only be selected
1599    // by the compiler in case the expression specific parallel evaluation strategy is selected.
1600    */
1601    template< typename VT2 >  // Type of the target dense vector
1602    friend inline auto smpMultAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1603       -> EnableIf_t< UseSMPAssign_v<VT2> >
1604    {
1605       BLAZE_FUNCTION_TRACE;
1606 
1607       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1608       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1609       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1610 
1611       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1612 
1613       const ResultType tmp( rhs );
1614       smpMultAssign( *lhs, tmp );
1615    }
1616    //**********************************************************************************************
1617 
1618    //**SMP multiplication assignment to sparse vectors*********************************************
1619    // No special implementation for the SMP multiplication assignment to sparse vectors.
1620    //**********************************************************************************************
1621 
1622    //**SMP division assignment to dense vectors****************************************************
1623    /*!\brief SMP division assignment of a transpose sparse vector-dense matrix multiplication to
1624    //        a dense vector (\f$ \vec{y}^T/=\vec{x}^T*A \f$).
1625    // \ingroup dense_vector
1626    //
1627    // \param lhs The target left-hand side dense vector.
1628    // \param rhs The right-hand side multiplication expression divisor.
1629    // \return void
1630    //
1631    // This function implements the performance optimized SMP division assignment of a transpose
1632    // sparse vector-dense matrix multiplication expression to a dense vector. Due to the explicit
1633    // application of the SFINAE principle, this function can only be selected by the compiler in
1634    // case the expression specific parallel evaluation strategy is selected.
1635    */
1636    template< typename VT2 >  // Type of the target dense vector
1637    friend inline auto smpDivAssign( DenseVector<VT2,true>& lhs, const TSVecDMatMultExpr& rhs )
1638       -> EnableIf_t< UseSMPAssign_v<VT2> >
1639    {
1640       BLAZE_FUNCTION_TRACE;
1641 
1642       BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE( ResultType );
1643       BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ResultType );
1644       BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION( ResultType );
1645 
1646       BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1647 
1648       const ResultType tmp( rhs );
1649       smpDivAssign( *lhs, tmp );
1650    }
1651    //**********************************************************************************************
1652 
1653    //**SMP division assignment to sparse vectors***************************************************
1654    // No special implementation for the SMP division assignment to sparse vectors.
1655    //**********************************************************************************************
1656 
1657    //**Compile time checks*************************************************************************
1658    /*! \cond BLAZE_INTERNAL */
   // Constraints on the vector operand type VT: sparse vector, row vector, not a zero type
   BLAZE_CONSTRAINT_MUST_BE_SPARSE_VECTOR_TYPE( VT );
   BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( VT );
   BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE( VT );
   // Constraints on the matrix operand type MT: dense matrix, row-major storage order
   BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE( MT );
   BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( MT );
   // Constraint on the combination of both operand types
   BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR( VT, MT );
1665    /*! \endcond */
1666    //**********************************************************************************************
1667 };
1668 //*************************************************************************************************
1669 
1670 
1671 
1672 
1673 //=================================================================================================
1674 //
1675 //  GLOBAL BINARY ARITHMETIC OPERATORS
1676 //
1677 //=================================================================================================
1678 
1679 //*************************************************************************************************
1680 /*! \cond BLAZE_INTERNAL */
/*!\brief Backend implementation of the multiplication of a transpose sparse vector
//        and a row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1683 // \ingroup dense_vector
1684 //
1685 // \param vec The left-hand side transpose sparse vector for the multiplication.
1686 // \param mat The right-hand side row-major dense matrix for the multiplication.
1687 // \return The resulting transpose vector.
1688 //
1689 // This function implements the performance optimized treatment of the multiplication of a
1690 // transpose sparse vector and a row-major dense matrix.
1691 */
1692 template< typename VT  // Type of the left-hand side sparse vector
1693         , typename MT  // Type of the right-hand side dense matrix
1694         , DisableIf_t< IsZero_v<VT> >* = nullptr >
1695 inline const TSVecDMatMultExpr<VT,MT>
tsvecdmatmult(const SparseVector<VT,true> & vec,const DenseMatrix<MT,false> & mat)1696    tsvecdmatmult( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1697 {
1698    BLAZE_FUNCTION_TRACE;
1699 
1700    BLAZE_INTERNAL_ASSERT( (*vec).size() == (*mat).rows(), "Invalid vector and matrix sizes" );
1701 
1702    return TSVecDMatMultExpr<VT,MT>( *vec, *mat );
1703 }
1704 /*! \endcond */
1705 //*************************************************************************************************
1706 
1707 
1708 //*************************************************************************************************
1709 /*! \cond BLAZE_INTERNAL */
/*!\brief Backend implementation of the multiplication of a transpose zero vector
//        and a row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1712 // \ingroup dense_vector
1713 //
1714 // \param vec The left-hand side transpose zero vector for the multiplication.
1715 // \param mat The right-hand side row-major dense matrix for the multiplication.
1716 // \return The resulting zero vector.
1717 //
1718 // This function implements the performance optimized treatment of the multiplication of a
1719 // transpose zero vector and a row-major dense matrix. It returns a zero vector.
1720 */
1721 template< typename VT  // Type of the left-hand side sparse vector
1722         , typename MT  // Type of the right-hand side dense matrix
1723         , EnableIf_t< IsZero_v<VT> >* = nullptr >
decltype(auto)1724 inline decltype(auto)
1725    tsvecdmatmult( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1726 {
1727    BLAZE_FUNCTION_TRACE;
1728 
1729    MAYBE_UNUSED( vec );
1730 
1731    BLAZE_INTERNAL_ASSERT( (*vec).size() == (*mat).rows(), "Invalid vector and matrix sizes" );
1732 
1733    using ReturnType = const MultTrait_t< ResultType_t<VT>, ResultType_t<MT> >;
1734 
1735    BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE( ReturnType );
1736    BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE( ReturnType );
1737 
1738    return ReturnType( (*mat).columns() );
1739 }
1740 /*! \endcond */
1741 //*************************************************************************************************
1742 
1743 
1744 //*************************************************************************************************
1745 /*!\brief Multiplication operator for the multiplication of a transpose sparse vector and a
1746 //        row-major dense matrix (\f$ \vec{y}^T=\vec{x}^T*A \f$).
1747 // \ingroup dense_matrix
1748 //
1749 // \param vec The left-hand side transpose sparse vector for the multiplication.
1750 // \param mat The right-hand side row-major dense matrix for the multiplication.
1751 // \return The resulting transpose vector.
1752 // \exception std::invalid_argument Vector and matrix sizes do not match.
1753 //
1754 // This operator represents the multiplication between a transpose sparse vector and a row-major
1755 // dense matrix:
1756 
1757    \code
1758    using blaze::rowVector;
1759    using blaze::rowMajor;
1760 
1761    blaze::CompressedVector<double,rowVector> x, y;
1762    blaze::DynamicMatrix<double,rowMajor> A;
1763    // ... Resizing and initialization
1764    y = x * A;
1765    \endcode
1766 
// The operator returns an expression representing a transpose dense vector of the higher-order
// element type of the two involved element types \a VT::ElementType and \a MT::ElementType.
// Both the sparse vector type \a VT and the dense matrix type \a MT as well as the two element
// types \a VT::ElementType and \a MT::ElementType have to be supported by the MultTrait class
// template.\n
1772 // In case the current size of the vector \a vec doesn't match the current number of rows of
1773 // the matrix \a mat, a \a std::invalid_argument is thrown.
1774 */
1775 template< typename VT    // Type of the left-hand side sparse vector
1776         , typename MT >  // Type of the right-hand side dense matrix
decltype(auto)1777 inline decltype(auto)
1778    operator*( const SparseVector<VT,true>& vec, const DenseMatrix<MT,false>& mat )
1779 {
1780    BLAZE_FUNCTION_TRACE;
1781 
1782    BLAZE_CONSTRAINT_MUST_NOT_BE_MATMATMULTEXPR_TYPE( MT );
1783 
1784    if( (*vec).size() != (*mat).rows() ) {
1785       BLAZE_THROW_INVALID_ARGUMENT( "Vector and matrix sizes do not match" );
1786    }
1787 
1788    return tsvecdmatmult( *vec, *mat );
1789 }
1790 //*************************************************************************************************
1791 
1792 
1793 
1794 
1795 //=================================================================================================
1796 //
1797 //  ISALIGNED SPECIALIZATIONS
1798 //
1799 //=================================================================================================
1800 
1801 //*************************************************************************************************
1802 /*! \cond BLAZE_INTERNAL */
1803 template< typename VT, typename MT >
1804 struct IsAligned< TSVecDMatMultExpr<VT,MT> >
1805    : public IsAligned<MT>
1806 {};
1807 /*! \endcond */
1808 //*************************************************************************************************
1809 
1810 } // namespace blaze
1811 
1812 #endif
1813