1// Copyright ©2015 The Gonum Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package gonum
6
7import (
8	"gonum.org/v1/gonum/blas"
9	"gonum.org/v1/gonum/lapack"
10)
11
12// Dormlq multiplies the matrix C by the orthogonal matrix Q defined by the
13// slices a and tau. A and tau are as returned from Dgelqf.
14//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
15//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
16//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
17//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
18// If side == blas.Left, A is a matrix of side k×m, and if side == blas.Right
19// A is of size k×n. This uses a blocked algorithm.
20//
21// work is temporary storage, and lwork specifies the usable memory length.
22// At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right,
23// and this function will panic otherwise.
24// Dormlq uses a block algorithm, but the block size is limited
25// by the temporary space available. If lwork == -1, instead of performing Dormlq,
26// the optimal work length will be stored into work[0].
27//
28// tau contains the Householder scales and must have length at least k, and
29// this function will panic otherwise.
30func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
31	left := side == blas.Left
32	nw := m
33	if left {
34		nw = n
35	}
36	switch {
37	case !left && side != blas.Right:
38		panic(badSide)
39	case trans != blas.Trans && trans != blas.NoTrans:
40		panic(badTrans)
41	case m < 0:
42		panic(mLT0)
43	case n < 0:
44		panic(nLT0)
45	case k < 0:
46		panic(kLT0)
47	case left && k > m:
48		panic(kGTM)
49	case !left && k > n:
50		panic(kGTN)
51	case left && lda < max(1, m):
52		panic(badLdA)
53	case !left && lda < max(1, n):
54		panic(badLdA)
55	case lwork < max(1, nw) && lwork != -1:
56		panic(badLWork)
57	case len(work) < max(1, lwork):
58		panic(shortWork)
59	}
60
61	// Quick return if possible.
62	if m == 0 || n == 0 || k == 0 {
63		work[0] = 1
64		return
65	}
66
67	const (
68		nbmax = 64
69		ldt   = nbmax
70		tsize = nbmax * ldt
71	)
72	opts := string(side) + string(trans)
73	nb := min(nbmax, impl.Ilaenv(1, "DORMLQ", opts, m, n, k, -1))
74	lworkopt := max(1, nw)*nb + tsize
75	if lwork == -1 {
76		work[0] = float64(lworkopt)
77		return
78	}
79
80	switch {
81	case left && len(a) < (k-1)*lda+m:
82		panic(shortA)
83	case !left && len(a) < (k-1)*lda+n:
84		panic(shortA)
85	case len(tau) < k:
86		panic(shortTau)
87	case len(c) < (m-1)*ldc+n:
88		panic(shortC)
89	}
90
91	nbmin := 2
92	if 1 < nb && nb < k {
93		iws := nw*nb + tsize
94		if lwork < iws {
95			nb = (lwork - tsize) / nw
96			nbmin = max(2, impl.Ilaenv(2, "DORMLQ", opts, m, n, k, -1))
97		}
98	}
99	if nb < nbmin || k <= nb {
100		// Call unblocked code.
101		impl.Dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work)
102		work[0] = float64(lworkopt)
103		return
104	}
105
106	t := work[:tsize]
107	wrk := work[tsize:]
108	ldwrk := nb
109
110	notrans := trans == blas.NoTrans
111	transt := blas.NoTrans
112	if notrans {
113		transt = blas.Trans
114	}
115
116	switch {
117	case left && notrans:
118		for i := 0; i < k; i += nb {
119			ib := min(nb, k-i)
120			impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib,
121				a[i*lda+i:], lda,
122				tau[i:],
123				t, ldt)
124			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib,
125				a[i*lda+i:], lda,
126				t, ldt,
127				c[i*ldc:], ldc,
128				wrk, ldwrk)
129		}
130
131	case left && !notrans:
132		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
133			ib := min(nb, k-i)
134			impl.Dlarft(lapack.Forward, lapack.RowWise, m-i, ib,
135				a[i*lda+i:], lda,
136				tau[i:],
137				t, ldt)
138			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m-i, n, ib,
139				a[i*lda+i:], lda,
140				t, ldt,
141				c[i*ldc:], ldc,
142				wrk, ldwrk)
143		}
144
145	case !left && notrans:
146		for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
147			ib := min(nb, k-i)
148			impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
149				a[i*lda+i:], lda,
150				tau[i:],
151				t, ldt)
152			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib,
153				a[i*lda+i:], lda,
154				t, ldt,
155				c[i:], ldc,
156				wrk, ldwrk)
157		}
158
159	case !left && !notrans:
160		for i := 0; i < k; i += nb {
161			ib := min(nb, k-i)
162			impl.Dlarft(lapack.Forward, lapack.RowWise, n-i, ib,
163				a[i*lda+i:], lda,
164				tau[i:],
165				t, ldt)
166			impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, m, n-i, ib,
167				a[i*lda+i:], lda,
168				t, ldt,
169				c[i:], ldc,
170				wrk, ldwrk)
171		}
172	}
173	work[0] = float64(lworkopt)
174}
175