187 lines
4.7 KiB
Go
187 lines
4.7 KiB
Go
|
|
// Copyright ©2017 The Gonum Authors. All rights reserved.
|
|||
|
|
// Use of this source code is governed by a BSD-style
|
|||
|
|
// license that can be found in the LICENSE file.
|
|||
|
|
|
|||
|
|
package gonum
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"gonum.org/v1/gonum/blas"
|
|||
|
|
"gonum.org/v1/gonum/blas/blas64"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// Dgeqp3 computes a QR factorization with column pivoting of the
|
|||
|
|
// m×n matrix A: A*P = Q*R using Level 3 BLAS.
|
|||
|
|
//
|
|||
|
|
// The matrix Q is represented as a product of elementary reflectors
|
|||
|
|
// Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
|
|||
|
|
// Each H_i has the form
|
|||
|
|
// H_i = I - tau * v * v^T
|
|||
|
|
// where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
|
|||
|
|
// v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
|
|||
|
|
//
|
|||
|
|
// jpvt specifies a column pivot to be applied to A. If
|
|||
|
|
// jpvt[j] is at least zero, the jth column of A is permuted
|
|||
|
|
// to the front of A*P (a leading column), if jpvt[j] is -1
|
|||
|
|
// the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
|
|||
|
|
// will panic. On return, jpvt holds the permutation that was
|
|||
|
|
// applied; the jth column of A*P was the jpvt[j] column of A.
|
|||
|
|
// jpvt must have length n or Dgeqp3 will panic.
|
|||
|
|
//
|
|||
|
|
// tau holds the scalar factors of the elementary reflectors.
|
|||
|
|
// It must have length min(m, n), otherwise Dgeqp3 will panic.
|
|||
|
|
//
|
|||
|
|
// work must have length at least max(1,lwork), and lwork must be at least
|
|||
|
|
// 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
|
|||
|
|
// be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
|
|||
|
|
// work[0] will contain the optimal value of lwork.
|
|||
|
|
//
|
|||
|
|
// If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
|
|||
|
|
// will be stored in work[0].
|
|||
|
|
//
|
|||
|
|
// Dgeqp3 is an internal routine. It is exported for testing purposes.
|
|||
|
|
func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
|
|||
|
|
const (
|
|||
|
|
inb = 1
|
|||
|
|
inbmin = 2
|
|||
|
|
ixover = 3
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
minmn := min(m, n)
|
|||
|
|
iws := 3*n + 1
|
|||
|
|
if minmn == 0 {
|
|||
|
|
iws = 1
|
|||
|
|
}
|
|||
|
|
switch {
|
|||
|
|
case m < 0:
|
|||
|
|
panic(mLT0)
|
|||
|
|
case n < 0:
|
|||
|
|
panic(nLT0)
|
|||
|
|
case lda < max(1, n):
|
|||
|
|
panic(badLdA)
|
|||
|
|
case lwork < iws && lwork != -1:
|
|||
|
|
panic(badLWork)
|
|||
|
|
case len(work) < max(1, lwork):
|
|||
|
|
panic(shortWork)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Quick return if possible.
|
|||
|
|
if minmn == 0 {
|
|||
|
|
work[0] = 1
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
|
|||
|
|
if lwork == -1 {
|
|||
|
|
work[0] = float64(2*n + (n+1)*nb)
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
switch {
|
|||
|
|
case len(a) < (m-1)*lda+n:
|
|||
|
|
panic(shortA)
|
|||
|
|
case len(jpvt) != n:
|
|||
|
|
panic(badLenJpvt)
|
|||
|
|
case len(tau) < minmn:
|
|||
|
|
panic(shortTau)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
for _, v := range jpvt {
|
|||
|
|
if v < -1 || n <= v {
|
|||
|
|
panic(badJpvt)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
bi := blas64.Implementation()
|
|||
|
|
|
|||
|
|
// Move initial columns up front.
|
|||
|
|
var nfxd int
|
|||
|
|
for j := 0; j < n; j++ {
|
|||
|
|
if jpvt[j] == -1 {
|
|||
|
|
jpvt[j] = j
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
if j != nfxd {
|
|||
|
|
bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
|
|||
|
|
jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
|
|||
|
|
} else {
|
|||
|
|
jpvt[j] = j
|
|||
|
|
}
|
|||
|
|
nfxd++
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Factorize nfxd columns.
|
|||
|
|
//
|
|||
|
|
// Compute the QR factorization of nfxd columns and update remaining columns.
|
|||
|
|
if nfxd > 0 {
|
|||
|
|
na := min(m, nfxd)
|
|||
|
|
impl.Dgeqrf(m, na, a, lda, tau, work, lwork)
|
|||
|
|
iws = max(iws, int(work[0]))
|
|||
|
|
if na < n {
|
|||
|
|
impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
|
|||
|
|
work, lwork)
|
|||
|
|
iws = max(iws, int(work[0]))
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if nfxd >= minmn {
|
|||
|
|
work[0] = float64(iws)
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Factorize free columns.
|
|||
|
|
sm := m - nfxd
|
|||
|
|
sn := n - nfxd
|
|||
|
|
sminmn := minmn - nfxd
|
|||
|
|
|
|||
|
|
// Determine the block size.
|
|||
|
|
nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
|
|||
|
|
nbmin := 2
|
|||
|
|
nx := 0
|
|||
|
|
|
|||
|
|
if 1 < nb && nb < sminmn {
|
|||
|
|
// Determine when to cross over from blocked to unblocked code.
|
|||
|
|
nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
|
|||
|
|
|
|||
|
|
if nx < sminmn {
|
|||
|
|
// Determine if workspace is large enough for blocked code.
|
|||
|
|
minws := 2*sn + (sn+1)*nb
|
|||
|
|
iws = max(iws, minws)
|
|||
|
|
if lwork < minws {
|
|||
|
|
// Not enough workspace to use optimal nb. Reduce
|
|||
|
|
// nb and determine the minimum value of nb.
|
|||
|
|
nb = (lwork - 2*sn) / (sn + 1)
|
|||
|
|
nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Initialize partial column norms.
|
|||
|
|
// The first n elements of work store the exact column norms.
|
|||
|
|
for j := nfxd; j < n; j++ {
|
|||
|
|
work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
|
|||
|
|
work[n+j] = work[j]
|
|||
|
|
}
|
|||
|
|
j := nfxd
|
|||
|
|
if nbmin <= nb && nb < sminmn && nx < sminmn {
|
|||
|
|
// Use blocked code initially.
|
|||
|
|
|
|||
|
|
// Compute factorization.
|
|||
|
|
var fjb int
|
|||
|
|
for topbmn := minmn - nx; j < topbmn; j += fjb {
|
|||
|
|
jb := min(nb, topbmn-j)
|
|||
|
|
|
|||
|
|
// Factorize jb columns among columns j:n.
|
|||
|
|
fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
|
|||
|
|
work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Use unblocked code to factor the last or only block.
|
|||
|
|
if j < minmn {
|
|||
|
|
impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
|
|||
|
|
work[j:n], work[j+n:2*n], work[2*n:])
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
work[0] = float64(iws)
|
|||
|
|
}
|