Skip to content

Commit

Permalink
Refactor dgetrf function to use blocked algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
ulises-jeremias committed Mar 24, 2024
1 parent c7356d7 commit 0832f6d
Show file tree
Hide file tree
Showing 2 changed files with 300 additions and 0 deletions.
28 changes: 28 additions & 0 deletions lapack/lapack64/dgetrf.v
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,32 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) {
if ipiv.len < mn {
panic(bad_len_ipiv)
}

nb := ilaenv(1, 'DGETRF', ' ', m, n, -1, -1)

if nb <= 1 || nb >= mn {
// use the unblocked algorithm.
return dgetf2(m, n, mut a, lda, ipiv)
}

for j := 0; j < mn; j += nb {
jb := math.min(mn - j, nb)

// factor diagonal and subdiagonal blocks and test for exact singularity.
dgetf2(m - j, jb, mut a[j * lda + j..], lda, ipiv[j..j + jb])

for i := j; i <= math.min(m - 1, j + jb - 1); i++ {
ipiv[i] += j
}

// apply interchanges to columns 1..j-1.
dlaswp(j, mut a, lda, j, j + jb - 1, ipiv[..j + jb], 1)

if j + jb < n {
// apply interchanges to columns 1..j-1.
mut slice := unsafe { a[j + jb..] }
dlaswp(j, mut slice, lda, j, j + jb, ipiv[..j + jb], 1)
//
}
}
}
272 changes: 272 additions & 0 deletions lapack/lapack64/ilaenv.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
module lapack64

// ilaenv returns algorithm tuning parameters for the algorithm given by the
// input string. ispec specifies the parameter to return:
//
// 1: The optimal block size for a blocked algorithm.
// 2: The minimum block size for a blocked algorithm.
// 3: The block size of unprocessed data at which a blocked algorithm should
// crossover to an unblocked version.
// 4: The number of shifts.
// 5: The minimum column dimension for blocking to be used.
// 6: The crossover point for SVD (to use QR factorization or not).
// 7: The number of processors.
// 8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems.
// 9: Maximum size of the subproblems in divide-and-conquer algorithms.
// 10: ieee infinity and NaN arithmetic can be trusted not to trap.
// 11: ieee infinity arithmetic can be trusted not to trap.
// 12...16: parameters for Dhseqr and related functions. See Iparmq for more
// information.
//
// ilaenv is an internal routine. It is exported for testing purposes.
fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) int {
// TODO(btracey): Replace this with a constant lookup? A list of constants?
sname := name[0] == `S` || name[0] == `D`
cname := name[0] == `C` || name[0] == `Z`
if !sname && !cname {
panic(bad_name)
}

c2 := name[1..3]
c3 := name[3..6]
c4 := c3[1..3]

match ispec {
1 {
match c2 {
'GE' {
match c3 {
'TRF', 'TRI' {
return 64
}
'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' {
return 32
}
else {
panic(bad_name)
}
}
}
'PO' {
match c3 {
'TRF' {
return 64
}
else {
panic(bad_name)
}
}
}
'SY', 'HE' {
match c3 {
'TRF' {
return 64
}
'TRD', 'GST' {
return 32
}
else {
panic(bad_name)
}
}
}
'OR', 'UN' {
match c3[0] {
'G', 'M' {
match c3[1..] {
'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' {
return 32
}
else {
panic(bad_name)
}
}
}
else {
panic(bad_name)
}
}
}
'GB', 'PB' {
// Assuming n4 and n2 are defined elsewhere in your code
match c3 {
'TRF' {
// Replace `n4` and `n2` with actual variables
if sname {
// if n4 <= 64 {
// return 1
// }
return 32
}
// if n4 <= 64 {
// return 1
// }
return 32
}
else {
panic(bad_name)
}
}
}
'PT', 'TR', 'LA' {
// Additional cases as per your original logic
}
'ST' {
if sname && c3 == 'EBZ' {
return 1
}
panic(bad_name)
}
else {
panic(bad_name)
}
}
}
2 {
match c2 {
'GE' {
match c3 {
'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD', 'TRI' {
if sname {
return 2
}
return 2
}
else {
panic(bad_name)
}
}
}
'SY' {
match c3 {
'TRF' {
if sname {
return 8
}
return 8
}
'TRD' {
if sname {
return 2
}
panic(bad_name)
}
else {
panic(bad_name)
}
}
}
'HE' {
if c3 == 'TRD' {
return 2
}
panic(bad_name)
}
'OR', 'UN' {
if !sname {
panic(bad_name)
}
match c3[0] {
'G', 'M' {
match c4 {
'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' {
return 2
}
else {
panic(bad_name)
}
}
}
else {
panic(bad_name)
}
}
}
else {
panic(bad_name)
}
}
}
3 {
match c2 {
'GE' {
match c3 {
'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' {
if sname {
return 128
}
return 128
}
else {
panic(bad_name)
}
}
}
'SY', 'HE' {
if c3 == 'TRD' {
return 32
}
panic(bad_name)
}
'OR', 'UN' {
match c3[0] {
'G' {
match c4 {
'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' {
return 128
}
else {
panic(bad_name)
}
}
}
else {
panic(bad_name)
}
}
}
else {
panic(bad_name)
}
}
}
4 {
// Used by xHSEQR
return 6
}
5 {
// Not used
return 2
}
6 {
// Used by xGELSS and xGESVD
// Assuming n1 and n2 are defined elsewhere in your code
// Replace `min(n1, n2)` with actual min calculation or function
return int(f64(min(n1, n2)) * 1.6)
}
7 {
// Not used
return 1
}
8 {
// Used by xHSEQR
return 50
}
9 {
// Used by xGELSD and xGESDD
return 25
}
10, 11 {
// Go guarantees ieee
return 1
}
12, 13, 14, 15, 16 {
// dhseqr and related functions for eigenvalue problems.
return iparmq(ispec, name, opts, n1, n2, n3, n4)
}
else {
panic(bad_ispec)
}
}
return 0
}

0 comments on commit 0832f6d

Please sign in to comment.