Enhanced KLU to avoid loosing computation cycles. The speedup is quite low, though: ~1%
This commit is contained in:
parent
15e732dbb4
commit
4a7dd29867
|
|
@ -119,7 +119,15 @@ static Int dfs
|
||||||
|
|
||||||
/* Finds the pattern of x, for the solution of Lx=b */
|
/* Finds the pattern of x, for the solution of Lx=b */
|
||||||
|
|
||||||
static Int lsolve_symbolic
|
/* ========================================================================== */
|
||||||
|
/* === construct_column ===================================================== */
|
||||||
|
/* ========================================================================== */
|
||||||
|
|
||||||
|
/* Construct the kth column of A, and the off-diagonal part, if requested.
|
||||||
|
* Scatter the numerical values into the workspace X, and construct the
|
||||||
|
* corresponding column of the off-diagonal matrix. */
|
||||||
|
|
||||||
|
static Int lsolve_symbolic_and_construct_column
|
||||||
(
|
(
|
||||||
/* input, not modified on output: */
|
/* input, not modified on output: */
|
||||||
Int n, /* L is n-by-n, where n >= 0 */
|
Int n, /* L is n-by-n, where n >= 0 */
|
||||||
|
|
@ -150,69 +158,10 @@ static Int lsolve_symbolic
|
||||||
/* ---- the following are only used in the BTF case --- */
|
/* ---- the following are only used in the BTF case --- */
|
||||||
|
|
||||||
Int k1, /* the block of A is from k1 to k2-1 */
|
Int k1, /* the block of A is from k1 to k2-1 */
|
||||||
Int PSinv [ ] /* inverse of P from symbolic factorization */
|
Int PSinv [ ], /* inverse of P from symbolic factorization */
|
||||||
)
|
|
||||||
{
|
|
||||||
Int *Lik ;
|
|
||||||
Int i, p, pend, oldcol, kglobal, top, l_length ;
|
|
||||||
|
|
||||||
top = n ;
|
|
||||||
l_length = 0 ;
|
|
||||||
Lik = (Int *) (LU + lup);
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
/* BTF factorization of A (k1:k2-1, k1:k2-1) */
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
|
|
||||||
oldcol = Q [kglobal] ; /* Q must be present for BTF case */
|
|
||||||
pend = Ap [oldcol+1] ;
|
|
||||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
|
||||||
{
|
|
||||||
i = PSinv [Ai [p]] - k1 ;
|
|
||||||
if (i < 0) continue ; /* skip entry outside the block */
|
|
||||||
|
|
||||||
/* (i,k) is an entry in the block. start a DFS at node i */
|
|
||||||
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
|
|
||||||
if (Flag [i] != k)
|
|
||||||
{
|
|
||||||
if (Pinv [i] >= 0)
|
|
||||||
{
|
|
||||||
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
|
|
||||||
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* i is not pivotal, and not flagged. Flag and put in L */
|
|
||||||
Flag [i] = k ;
|
|
||||||
Lik [l_length] = i ;
|
|
||||||
l_length++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Llen [k] is zero, the matrix is structurally singular */
|
|
||||||
Llen [k] = l_length ;
|
|
||||||
return (top) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* ========================================================================== */
|
|
||||||
/* === construct_column ===================================================== */
|
|
||||||
/* ========================================================================== */
|
|
||||||
|
|
||||||
/* Construct the kth column of A, and the off-diagonal part, if requested.
|
|
||||||
* Scatter the numerical values into the workspace X, and construct the
|
|
||||||
* corresponding column of the off-diagonal matrix. */
|
|
||||||
|
|
||||||
static void construct_column
|
|
||||||
(
|
|
||||||
/* inputs, not modified on output */
|
/* inputs, not modified on output */
|
||||||
Int k, /* the column of A (or the column of the block) to get */
|
|
||||||
Int Ap [ ],
|
|
||||||
Int Ai [ ],
|
|
||||||
Entry Ax [ ],
|
Entry Ax [ ],
|
||||||
Int Q [ ], /* column pre-ordering */
|
|
||||||
|
|
||||||
/* zero on input, modified on output */
|
/* zero on input, modified on output */
|
||||||
Entry X [ ],
|
Entry X [ ],
|
||||||
|
|
@ -220,8 +169,6 @@ static void construct_column
|
||||||
/* ---- the following are only used in the BTF case --- */
|
/* ---- the following are only used in the BTF case --- */
|
||||||
|
|
||||||
/* inputs, not modified on output */
|
/* inputs, not modified on output */
|
||||||
Int k1, /* the block of A is from k1 to k2-1 */
|
|
||||||
Int PSinv [ ], /* inverse of P from symbolic factorization */
|
|
||||||
double Rs [ ], /* scale factors for A */
|
double Rs [ ], /* scale factors for A */
|
||||||
Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
|
Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
|
||||||
|
|
||||||
|
|
@ -232,27 +179,36 @@ static void construct_column
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Entry aik ;
|
Entry aik ;
|
||||||
Int i, p, pend, oldcol, kglobal, poff, oldrow ;
|
Int *Lik ;
|
||||||
|
Int i, p, pend, oldcol, kglobal, poff, oldrow, top, l_length ;
|
||||||
|
|
||||||
|
top = n ;
|
||||||
|
l_length = 0 ;
|
||||||
|
Lik = (Int *) (LU + lup);
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
/* Scale and scatter the column into X. */
|
/* BTF factorization of A (k1:k2-1, k1:k2-1) */
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
|
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
|
||||||
poff = Offp [kglobal] ; /* start of off-diagonal column */
|
poff = Offp [kglobal] ; /* start of off-diagonal column */
|
||||||
oldcol = Q [kglobal] ;
|
oldcol = Q [kglobal] ; /* Q must be present for BTF case */
|
||||||
pend = Ap [oldcol+1] ;
|
pend = Ap [oldcol+1] ;
|
||||||
|
|
||||||
if (scale <= 0)
|
if (scale <= 0)
|
||||||
{
|
{
|
||||||
/* no scaling */
|
|
||||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||||
{
|
{
|
||||||
oldrow = Ai [p] ;
|
oldrow = Ai [p] ;
|
||||||
i = PSinv [oldrow] - k1 ;
|
i = PSinv [oldrow] - k1 ;
|
||||||
aik = Ax [p] ;
|
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
{
|
{
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
/* Scatter the column into X. */
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
aik = Ax [p] ;
|
||||||
|
|
||||||
/* this is an entry in the off-diagonal part */
|
/* this is an entry in the off-diagonal part */
|
||||||
Offi [poff] = oldrow ;
|
Offi [poff] = oldrow ;
|
||||||
Offx [poff] = aik ;
|
Offx [poff] = aik ;
|
||||||
|
|
@ -260,6 +216,31 @@ static void construct_column
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/* (i,k) is an entry in the block. start a DFS at node i */
|
||||||
|
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
|
||||||
|
if (Flag [i] != k)
|
||||||
|
{
|
||||||
|
if (Pinv [i] >= 0)
|
||||||
|
{
|
||||||
|
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
|
||||||
|
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* i is not pivotal, and not flagged. Flag and put in L */
|
||||||
|
Flag [i] = k ;
|
||||||
|
Lik [l_length] = i ;
|
||||||
|
l_length++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
/* Scatter the column into X. */
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* no scaling */
|
||||||
|
aik = Ax [p] ;
|
||||||
|
|
||||||
/* (i,k) is an entry in the block. scatter into X */
|
/* (i,k) is an entry in the block. scatter into X */
|
||||||
X [i] = aik ;
|
X [i] = aik ;
|
||||||
}
|
}
|
||||||
|
|
@ -267,15 +248,20 @@ static void construct_column
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* row scaling */
|
|
||||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||||
{
|
{
|
||||||
oldrow = Ai [p] ;
|
oldrow = Ai [p] ;
|
||||||
i = PSinv [oldrow] - k1 ;
|
i = PSinv [oldrow] - k1 ;
|
||||||
aik = Ax [p] ;
|
|
||||||
SCALE_DIV (aik, Rs [oldrow]) ;
|
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
{
|
{
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
/* Scale and scatter the column into X. */
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* row scaling */
|
||||||
|
aik = Ax [p] ;
|
||||||
|
SCALE_DIV (aik, Rs [oldrow]) ;
|
||||||
|
|
||||||
/* this is an entry in the off-diagonal part */
|
/* this is an entry in the off-diagonal part */
|
||||||
Offi [poff] = oldrow ;
|
Offi [poff] = oldrow ;
|
||||||
Offx [poff] = aik ;
|
Offx [poff] = aik ;
|
||||||
|
|
@ -283,6 +269,32 @@ static void construct_column
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/* (i,k) is an entry in the block. start a DFS at node i */
|
||||||
|
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
|
||||||
|
if (Flag [i] != k)
|
||||||
|
{
|
||||||
|
if (Pinv [i] >= 0)
|
||||||
|
{
|
||||||
|
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
|
||||||
|
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* i is not pivotal, and not flagged. Flag and put in L */
|
||||||
|
Flag [i] = k ;
|
||||||
|
Lik [l_length] = i ;
|
||||||
|
l_length++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
/* Scale and scatter the column into X. */
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* row scaling */
|
||||||
|
aik = Ax [p] ;
|
||||||
|
SCALE_DIV (aik, Rs [oldrow]) ;
|
||||||
|
|
||||||
/* (i,k) is an entry in the block. scatter into X */
|
/* (i,k) is an entry in the block. scatter into X */
|
||||||
X [i] = aik ;
|
X [i] = aik ;
|
||||||
}
|
}
|
||||||
|
|
@ -290,6 +302,10 @@ static void construct_column
|
||||||
}
|
}
|
||||||
|
|
||||||
Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
|
Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
|
||||||
|
|
||||||
|
/* If Llen [k] is zero, the matrix is structurally singular */
|
||||||
|
Llen [k] = l_length ;
|
||||||
|
return (top) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -802,9 +818,15 @@ size_t KLU_kernel /* final size of LU on output */
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
|
/* Francesco - Compressed 'lsolve_symbolic' and 'cosntruct_column' in one routine */
|
||||||
Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
|
top = lsolve_symbolic_and_construct_column (n, k, Ap, Ai, Q, Pinv, Stack, Flag, Lpend,
|
||||||
|
Ap_pos, LU, lup, Llen, Lip, k1, PSinv, Ax, X, Rs, scale, Offp, Offi, Offx) ;
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* get the column of the matrix to factorize and scatter into X */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/*
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
PRINTF (("--- in U:\n")) ;
|
PRINTF (("--- in U:\n")) ;
|
||||||
for (p = top ; p < n ; p++)
|
for (p = top ; p < n ; p++)
|
||||||
|
|
@ -828,13 +850,7 @@ size_t KLU_kernel /* final size of LU on output */
|
||||||
if (Flag [i] == k) p++ ;
|
if (Flag [i] == k) p++ ;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
/* ------------------------------------------------------------------ */
|
|
||||||
/* get the column of the matrix to factorize and scatter into X */
|
|
||||||
/* ------------------------------------------------------------------ */
|
|
||||||
|
|
||||||
construct_column (k, Ap, Ai, Ax, Q, X,
|
|
||||||
k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
|
|
||||||
|
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
/* compute the numerical values of the kth column (s = L \ A (:,k)) */
|
/* compute the numerical values of the kth column (s = L \ A (:,k)) */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue