Enhanced KLU to avoid loosing computation cycles. The speedup is quite low, though: ~1%

This commit is contained in:
Francesco Lannutti 2016-03-26 20:25:54 +01:00 committed by rlar
parent 0441fbf3fa
commit ddc75e57b5
1 changed files with 97 additions and 81 deletions

View File

@ -119,7 +119,15 @@ static Int dfs
/* Finds the pattern of x, for the solution of Lx=b */
static Int lsolve_symbolic
/* ========================================================================== */
/* === construct_column ===================================================== */
/* ========================================================================== */
/* Construct the kth column of A, and the off-diagonal part, if requested.
* Scatter the numerical values into the workspace X, and construct the
* corresponding column of the off-diagonal matrix. */
static Int lsolve_symbolic_and_construct_column
(
/* input, not modified on output: */
Int n, /* L is n-by-n, where n >= 0 */
@ -150,69 +158,10 @@ static Int lsolve_symbolic
/* ---- the following are only used in the BTF case --- */
Int k1, /* the block of A is from k1 to k2-1 */
Int PSinv [ ] /* inverse of P from symbolic factorization */
)
{
Int *Lik ;
Int i, p, pend, oldcol, kglobal, top, l_length ;
Int PSinv [ ], /* inverse of P from symbolic factorization */
top = n ;
l_length = 0 ;
Lik = (Int *) (LU + lup);
/* ---------------------------------------------------------------------- */
/* BTF factorization of A (k1:k2-1, k1:k2-1) */
/* ---------------------------------------------------------------------- */
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
oldcol = Q [kglobal] ; /* Q must be present for BTF case */
pend = Ap [oldcol+1] ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
i = PSinv [Ai [p]] - k1 ;
if (i < 0) continue ; /* skip entry outside the block */
/* (i,k) is an entry in the block. start a DFS at node i */
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
if (Flag [i] != k)
{
if (Pinv [i] >= 0)
{
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
}
else
{
/* i is not pivotal, and not flagged. Flag and put in L */
Flag [i] = k ;
Lik [l_length] = i ;
l_length++;
}
}
}
/* If Llen [k] is zero, the matrix is structurally singular */
Llen [k] = l_length ;
return (top) ;
}
/* ========================================================================== */
/* === construct_column ===================================================== */
/* ========================================================================== */
/* Construct the kth column of A, and the off-diagonal part, if requested.
* Scatter the numerical values into the workspace X, and construct the
* corresponding column of the off-diagonal matrix. */
static void construct_column
(
/* inputs, not modified on output */
Int k, /* the column of A (or the column of the block) to get */
Int Ap [ ],
Int Ai [ ],
Entry Ax [ ],
Int Q [ ], /* column pre-ordering */
/* zero on input, modified on output */
Entry X [ ],
@ -220,8 +169,6 @@ static void construct_column
/* ---- the following are only used in the BTF case --- */
/* inputs, not modified on output */
Int k1, /* the block of A is from k1 to k2-1 */
Int PSinv [ ], /* inverse of P from symbolic factorization */
double Rs [ ], /* scale factors for A */
Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
@ -232,27 +179,36 @@ static void construct_column
)
{
Entry aik ;
Int i, p, pend, oldcol, kglobal, poff, oldrow ;
Int *Lik ;
Int i, p, pend, oldcol, kglobal, poff, oldrow, top, l_length ;
top = n ;
l_length = 0 ;
Lik = (Int *) (LU + lup);
/* ---------------------------------------------------------------------- */
/* Scale and scatter the column into X. */
/* BTF factorization of A (k1:k2-1, k1:k2-1) */
/* ---------------------------------------------------------------------- */
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
kglobal = k + k1 ; /* column k of the block is col kglobal of A */
poff = Offp [kglobal] ; /* start of off-diagonal column */
oldcol = Q [kglobal] ;
oldcol = Q [kglobal] ; /* Q must be present for BTF case */
pend = Ap [oldcol+1] ;
if (scale <= 0)
{
/* no scaling */
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
i = PSinv [oldrow] - k1 ;
aik = Ax [p] ;
if (i < 0)
{
/* ---------------------------------------------------------------------- */
/* Scatter the column into X. */
/* ---------------------------------------------------------------------- */
aik = Ax [p] ;
/* this is an entry in the off-diagonal part */
Offi [poff] = oldrow ;
Offx [poff] = aik ;
@ -260,6 +216,31 @@ static void construct_column
}
else
{
/* (i,k) is an entry in the block. start a DFS at node i */
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
if (Flag [i] != k)
{
if (Pinv [i] >= 0)
{
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
}
else
{
/* i is not pivotal, and not flagged. Flag and put in L */
Flag [i] = k ;
Lik [l_length] = i ;
l_length++;
}
}
/* ---------------------------------------------------------------------- */
/* Scatter the column into X. */
/* ---------------------------------------------------------------------- */
/* no scaling */
aik = Ax [p] ;
/* (i,k) is an entry in the block. scatter into X */
X [i] = aik ;
}
@ -267,15 +248,20 @@ static void construct_column
}
else
{
/* row scaling */
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
i = PSinv [oldrow] - k1 ;
aik = Ax [p] ;
SCALE_DIV (aik, Rs [oldrow]) ;
if (i < 0)
{
/* ---------------------------------------------------------------------- */
/* Scale and scatter the column into X. */
/* ---------------------------------------------------------------------- */
/* row scaling */
aik = Ax [p] ;
SCALE_DIV (aik, Rs [oldrow]) ;
/* this is an entry in the off-diagonal part */
Offi [poff] = oldrow ;
Offx [poff] = aik ;
@ -283,6 +269,32 @@ static void construct_column
}
else
{
/* (i,k) is an entry in the block. start a DFS at node i */
PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
if (Flag [i] != k)
{
if (Pinv [i] >= 0)
{
top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
Lpend, top, LU, Lik, &l_length, Ap_pos) ;
}
else
{
/* i is not pivotal, and not flagged. Flag and put in L */
Flag [i] = k ;
Lik [l_length] = i ;
l_length++;
}
}
/* ---------------------------------------------------------------------- */
/* Scale and scatter the column into X. */
/* ---------------------------------------------------------------------- */
/* row scaling */
aik = Ax [p] ;
SCALE_DIV (aik, Rs [oldrow]) ;
/* (i,k) is an entry in the block. scatter into X */
X [i] = aik ;
}
@ -290,6 +302,10 @@ static void construct_column
}
Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
/* If Llen [k] is zero, the matrix is structurally singular */
Llen [k] = l_length ;
return (top) ;
}
@ -802,9 +818,15 @@ size_t KLU_kernel /* final size of LU on output */
}
#endif
top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
/* Francesco - Compressed 'lsolve_symbolic' and 'cosntruct_column' in one routine */
top = lsolve_symbolic_and_construct_column (n, k, Ap, Ai, Q, Pinv, Stack, Flag, Lpend,
Ap_pos, LU, lup, Llen, Lip, k1, PSinv, Ax, X, Rs, scale, Offp, Offi, Offx) ;
/* ------------------------------------------------------------------ */
/* get the column of the matrix to factorize and scatter into X */
/* ------------------------------------------------------------------ */
/*
#ifndef NDEBUG
PRINTF (("--- in U:\n")) ;
for (p = top ; p < n ; p++)
@ -828,13 +850,7 @@ size_t KLU_kernel /* final size of LU on output */
if (Flag [i] == k) p++ ;
}
#endif
/* ------------------------------------------------------------------ */
/* get the column of the matrix to factorize and scatter into X */
/* ------------------------------------------------------------------ */
construct_column (k, Ap, Ai, Ax, Q, X,
k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
*/
/* ------------------------------------------------------------------ */
/* compute the numerical values of the kth column (s = L \ A (:,k)) */