From ddc75e57b5a8e9b7e3fef9f67820e7cb03f0d27e Mon Sep 17 00:00:00 2001 From: Francesco Lannutti Date: Sat, 26 Mar 2016 20:25:54 +0100 Subject: [PATCH] Enhanced KLU to avoid loosing computation cycles. The speedup is quite low, though: ~1% --- src/maths/KLU/klu_kernel.c | 178 ++++++++++++++++++++----------------- 1 file changed, 97 insertions(+), 81 deletions(-) diff --git a/src/maths/KLU/klu_kernel.c b/src/maths/KLU/klu_kernel.c index bfdbe80fe..c91a721ce 100644 --- a/src/maths/KLU/klu_kernel.c +++ b/src/maths/KLU/klu_kernel.c @@ -119,7 +119,15 @@ static Int dfs /* Finds the pattern of x, for the solution of Lx=b */ -static Int lsolve_symbolic +/* ========================================================================== */ +/* === construct_column ===================================================== */ +/* ========================================================================== */ + +/* Construct the kth column of A, and the off-diagonal part, if requested. + * Scatter the numerical values into the workspace X, and construct the + * corresponding column of the off-diagonal matrix. */ + +static Int lsolve_symbolic_and_construct_column ( /* input, not modified on output: */ Int n, /* L is n-by-n, where n >= 0 */ @@ -150,69 +158,10 @@ static Int lsolve_symbolic /* ---- the following are only used in the BTF case --- */ Int k1, /* the block of A is from k1 to k2-1 */ - Int PSinv [ ] /* inverse of P from symbolic factorization */ -) -{ - Int *Lik ; - Int i, p, pend, oldcol, kglobal, top, l_length ; + Int PSinv [ ], /* inverse of P from symbolic factorization */ - top = n ; - l_length = 0 ; - Lik = (Int *) (LU + lup); - - /* ---------------------------------------------------------------------- */ - /* BTF factorization of A (k1:k2-1, k1:k2-1) */ - /* ---------------------------------------------------------------------- */ - - kglobal = k + k1 ; /* column k of the block is col kglobal of A */ - oldcol = Q [kglobal] ; /* Q must be present for BTF case */ - pend = Ap [oldcol+1] ; - for (p = Ap [oldcol] ; p < pend ; p++) - { - i = PSinv [Ai [p]] - k1 ; - if (i < 0) continue ; /* skip entry outside the block */ - - /* (i,k) is an entry in the block. start a DFS at node i */ - PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ; - if (Flag [i] != k) - { - if (Pinv [i] >= 0) - { - top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag, - Lpend, top, LU, Lik, &l_length, Ap_pos) ; - } - else - { - /* i is not pivotal, and not flagged. Flag and put in L */ - Flag [i] = k ; - Lik [l_length] = i ; - l_length++; - } - } - } - - /* If Llen [k] is zero, the matrix is structurally singular */ - Llen [k] = l_length ; - return (top) ; -} - - -/* ========================================================================== */ -/* === construct_column ===================================================== */ -/* ========================================================================== */ - -/* Construct the kth column of A, and the off-diagonal part, if requested. - * Scatter the numerical values into the workspace X, and construct the - * corresponding column of the off-diagonal matrix. */ - -static void construct_column -( /* inputs, not modified on output */ - Int k, /* the column of A (or the column of the block) to get */ - Int Ap [ ], - Int Ai [ ], Entry Ax [ ], - Int Q [ ], /* column pre-ordering */ /* zero on input, modified on output */ Entry X [ ], @@ -220,8 +169,6 @@ static void construct_column /* ---- the following are only used in the BTF case --- */ /* inputs, not modified on output */ - Int k1, /* the block of A is from k1 to k2-1 */ - Int PSinv [ ], /* inverse of P from symbolic factorization */ double Rs [ ], /* scale factors for A */ Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */ @@ -232,27 +179,36 @@ static void construct_column ) { Entry aik ; - Int i, p, pend, oldcol, kglobal, poff, oldrow ; + Int *Lik ; + Int i, p, pend, oldcol, kglobal, poff, oldrow, top, l_length ; + + top = n ; + l_length = 0 ; + Lik = (Int *) (LU + lup); /* ---------------------------------------------------------------------- */ - /* Scale and scatter the column into X. */ + /* BTF factorization of A (k1:k2-1, k1:k2-1) */ /* ---------------------------------------------------------------------- */ - kglobal = k + k1 ; /* column k of the block is col kglobal of A */ + kglobal = k + k1 ; /* column k of the block is col kglobal of A */ poff = Offp [kglobal] ; /* start of off-diagonal column */ - oldcol = Q [kglobal] ; + oldcol = Q [kglobal] ; /* Q must be present for BTF case */ pend = Ap [oldcol+1] ; if (scale <= 0) { - /* no scaling */ for (p = Ap [oldcol] ; p < pend ; p++) { oldrow = Ai [p] ; i = PSinv [oldrow] - k1 ; - aik = Ax [p] ; if (i < 0) { + /* ---------------------------------------------------------------------- */ + /* Scatter the column into X. */ + /* ---------------------------------------------------------------------- */ + + aik = Ax [p] ; + /* this is an entry in the off-diagonal part */ Offi [poff] = oldrow ; Offx [poff] = aik ; @@ -260,6 +216,31 @@ static void construct_column } else { + /* (i,k) is an entry in the block. start a DFS at node i */ + PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ; + if (Flag [i] != k) + { + if (Pinv [i] >= 0) + { + top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag, + Lpend, top, LU, Lik, &l_length, Ap_pos) ; + } + else + { + /* i is not pivotal, and not flagged. Flag and put in L */ + Flag [i] = k ; + Lik [l_length] = i ; + l_length++; + } + } + + /* ---------------------------------------------------------------------- */ + /* Scatter the column into X. */ + /* ---------------------------------------------------------------------- */ + + /* no scaling */ + aik = Ax [p] ; + /* (i,k) is an entry in the block. scatter into X */ X [i] = aik ; } @@ -267,15 +248,20 @@ static void construct_column } else { - /* row scaling */ for (p = Ap [oldcol] ; p < pend ; p++) { oldrow = Ai [p] ; i = PSinv [oldrow] - k1 ; - aik = Ax [p] ; - SCALE_DIV (aik, Rs [oldrow]) ; if (i < 0) { + /* ---------------------------------------------------------------------- */ + /* Scale and scatter the column into X. */ + /* ---------------------------------------------------------------------- */ + + /* row scaling */ + aik = Ax [p] ; + SCALE_DIV (aik, Rs [oldrow]) ; + /* this is an entry in the off-diagonal part */ Offi [poff] = oldrow ; Offx [poff] = aik ; @@ -283,6 +269,32 @@ static void construct_column } else { + /* (i,k) is an entry in the block. start a DFS at node i */ + PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ; + if (Flag [i] != k) + { + if (Pinv [i] >= 0) + { + top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag, + Lpend, top, LU, Lik, &l_length, Ap_pos) ; + } + else + { + /* i is not pivotal, and not flagged. Flag and put in L */ + Flag [i] = k ; + Lik [l_length] = i ; + l_length++; + } + } + + /* ---------------------------------------------------------------------- */ + /* Scale and scatter the column into X. */ + /* ---------------------------------------------------------------------- */ + + /* row scaling */ + aik = Ax [p] ; + SCALE_DIV (aik, Rs [oldrow]) ; + /* (i,k) is an entry in the block. scatter into X */ X [i] = aik ; } @@ -290,6 +302,10 @@ static void construct_column } Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */ + + /* If Llen [k] is zero, the matrix is structurally singular */ + Llen [k] = l_length ; + return (top) ; } @@ -802,9 +818,15 @@ size_t KLU_kernel /* final size of LU on output */ } #endif - top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag, - Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ; + /* Francesco - Compressed 'lsolve_symbolic' and 'cosntruct_column' in one routine */ + top = lsolve_symbolic_and_construct_column (n, k, Ap, Ai, Q, Pinv, Stack, Flag, Lpend, + Ap_pos, LU, lup, Llen, Lip, k1, PSinv, Ax, X, Rs, scale, Offp, Offi, Offx) ; + /* ------------------------------------------------------------------ */ + /* get the column of the matrix to factorize and scatter into X */ + /* ------------------------------------------------------------------ */ + +/* #ifndef NDEBUG PRINTF (("--- in U:\n")) ; for (p = top ; p < n ; p++) @@ -828,13 +850,7 @@ size_t KLU_kernel /* final size of LU on output */ if (Flag [i] == k) p++ ; } #endif - - /* ------------------------------------------------------------------ */ - /* get the column of the matrix to factorize and scatter into X */ - /* ------------------------------------------------------------------ */ - - construct_column (k, Ap, Ai, Ax, Q, X, - k1, PSinv, Rs, scale, Offp, Offi, Offx) ; +*/ /* ------------------------------------------------------------------ */ /* compute the numerical values of the kth column (s = L \ A (:,k)) */