incorporate KLU source files from SuiteSparse-3.7.0.tar.gz

Francesco Lannutti 2014-04-21 14:11:28 +02:00 committed by rlar
parent f65715242d
commit 691f4b14f5
44 changed files with 17674 additions and 0 deletions

151
src/maths/KLU/UFconfig.h Normal file

@@ -0,0 +1,151 @@
/* ========================================================================== */
/* === UFconfig.h =========================================================== */
/* ========================================================================== */
/* Configuration file for SuiteSparse: a Suite of Sparse matrix packages
* (AMD, COLAMD, CCOLAMD, CAMD, CHOLMOD, UMFPACK, CXSparse, and others).
*
* UFconfig.h provides the definition of the long integer. On most systems,
* a C program can be compiled in LP64 mode, in which long's and pointers are
* both 64-bits, and int's are 32-bits. Windows 64, however, uses the LLP64
* model, in which int's and long's are 32-bits, and long long's and pointers
* are 64-bits.
*
* SuiteSparse packages that include long integer versions are
* intended for the LP64 mode. However, as a workaround for Windows 64
* (and perhaps other systems), the long integer can be redefined.
*
* If _WIN64 is defined, then the __int64 type is used instead of long.
*
* The long integer can also be defined at compile time. For example, this
* could be added to UFconfig.mk:
*
* CFLAGS = -O -D'UF_long=long long' -D'UF_long_max=9223372036854775801' \
* -D'UF_long_idd="lld"'
*
* This file defines UF_long as either long (on all but _WIN64) or
* __int64 on Windows 64. The intent is that a UF_long is always a 64-bit
* integer in a 64-bit code. ptrdiff_t might be a better choice than long;
* it is always the same size as a pointer.
*
* This file also defines the SUITESPARSE_VERSION and related definitions.
*
* Copyright (c) 2007, University of Florida. No licensing restrictions
* apply to this file or to the UFconfig directory. Author: Timothy A. Davis.
*/
#ifndef _UFCONFIG_H
#define _UFCONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdlib.h>
/* ========================================================================== */
/* === UF_long ============================================================== */
/* ========================================================================== */
#ifndef UF_long
#ifdef _WIN64
#define UF_long __int64
#define UF_long_max _I64_MAX
#define UF_long_idd "I64d"
#else
#define UF_long long
#define UF_long_max LONG_MAX
#define UF_long_idd "ld"
#endif
#define UF_long_id "%" UF_long_idd
#endif
/* ========================================================================== */
/* === UFconfig parameters and functions ==================================== */
/* ========================================================================== */
/* SuiteSparse-wide parameters will be placed in this struct. So far, they
are only used by RBio. */
typedef struct UFconfig_struct
{
void *(*malloc_memory) (size_t) ; /* pointer to malloc */
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */
void (*free_memory) (void *) ; /* pointer to free */
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */
} UFconfig ;
void *UFmalloc /* pointer to allocated block of memory */
(
size_t nitems, /* number of items to malloc (>=1 is enforced) */
size_t size_of_item, /* sizeof each item */
int *ok, /* TRUE if successful, FALSE otherwise */
UFconfig *config /* SuiteSparse-wide configuration */
) ;
void *UFfree /* always returns NULL */
(
void *p, /* block to free */
UFconfig *config /* SuiteSparse-wide configuration */
) ;
/* ========================================================================== */
/* === SuiteSparse version ================================================== */
/* ========================================================================== */
/* SuiteSparse is not a package itself, but a collection of packages, some of
* which must be used together (UMFPACK requires AMD, CHOLMOD requires AMD,
* COLAMD, CAMD, and CCOLAMD, etc). A version number is provided here for the
* collection itself. The versions of packages within each version of
 * SuiteSparse are meant to work together. Combining one package from one
* version of SuiteSparse, with another package from another version of
* SuiteSparse, may or may not work.
*
* SuiteSparse Version 3.7.0 contains the following packages:
*
* UFconfig version 3.7.0 (version always the same as SuiteSparse)
* AMD version 2.2.3
* CAMD version 2.2.3
* CCOLAMD version 2.7.4
* COLAMD version 2.7.4
* BTF version 1.1.3
* CHOLMOD version 1.7.4
* CSparse3 version 3.0.2
* CSparse version 2.2.6
* CXSparse version 2.2.6
* KLU version 1.1.3
* LDL version 2.0.4
* RBio version 2.0.2
* SPQR version 1.2.3 (also called SuiteSparseQR)
* UFcollection version 1.6.0
* UMFPACK version 5.5.2
* SSMULT version 2.0.3
* spqr_rank version 1.0.0
* MATLAB_Tools various packages & M-files. No specific version number.
*
* Other package dependencies:
* BLAS required by CHOLMOD and UMFPACK
* LAPACK required by CHOLMOD
* METIS 4.0.1 required by CHOLMOD (optional) and KLU (optional)
*/
#define SUITESPARSE_DATE "Dec 15, 2011"
#define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub))
#define SUITESPARSE_MAIN_VERSION 3
#define SUITESPARSE_SUB_VERSION 7
#define SUITESPARSE_SUBSUB_VERSION 0
#define SUITESPARSE_VERSION \
SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION)
#ifdef __cplusplus
}
#endif
#endif
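
The comment at the top of UFconfig.h describes how UF_long is selected and how the version macros are built. The short sketch below (not part of the imported sources) shows how a program uses them portably; it assumes a build where UF_long is 64 bits (LP64 Unix or _WIN64), since on a 32-bit build the shifted value would overflow.

#include <stdio.h>
#include "UFconfig.h"

int main (void)
{
    /* 2^40 fits in UF_long on LP64 (long) and _WIN64 (__int64) builds */
    UF_long n = (UF_long) 1 << 40 ;

    /* UF_long_id expands to "%ld" or "%I64d" to match the chosen type */
    printf ("n = " UF_long_id ", UF_long_max = " UF_long_id "\n",
        n, (UF_long) UF_long_max) ;

    /* SUITESPARSE_VER_CODE(3,7) = 3007 for this release */
    printf ("SUITESPARSE_VERSION = %d\n", SUITESPARSE_VERSION) ;
    return (0) ;
}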

412
src/maths/KLU/amd.h Normal file

@@ -0,0 +1,412 @@
/* ========================================================================= */
/* === AMD: approximate minimum degree ordering =========================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD Version 2.2, Copyright (c) 2007 by Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* AMD finds a symmetric ordering P of a matrix A so that the Cholesky
* factorization of P*A*P' has fewer nonzeros and takes less work than the
* Cholesky factorization of A. If A is not symmetric, then it performs its
* ordering on the matrix A+A'. Two sets of user-callable routines are
* provided, one for int integers and the other for UF_long integers.
*
* The method is based on the approximate minimum degree algorithm, discussed
 * in Amestoy, Davis, and Duff, "An approximate minimum degree ordering algorithm",
* SIAM Journal of Matrix Analysis and Applications, vol. 17, no. 4, pp.
* 886-905, 1996. This package can perform both the AMD ordering (with
* aggressive absorption), and the AMDBAR ordering (without aggressive
* absorption) discussed in the above paper. This package differs from the
* Fortran codes discussed in the paper:
*
* (1) it can ignore "dense" rows and columns, leading to faster run times
* (2) it computes the ordering of A+A' if A is not symmetric
* (3) it is followed by a depth-first post-ordering of the assembly tree
* (or supernodal elimination tree)
*
* For historical reasons, the Fortran versions, amd.f and amdbar.f, have
* been left (nearly) unchanged. They compute the identical ordering as
* described in the above paper.
*/
#ifndef AMD_H
#define AMD_H
/* make it easy for C++ programs to include AMD */
#ifdef __cplusplus
extern "C" {
#endif
/* get the definition of size_t: */
#include <stddef.h>
/* define UF_long */
#include "UFconfig.h"
int amd_order /* returns AMD_OK, AMD_OK_BUT_JUMBLED,
* AMD_INVALID, or AMD_OUT_OF_MEMORY */
(
int n, /* A is n-by-n. n must be >= 0. */
const int Ap [ ], /* column pointers for A, of size n+1 */
const int Ai [ ], /* row indices of A, of size nz = Ap [n] */
int P [ ], /* output permutation, of size n */
double Control [ ], /* input Control settings, of size AMD_CONTROL */
double Info [ ] /* output Info statistics, of size AMD_INFO */
) ;
UF_long amd_l_order /* see above for description of arguments */
(
UF_long n,
const UF_long Ap [ ],
const UF_long Ai [ ],
UF_long P [ ],
double Control [ ],
double Info [ ]
) ;
/* Input arguments (not modified):
*
* n: the matrix A is n-by-n.
* Ap: an int/UF_long array of size n+1, containing column pointers of A.
* Ai: an int/UF_long array of size nz, containing the row indices of A,
* where nz = Ap [n].
* Control: a double array of size AMD_CONTROL, containing control
* parameters. Defaults are used if Control is NULL.
*
* Output arguments (not defined on input):
*
* P: an int/UF_long array of size n, containing the output permutation. If
* row i is the kth pivot row, then P [k] = i. In MATLAB notation,
* the reordered matrix is A (P,P).
* Info: a double array of size AMD_INFO, containing statistical
* information. Ignored if Info is NULL.
*
* On input, the matrix A is stored in column-oriented form. The row indices
* of nonzero entries in column j are stored in Ai [Ap [j] ... Ap [j+1]-1].
*
* If the row indices appear in ascending order in each column, and there
* are no duplicate entries, then amd_order is slightly more efficient in
* terms of time and memory usage. If this condition does not hold, a copy
* of the matrix is created (where these conditions do hold), and the copy is
* ordered. This feature is new to v2.0 (v1.2 and earlier required this
* condition to hold for the input matrix).
*
* Row indices must be in the range 0 to
* n-1. Ap [0] must be zero, and thus nz = Ap [n] is the number of nonzeros
* in A. The array Ap is of size n+1, and the array Ai is of size nz = Ap [n].
* The matrix does not need to be symmetric, and the diagonal does not need to
* be present (if diagonal entries are present, they are ignored except for
* the output statistic Info [AMD_NZDIAG]). The arrays Ai and Ap are not
* modified. This form of the Ap and Ai arrays to represent the nonzero
* pattern of the matrix A is the same as that used internally by MATLAB.
* If you wish to use a more flexible input structure, please see the
* umfpack_*_triplet_to_col routines in the UMFPACK package, at
* http://www.cise.ufl.edu/research/sparse/umfpack.
*
* Restrictions: n >= 0. Ap [0] = 0. Ap [j] <= Ap [j+1] for all j in the
* range 0 to n-1. nz = Ap [n] >= 0. Ai [0..nz-1] must be in the range 0
* to n-1. Finally, Ai, Ap, and P must not be NULL. If any of these
* restrictions are not met, AMD returns AMD_INVALID.
*
* AMD returns:
*
* AMD_OK if the matrix is valid and sufficient memory can be allocated to
* perform the ordering.
*
* AMD_OUT_OF_MEMORY if not enough memory can be allocated.
*
* AMD_INVALID if the input arguments n, Ap, Ai are invalid, or if P is
* NULL.
*
* AMD_OK_BUT_JUMBLED if the matrix had unsorted columns, and/or duplicate
* entries, but was otherwise valid.
*
* The AMD routine first forms the pattern of the matrix A+A', and then
* computes a fill-reducing ordering, P. If P [k] = i, then row/column i of
* the original is the kth pivotal row. In MATLAB notation, the permuted
* matrix is A (P,P), except that 0-based indexing is used instead of the
* 1-based indexing in MATLAB.
*
* The Control array is used to set various parameters for AMD. If a NULL
* pointer is passed, default values are used. The Control array is not
* modified.
*
* Control [AMD_DENSE]: controls the threshold for "dense" rows/columns.
* A dense row/column in A+A' can cause AMD to spend a lot of time in
* ordering the matrix. If Control [AMD_DENSE] >= 0, rows/columns
* with more than Control [AMD_DENSE] * sqrt (n) entries are ignored
* during the ordering, and placed last in the output order. The
* default value of Control [AMD_DENSE] is 10. If negative, no
* rows/columns are treated as "dense". Rows/columns with 16 or
* fewer off-diagonal entries are never considered "dense".
*
* Control [AMD_AGGRESSIVE]: controls whether or not to use aggressive
* absorption, in which a prior element is absorbed into the current
 * element if it is a subset of the current element, even if it is not
* adjacent to the current pivot element (refer to Amestoy, Davis,
* & Duff, 1996, for more details). The default value is nonzero,
* which means to perform aggressive absorption. This nearly always
* leads to a better ordering (because the approximate degrees are
* more accurate) and a lower execution time. There are cases where
* it can lead to a slightly worse ordering, however. To turn it off,
* set Control [AMD_AGGRESSIVE] to 0.
*
* Control [2..4] are not used in the current version, but may be used in
* future versions.
*
* The Info array provides statistics about the ordering on output. If it is
* not present, the statistics are not returned. This is not an error
* condition.
*
* Info [AMD_STATUS]: the return value of AMD, either AMD_OK,
* AMD_OK_BUT_JUMBLED, AMD_OUT_OF_MEMORY, or AMD_INVALID.
*
* Info [AMD_N]: n, the size of the input matrix
*
* Info [AMD_NZ]: the number of nonzeros in A, nz = Ap [n]
*
* Info [AMD_SYMMETRY]: the symmetry of the matrix A. It is the number
* of "matched" off-diagonal entries divided by the total number of
* off-diagonal entries. An entry A(i,j) is matched if A(j,i) is also
* an entry, for any pair (i,j) for which i != j. In MATLAB notation,
* S = spones (A) ;
* B = tril (S, -1) + triu (S, 1) ;
* symmetry = nnz (B & B') / nnz (B) ;
*
* Info [AMD_NZDIAG]: the number of entries on the diagonal of A.
*
* Info [AMD_NZ_A_PLUS_AT]: the number of nonzeros in A+A', excluding the
* diagonal. If A is perfectly symmetric (Info [AMD_SYMMETRY] = 1)
* with a fully nonzero diagonal, then Info [AMD_NZ_A_PLUS_AT] = nz-n
* (the smallest possible value). If A is perfectly unsymmetric
* (Info [AMD_SYMMETRY] = 0, for an upper triangular matrix, for
* example) with no diagonal, then Info [AMD_NZ_A_PLUS_AT] = 2*nz
* (the largest possible value).
*
* Info [AMD_NDENSE]: the number of "dense" rows/columns of A+A' that were
* removed from A prior to ordering. These are placed last in the
* output order P.
*
* Info [AMD_MEMORY]: the amount of memory used by AMD, in bytes. In the
* current version, this is 1.2 * Info [AMD_NZ_A_PLUS_AT] + 9*n
* times the size of an integer. This is at most 2.4nz + 9n. This
* excludes the size of the input arguments Ai, Ap, and P, which have
* a total size of nz + 2*n + 1 integers.
*
* Info [AMD_NCMPA]: the number of garbage collections performed.
*
* Info [AMD_LNZ]: the number of nonzeros in L (excluding the diagonal).
* This is a slight upper bound because mass elimination is combined
* with the approximate degree update. It is a rough upper bound if
* there are many "dense" rows/columns. The rest of the statistics,
* below, are also slight or rough upper bounds, for the same reasons.
* The post-ordering of the assembly tree might also not exactly
* correspond to a true elimination tree postordering.
*
* Info [AMD_NDIV]: the number of divide operations for a subsequent LDL'
* or LU factorization of the permuted matrix A (P,P).
*
* Info [AMD_NMULTSUBS_LDL]: the number of multiply-subtract pairs for a
* subsequent LDL' factorization of A (P,P).
*
* Info [AMD_NMULTSUBS_LU]: the number of multiply-subtract pairs for a
* subsequent LU factorization of A (P,P), assuming that no numerical
* pivoting is required.
*
* Info [AMD_DMAX]: the maximum number of nonzeros in any column of L,
* including the diagonal.
*
* Info [14..19] are not used in the current version, but may be used in
* future versions.
*/
/* ------------------------------------------------------------------------- */
/* direct interface to AMD */
/* ------------------------------------------------------------------------- */
/* amd_2 is the primary AMD ordering routine. It is not meant to be
* user-callable because of its restrictive inputs and because it destroys
* the user's input matrix. It does not check its inputs for errors, either.
* However, if you can work with these restrictions it can be faster than
* amd_order and use less memory (assuming that you can create your own copy
* of the matrix for AMD to destroy). Refer to AMD/Source/amd_2.c for a
* description of each parameter. */
void amd_2
(
int n,
int Pe [ ],
int Iw [ ],
int Len [ ],
int iwlen,
int pfree,
int Nv [ ],
int Next [ ],
int Last [ ],
int Head [ ],
int Elen [ ],
int Degree [ ],
int W [ ],
double Control [ ],
double Info [ ]
) ;
void amd_l2
(
UF_long n,
UF_long Pe [ ],
UF_long Iw [ ],
UF_long Len [ ],
UF_long iwlen,
UF_long pfree,
UF_long Nv [ ],
UF_long Next [ ],
UF_long Last [ ],
UF_long Head [ ],
UF_long Elen [ ],
UF_long Degree [ ],
UF_long W [ ],
double Control [ ],
double Info [ ]
) ;
/* ------------------------------------------------------------------------- */
/* amd_valid */
/* ------------------------------------------------------------------------- */
/* Returns AMD_OK or AMD_OK_BUT_JUMBLED if the matrix is valid as input to
* amd_order; the latter is returned if the matrix has unsorted and/or
* duplicate row indices in one or more columns. Returns AMD_INVALID if the
* matrix cannot be passed to amd_order. For amd_order, the matrix must also
* be square. The first two arguments are the number of rows and the number
* of columns of the matrix. For its use in AMD, these must both equal n.
*
* NOTE: this routine returned TRUE/FALSE in v1.2 and earlier.
*/
int amd_valid
(
int n_row, /* # of rows */
int n_col, /* # of columns */
const int Ap [ ], /* column pointers, of size n_col+1 */
const int Ai [ ] /* row indices, of size Ap [n_col] */
) ;
UF_long amd_l_valid
(
UF_long n_row,
UF_long n_col,
const UF_long Ap [ ],
const UF_long Ai [ ]
) ;
/* ------------------------------------------------------------------------- */
/* AMD memory manager and printf routines */
/* ------------------------------------------------------------------------- */
/* The user can redefine these to change the malloc, free, and printf routines
* that AMD uses. */
#ifndef EXTERN
#define EXTERN extern
#endif
EXTERN void *(*amd_malloc) (size_t) ; /* pointer to malloc */
EXTERN void (*amd_free) (void *) ; /* pointer to free */
EXTERN void *(*amd_realloc) (void *, size_t) ; /* pointer to realloc */
EXTERN void *(*amd_calloc) (size_t, size_t) ; /* pointer to calloc */
EXTERN int (*amd_printf) (const char *, ...) ; /* pointer to printf */
/* ------------------------------------------------------------------------- */
/* AMD Control and Info arrays */
/* ------------------------------------------------------------------------- */
/* amd_defaults: sets the default control settings */
void amd_defaults (double Control [ ]) ;
void amd_l_defaults (double Control [ ]) ;
/* amd_control: prints the control settings */
void amd_control (double Control [ ]) ;
void amd_l_control (double Control [ ]) ;
/* amd_info: prints the statistics */
void amd_info (double Info [ ]) ;
void amd_l_info (double Info [ ]) ;
#define AMD_CONTROL 5 /* size of Control array */
#define AMD_INFO 20 /* size of Info array */
/* contents of Control */
#define AMD_DENSE 0 /* "dense" if degree > Control [0] * sqrt (n) */
#define AMD_AGGRESSIVE 1 /* do aggressive absorption if Control [1] != 0 */
/* default Control settings */
#define AMD_DEFAULT_DENSE 10.0 /* default "dense" degree 10*sqrt(n) */
#define AMD_DEFAULT_AGGRESSIVE 1 /* do aggressive absorption by default */
/* contents of Info */
#define AMD_STATUS 0 /* return value of amd_order and amd_l_order */
#define AMD_N 1 /* A is n-by-n */
#define AMD_NZ 2 /* number of nonzeros in A */
#define AMD_SYMMETRY 3 /* symmetry of pattern (1 is sym., 0 is unsym.) */
#define AMD_NZDIAG 4 /* # of entries on diagonal */
#define AMD_NZ_A_PLUS_AT 5 /* nz in A+A' */
#define AMD_NDENSE 6 /* number of "dense" rows/columns in A */
#define AMD_MEMORY 7 /* amount of memory used by AMD */
#define AMD_NCMPA 8 /* number of garbage collections in AMD */
#define AMD_LNZ 9 /* approx. nz in L, excluding the diagonal */
#define AMD_NDIV 10 /* number of fl. point divides for LU and LDL' */
#define AMD_NMULTSUBS_LDL 11 /* number of fl. point (*,-) pairs for LDL' */
#define AMD_NMULTSUBS_LU 12 /* number of fl. point (*,-) pairs for LU */
#define AMD_DMAX 13 /* max nz. in any column of L, incl. diagonal */
/* ------------------------------------------------------------------------- */
/* return values of AMD */
/* ------------------------------------------------------------------------- */
#define AMD_OK 0 /* success */
#define AMD_OUT_OF_MEMORY -1 /* malloc failed, or problem too large */
#define AMD_INVALID -2 /* input arguments are not valid */
#define AMD_OK_BUT_JUMBLED 1 /* input matrix is OK for amd_order, but
* columns were not sorted, and/or duplicate entries were present. AMD had
* to do extra work before ordering the matrix. This is a warning, not an
* error. */
/* ========================================================================== */
/* === AMD version ========================================================== */
/* ========================================================================== */
/* AMD Version 1.2 and later include the following definitions.
* As an example, to test if the version you are using is 1.2 or later:
*
* #ifdef AMD_VERSION
* if (AMD_VERSION >= AMD_VERSION_CODE (1,2)) ...
* #endif
*
* This also works during compile-time:
*
* #if defined(AMD_VERSION) && (AMD_VERSION >= AMD_VERSION_CODE (1,2))
* printf ("This is version 1.2 or later\n") ;
* #else
* printf ("This is an early version\n") ;
* #endif
*
* Versions 1.1 and earlier of AMD do not include a #define'd version number.
*/
#define AMD_DATE "Dec 7, 2011"
#define AMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
#define AMD_MAIN_VERSION 2
#define AMD_SUB_VERSION 2
#define AMD_SUBSUB_VERSION 3
#define AMD_VERSION AMD_VERSION_CODE(AMD_MAIN_VERSION,AMD_SUB_VERSION)
#ifdef __cplusplus
}
#endif
#endif
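
Pulling the declarations above together, here is a minimal end-to-end sketch (not part of the imported sources) of the int version of the interface: take the defaults, order a small 5-by-5 "arrow" pattern, then print the Info statistics. The matrix is illustrative only.

#include <stdio.h>
#include "amd.h"

int main (void)
{
    /* 5-by-5 arrow pattern in compressed-column form: column pointers Ap,
     * sorted row indices Ai, no duplicates, Ap [0] = 0 */
    int Ap [ ] = { 0, 5, 7, 9, 11, 13 } ;
    int Ai [ ] = { 0, 1, 2, 3, 4,   0, 1,   0, 2,   0, 3,   0, 4 } ;
    int P [5] ;
    double Control [AMD_CONTROL], Info [AMD_INFO] ;
    int k, status ;

    amd_defaults (Control) ;    /* AMD_DENSE = 10, aggressive absorption on */
    amd_control (Control) ;     /* optionally print the settings */

    status = amd_order (5, Ap, Ai, P, Control, Info) ;
    if (status < AMD_OK)        /* AMD_INVALID or AMD_OUT_OF_MEMORY */
    {
        printf ("amd_order failed, status %d\n", status) ;
        return (1) ;
    }
    for (k = 0 ; k < 5 ; k++)
    {
        printf ("P [%d] = %d\n", k, P [k]) ; /* row/column P[k] is the kth pivot */
    }
    amd_info (Info) ;           /* fill-in and flop estimates for A(P,P) */
    return (0) ;
}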

181
src/maths/KLU/amd_1.c Normal file
View File

@@ -0,0 +1,181 @@
/* ========================================================================= */
/* === AMD_1 =============================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* AMD_1: Construct A+A' for a sparse matrix A and perform the AMD ordering.
*
* The n-by-n sparse matrix A can be unsymmetric. It is stored in MATLAB-style
* compressed-column form, with sorted row indices in each column, and no
* duplicate entries. Diagonal entries may be present, but they are ignored.
* Row indices of column j of A are stored in Ai [Ap [j] ... Ap [j+1]-1].
* Ap [0] must be zero, and nz = Ap [n] is the number of entries in A. The
* size of the matrix, n, must be greater than or equal to zero.
*
* This routine must be preceded by a call to AMD_aat, which computes the
* number of entries in each row/column in A+A', excluding the diagonal.
* Len [j], on input, is the number of entries in row/column j of A+A'. This
* routine constructs the matrix A+A' and then calls AMD_2. No error checking
* is performed (this was done in AMD_valid).
*/
#include "amd_internal.h"
GLOBAL void AMD_1
(
Int n, /* n > 0 */
const Int Ap [ ], /* input of size n+1, not modified */
const Int Ai [ ], /* input of size nz = Ap [n], not modified */
Int P [ ], /* size n output permutation */
Int Pinv [ ], /* size n output inverse permutation */
Int Len [ ], /* size n input, undefined on output */
Int slen, /* slen >= sum (Len [0..n-1]) + 7n,
* ideally slen = 1.2 * sum (Len) + 8n */
Int S [ ], /* size slen workspace */
double Control [ ], /* input array of size AMD_CONTROL */
double Info [ ] /* output array of size AMD_INFO */
)
{
Int i, j, k, p, pfree, iwlen, pj, p1, p2, pj2, *Iw, *Pe, *Nv, *Head,
*Elen, *Degree, *s, *W, *Sp, *Tp ;
/* --------------------------------------------------------------------- */
/* construct the matrix for AMD_2 */
/* --------------------------------------------------------------------- */
ASSERT (n > 0) ;
iwlen = slen - 6*n ;
s = S ;
Pe = s ; s += n ;
Nv = s ; s += n ;
Head = s ; s += n ;
Elen = s ; s += n ;
Degree = s ; s += n ;
W = s ; s += n ;
Iw = s ; s += iwlen ;
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ;
/* construct the pointers for A+A' */
Sp = Nv ; /* use Nv and W as workspace for Sp and Tp [ */
Tp = W ;
pfree = 0 ;
for (j = 0 ; j < n ; j++)
{
Pe [j] = pfree ;
Sp [j] = pfree ;
pfree += Len [j] ;
}
/* Note that this restriction on iwlen is slightly more restrictive than
* what is strictly required in AMD_2. AMD_2 can operate with no elbow
* room at all, but it will be very slow. For better performance, at
* least size-n elbow room is enforced. */
ASSERT (iwlen >= pfree + n) ;
#ifndef NDEBUG
for (p = 0 ; p < iwlen ; p++) Iw [p] = EMPTY ;
#endif
for (k = 0 ; k < n ; k++)
{
AMD_DEBUG1 (("Construct row/column k= "ID" of A+A'\n", k)) ;
p1 = Ap [k] ;
p2 = Ap [k+1] ;
/* construct A+A' */
for (p = p1 ; p < p2 ; )
{
/* scan the upper triangular part of A */
j = Ai [p] ;
ASSERT (j >= 0 && j < n) ;
if (j < k)
{
/* entry A (j,k) in the strictly upper triangular part */
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
ASSERT (Sp [k] < (k == n-1 ? pfree : Pe [k+1])) ;
Iw [Sp [j]++] = k ;
Iw [Sp [k]++] = j ;
p++ ;
}
else if (j == k)
{
/* skip the diagonal */
p++ ;
break ;
}
else /* j > k */
{
/* first entry below the diagonal */
break ;
}
/* scan lower triangular part of A, in column j until reaching
* row k. Start where last scan left off. */
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ;
pj2 = Ap [j+1] ;
for (pj = Tp [j] ; pj < pj2 ; )
{
i = Ai [pj] ;
ASSERT (i >= 0 && i < n) ;
if (i < k)
{
/* A (i,j) is only in the lower part, not in upper */
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
Iw [Sp [i]++] = j ;
Iw [Sp [j]++] = i ;
pj++ ;
}
else if (i == k)
{
/* entry A (k,j) in lower part and A (j,k) in upper */
pj++ ;
break ;
}
else /* i > k */
{
/* consider this entry later, when k advances to i */
break ;
}
}
Tp [j] = pj ;
}
Tp [k] = p ;
}
/* clean up, for remaining mismatched entries */
for (j = 0 ; j < n ; j++)
{
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++)
{
i = Ai [pj] ;
ASSERT (i >= 0 && i < n) ;
/* A (i,j) is only in the lower part, not in upper */
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
Iw [Sp [i]++] = j ;
Iw [Sp [j]++] = i ;
}
}
#ifndef NDEBUG
for (j = 0 ; j < n-1 ; j++) ASSERT (Sp [j] == Pe [j+1]) ;
ASSERT (Sp [n-1] == pfree) ;
#endif
/* Tp and Sp no longer needed ] */
/* --------------------------------------------------------------------- */
/* order the matrix */
/* --------------------------------------------------------------------- */
AMD_2 (n, Pe, Iw, Len, iwlen, pfree,
Nv, Pinv, P, Head, Elen, Degree, W, Control, Info) ;
}

1842
src/maths/KLU/amd_2.c Normal file

File diff suppressed because it is too large

185
src/maths/KLU/amd_aat.c Normal file

@@ -0,0 +1,185 @@
/* ========================================================================= */
/* === AMD_aat ============================================================= */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* AMD_aat: compute the symmetry of the pattern of A, and count the number of
 * nonzeros in each column of A+A' (excluding the diagonal). Assumes the input
* matrix has no errors, with sorted columns and no duplicates
* (AMD_valid (n, n, Ap, Ai) must be AMD_OK, but this condition is not
* checked).
*/
#include "amd_internal.h"
GLOBAL size_t AMD_aat /* returns nz in A+A' */
(
Int n,
const Int Ap [ ],
const Int Ai [ ],
Int Len [ ], /* Len [j]: length of column j of A+A', excl diagonal*/
Int Tp [ ], /* workspace of size n */
double Info [ ]
)
{
Int p1, p2, p, i, j, pj, pj2, k, nzdiag, nzboth, nz ;
double sym ;
size_t nzaat ;
#ifndef NDEBUG
AMD_debug_init ("AMD AAT") ;
for (k = 0 ; k < n ; k++) Tp [k] = EMPTY ;
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ;
#endif
if (Info != (double *) NULL)
{
/* clear the Info array, if it exists */
for (i = 0 ; i < AMD_INFO ; i++)
{
Info [i] = EMPTY ;
}
Info [AMD_STATUS] = AMD_OK ;
}
for (k = 0 ; k < n ; k++)
{
Len [k] = 0 ;
}
nzdiag = 0 ;
nzboth = 0 ;
nz = Ap [n] ;
for (k = 0 ; k < n ; k++)
{
p1 = Ap [k] ;
p2 = Ap [k+1] ;
AMD_DEBUG2 (("\nAAT Column: "ID" p1: "ID" p2: "ID"\n", k, p1, p2)) ;
/* construct A+A' */
for (p = p1 ; p < p2 ; )
{
/* scan the upper triangular part of A */
j = Ai [p] ;
if (j < k)
{
/* entry A (j,k) is in the strictly upper triangular part,
* add both A (j,k) and A (k,j) to the matrix A+A' */
Len [j]++ ;
Len [k]++ ;
AMD_DEBUG3 ((" upper ("ID","ID") ("ID","ID")\n", j,k, k,j));
p++ ;
}
else if (j == k)
{
/* skip the diagonal */
p++ ;
nzdiag++ ;
break ;
}
else /* j > k */
{
/* first entry below the diagonal */
break ;
}
/* scan lower triangular part of A, in column j until reaching
* row k. Start where last scan left off. */
ASSERT (Tp [j] != EMPTY) ;
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ;
pj2 = Ap [j+1] ;
for (pj = Tp [j] ; pj < pj2 ; )
{
i = Ai [pj] ;
if (i < k)
{
/* A (i,j) is only in the lower part, not in upper.
* add both A (i,j) and A (j,i) to the matrix A+A' */
Len [i]++ ;
Len [j]++ ;
AMD_DEBUG3 ((" lower ("ID","ID") ("ID","ID")\n",
i,j, j,i)) ;
pj++ ;
}
else if (i == k)
{
/* entry A (k,j) in lower part and A (j,k) in upper */
pj++ ;
nzboth++ ;
break ;
}
else /* i > k */
{
/* consider this entry later, when k advances to i */
break ;
}
}
Tp [j] = pj ;
}
/* Tp [k] points to the entry just below the diagonal in column k */
Tp [k] = p ;
}
/* clean up, for remaining mismatched entries */
for (j = 0 ; j < n ; j++)
{
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++)
{
i = Ai [pj] ;
/* A (i,j) is only in the lower part, not in upper.
* add both A (i,j) and A (j,i) to the matrix A+A' */
Len [i]++ ;
Len [j]++ ;
AMD_DEBUG3 ((" lower cleanup ("ID","ID") ("ID","ID")\n",
i,j, j,i)) ;
}
}
/* --------------------------------------------------------------------- */
/* compute the symmetry of the nonzero pattern of A */
/* --------------------------------------------------------------------- */
/* Given a matrix A, the symmetry of A is:
* B = tril (spones (A), -1) + triu (spones (A), 1) ;
* sym = nnz (B & B') / nnz (B) ;
* or 1 if nnz (B) is zero.
*/
if (nz == nzdiag)
{
sym = 1 ;
}
else
{
sym = (2 * (double) nzboth) / ((double) (nz - nzdiag)) ;
}
nzaat = 0 ;
for (k = 0 ; k < n ; k++)
{
nzaat += Len [k] ;
}
AMD_DEBUG1 (("AMD nz in A+A', excluding diagonal (nzaat) = %g\n",
(double) nzaat)) ;
AMD_DEBUG1 ((" nzboth: "ID" nz: "ID" nzdiag: "ID" symmetry: %g\n",
nzboth, nz, nzdiag, sym)) ;
if (Info != (double *) NULL)
{
Info [AMD_STATUS] = AMD_OK ;
Info [AMD_N] = n ;
Info [AMD_NZ] = nz ;
Info [AMD_SYMMETRY] = sym ; /* symmetry of pattern of A */
Info [AMD_NZDIAG] = nzdiag ; /* nonzeros on diagonal of A */
Info [AMD_NZ_A_PLUS_AT] = nzaat ; /* nonzeros in A+A' */
}
return (nzaat) ;
}
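
To make the Len / symmetry outputs concrete, here is a small worked sketch (not part of the imported sources) that calls the internal AMD_aat on a 3-by-3 unsymmetric pattern. It includes amd_internal.h, which ordinary applications should not do; user code reaches this routine indirectly through amd_order.

#include <stdio.h>
#include "amd_internal.h"

int main (void)
{
    /* pattern   [ x . x ]
     *           [ x x . ]   in compressed-column form
     *           [ . . x ]                                               */
    Int Ap [ ] = { 0, 2, 3, 5 } ;
    Int Ai [ ] = { 0, 1,   1,   0, 2 } ;
    Int Len [3], Tp [3] ;
    double Info [AMD_INFO] ;
    size_t nzaat = AMD_aat (3, Ap, Ai, Len, Tp, Info) ;

    /* nz = 5, nzdiag = 3, and no off-diagonal entry has a transposed match,
     * so nzboth = 0 and sym = 2*0 / (5-3) = 0.  A+A' (excluding the
     * diagonal) holds (1,0),(0,1),(2,0),(0,2), so Len = {2,1,1}, nzaat = 4 */
    printf ("nzaat = %g  symmetry = %g\n", (double) nzaat,
        Info [AMD_SYMMETRY]) ;
    return (0) ;
}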

64
src/maths/KLU/amd_control.c Normal file

@@ -0,0 +1,64 @@
/* ========================================================================= */
/* === AMD_control ========================================================= */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* User-callable. Prints the control parameters for AMD. See amd.h
* for details. If the Control array is not present, the defaults are
* printed instead.
*/
#include "amd_internal.h"
GLOBAL void AMD_control
(
double Control [ ]
)
{
double alpha ;
Int aggressive ;
if (Control != (double *) NULL)
{
alpha = Control [AMD_DENSE] ;
aggressive = Control [AMD_AGGRESSIVE] != 0 ;
}
else
{
alpha = AMD_DEFAULT_DENSE ;
aggressive = AMD_DEFAULT_AGGRESSIVE ;
}
PRINTF (("\nAMD version %d.%d.%d, %s: approximate minimum degree ordering\n"
" dense row parameter: %g\n", AMD_MAIN_VERSION, AMD_SUB_VERSION,
AMD_SUBSUB_VERSION, AMD_DATE, alpha)) ;
if (alpha < 0)
{
PRINTF ((" no rows treated as dense\n")) ;
}
else
{
PRINTF ((
" (rows with more than max (%g * sqrt (n), 16) entries are\n"
" considered \"dense\", and placed last in output permutation)\n",
alpha)) ;
}
if (aggressive)
{
PRINTF ((" aggressive absorption: yes\n")) ;
}
else
{
PRINTF ((" aggressive absorption: no\n")) ;
}
PRINTF ((" size of AMD integer: %d\n\n", sizeof (Int))) ;
}

38
src/maths/KLU/amd_defaults.c Normal file

@@ -0,0 +1,38 @@
/* ========================================================================= */
/* === AMD_defaults ======================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* User-callable. Sets default control parameters for AMD. See amd.h
* for details.
*/
#include "amd_internal.h"
/* ========================================================================= */
/* === AMD defaults ======================================================== */
/* ========================================================================= */
GLOBAL void AMD_defaults
(
double Control [ ]
)
{
Int i ;
if (Control != (double *) NULL)
{
for (i = 0 ; i < AMD_CONTROL ; i++)
{
Control [i] = 0 ;
}
Control [AMD_DENSE] = AMD_DEFAULT_DENSE ;
Control [AMD_AGGRESSIVE] = AMD_DEFAULT_AGGRESSIVE ;
}
}
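
The defaults set above can be adjusted per call, as documented in amd.h. A short sketch (not part of the imported sources) with illustrative values:

#include <stdio.h>
#include "amd.h"

int main (void)
{
    double Control [AMD_CONTROL] ;

    amd_defaults (Control) ;        /* AMD_DENSE = 10.0, AMD_AGGRESSIVE = 1 */
    Control [AMD_DENSE] = 20.0 ;    /* "dense" only above 20*sqrt(n) entries */
    Control [AMD_AGGRESSIVE] = 0 ;  /* disable aggressive absorption */
    amd_control (Control) ;         /* print the modified settings */
    /* pass Control to amd_order as usual */
    return (0) ;
}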

180
src/maths/KLU/amd_dump.c Normal file

@@ -0,0 +1,180 @@
/* ========================================================================= */
/* === AMD_dump ============================================================ */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* Debugging routines for AMD. Not used if NDEBUG is defined at compile-
 * time (the default). See comments in amd_internal.h on how to enable
* debugging. Not user-callable.
*/
#include "amd_internal.h"
#ifndef NDEBUG
/* This global variable is present only when debugging */
GLOBAL Int AMD_debug = -999 ; /* default is no debug printing */
/* ========================================================================= */
/* === AMD_debug_init ====================================================== */
/* ========================================================================= */
/* Sets the debug print level, by reading the file debug.amd (if it exists) */
GLOBAL void AMD_debug_init ( char *s )
{
FILE *f ;
f = fopen ("debug.amd", "r") ;
if (f == (FILE *) NULL)
{
AMD_debug = -999 ;
}
else
{
fscanf (f, ID, &AMD_debug) ;
fclose (f) ;
}
if (AMD_debug >= 0)
{
printf ("%s: AMD_debug_init, D= "ID"\n", s, AMD_debug) ;
}
}
/* ========================================================================= */
/* === AMD_dump ============================================================ */
/* ========================================================================= */
/* Dump AMD's data structure, except for the hash buckets. This routine
* cannot be called when the hash buckets are non-empty.
*/
GLOBAL void AMD_dump (
Int n, /* A is n-by-n */
Int Pe [ ], /* pe [0..n-1]: index in iw of start of row i */
Int Iw [ ], /* workspace of size iwlen, iwlen [0..pfree-1]
* holds the matrix on input */
Int Len [ ], /* len [0..n-1]: length for row i */
Int iwlen, /* length of iw */
Int pfree, /* iw [pfree ... iwlen-1] is empty on input */
Int Nv [ ], /* nv [0..n-1] */
Int Next [ ], /* next [0..n-1] */
Int Last [ ], /* last [0..n-1] */
Int Head [ ], /* head [0..n-1] */
Int Elen [ ], /* size n */
Int Degree [ ], /* size n */
Int W [ ], /* size n */
Int nel
)
{
Int i, pe, elen, nv, len, e, p, k, j, deg, w, cnt, ilast ;
if (AMD_debug < 0) return ;
ASSERT (pfree <= iwlen) ;
AMD_DEBUG3 (("\nAMD dump, pfree: "ID"\n", pfree)) ;
for (i = 0 ; i < n ; i++)
{
pe = Pe [i] ;
elen = Elen [i] ;
nv = Nv [i] ;
len = Len [i] ;
w = W [i] ;
if (elen >= EMPTY)
{
if (nv == 0)
{
AMD_DEBUG3 (("\nI "ID": nonprincipal: ", i)) ;
ASSERT (elen == EMPTY) ;
if (pe == EMPTY)
{
AMD_DEBUG3 ((" dense node\n")) ;
ASSERT (w == 1) ;
}
else
{
ASSERT (pe < EMPTY) ;
AMD_DEBUG3 ((" i "ID" -> parent "ID"\n", i, FLIP (Pe[i])));
}
}
else
{
AMD_DEBUG3 (("\nI "ID": active principal supervariable:\n",i));
AMD_DEBUG3 ((" nv(i): "ID" Flag: %d\n", nv, (nv < 0))) ;
ASSERT (elen >= 0) ;
ASSERT (nv > 0 && pe >= 0) ;
p = pe ;
AMD_DEBUG3 ((" e/s: ")) ;
if (elen == 0) AMD_DEBUG3 ((" : ")) ;
ASSERT (pe + len <= pfree) ;
for (k = 0 ; k < len ; k++)
{
j = Iw [p] ;
AMD_DEBUG3 ((" "ID"", j)) ;
ASSERT (j >= 0 && j < n) ;
if (k == elen-1) AMD_DEBUG3 ((" : ")) ;
p++ ;
}
AMD_DEBUG3 (("\n")) ;
}
}
else
{
e = i ;
if (w == 0)
{
AMD_DEBUG3 (("\nE "ID": absorbed element: w "ID"\n", e, w)) ;
ASSERT (nv > 0 && pe < 0) ;
AMD_DEBUG3 ((" e "ID" -> parent "ID"\n", e, FLIP (Pe [e]))) ;
}
else
{
AMD_DEBUG3 (("\nE "ID": unabsorbed element: w "ID"\n", e, w)) ;
ASSERT (nv > 0 && pe >= 0) ;
p = pe ;
AMD_DEBUG3 ((" : ")) ;
ASSERT (pe + len <= pfree) ;
for (k = 0 ; k < len ; k++)
{
j = Iw [p] ;
AMD_DEBUG3 ((" "ID"", j)) ;
ASSERT (j >= 0 && j < n) ;
p++ ;
}
AMD_DEBUG3 (("\n")) ;
}
}
}
/* this routine cannot be called when the hash buckets are non-empty */
AMD_DEBUG3 (("\nDegree lists:\n")) ;
if (nel >= 0)
{
cnt = 0 ;
for (deg = 0 ; deg < n ; deg++)
{
if (Head [deg] == EMPTY) continue ;
ilast = EMPTY ;
AMD_DEBUG3 ((ID": \n", deg)) ;
for (i = Head [deg] ; i != EMPTY ; i = Next [i])
{
AMD_DEBUG3 ((" "ID" : next "ID" last "ID" deg "ID"\n",
i, Next [i], Last [i], Degree [i])) ;
ASSERT (i >= 0 && i < n && ilast == Last [i] &&
deg == Degree [i]) ;
cnt += Nv [i] ;
ilast = i ;
}
AMD_DEBUG3 (("\n")) ;
}
ASSERT (cnt == n - nel) ;
}
}
#endif

84
src/maths/KLU/amd_global.c Normal file

@@ -0,0 +1,84 @@
/* ========================================================================= */
/* === amd_global ========================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
#include <stdlib.h>
#ifdef MATLAB_MEX_FILE
#include "mex.h"
#include "matrix.h"
#endif
#ifndef NULL
#define NULL 0
#endif
/* ========================================================================= */
/* === Default AMD memory manager ========================================== */
/* ========================================================================= */
/* The user can redefine these global pointers at run-time to change the memory
* manager used by AMD. AMD only uses malloc and free; realloc and calloc are
 * included for completeness, in case another package wants to use the same
* memory manager as AMD.
*
* If compiling as a MATLAB mexFunction, the default memory manager is mxMalloc.
* You can also compile AMD as a standard ANSI-C library and link a mexFunction
* against it, and then redefine these pointers at run-time, in your
* mexFunction.
*
* If -DNMALLOC is defined at compile-time, no memory manager is specified at
* compile-time. You must then define these functions at run-time, before
* calling AMD, for AMD to work properly.
*/
#ifndef NMALLOC
#ifdef MATLAB_MEX_FILE
/* MATLAB mexFunction: */
void *(*amd_malloc) (size_t) = mxMalloc ;
void (*amd_free) (void *) = mxFree ;
void *(*amd_realloc) (void *, size_t) = mxRealloc ;
void *(*amd_calloc) (size_t, size_t) = mxCalloc ;
#else
/* standard ANSI-C: */
void *(*amd_malloc) (size_t) = malloc ;
void (*amd_free) (void *) = free ;
void *(*amd_realloc) (void *, size_t) = realloc ;
void *(*amd_calloc) (size_t, size_t) = calloc ;
#endif
#else
/* no memory manager defined at compile-time; you MUST define one at run-time */
void *(*amd_malloc) (size_t) = NULL ;
void (*amd_free) (void *) = NULL ;
void *(*amd_realloc) (void *, size_t) = NULL ;
void *(*amd_calloc) (size_t, size_t) = NULL ;
#endif
/* ========================================================================= */
/* === Default AMD printf routine ========================================== */
/* ========================================================================= */
/* The user can redefine this global pointer at run-time to change the printf
* routine used by AMD. If NULL, no printing occurs.
*
* If -DNPRINT is defined at compile-time, stdio.h is not included. Printing
* can then be enabled at run-time by setting amd_printf to a non-NULL function.
*/
#ifndef NPRINT
#ifdef MATLAB_MEX_FILE
int (*amd_printf) (const char *, ...) = mexPrintf ;
#else
#include <stdio.h>
int (*amd_printf) (const char *, ...) = printf ;
#endif
#else
int (*amd_printf) (const char *, ...) = NULL ;
#endif
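
Because amd_malloc, amd_free, amd_realloc, amd_calloc, and amd_printf are ordinary global function pointers, a host application (ngspice, for instance) can redirect them before calling any AMD routine. A small sketch (not part of the imported sources); the counting wrappers are hypothetical:

#include <stdio.h>
#include <stdlib.h>
#include "amd.h"

static size_t n_alloc = 0 ;     /* hypothetical allocation counter */

static void *counting_malloc (size_t size)
{
    n_alloc++ ;
    return (malloc (size)) ;
}

static void counting_free (void *p)
{
    free (p) ;
}

int main (void)
{
    amd_malloc = counting_malloc ;  /* AMD itself only calls malloc and free */
    amd_free   = counting_free ;
    amd_printf = printf ;           /* or NULL to silence amd_control/amd_info */

    /* ... call amd_order here ... */

    printf ("AMD made %lu allocations\n", (unsigned long) n_alloc) ;
    return (0) ;
}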

120
src/maths/KLU/amd_info.c Normal file

@@ -0,0 +1,120 @@
/* ========================================================================= */
/* === AMD_info ============================================================ */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* User-callable. Prints the output statistics for AMD. See amd.h
* for details. If the Info array is not present, nothing is printed.
*/
#include "amd_internal.h"
#define PRI(format,x) { if (x >= 0) { PRINTF ((format, x)) ; }}
GLOBAL void AMD_info
(
double Info [ ]
)
{
double n, ndiv, nmultsubs_ldl, nmultsubs_lu, lnz, lnzd ;
PRINTF (("\nAMD version %d.%d.%d, %s, results:\n",
AMD_MAIN_VERSION, AMD_SUB_VERSION, AMD_SUBSUB_VERSION, AMD_DATE)) ;
if (!Info)
{
return ;
}
n = Info [AMD_N] ;
ndiv = Info [AMD_NDIV] ;
nmultsubs_ldl = Info [AMD_NMULTSUBS_LDL] ;
nmultsubs_lu = Info [AMD_NMULTSUBS_LU] ;
lnz = Info [AMD_LNZ] ;
lnzd = (n >= 0 && lnz >= 0) ? (n + lnz) : (-1) ;
/* AMD return status */
PRINTF ((" status: ")) ;
if (Info [AMD_STATUS] == AMD_OK)
{
PRINTF (("OK\n")) ;
}
else if (Info [AMD_STATUS] == AMD_OUT_OF_MEMORY)
{
PRINTF (("out of memory\n")) ;
}
else if (Info [AMD_STATUS] == AMD_INVALID)
{
PRINTF (("invalid matrix\n")) ;
}
else if (Info [AMD_STATUS] == AMD_OK_BUT_JUMBLED)
{
PRINTF (("OK, but jumbled\n")) ;
}
else
{
PRINTF (("unknown\n")) ;
}
/* statistics about the input matrix */
PRI (" n, dimension of A: %.20g\n", n);
PRI (" nz, number of nonzeros in A: %.20g\n",
Info [AMD_NZ]) ;
PRI (" symmetry of A: %.4f\n",
Info [AMD_SYMMETRY]) ;
PRI (" number of nonzeros on diagonal: %.20g\n",
Info [AMD_NZDIAG]) ;
PRI (" nonzeros in pattern of A+A' (excl. diagonal): %.20g\n",
Info [AMD_NZ_A_PLUS_AT]) ;
PRI (" # dense rows/columns of A+A': %.20g\n",
Info [AMD_NDENSE]) ;
/* statistics about AMD's behavior */
PRI (" memory used, in bytes: %.20g\n",
Info [AMD_MEMORY]) ;
PRI (" # of memory compactions: %.20g\n",
Info [AMD_NCMPA]) ;
/* statistics about the ordering quality */
PRINTF (("\n"
" The following approximate statistics are for a subsequent\n"
" factorization of A(P,P) + A(P,P)'. They are slight upper\n"
" bounds if there are no dense rows/columns in A+A', and become\n"
" looser if dense rows/columns exist.\n\n")) ;
PRI (" nonzeros in L (excluding diagonal): %.20g\n",
lnz) ;
PRI (" nonzeros in L (including diagonal): %.20g\n",
lnzd) ;
PRI (" # divide operations for LDL' or LU: %.20g\n",
ndiv) ;
PRI (" # multiply-subtract operations for LDL': %.20g\n",
nmultsubs_ldl) ;
PRI (" # multiply-subtract operations for LU: %.20g\n",
nmultsubs_lu) ;
PRI (" max nz. in any column of L (incl. diagonal): %.20g\n",
Info [AMD_DMAX]) ;
/* total flop counts for various factorizations */
if (n >= 0 && ndiv >= 0 && nmultsubs_ldl >= 0 && nmultsubs_lu >= 0)
{
PRINTF (("\n"
" chol flop count for real A, sqrt counted as 1 flop: %.20g\n"
" LDL' flop count for real A: %.20g\n"
" LDL' flop count for complex A: %.20g\n"
" LU flop count for real A (with no pivoting): %.20g\n"
" LU flop count for complex A (with no pivoting): %.20g\n\n",
n + ndiv + 2*nmultsubs_ldl,
ndiv + 2*nmultsubs_ldl,
9*ndiv + 8*nmultsubs_ldl,
ndiv + 2*nmultsubs_lu,
9*ndiv + 8*nmultsubs_lu)) ;
}
}

350
src/maths/KLU/amd_internal.h Normal file

@@ -0,0 +1,350 @@
/* ========================================================================= */
/* === amd_internal.h ====================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* This file is for internal use in AMD itself, and does not normally need to
* be included in user code (it is included in UMFPACK, however). All others
* should use amd.h instead.
*
* The following compile-time definitions affect how AMD is compiled.
*
* -DNPRINT
*
* Disable all printing. stdio.h will not be included. Printing can
* be re-enabled at run-time by setting the global pointer amd_printf
* to printf (or mexPrintf for a MATLAB mexFunction).
*
* -DNMALLOC
*
* No memory manager is defined at compile-time. You MUST define the
* function pointers amd_malloc, amd_free, amd_realloc, and
* amd_calloc at run-time for AMD to work properly.
*/
/* ========================================================================= */
/* === NDEBUG ============================================================== */
/* ========================================================================= */
/*
* Turning on debugging takes some work (see below). If you do not edit this
* file, then debugging is always turned off, regardless of whether or not
* -DNDEBUG is specified in your compiler options.
*
* If AMD is being compiled as a mexFunction, then MATLAB_MEX_FILE is defined,
* and mxAssert is used instead of assert. If debugging is not enabled, no
* MATLAB include files or functions are used. Thus, the AMD library libamd.a
* can be safely used in either a stand-alone C program or in another
* mexFunction, without any change.
*/
/*
AMD will be exceedingly slow when running in debug mode. The next three
lines ensure that debugging is turned off.
*/
#ifndef NDEBUG
#define NDEBUG
#endif
/*
To enable debugging, uncomment the following line:
#undef NDEBUG
*/
/* ------------------------------------------------------------------------- */
/* ANSI include files */
/* ------------------------------------------------------------------------- */
/* from stdlib.h: size_t, malloc, free, realloc, and calloc */
#include <stdlib.h>
#if !defined(NPRINT) || !defined(NDEBUG)
/* from stdio.h: printf. Not included if NPRINT is defined at compile time.
* fopen and fscanf are used when debugging. */
#include <stdio.h>
#endif
/* from limits.h: INT_MAX and LONG_MAX */
#include <limits.h>
/* from math.h: sqrt */
#include <math.h>
/* ------------------------------------------------------------------------- */
/* MATLAB include files (only if being used in or via MATLAB) */
/* ------------------------------------------------------------------------- */
#ifdef MATLAB_MEX_FILE
#include "matrix.h"
#include "mex.h"
#endif
/* ------------------------------------------------------------------------- */
/* basic definitions */
/* ------------------------------------------------------------------------- */
#ifdef FLIP
#undef FLIP
#endif
#ifdef MAX
#undef MAX
#endif
#ifdef MIN
#undef MIN
#endif
#ifdef EMPTY
#undef EMPTY
#endif
#ifdef GLOBAL
#undef GLOBAL
#endif
#ifdef PRIVATE
#undef PRIVATE
#endif
/* FLIP is a "negation about -1", and is used to mark an integer i that is
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY
 * is negative, and FLIP of a number < EMPTY is positive. FLIP (FLIP (i)) = i
* for all integers i. UNFLIP (i) is >= EMPTY. */
#define EMPTY (-1)
#define FLIP(i) (-(i)-2)
#define UNFLIP(i) ((i < EMPTY) ? FLIP (i) : (i))
/* for integer MAX/MIN, or for doubles when we don't care how NaN's behave: */
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
/* logical expression of p implies q: */
#define IMPLIES(p,q) (!(p) || (q))
/* Note that the IBM RS 6000 xlc predefines TRUE and FALSE in <types.h>. */
/* The Compaq Alpha also predefines TRUE and FALSE. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define TRUE (1)
#define FALSE (0)
#define PRIVATE static
#define GLOBAL
#define EMPTY (-1)
/* Note that Linux's gcc 2.96 defines NULL as ((void *) 0), but other */
/* compilers (even gcc 2.95.2 on Solaris) define NULL as 0 or (0). We */
/* need to use the ANSI standard value of 0. */
#ifdef NULL
#undef NULL
#endif
#define NULL 0
/* largest value of size_t */
#ifndef SIZE_T_MAX
#define SIZE_T_MAX ((size_t) (-1))
#endif
/* ------------------------------------------------------------------------- */
/* integer type for AMD: int or UF_long */
/* ------------------------------------------------------------------------- */
/* define UF_long */
#include "UFconfig.h"
#if defined (DLONG) || defined (ZLONG)
#define Int UF_long
#define ID UF_long_id
#define Int_MAX UF_long_max
#define AMD_order amd_l_order
#define AMD_defaults amd_l_defaults
#define AMD_control amd_l_control
#define AMD_info amd_l_info
#define AMD_1 amd_l1
#define AMD_2 amd_l2
#define AMD_valid amd_l_valid
#define AMD_aat amd_l_aat
#define AMD_postorder amd_l_postorder
#define AMD_post_tree amd_l_post_tree
#define AMD_dump amd_l_dump
#define AMD_debug amd_l_debug
#define AMD_debug_init amd_l_debug_init
#define AMD_preprocess amd_l_preprocess
#else
#define Int int
#define ID "%d"
#define Int_MAX INT_MAX
#define AMD_order amd_order
#define AMD_defaults amd_defaults
#define AMD_control amd_control
#define AMD_info amd_info
#define AMD_1 amd_1
#define AMD_2 amd_2
#define AMD_valid amd_valid
#define AMD_aat amd_aat
#define AMD_postorder amd_postorder
#define AMD_post_tree amd_post_tree
#define AMD_dump amd_dump
#define AMD_debug amd_debug
#define AMD_debug_init amd_debug_init
#define AMD_preprocess amd_preprocess
#endif
/* ========================================================================= */
/* === PRINTF macro ======================================================== */
/* ========================================================================= */
/* All output goes through the PRINTF macro. */
#define PRINTF(params) { if (amd_printf != NULL) (void) amd_printf params ; }
/* ------------------------------------------------------------------------- */
/* AMD routine definitions (user-callable) */
/* ------------------------------------------------------------------------- */
#include "amd.h"
/* ------------------------------------------------------------------------- */
/* AMD routine definitions (not user-callable) */
/* ------------------------------------------------------------------------- */
GLOBAL size_t AMD_aat
(
Int n,
const Int Ap [ ],
const Int Ai [ ],
Int Len [ ],
Int Tp [ ],
double Info [ ]
) ;
GLOBAL void AMD_1
(
Int n,
const Int Ap [ ],
const Int Ai [ ],
Int P [ ],
Int Pinv [ ],
Int Len [ ],
Int slen,
Int S [ ],
double Control [ ],
double Info [ ]
) ;
GLOBAL void AMD_postorder
(
Int nn,
Int Parent [ ],
Int Npiv [ ],
Int Fsize [ ],
Int Order [ ],
Int Child [ ],
Int Sibling [ ],
Int Stack [ ]
) ;
GLOBAL Int AMD_post_tree
(
Int root,
Int k,
Int Child [ ],
const Int Sibling [ ],
Int Order [ ],
Int Stack [ ]
#ifndef NDEBUG
, Int nn
#endif
) ;
GLOBAL void AMD_preprocess
(
Int n,
const Int Ap [ ],
const Int Ai [ ],
Int Rp [ ],
Int Ri [ ],
Int W [ ],
Int Flag [ ]
) ;
/* ------------------------------------------------------------------------- */
/* debugging definitions */
/* ------------------------------------------------------------------------- */
#ifndef NDEBUG
/* from assert.h: assert macro */
#include <assert.h>
#ifndef EXTERN
#define EXTERN extern
#endif
EXTERN Int AMD_debug ;
GLOBAL void AMD_debug_init ( char *s ) ;
GLOBAL void AMD_dump
(
Int n,
Int Pe [ ],
Int Iw [ ],
Int Len [ ],
Int iwlen,
Int pfree,
Int Nv [ ],
Int Next [ ],
Int Last [ ],
Int Head [ ],
Int Elen [ ],
Int Degree [ ],
Int W [ ],
Int nel
) ;
#ifdef ASSERT
#undef ASSERT
#endif
/* Use mxAssert if AMD is compiled into a mexFunction */
#ifdef MATLAB_MEX_FILE
#define ASSERT(expression) (mxAssert ((expression), ""))
#else
#define ASSERT(expression) (assert (expression))
#endif
#define AMD_DEBUG0(params) { PRINTF (params) ; }
#define AMD_DEBUG1(params) { if (AMD_debug >= 1) PRINTF (params) ; }
#define AMD_DEBUG2(params) { if (AMD_debug >= 2) PRINTF (params) ; }
#define AMD_DEBUG3(params) { if (AMD_debug >= 3) PRINTF (params) ; }
#define AMD_DEBUG4(params) { if (AMD_debug >= 4) PRINTF (params) ; }
#else
/* no debugging */
#define ASSERT(expression)
#define AMD_DEBUG0(params)
#define AMD_DEBUG1(params)
#define AMD_DEBUG2(params)
#define AMD_DEBUG3(params)
#define AMD_DEBUG4(params)
#endif
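
The FLIP / UNFLIP macros defined above give a reversible "negation about -1", so one integer can hold either an index or a flagged (flipped) index. A tiny standalone sketch (not part of the imported sources) that copies the three macros and checks the stated identities:

#include <assert.h>
#include <stdio.h>

#define EMPTY (-1)
#define FLIP(i) (-(i)-2)
#define UNFLIP(i) ((i < EMPTY) ? FLIP (i) : (i))

int main (void)
{
    int i = 5 ;
    assert (FLIP (EMPTY) == EMPTY) ;    /* -(-1) - 2 = -1 */
    assert (FLIP (FLIP (i)) == i) ;     /* involution: -(-i-2) - 2 = i */
    assert (FLIP (i) < EMPTY) ;         /* flipped values are below -1 ... */
    assert (UNFLIP (FLIP (i)) == i) ;   /* ... so UNFLIP recovers the index */
    printf ("FLIP (%d) = %d\n", i, FLIP (i)) ;
    return (0) ;
}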

200
src/maths/KLU/amd_order.c Normal file

@@ -0,0 +1,200 @@
/* ========================================================================= */
/* === AMD_order =========================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* User-callable AMD minimum degree ordering routine. See amd.h for
* documentation.
*/
#include "amd_internal.h"
/* ========================================================================= */
/* === AMD_order =========================================================== */
/* ========================================================================= */
GLOBAL Int AMD_order
(
Int n,
const Int Ap [ ],
const Int Ai [ ],
Int P [ ],
double Control [ ],
double Info [ ]
)
{
Int *Len, *S, nz, i, *Pinv, info, status, *Rp, *Ri, *Cp, *Ci, ok ;
size_t nzaat, slen ;
double mem = 0 ;
#ifndef NDEBUG
AMD_debug_init ("amd") ;
#endif
/* clear the Info array, if it exists */
info = Info != (double *) NULL ;
if (info)
{
for (i = 0 ; i < AMD_INFO ; i++)
{
Info [i] = EMPTY ;
}
Info [AMD_N] = n ;
Info [AMD_STATUS] = AMD_OK ;
}
/* make sure inputs exist and n is >= 0 */
if (Ai == (Int *) NULL || Ap == (Int *) NULL || P == (Int *) NULL || n < 0)
{
if (info) Info [AMD_STATUS] = AMD_INVALID ;
return (AMD_INVALID) ; /* arguments are invalid */
}
if (n == 0)
{
return (AMD_OK) ; /* n is 0 so there's nothing to do */
}
nz = Ap [n] ;
if (info)
{
Info [AMD_NZ] = nz ;
}
if (nz < 0)
{
if (info) Info [AMD_STATUS] = AMD_INVALID ;
return (AMD_INVALID) ;
}
/* check if n or nz will cause size_t overflow */
if (((size_t) n) >= SIZE_T_MAX / sizeof (Int)
|| ((size_t) nz) >= SIZE_T_MAX / sizeof (Int))
{
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
return (AMD_OUT_OF_MEMORY) ; /* problem too large */
}
/* check the input matrix: AMD_OK, AMD_INVALID, or AMD_OK_BUT_JUMBLED */
status = AMD_valid (n, n, Ap, Ai) ;
if (status == AMD_INVALID)
{
if (info) Info [AMD_STATUS] = AMD_INVALID ;
return (AMD_INVALID) ; /* matrix is invalid */
}
/* allocate two size-n integer workspaces */
Len = amd_malloc (n * sizeof (Int)) ;
Pinv = amd_malloc (n * sizeof (Int)) ;
mem += n ;
mem += n ;
if (!Len || !Pinv)
{
/* :: out of memory :: */
amd_free (Len) ;
amd_free (Pinv) ;
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
return (AMD_OUT_OF_MEMORY) ;
}
if (status == AMD_OK_BUT_JUMBLED)
{
/* sort the input matrix and remove duplicate entries */
AMD_DEBUG1 (("Matrix is jumbled\n")) ;
Rp = amd_malloc ((n+1) * sizeof (Int)) ;
Ri = amd_malloc (MAX (nz,1) * sizeof (Int)) ;
mem += (n+1) ;
mem += MAX (nz,1) ;
if (!Rp || !Ri)
{
/* :: out of memory :: */
amd_free (Rp) ;
amd_free (Ri) ;
amd_free (Len) ;
amd_free (Pinv) ;
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
return (AMD_OUT_OF_MEMORY) ;
}
/* use Len and Pinv as workspace to create R = A' */
AMD_preprocess (n, Ap, Ai, Rp, Ri, Len, Pinv) ;
Cp = Rp ;
Ci = Ri ;
}
else
{
/* order the input matrix as-is. No need to compute R = A' first */
Rp = NULL ;
Ri = NULL ;
Cp = (Int *) Ap ;
Ci = (Int *) Ai ;
}
/* --------------------------------------------------------------------- */
/* determine the symmetry and count off-diagonal nonzeros in A+A' */
/* --------------------------------------------------------------------- */
nzaat = AMD_aat (n, Cp, Ci, Len, P, Info) ;
AMD_DEBUG1 (("nzaat: %g\n", (double) nzaat)) ;
ASSERT ((MAX (nz-n, 0) <= nzaat) && (nzaat <= 2 * (size_t) nz)) ;
/* --------------------------------------------------------------------- */
/* allocate workspace for matrix, elbow room, and 6 size-n vectors */
/* --------------------------------------------------------------------- */
S = NULL ;
slen = nzaat ; /* space for matrix */
ok = ((slen + nzaat/5) >= slen) ; /* check for size_t overflow */
slen += nzaat/5 ; /* add elbow room */
for (i = 0 ; ok && i < 7 ; i++)
{
ok = ((slen + n) > slen) ; /* check for size_t overflow */
slen += n ; /* size-n elbow room, 6 size-n work */
}
mem += slen ;
ok = ok && (slen < SIZE_T_MAX / sizeof (Int)) ; /* check for overflow */
ok = ok && (slen < Int_MAX) ; /* S[i] for Int i must be OK */
if (ok)
{
S = amd_malloc (slen * sizeof (Int)) ;
}
AMD_DEBUG1 (("slen %g\n", (double) slen)) ;
if (!S)
{
/* :: out of memory :: (or problem too large) */
amd_free (Rp) ;
amd_free (Ri) ;
amd_free (Len) ;
amd_free (Pinv) ;
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
return (AMD_OUT_OF_MEMORY) ;
}
if (info)
{
/* memory usage, in bytes. */
Info [AMD_MEMORY] = mem * sizeof (Int) ;
}
/* --------------------------------------------------------------------- */
/* order the matrix */
/* --------------------------------------------------------------------- */
AMD_1 (n, Cp, Ci, P, Pinv, Len, slen, S, Control, Info) ;
/* --------------------------------------------------------------------- */
/* free the workspace */
/* --------------------------------------------------------------------- */
amd_free (Rp) ;
amd_free (Ri) ;
amd_free (Len) ;
amd_free (Pinv) ;
amd_free (S) ;
if (info) Info [AMD_STATUS] = status ;
return (status) ; /* successful ordering */
}

121
src/maths/KLU/amd_post_tree.c Normal file
View File

@ -0,0 +1,121 @@
/* ========================================================================= */
/* === AMD_post_tree ======================================================= */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* Post-ordering of a supernodal elimination tree. */
#include "amd_internal.h"
GLOBAL Int AMD_post_tree
(
Int root, /* root of the tree */
Int k, /* start numbering at k */
Int Child [ ], /* input argument of size nn, undefined on
* output. Child [i] is the head of a link
* list of all nodes that are children of node
* i in the tree. */
const Int Sibling [ ], /* input argument of size nn, not modified.
* If f is a node in the link list of the
* children of node i, then Sibling [f] is the
* next child of node i.
*/
Int Order [ ], /* output order, of size nn. Order [i] = k
* if node i is the kth node of the reordered
* tree. */
Int Stack [ ] /* workspace of size nn */
#ifndef NDEBUG
, Int nn /* nodes are in the range 0..nn-1. */
#endif
)
{
Int f, head, h, i ;
#if 0
/* --------------------------------------------------------------------- */
/* recursive version (Stack [ ] is not used): */
/* --------------------------------------------------------------------- */
/* this is simple, but can cause stack overflow if nn is large */
i = root ;
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
k = AMD_post_tree (f, k, Child, Sibling, Order, Stack, nn) ;
}
Order [i] = k++ ;
return (k) ;
#endif
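/* Example of the Child/Sibling encoding walked below (hypothetical 4-node
* tree, added for illustration): if node 3 is the root with children 0 and 2,
* and node 2 has the single child 1, then
*
*     Child [3] = 0, Sibling [0] = 2, Sibling [2] = EMPTY
*     Child [2] = 1, Sibling [1] = EMPTY
*     Child [0] = Child [1] = EMPTY
*
* and AMD_post_tree (3, 0, ...) orders the nodes as 0, 1, 2, 3 (children
* before parents). */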
/* --------------------------------------------------------------------- */
/* non-recursive version, using an explicit stack */
/* --------------------------------------------------------------------- */
/* push root on the stack */
head = 0 ;
Stack [0] = root ;
while (head >= 0)
{
/* get head of stack */
ASSERT (head < nn) ;
i = Stack [head] ;
AMD_DEBUG1 (("head of stack "ID" \n", i)) ;
ASSERT (i >= 0 && i < nn) ;
if (Child [i] != EMPTY)
{
/* the children of i are not yet ordered */
/* push each child onto the stack in reverse order */
/* so that small ones at the head of the list get popped first */
/* and the biggest one at the end of the list gets popped last */
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
head++ ;
ASSERT (head < nn) ;
ASSERT (f >= 0 && f < nn) ;
}
h = head ;
ASSERT (head < nn) ;
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
ASSERT (h > 0) ;
Stack [h--] = f ;
AMD_DEBUG1 (("push "ID" on stack\n", f)) ;
ASSERT (f >= 0 && f < nn) ;
}
ASSERT (Stack [h] == i) ;
/* delete child list so that i gets ordered next time we see it */
Child [i] = EMPTY ;
}
else
{
/* the children of i (if there were any) are already ordered */
/* remove i from the stack and order it. Front i is kth front */
head-- ;
AMD_DEBUG1 (("pop "ID" order "ID"\n", i, k)) ;
Order [i] = k++ ;
ASSERT (k <= nn) ;
}
#ifndef NDEBUG
AMD_DEBUG1 (("\nStack:")) ;
for (h = head ; h >= 0 ; h--)
{
Int j = Stack [h] ;
AMD_DEBUG1 ((" "ID, j)) ;
ASSERT (j >= 0 && j < nn) ;
}
AMD_DEBUG1 (("\n\n")) ;
ASSERT (head < nn) ;
#endif
}
return (k) ;
}

207
src/maths/KLU/amd_postorder.c Normal file
View File

@ -0,0 +1,207 @@
/* ========================================================================= */
/* === AMD_postorder ======================================================= */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* Perform a postordering (via depth-first search) of an assembly tree. */
#include "amd_internal.h"
GLOBAL void AMD_postorder
(
/* inputs, not modified on output: */
Int nn, /* nodes are in the range 0..nn-1 */
Int Parent [ ], /* Parent [j] is the parent of j, or EMPTY if root */
Int Nv [ ], /* Nv [j] > 0 number of pivots represented by node j,
* or zero if j is not a node. */
Int Fsize [ ], /* Fsize [j]: size of node j */
/* output, not defined on input: */
Int Order [ ], /* output post-order */
/* workspaces of size nn: */
Int Child [ ],
Int Sibling [ ],
Int Stack [ ]
)
{
Int i, j, k, parent, frsize, f, fprev, maxfrsize, bigfprev, bigf, fnext ;
for (j = 0 ; j < nn ; j++)
{
Child [j] = EMPTY ;
Sibling [j] = EMPTY ;
}
/* --------------------------------------------------------------------- */
/* place the children in link lists - bigger elements tend to be last */
/* --------------------------------------------------------------------- */
for (j = nn-1 ; j >= 0 ; j--)
{
if (Nv [j] > 0)
{
/* this is an element */
parent = Parent [j] ;
if (parent != EMPTY)
{
/* place the element in the link list of the children of its parent */
/* bigger elements will tend to be at the end of the list */
Sibling [j] = Child [parent] ;
Child [parent] = j ;
}
}
}
#ifndef NDEBUG
{
Int nels, ff, nchild ;
AMD_DEBUG1 (("\n\n================================ AMD_postorder:\n"));
nels = 0 ;
for (j = 0 ; j < nn ; j++)
{
if (Nv [j] > 0)
{
AMD_DEBUG1 (( ""ID" : nels "ID" npiv "ID" size "ID
" parent "ID" maxfr "ID"\n", j, nels,
Nv [j], Fsize [j], Parent [j], Fsize [j])) ;
/* this is an element */
/* dump the link list of children */
nchild = 0 ;
AMD_DEBUG1 ((" Children: ")) ;
for (ff = Child [j] ; ff != EMPTY ; ff = Sibling [ff])
{
AMD_DEBUG1 ((ID" ", ff)) ;
ASSERT (Parent [ff] == j) ;
nchild++ ;
ASSERT (nchild < nn) ;
}
AMD_DEBUG1 (("\n")) ;
parent = Parent [j] ;
if (parent != EMPTY)
{
ASSERT (Nv [parent] > 0) ;
}
nels++ ;
}
}
}
AMD_DEBUG1 (("\n\nGo through the children of each node, and put\n"
"the biggest child last in each list:\n")) ;
#endif
/* --------------------------------------------------------------------- */
/* place the largest child last in the list of children for each node */
/* --------------------------------------------------------------------- */
for (i = 0 ; i < nn ; i++)
{
if (Nv [i] > 0 && Child [i] != EMPTY)
{
#ifndef NDEBUG
Int nchild ;
AMD_DEBUG1 (("Before partial sort, element "ID"\n", i)) ;
nchild = 0 ;
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
ASSERT (f >= 0 && f < nn) ;
AMD_DEBUG1 ((" f: "ID" size: "ID"\n", f, Fsize [f])) ;
nchild++ ;
ASSERT (nchild <= nn) ;
}
#endif
/* find the biggest element in the child list */
fprev = EMPTY ;
maxfrsize = EMPTY ;
bigfprev = EMPTY ;
bigf = EMPTY ;
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
ASSERT (f >= 0 && f < nn) ;
frsize = Fsize [f] ;
if (frsize >= maxfrsize)
{
/* this is the biggest seen so far */
maxfrsize = frsize ;
bigfprev = fprev ;
bigf = f ;
}
fprev = f ;
}
ASSERT (bigf != EMPTY) ;
fnext = Sibling [bigf] ;
AMD_DEBUG1 (("bigf "ID" maxfrsize "ID" bigfprev "ID" fnext "ID
" fprev " ID"\n", bigf, maxfrsize, bigfprev, fnext, fprev)) ;
if (fnext != EMPTY)
{
/* if fnext is EMPTY then bigf is already at the end of list */
if (bigfprev == EMPTY)
{
/* delete bigf from the head of the list */
Child [i] = fnext ;
}
else
{
/* delete bigf from the middle of the list */
Sibling [bigfprev] = fnext ;
}
/* put bigf at the end of the list */
Sibling [bigf] = EMPTY ;
ASSERT (Child [i] != EMPTY) ;
ASSERT (fprev != bigf) ;
ASSERT (fprev != EMPTY) ;
Sibling [fprev] = bigf ;
}
#ifndef NDEBUG
AMD_DEBUG1 (("After partial sort, element "ID"\n", i)) ;
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
{
ASSERT (f >= 0 && f < nn) ;
AMD_DEBUG1 ((" "ID" "ID"\n", f, Fsize [f])) ;
ASSERT (Nv [f] > 0) ;
nchild-- ;
}
ASSERT (nchild == 0) ;
#endif
}
}
/* --------------------------------------------------------------------- */
/* postorder the assembly tree */
/* --------------------------------------------------------------------- */
for (i = 0 ; i < nn ; i++)
{
Order [i] = EMPTY ;
}
k = 0 ;
for (i = 0 ; i < nn ; i++)
{
if (Parent [i] == EMPTY && Nv [i] > 0)
{
AMD_DEBUG1 (("Root of assembly tree "ID"\n", i)) ;
k = AMD_post_tree (i, k, Child, Sibling, Order, Stack
#ifndef NDEBUG
, nn
#endif
) ;
}
}
}

119
src/maths/KLU/amd_preprocess.c Normal file
View File

@ -0,0 +1,119 @@
/* ========================================================================= */
/* === AMD_preprocess ====================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* Sorts, removes duplicate entries, and transposes from the nonzero pattern of
* a column-form matrix A, to obtain the matrix R. The input matrix can have
* duplicate entries and/or unsorted columns (AMD_valid (n,n,Ap,Ai) must not be
* AMD_INVALID).
*
* This input condition is NOT checked. This routine is not user-callable.
*/
#include "amd_internal.h"
/* ========================================================================= */
/* === AMD_preprocess ====================================================== */
/* ========================================================================= */
/* AMD_preprocess does not check its input for errors or allocate workspace.
* On input, the condition (AMD_valid (n,n,Ap,Ai) != AMD_INVALID) must hold.
*/
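/* Worked illustration (hypothetical 3-by-3 pattern, added for clarity): with
*
*     Ap = { 0, 2, 4, 6 } and Ai = { 1, 0,   2, 2,   0, 1 }
*
* column 0 is unsorted and column 1 lists row 2 twice, so R has only 5
* entries:
*
*     Rp = { 0, 2, 4, 5 } and Ri = { 0, 2,   0, 2,   1 }
*
* Row i of R lists, in ascending order and without duplicates, the columns in
* which row i of A has an entry; R is the pattern of A transposed. */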
GLOBAL void AMD_preprocess
(
Int n, /* input matrix: A is n-by-n */
const Int Ap [ ], /* size n+1 */
const Int Ai [ ], /* size nz = Ap [n] */
/* output matrix R: */
Int Rp [ ], /* size n+1 */
Int Ri [ ], /* size nz (or less, if duplicates present) */
Int W [ ], /* workspace of size n */
Int Flag [ ] /* workspace of size n */
)
{
/* --------------------------------------------------------------------- */
/* local variables */
/* --------------------------------------------------------------------- */
Int i, j, p, p2 ;
ASSERT (AMD_valid (n, n, Ap, Ai) != AMD_INVALID) ;
/* --------------------------------------------------------------------- */
/* count the entries in each row of A (excluding duplicates) */
/* --------------------------------------------------------------------- */
for (i = 0 ; i < n ; i++)
{
W [i] = 0 ; /* # of nonzeros in row i (excl duplicates) */
Flag [i] = EMPTY ; /* Flag [i] = j if i appears in column j */
}
for (j = 0 ; j < n ; j++)
{
p2 = Ap [j+1] ;
for (p = Ap [j] ; p < p2 ; p++)
{
i = Ai [p] ;
if (Flag [i] != j)
{
/* row index i has not yet appeared in column j */
W [i]++ ; /* one more entry in row i */
Flag [i] = j ; /* flag row index i as appearing in col j*/
}
}
}
/* --------------------------------------------------------------------- */
/* compute the row pointers for R */
/* --------------------------------------------------------------------- */
Rp [0] = 0 ;
for (i = 0 ; i < n ; i++)
{
Rp [i+1] = Rp [i] + W [i] ;
}
for (i = 0 ; i < n ; i++)
{
W [i] = Rp [i] ;
Flag [i] = EMPTY ;
}
/* --------------------------------------------------------------------- */
/* construct the row form matrix R */
/* --------------------------------------------------------------------- */
/* R = row form of pattern of A */
for (j = 0 ; j < n ; j++)
{
p2 = Ap [j+1] ;
for (p = Ap [j] ; p < p2 ; p++)
{
i = Ai [p] ;
if (Flag [i] != j)
{
/* row index i has not yet appeared in column j */
Ri [W [i]++] = j ; /* put col j in row i */
Flag [i] = j ; /* flag row index i as appearing in col j*/
}
}
}
#ifndef NDEBUG
ASSERT (AMD_valid (n, n, Rp, Ri) == AMD_OK) ;
for (j = 0 ; j < n ; j++)
{
ASSERT (W [j] == Rp [j+1]) ;
}
#endif
}

93
src/maths/KLU/amd_valid.c Normal file
View File

@ -0,0 +1,93 @@
/* ========================================================================= */
/* === AMD_valid =========================================================== */
/* ========================================================================= */
/* ------------------------------------------------------------------------- */
/* AMD, Copyright (c) Timothy A. Davis, */
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
/* web: http://www.cise.ufl.edu/research/sparse/amd */
/* ------------------------------------------------------------------------- */
/* Check if a column-form matrix is valid or not. The matrix A is
* n_row-by-n_col. The row indices of entries in column j are in
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are:
*
* n_row >= 0
* n_col >= 0
* nz = Ap [n_col] >= 0 number of entries in the matrix
* Ap [0] == 0
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col-1.
* Ai [0 ... nz-1] must be in the range 0 to n_row-1.
*
* If any of the above conditions are violated, AMD_INVALID is returned. If the
* following condition holds, AMD_OK_BUT_JUMBLED is returned (a warning,
* not an error):
*
* row indices in Ai [Ap [j] ... Ap [j+1]-1] are not sorted in ascending
* order, and/or duplicate entries exist.
*
* Otherwise, AMD_OK is returned.
*
* In v1.2 and earlier, this function returned TRUE if the matrix was valid
* (now returns AMD_OK), or FALSE otherwise (now returns AMD_INVALID or
* AMD_OK_BUT_JUMBLED).
*/
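/* Small illustration (hypothetical 2-by-2 patterns, added for clarity):
*
*     Ap = { 0, 2, 3 }, Ai = { 0, 1,   1 }   returns AMD_OK
*     Ap = { 0, 2, 3 }, Ai = { 1, 0,   1 }   returns AMD_OK_BUT_JUMBLED
*                                            (column 0 is unsorted)
*     Ap = { 0, 2, 3 }, Ai = { 0, 2,   1 }   returns AMD_INVALID
*                                            (row index 2 is out of range)
*/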
#include "amd_internal.h"
GLOBAL Int AMD_valid
(
/* inputs, not modified on output: */
Int n_row, /* A is n_row-by-n_col */
Int n_col,
const Int Ap [ ], /* column pointers of A, of size n_col+1 */
const Int Ai [ ] /* row indices of A, of size nz = Ap [n_col] */
)
{
Int nz, j, p1, p2, ilast, i, p, result = AMD_OK ;
if (n_row < 0 || n_col < 0 || Ap == NULL || Ai == NULL)
{
return (AMD_INVALID) ;
}
nz = Ap [n_col] ;
if (Ap [0] != 0 || nz < 0)
{
/* column pointers must start at Ap [0] = 0, and Ap [n_col] must be >= 0 */
AMD_DEBUG0 (("column 0 pointer bad or nz < 0\n")) ;
return (AMD_INVALID) ;
}
for (j = 0 ; j < n_col ; j++)
{
p1 = Ap [j] ;
p2 = Ap [j+1] ;
AMD_DEBUG2 (("\nColumn: "ID" p1: "ID" p2: "ID"\n", j, p1, p2)) ;
if (p1 > p2)
{
/* column pointers must be ascending */
AMD_DEBUG0 (("column "ID" pointer bad\n", j)) ;
return (AMD_INVALID) ;
}
ilast = EMPTY ;
for (p = p1 ; p < p2 ; p++)
{
i = Ai [p] ;
AMD_DEBUG3 (("row: "ID"\n", i)) ;
if (i < 0 || i >= n_row)
{
/* row index out of range */
AMD_DEBUG0 (("index out of range, col "ID" row "ID"\n", j, i));
return (AMD_INVALID) ;
}
if (i <= ilast)
{
/* row index unsorted, or duplicate entry present */
AMD_DEBUG1 (("index unsorted/dupl col "ID" row "ID"\n", j, i));
result = AMD_OK_BUT_JUMBLED ;
}
ilast = i ;
}
}
return (result) ;
}

263
src/maths/KLU/btf.h Normal file
View File

@ -0,0 +1,263 @@
/* ========================================================================== */
/* === BTF package ========================================================== */
/* ========================================================================== */
/* BTF_MAXTRANS: find a column permutation Q to give A*Q a zero-free diagonal
* BTF_STRONGCOMP: find a symmetric permutation P to put P*A*P' into block
* upper triangular form.
* BTF_ORDER: do both of the above (btf_maxtrans then btf_strongcomp).
*
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
* with support from Sandia National Laboratories. All Rights Reserved.
*/
/* ========================================================================== */
/* === BTF_MAXTRANS ========================================================= */
/* ========================================================================== */
/* BTF_MAXTRANS: finds a permutation of the columns of a matrix so that it has a
* zero-free diagonal. The input is an m-by-n sparse matrix in compressed
* column form. The array Ap of size n+1 gives the starting and ending
* positions of the columns in the array Ai. Ap[0] must be zero. The array Ai
* contains the row indices of the nonzeros of the matrix A, and is of size
* Ap[n]. The row indices of column j are located in Ai[Ap[j] ... Ap[j+1]-1].
* Row indices must be in the range 0 to m-1. Duplicate entries may be present
* in any given column. The input matrix is not checked for validity (row
* indices out of the range 0 to m-1 will lead to an indeterminate result -
* possibly a core dump, for example). Row indices in any given column need
* not be in sorted order. However, if they are sorted and the matrix already
* has a zero-free diagonal, then the identity permutation is returned.
*
* The output of btf_maxtrans is an array Match of size n. If row i is matched
* with column j, then A(i,j) is nonzero, and then Match[i] = j. If the matrix
* is structurally nonsingular, all entries in the Match array are unique, and
* Match can be viewed as a column permutation if A is square. That is, column
* k of the original matrix becomes column Match[k] of the permuted matrix. In
* MATLAB, this can be expressed as (for non-structurally singular matrices):
*
* Match = maxtrans (A) ;
* B = A (:, Match) ;
*
* except of course here the A matrix and Match vector are all 0-based (rows
* and columns in the range 0 to n-1), not 1-based (rows/cols in range 1 to n).
* The MATLAB dmperm routine returns a row permutation. See the maxtrans
* mexFunction for more details.
*
* If row i is not matched to any column, then Match[i] is == -1. The
* btf_maxtrans routine returns the number of nonzeros on the diagonal of the
* permuted matrix.
*
* In the MATLAB mexFunction interface to btf_maxtrans, 1 is added to the Match
* array to obtain a 1-based permutation. Thus, in MATLAB where A is m-by-n:
*
* q = maxtrans (A) ; % has entries in the range 0:n
* q % a column permutation (only if sprank(A)==n)
* B = A (:, q) ; % permuted matrix (only if sprank(A)==n)
* sum (q > 0) ; % same as "sprank (A)"
*
* This behaviour differs from p = dmperm (A) in MATLAB, which returns the
* matching as p(j)=i if row i and column j are matched, and p(j)=0 if column j
* is unmatched.
*
* p = dmperm (A) ; % has entries in the range 0:m
* p % a row permutation (only if sprank(A)==m)
* B = A (p, :) ; % permuted matrix (only if sprank(A)==m)
* sum (p > 0) ; % definition of sprank (A)
*
* This algorithm is based on the paper "On Algorithms for obtaining a maximum
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
* same issue, pp. 387-390. Algorithm 575 is MC21A in the Harwell Subroutine
* Library. This code is not merely a translation of the Fortran code into C.
* It is a completely new implementation of the basic underlying method (depth
* first search over a subgraph with nodes corresponding to columns matched so
* far, and cheap matching). This code was written with minimal observation of
* the MC21A/B code itself. See comments below for a comparison between the
* maxtrans and MC21A/B codes.
*
* This routine operates on a column-form matrix and produces a column
* permutation. MC21A uses a row-form matrix and produces a row permutation.
* The difference is merely one of convention in the comments and interpretation
* of the inputs and outputs. If you want a row permutation, simply pass a
* compressed-row sparse matrix to this routine and you will get a row
* permutation (just like MC21A). Similarly, you can pass a column-oriented
* matrix to MC21A and it will happily return a column permutation.
*/
#ifndef _BTF_H
#define _BTF_H
/* make it easy for C++ programs to include BTF */
#ifdef __cplusplus
extern "C" {
#endif
#include "UFconfig.h"
int btf_maxtrans /* returns # of columns matched */
(
/* --- input, not modified: --- */
int nrow, /* A is nrow-by-ncol in compressed column form */
int ncol,
int Ap [ ], /* size ncol+1 */
int Ai [ ], /* size nz = Ap [ncol] */
double maxwork, /* maximum amount of work to do is maxwork*nnz(A); no limit
* if <= 0 */
/* --- output, not defined on input --- */
double *work, /* work = -1 if maxwork > 0 and the total work performed
* reached the maximum of maxwork*nnz(A).
* Otherwise, work = the total work performed. */
int Match [ ], /* size nrow. Match [i] = j if column j matched to row i
* (see above for the singular-matrix case) */
/* --- workspace, not defined on input or output --- */
int Work [ ] /* size 5*ncol */
) ;
/* long integer version (all "int" parameters become "UF_long") */
UF_long btf_l_maxtrans (UF_long, UF_long, UF_long *, UF_long *, double,
double *, UF_long *, UF_long *) ;
/* ========================================================================== */
/* === BTF_STRONGCOMP ======================================================= */
/* ========================================================================== */
/* BTF_STRONGCOMP finds the strongly connected components of a graph, returning
* a symmetric permutation. The matrix A must be square, and is provided on
* input in compressed-column form (see BTF_MAXTRANS, above). The diagonal of
* the input matrix A (or A*Q if Q is provided on input) is ignored.
*
* If Q is not NULL on input, then the strongly connected components of A*Q are
* found. Q may be flagged on input, where Q[k] < 0 denotes a flagged column k.
* The permutation is j = BTF_UNFLIP (Q [k]). On output, Q is modified (the
* flags are preserved) so that P*A*Q is in block upper triangular form.
*
* If Q is NULL, then the permutation P is returned so that P*A*P' is in upper
* block triangular form.
*
* The vector R gives the block boundaries, where block b is in rows/columns
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 0 to the
* number of strongly connected components found, minus 1.
*/
int btf_strongcomp /* return # of strongly connected components */
(
/* input, not modified: */
int n, /* A is n-by-n in compressed column form */
int Ap [ ], /* size n+1 */
int Ai [ ], /* size nz = Ap [n] */
/* optional input, modified (if present) on output: */
int Q [ ], /* size n, input column permutation */
/* output, not defined on input */
int P [ ], /* size n. P [k] = j if row and column j are kth row/col
* in permuted matrix. */
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
/* workspace, not defined on input or output */
int Work [ ] /* size 4n */
) ;
UF_long btf_l_strongcomp (UF_long, UF_long *, UF_long *, UF_long *, UF_long *,
UF_long *, UF_long *) ;
/* ========================================================================== */
/* === BTF_ORDER ============================================================ */
/* ========================================================================== */
/* BTF_ORDER permutes a square matrix into upper block triangular form. It
* does this by first finding a maximum matching (or perhaps a limited matching
* if the work is limited), via the btf_maxtrans function. If a complete
* matching is not found, BTF_ORDER completes the permutation, but flags the
* columns of P*A*Q to denote which columns are not matched. If the matrix is
* structurally rank deficient, some of the entries on the diagonal of the
* permuted matrix will be zero. BTF_ORDER then calls btf_strongcomp to find
* the strongly-connected components.
*
* On output, P and Q are the row and column permutations, where i = P[k] if
* row i of A is the kth row of P*A*Q, and j = BTF_UNFLIP(Q[k]) if column j of
* A is the kth column of P*A*Q. If Q[k] < 0, then the (k,k)th entry in P*A*Q
* is structurally zero.
*
* The vector R gives the block boundaries, where block b is in rows/columns
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 0 to the
* number of strongly connected components found, minus 1.
*/
int btf_order /* returns number of blocks found */
(
/* --- input, not modified: --- */
int n, /* A is n-by-n in compressed column form */
int Ap [ ], /* size n+1 */
int Ai [ ], /* size nz = Ap [n] */
double maxwork, /* do at most maxwork*nnz(A) work in the maximum
* transversal; no limit if <= 0 */
/* --- output, not defined on input --- */
double *work, /* return value from btf_maxtrans */
int P [ ], /* size n, row permutation */
int Q [ ], /* size n, column permutation */
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
int *nmatch, /* # nonzeros on diagonal of P*A*Q */
/* --- workspace, not defined on input or output --- */
int Work [ ] /* size 5n */
) ;
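/* Minimal usage sketch for btf_order (added for illustration; the 3-by-3
* pattern below is hypothetical):
*
*     int Ap [ ] = { 0, 1, 3, 4 } ;
*     int Ai [ ] = { 1,   0, 2,   2 } ;
*     int P [3], Q [3], R [4], Work [15], nmatch ;
*     double work ;
*     int nblocks = btf_order (3, Ap, Ai, 0.0, &work, P, Q, R, &nmatch, Work) ;
*
* Block b of the permuted matrix then occupies rows/columns R [b] to
* R [b+1]-1, for b = 0 to nblocks-1. */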
UF_long btf_l_order (UF_long, UF_long *, UF_long *, double , double *,
UF_long *, UF_long *, UF_long *, UF_long *, UF_long *) ;
/* ========================================================================== */
/* === BTF marking of singular columns ====================================== */
/* ========================================================================== */
/* BTF_FLIP is a "negation about -1", and is used to mark an integer j
* that is normally non-negative. BTF_FLIP (-1) is -1. BTF_FLIP of
* a number > -1 is negative, and BTF_FLIP of a number < -1 is positive.
* BTF_FLIP (BTF_FLIP (j)) = j for all integers j. BTF_UNFLIP (j) acts
* like an "absolute value" operation, and is always >= -1. You can test
* whether or not an integer j is "flipped" with the BTF_ISFLIPPED (j)
* macro.
*/
#define BTF_FLIP(j) (-(j)-2)
#define BTF_ISFLIPPED(j) ((j) < -1)
#define BTF_UNFLIP(j) ((BTF_ISFLIPPED (j)) ? BTF_FLIP (j) : (j))
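/* For example (added illustration): BTF_FLIP (0) is -2, BTF_FLIP (3) is -5,
* BTF_FLIP (-1) is -1, BTF_UNFLIP (-5) is 3, and BTF_ISFLIPPED (-5) is true. */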
/* ========================================================================== */
/* === BTF version ========================================================== */
/* ========================================================================== */
/* All versions of BTF include these definitions.
* As an example, to test if the version you are using is 1.2 or later:
*
* if (BTF_VERSION >= BTF_VERSION_CODE (1,2)) ...
*
* This also works during compile-time:
*
* #if (BTF >= BTF_VERSION_CODE (1,2))
* printf ("This is version 1.2 or later\n") ;
* #else
* printf ("This is an early version\n") ;
* #endif
*/
#define BTF_DATE "Dec 7, 2011"
#define BTF_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
#define BTF_MAIN_VERSION 1
#define BTF_SUB_VERSION 1
#define BTF_SUBSUB_VERSION 3
#define BTF_VERSION BTF_VERSION_CODE(BTF_MAIN_VERSION,BTF_SUB_VERSION)
#ifdef __cplusplus
}
#endif
#endif

64
src/maths/KLU/btf_internal.h Normal file
View File

@ -0,0 +1,64 @@
/* ========================================================================== */
/* === btf_internal include file ============================================ */
/* ========================================================================== */
#ifndef _BTF_INTERNAL_H
#define _BTF_INTERNAL_H
/*
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
* with support from Sandia National Laboratories. All Rights Reserved.
*/
/* Not to be included in any user program. */
#ifdef DLONG
#define Int UF_long
#define Int_id UF_long_id
#define BTF(name) btf_l_ ## name
#else
#define Int int
#define Int_id "%d"
#define BTF(name) btf_ ## name
#endif
/* ========================================================================== */
/* make sure debugging and printing is turned off */
#ifndef NDEBUG
#define NDEBUG
#endif
#ifndef NPRINT
#define NPRINT
#endif
/* To enable debugging and assertions, uncomment this line:
#undef NDEBUG
*/
/* To enable diagnostic printing, uncomment this line:
#undef NPRINT
*/
/* ========================================================================== */
#include <stdio.h>
#include <assert.h>
#define ASSERT(a) assert(a)
#undef TRUE
#undef FALSE
#undef PRINTF
#undef MIN
#ifndef NPRINT
#define PRINTF(s) { printf s ; } ;
#else
#define PRINTF(s)
#endif
#define TRUE 1
#define FALSE 0
#define EMPTY (-1)
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

387
src/maths/KLU/btf_maxtrans.c Normal file
View File

@ -0,0 +1,387 @@
/* ========================================================================== */
/* === BTF_MAXTRANS ========================================================= */
/* ========================================================================== */
/* Finds a column permutation that maximizes the number of entries on the
* diagonal of a sparse matrix. See btf.h for more information.
*
* This function is identical to cs_maxtrans in CSparse, with the following
* exceptions:
*
* (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and
* imatch [j] = i if row i is matched to column j. This function returns
* just jmatch (the Match array). The MATLAB interface to cs_maxtrans
* (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB
* caller.
*
* (2) cs_maxtrans includes a pre-pass that counts the number of non-empty
* rows and columns (m2 and n2, respectively), and computes the matching
* using the transpose of A if m2 < n2. cs_maxtrans also returns quickly
* if the diagonal of the matrix is already zero-free. This pre-pass
* allows cs_maxtrans to be much faster than maxtrans, if the use of the
* transpose is warranted.
*
* However, for square structurally non-singular matrices with one or more
* zeros on the diagonal, the pre-pass is a waste of time, and for these
* matrices, maxtrans can be twice as fast as cs_maxtrans. Since the
* maxtrans function is intended primarily for square matrices that are
* typically structurally nonsingular, the pre-pass is not included here.
* If this maxtrans function is used on a matrix with many more columns
* than rows, consider passing the transpose to this function, or use
* cs_maxtrans instead.
*
* (3) cs_maxtrans can operate as a randomized algorithm, to help avoid
* rare cases of excessive run-time.
*
* (4) this maxtrans function includes an option that limits the total work
* performed. If this limit is reached, the maximum transveral might not
* be found.
*
* Thus, for general usage, cs_maxtrans is preferred. For square matrices that
* are typically structurally non-singular, maxtrans is preferred. A partial
* maxtrans can still be very useful when solving a sparse linear system.
*
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
* with support from Sandia National Laboratories. All Rights Reserved.
*/
#include "btf.h"
#include "btf_internal.h"
/* ========================================================================== */
/* === augment ============================================================== */
/* ========================================================================== */
/* Perform a depth-first-search starting at column k, to find an augmenting
* path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1),
* (i3,j2), ..., (i(s+1), js), such that all of the following properties hold:
*
* * column k is not matched to any row
* * entries in the path are nonzero
* * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been
* previously matched to each other
* * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column
*
* Once this path is found, the matching can be changed to the set of pairs
* in the path:
*
* (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js)
*
* Once a row is matched with a column it remains matched with some column, but
* not necessarily the column it was first matched with.
*
* In the worst case, this function can examine every nonzero in A. Since it
* is called n times by maxtrans, the total time of maxtrans can be as high as
* O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at
* most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might
* not be found, however.
*
* This routine is very similar to the dfs routine in klu_kernel.c, in the
* KLU sparse LU factorization package. It is essentially identical to the
* cs_augment routine in CSparse, and its recursive version (augment function
* in cs_maxtransr_mex.c), except that this routine allows for the search to be
* terminated early if too much work is being performed.
*
* The algorithm is based on the paper "On Algorithms for obtaining a maximum
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
* same issue, pp. 387-390. The code here is a new implementation of that
* algorithm, with different data structures and control flow. After writing
* this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575)
* Some of the comparisons are partial because I didn't dig deeply into all of
* the details of MC21A/B, such as how the stack is maintained. The following
* arguments are essentially identical between this code and MC21A:
*
* maxtrans MC21A,B
* -------- -------
* n N identical
* k JORD identical
* Ap IP column / row pointers
* Ai ICN row / column indices
* Ap[n] LICN length of index array (# of nonzeros in A)
* Match IPERM output column / row permutation
* nmatch NUMNZ # of nonzeros on diagonal of permuted matrix
* Flag CV mark a node as visited by the depth-first-search
*
* The following are different, but analogous:
*
* Cheap ARP indicates what part of a column / row has
* already been matched.
*
* The following arguments are very different:
*
* - LENR # of entries in each row/column (unused in maxtrans)
* Pstack OUT Pstack keeps track of where we are in the depth-
* first-search scan of column j. I think that OUT
* plays a similar role in MC21B, but I'm unsure.
* Istack PR keeps track of the rows in the path. PR is a link
* list, though, whereas Istack is a stack. Maxtrans
* does not use any link lists.
* Jstack OUT? PR? the stack for nodes in the path (unsure)
*
* The following control structures are roughly comparable:
*
* maxtrans MC21B
* -------- -----
* for (k = 0 ; k < n ; k++) DO 100 JORD=1,N
* while (head >= 0) DO 70 K=1,JORD
* for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2
* for (p = head ; ...) DO 90 K=1,JORD
*/
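/* Concrete illustration of an augmenting path (hypothetical 2-by-2 pattern,
* added for clarity): let column 0 have rows {0,1} and column 1 have only
* row {0}. The cheap match of stage k=0 pairs row 0 with column 0. At stage
* k=1 row 0 is already taken, so the DFS steps to column j1 = Match [0] = 0,
* whose cheap scan finds the unmatched row 1. Unwinding the path (0,1),(1,0)
* gives Match [0] = 1 and Match [1] = 0, a complete matching. */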
static Int augment
(
Int k, /* which stage of the main loop we're in */
Int Ap [ ], /* column pointers, size n+1 */
Int Ai [ ], /* row indices, size nz = Ap [n] */
Int Match [ ], /* size n, Match [i] = j if col j matched to i */
Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] already matched */
Int Flag [ ], /* Flag [j] = k if j already visited this stage */
Int Istack [ ], /* size n. Row index stack. */
Int Jstack [ ], /* size n. Column index stack. */
Int Pstack [ ], /* size n. Keeps track of position in adjacency list */
double *work, /* work performed by the depth-first-search */
double maxwork /* maximum work allowed */
)
{
/* local variables, but "global" to all DFS levels: */
Int found ; /* true if match found. */
Int head ; /* top of stack */
/* variables that are purely local to any one DFS level: */
Int j2 ; /* the next DFS goes to node j2 */
Int pend ; /* one past the end of the adjacency list for node j */
Int pstart ;
Int quick ;
/* variables that need to be pushed then popped from the stack: */
Int i ; /* the row tentatively matched to column j if the DFS succeeds */
Int j ; /* the DFS is at the current node j */
Int p ; /* current index into the adj. list for node j */
/* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */
quick = (maxwork > 0) ;
/* start a DFS to find a match for column k */
found = FALSE ;
i = EMPTY ;
head = 0 ;
Jstack [0] = k ;
ASSERT (Flag [k] != k) ;
while (head >= 0)
{
j = Jstack [head] ;
pend = Ap [j+1] ;
if (Flag [j] != k) /* node j has not yet been visited in this stage */
{
/* -------------------------------------------------------------- */
/* prework for node j */
/* -------------------------------------------------------------- */
/* first time that j has been visited */
Flag [j] = k ;
/* cheap assignment: find the next unmatched row in col j. This
* loop takes at most O(nnz(A)) time for the sum total of all
* calls to augment. */
for (p = Cheap [j] ; p < pend && !found ; p++)
{
i = Ai [p] ;
found = (Match [i] == EMPTY) ;
}
Cheap [j] = p ;
/* -------------------------------------------------------------- */
/* prepare for DFS */
if (found)
{
/* end of augmenting path, column j matched with row i */
Istack [head] = i ;
break ;
}
/* set Pstack [head] to the first entry in column j to scan */
Pstack [head] = Ap [j] ;
}
/* ------------------------------------------------------------------ */
/* quick return if too much work done */
/* ------------------------------------------------------------------ */
if (quick && *work > maxwork)
{
/* too much work has been performed; abort the search */
return (EMPTY) ;
}
/* ------------------------------------------------------------------ */
/* DFS for nodes adjacent to j */
/* ------------------------------------------------------------------ */
/* If cheap assignment not made, continue the depth-first search. All
* rows in column j are already matched. Add the adjacent nodes to the
* stack by iterating through until finding another non-visited node.
*
* It is the following loop that can force maxtrans to take
* O(n*nnz(A)) time. */
pstart = Pstack [head] ;
for (p = pstart ; p < pend ; p++)
{
i = Ai [p] ;
j2 = Match [i] ;
ASSERT (j2 != EMPTY) ;
if (Flag [j2] != k)
{
/* Node j2 is not yet visited, start a depth-first search on
* node j2. Keep track of where we left off in the scan of adj
* list of node j so we can restart j where we left off. */
Pstack [head] = p + 1 ;
/* Push j2 onto the stack and immediately break so we can
* recurse on node j2. Also keep track of row i which (if this
* search for an augmenting path works) will be matched with the
* current node j. */
Istack [head] = i ;
Jstack [++head] = j2 ;
break ;
}
}
/* ------------------------------------------------------------------ */
/* determine how much work was just performed */
/* ------------------------------------------------------------------ */
*work += (p - pstart + 1) ;
/* ------------------------------------------------------------------ */
/* node j is done, but the postwork is postponed - see below */
/* ------------------------------------------------------------------ */
if (p == pend)
{
/* If all adjacent nodes of j are already visited, pop j from
* stack and continue. We failed to find a match. */
head-- ;
}
}
/* postwork for all nodes j in the stack */
/* unwind the path and make the corresponding matches */
if (found)
{
for (p = head ; p >= 0 ; p--)
{
j = Jstack [p] ;
i = Istack [p] ;
/* -------------------------------------------------------------- */
/* postwork for node j */
/* -------------------------------------------------------------- */
/* if found, match row i with column j */
Match [i] = j ;
}
}
return (found) ;
}
/* ========================================================================== */
/* === maxtrans ============================================================= */
/* ========================================================================== */
Int BTF(maxtrans) /* returns # of columns in the matching */
(
/* --- input --- */
Int nrow, /* A is nrow-by-ncol in compressed column form */
Int ncol,
Int Ap [ ], /* size ncol+1 */
Int Ai [ ], /* size nz = Ap [ncol] */
double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This
* work limit excludes the O(nnz(A)) cheap-match phase. */
/* --- output --- */
double *work, /* work = -1 if maxwork > 0 and the total work performed
* reached the maximum of maxwork*nnz(A).
* Otherwise, work = the total work performed. */
Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */
/* --- workspace --- */
Int Work [ ] /* size 5*ncol */
)
{
Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ;
Int i, j, k, nmatch, work_limit_reached, result ;
/* ---------------------------------------------------------------------- */
/* get workspace and initialize */
/* ---------------------------------------------------------------------- */
Cheap = Work ; Work += ncol ;
Flag = Work ; Work += ncol ;
/* stack for non-recursive depth-first search in augment function */
Istack = Work ; Work += ncol ;
Jstack = Work ; Work += ncol ;
Pstack = Work ;
/* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */
for (j = 0 ; j < ncol ; j++)
{
Cheap [j] = Ap [j] ;
Flag [j] = EMPTY ;
}
/* all rows and columns are currently unmatched */
for (i = 0 ; i < nrow ; i++)
{
Match [i] = EMPTY ;
}
if (maxwork > 0)
{
maxwork *= Ap [ncol] ;
}
*work = 0 ;
/* ---------------------------------------------------------------------- */
/* find a matching row for each column k */
/* ---------------------------------------------------------------------- */
nmatch = 0 ;
work_limit_reached = FALSE ;
for (k = 0 ; k < ncol ; k++)
{
/* find an augmenting path to match some row i to column k */
result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack,
work, maxwork) ;
if (result == TRUE)
{
/* a match was found: Match [i] = k has been set for some row i */
nmatch++ ;
}
else if (result == EMPTY)
{
/* augment gave up because of too much work, and no match found */
work_limit_reached = TRUE ;
}
}
/* ---------------------------------------------------------------------- */
/* return the Match, and the # of matches made */
/* ---------------------------------------------------------------------- */
/* At this point, row i is matched to j = Match [i] if j >= 0. i is an
* unmatched row if Match [i] == EMPTY. */
if (work_limit_reached)
{
/* return -1 if the work limit of maxwork*nnz(A) was reached */
*work = EMPTY ;
}
return (nmatch) ;
}

132
src/maths/KLU/btf_order.c Normal file
View File

@ -0,0 +1,132 @@
/* ========================================================================== */
/* === BTF_ORDER ============================================================ */
/* ========================================================================== */
/* Find a permutation P and Q to permute a square sparse matrix into upper block
* triangular form. A(P,Q) will contain a zero-free diagonal if A has
* structural full-rank. Otherwise, the number of nonzeros on the diagonal of
* A(P,Q) will be maximized, and will equal the structural rank of A.
*
* Q[k] will be "flipped" if a zero-free diagonal was not found. Q[k] will be
* negative, and j = BTF_UNFLIP (Q [k]) gives the corresponding permutation.
*
* R defines the block boundaries of A(P,Q). The kth block consists of rows
* and columns R[k] to R[k+1]-1.
*
* If maxwork > 0 on input, then the work performed in btf_maxtrans is limited
* to maxwork*nnz(A) (excluding the "cheap match" phase, which can take another
* nnz(A) work). On output, the work parameter gives the actual work performed,
* or -1 if the limit was reached. In the latter case, the diagonal of A(P,Q)
* might not be zero-free, and the number of nonzeros on the diagonal of A(P,Q)
* might not be equal to the structural rank.
*
* See btf.h for more details.
*
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
* with support from Sandia National Laboratories. All Rights Reserved.
*/
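/* Reading the outputs (added illustration): if nblocks is 2 with
* R = { 0, 2, 3 } for a 3-by-3 matrix, rows/columns 0..1 of A(P,Q) form the
* first block and row/column 2 forms the second; if every Q [k] >= 0, the
* diagonal of A(P,Q) is zero-free. */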
#include "btf.h"
#include "btf_internal.h"
/* This function only operates on square matrices (either structurally full-
* rank, or structurally rank deficient). */
Int BTF(order) /* returns number of blocks found */
(
/* input, not modified: */
Int n, /* A is n-by-n in compressed column form */
Int Ap [ ], /* size n+1 */
Int Ai [ ], /* size nz = Ap [n] */
double maxwork, /* do at most maxwork*nnz(A) work in the maximum
* transversal; no limit if <= 0 */
/* output, not defined on input */
double *work, /* work performed in maxtrans, or -1 if limit reached */
Int P [ ], /* size n, row permutation */
Int Q [ ], /* size n, column permutation */
Int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
Int *nmatch, /* # nonzeros on diagonal of P*A*Q */
/* workspace, not defined on input or output */
Int Work [ ] /* size 5n */
)
{
Int *Flag ;
Int nblocks, i, j, nbadcol ;
/* ---------------------------------------------------------------------- */
/* compute the maximum matching */
/* ---------------------------------------------------------------------- */
/* if maxwork > 0, then a maximum matching might not be found */
*nmatch = BTF(maxtrans) (n, n, Ap, Ai, maxwork, work, Q, Work) ;
/* ---------------------------------------------------------------------- */
/* complete permutation if the matrix is structurally singular */
/* ---------------------------------------------------------------------- */
/* Since the matrix is square, ensure BTF_UNFLIP(Q[0..n-1]) is a
* permutation of the columns of A so that A has as many nonzeros on the
* diagonal as possible.
*/
if (*nmatch < n)
{
/* get a size-n work array */
Flag = Work + n ;
for (j = 0 ; j < n ; j++)
{
Flag [j] = 0 ;
}
/* flag all matched columns */
for (i = 0 ; i < n ; i++)
{
j = Q [i] ;
if (j != EMPTY)
{
/* row i and column j are matched to each other */
Flag [j] = 1 ;
}
}
/* make a list of all unmatched columns, in Work [0..nbadcol-1] */
nbadcol = 0 ;
for (j = n-1 ; j >= 0 ; j--)
{
if (!Flag [j])
{
/* j is matched to nobody */
Work [nbadcol++] = j ;
}
}
ASSERT (*nmatch + nbadcol == n) ;
/* make an assignment for each unmatched row */
for (i = 0 ; i < n ; i++)
{
if (Q [i] == EMPTY && nbadcol > 0)
{
/* get an unmatched column j */
j = Work [--nbadcol] ;
/* assign j to row i and flag the entry by "flipping" it */
Q [i] = BTF_FLIP (j) ;
}
}
}
/* The permutation of a square matrix can be recovered as follows: Row i is
* matched with column j, where j = BTF_UNFLIP (Q [i]) and where j
* will always be in the valid range 0 to n-1. The entry A(i,j) is zero
* if BTF_ISFLIPPED (Q [i]) is true, and nonzero otherwise. nmatch
* is the number of entries in the Q array that are non-negative. */
/* ---------------------------------------------------------------------- */
/* find the strongly connected components */
/* ---------------------------------------------------------------------- */
nblocks = BTF(strongcomp) (n, Ap, Ai, Q, P, R, Work) ;
return (nblocks) ;
}

593
src/maths/KLU/btf_strongcomp.c Normal file
View File

@ -0,0 +1,593 @@
/* ========================================================================== */
/* === BTF_STRONGCOMP ======================================================= */
/* ========================================================================== */
/* Finds the strongly connected components of a graph, or equivalently, permutes
* the matrix into upper block triangular form. See btf.h for more details.
* Input matrix and Q are not checked on input.
*
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
* with support from Sandia National Laboratories. All Rights Reserved.
*/
#include "btf.h"
#include "btf_internal.h"
#define UNVISITED (-2) /* Flag [j] = UNVISITED if node j not visited yet */
#define UNASSIGNED (-1) /* Flag [j] = UNASSIGNED if node j has been visited,
* but not yet assigned to a strongly-connected
* component (aka block). Flag [j] = k (k in the
* range 0 to nblocks-1) if node j has been visited
* (and completed, with its postwork done) and
* assigned to component k. */
/* This file contains two versions of the depth-first-search, a recursive one
* and a non-recursive one. By default, the non-recursive one is used. */
#ifndef RECURSIVE
/* ========================================================================== */
/* === dfs: non-recursive version (default) ================================= */
/* ========================================================================== */
/* Perform a depth-first-search of a graph, stored in an adjacency-list form.
* The row indices of column j (equivalently, the out-adjacency list of node j)
* are stored in Ai [Ap[j] ... Ap[j+1]-1]. Self-edges (diagonal entries) are
* ignored. Ap[0] must be zero, and thus nz = Ap[n] is the number of entries
* in the matrix (or edges in the graph). The row indices in each column need
* not be in any particular order. If an input column permutation is given,
* node j (in the permuted matrix A*Q) is located in
* Ai [Ap[Q[j]] ... Ap[Q[j]+1]-1]. This Q can be the same as the Match array
* output from the maxtrans routine, for a square matrix that is structurally
* full rank.
*
* The algorithm is from the paper by Robert E. Tarjan, "Depth-first search and
* linear graph algorithms," SIAM Journal on Computing, vol. 1, no. 2,
* pp. 146-160, 1972. The time taken by strongcomp is O(nnz(A)).
*
* See also MC13A/B in the Harwell subroutine library (Iain S. Duff and John
* K. Reid, "Algorithm 529: permutations to block triangular form," ACM Trans.
* on Mathematical Software, vol. 4, no. 2, pp. 189-192, 1978, and "An
* implementation of Tarjan's algorithm for the block triangular form of a
* matrix," same journal, pp. 137-147. This code is implements the same
* algorithm as MC13A/B, except that the data structures are very different.
* Also, unlike MC13A/B, the output permutation preserves the natural ordering
* within each block.
*/
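/* Small illustration (hypothetical 3-by-3 pattern, added for clarity): with
* Ap = { 0, 1, 2, 3 } and Ai = { 1, 0, 2 }, node 0 points to node 1, node 1
* points back to node 0, and node 2 carries only a self-edge (ignored). The
* strongly connected components are {0,1} and {2}, so the permuted matrix has
* two diagonal blocks. */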
static void dfs
(
/* inputs, not modified on output: */
Int j, /* start the DFS at node j */
Int Ap [ ], /* size n+1, column pointers for the matrix A */
Int Ai [ ], /* row indices, size nz = Ap [n] */
Int Q [ ], /* input column permutation */
/* inputs, modified on output (each array is of size n): */
Int Time [ ], /* Time [j] = "time" that node j was first visited */
Int Flag [ ], /* Flag [j]: see above */
Int Low [ ], /* Low [j]: see definition below */
Int *p_nblocks, /* number of blocks (aka strongly-connected-comp.)*/
Int *p_timestamp, /* current "time" */
/* workspace, not defined on input or output: */
Int Cstack [ ], /* size n, output stack to hold nodes of components */
Int Jstack [ ], /* size n, stack for the variable j */
Int Pstack [ ] /* size n, stack for the variable p */
)
{
/* ---------------------------------------------------------------------- */
/* local variables, and initializations */
/* ---------------------------------------------------------------------- */
/* local variables, but "global" to all DFS levels: */
Int chead ; /* top of Cstack */
Int jhead ; /* top of Jstack and Pstack */
/* variables that are purely local to any one DFS level: */
Int i ; /* edge (j,i) considered; i can be next node to traverse */
Int parent ; /* parent of node j in the DFS tree */
Int pend ; /* one past the end of the adjacency list for node j */
Int jj ; /* column j of A*Q is column jj of the input matrix A */
/* variables that need to be pushed then popped from the stack: */
Int p ; /* current index into the adj. list for node j */
/* the variables j and p are stacked in Jstack and Pstack */
/* local copies of variables in the calling routine */
Int nblocks = *p_nblocks ;
Int timestamp = *p_timestamp ;
/* ---------------------------------------------------------------------- */
/* start a DFS at node j (same as the recursive call dfs (EMPTY, j)) */
/* ---------------------------------------------------------------------- */
chead = 0 ; /* component stack is empty */
jhead = 0 ; /* Jstack and Pstack are empty */
Jstack [0] = j ; /* put the first node j on the Jstack */
ASSERT (Flag [j] == UNVISITED) ;
while (jhead >= 0)
{
j = Jstack [jhead] ; /* grab the node j from the top of Jstack */
/* determine which column jj of A is column j of A*Q */
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ;
pend = Ap [jj+1] ; /* j's row index list ends at Ai [pend-1] */
if (Flag [j] == UNVISITED)
{
/* -------------------------------------------------------------- */
/* prework at node j */
/* -------------------------------------------------------------- */
/* node j is being visited for the first time */
Cstack [++chead] = j ; /* push j onto the stack */
timestamp++ ; /* get a timestamp */
Time [j] = timestamp ; /* give the timestamp to node j */
Low [j] = timestamp ;
Flag [j] = UNASSIGNED ; /* flag node j as visited */
/* -------------------------------------------------------------- */
/* set Pstack [jhead] to the first entry in column j to scan */
/* -------------------------------------------------------------- */
Pstack [jhead] = Ap [jj] ;
}
/* ------------------------------------------------------------------ */
/* DFS rooted at node j (start it, or continue where left off) */
/* ------------------------------------------------------------------ */
for (p = Pstack [jhead] ; p < pend ; p++)
{
i = Ai [p] ; /* examine the edge from node j to node i */
if (Flag [i] == UNVISITED)
{
/* Node i has not been visited - start a DFS at node i.
* Keep track of where we left off in the scan of adjacency list
* of node j so we can restart j where we left off. */
Pstack [jhead] = p + 1 ;
/* Push i onto the stack and immediately break
* so we can recurse on node i. */
Jstack [++jhead] = i ;
ASSERT (Time [i] == EMPTY) ;
ASSERT (Low [i] == EMPTY) ;
/* break here to do what the recursive call dfs (j,i) does */
break ;
}
else if (Flag [i] == UNASSIGNED)
{
/* Node i has been visited, but still unassigned to a block
* this is a back or cross edge if Time [i] < Time [j].
* Note that i might equal j, in which case this code does
* nothing. */
ASSERT (Time [i] > 0) ;
ASSERT (Low [i] > 0) ;
Low [j] = MIN (Low [j], Time [i]) ;
}
}
if (p == pend)
{
/* If all adjacent nodes of j are already visited, pop j from
* Jstack and do the post work for node j. This also pops p
* from the Pstack. */
jhead-- ;
/* -------------------------------------------------------------- */
/* postwork at node j */
/* -------------------------------------------------------------- */
/* determine if node j is the head of a component */
if (Low [j] == Time [j])
{
/* pop all nodes in this SCC from Cstack */
while (TRUE)
{
ASSERT (chead >= 0) ; /* stack not empty (j in it) */
i = Cstack [chead--] ; /* pop a node from the Cstack */
ASSERT (i >= 0) ;
ASSERT (Flag [i] == UNASSIGNED) ;
Flag [i] = nblocks ; /* assign i to current block */
if (i == j) break ; /* current block ends at j */
}
nblocks++ ; /* one more block has been found */
}
/* update Low [parent], if the parent exists */
if (jhead >= 0)
{
parent = Jstack [jhead] ;
Low [parent] = MIN (Low [parent], Low [j]) ;
}
}
}
/* ---------------------------------------------------------------------- */
/* cleanup: update timestamp and nblocks */
/* ---------------------------------------------------------------------- */
*p_timestamp = timestamp ;
*p_nblocks = nblocks ;
}
#else
/* ========================================================================== */
/* === dfs: recursive version (only for illustration) ======================= */
/* ========================================================================== */
/* The following is a recursive version of dfs, which computes results identical
* to those of the non-recursive dfs. It is included here because it is easier to read.
* Compare the comments in the code below with the identical comments in the
* non-recursive code above, and that will help you see the correlation between
* the two routines.
*
* This routine can cause stack overflow, and is thus not recommended for heavy
* usage, particularly for large matrices. To help in delaying stack overflow,
* global variables are used, reducing the amount of information each call to
* dfs places on the call/return stack (the integers i, j, p, parent, and the
* return address). Note that this means the recursive code is not thread-safe.
* To try this version, compile the code with -DRECURSIVE or include the
* following line at the top of this file:
#define RECURSIVE
*/
static Int /* for recursive illustration only, not for production use */
chead, timestamp, nblocks, n, *Ap, *Ai, *Flag, *Cstack, *Time, *Low,
*P, *R, *Q ;
static void dfs
(
Int parent, /* came from parent node */
Int j /* at node j in the DFS */
)
{
Int p ; /* current index into the adj. list for node j */
Int i ; /* edge (j,i) considered; i can be next node to traverse */
Int jj ; /* column j of A*Q is column jj of the input matrix A */
/* ---------------------------------------------------------------------- */
/* prework at node j */
/* ---------------------------------------------------------------------- */
/* node j is being visited for the first time */
Cstack [++chead] = j ; /* push j onto the stack */
timestamp++ ; /* get a timestamp */
Time [j] = timestamp ; /* give the timestamp to node j */
Low [j] = timestamp ;
Flag [j] = UNASSIGNED ; /* flag node j as visited */
/* ---------------------------------------------------------------------- */
/* DFS rooted at node j */
/* ---------------------------------------------------------------------- */
/* determine which column jj of the input matrix A is column j of A*Q */
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ;
for (p = Ap [jj] ; p < Ap [jj+1] ; p++)
{
i = Ai [p] ; /* examine the edge from node j to node i */
if (Flag [i] == UNVISITED)
{
/* Node i has not been visited - start a DFS at node i. */
dfs (j, i) ;
}
else if (Flag [i] == UNASSIGNED)
{
/* Node i has been visited, but is still unassigned to a block;
* this is a back or cross edge if Time [i] < Time [j].
* Note that i might equal j, in which case this code does
* nothing. */
Low [j] = MIN (Low [j], Time [i]) ;
}
}
/* ---------------------------------------------------------------------- */
/* postwork at node j */
/* ---------------------------------------------------------------------- */
/* determine if node j is the head of a component */
if (Low [j] == Time [j])
{
/* pop all nodes in this strongly connected component from Cstack */
while (TRUE)
{
i = Cstack [chead--] ; /* pop a node from the Cstack */
Flag [i] = nblocks ; /* assign node i to current block */
if (i == j) break ; /* current block ends at node j */
}
nblocks++ ; /* one more block has been found */
}
/* update Low [parent] */
if (parent != EMPTY)
{
/* Note that this could be done with Low[j] = MIN(Low[j],Low[i]) just
* after the dfs (j,i) statement above, and then parent would not have
* to be an input argument. Putting it here places all the postwork
* for node j in one place, thus making the non-recursive DFS easier. */
Low [parent] = MIN (Low [parent], Low [j]) ;
}
}
#endif
/* ========================================================================== */
/* === btf_strongcomp ======================================================= */
/* ========================================================================== */
#ifndef RECURSIVE
Int BTF(strongcomp) /* return # of strongly connected components */
(
/* input, not modified: */
Int n, /* A is n-by-n in compressed column form */
Int Ap [ ], /* size n+1 */
Int Ai [ ], /* size nz = Ap [n] */
/* optional input, modified (if present) on output: */
Int Q [ ], /* size n, input column permutation. The permutation Q can
* include a flag which indicates an unmatched row.
* jold = BTF_UNFLIP (Q [jnew]) is the permutation;
* this function ignores these flags. On output, it is
* modified according to the permutation P. */
/* output, not defined on input: */
Int P [ ], /* size n. P [k] = j if row and column j are kth row/col
* in permuted matrix. */
Int R [ ], /* size n+1. kth block is in rows/cols R[k] ... R[k+1]-1
* of the permuted matrix. */
/* workspace, not defined on input or output: */
Int Work [ ] /* size 4n */
)
#else
Int BTF(strongcomp) /* recursive version - same as above except for Work size */
(
Int n_in,
Int Ap_in [ ],
Int Ai_in [ ],
Int Q_in [ ],
Int P_in [ ],
Int R_in [ ],
Int Work [ ] /* size 2n */
)
#endif
{
Int j, k, b ;
#ifndef RECURSIVE
Int timestamp, nblocks, *Flag, *Cstack, *Time, *Low, *Jstack, *Pstack ;
#else
n = n_in ;
Ap = Ap_in ;
Ai = Ai_in ;
Q = Q_in ;
P = P_in ;
R = R_in ;
chead = EMPTY ;
#endif
/* ---------------------------------------------------------------------- */
/* get and initialize workspace */
/* ---------------------------------------------------------------------- */
/* timestamp is incremented each time a new node is visited.
*
* Time [j] is the timestamp given to node j.
*
* Low [j] is the lowest timestamp of any node reachable from j via either
* a path to any descendant of j in the DFS tree, or via a single edge to
* either an ancestor (a back edge) or another node that is neither an
* ancestor nor a descendant (a cross edge). If Low [j] is equal to
* the timestamp of node j (Time [j]), then node j is the "head" of a
* strongly connected component (SCC). That is, it is the first node
* visited in its strongly connected component, and the DFS subtree rooted
* at node j spans all the nodes of the strongly connected component.
*
* The terms "block" and "component" are used interchangeably in this code;
* "block" being a matrix term and "component" being a graph term for the
* same thing.
*
* When a node is visited, it is placed on the Cstack (for "component"
* stack). When node j is found to be an SCC head, all the nodes from the
* top of the stack to node j itself form the nodes in the SCC. This Cstack
* is used for both the recursive and non-recursive versions.
*/
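/* As an illustrative sketch (hypothetical graph, not part of the original
* comments): suppose a 3-node block forms the single cycle 0 -> 1 -> 2 -> 0
* and the DFS starts at node 0. Then Time [0..2] = 1, 2, 3. The edge 2 -> 0
* is a back edge, so Low [2] = MIN (Low [2], Time [0]) = 1, and the postwork
* propagates Low [1] = 1 and Low [0] = 1. Only node 0 satisfies
* Low [j] == Time [j], so it is the head of the SCC, and popping the Cstack
* down to node 0 assigns all three nodes to the same block. */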
Time = Work ; Work += n ;
Flag = Work ; Work += n ;
Low = P ; /* use output array P as workspace for Low */
Cstack = R ; /* use output array R as workspace for Cstack */
#ifndef RECURSIVE
/* stack for non-recursive dfs */
Jstack = Work ; Work += n ; /* stack for j */
Pstack = Work ; /* stack for p */
#endif
for (j = 0 ; j < n ; j++)
{
Flag [j] = UNVISITED ;
Low [j] = EMPTY ;
Time [j] = EMPTY ;
#ifndef NDEBUG
Cstack [j] = EMPTY ;
#ifndef RECURSIVE
Jstack [j] = EMPTY ;
Pstack [j] = EMPTY ;
#endif
#endif
}
timestamp = 0 ; /* each node given a timestamp when it is visited */
nblocks = 0 ; /* number of blocks found so far */
/* ---------------------------------------------------------------------- */
/* find the strongly connected components via a depth-first search */
/* ---------------------------------------------------------------------- */
for (j = 0 ; j < n ; j++)
{
/* node j is unvisited or assigned to a block. Cstack is empty. */
ASSERT (Flag [j] == UNVISITED || (Flag [j] >= 0 && Flag [j] < nblocks));
if (Flag [j] == UNVISITED)
{
#ifndef RECURSIVE
/* non-recursive dfs (default) */
dfs (j, Ap, Ai, Q, Time, Flag, Low, &nblocks, &timestamp,
Cstack, Jstack, Pstack) ;
#else
/* recursive dfs (for illustration only) */
ASSERT (chead == EMPTY) ;
dfs (EMPTY, j) ;
ASSERT (chead == EMPTY) ;
#endif
}
}
ASSERT (timestamp == n) ;
/* ---------------------------------------------------------------------- */
/* construct the block boundary array, R */
/* ---------------------------------------------------------------------- */
for (b = 0 ; b < nblocks ; b++)
{
R [b] = 0 ;
}
for (j = 0 ; j < n ; j++)
{
/* node j has been assigned to block b = Flag [j] */
ASSERT (Time [j] > 0 && Time [j] <= n) ;
ASSERT (Low [j] > 0 && Low [j] <= n) ;
ASSERT (Flag [j] >= 0 && Flag [j] < nblocks) ;
R [Flag [j]]++ ;
}
/* R [b] is now the number of nodes in block b. Compute cumulative sum
* of R, using Time [0 ... nblocks-1] as workspace. */
Time [0] = 0 ;
for (b = 1 ; b < nblocks ; b++)
{
Time [b] = Time [b-1] + R [b-1] ;
}
for (b = 0 ; b < nblocks ; b++)
{
R [b] = Time [b] ;
}
R [nblocks] = n ;
/* ---------------------------------------------------------------------- */
/* construct the permutation, preserving the natural order */
/* ---------------------------------------------------------------------- */
#ifndef NDEBUG
for (k = 0 ; k < n ; k++)
{
P [k] = EMPTY ;
}
#endif
for (j = 0 ; j < n ; j++)
{
/* place column j in the permutation */
P [Time [Flag [j]]++] = j ;
}
#ifndef NDEBUG
for (k = 0 ; k < n ; k++)
{
ASSERT (P [k] != EMPTY) ;
}
#endif
/* Now block b consists of the nodes k1 to k2-1 in the permuted matrix,
* where k1 = R [b] and k2 = R [b+1]. Row and column P [k] of the original
* matrix becomes row and column k of the permuted matrix. The set of
* rows/columns (nodes) in block b is given by P [k1 ... k2-1], and this
* set is sorted in ascending order. Thus, if the matrix consists of just
* one block, P is the identity permutation. */
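/* A small hypothetical example (for illustration only): if n = 5 and two
* blocks were found, with R = { 0, 3, 5 }, then block 0 is the 3-by-3
* submatrix in rows/columns P [0..2] of the original matrix, and block 1 is
* the 2-by-2 submatrix in rows/columns P [3..4]. */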
/* ---------------------------------------------------------------------- */
/* if Q is present on input, set Q = Q*P' */
/* ---------------------------------------------------------------------- */
if (Q != (Int *) NULL)
{
/* We found a symmetric permutation P for the matrix A*Q. The overall
* permutation is thus P*(A*Q)*P'. Set Q=Q*P' so that the final
* permutation is P*A*Q. Use Time as workspace. Note that this
* preserves the negative values of Q if the matrix is structurally
* singular. */
for (k = 0 ; k < n ; k++)
{
Time [k] = Q [P [k]] ;
}
for (k = 0 ; k < n ; k++)
{
Q [k] = Time [k] ;
}
}
/* ---------------------------------------------------------------------- */
/* how to traverse the permuted matrix */
/* ---------------------------------------------------------------------- */
/* If Q is not present, the following code can be used to traverse the
* permuted matrix P*A*P'
*
* // compute the inverse of P
* for (knew = 0 ; knew < n ; knew++)
* {
* // row and column kold in the old matrix is row/column knew
* // in the permuted matrix P*A*P'
* kold = P [knew] ;
* Pinv [kold] = knew ;
* }
* for (b = 0 ; b < nblocks ; b++)
* {
* // traverse block b of the permuted matrix P*A*P'
* k1 = R [b] ;
* k2 = R [b+1] ;
* nk = k2 - k1 ;
* for (jnew = k1 ; jnew < k2 ; jnew++)
* {
* jold = P [jnew] ;
* for (p = Ap [jold] ; p < Ap [jold+1] ; p++)
* {
* iold = Ai [p] ;
* inew = Pinv [iold] ;
* // Entry in the old matrix is A (iold, jold), and its
* // position in the new matrix P*A*P' is (inew, jnew).
* // Let B be the bth diagonal block of the permuted
* // matrix. If inew >= k1, then this entry is in row/
* // column (inew-k1, jnew-k1) of the nk-by-nk matrix B.
* // Otherwise, the entry is in the upper block triangular
* // part, not in any diagonal block.
* }
* }
* }
*
* If Q is present, replace the above statement
* jold = P [jnew] ;
* with
* jold = Q [jnew] ;
* or
* jold = BTF_UNFLIP (Q [jnew]) ;
*
* then entry A (iold,jold) in the old (unpermuted) matrix is at (inew,jnew)
* in the permuted matrix P*A*Q. Everything else remains the same as the
* above (simply replace P*A*P' with P*A*Q in the above comments).
*/
/* ---------------------------------------------------------------------- */
/* return # of blocks / # of strongly connected components */
/* ---------------------------------------------------------------------- */
return (nblocks) ;
}

3611
src/maths/KLU/colamd.c Normal file

File diff suppressed because it is too large

255
src/maths/KLU/colamd.h Normal file
View File

@ -0,0 +1,255 @@
/* ========================================================================== */
/* === colamd/symamd prototypes and definitions ============================= */
/* ========================================================================== */
/* COLAMD / SYMAMD include file
You must include this file (colamd.h) in any routine that uses colamd,
symamd, or the related macros and definitions.
Authors:
The authors of the code itself are Stefan I. Larimore and Timothy A.
Davis (davis at cise.ufl.edu), University of Florida. The algorithm was
developed in collaboration with John Gilbert, Xerox PARC, and Esmond
Ng, Oak Ridge National Laboratory.
Acknowledgements:
This work was supported by the National Science Foundation, under
grants DMS-9504974 and DMS-9803599.
Notice:
Copyright (c) 1998-2007, Timothy A. Davis, All Rights Reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use, copy, modify, and/or distribute
this program, provided that the Copyright, this License, and the
Availability of the original version is retained on all copies and made
accessible to the end-user of any code or package that includes COLAMD
or any modified version of COLAMD.
Availability:
The colamd/symamd library is available at
http://www.cise.ufl.edu/research/sparse/colamd/
This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
file. It is required by the colamd.c, colamdmex.c, and symamdmex.c
files, and by any C code that calls the routines whose prototypes are
listed below, or that uses the colamd/symamd definitions listed below.
*/
#ifndef COLAMD_H
#define COLAMD_H
/* make it easy for C++ programs to include COLAMD */
#ifdef __cplusplus
extern "C" {
#endif
/* ========================================================================== */
/* === Include files ======================================================== */
/* ========================================================================== */
#include <stdlib.h>
/* ========================================================================== */
/* === COLAMD version ======================================================= */
/* ========================================================================== */
/* COLAMD Version 2.4 and later will include the following definitions.
* As an example, to test if the version you are using is 2.4 or later:
*
* #ifdef COLAMD_VERSION
* if (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) ...
* #endif
*
* This also works during compile-time:
*
* #if defined(COLAMD_VERSION) && (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4))
* printf ("This is version 2.4 or later\n") ;
* #else
* printf ("This is an early version\n") ;
* #endif
*
* Versions 2.3 and earlier of COLAMD do not include a #define'd version number.
*/
#define COLAMD_DATE "Dec 7, 2011"
#define COLAMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
#define COLAMD_MAIN_VERSION 2
#define COLAMD_SUB_VERSION 7
#define COLAMD_SUBSUB_VERSION 4
#define COLAMD_VERSION \
COLAMD_VERSION_CODE(COLAMD_MAIN_VERSION,COLAMD_SUB_VERSION)
/* ========================================================================== */
/* === Knob and statistics definitions ====================================== */
/* ========================================================================== */
/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
#define COLAMD_KNOBS 20
/* number of output statistics. Only stats [0..6] are currently used. */
#define COLAMD_STATS 20
/* knobs [0] and stats [0]: dense row knob and output statistic. */
#define COLAMD_DENSE_ROW 0
/* knobs [1] and stats [1]: dense column knob and output statistic. */
#define COLAMD_DENSE_COL 1
/* knobs [2]: aggressive absorption */
#define COLAMD_AGGRESSIVE 2
/* stats [2]: memory defragmentation count output statistic */
#define COLAMD_DEFRAG_COUNT 2
/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */
#define COLAMD_STATUS 3
/* stats [4..6]: error info, or info on jumbled columns */
#define COLAMD_INFO1 4
#define COLAMD_INFO2 5
#define COLAMD_INFO3 6
/* error codes returned in stats [3]: */
#define COLAMD_OK (0)
#define COLAMD_OK_BUT_JUMBLED (1)
#define COLAMD_ERROR_A_not_present (-1)
#define COLAMD_ERROR_p_not_present (-2)
#define COLAMD_ERROR_nrow_negative (-3)
#define COLAMD_ERROR_ncol_negative (-4)
#define COLAMD_ERROR_nnz_negative (-5)
#define COLAMD_ERROR_p0_nonzero (-6)
#define COLAMD_ERROR_A_too_small (-7)
#define COLAMD_ERROR_col_length_negative (-8)
#define COLAMD_ERROR_row_index_out_of_bounds (-9)
#define COLAMD_ERROR_out_of_memory (-10)
#define COLAMD_ERROR_internal_error (-999)
/* ========================================================================== */
/* === Prototypes of user-callable routines ================================= */
/* ========================================================================== */
/* define UF_long */
#include "UFconfig.h"
size_t colamd_recommended /* returns recommended value of Alen, */
/* or 0 if input arguments are erroneous */
(
int nnz, /* nonzeros in A */
int n_row, /* number of rows in A */
int n_col /* number of columns in A */
) ;
size_t colamd_l_recommended /* returns recommended value of Alen, */
/* or 0 if input arguments are erroneous */
(
UF_long nnz, /* nonzeros in A */
UF_long n_row, /* number of rows in A */
UF_long n_col /* number of columns in A */
) ;
void colamd_set_defaults /* sets default parameters */
( /* knobs argument is modified on output */
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
) ;
void colamd_l_set_defaults /* sets default parameters */
( /* knobs argument is modified on output */
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
) ;
int colamd /* returns (1) if successful, (0) otherwise*/
( /* A and p arguments are modified on output */
int n_row, /* number of rows in A */
int n_col, /* number of columns in A */
int Alen, /* size of the array A */
int A [], /* row indices of A, of size Alen */
int p [], /* column pointers of A, of size n_col+1 */
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
int stats [COLAMD_STATS] /* colamd output statistics and error codes */
) ;
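/* A minimal usage sketch (illustrative only; nnz, n_row, n_col, A, p, Alen,
* and ok are placeholders for the caller's data). A must be allocated with
* at least colamd_recommended (nnz, n_row, n_col) entries and holds the row
* indices in A [0..nnz-1]; p holds the n_col+1 column pointers. Both arrays
* are overwritten, and on success p [0..n_col-1] holds the column ordering:
*
* double knobs [COLAMD_KNOBS] ;
* int stats [COLAMD_STATS] ;
* colamd_set_defaults (knobs) ;
* Alen = colamd_recommended (nnz, n_row, n_col) ;
* ok = colamd (n_row, n_col, Alen, A, p, knobs, stats) ;
* if (!ok) colamd_report (stats) ;
*/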
UF_long colamd_l /* returns (1) if successful, (0) otherwise*/
( /* A and p arguments are modified on output */
UF_long n_row, /* number of rows in A */
UF_long n_col, /* number of columns in A */
UF_long Alen, /* size of the array A */
UF_long A [], /* row indices of A, of size Alen */
UF_long p [], /* column pointers of A, of size n_col+1 */
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
UF_long stats [COLAMD_STATS]/* colamd output statistics and error codes */
) ;
int symamd /* return (1) if OK, (0) otherwise */
(
int n, /* number of rows and columns of A */
int A [], /* row indices of A */
int p [], /* column pointers of A */
int perm [], /* output permutation, size n_col+1 */
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
int stats [COLAMD_STATS], /* output statistics and error codes */
void * (*allocate) (size_t, size_t),
/* pointer to calloc (ANSI C) or */
/* mxCalloc (for MATLAB mexFunction) */
void (*release) (void *)
/* pointer to free (ANSI C) or */
/* mxFree (for MATLAB mexFunction) */
) ;
UF_long symamd_l /* return (1) if OK, (0) otherwise */
(
UF_long n, /* number of rows and columns of A */
UF_long A [], /* row indices of A */
UF_long p [], /* column pointers of A */
UF_long perm [], /* output permutation, size n_col+1 */
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
UF_long stats [COLAMD_STATS], /* output statistics and error codes */
void * (*allocate) (size_t, size_t),
/* pointer to calloc (ANSI C) or */
/* mxCalloc (for MATLAB mexFunction) */
void (*release) (void *)
/* pointer to free (ANSI C) or */
/* mxFree (for MATLAB mexFunction) */
) ;
void colamd_report
(
int stats [COLAMD_STATS]
) ;
void colamd_l_report
(
UF_long stats [COLAMD_STATS]
) ;
void symamd_report
(
int stats [COLAMD_STATS]
) ;
void symamd_l_report
(
UF_long stats [COLAMD_STATS]
) ;
#ifndef EXTERN
#define EXTERN extern
#endif
EXTERN int (*colamd_printf) (const char *, ...) ;
#ifdef __cplusplus
}
#endif
#endif /* COLAMD_H */

24
src/maths/KLU/colamd_global.c Normal file
View File

@ -0,0 +1,24 @@
/* ========================================================================== */
/* === colamd_global.c ====================================================== */
/* ========================================================================== */
/* ----------------------------------------------------------------------------
* COLAMD, Copyright (C) 2007, Timothy A. Davis.
* See License.txt for the Version 2.1 of the GNU Lesser General Public License
* http://www.cise.ufl.edu/research/sparse
* -------------------------------------------------------------------------- */
/* Global variables for COLAMD */
#ifndef NPRINT
#ifdef MATLAB_MEX_FILE
#include "mex.h"
int (*colamd_printf) (const char *, ...) = mexPrintf ;
#else
#include <stdio.h>
int (*colamd_printf) (const char *, ...) = printf ;
#endif
#else
int (*colamd_printf) (const char *, ...) = ((void *) 0) ;
#endif

773
src/maths/KLU/klu.c Normal file
View File

@ -0,0 +1,773 @@
/* ========================================================================== */
/* === klu ================================================================== */
/* ========================================================================== */
/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
* optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
* Davis. This algorithm is what appears as the default sparse LU routine in
* MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
* Note that no column ordering is provided (see COLAMD or AMD for suitable
* orderings). SuperLU is based on this algorithm, except that it adds the
* use of dense matrix operations on "supernodes" (adjacent columns with
* identical nonzero pattern). This code doesn't use supernodes, thus its name ("Kent" LU,
* as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
* than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
* factors (such as for most finite-element problems). However, for matrices
* with very sparse LU factors, this algorithm is typically faster than both
* SuperLU and UMFPACK, since in this case there is little chance to exploit
* dense matrix kernels (the BLAS).
*
* Only one block of A is factorized, in the BTF form. The input n is the
* size of the block; k1 is the first row and column in the block.
*
* NOTE: no error checking is done on the inputs. This version is not meant to
* be called directly by the user. Use klu_factor instead.
*
* No fill-reducing ordering is provided. The ordering quality of
* klu_kernel_factor is the responsibility of the caller. The input A must be
* pre-permuted to reduce fill-in, or a fill-reducing input permutation Q must
* be provided.
*
* The input matrix A must be in compressed-column form, with either sorted
* or unsorted row indices. Row indices for column j of A is in
* Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
* numerical values. No duplicate entries are allowed.
*
* Copyright 2004-2009, Tim Davis. All rights reserved. See the README
* file for details on permitted use. Note that no code from The MathWorks,
* Inc, or from SuperLU, or from any other source appears here. The code is
* written from scratch, from the algorithmic description in Gilbert & Peierls'
* and Eisenstat & Liu's journal papers [1,2].
*
* If an input permutation Q is provided, the factorization L*U = A (P,Q)
* is computed, where P is determined by partial pivoting, and Q is the input
* ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
* KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
* input permutation is applied symmetrically to the input matrix. The output
* permutation P includes both the partial pivoting ordering and the input
* permutation. If Q is NULL, then it is assumed to be the identity
* permutation. Q is not modified.
*
* [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
* Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
* vol 9, pp. 862-874, 1988.
* [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
* Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
* Applic., vol 13, pp. 202-211, 1992.
*/
/* ========================================================================== */
#include "klu_internal.h"
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
(
/* inputs, not modified */
Int n, /* A is n-by-n. n must be > 0. */
Int Ap [ ], /* size n+1, column pointers for A */
Int Ai [ ], /* size nz = Ap [n], row indices for A */
Entry Ax [ ], /* size nz, values of A */
Int Q [ ], /* size n, optional column permutation */
double Lsize, /* estimate of number of nonzeros in L */
/* outputs, not defined on input */
Unit **p_LU, /* row indices and values of L and U */
Entry Udiag [ ], /* size n, diagonal of U */
Int Llen [ ], /* size n, column length of L */
Int Ulen [ ], /* size n, column length of U */
Int Lip [ ], /* size n, column pointers for L */
Int Uip [ ], /* size n, column pointers for U */
Int P [ ], /* row permutation, size n */
Int *lnz, /* size of L */
Int *unz, /* size of U */
/* workspace, undefined on input */
Entry *X, /* size n double's, zero on output */
Int *Work, /* size 5n Int's */
/* inputs, not modified on output */
Int k1, /* the block of A is from k1 to k2-1 */
Int PSinv [ ], /* inverse of P from symbolic factorization */
double Rs [ ], /* scale factors for A */
/* inputs, modified on output */
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
Int Offi [ ],
Entry Offx [ ],
/* --------------- */
KLU_common *Common
)
{
double maxlnz, dunits ;
Unit *LU ;
Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
Int lsize, usize, anz, ok ;
size_t lusize ;
ASSERT (Common != NULL) ;
/* ---------------------------------------------------------------------- */
/* get control parameters, or use defaults */
/* ---------------------------------------------------------------------- */
n = MAX (1, n) ;
anz = Ap [n+k1] - Ap [k1] ;
if (Lsize <= 0)
{
Lsize = -Lsize ;
Lsize = MAX (Lsize, 1.0) ;
lsize = Lsize * anz + n ;
}
else
{
lsize = Lsize ;
}
usize = lsize ;
lsize = MAX (n+1, lsize) ;
usize = MAX (n+1, usize) ;
maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
lsize = MIN (maxlnz, lsize) ;
usize = MIN (maxlnz, usize) ;
PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
n, anz, k1, lsize, usize, maxlnz)) ;
/* ---------------------------------------------------------------------- */
/* allocate workspace and outputs */
/* ---------------------------------------------------------------------- */
/* return arguments are not yet assigned */
*p_LU = (Unit *) NULL ;
/* these computations are safe from size_t overflow */
W = Work ;
Pinv = (Int *) W ; W += n ;
Stack = (Int *) W ; W += n ;
Flag = (Int *) W ; W += n ;
Lpend = (Int *) W ; W += n ;
Ap_pos = (Int *) W ; W += n ;
dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
DUNITS (Int, usize) + DUNITS (Entry, usize) ;
lusize = (size_t) dunits ;
ok = !INT_OVERFLOW (dunits) ;
LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
if (LU == NULL)
{
/* out of memory, or problem too large */
Common->status = KLU_OUT_OF_MEMORY ;
lusize = 0 ;
return (lusize) ;
}
/* ---------------------------------------------------------------------- */
/* factorize */
/* ---------------------------------------------------------------------- */
/* with pruning, and non-recursive depth-first-search */
lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
X, Stack, Flag, Ap_pos, Lpend,
k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
/* ---------------------------------------------------------------------- */
/* return LU factors, or return nothing if an error occurred */
/* ---------------------------------------------------------------------- */
if (Common->status < KLU_OK)
{
LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
lusize = 0 ;
}
*p_LU = LU ;
PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
return (lusize) ;
}
/* ========================================================================== */
/* === KLU_lsolve =========================================================== */
/* ========================================================================== */
/* Solve Lx=b. Assumes L is unit lower triangular, where the unit diagonal
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
* and is stored in ROW form with row dimension nrhs. nrhs must be in the
* range 1 to 4. */
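/* For example (illustrative only), with nrhs = 2 the two right-hand sides for
* row k are stored contiguously as X [2*k] and X [2*k+1]. The same interleaved
* layout is used by KLU_usolve, KLU_ltsolve, and KLU_utsolve below. */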
void KLU_lsolve
(
/* inputs, not modified: */
Int n,
Int Lip [ ],
Int Llen [ ],
Unit LU [ ],
Int nrhs,
/* right-hand-side on input, solution to Lx=b on output */
Entry X [ ]
)
{
Entry x [4], lik ;
Int *Li ;
Entry *Lx ;
Int k, p, len, i ;
switch (nrhs)
{
case 1:
for (k = 0 ; k < n ; k++)
{
x [0] = X [k] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
/* unit diagonal of L is not stored*/
for (p = 0 ; p < len ; p++)
{
/* X [Li [p]] -= Lx [p] * x [0] ; */
MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
}
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
lik = Lx [p] ;
MULT_SUB (X [2*i], lik, x [0]) ;
MULT_SUB (X [2*i + 1], lik, x [1]) ;
}
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
lik = Lx [p] ;
MULT_SUB (X [3*i], lik, x [0]) ;
MULT_SUB (X [3*i + 1], lik, x [1]) ;
MULT_SUB (X [3*i + 2], lik, x [2]) ;
}
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
lik = Lx [p] ;
MULT_SUB (X [4*i], lik, x [0]) ;
MULT_SUB (X [4*i + 1], lik, x [1]) ;
MULT_SUB (X [4*i + 2], lik, x [2]) ;
MULT_SUB (X [4*i + 3], lik, x [3]) ;
}
}
break ;
}
}
/* ========================================================================== */
/* === KLU_usolve =========================================================== */
/* ========================================================================== */
/* Solve Ux=b. Assumes U is non-unit upper triangular, where the diagonal
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
* and is stored in ROW form with row dimension nrhs. nrhs must be in the
* range 1 to 4. */
void KLU_usolve
(
/* inputs, not modified: */
Int n,
Int Uip [ ],
Int Ulen [ ],
Unit LU [ ],
Entry Udiag [ ],
Int nrhs,
/* right-hand-side on input, solution to Ux=b on output */
Entry X [ ]
)
{
Entry x [4], uik, ukk ;
Int *Ui ;
Entry *Ux ;
Int k, p, len, i ;
switch (nrhs)
{
case 1:
for (k = n-1 ; k >= 0 ; k--)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
/* x [0] = X [k] / Udiag [k] ; */
DIV (x [0], X [k], Udiag [k]) ;
X [k] = x [0] ;
for (p = 0 ; p < len ; p++)
{
/* X [Ui [p]] -= Ux [p] * x [0] ; */
MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
}
}
break ;
case 2:
for (k = n-1 ; k >= 0 ; k--)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
ukk = Udiag [k] ;
/* x [0] = X [2*k ] / ukk ;
x [1] = X [2*k + 1] / ukk ; */
DIV (x [0], X [2*k], ukk) ;
DIV (x [1], X [2*k + 1], ukk) ;
X [2*k ] = x [0] ;
X [2*k + 1] = x [1] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
uik = Ux [p] ;
/* X [2*i ] -= uik * x [0] ;
X [2*i + 1] -= uik * x [1] ; */
MULT_SUB (X [2*i], uik, x [0]) ;
MULT_SUB (X [2*i + 1], uik, x [1]) ;
}
}
break ;
case 3:
for (k = n-1 ; k >= 0 ; k--)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
ukk = Udiag [k] ;
DIV (x [0], X [3*k], ukk) ;
DIV (x [1], X [3*k + 1], ukk) ;
DIV (x [2], X [3*k + 2], ukk) ;
X [3*k ] = x [0] ;
X [3*k + 1] = x [1] ;
X [3*k + 2] = x [2] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
uik = Ux [p] ;
MULT_SUB (X [3*i], uik, x [0]) ;
MULT_SUB (X [3*i + 1], uik, x [1]) ;
MULT_SUB (X [3*i + 2], uik, x [2]) ;
}
}
break ;
case 4:
for (k = n-1 ; k >= 0 ; k--)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
ukk = Udiag [k] ;
DIV (x [0], X [4*k], ukk) ;
DIV (x [1], X [4*k + 1], ukk) ;
DIV (x [2], X [4*k + 2], ukk) ;
DIV (x [3], X [4*k + 3], ukk) ;
X [4*k ] = x [0] ;
X [4*k + 1] = x [1] ;
X [4*k + 2] = x [2] ;
X [4*k + 3] = x [3] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
uik = Ux [p] ;
MULT_SUB (X [4*i], uik, x [0]) ;
MULT_SUB (X [4*i + 1], uik, x [1]) ;
MULT_SUB (X [4*i + 2], uik, x [2]) ;
MULT_SUB (X [4*i + 3], uik, x [3]) ;
}
}
break ;
}
}
/* ========================================================================== */
/* === KLU_ltsolve ========================================================== */
/* ========================================================================== */
/* Solve L'x=b. Assumes L is unit lower triangular, where the unit diagonal
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
* and is stored in ROW form with row dimension nrhs. nrhs must be in the
* range 1 to 4. */
void KLU_ltsolve
(
/* inputs, not modified: */
Int n,
Int Lip [ ],
Int Llen [ ],
Unit LU [ ],
Int nrhs,
#ifdef COMPLEX
Int conj_solve,
#endif
/* right-hand-side on input, solution to L'x=b on output */
Entry X [ ]
)
{
Entry x [4], lik ;
Int *Li ;
Entry *Lx ;
Int k, p, len, i ;
switch (nrhs)
{
case 1:
for (k = n-1 ; k >= 0 ; k--)
{
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
x [0] = X [k] ;
for (p = 0 ; p < len ; p++)
{
#ifdef COMPLEX
if (conj_solve)
{
/* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
}
else
#endif
{
/*x [0] -= Lx [p] * X [Li [p]] ;*/
MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
}
}
X [k] = x [0] ;
}
break ;
case 2:
for (k = n-1 ; k >= 0 ; k--)
{
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (lik, Lx [p]) ;
}
else
#endif
{
lik = Lx [p] ;
}
MULT_SUB (x [0], lik, X [2*i]) ;
MULT_SUB (x [1], lik, X [2*i + 1]) ;
}
X [2*k ] = x [0] ;
X [2*k + 1] = x [1] ;
}
break ;
case 3:
for (k = n-1 ; k >= 0 ; k--)
{
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (lik, Lx [p]) ;
}
else
#endif
{
lik = Lx [p] ;
}
MULT_SUB (x [0], lik, X [3*i]) ;
MULT_SUB (x [1], lik, X [3*i + 1]) ;
MULT_SUB (x [2], lik, X [3*i + 2]) ;
}
X [3*k ] = x [0] ;
X [3*k + 1] = x [1] ;
X [3*k + 2] = x [2] ;
}
break ;
case 4:
for (k = n-1 ; k >= 0 ; k--)
{
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
for (p = 0 ; p < len ; p++)
{
i = Li [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (lik, Lx [p]) ;
}
else
#endif
{
lik = Lx [p] ;
}
MULT_SUB (x [0], lik, X [4*i]) ;
MULT_SUB (x [1], lik, X [4*i + 1]) ;
MULT_SUB (x [2], lik, X [4*i + 2]) ;
MULT_SUB (x [3], lik, X [4*i + 3]) ;
}
X [4*k ] = x [0] ;
X [4*k + 1] = x [1] ;
X [4*k + 2] = x [2] ;
X [4*k + 3] = x [3] ;
}
break ;
}
}
/* ========================================================================== */
/* === KLU_utsolve ========================================================== */
/* ========================================================================== */
/* Solve U'x=b. Assumes U is non-unit upper triangular, where the diagonal
* entry is NOT stored in the column (it is held separately in Udiag). Overwrites B
* with the solution X. B is n-by-nrhs and is stored in ROW form with row
* dimension nrhs. nrhs must be in the range 1 to 4. */
void KLU_utsolve
(
/* inputs, not modified: */
Int n,
Int Uip [ ],
Int Ulen [ ],
Unit LU [ ],
Entry Udiag [ ],
Int nrhs,
#ifdef COMPLEX
Int conj_solve,
#endif
/* right-hand-side on input, solution to Ux=b on output */
Entry X [ ]
)
{
Entry x [4], uik, ukk ;
Int k, p, len, i ;
Int *Ui ;
Entry *Ux ;
switch (nrhs)
{
case 1:
for (k = 0 ; k < n ; k++)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
x [0] = X [k] ;
for (p = 0 ; p < len ; p++)
{
#ifdef COMPLEX
if (conj_solve)
{
/* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
}
else
#endif
{
/* x [0] -= Ux [p] * X [Ui [p]] ; */
MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
}
}
#ifdef COMPLEX
if (conj_solve)
{
CONJ (ukk, Udiag [k]) ;
}
else
#endif
{
ukk = Udiag [k] ;
}
DIV (X [k], x [0], ukk) ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (uik, Ux [p]) ;
}
else
#endif
{
uik = Ux [p] ;
}
MULT_SUB (x [0], uik, X [2*i]) ;
MULT_SUB (x [1], uik, X [2*i + 1]) ;
}
#ifdef COMPLEX
if (conj_solve)
{
CONJ (ukk, Udiag [k]) ;
}
else
#endif
{
ukk = Udiag [k] ;
}
DIV (X [2*k], x [0], ukk) ;
DIV (X [2*k + 1], x [1], ukk) ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (uik, Ux [p]) ;
}
else
#endif
{
uik = Ux [p] ;
}
MULT_SUB (x [0], uik, X [3*i]) ;
MULT_SUB (x [1], uik, X [3*i + 1]) ;
MULT_SUB (x [2], uik, X [3*i + 2]) ;
}
#ifdef COMPLEX
if (conj_solve)
{
CONJ (ukk, Udiag [k]) ;
}
else
#endif
{
ukk = Udiag [k] ;
}
DIV (X [3*k], x [0], ukk) ;
DIV (X [3*k + 1], x [1], ukk) ;
DIV (X [3*k + 2], x [2], ukk) ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
for (p = 0 ; p < len ; p++)
{
i = Ui [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (uik, Ux [p]) ;
}
else
#endif
{
uik = Ux [p] ;
}
MULT_SUB (x [0], uik, X [4*i]) ;
MULT_SUB (x [1], uik, X [4*i + 1]) ;
MULT_SUB (x [2], uik, X [4*i + 2]) ;
MULT_SUB (x [3], uik, X [4*i + 3]) ;
}
#ifdef COMPLEX
if (conj_solve)
{
CONJ (ukk, Udiag [k]) ;
}
else
#endif
{
ukk = Udiag [k] ;
}
DIV (X [4*k], x [0], ukk) ;
DIV (X [4*k + 1], x [1], ukk) ;
DIV (X [4*k + 2], x [2], ukk) ;
DIV (X [4*k + 3], x [3], ukk) ;
}
break ;
}
}

831
src/maths/KLU/klu.h Normal file
View File

@ -0,0 +1,831 @@
/* ========================================================================== */
/* === klu include file ===================================================== */
/* ========================================================================== */
/* Include file for user programs that call klu_* routines */
#ifndef _KLU_H
#define _KLU_H
/* make it easy for C++ programs to include KLU */
#ifdef __cplusplus
extern "C" {
#endif
#include "amd.h"
#include "colamd.h"
#include "btf.h"
/* -------------------------------------------------------------------------- */
/* Symbolic object - contains the pre-ordering computed by klu_analyze */
/* -------------------------------------------------------------------------- */
typedef struct
{
/* A (P,Q) is in upper block triangular form. The kth block goes from
* row/col index R [k] to R [k+1]-1. The estimated number of nonzeros
* in the L factor of the kth block is Lnz [k].
*/
/* only computed if the AMD ordering is chosen: */
double symmetry ; /* symmetry of largest block */
double est_flops ; /* est. factorization flop count */
double lnz, unz ; /* estimated nz in L and U, including diagonals */
double *Lnz ; /* size n, but only Lnz [0..nblocks-1] is used */
/* computed for all orderings: */
int
n, /* input matrix A is n-by-n */
nz, /* # entries in input matrix */
*P, /* size n */
*Q, /* size n */
*R, /* size n+1, but only R [0..nblocks] is used */
nzoff, /* nz in off-diagonal blocks */
nblocks, /* number of blocks */
maxblock, /* size of largest block */
ordering, /* ordering used (AMD, COLAMD, or GIVEN) */
do_btf ; /* whether or not BTF preordering was requested */
/* only computed if BTF preordering requested */
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank
* deficient. -1 if not computed. n if the matrix has
* full structural rank */
} klu_symbolic ;
typedef struct /* 64-bit version (otherwise same as above) */
{
double symmetry, est_flops, lnz, unz ;
double *Lnz ;
UF_long n, nz, *P, *Q, *R, nzoff, nblocks, maxblock, ordering, do_btf,
structural_rank ;
} klu_l_symbolic ;
/* -------------------------------------------------------------------------- */
/* Numeric object - contains the factors computed by klu_factor */
/* -------------------------------------------------------------------------- */
typedef struct
{
/* LU factors of each block, the pivot row permutation, and the
* entries in the off-diagonal blocks */
int n ; /* A is n-by-n */
int nblocks ; /* number of diagonal blocks */
int lnz ; /* actual nz in L, including diagonal */
int unz ; /* actual nz in U, including diagonal */
int max_lnz_block ; /* max actual nz in L in any one block, incl. diag */
int max_unz_block ; /* max actual nz in U in any one block, incl. diag */
int *Pnum ; /* size n. final pivot permutation */
int *Pinv ; /* size n. inverse of final pivot permutation */
/* LU factors of each block */
int *Lip ; /* size n. pointers into LUbx[block] for L */
int *Uip ; /* size n. pointers into LUbx[block] for U */
int *Llen ; /* size n. Llen [k] = # of entries in kth column of L */
int *Ulen ; /* size n. Ulen [k] = # of entries in kth column of U */
void **LUbx ; /* L and U indices and entries (excl. diagonal of U) */
size_t *LUsize ; /* size of each LUbx [block], in sizeof (Unit) */
void *Udiag ; /* diagonal of U */
/* scale factors; can be NULL if no scaling */
double *Rs ; /* size n. Rs [i] is scale factor for row i */
/* permanent workspace for factorization and solve */
size_t worksize ; /* size (in bytes) of Work */
void *Work ; /* workspace */
void *Xwork ; /* alias into Numeric->Work */
int *Iwork ; /* alias into Numeric->Work */
/* off-diagonal entries in a conventional compressed-column sparse matrix */
int *Offp ; /* size n+1, column pointers */
int *Offi ; /* size nzoff, row indices */
void *Offx ; /* size nzoff, numerical values */
int nzoff ;
} klu_numeric ;
typedef struct /* 64-bit version (otherwise same as above) */
{
UF_long n, nblocks, lnz, unz, max_lnz_block, max_unz_block, *Pnum, *Pinv,
*Lip, *Uip, *Llen, *Ulen ;
void **LUbx ;
size_t *LUsize ;
void *Udiag ;
double *Rs ;
size_t worksize ;
void *Work, *Xwork ;
UF_long *Iwork ;
UF_long *Offp, *Offi ;
void *Offx ;
UF_long nzoff ;
} klu_l_numeric ;
/* -------------------------------------------------------------------------- */
/* KLU control parameters and statistics */
/* -------------------------------------------------------------------------- */
/* Common->status values */
#define KLU_OK 0
#define KLU_SINGULAR (1) /* status > 0 is a warning, not an error */
#define KLU_OUT_OF_MEMORY (-2)
#define KLU_INVALID (-3)
#define KLU_TOO_LARGE (-4) /* integer overflow has occurred */
typedef struct klu_common_struct
{
/* ---------------------------------------------------------------------- */
/* parameters */
/* ---------------------------------------------------------------------- */
double tol ; /* pivot tolerance for diagonal preference */
double memgrow ; /* realloc memory growth size for LU factors */
double initmem_amd ; /* init. memory size with AMD: c*nnz(L) + n */
double initmem ; /* init. memory size: c*nnz(A) + n */
double maxwork ; /* maxwork for BTF, <= 0 if no limit */
int btf ; /* use BTF pre-ordering, or not */
int ordering ; /* 0: AMD, 1: COLAMD, 2: user P and Q,
* 3: user function */
int scale ; /* row scaling: -1: none (and no error check),
* 0: none, 1: sum, 2: max */
/* memory management routines */
void *(*malloc_memory) (size_t) ; /* pointer to malloc */
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */
void (*free_memory) (void *) ; /* pointer to free */
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */
/* pointer to user ordering function */
int (*user_order) (int, int *, int *, int *, struct klu_common_struct *) ;
/* pointer to user data, passed unchanged as the last parameter to the
* user ordering function (optional, the user function need not use this
* information). */
void *user_data ;
int halt_if_singular ; /* how to handle a singular matrix:
* FALSE: keep going. Return a Numeric object with a zero U(k,k). A
* divide-by-zero may occur when computing L(:,k). The Numeric object
* can be passed to klu_solve (a divide-by-zero will occur). It can
* also be safely passed to klu_refactor.
* TRUE: stop quickly. klu_factor will free the partially-constructed
* Numeric object. klu_refactor will not free it, but will leave the
* numerical values only partially defined. This is the default. */
/* ---------------------------------------------------------------------- */
/* statistics */
/* ---------------------------------------------------------------------- */
int status ; /* KLU_OK if OK, < 0 if error */
int nrealloc ; /* # of reallocations of L and U */
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank
* deficient (as determined by maxtrans). -1 if not computed. n if the
* matrix has full structural rank. This is computed by klu_analyze
* if a BTF preordering is requested. */
int numerical_rank ; /* First k for which a zero U(k,k) was found,
* if the matrix was singular (in the range 0 to n-1). n if the matrix
* has full rank. This is not a true rank-estimation. It just reports
* where the first zero pivot was found. -1 if not computed.
* Computed by klu_factor and klu_refactor. */
int singular_col ; /* n if the matrix is not singular. If in the
* range 0 to n-1, this is the column index of the original matrix A that
* corresponds to the column of U that contains a zero diagonal entry.
* -1 if not computed. Computed by klu_factor and klu_refactor. */
int noffdiag ; /* # of off-diagonal pivots, -1 if not computed */
double flops ; /* actual factorization flop count, from klu_flops */
double rcond ; /* crude reciprocal condition est., from klu_rcond */
double condest ; /* accurate condition est., from klu_condest */
double rgrowth ; /* reciprocal pivot rgrowth, from klu_rgrowth */
double work ; /* actual work done in BTF, in klu_analyze */
size_t memusage ; /* current memory usage, in bytes */
size_t mempeak ; /* peak memory usage, in bytes */
} klu_common ;
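/* Illustrative sketch (field values are placeholders, not recommendations):
* after klu_defaults (&Common), a caller may override individual fields before
* calling klu_analyze / klu_factor, for example Common.ordering = 1 (COLAMD
* instead of AMD), Common.scale = 2 (max row scaling), or Common.btf = 0
* (disable the BTF pre-ordering). */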
typedef struct klu_l_common_struct /* 64-bit version (otherwise same as above)*/
{
double tol, memgrow, initmem_amd, initmem, maxwork ;
UF_long btf, ordering, scale ;
void *(*malloc_memory) (size_t) ;
void *(*realloc_memory) (void *, size_t) ;
void (*free_memory) (void *) ;
void *(*calloc_memory) (size_t, size_t) ;
UF_long (*user_order) (UF_long, UF_long *, UF_long *, UF_long *,
struct klu_l_common_struct *) ;
void *user_data ;
UF_long halt_if_singular ;
UF_long status, nrealloc, structural_rank, numerical_rank, singular_col,
noffdiag ;
double flops, rcond, condest, rgrowth, work ;
size_t memusage, mempeak ;
} klu_l_common ;
/* -------------------------------------------------------------------------- */
/* klu_defaults: sets default control parameters */
/* -------------------------------------------------------------------------- */
int klu_defaults
(
klu_common *Common
) ;
UF_long klu_l_defaults (klu_l_common *Common) ;
/* -------------------------------------------------------------------------- */
/* klu_analyze: orders and analyzes a matrix */
/* -------------------------------------------------------------------------- */
/* Order the matrix with BTF (or not), then order each block with AMD, COLAMD,
* a natural ordering, or with a user-provided ordering function */
klu_symbolic *klu_analyze
(
/* inputs, not modified */
int n, /* A is n-by-n */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
klu_common *Common
) ;
klu_l_symbolic *klu_l_analyze (UF_long, UF_long *, UF_long *,
klu_l_common *Common) ;
/* -------------------------------------------------------------------------- */
/* klu_analyze_given: analyzes a matrix using given P and Q */
/* -------------------------------------------------------------------------- */
/* Order the matrix with BTF (or not), then use natural or given ordering
* P and Q on the blocks. P and Q are interpreted as identity
* if NULL. */
klu_symbolic *klu_analyze_given
(
/* inputs, not modified */
int n, /* A is n-by-n */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
int P [ ], /* size n, user's row permutation (may be NULL) */
int Q [ ], /* size n, user's column permutation (may be NULL) */
klu_common *Common
) ;
klu_l_symbolic *klu_l_analyze_given (UF_long, UF_long *, UF_long *, UF_long *,
UF_long *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_factor: factors a matrix using the klu_analyze results */
/* -------------------------------------------------------------------------- */
klu_numeric *klu_factor /* returns KLU_OK if OK, < 0 if error */
(
/* inputs, not modified */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ], /* size nz, numerical values */
klu_symbolic *Symbolic,
klu_common *Common
) ;
klu_numeric *klu_z_factor /* returns KLU_OK if OK, < 0 if error */
(
/* inputs, not modified */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ], /* size 2*nz, numerical values (real,imag pairs) */
klu_symbolic *Symbolic,
klu_common *Common
) ;
/* long / real version */
klu_l_numeric *klu_l_factor (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_common *) ;
/* long / complex version */
klu_l_numeric *klu_zl_factor (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_solve: solves Ax=b using the Symbolic and Numeric objects */
/* -------------------------------------------------------------------------- */
int klu_solve
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
int ldim, /* leading dimension of B */
int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size ldim*nrhs */
klu_common *Common
) ;
int klu_z_solve
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
int ldim, /* leading dimension of B */
int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size 2*ldim*nrhs */
klu_common *Common
) ;
UF_long klu_l_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
double *, klu_l_common *) ;
UF_long klu_zl_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
double *, klu_l_common *) ;
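/* A typical calling sequence for the int/real version (illustrative sketch;
* n, Ap, Ai, Ax, and b are placeholders for the caller's n-by-n matrix in
* compressed-column form and its right-hand side, overwritten with the
* solution). Error handling of the returned pointers and of Common.status is
* omitted for brevity:
*
* klu_common Common ;
* klu_symbolic *Symbolic ;
* klu_numeric *Numeric ;
* klu_defaults (&Common) ;
* Symbolic = klu_analyze (n, Ap, Ai, &Common) ;
* Numeric = klu_factor (Ap, Ai, Ax, Symbolic, &Common) ;
* klu_solve (Symbolic, Numeric, n, 1, b, &Common) ;
* klu_free_numeric (&Numeric, &Common) ;
* klu_free_symbolic (&Symbolic, &Common) ;
*/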
/* -------------------------------------------------------------------------- */
/* klu_tsolve: solves A'x=b using the Symbolic and Numeric objects */
/* -------------------------------------------------------------------------- */
int klu_tsolve
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
int ldim, /* leading dimension of B */
int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size ldim*nrhs */
klu_common *Common
) ;
int klu_z_tsolve
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
int ldim, /* leading dimension of B */
int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size 2*ldim*nrhs */
int conj_solve, /* TRUE: conjugate solve, FALSE: solve A.'x=b */
klu_common *Common
) ;
UF_long klu_l_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
double *, klu_l_common *) ;
UF_long klu_zl_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
double *, UF_long, klu_l_common * ) ;
/* -------------------------------------------------------------------------- */
/* klu_refactor: refactorizes matrix with same ordering as klu_factor */
/* -------------------------------------------------------------------------- */
int klu_refactor /* return TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ], /* size nz, numerical values */
klu_symbolic *Symbolic,
/* input, and numerical values modified on output */
klu_numeric *Numeric,
klu_common *Common
) ;
int klu_z_refactor /* return TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ], /* size 2*nz, numerical values */
klu_symbolic *Symbolic,
/* input, and numerical values modified on output */
klu_numeric *Numeric,
klu_common *Common
) ;
UF_long klu_l_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_numeric *, klu_l_common *) ;
UF_long klu_zl_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_numeric *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_free_symbolic: destroys the Symbolic object */
/* -------------------------------------------------------------------------- */
int klu_free_symbolic
(
klu_symbolic **Symbolic,
klu_common *Common
) ;
UF_long klu_l_free_symbolic (klu_l_symbolic **, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_free_numeric: destroys the Numeric object */
/* -------------------------------------------------------------------------- */
/* Note that klu_free_numeric and klu_z_free_numeric are identical; each can
* free both kinds of Numeric objects (real and complex) */
int klu_free_numeric
(
klu_numeric **Numeric,
klu_common *Common
) ;
int klu_z_free_numeric
(
klu_numeric **Numeric,
klu_common *Common
) ;
UF_long klu_l_free_numeric (klu_l_numeric **, klu_l_common *) ;
UF_long klu_zl_free_numeric (klu_l_numeric **, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_sort: sorts the columns of the LU factorization */
/* -------------------------------------------------------------------------- */
/* this is not needed except for the MATLAB interface */
int klu_sort
(
/* inputs, not modified */
klu_symbolic *Symbolic,
/* input/output */
klu_numeric *Numeric,
klu_common *Common
) ;
int klu_z_sort
(
/* inputs, not modified */
klu_symbolic *Symbolic,
/* input/output */
klu_numeric *Numeric,
klu_common *Common
) ;
UF_long klu_l_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
UF_long klu_zl_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_flops: determines # of flops performed in numeric factorization */
/* -------------------------------------------------------------------------- */
int klu_flops
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
/* input/output */
klu_common *Common
) ;
int klu_z_flops
(
/* inputs, not modified */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
/* input/output */
klu_common *Common
) ;
UF_long klu_l_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
UF_long klu_zl_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_rgrowth : compute the reciprocal pivot growth */
/* -------------------------------------------------------------------------- */
/* Pivot growth is computed after the input matrix is permuted, scaled, and
* off-diagonal entries pruned. This is because the LU factorization of each
* block takes as input the scaled diagonal blocks of the BTF form. The
* reciprocal pivot growth in column j of an LU factorization of a matrix C
* is the largest entry in C divided by the largest entry in U; then the overall
* reciprocal pivot growth is the smallest such value for all columns j. Note
* that the off-diagonal entries are not scaled, since they do not take part in
* the LU factorization of the diagonal blocks.
*
* In MATLAB notation:
*
* rgrowth = min (max (abs ((R \ A(p,q)) - F)) ./ max (abs (U))) */
int klu_rgrowth
(
int Ap [ ],
int Ai [ ],
double Ax [ ],
klu_symbolic *Symbolic,
klu_numeric *Numeric,
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */
) ;
int klu_z_rgrowth
(
int Ap [ ],
int Ai [ ],
double Ax [ ],
klu_symbolic *Symbolic,
klu_numeric *Numeric,
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */
) ;
UF_long klu_l_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_numeric *, klu_l_common *) ;
UF_long klu_zl_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *,
klu_l_numeric *, klu_l_common *) ;
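/* An illustrative call sequence (a sketch only, not part of this interface;
 * it assumes Ap, Ai, Ax, and n already describe a compressed-column matrix):
 *
 *      klu_common Common ;
 *      klu_symbolic *Symbolic ;
 *      klu_numeric *Numeric ;
 *      klu_defaults (&Common) ;
 *      Symbolic = klu_analyze (n, Ap, Ai, &Common) ;
 *      Numeric = klu_factor (Ap, Ai, Ax, Symbolic, &Common) ;
 *      if (klu_rgrowth (Ap, Ai, Ax, Symbolic, Numeric, &Common))
 *      {
 *          printf ("reciprocal pivot growth: %g\n", Common.rgrowth) ;
 *      }
 */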
/* -------------------------------------------------------------------------- */
/* klu_condest */
/* -------------------------------------------------------------------------- */
/* Computes a reasonably accurate estimate of the 1-norm condition number, using
* Hager's method, as modified by Higham and Tisseur (same method as used in
* MATLAB's condest). */
int klu_condest
(
int Ap [ ], /* size n+1, column pointers, not modified */
double Ax [ ], /* size nz = Ap[n], numerical values, not modified*/
klu_symbolic *Symbolic, /* symbolic analysis, not modified */
klu_numeric *Numeric, /* numeric factorization, not modified */
klu_common *Common /* result returned in Common->condest */
) ;
int klu_z_condest
(
int Ap [ ],
double Ax [ ], /* size 2*nz */
klu_symbolic *Symbolic,
klu_numeric *Numeric,
klu_common *Common /* result returned in Common->condest */
) ;
UF_long klu_l_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *,
klu_l_common *) ;
UF_long klu_zl_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *,
klu_l_common *) ;
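/* An illustrative sketch (assumes Symbolic and Numeric were produced by
 * klu_analyze and klu_factor for the same Ap and Ax; note Ai is not needed):
 *
 *      if (klu_condest (Ap, Ax, Symbolic, Numeric, &Common))
 *      {
 *          printf ("estimated 1-norm condition number: %g\n", Common.condest) ;
 *      }
 */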
/* -------------------------------------------------------------------------- */
/* klu_rcond: compute min(abs(diag(U))) / max(abs(diag(U))) */
/* -------------------------------------------------------------------------- */
int klu_rcond
(
klu_symbolic *Symbolic, /* input, not modified */
klu_numeric *Numeric, /* input, not modified */
klu_common *Common /* result in Common->rcond */
) ;
int klu_z_rcond
(
klu_symbolic *Symbolic, /* input, not modified */
klu_numeric *Numeric, /* input, not modified */
klu_common *Common /* result in Common->rcond */
) ;
UF_long klu_l_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
UF_long klu_zl_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_scale */
/* -------------------------------------------------------------------------- */
int klu_scale /* return TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */
int n,
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ],
/* outputs, not defined on input */
double Rs [ ],
/* workspace, not defined on input or output */
int W [ ], /* size n, can be NULL */
klu_common *Common
) ;
int klu_z_scale /* return TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */
int n,
int Ap [ ], /* size n+1, column pointers */
int Ai [ ], /* size nz, row indices */
double Ax [ ],
/* outputs, not defined on input */
double Rs [ ],
/* workspace, not defined on input or output */
int W [ ], /* size n, can be NULL */
klu_common *Common
) ;
UF_long klu_l_scale (UF_long, UF_long, UF_long *, UF_long *, double *,
double *, UF_long *, klu_l_common *) ;
UF_long klu_zl_scale (UF_long, UF_long, UF_long *, UF_long *, double *,
double *, UF_long *, klu_l_common *) ;
/* -------------------------------------------------------------------------- */
/* klu_extract */
/* -------------------------------------------------------------------------- */
int klu_extract /* returns TRUE if successful, FALSE otherwise */
(
/* inputs: */
klu_numeric *Numeric,
klu_symbolic *Symbolic,
/* outputs, either allocated on input, or ignored otherwise */
/* L */
int *Lp, /* size n+1 */
int *Li, /* size Numeric->lnz */
double *Lx, /* size Numeric->lnz */
/* U */
int *Up, /* size n+1 */
int *Ui, /* size Numeric->unz */
double *Ux, /* size Numeric->unz */
/* F */
int *Fp, /* size n+1 */
int *Fi, /* size Numeric->nzoff */
double *Fx, /* size Numeric->nzoff */
/* P, row permutation */
int *P, /* size n */
/* Q, column permutation */
int *Q, /* size n */
/* Rs, scale factors */
double *Rs, /* size n */
/* R, block boundaries */
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */
klu_common *Common
) ;
int klu_z_extract /* returns TRUE if successful, FALSE otherwise */
(
/* inputs: */
klu_numeric *Numeric,
klu_symbolic *Symbolic,
/* outputs, either allocated on input, or NULL to skip that part */
/* L */
int *Lp, /* size n+1 */
int *Li, /* size nnz(L) */
double *Lx, /* size nnz(L) */
double *Lz, /* size nnz(L) for the complex case, ignored if real */
/* U */
int *Up, /* size n+1 */
int *Ui, /* size nnz(U) */
double *Ux, /* size nnz(U) */
double *Uz, /* size nnz(U) for the complex case, ignored if real */
/* F */
int *Fp, /* size n+1 */
int *Fi, /* size nnz(F) */
double *Fx, /* size nnz(F) */
double *Fz, /* size nnz(F) for the complex case, ignored if real */
/* P, row permutation */
int *P, /* size n */
/* Q, column permutation */
int *Q, /* size n */
/* Rs, scale factors */
double *Rs, /* size n */
/* R, block boundaries */
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */
klu_common *Common
) ;
UF_long klu_l_extract (klu_l_numeric *, klu_l_symbolic *,
UF_long *, UF_long *, double *,
UF_long *, UF_long *, double *,
UF_long *, UF_long *, double *,
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ;
UF_long klu_zl_extract (klu_l_numeric *, klu_l_symbolic *,
UF_long *, UF_long *, double *, double *,
UF_long *, UF_long *, double *, double *,
UF_long *, UF_long *, double *, double *,
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ;
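/* An illustrative sketch for the real/int case (error checks omitted; n is
 * Symbolic->n, and the allocation sizes come from the Numeric object itself;
 * any output pointer may be NULL, in which case that part is not extracted):
 *
 *      int *Lp = malloc ((n+1) * sizeof (int)) ;
 *      int *Li = malloc (Numeric->lnz * sizeof (int)) ;
 *      double *Lx = malloc (Numeric->lnz * sizeof (double)) ;
 *      int *Up = malloc ((n+1) * sizeof (int)) ;
 *      int *Ui = malloc (Numeric->unz * sizeof (int)) ;
 *      double *Ux = malloc (Numeric->unz * sizeof (double)) ;
 *      klu_extract (Numeric, Symbolic, Lp, Li, Lx, Up, Ui, Ux,
 *          NULL, NULL, NULL, NULL, NULL, NULL, NULL, &Common) ;
 */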
/* -------------------------------------------------------------------------- */
/* KLU memory management routines */
/* -------------------------------------------------------------------------- */
void *klu_malloc /* returns pointer to the newly malloc'd block */
(
/* ---- input ---- */
size_t n, /* number of items */
size_t size, /* size of each item */
/* --------------- */
klu_common *Common
) ;
void *klu_free /* always returns NULL */
(
/* ---- in/out --- */
void *p, /* block of memory to free */
size_t n, /* number of items */
size_t size, /* size of each item */
/* --------------- */
klu_common *Common
) ;
void *klu_realloc /* returns pointer to reallocated block */
(
/* ---- input ---- */
size_t nnew, /* requested # of items in reallocated block */
size_t nold, /* current size of block, in # of items */
size_t size, /* size of each item */
/* ---- in/out --- */
void *p, /* block of memory to realloc */
/* --------------- */
klu_common *Common
) ;
void *klu_l_malloc (size_t, size_t, klu_l_common *) ;
void *klu_l_free (void *, size_t, size_t, klu_l_common *) ;
void *klu_l_realloc (size_t, size_t, size_t, void *, klu_l_common *) ;
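/* An illustrative sketch: these wrappers route every request through the
 * malloc/realloc/free function pointers stored in the Common object and keep
 * Common->memusage and Common->mempeak up to date, so user code can share
 * KLU's allocator accounting:
 *
 *      int *Map = klu_malloc (n, sizeof (int), &Common) ;
 *      if (Common.status == KLU_OK)
 *      {
 *          ...use Map...
 *          Map = klu_free (Map, n, sizeof (int), &Common) ;
 *      }
 */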
/* ========================================================================== */
/* === KLU version ========================================================== */
/* ========================================================================== */
/* All versions of KLU include these definitions.
* As an example, to test if the version you are using is 1.2 or later:
*
* if (KLU_VERSION >= KLU_VERSION_CODE (1,2)) ...
*
* This also works at compile time:
*
* #if (KLU_VERSION >= KLU_VERSION_CODE (1,2))
* printf ("This is version 1.2 or later\n") ;
* #else
* printf ("This is an early version\n") ;
* #endif
*/
#define KLU_DATE "Dec 7, 2011"
#define KLU_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
#define KLU_MAIN_VERSION 1
#define KLU_SUB_VERSION 1
#define KLU_SUBSUB_VERSION 3
#define KLU_VERSION KLU_VERSION_CODE(KLU_MAIN_VERSION,KLU_SUB_VERSION)
#ifdef __cplusplus
}
#endif
#endif

488
src/maths/KLU/klu_analyze.c Normal file
View File

@ -0,0 +1,488 @@
/* ========================================================================== */
/* === klu_analyze ========================================================== */
/* ========================================================================== */
/* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural
* ordering, or the user-provided function on the blocks. Does not support
* using a given ordering (use klu_analyze_given for that case). */
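/* A minimal end-to-end sketch of how this analysis phase is typically used
 * (b is a hypothetical right-hand side of size n; error checks omitted; the
 * solution overwrites b):
 *
 *      klu_common Common ;
 *      klu_defaults (&Common) ;
 *      Symbolic = klu_analyze (n, Ap, Ai, &Common) ;
 *      Numeric = klu_factor (Ap, Ai, Ax, Symbolic, &Common) ;
 *      klu_solve (Symbolic, Numeric, n, 1, b, &Common) ;
 *      klu_free_numeric (&Numeric, &Common) ;
 *      klu_free_symbolic (&Symbolic, &Common) ;
 */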
#include "klu_internal.h"
/* ========================================================================== */
/* === analyze_worker ======================================================= */
/* ========================================================================== */
static Int analyze_worker /* returns KLU_OK or < 0 if error */
(
/* inputs, not modified */
Int n, /* A is n-by-n */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
Int nblocks, /* # of blocks */
Int Pbtf [ ], /* BTF row permutation */
Int Qbtf [ ], /* BTF col permutation */
Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */
Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */
/* output only, not defined on input */
Int P [ ], /* size n */
Int Q [ ], /* size n */
double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */
/* workspace, not defined on input or output */
Int Pblk [ ], /* size maxblock */
Int Cp [ ], /* size maxblock+1 */
Int Ci [ ], /* size MAX (nz+1, Cilen) */
Int Cilen, /* nz+1, or COLAMD_recommended(nz,n,n) for COLAMD */
Int Pinv [ ], /* size maxblock */
/* input/output */
KLU_symbolic *Symbolic,
KLU_common *Common
)
{
double amd_Info [AMD_INFO], lnz, lnz1, flops, flops1 ;
Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow,
maxnz, nzoff, cstats [COLAMD_STATS], ok, err = KLU_INVALID ;
/* ---------------------------------------------------------------------- */
/* initializations */
/* ---------------------------------------------------------------------- */
/* compute the inverse of Pbtf */
#ifndef NDEBUG
for (k = 0 ; k < n ; k++)
{
P [k] = EMPTY ;
Q [k] = EMPTY ;
Pinv [k] = EMPTY ;
}
#endif
for (k = 0 ; k < n ; k++)
{
ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ;
Pinv [Pbtf [k]] = k ;
}
#ifndef NDEBUG
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
#endif
nzoff = 0 ;
lnz = 0 ;
maxnz = 0 ;
flops = 0 ;
Symbolic->symmetry = EMPTY ; /* only computed by AMD */
/* ---------------------------------------------------------------------- */
/* order each block */
/* ---------------------------------------------------------------------- */
for (block = 0 ; block < nblocks ; block++)
{
/* ------------------------------------------------------------------ */
/* the block is from rows/columns k1 to k2-1 */
/* ------------------------------------------------------------------ */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
/* ------------------------------------------------------------------ */
/* construct the kth block, C */
/* ------------------------------------------------------------------ */
Lnz [block] = EMPTY ;
pc = 0 ;
for (k = k1 ; k < k2 ; k++)
{
newcol = k-k1 ;
Cp [newcol] = pc ;
oldcol = Qbtf [k] ;
pend = Ap [oldcol+1] ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
newrow = Pinv [Ai [p]] ;
if (newrow < k1)
{
nzoff++ ;
}
else
{
/* (newrow,newcol) is an entry in the block */
ASSERT (newrow < k2) ;
newrow -= k1 ;
Ci [pc++] = newrow ;
}
}
}
Cp [nk] = pc ;
maxnz = MAX (maxnz, pc) ;
ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ;
/* ------------------------------------------------------------------ */
/* order the block C */
/* ------------------------------------------------------------------ */
if (nk <= 3)
{
/* -------------------------------------------------------------- */
/* use natural ordering for tiny blocks (3-by-3 or less) */
/* -------------------------------------------------------------- */
for (k = 0 ; k < nk ; k++)
{
Pblk [k] = k ;
}
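/* worst-case (dense) estimates for this tiny block: nk*(nk+1)/2 entries
 * in L including the diagonal, and the corresponding dense factorization
 * work */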
lnz1 = nk * (nk + 1) / 2 ;
flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ;
ok = TRUE ;
}
else if (ordering == 0)
{
/* -------------------------------------------------------------- */
/* order the block with AMD (C+C') */
/* -------------------------------------------------------------- */
result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ;
ok = (result >= AMD_OK) ;
if (result == AMD_OUT_OF_MEMORY)
{
err = KLU_OUT_OF_MEMORY ;
}
/* account for memory usage in AMD */
Common->mempeak = MAX (Common->mempeak,
Common->memusage + amd_Info [AMD_MEMORY]) ;
/* get the ordering statistics from AMD */
lnz1 = (Int) (amd_Info [AMD_LNZ]) + nk ;
flops1 = 2 * amd_Info [AMD_NMULTSUBS_LU] + amd_Info [AMD_NDIV] ;
if (pc == maxnz)
{
/* get the symmetry of the biggest block */
Symbolic->symmetry = amd_Info [AMD_SYMMETRY] ;
}
}
else if (ordering == 1)
{
/* -------------------------------------------------------------- */
/* order the block with COLAMD (C) */
/* -------------------------------------------------------------- */
/* order (and destroy) Ci, returning column permutation in Cp.
* COLAMD "cannot" fail since the matrix has already been checked,
* and Ci allocated. */
ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;
lnz1 = EMPTY ;
flops1 = EMPTY ;
/* copy the permutation from Cp to Pblk */
for (k = 0 ; k < nk ; k++)
{
Pblk [k] = Cp [k] ;
}
}
else
{
/* -------------------------------------------------------------- */
/* pass the block to the user-provided ordering function */
/* -------------------------------------------------------------- */
lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
flops1 = EMPTY ;
ok = (lnz1 != 0) ;
}
if (!ok)
{
return (err) ; /* ordering method failed */
}
/* ------------------------------------------------------------------ */
/* keep track of nnz(L) and flops statistics */
/* ------------------------------------------------------------------ */
Lnz [block] = lnz1 ;
lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ;
flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ;
/* ------------------------------------------------------------------ */
/* combine the preordering with the BTF ordering */
/* ------------------------------------------------------------------ */
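/* The final P and Q are the composition of the BTF permutation with the
 * per-block fill-reducing permutation: row/column (k + k1) of the final
 * ordering is row/column Pblk [k] of this block in the BTF ordering. */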
PRINTF (("Pblk, 1-based:\n")) ;
for (k = 0 ; k < nk ; k++)
{
ASSERT (k + k1 < n) ;
ASSERT (Pblk [k] + k1 < n) ;
Q [k + k1] = Qbtf [Pblk [k] + k1] ;
}
for (k = 0 ; k < nk ; k++)
{
ASSERT (k + k1 < n) ;
ASSERT (Pblk [k] + k1 < n) ;
P [k + k1] = Pbtf [Pblk [k] + k1] ;
}
}
PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ;
ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;
/* return estimates of # of nonzeros in L including diagonal */
Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */
Symbolic->unz = lnz ;
Symbolic->nzoff = nzoff ;
Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */
return (KLU_OK) ;
}
/* ========================================================================== */
/* === order_and_analyze ==================================================== */
/* ========================================================================== */
/* Orders the matrix with or without BTF, then orders each block with AMD, COLAMD,
* or the user ordering function. Does not handle the natural or given
* ordering cases. */
static KLU_symbolic *order_and_analyze /* returns NULL if error, or a valid
KLU_symbolic object if successful */
(
/* inputs, not modified */
Int n, /* A is n-by-n */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
/* --------------------- */
KLU_common *Common
)
{
double work ;
KLU_symbolic *Symbolic ;
double *Lnz ;
Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
*Work ;
/* ---------------------------------------------------------------------- */
/* allocate the Symbolic object, and check input matrix */
/* ---------------------------------------------------------------------- */
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
if (Symbolic == NULL)
{
return (NULL) ;
}
P = Symbolic->P ;
Q = Symbolic->Q ;
R = Symbolic->R ;
Lnz = Symbolic->Lnz ;
nz = Symbolic->nz ;
ordering = Common->ordering ;
if (ordering == 1)
{
/* COLAMD */
Cilen = COLAMD_recommended (nz, n, n) ;
}
else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
{
/* AMD or user ordering function */
Cilen = nz+1 ;
}
else
{
/* invalid ordering */
Common->status = KLU_INVALID ;
KLU_free_symbolic (&Symbolic, Common) ;
return (NULL) ;
}
/* AMD memory management routines */
amd_malloc = Common->malloc_memory ;
amd_free = Common->free_memory ;
amd_calloc = Common->calloc_memory ;
amd_realloc = Common->realloc_memory ;
/* ---------------------------------------------------------------------- */
/* allocate workspace for BTF permutation */
/* ---------------------------------------------------------------------- */
Pbtf = KLU_malloc (n, sizeof (Int), Common) ;
Qbtf = KLU_malloc (n, sizeof (Int), Common) ;
if (Common->status < KLU_OK)
{
KLU_free (Pbtf, n, sizeof (Int), Common) ;
KLU_free (Qbtf, n, sizeof (Int), Common) ;
KLU_free_symbolic (&Symbolic, Common) ;
return (NULL) ;
}
/* ---------------------------------------------------------------------- */
/* get the common parameters for BTF and ordering method */
/* ---------------------------------------------------------------------- */
do_btf = Common->btf ;
do_btf = (do_btf) ? TRUE : FALSE ;
Symbolic->ordering = ordering ;
Symbolic->do_btf = do_btf ;
Symbolic->structural_rank = EMPTY ;
/* ---------------------------------------------------------------------- */
/* find the block triangular form (if requested) */
/* ---------------------------------------------------------------------- */
Common->work = 0 ;
if (do_btf)
{
Work = KLU_malloc (5*n, sizeof (Int), Common) ;
if (Common->status < KLU_OK)
{
/* out of memory */
KLU_free (Pbtf, n, sizeof (Int), Common) ;
KLU_free (Qbtf, n, sizeof (Int), Common) ;
KLU_free_symbolic (&Symbolic, Common) ;
return (NULL) ;
}
nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
&(Symbolic->structural_rank), Work) ;
Common->structural_rank = Symbolic->structural_rank ;
Common->work += work ;
KLU_free (Work, 5*n, sizeof (Int), Common) ;
/* unflip Qbtf if the matrix does not have full structural rank */
if (Symbolic->structural_rank < n)
{
for (k = 0 ; k < n ; k++)
{
Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ;
}
}
/* find the size of the largest block */
maxblock = 1 ;
for (block = 0 ; block < nblocks ; block++)
{
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("block %d size %d\n", block, nk)) ;
maxblock = MAX (maxblock, nk) ;
}
}
else
{
/* BTF not requested */
nblocks = 1 ;
maxblock = n ;
R [0] = 0 ;
R [1] = n ;
for (k = 0 ; k < n ; k++)
{
Pbtf [k] = k ;
Qbtf [k] = k ;
}
}
Symbolic->nblocks = nblocks ;
PRINTF (("maxblock size %d\n", maxblock)) ;
Symbolic->maxblock = maxblock ;
/* ---------------------------------------------------------------------- */
/* allocate more workspace, for analyze_worker */
/* ---------------------------------------------------------------------- */
Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ;
Cp = KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
Ci = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
Pinv = KLU_malloc (n, sizeof (Int), Common) ;
/* ---------------------------------------------------------------------- */
/* order each block of the BTF ordering, and a fill-reducing ordering */
/* ---------------------------------------------------------------------- */
if (Common->status == KLU_OK)
{
PRINTF (("calling analyze_worker\n")) ;
Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
PRINTF (("analyze_worker done\n")) ;
}
/* ---------------------------------------------------------------------- */
/* free all workspace */
/* ---------------------------------------------------------------------- */
KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
KLU_free (Pinv, n, sizeof (Int), Common) ;
KLU_free (Pbtf, n, sizeof (Int), Common) ;
KLU_free (Qbtf, n, sizeof (Int), Common) ;
/* ---------------------------------------------------------------------- */
/* return the symbolic object */
/* ---------------------------------------------------------------------- */
if (Common->status < KLU_OK)
{
KLU_free_symbolic (&Symbolic, Common) ;
}
return (Symbolic) ;
}
/* ========================================================================== */
/* === KLU_analyze ========================================================== */
/* ========================================================================== */
KLU_symbolic *KLU_analyze /* returns NULL if error, or a valid
KLU_symbolic object if successful */
(
/* inputs, not modified */
Int n, /* A is n-by-n */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
/* -------------------- */
KLU_common *Common
)
{
/* ---------------------------------------------------------------------- */
/* get the control parameters for BTF and ordering method */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (NULL) ;
}
Common->status = KLU_OK ;
Common->structural_rank = EMPTY ;
/* ---------------------------------------------------------------------- */
/* order and analyze */
/* ---------------------------------------------------------------------- */
if (Common->ordering == 2)
{
/* natural ordering */
return (KLU_analyze_given (n, Ap, Ai, NULL, NULL, Common)) ;
}
else
{
/* order with P and Q */
return (order_and_analyze (n, Ap, Ai, Common)) ;
}
}

369
src/maths/KLU/klu_analyze_given.c Normal file
View File

@ -0,0 +1,369 @@
/* ========================================================================== */
/* === klu_analyze_given ==================================================== */
/* ========================================================================== */
/* Given an input permutation P and Q, create the Symbolic object. BTF can
* be done to modify the user's P and Q (does not perform the max transversal;
* just finds the strongly-connected components). */
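/* An illustrative sketch (Puser and Quser are hypothetical user-supplied
 * permutations of size n; pass NULL for either one to use the identity):
 *
 *      Common.btf = 1 ;    keep the BTF post-processing of P and Q
 *      Symbolic = klu_analyze_given (n, Ap, Ai, Puser, Quser, &Common) ;
 */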
#include "klu_internal.h"
/* ========================================================================== */
/* === klu_alloc_symbolic =================================================== */
/* ========================================================================== */
/* Allocate Symbolic object, and check input matrix. Not user callable. */
KLU_symbolic *KLU_alloc_symbolic
(
Int n,
Int *Ap,
Int *Ai,
KLU_common *Common
)
{
KLU_symbolic *Symbolic ;
Int *P, *Q, *R ;
double *Lnz ;
Int nz, i, j, p, pend ;
if (Common == NULL)
{
return (NULL) ;
}
Common->status = KLU_OK ;
/* A is n-by-n, with n > 0. Ap [0] = 0 and nz = Ap [n] >= 0 required.
* Ap [j] <= Ap [j+1] must hold for all j = 0 to n-1. Row indices in Ai
* must be in the range 0 to n-1, and no duplicate entries can be present.
* The list of row indices in each column of A need not be sorted.
*/
if (n <= 0 || Ap == NULL || Ai == NULL)
{
/* Ap and Ai must be present, and n must be > 0 */
Common->status = KLU_INVALID ;
return (NULL) ;
}
nz = Ap [n] ;
if (Ap [0] != 0 || nz < 0)
{
/* nz must be >= 0 and Ap [0] must equal zero */
Common->status = KLU_INVALID ;
return (NULL) ;
}
for (j = 0 ; j < n ; j++)
{
if (Ap [j] > Ap [j+1])
{
/* column pointers must be non-decreasing */
Common->status = KLU_INVALID ;
return (NULL) ;
}
}
P = KLU_malloc (n, sizeof (Int), Common) ;
if (Common->status < KLU_OK)
{
/* out of memory */
Common->status = KLU_OUT_OF_MEMORY ;
return (NULL) ;
}
for (i = 0 ; i < n ; i++)
{
P [i] = EMPTY ;
}
for (j = 0 ; j < n ; j++)
{
pend = Ap [j+1] ;
for (p = Ap [j] ; p < pend ; p++)
{
i = Ai [p] ;
if (i < 0 || i >= n || P [i] == j)
{
/* row index out of range, or duplicate entry */
KLU_free (P, n, sizeof (Int), Common) ;
Common->status = KLU_INVALID ;
return (NULL) ;
}
/* flag row i as appearing in column j */
P [i] = j ;
}
}
/* ---------------------------------------------------------------------- */
/* allocate the Symbolic object */
/* ---------------------------------------------------------------------- */
Symbolic = KLU_malloc (sizeof (KLU_symbolic), 1, Common) ;
if (Common->status < KLU_OK)
{
/* out of memory */
KLU_free (P, n, sizeof (Int), Common) ;
Common->status = KLU_OUT_OF_MEMORY ;
return (NULL) ;
}
Q = KLU_malloc (n, sizeof (Int), Common) ;
R = KLU_malloc (n+1, sizeof (Int), Common) ;
Lnz = KLU_malloc (n, sizeof (double), Common) ;
Symbolic->n = n ;
Symbolic->nz = nz ;
Symbolic->P = P ;
Symbolic->Q = Q ;
Symbolic->R = R ;
Symbolic->Lnz = Lnz ;
if (Common->status < KLU_OK)
{
/* out of memory */
KLU_free_symbolic (&Symbolic, Common) ;
Common->status = KLU_OUT_OF_MEMORY ;
return (NULL) ;
}
return (Symbolic) ;
}
/* ========================================================================== */
/* === KLU_analyze_given ==================================================== */
/* ========================================================================== */
KLU_symbolic *KLU_analyze_given /* returns NULL if error, or a valid
KLU_symbolic object if successful */
(
/* inputs, not modified */
Int n, /* A is n-by-n */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
Int Puser [ ], /* size n, user's row permutation (may be NULL) */
Int Quser [ ], /* size n, user's column permutation (may be NULL) */
/* -------------------- */
KLU_common *Common
)
{
KLU_symbolic *Symbolic ;
double *Lnz ;
Int nblocks, nz, block, maxblock, *P, *Q, *R, nzoff, p, pend, do_btf, k ;
/* ---------------------------------------------------------------------- */
/* determine if input matrix is valid, and get # of nonzeros */
/* ---------------------------------------------------------------------- */
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
if (Symbolic == NULL)
{
return (NULL) ;
}
P = Symbolic->P ;
Q = Symbolic->Q ;
R = Symbolic->R ;
Lnz = Symbolic->Lnz ;
nz = Symbolic->nz ;
/* ---------------------------------------------------------------------- */
/* Q = Quser, or identity if Quser is NULL */
/* ---------------------------------------------------------------------- */
if (Quser == (Int *) NULL)
{
for (k = 0 ; k < n ; k++)
{
Q [k] = k ;
}
}
else
{
for (k = 0 ; k < n ; k++)
{
Q [k] = Quser [k] ;
}
}
/* ---------------------------------------------------------------------- */
/* get the control parameters for BTF and ordering method */
/* ---------------------------------------------------------------------- */
do_btf = Common->btf ;
do_btf = (do_btf) ? TRUE : FALSE ;
Symbolic->ordering = 2 ;
Symbolic->do_btf = do_btf ;
/* ---------------------------------------------------------------------- */
/* find the block triangular form, if requested */
/* ---------------------------------------------------------------------- */
if (do_btf)
{
/* ------------------------------------------------------------------ */
/* get workspace for BTF_strongcomp */
/* ------------------------------------------------------------------ */
Int *Pinv, *Work, *Bi, k1, k2, nk, oldcol ;
Work = KLU_malloc (4*n, sizeof (Int), Common) ;
Pinv = KLU_malloc (n, sizeof (Int), Common) ;
if (Puser != (Int *) NULL)
{
Bi = KLU_malloc (nz+1, sizeof (Int), Common) ;
}
else
{
Bi = Ai ;
}
if (Common->status < KLU_OK)
{
/* out of memory */
KLU_free (Work, 4*n, sizeof (Int), Common) ;
KLU_free (Pinv, n, sizeof (Int), Common) ;
if (Puser != (Int *) NULL)
{
KLU_free (Bi, nz+1, sizeof (Int), Common) ;
}
KLU_free_symbolic (&Symbolic, Common) ;
Common->status = KLU_OUT_OF_MEMORY ;
return (NULL) ;
}
/* ------------------------------------------------------------------ */
/* B = Puser * A */
/* ------------------------------------------------------------------ */
if (Puser != (Int *) NULL)
{
for (k = 0 ; k < n ; k++)
{
Pinv [Puser [k]] = k ;
}
for (p = 0 ; p < nz ; p++)
{
Bi [p] = Pinv [Ai [p]] ;
}
}
/* ------------------------------------------------------------------ */
/* find the strongly-connected components */
/* ------------------------------------------------------------------ */
/* modifies Q, and determines P and R */
nblocks = BTF_strongcomp (n, Ap, Bi, Q, P, R, Work) ;
/* ------------------------------------------------------------------ */
/* P = P * Puser */
/* ------------------------------------------------------------------ */
if (Puser != (Int *) NULL)
{
for (k = 0 ; k < n ; k++)
{
Work [k] = Puser [P [k]] ;
}
for (k = 0 ; k < n ; k++)
{
P [k] = Work [k] ;
}
}
/* ------------------------------------------------------------------ */
/* Pinv = inverse of P */
/* ------------------------------------------------------------------ */
for (k = 0 ; k < n ; k++)
{
Pinv [P [k]] = k ;
}
/* ------------------------------------------------------------------ */
/* analyze each block */
/* ------------------------------------------------------------------ */
nzoff = 0 ; /* nz in off-diagonal part */
maxblock = 1 ; /* size of the largest block */
for (block = 0 ; block < nblocks ; block++)
{
/* -------------------------------------------------------------- */
/* the block is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
maxblock = MAX (maxblock, nk) ;
/* -------------------------------------------------------------- */
/* scan the kth block, C */
/* -------------------------------------------------------------- */
for (k = k1 ; k < k2 ; k++)
{
oldcol = Q [k] ;
pend = Ap [oldcol+1] ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
if (Pinv [Ai [p]] < k1)
{
nzoff++ ;
}
}
}
/* fill-in not estimated */
Lnz [block] = EMPTY ;
}
/* ------------------------------------------------------------------ */
/* free all workspace */
/* ------------------------------------------------------------------ */
KLU_free (Work, 4*n, sizeof (Int), Common) ;
KLU_free (Pinv, n, sizeof (Int), Common) ;
if (Puser != (Int *) NULL)
{
KLU_free (Bi, nz+1, sizeof (Int), Common) ;
}
}
else
{
/* ------------------------------------------------------------------ */
/* BTF not requested */
/* ------------------------------------------------------------------ */
nzoff = 0 ;
nblocks = 1 ;
maxblock = n ;
R [0] = 0 ;
R [1] = n ;
Lnz [0] = EMPTY ;
/* ------------------------------------------------------------------ */
/* P = Puser, or identity if Puser is NULL */
/* ------------------------------------------------------------------ */
for (k = 0 ; k < n ; k++)
{
P [k] = (Puser == NULL) ? k : Puser [k] ;
}
}
/* ---------------------------------------------------------------------- */
/* return the symbolic object */
/* ---------------------------------------------------------------------- */
Symbolic->nblocks = nblocks ;
Symbolic->maxblock = maxblock ;
Symbolic->lnz = EMPTY ;
Symbolic->unz = EMPTY ;
Symbolic->nzoff = nzoff ;
return (Symbolic) ;
}

60
src/maths/KLU/klu_defaults.c Normal file
View File

@ -0,0 +1,60 @@
/* ========================================================================== */
/* === KLU_defaults ========================================================= */
/* ========================================================================== */
/* Sets default parameters for KLU */
#include "klu_internal.h"
Int KLU_defaults
(
KLU_common *Common
)
{
if (Common == NULL)
{
return (FALSE) ;
}
/* parameters */
Common->tol = 0.001 ; /* pivot tolerance for diagonal */
Common->memgrow = 1.2; /* realloc size ratio increase for LU factors */
Common->initmem_amd = 1.2 ; /* init. mem with AMD: c*nnz(L) + n */
Common->initmem = 10 ; /* init. mem otherwise: c*nnz(A) + n */
Common->btf = TRUE ; /* use BTF pre-ordering, or not */
Common->maxwork = 0 ; /* no limit to work done by btf_order */
Common->ordering = 0 ; /* 0: AMD, 1: COLAMD, 2: user-provided P and Q,
* 3: user-provided function */
Common->scale = 2 ; /* scale: -1: none, and do not check for errors
* in the input matrix in KLU_refactor.
* 0: none, but check for errors,
* 1: sum, 2: max */
Common->halt_if_singular = TRUE ; /* quick halt if matrix is singular */
/* memory management routines */
Common->malloc_memory = malloc ;
Common->calloc_memory = calloc ;
Common->free_memory = free ;
Common->realloc_memory = realloc ;
/* user ordering function and optional argument */
Common->user_order = NULL ;
Common->user_data = NULL ;
/* statistics */
Common->status = KLU_OK ;
Common->nrealloc = 0 ;
Common->structural_rank = EMPTY ;
Common->numerical_rank = EMPTY ;
Common->noffdiag = EMPTY ;
Common->flops = EMPTY ;
Common->rcond = EMPTY ;
Common->condest = EMPTY ;
Common->rgrowth = EMPTY ;
Common->work = 0 ; /* work done by btf_order */
Common->memusage = 0 ;
Common->mempeak = 0 ;
return (TRUE) ;
}
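/* An illustrative sketch: set the defaults first, then override individual
 * fields before calling klu_analyze / klu_factor.  For example:
 *
 *      klu_common Common ;
 *      klu_defaults (&Common) ;
 *      Common.ordering = 1 ;   use COLAMD instead of AMD
 *      Common.btf = 0 ;        disable the BTF pre-ordering
 *      Common.tol = 0.1 ;      require a larger diagonal entry before
 *                              accepting it as the pivot
 */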

570
src/maths/KLU/klu_diagnostics.c Normal file
View File

@ -0,0 +1,570 @@
/* ========================================================================== */
/* === KLU_diagnostics ====================================================== */
/* ========================================================================== */
/* Linear algebraic diagnostics:
* KLU_rgrowth: reciprocal pivot growth, takes O(|A|+|U|) time
* KLU_condest: condition number estimator, takes about O(|A|+5*(|L|+|U|)) time
* KLU_flops: compute # flops required to factorize A into L*U
* KLU_rcond: compute a really cheap estimate of the reciprocal of the
* condition number, min(abs(diag(U))) / max(abs(diag(U))).
* Takes O(n) time.
*/
#include "klu_internal.h"
/* ========================================================================== */
/* === KLU_rgrowth ========================================================== */
/* ========================================================================== */
/* Compute the reciprocal pivot growth factor. In MATLAB notation:
*
* rgrowth = min (max (abs ((R \ A (p,q)) - F)) ./ max (abs (U)))
*/
Int KLU_rgrowth /* return TRUE if successful, FALSE otherwise */
(
Int *Ap,
Int *Ai,
double *Ax,
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
KLU_common *Common
)
{
double temp, max_ai, max_ui, min_block_rgrowth ;
Entry aik ;
Int *Q, *Ui, *Uip, *Ulen, *Pinv ;
Unit *LU ;
Entry *Aentry, *Ux, *Ukk ;
double *Rs ;
Int i, newrow, oldrow, k1, k2, nk, j, oldcol, k, pend, len ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Symbolic == NULL || Ap == NULL || Ai == NULL || Ax == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
if (Numeric == NULL)
{
/* treat this as a singular matrix */
Common->rgrowth = 0 ;
Common->status = KLU_SINGULAR ;
return (TRUE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* compute the reciprocal pivot growth */
/* ---------------------------------------------------------------------- */
Aentry = (Entry *) Ax ;
Pinv = Numeric->Pinv ;
Rs = Numeric->Rs ;
Q = Symbolic->Q ;
Common->rgrowth = 1 ;
for (i = 0 ; i < Symbolic->nblocks ; i++)
{
k1 = Symbolic->R[i] ;
k2 = Symbolic->R[i+1] ;
nk = k2 - k1 ;
if (nk == 1)
{
continue ; /* skip singleton blocks */
}
LU = (Unit *) Numeric->LUbx[i] ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
Ukk = ((Entry *) Numeric->Udiag) + k1 ;
min_block_rgrowth = 1 ;
for (j = 0 ; j < nk ; j++)
{
max_ai = 0 ;
max_ui = 0 ;
oldcol = Q[j + k1] ;
pend = Ap [oldcol + 1] ;
for (k = Ap [oldcol] ; k < pend ; k++)
{
oldrow = Ai [k] ;
newrow = Pinv [oldrow] ;
if (newrow < k1)
{
continue ; /* skip entry outside the block */
}
ASSERT (newrow < k2) ;
if (Rs != NULL)
{
/* aik = Aentry [k] / Rs [oldrow] */
SCALE_DIV_ASSIGN (aik, Aentry [k], Rs [newrow]) ;
}
else
{
aik = Aentry [k] ;
}
/* temp = ABS (aik) */
ABS (temp, aik) ;
if (temp > max_ai)
{
max_ai = temp ;
}
}
GET_POINTER (LU, Uip, Ulen, Ui, Ux, j, len) ;
for (k = 0 ; k < len ; k++)
{
/* temp = ABS (Ux [k]) */
ABS (temp, Ux [k]) ;
if (temp > max_ui)
{
max_ui = temp ;
}
}
/* consider the diagonal element */
ABS (temp, Ukk [j]) ;
if (temp > max_ui)
{
max_ui = temp ;
}
/* if max_ui is 0, skip the column */
if (SCALAR_IS_ZERO (max_ui))
{
continue ;
}
temp = max_ai / max_ui ;
if (temp < min_block_rgrowth)
{
min_block_rgrowth = temp ;
}
}
if (min_block_rgrowth < Common->rgrowth)
{
Common->rgrowth = min_block_rgrowth ;
}
}
return (TRUE) ;
}
/* ========================================================================== */
/* === KLU_condest ========================================================== */
/* ========================================================================== */
/* Estimate the condition number. Uses Higham and Tisseur's algorithm
* (A block algorithm for matrix 1-norm estimation, with applications to
* 1-norm pseudospectra, SIAM J. Matrix Anal. Appl., 21(4):1185-1201, 2000).
*/
Int KLU_condest /* return TRUE if successful, FALSE otherwise */
(
Int Ap [ ],
double Ax [ ],
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
KLU_common *Common
)
{
double xj, Xmax, csum, anorm, ainv_norm, est_old, est_new, abs_value ;
Entry *Udiag, *Aentry, *X, *S ;
Int *R ;
Int nblocks, i, j, jmax, jnew, pend, n ;
#ifndef COMPLEX
Int unchanged ;
#endif
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Symbolic == NULL || Ap == NULL || Ax == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
abs_value = 0 ;
if (Numeric == NULL)
{
/* treat this as a singular matrix */
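/* abs_value is zero here, so the division below intentionally yields +Inf */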
Common->condest = 1 / abs_value ;
Common->status = KLU_SINGULAR ;
return (TRUE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* get inputs */
/* ---------------------------------------------------------------------- */
n = Symbolic->n ;
nblocks = Symbolic->nblocks ;
R = Symbolic->R ;
Udiag = Numeric->Udiag ;
/* ---------------------------------------------------------------------- */
/* check if diagonal of U has a zero on it */
/* ---------------------------------------------------------------------- */
for (i = 0 ; i < n ; i++)
{
ABS (abs_value, Udiag [i]) ;
if (SCALAR_IS_ZERO (abs_value))
{
Common->condest = 1 / abs_value ;
Common->status = KLU_SINGULAR ;
return (TRUE) ;
}
}
/* ---------------------------------------------------------------------- */
/* compute 1-norm (maximum column sum) of the matrix */
/* ---------------------------------------------------------------------- */
anorm = 0.0 ;
Aentry = (Entry *) Ax ;
for (i = 0 ; i < n ; i++)
{
pend = Ap [i + 1] ;
csum = 0.0 ;
for (j = Ap [i] ; j < pend ; j++)
{
ABS (abs_value, Aentry [j]) ;
csum += abs_value ;
}
if (csum > anorm)
{
anorm = csum ;
}
}
/* ---------------------------------------------------------------------- */
/* compute estimate of 1-norm of inv (A) */
/* ---------------------------------------------------------------------- */
/* get workspace (size 2*n Entry's) */
X = Numeric->Xwork ; /* size n space used in KLU_solve, tsolve */
X += n ; /* X is size n */
S = X + n ; /* S is size n */
for (i = 0 ; i < n ; i++)
{
CLEAR (S [i]) ;
CLEAR (X [i]) ;
REAL (X [i]) = 1.0 / ((double) n) ;
}
jmax = 0 ;
ainv_norm = 0.0 ;
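/* Hager/Higham 1-norm estimation: repeatedly solve with A and its (conjugate)
 * transpose, at most 5 times, steering each solve with the sign (or
 * unit-modulus) vector S; stop early when the estimate no longer increases or
 * when the index of the largest entry repeats */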
for (i = 0 ; i < 5 ; i++)
{
if (i > 0)
{
/* X [jmax] is the largest entry in X */
for (j = 0 ; j < n ; j++)
{
/* X [j] = 0 ;*/
CLEAR (X [j]) ;
}
REAL (X [jmax]) = 1 ;
}
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ;
est_old = ainv_norm ;
ainv_norm = 0.0 ;
for (j = 0 ; j < n ; j++)
{
/* ainv_norm += ABS (X [j]) ;*/
ABS (abs_value, X [j]) ;
ainv_norm += abs_value ;
}
#ifndef COMPLEX
unchanged = TRUE ;
for (j = 0 ; j < n ; j++)
{
double s = (X [j] >= 0) ? 1 : -1 ;
if (s != (Int) REAL (S [j]))
{
S [j] = s ;
unchanged = FALSE ;
}
}
if (i > 0 && (ainv_norm <= est_old || unchanged))
{
break ;
}
#else
for (j = 0 ; j < n ; j++)
{
if (IS_NONZERO (X [j]))
{
ABS (abs_value, X [j]) ;
SCALE_DIV_ASSIGN (S [j], X [j], abs_value) ;
}
else
{
CLEAR (S [j]) ;
REAL (S [j]) = 1 ;
}
}
if (i > 0 && ainv_norm <= est_old)
{
break ;
}
#endif
for (j = 0 ; j < n ; j++)
{
X [j] = S [j] ;
}
#ifndef COMPLEX
/* do a transpose solve */
KLU_tsolve (Symbolic, Numeric, n, 1, X, Common) ;
#else
/* do a conjugate transpose solve */
KLU_tsolve (Symbolic, Numeric, n, 1, (double *) X, 1, Common) ;
#endif
/* jnew = the position of the largest entry in X */
jnew = 0 ;
Xmax = 0 ;
for (j = 0 ; j < n ; j++)
{
/* xj = ABS (X [j]) ;*/
ABS (xj, X [j]) ;
if (xj > Xmax)
{
Xmax = xj ;
jnew = j ;
}
}
if (i > 0 && jnew == jmax)
{
/* the position of the largest entry did not change
* from the previous iteration */
break ;
}
jmax = jnew ;
}
/* ---------------------------------------------------------------------- */
/* compute another estimate of norm(inv(A),1), and take the largest one */
/* ---------------------------------------------------------------------- */
for (j = 0 ; j < n ; j++)
{
CLEAR (X [j]) ;
if (j % 2)
{
REAL (X [j]) = 1 + ((double) j) / ((double) (n-1)) ;
}
else
{
REAL (X [j]) = -1 - ((double) j) / ((double) (n-1)) ;
}
}
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ;
est_new = 0.0 ;
for (j = 0 ; j < n ; j++)
{
/* est_new += ABS (X [j]) ;*/
ABS (abs_value, X [j]) ;
est_new += abs_value ;
}
est_new = 2 * est_new / (3 * n) ;
ainv_norm = MAX (est_new, ainv_norm) ;
/* ---------------------------------------------------------------------- */
/* compute estimate of condition number */
/* ---------------------------------------------------------------------- */
Common->condest = ainv_norm * anorm ;
return (TRUE) ;
}
/* ========================================================================== */
/* === KLU_flops ============================================================ */
/* ========================================================================== */
/* Compute the flop count for the LU factorization (in Common->flops) */
Int KLU_flops /* return TRUE if successful, FALSE otherwise */
(
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
KLU_common *Common
)
{
double flops = 0 ;
Int *R, *Ui, *Uip, *Llen, *Ulen ;
Unit **LUbx ;
Unit *LU ;
Int k, ulen, p, n, nk, block, nblocks, k1 ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
Common->flops = EMPTY ;
if (Numeric == NULL || Symbolic == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
n = Symbolic->n ;
R = Symbolic->R ;
nblocks = Symbolic->nblocks ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Numeric object */
/* ---------------------------------------------------------------------- */
LUbx = (Unit **) Numeric->LUbx ;
/* ---------------------------------------------------------------------- */
/* compute the flop count */
/* ---------------------------------------------------------------------- */
for (block = 0 ; block < nblocks ; block++)
{
k1 = R [block] ;
nk = R [block+1] - k1 ;
if (nk > 1)
{
Llen = Numeric->Llen + k1 ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
LU = LUbx [block] ;
for (k = 0 ; k < nk ; k++)
{
/* compute kth column of U, and update kth column of A */
GET_I_POINTER (LU, Uip, Ui, k) ;
ulen = Ulen [k] ;
for (p = 0 ; p < ulen ; p++)
{
flops += 2 * Llen [Ui [p]] ;
}
/* gather and divide by pivot to get kth column of L */
flops += Llen [k] ;
}
}
}
Common->flops = flops ;
return (TRUE) ;
}
/* ========================================================================== */
/* === KLU_rcond ============================================================ */
/* ========================================================================== */
/* Compute a really cheap estimate of the reciprocal of the condition number,
* min(abs(diag(U))) / max(abs(diag(U))). If U has a zero
* pivot, or a NaN pivot, rcond will be zero. Takes O(n) time.
*/
Int KLU_rcond /* return TRUE if successful, FALSE otherwise */
(
KLU_symbolic *Symbolic, /* input, not modified */
KLU_numeric *Numeric, /* input, not modified */
KLU_common *Common /* result in Common->rcond */
)
{
double ukk, umin = 0, umax = 0 ;
Entry *Udiag ;
Int j, n ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Symbolic == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
if (Numeric == NULL)
{
Common->rcond = 0 ;
Common->status = KLU_SINGULAR ;
return (TRUE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* compute rcond */
/* ---------------------------------------------------------------------- */
n = Symbolic->n ;
Udiag = Numeric->Udiag ;
for (j = 0 ; j < n ; j++)
{
/* get the magnitude of the pivot */
ABS (ukk, Udiag [j]) ;
if (SCALAR_IS_NAN (ukk) || SCALAR_IS_ZERO (ukk))
{
/* if NaN, or zero, the rcond is zero */
Common->rcond = 0 ;
Common->status = KLU_SINGULAR ;
return (TRUE) ;
}
if (j == 0)
{
/* first pivot entry */
umin = ukk ;
umax = ukk ;
}
else
{
/* subsequent pivots */
umin = MIN (umin, ukk) ;
umax = MAX (umax, ukk) ;
}
}
Common->rcond = umin / umax ;
if (SCALAR_IS_NAN (Common->rcond) || SCALAR_IS_ZERO (Common->rcond))
{
/* this can occur if umin or umax are Inf or NaN */
Common->rcond = 0 ;
Common->status = KLU_SINGULAR ;
}
return (TRUE) ;
}

142
src/maths/KLU/klu_dump.c Normal file
View File

@ -0,0 +1,142 @@
/* ========================================================================== */
/* === KLU_dump ============================================================= */
/* ========================================================================== */
/* Debug routines for klu. Only used when NDEBUG is not defined at
* compile-time.
*/
#include "klu_internal.h"
#ifndef NDEBUG
/* ========================================================================== */
/* === KLU_valid ============================================================ */
/* ========================================================================== */
/* Check if a column-form matrix is valid or not. The matrix A is
* n-by-n. The row indices of entries in column j are in
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are:
*
* n >= 0
* nz = Ap [n_col] >= 0 number of entries in the matrix
* Ap [0] == 0
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col.
* row indices in Ai [Ap [j] ... Ap [j+1]-1]
* must be in the range 0 to n_row-1,
* and no duplicate entries can exist (duplicates not checked here).
*
* Not user-callable. Only used when debugging.
*/
Int KLU_valid (Int n, Int Ap [ ], Int Ai [ ], Entry Ax [ ])
{
Int nz, j, p1, p2, i, p ;
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ;
if (n <= 0)
{
PRINTF (("n must be >= 0: %d\n", n)) ;
return (FALSE) ;
}
nz = Ap [n] ;
if (Ap [0] != 0 || nz < 0)
{
/* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */
PRINTF (("column 0 pointer bad or nz < 0\n")) ;
return (FALSE) ;
}
for (j = 0 ; j < n ; j++)
{
p1 = Ap [j] ;
p2 = Ap [j+1] ;
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ;
if (p1 > p2)
{
/* column pointers must be ascending */
PRINTF (("column %d pointer bad\n", j)) ;
return (FALSE) ;
}
for (p = p1 ; p < p2 ; p++)
{
i = Ai [p] ;
PRINTF (("row: %d", i)) ;
if (i < 0 || i >= n)
{
/* row index out of range */
PRINTF (("index out of range, col %d row %d\n", j, i)) ;
return (FALSE) ;
}
if (Ax != (Entry *) NULL)
{
PRINT_ENTRY (Ax [p]) ;
}
PRINTF (("\n")) ;
}
}
return (TRUE) ;
}
/* ========================================================================== */
/* === KLU_valid_LU ========================================================= */
/* ========================================================================== */
/* This function does the same validity tests as KLU_valid but for the
* LU factor storage format. The flag flag_test_start_ptr is used to
* test if Xip [0] = 0. This is not applicable for U. So when calling this
* function for U, the flag should be set to false. Only used when debugging.
*/
Int KLU_valid_LU (Int n, Int flag_test_start_ptr, Int Xip [ ],
Int Xlen [ ], Unit LU [ ])
{
Int *Xi ;
Entry *Xx ;
Int j, p1, p2, i, p, len ;
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ;
if (n <= 0)
{
PRINTF (("n must be >= 0: %d\n", n)) ;
return (FALSE) ;
}
if (flag_test_start_ptr && Xip [0] != 0)
{
/* column pointers must start at Xip [0] = 0*/
PRINTF (("column 0 pointer bad\n")) ;
return (FALSE) ;
}
for (j = 0 ; j < n ; j++)
{
p1 = Xip [j] ;
p2 = Xip [j+1] ;
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ;
if (p1 > p2)
{
/* column pointers must be ascending */
PRINTF (("column %d pointer bad\n", j)) ;
return (FALSE) ;
}
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
for (p = 0 ; p < len ; p++)
{
i = Xi [p] ;
PRINTF (("row: %d", i)) ;
if (i < 0 || i >= n)
{
/* row index out of range */
PRINTF (("index out of range, col %d row %d\n", j, i)) ;
return (FALSE) ;
}
if (Xx != (Entry *) NULL)
{
PRINT_ENTRY (Xx [p]) ;
}
PRINTF (("\n")) ;
}
}
return (TRUE) ;
}
#endif

290
src/maths/KLU/klu_extract.c Normal file
View File

@ -0,0 +1,290 @@
/* ========================================================================== */
/* === KLU_extract ========================================================== */
/* ========================================================================== */
/* Extract KLU factorization into conventional compressed-column matrices.
* If any output array is NULL, that part of the LU factorization is not
* extracted (this is not an error condition).
*
* nnz(L) = Numeric->lnz, nnz(U) = Numeric->unz, and nnz(F) = Numeric->Offp [n]
*/
#include "klu_internal.h"
Int KLU_extract /* returns TRUE if successful, FALSE otherwise */
(
/* inputs: */
KLU_numeric *Numeric,
KLU_symbolic *Symbolic,
/* outputs, all of which must be allocated on input */
/* L */
Int *Lp, /* size n+1 */
Int *Li, /* size nnz(L) */
double *Lx, /* size nnz(L) */
#ifdef COMPLEX
double *Lz, /* size nnz(L) for the complex case, ignored if real */
#endif
/* U */
Int *Up, /* size n+1 */
Int *Ui, /* size nnz(U) */
double *Ux, /* size nnz(U) */
#ifdef COMPLEX
double *Uz, /* size nnz(U) for the complex case, ignored if real */
#endif
/* F */
Int *Fp, /* size n+1 */
Int *Fi, /* size nnz(F) */
double *Fx, /* size nnz(F) */
#ifdef COMPLEX
double *Fz, /* size nnz(F) for the complex case, ignored if real */
#endif
/* P, row permutation */
Int *P, /* size n */
/* Q, column permutation */
Int *Q, /* size n */
/* Rs, scale factors */
double *Rs, /* size n */
/* R, block boundaries */
Int *R, /* size nblocks+1 */
KLU_common *Common
)
{
Int *Lip, *Llen, *Uip, *Ulen, *Li2, *Ui2 ;
Unit *LU ;
Entry *Lx2, *Ux2, *Ukk ;
Int i, k, block, nblocks, n, nz, k1, k2, nk, len, kk, p ;
if (Common == NULL)
{
return (FALSE) ;
}
if (Symbolic == NULL || Numeric == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->status = KLU_OK ;
n = Symbolic->n ;
nblocks = Symbolic->nblocks ;
/* ---------------------------------------------------------------------- */
/* extract scale factors */
/* ---------------------------------------------------------------------- */
if (Rs != NULL)
{
if (Numeric->Rs != NULL)
{
for (i = 0 ; i < n ; i++)
{
Rs [i] = Numeric->Rs [i] ;
}
}
else
{
/* no scaling */
for (i = 0 ; i < n ; i++)
{
Rs [i] = 1 ;
}
}
}
/* ---------------------------------------------------------------------- */
/* extract block boundaries */
/* ---------------------------------------------------------------------- */
if (R != NULL)
{
for (block = 0 ; block <= nblocks ; block++)
{
R [block] = Symbolic->R [block] ;
}
}
/* ---------------------------------------------------------------------- */
/* extract final row permutation */
/* ---------------------------------------------------------------------- */
if (P != NULL)
{
for (k = 0 ; k < n ; k++)
{
P [k] = Numeric->Pnum [k] ;
}
}
/* ---------------------------------------------------------------------- */
/* extract column permutation */
/* ---------------------------------------------------------------------- */
if (Q != NULL)
{
for (k = 0 ; k < n ; k++)
{
Q [k] = Symbolic->Q [k] ;
}
}
/* ---------------------------------------------------------------------- */
/* extract each block of L */
/* ---------------------------------------------------------------------- */
if (Lp != NULL && Li != NULL && Lx != NULL
#ifdef COMPLEX
&& Lz != NULL
#endif
)
{
nz = 0 ;
for (block = 0 ; block < nblocks ; block++)
{
k1 = Symbolic->R [block] ;
k2 = Symbolic->R [block+1] ;
nk = k2 - k1 ;
if (nk == 1)
{
/* singleton block */
Lp [k1] = nz ;
Li [nz] = k1 ;
Lx [nz] = 1 ;
#ifdef COMPLEX
Lz [nz] = 0 ;
#endif
nz++ ;
}
else
{
/* non-singleton block */
LU = Numeric->LUbx [block] ;
Lip = Numeric->Lip + k1 ;
Llen = Numeric->Llen + k1 ;
for (kk = 0 ; kk < nk ; kk++)
{
Lp [k1+kk] = nz ;
/* add the unit diagonal entry */
Li [nz] = k1 + kk ;
Lx [nz] = 1 ;
#ifdef COMPLEX
Lz [nz] = 0 ;
#endif
nz++ ;
GET_POINTER (LU, Lip, Llen, Li2, Lx2, kk, len) ;
for (p = 0 ; p < len ; p++)
{
Li [nz] = k1 + Li2 [p] ;
Lx [nz] = REAL (Lx2 [p]) ;
#ifdef COMPLEX
Lz [nz] = IMAG (Lx2 [p]) ;
#endif
nz++ ;
}
}
}
}
Lp [n] = nz ;
ASSERT (nz == Numeric->lnz) ;
}
/* ---------------------------------------------------------------------- */
/* extract each block of U */
/* ---------------------------------------------------------------------- */
if (Up != NULL && Ui != NULL && Ux != NULL
#ifdef COMPLEX
&& Uz != NULL
#endif
)
{
nz = 0 ;
for (block = 0 ; block < nblocks ; block++)
{
k1 = Symbolic->R [block] ;
k2 = Symbolic->R [block+1] ;
nk = k2 - k1 ;
Ukk = ((Entry *) Numeric->Udiag) + k1 ;
if (nk == 1)
{
/* singleton block */
Up [k1] = nz ;
Ui [nz] = k1 ;
Ux [nz] = REAL (Ukk [0]) ;
#ifdef COMPLEX
Uz [nz] = IMAG (Ukk [0]) ;
#endif
nz++ ;
}
else
{
/* non-singleton block */
LU = Numeric->LUbx [block] ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
for (kk = 0 ; kk < nk ; kk++)
{
Up [k1+kk] = nz ;
GET_POINTER (LU, Uip, Ulen, Ui2, Ux2, kk, len) ;
for (p = 0 ; p < len ; p++)
{
Ui [nz] = k1 + Ui2 [p] ;
Ux [nz] = REAL (Ux2 [p]) ;
#ifdef COMPLEX
Uz [nz] = IMAG (Ux2 [p]) ;
#endif
nz++ ;
}
/* add the diagonal entry */
Ui [nz] = k1 + kk ;
Ux [nz] = REAL (Ukk [kk]) ;
#ifdef COMPLEX
Uz [nz] = IMAG (Ukk [kk]) ;
#endif
nz++ ;
}
}
}
Up [n] = nz ;
ASSERT (nz == Numeric->unz) ;
}
/* ---------------------------------------------------------------------- */
/* extract the off-diagonal blocks, F */
/* ---------------------------------------------------------------------- */
if (Fp != NULL && Fi != NULL && Fx != NULL
#ifdef COMPLEX
&& Fz != NULL
#endif
)
{
for (k = 0 ; k <= n ; k++)
{
Fp [k] = Numeric->Offp [k] ;
}
nz = Fp [n] ;
for (k = 0 ; k < nz ; k++)
{
Fi [k] = Numeric->Offi [k] ;
}
for (k = 0 ; k < nz ; k++)
{
Fx [k] = REAL (((Entry *) Numeric->Offx) [k]) ;
#ifdef COMPLEX
Fz [k] = IMAG (((Entry *) Numeric->Offx) [k]) ;
#endif
}
}
return (TRUE) ;
}

545
src/maths/KLU/klu_factor.c Normal file
View File

@ -0,0 +1,545 @@
/* ========================================================================== */
/* === KLU_factor =========================================================== */
/* ========================================================================== */
/* Factor the matrix, after ordering and analyzing it with KLU_analyze
* or KLU_analyze_given.
*/
#include "klu_internal.h"
/* ========================================================================== */
/* === KLU_factor2 ========================================================== */
/* ========================================================================== */
static void factor2
(
/* inputs, not modified */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
Entry Ax [ ],
KLU_symbolic *Symbolic,
/* inputs, modified on output: */
KLU_numeric *Numeric,
KLU_common *Common
)
{
double lsize ;
double *Lnz, *Rs ;
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
*Lip, *Uip, *Llen, *Ulen ;
Entry *Offx, *X, s, *Udiag ;
Unit **LUbx ;
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
max_unz_block ;
/* ---------------------------------------------------------------------- */
/* initializations */
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
n = Symbolic->n ;
P = Symbolic->P ;
Q = Symbolic->Q ;
R = Symbolic->R ;
Lnz = Symbolic->Lnz ;
nblocks = Symbolic->nblocks ;
nzoff = Symbolic->nzoff ;
Pnum = Numeric->Pnum ;
Offp = Numeric->Offp ;
Offi = Numeric->Offi ;
Offx = (Entry *) Numeric->Offx ;
Lip = Numeric->Lip ;
Uip = Numeric->Uip ;
Llen = Numeric->Llen ;
Ulen = Numeric->Ulen ;
LUbx = (Unit **) Numeric->LUbx ;
Udiag = Numeric->Udiag ;
Rs = Numeric->Rs ;
Pinv = Numeric->Pinv ;
X = (Entry *) Numeric->Xwork ; /* X is of size n */
Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
/* 1*maxblock for Pblock */
Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
Common->nrealloc = 0 ;
scale = Common->scale ;
max_lnz_block = 1 ;
max_unz_block = 1 ;
/* compute the inverse of P from symbolic analysis. Will be updated to
* become the inverse of the numerical factorization when the factorization
* is done, for use in KLU_refactor */
#ifndef NDEBUG
for (k = 0 ; k < n ; k++)
{
Pinv [k] = EMPTY ;
}
#endif
for (k = 0 ; k < n ; k++)
{
ASSERT (P [k] >= 0 && P [k] < n) ;
Pinv [P [k]] = k ;
}
#ifndef NDEBUG
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
#endif
lnz = 0 ;
unz = 0 ;
Common->noffdiag = 0 ;
Offp [0] = 0 ;
/* ---------------------------------------------------------------------- */
/* optionally check input matrix and compute scale factors */
/* ---------------------------------------------------------------------- */
if (scale >= 0)
{
/* use Pnum as workspace. NOTE: scale factors are not yet permuted
* according to the final pivot row ordering, so Rs [oldrow] is the
* scale factor for A (oldrow,:), for the user's matrix A. Pnum is
* used as workspace in KLU_scale. When the factorization is done,
* the scale factors are permuted according to the final pivot row
* permutation, so that Rs [k] is the scale factor for the kth row of
* A(p,q) where p and q are the final row and column permutations. */
KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;
if (Common->status < KLU_OK)
{
/* matrix is invalid */
return ;
}
}
#ifndef NDEBUG
if (scale > 0)
{
for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
}
#endif
/* ---------------------------------------------------------------------- */
/* factor each block using klu */
/* ---------------------------------------------------------------------- */
for (block = 0 ; block < nblocks ; block++)
{
/* ------------------------------------------------------------------ */
/* the block is from rows/columns k1 to k2-1 */
/* ------------------------------------------------------------------ */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
if (nk == 1)
{
/* -------------------------------------------------------------- */
/* singleton case */
/* -------------------------------------------------------------- */
poff = Offp [k1] ;
oldcol = Q [k1] ;
pend = Ap [oldcol+1] ;
CLEAR (s) ;
if (scale <= 0)
{
/* no scaling */
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
newrow = Pinv [oldrow] ;
if (newrow < k1)
{
Offi [poff] = oldrow ;
Offx [poff] = Ax [p] ;
poff++ ;
}
else
{
ASSERT (newrow == k1) ;
PRINTF (("singleton block %d", block)) ;
PRINT_ENTRY (Ax [p]) ;
s = Ax [p] ;
}
}
}
else
{
/* row scaling. NOTE: scale factors are not yet permuted
* according to the pivot row permutation, so Rs [oldrow] is
* used below. When the factorization is done, the scale
* factors are permuted, so that Rs [newrow] will be used in
* klu_solve, klu_tsolve, and klu_rgrowth */
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
newrow = Pinv [oldrow] ;
if (newrow < k1)
{
Offi [poff] = oldrow ;
/* Offx [poff] = Ax [p] / Rs [oldrow] ; */
SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
poff++ ;
}
else
{
ASSERT (newrow == k1) ;
PRINTF (("singleton block %d ", block)) ;
PRINT_ENTRY (Ax[p]) ;
SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
}
}
}
Udiag [k1] = s ;
if (IS_ZERO (s))
{
/* singular singleton */
Common->status = KLU_SINGULAR ;
Common->numerical_rank = k1 ;
Common->singular_col = oldcol ;
if (Common->halt_if_singular)
{
return ;
}
}
Offp [k1+1] = poff ;
Pnum [k1] = P [k1] ;
lnz++ ;
unz++ ;
}
else
{
/* -------------------------------------------------------------- */
/* construct and factorize the kth block */
/* -------------------------------------------------------------- */
if (Lnz [block] < 0)
{
/* COLAMD was used - no estimate of fill-in */
/* use 10 times the nnz in A, plus n */
lsize = -(Common->initmem) ;
}
else
{
lsize = Common->initmem_amd * Lnz [block] + nk ;
}
/* allocates one array: LUbx [block] */
Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
if (Common->status < KLU_OK ||
(Common->status == KLU_SINGULAR && Common->halt_if_singular))
{
/* out of memory, invalid inputs, or singular */
return ;
}
PRINTF (("\n----------------------- L %d:\n", block)) ;
ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
PRINTF (("\n----------------------- U %d:\n", block)) ;
ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
/* -------------------------------------------------------------- */
/* get statistics */
/* -------------------------------------------------------------- */
lnz += lnz_block ;
unz += unz_block ;
max_lnz_block = MAX (max_lnz_block, lnz_block) ;
max_unz_block = MAX (max_unz_block, unz_block) ;
if (Lnz [block] == EMPTY)
{
/* revise estimate for subsequent factorization */
Lnz [block] = MAX (lnz_block, unz_block) ;
}
/* -------------------------------------------------------------- */
/* combine the klu row ordering with the symbolic pre-ordering */
/* -------------------------------------------------------------- */
PRINTF (("Pnum, 1-based:\n")) ;
for (k = 0 ; k < nk ; k++)
{
ASSERT (k + k1 < n) ;
ASSERT (Pblock [k] + k1 < n) ;
Pnum [k + k1] = P [Pblock [k] + k1] ;
PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
}
/* the local pivot row permutation Pblock is no longer needed */
}
}
ASSERT (nzoff == Offp [n]) ;
PRINTF (("\n------------------- Off diagonal entries:\n")) ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
Numeric->lnz = lnz ;
Numeric->unz = unz ;
Numeric->max_lnz_block = max_lnz_block ;
Numeric->max_unz_block = max_unz_block ;
/* compute the inverse of Pnum */
#ifndef NDEBUG
for (k = 0 ; k < n ; k++)
{
Pinv [k] = EMPTY ;
}
#endif
for (k = 0 ; k < n ; k++)
{
ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
Pinv [Pnum [k]] = k ;
}
#ifndef NDEBUG
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
#endif
/* permute scale factors Rs according to pivotal row order */
if (scale > 0)
{
for (k = 0 ; k < n ; k++)
{
REAL (X [k]) = Rs [Pnum [k]] ;
}
for (k = 0 ; k < n ; k++)
{
Rs [k] = REAL (X [k]) ;
}
}
PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
/* apply the pivot row permutations to the off-diagonal entries */
for (p = 0 ; p < nzoff ; p++)
{
ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
Offi [p] = Pinv [Offi [p]] ;
}
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
#ifndef NDEBUG
{
Entry ss, *Udiag = Numeric->Udiag ;
PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks)) ;
for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
{
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
if (nk == 1)
{
PRINTF (("singleton ")) ;
/* ENTRY_PRINT (singleton [block]) ; */
ss = Udiag [k1] ;
PRINT_ENTRY (ss) ;
}
else
{
Int *Lip, *Uip, *Llen, *Ulen ;
Unit *LU ;
Lip = Numeric->Lip + k1 ;
Llen = Numeric->Llen + k1 ;
LU = (Unit *) Numeric->LUbx [block] ;
PRINTF (("\n---- L block %d\n", block));
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
PRINTF (("\n---- U block %d\n", block)) ;
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
}
}
}
#endif
}
/* ========================================================================== */
/* === KLU_factor =========================================================== */
/* ========================================================================== */
KLU_numeric *KLU_factor /* returns NULL if error, or a valid
KLU_numeric object if successful */
(
/* --- inputs --- */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
double Ax [ ],
KLU_symbolic *Symbolic,
/* -------------- */
KLU_common *Common
)
{
Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
Int *R ;
KLU_numeric *Numeric ;
size_t n1, nzoff1, s, b6, n3 ;
if (Common == NULL)
{
return (NULL) ;
}
Common->status = KLU_OK ;
Common->numerical_rank = EMPTY ;
Common->singular_col = EMPTY ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
/* check for a valid Symbolic object */
if (Symbolic == NULL)
{
Common->status = KLU_INVALID ;
return (NULL) ;
}
n = Symbolic->n ;
nzoff = Symbolic->nzoff ;
nblocks = Symbolic->nblocks ;
maxblock = Symbolic->maxblock ;
R = Symbolic->R ;
PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
n, nzoff, nblocks, maxblock)) ;
/* ---------------------------------------------------------------------- */
/* get control parameters and make sure they are in the proper range */
/* ---------------------------------------------------------------------- */
Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
Common->initmem = MAX (1.0, Common->initmem) ;
Common->tol = MIN (Common->tol, 1.0) ;
Common->tol = MAX (0.0, Common->tol) ;
Common->memgrow = MAX (1.0, Common->memgrow) ;
/* ---------------------------------------------------------------------- */
/* allocate the Numeric object */
/* ---------------------------------------------------------------------- */
/* this will not cause size_t overflow (already checked by KLU_symbolic) */
n1 = ((size_t) n) + 1 ;
nzoff1 = ((size_t) nzoff) + 1 ;
Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ;
if (Common->status < KLU_OK)
{
/* out of memory */
Common->status = KLU_OUT_OF_MEMORY ;
return (NULL) ;
}
Numeric->n = n ;
Numeric->nblocks = nblocks ;
Numeric->nzoff = nzoff ;
Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ;
Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ;
Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ;
Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
Numeric->Lip = KLU_malloc (n, sizeof (Int), Common) ;
Numeric->Uip = KLU_malloc (n, sizeof (Int), Common) ;
Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ;
Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ;
Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ;
Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ;
if (Numeric->LUbx != NULL)
{
for (k = 0 ; k < nblocks ; k++)
{
Numeric->LUbx [k] = NULL ;
}
}
Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
if (Common->scale > 0)
{
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
}
else
{
/* no scaling */
Numeric->Rs = NULL ;
}
Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ;
/* allocate permanent workspace for factorization and solve. Note that the
* solver will use an Xwork of size 4n, whereas the factorization codes use
* an Xwork of size n and integer space (Iwork) of size 6*maxblock. KLU_condest
* uses an Xwork of size 2n. Total size is:
*
* n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
*/
s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
Numeric->Xwork = Numeric->Work ;
Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
if (!ok || Common->status < KLU_OK)
{
/* out of memory or problem too large */
Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
KLU_free_numeric (&Numeric, Common) ;
return (NULL) ;
}
/* ---------------------------------------------------------------------- */
/* factorize the blocks */
/* ---------------------------------------------------------------------- */
factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
/* ---------------------------------------------------------------------- */
/* return or free the Numeric object */
/* ---------------------------------------------------------------------- */
if (Common->status < KLU_OK)
{
/* out of memory or inputs invalid */
KLU_free_numeric (&Numeric, Common) ;
}
else if (Common->status == KLU_SINGULAR)
{
if (Common->halt_if_singular)
{
/* Matrix is singular, and the Numeric object is only partially
* defined because we halted early. This is the default case for
* a singular matrix. */
KLU_free_numeric (&Numeric, Common) ;
}
}
else if (Common->status == KLU_OK)
{
/* successful non-singular factorization */
Common->numerical_rank = n ;
Common->singular_col = n ;
}
return (Numeric) ;
}

71
src/maths/KLU/klu_free_numeric.c Normal file
View File

@ -0,0 +1,71 @@
/* ========================================================================== */
/* === KLU_free_numeric ===================================================== */
/* ========================================================================== */
/* Free the KLU Numeric object. */
#include "klu_internal.h"
Int KLU_free_numeric
(
KLU_numeric **NumericHandle,
KLU_common *Common
)
{
KLU_numeric *Numeric ;
Unit **LUbx ;
size_t *LUsize ;
Int block, n, nzoff, nblocks ;
if (Common == NULL)
{
return (FALSE) ;
}
if (NumericHandle == NULL || *NumericHandle == NULL)
{
return (TRUE) ;
}
Numeric = *NumericHandle ;
n = Numeric->n ;
nzoff = Numeric->nzoff ;
nblocks = Numeric->nblocks ;
LUsize = Numeric->LUsize ;
LUbx = (Unit **) Numeric->LUbx ;
if (LUbx != NULL)
{
for (block = 0 ; block < nblocks ; block++)
{
KLU_free (LUbx [block], LUsize ? LUsize [block] : 0,
sizeof (Unit), Common) ;
}
}
KLU_free (Numeric->Pnum, n, sizeof (Int), Common) ;
KLU_free (Numeric->Offp, n+1, sizeof (Int), Common) ;
KLU_free (Numeric->Offi, nzoff+1, sizeof (Int), Common) ;
KLU_free (Numeric->Offx, nzoff+1, sizeof (Entry), Common) ;
KLU_free (Numeric->Lip, n, sizeof (Int), Common) ;
KLU_free (Numeric->Llen, n, sizeof (Int), Common) ;
KLU_free (Numeric->Uip, n, sizeof (Int), Common) ;
KLU_free (Numeric->Ulen, n, sizeof (Int), Common) ;
KLU_free (Numeric->LUsize, nblocks, sizeof (size_t), Common) ;
KLU_free (Numeric->LUbx, nblocks, sizeof (Unit *), Common) ;
KLU_free (Numeric->Udiag, n, sizeof (Entry), Common) ;
KLU_free (Numeric->Rs, n, sizeof (double), Common) ;
KLU_free (Numeric->Pinv, n, sizeof (Int), Common) ;
KLU_free (Numeric->Work, Numeric->worksize, 1, Common) ;
KLU_free (Numeric, 1, sizeof (KLU_numeric), Common) ;
*NumericHandle = NULL ;
return (TRUE) ;
}

34
src/maths/KLU/klu_free_symbolic.c Normal file
View File

@ -0,0 +1,34 @@
/* ========================================================================== */
/* === KLU_free_symbolic ==================================================== */
/* ========================================================================== */
/* Free the KLU Symbolic object. */
#include "klu_internal.h"
Int KLU_free_symbolic
(
KLU_symbolic **SymbolicHandle,
KLU_common *Common
)
{
KLU_symbolic *Symbolic ;
Int n ;
if (Common == NULL)
{
return (FALSE) ;
}
if (SymbolicHandle == NULL || *SymbolicHandle == NULL)
{
return (TRUE) ;
}
Symbolic = *SymbolicHandle ;
n = Symbolic->n ;
KLU_free (Symbolic->P, n, sizeof (Int), Common) ;
KLU_free (Symbolic->Q, n, sizeof (Int), Common) ;
KLU_free (Symbolic->R, n+1, sizeof (Int), Common) ;
KLU_free (Symbolic->Lnz, n, sizeof (double), Common) ;
KLU_free (Symbolic, 1, sizeof (KLU_symbolic), Common) ;
*SymbolicHandle = NULL ;
return (TRUE) ;
}

243
src/maths/KLU/klu_internal.h Normal file
View File

@ -0,0 +1,243 @@
/* ========================================================================== */
/* === KLU/Include/klu_internal.h =========================================== */
/* ========================================================================== */
/* For internal use in KLU routines only, not for user programs */
#ifndef _KLU_INTERNAL_H
#define _KLU_INTERNAL_H
#include "klu.h"
#include "btf.h"
#include "klu_version.h"
/* ========================================================================== */
/* make sure debugging and printing is turned off */
#ifndef NDEBUG
#define NDEBUG
#endif
#ifndef NPRINT
#define NPRINT
#endif
/* To enable debugging and assertions, uncomment this line:
#undef NDEBUG
*/
/* To enable diagnostic printing, uncomment this line:
#undef NPRINT
*/
/* ========================================================================== */
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <math.h>
#undef ASSERT
#ifndef NDEBUG
#define ASSERT(a) assert(a)
#else
#define ASSERT(a)
#endif
#define SCALAR_IS_NAN(x) ((x) != (x))
/* true if an integer (stored in double x) would overflow (or if x is NaN) */
#define INT_OVERFLOW(x) ((!((x) * (1.0+1e-8) <= (double) INT_MAX)) \
|| SCALAR_IS_NAN (x))
#undef TRUE
#undef FALSE
#undef MAX
#undef MIN
#undef PRINTF
#undef FLIP
#ifndef NPRINT
#define PRINTF(s) { printf s ; } ;
#else
#define PRINTF(s)
#endif
#define TRUE 1
#define FALSE 0
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
/* FLIP is a "negation about -1", and is used to mark an integer i that is
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY
* is negative, and FLIP of a number < EMPTY is positive. FLIP (FLIP (i)) = i
* for all integers i. UNFLIP (i) is >= EMPTY. */
#define EMPTY (-1)
#define FLIP(i) (-(i)-2)
#define UNFLIP(i) (((i) < EMPTY) ? FLIP (i) : (i))
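/* Editor's note -- a worked example of the identities above (not part of the
 * original source).  With EMPTY = -1 and FLIP(i) = -(i)-2:
 *
 *      FLIP (EMPTY) = -(-1)-2 = -1 = EMPTY
 *      FLIP (3)     = -5,   FLIP (FLIP (3)) = -(-5)-2 = 3
 *      UNFLIP (-5)  = 3,    UNFLIP (3) = 3,   UNFLIP (EMPTY) = EMPTY
 *
 * so a non-negative index can be marked by flipping it and recovered later
 * with UNFLIP, while EMPTY is left unchanged. */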
size_t KLU_kernel /* final size of LU on output */
(
/* input, not modified */
Int n, /* A is n-by-n */
Int Ap [ ], /* size n+1, column pointers for A */
Int Ai [ ], /* size nz = Ap [n], row indices for A */
Entry Ax [ ], /* size nz, values of A */
Int Q [ ], /* size n, optional input permutation */
size_t lusize, /* initial size of LU */
/* output, not defined on input */
Int Pinv [ ], /* size n */
Int P [ ], /* size n */
Unit **p_LU, /* size lusize on input, size Uxp[n] on output*/
Entry Udiag [ ], /* size n, diagonal of U */
Int Llen [ ], /* size n, column length of L */
Int Ulen [ ], /* size n, column length of U */
Int Lip [ ], /* size n+1 */
Int Uip [ ], /* size n+1 */
Int *lnz, /* size of L */
Int *unz, /* size of U */
/* workspace, not defined on input */
Entry X [ ], /* size n, zero on output */
/* workspace, not defined on input or output */
Int Stack [ ], /* size n */
Int Flag [ ], /* size n */
Int adj_pos [ ], /* size n */
/* workspace for pruning only */
Int Lpend [ ], /* size n workspace */
/* inputs, not modified on output */
Int k1, /* the block of A is from k1 to k2-1 */
Int PSinv [ ], /* inverse of P from symbolic factorization */
double Rs [ ], /* scale factors for A */
/* inputs, modified on output */
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
Int Offi [ ],
Entry Offx [ ],
KLU_common *Common /* the control input/output structure */
) ;
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
(
/* inputs, not modified */
Int n, /* A is n-by-n. n must be > 0. */
Int Ap [ ], /* size n+1, column pointers for A */
Int Ai [ ], /* size nz = Ap [n], row indices for A */
Entry Ax [ ], /* size nz, values of A */
Int Q [ ], /* size n, optional column permutation */
double Lsize, /* initial size of L and U */
/* outputs, not defined on input */
Unit **p_LU, /* row indices and values of L and U */
Entry Udiag [ ], /* size n, diagonal of U */
Int Llen [ ], /* size n, column length of L */
Int Ulen [ ], /* size n, column length of U */
Int Lip [ ], /* size n+1, column pointers of L */
Int Uip [ ], /* size n+1, column pointers of U */
Int P [ ], /* row permutation, size n */
Int *lnz, /* size of L */
Int *unz, /* size of U */
/* workspace, undefined on input */
Entry *X, /* size n entries. Zero on output */
Int *Work, /* size 5n Int's */
/* inputs, not modified on output */
Int k1, /* the block of A is from k1 to k2-1 */
Int PSinv [ ], /* inverse of P from symbolic factorization */
double Rs [ ], /* scale factors for A */
/* inputs, modified on output */
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
Int Offi [ ],
Entry Offx [ ],
KLU_common *Common /* the control input/output structure */
) ;
void KLU_lsolve
(
/* inputs, not modified: */
Int n,
Int Lp [ ],
Int Li [ ],
Unit LU [ ],
Int nrhs,
/* right-hand-side on input, solution to Lx=b on output */
Entry X [ ]
) ;
void KLU_ltsolve
(
/* inputs, not modified: */
Int n,
Int Lp [ ],
Int Li [ ],
Unit LU [ ],
Int nrhs,
#ifdef COMPLEX
Int conj_solve,
#endif
/* right-hand-side on input, solution to L'x=b on output */
Entry X [ ]
) ;
void KLU_usolve
(
/* inputs, not modified: */
Int n,
Int Up [ ],
Int Ui [ ],
Unit LU [ ],
Entry Udiag [ ],
Int nrhs,
/* right-hand-side on input, solution to Ux=b on output */
Entry X [ ]
) ;
void KLU_utsolve
(
/* inputs, not modified: */
Int n,
Int Up [ ],
Int Ui [ ],
Unit LU [ ],
Entry Udiag [ ],
Int nrhs,
#ifdef COMPLEX
Int conj_solve,
#endif
/* right-hand-side on input, solution to U'x=b on output */
Entry X [ ]
) ;
Int KLU_valid
(
Int n,
Int Ap [ ],
Int Ai [ ],
Entry Ax [ ]
) ;
Int KLU_valid_LU
(
Int n,
Int flag_test_start_ptr,
Int Xip [ ],
Int Xlen [ ],
Unit LU [ ]
);
size_t KLU_add_size_t (size_t a, size_t b, Int *ok) ;
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) ;
KLU_symbolic *KLU_alloc_symbolic (Int n, Int *Ap, Int *Ai, KLU_common *Common) ;
#endif

1009
src/maths/KLU/klu_kernel.c Normal file

File diff suppressed because it is too large

225
src/maths/KLU/klu_memory.c Normal file
View File

@ -0,0 +1,225 @@
/* ========================================================================== */
/* === KLU_memory =========================================================== */
/* ========================================================================== */
/* KLU memory management routines:
*
* KLU_malloc malloc wrapper
* KLU_free free wrapper
* KLU_realloc realloc wrapper
*/
#include "klu_internal.h"
/* ========================================================================== */
/* === KLU_add_size_t ======================================================= */
/* ========================================================================== */
/* Safely compute a+b, and check for size_t overflow */
size_t KLU_add_size_t (size_t a, size_t b, Int *ok)
{
(*ok) = (*ok) && ((a + b) >= MAX (a,b)) ;
return ((*ok) ? (a + b) : ((size_t) -1)) ;
}
/* ========================================================================== */
/* === KLU_mult_size_t ====================================================== */
/* ========================================================================== */
/* Safely compute a*k, where k should be small, and check for size_t overflow */
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok)
{
size_t i, s = 0 ;
for (i = 0 ; i < k ; i++)
{
s = KLU_add_size_t (s, a, ok) ;
}
return ((*ok) ? s : ((size_t) -1)) ;
}
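/* Editor's note -- an illustrative sketch, not part of the original source,
 * showing how the two overflow-checked helpers above are meant to be chained
 * (mirroring the worksize computation in KLU_factor): ok starts as TRUE and is
 * cleared by the first operation that would overflow, so only the final value
 * of ok needs to be tested. */
static size_t example_worksize (size_t n, size_t maxblock, Int *ok)
{
    size_t s, n3, b6 ;
    *ok = TRUE ;
    s  = KLU_mult_size_t (n, sizeof (Entry), ok) ;           /* n * sizeof (Entry) */
    n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), ok) ;       /* 3*n * sizeof (Entry) */
    b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), ok) ;  /* 6*maxblock * sizeof (Int) */
    return (KLU_add_size_t (s, MAX (n3, b6), ok)) ;          /* total, or (size_t) -1 */
}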
/* ========================================================================== */
/* === KLU_malloc =========================================================== */
/* ========================================================================== */
/* Wrapper around malloc routine (mxMalloc for a mexFunction). Allocates
* space of size MAX(1,n)*size, where size is normally a sizeof (...).
*
* This routine and KLU_realloc do not set Common->status to KLU_OK on success,
* so that a sequence of KLU_malloc's or KLU_realloc's can be used. If any of
* them fails, the Common->status will hold the most recent error status.
*
* Usage, for a pointer to Int:
*
* p = KLU_malloc (n, sizeof (Int), Common)
*
* Uses a pointer to the malloc routine (or its equivalent) defined in Common.
*/
void *KLU_malloc /* returns pointer to the newly malloc'd block */
(
/* ---- input ---- */
size_t n, /* number of items */
size_t size, /* size of each item */
/* --------------- */
KLU_common *Common
)
{
void *p ;
size_t s ;
Int ok = TRUE ;
if (Common == NULL)
{
p = NULL ;
}
else if (size == 0)
{
/* size must be > 0 */
Common->status = KLU_INVALID ;
p = NULL ;
}
else if (n >= INT_MAX)
{
/* the object is too big to allocate: an index i of type Int in p [i]
* could not address the whole block */
Common->status = KLU_TOO_LARGE ;
p = NULL ;
}
else
{
/* call malloc, or its equivalent */
s = KLU_mult_size_t (MAX (1,n), size, &ok) ;
p = ok ? ((Common->malloc_memory) (s)) : NULL ;
if (p == NULL)
{
/* failure: out of memory */
Common->status = KLU_OUT_OF_MEMORY ;
}
else
{
Common->memusage += s ;
Common->mempeak = MAX (Common->mempeak, Common->memusage) ;
}
}
return (p) ;
}
/* ========================================================================== */
/* === KLU_free ============================================================= */
/* ========================================================================== */
/* Wrapper around free routine (mxFree for a mexFunction). Returns NULL,
* which can be assigned to the pointer being freed, as in:
*
* p = KLU_free (p, n, sizeof (int), Common) ;
*/
void *KLU_free /* always returns NULL */
(
/* ---- in/out --- */
void *p, /* block of memory to free */
/* ---- input --- */
size_t n, /* size of block to free, in # of items */
size_t size, /* size of each item */
/* --------------- */
KLU_common *Common
)
{
size_t s ;
Int ok = TRUE ;
if (p != NULL && Common != NULL)
{
/* only free the object if the pointer is not NULL */
/* call free, or its equivalent */
(Common->free_memory) (p) ;
s = KLU_mult_size_t (MAX (1,n), size, &ok) ;
Common->memusage -= s ;
}
/* return NULL, and the caller should assign this to p. This avoids
* freeing the same pointer twice. */
return (NULL) ;
}
/* ========================================================================== */
/* === KLU_realloc ========================================================== */
/* ========================================================================== */
/* Wrapper around realloc routine (mxRealloc for a mexFunction). Given a
* pointer p to a block allocated by KLU_malloc, it changes the size of the
* block pointed to by p to be MAX(1,nnew)*size in size. It may return a
* pointer different than p. This should be used as (for a pointer to Int):
*
* p = KLU_realloc (nnew, nold, sizeof (Int), p, Common) ;
*
* If p is NULL, this is the same as p = KLU_malloc (...).
* A size of nnew=0 is treated as nnew=1.
*
* If the realloc fails, p is returned unchanged and Common->status is set
* to KLU_OUT_OF_MEMORY. If successful, Common->status is not modified,
* and the returned pointer (possibly different from the input p) points to the
* reallocated block of memory.
*
* Uses a pointer to the realloc routine (or its equivalent) defined in Common.
*/
void *KLU_realloc /* returns pointer to reallocated block */
(
/* ---- input ---- */
size_t nnew, /* requested # of items in reallocated block */
size_t nold, /* old # of items */
size_t size, /* size of each item */
/* ---- in/out --- */
void *p, /* block of memory to realloc */
/* --------------- */
KLU_common *Common
)
{
void *pnew ;
size_t snew, sold ;
Int ok = TRUE ;
if (Common == NULL)
{
p = NULL ;
}
else if (size == 0)
{
/* size must be > 0 */
Common->status = KLU_INVALID ;
p = NULL ;
}
else if (p == NULL)
{
/* A fresh object is being allocated. */
p = KLU_malloc (nnew, size, Common) ;
}
else if (nnew >= INT_MAX)
{
/* failure: nnew is too big. Do not change p */
Common->status = KLU_TOO_LARGE ;
}
else
{
/* The object exists, and is changing to some other nonzero size. */
/* call realloc, or its equivalent */
snew = KLU_mult_size_t (MAX (1,nnew), size, &ok) ;
sold = KLU_mult_size_t (MAX (1,nold), size, &ok) ;
pnew = ok ? ((Common->realloc_memory) (p, snew)) : NULL ;
if (pnew == NULL)
{
/* Do not change p, since it still points to allocated memory */
Common->status = KLU_OUT_OF_MEMORY ;
}
else
{
/* success: return the new p and change the size of the block */
Common->memusage += (snew - sold) ;
Common->mempeak = MAX (Common->mempeak, Common->memusage) ;
p = pnew ;
}
}
return (p) ;
}
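/* Editor's note -- an illustrative sketch, not part of the original source,
 * of the growth pattern documented above KLU_realloc: on failure the old
 * block is left intact, so the caller can keep using it after checking
 * Common->status. */
static Int *example_grow_int_array
(
    Int *p,             /* block previously allocated with KLU_malloc */
    size_t nold,        /* current number of items */
    size_t nnew,        /* requested number of items */
    KLU_common *Common
)
{
    Int *pnew = KLU_realloc (nnew, nold, sizeof (Int), p, Common) ;
    if (Common->status < KLU_OK)
    {
        /* reallocation failed; pnew == p still points to the old nold items */
        return (p) ;
    }
    return (pnew) ;     /* a block of MAX (1,nnew) items */
}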

478
src/maths/KLU/klu_refactor.c Normal file
View File

@ -0,0 +1,478 @@
/* ========================================================================== */
/* === KLU_refactor ========================================================= */
/* ========================================================================== */
/* Factor the matrix, after ordering and analyzing it with KLU_analyze, and
* factoring it once with KLU_factor. This routine cannot do any numerical
* pivoting. The pattern of the input matrix (Ap, Ai) must be identical to
* the pattern given to KLU_factor.
*/
#include "klu_internal.h"
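/* Editor's note -- an illustrative sketch, not part of the original source, of
 * the intended use, with the public klu_* names from klu.h: factor once, then
 * refactorize whenever the numerical values change but the nonzero pattern
 * does not (the common case in circuit simulation).  Common is assumed to have
 * been initialized with klu_defaults; error checks are abbreviated. */
static int example_refactor             /* returns TRUE if successful */
(
    int n, int Ap [ ], int Ai [ ],
    double Ax1 [ ],     /* first set of values */
    double Ax2 [ ],     /* second set of values, same pattern as Ax1 */
    klu_common *Common
)
{
    klu_symbolic *Symbolic = klu_analyze (n, Ap, Ai, Common) ;
    klu_numeric *Numeric = klu_factor (Ap, Ai, Ax1, Symbolic, Common) ; /* full factorization */
    int ok = klu_refactor (Ap, Ai, Ax2, Symbolic, Numeric, Common) ;    /* reuse the pivot order */
    klu_free_numeric (&Numeric, Common) ;
    klu_free_symbolic (&Symbolic, Common) ;
    return (ok) ;
}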
/* ========================================================================== */
/* === KLU_refactor ========================================================= */
/* ========================================================================== */
Int KLU_refactor /* returns TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
double Ax [ ],
KLU_symbolic *Symbolic,
/* input/output */
KLU_numeric *Numeric,
KLU_common *Common
)
{
Entry ukk, ujk, s ;
Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ;
double *Rs ;
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen,
*Ulen ;
Unit **LUbx ;
Unit *LU ;
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale,
nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
Common->status = KLU_OK ;
if (Numeric == NULL)
{
/* invalid Numeric object */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->numerical_rank = EMPTY ;
Common->singular_col = EMPTY ;
Az = (Entry *) Ax ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
n = Symbolic->n ;
P = Symbolic->P ;
Q = Symbolic->Q ;
R = Symbolic->R ;
nblocks = Symbolic->nblocks ;
maxblock = Symbolic->maxblock ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Numeric object */
/* ---------------------------------------------------------------------- */
Pnum = Numeric->Pnum ;
Offp = Numeric->Offp ;
Offi = Numeric->Offi ;
Offx = (Entry *) Numeric->Offx ;
LUbx = (Unit **) Numeric->LUbx ;
scale = Common->scale ;
if (scale > 0)
{
/* factorization was not scaled, but refactorization is scaled */
if (Numeric->Rs == NULL)
{
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
if (Common->status < KLU_OK)
{
Common->status = KLU_OUT_OF_MEMORY ;
return (FALSE) ;
}
}
}
else
{
/* no scaling for refactorization; ensure Numeric->Rs is freed. This
* does nothing if Numeric->Rs is already NULL. */
Numeric->Rs = KLU_free (Numeric->Rs, n, sizeof (double), Common) ;
}
Rs = Numeric->Rs ;
Pinv = Numeric->Pinv ;
X = (Entry *) Numeric->Xwork ;
Common->nrealloc = 0 ;
Udiag = Numeric->Udiag ;
nzoff = Symbolic->nzoff ;
/* ---------------------------------------------------------------------- */
/* check the input matrix and compute the row scale factors, Rs */
/* ---------------------------------------------------------------------- */
/* do not scale or check the input matrix if scale < 0 */
if (scale >= 0)
{
/* check for out-of-range indices, but do not check for duplicates */
if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common))
{
return (FALSE) ;
}
}
/* ---------------------------------------------------------------------- */
/* clear workspace X */
/* ---------------------------------------------------------------------- */
for (k = 0 ; k < maxblock ; k++)
{
/* X [k] = 0 */
CLEAR (X [k]) ;
}
poff = 0 ;
/* ---------------------------------------------------------------------- */
/* factor each block */
/* ---------------------------------------------------------------------- */
if (scale <= 0)
{
/* ------------------------------------------------------------------ */
/* no scaling */
/* ------------------------------------------------------------------ */
for (block = 0 ; block < nblocks ; block++)
{
/* -------------------------------------------------------------- */
/* the block is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
if (nk == 1)
{
/* ---------------------------------------------------------- */
/* singleton case */
/* ---------------------------------------------------------- */
oldcol = Q [k1] ;
pend = Ap [oldcol+1] ;
CLEAR (s) ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
newrow = Pinv [Ai [p]] - k1 ;
if (newrow < 0 && poff < nzoff)
{
/* entry in off-diagonal block */
Offx [poff] = Az [p] ;
poff++ ;
}
else
{
/* singleton */
s = Az [p] ;
}
}
Udiag [k1] = s ;
}
else
{
/* ---------------------------------------------------------- */
/* construct and factor the kth block */
/* ---------------------------------------------------------- */
Lip = Numeric->Lip + k1 ;
Llen = Numeric->Llen + k1 ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
LU = LUbx [block] ;
for (k = 0 ; k < nk ; k++)
{
/* ------------------------------------------------------ */
/* scatter kth column of the block into workspace X */
/* ------------------------------------------------------ */
oldcol = Q [k+k1] ;
pend = Ap [oldcol+1] ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
newrow = Pinv [Ai [p]] - k1 ;
if (newrow < 0 && poff < nzoff)
{
/* entry in off-diagonal block */
Offx [poff] = Az [p] ;
poff++ ;
}
else
{
/* (newrow,k) is an entry in the block */
X [newrow] = Az [p] ;
}
}
/* ------------------------------------------------------ */
/* compute kth column of U, and update kth column of A */
/* ------------------------------------------------------ */
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
for (up = 0 ; up < ulen ; up++)
{
j = Ui [up] ;
ujk = X [j] ;
/* X [j] = 0 */
CLEAR (X [j]) ;
Ux [up] = ujk ;
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
for (p = 0 ; p < llen ; p++)
{
/* X [Li [p]] -= Lx [p] * ujk */
MULT_SUB (X [Li [p]], Lx [p], ujk) ;
}
}
/* get the diagonal entry of U */
ukk = X [k] ;
/* X [k] = 0 */
CLEAR (X [k]) ;
/* singular case */
if (IS_ZERO (ukk))
{
/* matrix is numerically singular */
Common->status = KLU_SINGULAR ;
if (Common->numerical_rank == EMPTY)
{
Common->numerical_rank = k+k1 ;
Common->singular_col = Q [k+k1] ;
}
if (Common->halt_if_singular)
{
/* do not continue the factorization */
return (FALSE) ;
}
}
Udiag [k+k1] = ukk ;
/* gather and divide by pivot to get kth column of L */
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
for (p = 0 ; p < llen ; p++)
{
i = Li [p] ;
DIV (Lx [p], X [i], ukk) ;
CLEAR (X [i]) ;
}
}
}
}
}
else
{
/* ------------------------------------------------------------------ */
/* scaling */
/* ------------------------------------------------------------------ */
for (block = 0 ; block < nblocks ; block++)
{
/* -------------------------------------------------------------- */
/* the block is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
if (nk == 1)
{
/* ---------------------------------------------------------- */
/* singleton case */
/* ---------------------------------------------------------- */
oldcol = Q [k1] ;
pend = Ap [oldcol+1] ;
CLEAR (s) ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
newrow = Pinv [oldrow] - k1 ;
if (newrow < 0 && poff < nzoff)
{
/* entry in off-diagonal block */
/* Offx [poff] = Az [p] / Rs [oldrow] */
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ;
poff++ ;
}
else
{
/* singleton */
/* s = Az [p] / Rs [oldrow] */
SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ;
}
}
Udiag [k1] = s ;
}
else
{
/* ---------------------------------------------------------- */
/* construct and factor the kth block */
/* ---------------------------------------------------------- */
Lip = Numeric->Lip + k1 ;
Llen = Numeric->Llen + k1 ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
LU = LUbx [block] ;
for (k = 0 ; k < nk ; k++)
{
/* ------------------------------------------------------ */
/* scatter kth column of the block into workspace X */
/* ------------------------------------------------------ */
oldcol = Q [k+k1] ;
pend = Ap [oldcol+1] ;
for (p = Ap [oldcol] ; p < pend ; p++)
{
oldrow = Ai [p] ;
newrow = Pinv [oldrow] - k1 ;
if (newrow < 0 && poff < nzoff)
{
/* entry in off-diagonal part */
/* Offx [poff] = Az [p] / Rs [oldrow] */
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]);
poff++ ;
}
else
{
/* (newrow,k) is an entry in the block */
/* X [newrow] = Az [p] / Rs [oldrow] */
SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ;
}
}
/* ------------------------------------------------------ */
/* compute kth column of U, and update kth column of A */
/* ------------------------------------------------------ */
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
for (up = 0 ; up < ulen ; up++)
{
j = Ui [up] ;
ujk = X [j] ;
/* X [j] = 0 */
CLEAR (X [j]) ;
Ux [up] = ujk ;
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
for (p = 0 ; p < llen ; p++)
{
/* X [Li [p]] -= Lx [p] * ujk */
MULT_SUB (X [Li [p]], Lx [p], ujk) ;
}
}
/* get the diagonal entry of U */
ukk = X [k] ;
/* X [k] = 0 */
CLEAR (X [k]) ;
/* singular case */
if (IS_ZERO (ukk))
{
/* matrix is numerically singular */
Common->status = KLU_SINGULAR ;
if (Common->numerical_rank == EMPTY)
{
Common->numerical_rank = k+k1 ;
Common->singular_col = Q [k+k1] ;
}
if (Common->halt_if_singular)
{
/* do not continue the factorization */
return (FALSE) ;
}
}
Udiag [k+k1] = ukk ;
/* gather and divide by pivot to get kth column of L */
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
for (p = 0 ; p < llen ; p++)
{
i = Li [p] ;
DIV (Lx [p], X [i], ukk) ;
CLEAR (X [i]) ;
}
}
}
}
}
/* ---------------------------------------------------------------------- */
/* permute scale factors Rs according to pivotal row order */
/* ---------------------------------------------------------------------- */
if (scale > 0)
{
for (k = 0 ; k < n ; k++)
{
REAL (X [k]) = Rs [Pnum [k]] ;
}
for (k = 0 ; k < n ; k++)
{
Rs [k] = REAL (X [k]) ;
}
}
#ifndef NDEBUG
ASSERT (Offp [n] == poff) ;
ASSERT (Symbolic->nzoff == poff) ;
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
if (Common->status == KLU_OK)
{
PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks));
for (block = 0 ; block < nblocks ; block++)
{
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF ((
"\n================KLU_refactor output: k1 %d k2 %d nk %d\n",
k1, k2, nk)) ;
if (nk == 1)
{
PRINTF (("singleton ")) ;
PRINT_ENTRY (Udiag [k1]) ;
}
else
{
Lip = Numeric->Lip + k1 ;
Llen = Numeric->Llen + k1 ;
LU = (Unit *) Numeric->LUbx [block] ;
PRINTF (("\n---- L block %d\n", block)) ;
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
Uip = Numeric->Uip + k1 ;
Ulen = Numeric->Ulen + k1 ;
PRINTF (("\n---- U block %d\n", block)) ;
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
}
}
}
#endif
return (TRUE) ;
}

159
src/maths/KLU/klu_scale.c Normal file
View File

@ -0,0 +1,159 @@
/* ========================================================================== */
/* === KLU_scale ============================================================ */
/* ========================================================================== */
/* Scale a matrix and check to see if it is valid. Can be called by the user.
* This is called by KLU_factor and KLU_refactor. Returns TRUE if the input
* matrix is valid, FALSE otherwise. If the W input argument is non-NULL,
* then the input matrix is checked for duplicate entries.
*
* scaling methods:
* <0: no scaling, do not compute Rs, and do not check input matrix.
* 0: no scaling
* 1: the scale factor for row i is sum (abs (A (i,:)))
* 2 or more: the scale factor for row i is max (abs (A (i,:)))
*/
#include "klu_internal.h"
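/* Editor's note -- a small worked example of the scaling modes above (not part
 * of the original source).  For the 2-by-2 matrix
 *
 *          A = [ 1  -4 ]
 *              [ 2   3 ]
 *
 * scale = 1 gives Rs = { |1|+|-4|, |2|+|3| } = { 5, 5 }, and
 * scale = 2 gives Rs = { max(|1|,|-4|), max(|2|,|3|) } = { 4, 3 }.
 * During factorization each entry of row i is divided by Rs [i]. */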
Int KLU_scale /* return TRUE if successful, FALSE otherwise */
(
/* inputs, not modified */
Int scale, /* 0: none, 1: sum, 2: max */
Int n,
Int Ap [ ], /* size n+1, column pointers */
Int Ai [ ], /* size nz, row indices */
double Ax [ ],
/* outputs, not defined on input */
double Rs [ ], /* size n, can be NULL if scale <= 0 */
/* workspace, not defined on input or output */
Int W [ ], /* size n, can be NULL */
/* --------------- */
KLU_common *Common
)
{
double a ;
Entry *Az ;
Int row, col, p, pend, check_duplicates ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
Common->status = KLU_OK ;
if (scale < 0)
{
/* return without checking anything and without computing the
* scale factors */
return (TRUE) ;
}
Az = (Entry *) Ax ;
if (n <= 0 || Ap == NULL || Ai == NULL || Az == NULL ||
(scale > 0 && Rs == NULL))
{
/* Ap, Ai, Ax and Rs must be present, and n must be > 0 */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
if (Ap [0] != 0 || Ap [n] < 0)
{
/* nz = Ap [n] must be >= 0 and Ap [0] must equal zero */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
for (col = 0 ; col < n ; col++)
{
if (Ap [col] > Ap [col+1])
{
/* column pointers must be non-decreasing */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
}
/* ---------------------------------------------------------------------- */
/* scale */
/* ---------------------------------------------------------------------- */
if (scale > 0)
{
/* initialize row sum or row max */
for (row = 0 ; row < n ; row++)
{
Rs [row] = 0 ;
}
}
/* check for duplicates only if W is present */
check_duplicates = (W != (Int *) NULL) ;
if (check_duplicates)
{
for (row = 0 ; row < n ; row++)
{
W [row] = EMPTY ;
}
}
for (col = 0 ; col < n ; col++)
{
pend = Ap [col+1] ;
for (p = Ap [col] ; p < pend ; p++)
{
row = Ai [p] ;
if (row < 0 || row >= n)
{
/* row index out of range */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
if (check_duplicates)
{
if (W [row] == col)
{
/* duplicate entry */
Common->status = KLU_INVALID ;
return (FALSE) ;
}
/* flag row i as appearing in column col */
W [row] = col ;
}
/* a = ABS (Az [p]) ;*/
ABS (a, Az [p]) ;
if (scale == 1)
{
/* accumulate the abs. row sum */
Rs [row] += a ;
}
else if (scale > 1)
{
/* find the max abs. value in the row */
Rs [row] = MAX (Rs [row], a) ;
}
}
}
if (scale > 0)
{
/* do not scale empty rows: an all-zero row (which would make the matrix
* singular) gets a scale factor of 1 so that it is left unscaled */
for (row = 0 ; row < n ; row++)
{
PRINTF (("Rs [%d] = %g\n", row, Rs [row])) ;
if (Rs [row] == 0.0)
{
PRINTF (("Row %d of A is all zero\n", row)) ;
Rs [row] = 1.0 ;
}
}
}
return (TRUE) ;
}

396
src/maths/KLU/klu_solve.c Normal file
View File

@ -0,0 +1,396 @@
/* ========================================================================== */
/* === KLU_solve ============================================================ */
/* ========================================================================== */
/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
* Numeric->Iwork).
*/
#include "klu_internal.h"
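/* Editor's note -- an illustrative layout example, not part of the original
 * source: B is column-oriented with leading dimension d >= n, so entry B(i,j)
 * of the right-hand side is stored at B [i + j*d].  For example, with n = 3,
 * d = 5 and nrhs = 2, column 0 occupies B [0..2], column 1 occupies B [5..7],
 * and the padding entries B [3] and B [4] are never touched. */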
Int KLU_solve
(
/* inputs, not modified */
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
Int d, /* leading dimension of B */
Int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size n*nrhs, in column-oriented form, with
* leading dimension d. */
/* --------------- */
KLU_common *Common
)
{
Entry x [4], offik, s ;
double rs, *Rs ;
Entry *Offx, *X, *Bz, *Udiag ;
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
Unit **LUbx ;
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
B == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
Bz = (Entry *) B ;
n = Symbolic->n ;
nblocks = Symbolic->nblocks ;
Q = Symbolic->Q ;
R = Symbolic->R ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Numeric object */
/* ---------------------------------------------------------------------- */
ASSERT (nblocks == Numeric->nblocks) ;
Pnum = Numeric->Pnum ;
Offp = Numeric->Offp ;
Offi = Numeric->Offi ;
Offx = (Entry *) Numeric->Offx ;
Lip = Numeric->Lip ;
Llen = Numeric->Llen ;
Uip = Numeric->Uip ;
Ulen = Numeric->Ulen ;
LUbx = (Unit **) Numeric->LUbx ;
Udiag = Numeric->Udiag ;
Rs = Numeric->Rs ;
X = (Entry *) Numeric->Xwork ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
/* ---------------------------------------------------------------------- */
/* solve in chunks of 4 columns at a time */
/* ---------------------------------------------------------------------- */
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
{
/* ------------------------------------------------------------------ */
/* get the size of the current chunk */
/* ------------------------------------------------------------------ */
nr = MIN (nrhs - chunk, 4) ;
/* ------------------------------------------------------------------ */
/* scale and permute the right hand side, X = P*(R\B) */
/* ------------------------------------------------------------------ */
if (Rs == NULL)
{
/* no scaling */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
X [k] = Bz [Pnum [k]] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [2*k ] = Bz [i ] ;
X [2*k + 1] = Bz [i + d ] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [3*k ] = Bz [i ] ;
X [3*k + 1] = Bz [i + d ] ;
X [3*k + 2] = Bz [i + d*2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [4*k ] = Bz [i ] ;
X [4*k + 1] = Bz [i + d ] ;
X [4*k + 2] = Bz [i + d*2] ;
X [4*k + 3] = Bz [i + d*3] ;
}
break ;
}
}
else
{
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
}
break ;
}
}
/* ------------------------------------------------------------------ */
/* solve X = (L*U + Off)\X */
/* ------------------------------------------------------------------ */
for (block = nblocks-1 ; block >= 0 ; block--)
{
/* -------------------------------------------------------------- */
/* the block of size nk is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
/* solve the block system */
if (nk == 1)
{
s = Udiag [k1] ;
switch (nr)
{
case 1:
DIV (X [k1], X [k1], s) ;
break ;
case 2:
DIV (X [2*k1], X [2*k1], s) ;
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
break ;
case 3:
DIV (X [3*k1], X [3*k1], s) ;
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
break ;
case 4:
DIV (X [4*k1], X [4*k1], s) ;
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
break ;
}
}
else
{
KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
X + nr*k1) ;
KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
Udiag + k1, nr, X + nr*k1) ;
}
/* -------------------------------------------------------------- */
/* block back-substitution for the off-diagonal-block entries */
/* -------------------------------------------------------------- */
if (block > 0)
{
switch (nr)
{
case 1:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [k] ;
for (p = Offp [k] ; p < pend ; p++)
{
MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
}
}
break ;
case 2:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [2*i], offik, x [0]) ;
MULT_SUB (X [2*i + 1], offik, x [1]) ;
}
}
break ;
case 3:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [3*i], offik, x [0]) ;
MULT_SUB (X [3*i + 1], offik, x [1]) ;
MULT_SUB (X [3*i + 2], offik, x [2]) ;
}
}
break ;
case 4:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [4*i], offik, x [0]) ;
MULT_SUB (X [4*i + 1], offik, x [1]) ;
MULT_SUB (X [4*i + 2], offik, x [2]) ;
MULT_SUB (X [4*i + 3], offik, x [3]) ;
}
}
break ;
}
}
}
/* ------------------------------------------------------------------ */
/* permute the result, Bz = Q*X */
/* ------------------------------------------------------------------ */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
Bz [Q [k]] = X [k] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [2*k ] ;
Bz [i + d ] = X [2*k + 1] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [3*k ] ;
Bz [i + d ] = X [3*k + 1] ;
Bz [i + d*2] = X [3*k + 2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [4*k ] ;
Bz [i + d ] = X [4*k + 1] ;
Bz [i + d*2] = X [4*k + 2] ;
Bz [i + d*3] = X [4*k + 3] ;
}
break ;
}
/* ------------------------------------------------------------------ */
/* go to the next chunk of B */
/* ------------------------------------------------------------------ */
Bz += d*4 ;
}
return (TRUE) ;
}

156
src/maths/KLU/klu_sort.c Normal file
View File

@ -0,0 +1,156 @@
/* ========================================================================== */
/* === KLU_sort ============================================================= */
/* ========================================================================== */
/* sorts the columns of L and U so that the row indices appear in strictly
* increasing order.
*/
#include "klu_internal.h"
/* ========================================================================== */
/* === sort ================================================================= */
/* ========================================================================== */
/* Sort L or U using a double-transpose */
static void sort (Int n, Int *Xip, Int *Xlen, Unit *LU, Int *Tp, Int *Tj,
Entry *Tx, Int *W)
{
Int *Xi ;
Entry *Xx ;
Int p, i, j, len, nz, tp, xlen, pend ;
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ;
/* count the number of entries in each row of L or U */
for (i = 0 ; i < n ; i++)
{
W [i] = 0 ;
}
for (j = 0 ; j < n ; j++)
{
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
for (p = 0 ; p < len ; p++)
{
W [Xi [p]]++ ;
}
}
/* construct the row pointers for T */
nz = 0 ;
for (i = 0 ; i < n ; i++)
{
Tp [i] = nz ;
nz += W [i] ;
}
Tp [n] = nz ;
for (i = 0 ; i < n ; i++)
{
W [i] = Tp [i] ;
}
/* transpose the matrix into Tp, Ti, Tx */
for (j = 0 ; j < n ; j++)
{
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
for (p = 0 ; p < len ; p++)
{
tp = W [Xi [p]]++ ;
Tj [tp] = j ;
Tx [tp] = Xx [p] ;
}
}
/* transpose the matrix back into Xip, Xlen, Xi, Xx */
for (j = 0 ; j < n ; j++)
{
W [j] = 0 ;
}
for (i = 0 ; i < n ; i++)
{
pend = Tp [i+1] ;
for (p = Tp [i] ; p < pend ; p++)
{
j = Tj [p] ;
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
xlen = W [j]++ ;
Xi [xlen] = i ;
Xx [xlen] = Tx [p] ;
}
}
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ;
}
/* ========================================================================== */
/* === KLU_sort ============================================================= */
/* ========================================================================== */
Int KLU_sort
(
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
KLU_common *Common
)
{
Int *R, *W, *Tp, *Ti, *Lip, *Uip, *Llen, *Ulen ;
Entry *Tx ;
Unit **LUbx ;
Int n, nk, nz, block, nblocks, maxblock, k1 ;
size_t m1 ;
if (Common == NULL)
{
return (FALSE) ;
}
Common->status = KLU_OK ;
n = Symbolic->n ;
R = Symbolic->R ;
nblocks = Symbolic->nblocks ;
maxblock = Symbolic->maxblock ;
Lip = Numeric->Lip ;
Llen = Numeric->Llen ;
Uip = Numeric->Uip ;
Ulen = Numeric->Ulen ;
LUbx = (Unit **) Numeric->LUbx ;
m1 = ((size_t) maxblock) + 1 ;
/* allocate workspace */
nz = MAX (Numeric->max_lnz_block, Numeric->max_unz_block) ;
W = KLU_malloc (maxblock, sizeof (Int), Common) ;
Tp = KLU_malloc (m1, sizeof (Int), Common) ;
Ti = KLU_malloc (nz, sizeof (Int), Common) ;
Tx = KLU_malloc (nz, sizeof (Entry), Common) ;
PRINTF (("\n======================= Start sort:\n")) ;
if (Common->status == KLU_OK)
{
/* sort each block of L and U */
for (block = 0 ; block < nblocks ; block++)
{
k1 = R [block] ;
nk = R [block+1] - k1 ;
if (nk > 1)
{
PRINTF (("\n-------------------block: %d nk %d\n", block, nk)) ;
sort (nk, Lip + k1, Llen + k1, LUbx [block], Tp, Ti, Tx, W) ;
sort (nk, Uip + k1, Ulen + k1, LUbx [block], Tp, Ti, Tx, W) ;
}
}
}
PRINTF (("\n======================= sort done.\n")) ;
/* free workspace */
KLU_free (W, maxblock, sizeof (Int), Common) ;
KLU_free (Tp, m1, sizeof (Int), Common) ;
KLU_free (Ti, nz, sizeof (Int), Common) ;
KLU_free (Tx, nz, sizeof (Entry), Common) ;
return (Common->status == KLU_OK) ;
}

465
src/maths/KLU/klu_tsolve.c Normal file
View File

@ -0,0 +1,465 @@
/* ========================================================================== */
/* === KLU_tsolve =========================================================== */
/* ========================================================================== */
/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
* Numeric->Iwork).
*/
#include "klu_internal.h"
Int KLU_tsolve
(
/* inputs, not modified */
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
Int d, /* leading dimension of B */
Int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to A'x=b on output */
double B [ ], /* size n*nrhs, in column-oriented form, with
* leading dimension d. */
#ifdef COMPLEX
Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
* array transpose solve. Used for the complex
* case only. */
#endif
/* --------------- */
KLU_common *Common
)
{
Entry x [4], offik, s ;
double rs, *Rs ;
Entry *Offx, *X, *Bz, *Udiag ;
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
Unit **LUbx ;
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
B == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
Bz = (Entry *) B ;
n = Symbolic->n ;
nblocks = Symbolic->nblocks ;
Q = Symbolic->Q ;
R = Symbolic->R ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Numeric object */
/* ---------------------------------------------------------------------- */
ASSERT (nblocks == Numeric->nblocks) ;
Pnum = Numeric->Pnum ;
Offp = Numeric->Offp ;
Offi = Numeric->Offi ;
Offx = (Entry *) Numeric->Offx ;
Lip = Numeric->Lip ;
Llen = Numeric->Llen ;
Uip = Numeric->Uip ;
Ulen = Numeric->Ulen ;
LUbx = (Unit **) Numeric->LUbx ;
Udiag = Numeric->Udiag ;
Rs = Numeric->Rs ;
X = (Entry *) Numeric->Xwork ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
/* ---------------------------------------------------------------------- */
/* solve in chunks of 4 columns at a time */
/* ---------------------------------------------------------------------- */
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
{
/* ------------------------------------------------------------------ */
/* get the size of the current chunk */
/* ------------------------------------------------------------------ */
nr = MIN (nrhs - chunk, 4) ;
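        /* e.g., with nrhs = 10 the three chunks have nr = 4, 4, and 2 */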
/* ------------------------------------------------------------------ */
/* permute the right hand side, X = Q'*B */
/* ------------------------------------------------------------------ */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
X [k] = Bz [Q [k]] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
X [2*k ] = Bz [i ] ;
X [2*k + 1] = Bz [i + d ] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
X [3*k ] = Bz [i ] ;
X [3*k + 1] = Bz [i + d ] ;
X [3*k + 2] = Bz [i + d*2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
X [4*k ] = Bz [i ] ;
X [4*k + 1] = Bz [i + d ] ;
X [4*k + 2] = Bz [i + d*2] ;
X [4*k + 3] = Bz [i + d*3] ;
}
break ;
}
/* ------------------------------------------------------------------ */
/* solve X = (L*U + Off)'\X */
/* ------------------------------------------------------------------ */
for (block = 0 ; block < nblocks ; block++)
{
/* -------------------------------------------------------------- */
/* the block of size nk is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
/* -------------------------------------------------------------- */
/* block back-substitution for the off-diagonal-block entries */
/* -------------------------------------------------------------- */
if (block > 0)
{
switch (nr)
{
case 1:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
for (p = Offp [k] ; p < pend ; p++)
{
#ifdef COMPLEX
if (conj_solve)
{
MULT_SUB_CONJ (X [k], X [Offi [p]],
Offx [p]) ;
}
else
#endif
{
MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
}
}
}
break ;
case 2:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (offik, Offx [p]) ;
}
else
#endif
{
offik = Offx [p] ;
}
MULT_SUB (x [0], offik, X [2*i]) ;
MULT_SUB (x [1], offik, X [2*i + 1]) ;
}
X [2*k ] = x [0] ;
X [2*k + 1] = x [1] ;
}
break ;
case 3:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ (offik, Offx [p]) ;
}
else
#endif
{
offik = Offx [p] ;
}
MULT_SUB (x [0], offik, X [3*i]) ;
MULT_SUB (x [1], offik, X [3*i + 1]) ;
MULT_SUB (x [2], offik, X [3*i + 2]) ;
}
X [3*k ] = x [0] ;
X [3*k + 1] = x [1] ;
X [3*k + 2] = x [2] ;
}
break ;
case 4:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
#ifdef COMPLEX
if (conj_solve)
{
CONJ(offik, Offx [p]) ;
}
else
#endif
{
offik = Offx [p] ;
}
MULT_SUB (x [0], offik, X [4*i]) ;
MULT_SUB (x [1], offik, X [4*i + 1]) ;
MULT_SUB (x [2], offik, X [4*i + 2]) ;
MULT_SUB (x [3], offik, X [4*i + 3]) ;
}
X [4*k ] = x [0] ;
X [4*k + 1] = x [1] ;
X [4*k + 2] = x [2] ;
X [4*k + 3] = x [3] ;
}
break ;
}
}
/* -------------------------------------------------------------- */
/* solve the block system */
/* -------------------------------------------------------------- */
if (nk == 1)
{
#ifdef COMPLEX
if (conj_solve)
{
CONJ (s, Udiag [k1]) ;
}
else
#endif
{
s = Udiag [k1] ;
}
switch (nr)
{
case 1:
DIV (X [k1], X [k1], s) ;
break ;
case 2:
DIV (X [2*k1], X [2*k1], s) ;
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
break ;
case 3:
DIV (X [3*k1], X [3*k1], s) ;
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
break ;
case 4:
DIV (X [4*k1], X [4*k1], s) ;
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
break ;
}
}
else
{
KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
Udiag + k1, nr,
#ifdef COMPLEX
conj_solve,
#endif
X + nr*k1) ;
KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
#ifdef COMPLEX
conj_solve,
#endif
X + nr*k1) ;
}
}
/* ------------------------------------------------------------------ */
/* scale and permute the result, Bz = P'(R\X) */
/* ------------------------------------------------------------------ */
if (Rs == NULL)
{
/* no scaling */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
Bz [Pnum [k]] = X [k] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
Bz [i ] = X [2*k ] ;
Bz [i + d ] = X [2*k + 1] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
Bz [i ] = X [3*k ] ;
Bz [i + d ] = X [3*k + 1] ;
Bz [i + d*2] = X [3*k + 2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
Bz [i ] = X [4*k ] ;
Bz [i + d ] = X [4*k + 1] ;
Bz [i + d*2] = X [4*k + 2] ;
Bz [i + d*3] = X [4*k + 3] ;
}
break ;
}
}
else
{
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
}
break ;
}
}
/* ------------------------------------------------------------------ */
/* go to the next chunk of B */
/* ------------------------------------------------------------------ */
Bz += d*4 ;
}
return (TRUE) ;
}
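An end-to-end usage sketch for the routine above (illustrative only, assuming the int/double entry points declared in klu.h): factor a small 5-by-5 compressed-column matrix, then solve A'x = b; the solution overwrites b.

#include <stdio.h>
#include "klu.h"

int main (void)
{
    int    n  = 5 ;
    int    Ap [ ] = {0, 2, 5, 9, 10, 12} ;
    int    Ai [ ] = {0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4} ;
    double Ax [ ] = {2., 3., 3., -1., 4., 4., -3., 1., 2., 2., 6., 1.} ;
    double b  [ ] = {8., 45., -3., 3., 19.} ;
    klu_symbolic *Symbolic ;
    klu_numeric  *Numeric ;
    klu_common   Common ;
    int i ;
    klu_defaults (&Common) ;
    Symbolic = klu_analyze (n, Ap, Ai, &Common) ;
    Numeric  = klu_factor  (Ap, Ai, Ax, Symbolic, &Common) ;
    klu_tsolve (Symbolic, Numeric, n, 1, b, &Common) ;   /* d = n, nrhs = 1 */
    for (i = 0 ; i < n ; i++) printf ("x [%d] = %g\n", i, b [i]) ;
    klu_free_numeric  (&Numeric,  &Common) ;
    klu_free_symbolic (&Symbolic, &Common) ;
    return (0) ;
}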

694
src/maths/KLU/klu_version.h Normal file
View File

@ -0,0 +1,694 @@
#ifndef _KLU_VERSION_H
#define _KLU_VERSION_H
#ifdef DLONG
#define Int UF_long
#define Int_id UF_long_id
#define Int_MAX UF_long_max
#else
#define Int int
#define Int_id "%d"
#define Int_MAX INT_MAX
#endif
#define NPRINT
#define BYTES(type,n) (sizeof (type) * (n))
#define CEILING(b,u) (((b)+(u)-1) / (u))
#define UNITS(type,n) (CEILING (BYTES (type,n), sizeof (Unit)))
#define DUNITS(type,n) (ceil (BYTES (type, (double) n) / sizeof (Unit)))
#define GET_I_POINTER(LU, Xip, Xi, k) \
{ \
Xi = (Int *) (LU + Xip [k]) ; \
}
#define GET_X_POINTER(LU, Xip, Xlen, Xx, k) \
{ \
Xx = (Entry *) (LU + Xip [k] + UNITS (Int, Xlen [k])) ; \
}
#define GET_POINTER(LU, Xip, Xlen, Xi, Xx, k, xlen) \
{ \
Unit *xp = LU + Xip [k] ; \
xlen = Xlen [k] ; \
Xi = (Int *) xp ; \
Xx = (Entry *) (xp + UNITS (Int, xlen)) ; \
}
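/* Illustrative note (not part of the original header): for column k, the
 * packed storage addressed by GET_POINTER holds Xlen [k] row indices
 * starting at LU + Xip [k], rounded up to whole Units, followed by
 * Xlen [k] numerical entries.  For example, assuming Int is a 4-byte int
 * and Unit is an 8-byte double, a column with Xlen [k] = 3 uses
 * UNITS (Int, 3) = CEILING (12, 8) = 2 Units for its indices, so its
 * numerical entries begin at LU + Xip [k] + 2.
 */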
/* function names */
#ifdef COMPLEX
#ifdef DLONG
#define KLU_scale klu_zl_scale
#define KLU_solve klu_zl_solve
#define KLU_tsolve klu_zl_tsolve
#define KLU_free_numeric klu_zl_free_numeric
#define KLU_factor klu_zl_factor
#define KLU_refactor klu_zl_refactor
#define KLU_kernel_factor klu_zl_kernel_factor
#define KLU_lsolve klu_zl_lsolve
#define KLU_ltsolve klu_zl_ltsolve
#define KLU_usolve klu_zl_usolve
#define KLU_utsolve klu_zl_utsolve
#define KLU_kernel klu_zl_kernel
#define KLU_valid klu_zl_valid
#define KLU_valid_LU klu_zl_valid_LU
#define KLU_sort klu_zl_sort
#define KLU_rgrowth klu_zl_rgrowth
#define KLU_rcond klu_zl_rcond
#define KLU_extract klu_zl_extract
#define KLU_condest klu_zl_condest
#define KLU_flops klu_zl_flops
#else
#define KLU_scale klu_z_scale
#define KLU_solve klu_z_solve
#define KLU_tsolve klu_z_tsolve
#define KLU_free_numeric klu_z_free_numeric
#define KLU_factor klu_z_factor
#define KLU_refactor klu_z_refactor
#define KLU_kernel_factor klu_z_kernel_factor
#define KLU_lsolve klu_z_lsolve
#define KLU_ltsolve klu_z_ltsolve
#define KLU_usolve klu_z_usolve
#define KLU_utsolve klu_z_utsolve
#define KLU_kernel klu_z_kernel
#define KLU_valid klu_z_valid
#define KLU_valid_LU klu_z_valid_LU
#define KLU_sort klu_z_sort
#define KLU_rgrowth klu_z_rgrowth
#define KLU_rcond klu_z_rcond
#define KLU_extract klu_z_extract
#define KLU_condest klu_z_condest
#define KLU_flops klu_z_flops
#endif
#else
#ifdef DLONG
#define KLU_scale klu_l_scale
#define KLU_solve klu_l_solve
#define KLU_tsolve klu_l_tsolve
#define KLU_free_numeric klu_l_free_numeric
#define KLU_factor klu_l_factor
#define KLU_refactor klu_l_refactor
#define KLU_kernel_factor klu_l_kernel_factor
#define KLU_lsolve klu_l_lsolve
#define KLU_ltsolve klu_l_ltsolve
#define KLU_usolve klu_l_usolve
#define KLU_utsolve klu_l_utsolve
#define KLU_kernel klu_l_kernel
#define KLU_valid klu_l_valid
#define KLU_valid_LU klu_l_valid_LU
#define KLU_sort klu_l_sort
#define KLU_rgrowth klu_l_rgrowth
#define KLU_rcond klu_l_rcond
#define KLU_extract klu_l_extract
#define KLU_condest klu_l_condest
#define KLU_flops klu_l_flops
#else
#define KLU_scale klu_scale
#define KLU_solve klu_solve
#define KLU_tsolve klu_tsolve
#define KLU_free_numeric klu_free_numeric
#define KLU_factor klu_factor
#define KLU_refactor klu_refactor
#define KLU_kernel_factor klu_kernel_factor
#define KLU_lsolve klu_lsolve
#define KLU_ltsolve klu_ltsolve
#define KLU_usolve klu_usolve
#define KLU_utsolve klu_utsolve
#define KLU_kernel klu_kernel
#define KLU_valid klu_valid
#define KLU_valid_LU klu_valid_LU
#define KLU_sort klu_sort
#define KLU_rgrowth klu_rgrowth
#define KLU_rcond klu_rcond
#define KLU_extract klu_extract
#define KLU_condest klu_condest
#define KLU_flops klu_flops
#endif
#endif
#ifdef DLONG
#define KLU_analyze klu_l_analyze
#define KLU_analyze_given klu_l_analyze_given
#define KLU_alloc_symbolic klu_l_alloc_symbolic
#define KLU_free_symbolic klu_l_free_symbolic
#define KLU_defaults klu_l_defaults
#define KLU_free klu_l_free
#define KLU_malloc klu_l_malloc
#define KLU_realloc klu_l_realloc
#define KLU_add_size_t klu_l_add_size_t
#define KLU_mult_size_t klu_l_mult_size_t
#define KLU_symbolic klu_l_symbolic
#define KLU_numeric klu_l_numeric
#define KLU_common klu_l_common
#define BTF_order btf_l_order
#define BTF_strongcomp btf_l_strongcomp
#define AMD_order amd_l_order
#define COLAMD colamd_l
#define COLAMD_recommended colamd_l_recommended
#else
#define KLU_analyze klu_analyze
#define KLU_analyze_given klu_analyze_given
#define KLU_alloc_symbolic klu_alloc_symbolic
#define KLU_free_symbolic klu_free_symbolic
#define KLU_defaults klu_defaults
#define KLU_free klu_free
#define KLU_malloc klu_malloc
#define KLU_realloc klu_realloc
#define KLU_add_size_t klu_add_size_t
#define KLU_mult_size_t klu_mult_size_t
#define KLU_symbolic klu_symbolic
#define KLU_numeric klu_numeric
#define KLU_common klu_common
#define BTF_order btf_order
#define BTF_strongcomp btf_strongcomp
#define AMD_order amd_order
#define COLAMD colamd
#define COLAMD_recommended colamd_recommended
#endif
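/* Illustrative note (not part of the original header): compiling the same
 * sources with -DCOMPLEX and -DDLONG maps the generic names onto the
 * complex/long entry points, e.g. KLU_tsolve becomes klu_zl_tsolve and Int
 * becomes UF_long; with neither flag defined, KLU_tsolve is klu_tsolve and
 * Int is int.  This is how the four real/complex by int/long variants of
 * each routine are generated from one set of source files.
 */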
/* -------------------------------------------------------------------------- */
/* Numerical relop macros for correctly handling the NaN case */
/* -------------------------------------------------------------------------- */
/*
SCALAR_IS_NAN(x):
True if x is NaN. False otherwise. The commonly-existing isnan(x)
function could be used, but it's not in Kernighan & Ritchie 2nd edition
(ANSI C). It may appear in <math.h>, but I'm not certain about
portability. The expression x != x is true if and only if x is NaN,
according to the IEEE 754 floating-point standard.
SCALAR_IS_ZERO(x):
True if x is zero. False if x is nonzero, NaN, or +/- Inf.
This is (x == 0) if the compiler is IEEE 754 compliant.
SCALAR_IS_NONZERO(x):
    True if x is nonzero, NaN, or +/- Inf. False if x is zero.
This is (x != 0) if the compiler is IEEE 754 compliant.
SCALAR_IS_LTZERO(x):
True if x is < zero or -Inf. False if x is >= 0, NaN, or +Inf.
This is (x < 0) if the compiler is IEEE 754 compliant.
*/
/* These all work properly, according to the IEEE 754 standard ... except on */
/* a PC with Windows.  They work fine in Linux on the same PC... */
#define SCALAR_IS_NAN(x) ((x) != (x))
#define SCALAR_IS_ZERO(x) ((x) == 0.)
#define SCALAR_IS_NONZERO(x) ((x) != 0.)
#define SCALAR_IS_LTZERO(x) ((x) < 0.)
/* scalar absolute value macro. If x is NaN, the result is NaN: */
#define SCALAR_ABS(x) ((SCALAR_IS_LTZERO (x)) ? -(x) : (x))
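/* Illustrative note (not part of the original header): for a NaN value x
 * (for example x = 0./0.), every comparison involving x is false, so
 * SCALAR_IS_NAN (x) is 1, SCALAR_IS_ZERO (x) is 0, SCALAR_IS_NONZERO (x)
 * is 1, SCALAR_IS_LTZERO (x) is 0, and SCALAR_ABS (x) is still NaN.
 */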
/* print a scalar (avoid printing "-0" for negative zero). */
#ifdef NPRINT
#define PRINT_SCALAR(a)
#else
#define PRINT_SCALAR(a) \
{ \
if (SCALAR_IS_NONZERO (a)) \
{ \
PRINTF ((" (%g)", (a))) ; \
} \
else \
{ \
PRINTF ((" (0)")) ; \
} \
}
#endif
/* -------------------------------------------------------------------------- */
/* Real floating-point arithmetic */
/* -------------------------------------------------------------------------- */
#ifndef COMPLEX
typedef double Unit ;
#define Entry double
#define SPLIT(s) (1)
#define REAL(c) (c)
#define IMAG(c) (0.)
#define ASSIGN(c,s1,s2,p,split) { (c) = (s1)[p] ; }
#define CLEAR(c) { (c) = 0. ; }
#define CLEAR_AND_INCREMENT(p) { *p++ = 0. ; }
#define IS_NAN(a) SCALAR_IS_NAN (a)
#define IS_ZERO(a) SCALAR_IS_ZERO (a)
#define IS_NONZERO(a) SCALAR_IS_NONZERO (a)
#define SCALE_DIV(c,s) { (c) /= (s) ; }
#define SCALE_DIV_ASSIGN(a,c,s) { a = c / s ; }
#define SCALE(c,s) { (c) *= (s) ; }
#define ASSEMBLE(c,a) { (c) += (a) ; }
#define ASSEMBLE_AND_INCREMENT(c,p) { (c) += *p++ ; }
#define DECREMENT(c,a) { (c) -= (a) ; }
#define MULT(c,a,b) { (c) = (a) * (b) ; }
#define MULT_CONJ(c,a,b) { (c) = (a) * (b) ; }
#define MULT_SUB(c,a,b) { (c) -= (a) * (b) ; }
#define MULT_SUB_CONJ(c,a,b) { (c) -= (a) * (b) ; }
#define DIV(c,a,b) { (c) = (a) / (b) ; }
#define RECIPROCAL(c) { (c) = 1.0 / (c) ; }
#define DIV_CONJ(c,a,b) { (c) = (a) / (b) ; }
#define APPROX_ABS(s,a) { (s) = SCALAR_ABS (a) ; }
#define ABS(s,a) { (s) = SCALAR_ABS (a) ; }
#define PRINT_ENTRY(a) PRINT_SCALAR (a)
#define CONJ(a,x) a = x
/* for flop counts */
#define MULTSUB_FLOPS 2. /* c -= a*b */
#define DIV_FLOPS 1. /* c = a/b */
#define ABS_FLOPS 0. /* c = abs (a) */
#define ASSEMBLE_FLOPS 1. /* c += a */
#define DECREMENT_FLOPS 1. /* c -= a */
#define MULT_FLOPS 1. /* c = a*b */
#define SCALE_FLOPS 1. /* c = a/s */
#else
/* -------------------------------------------------------------------------- */
/* Complex floating-point arithmetic */
/* -------------------------------------------------------------------------- */
/*
Note: An alternative to this Double_Complex type would be to use a
struct { double r ; double i ; }. The problem with that method
(used by the Sun Performance Library, for example) is that ANSI C provides
no guarantee about the layout of a struct. It is possible that the sizeof
the struct above would be greater than 2 * sizeof (double). This would
mean that the complex BLAS could not be used. The method used here avoids
that possibility. ANSI C *does* guarantee that an array of structs has
the same size as n times the size of one struct.
The ANSI C99 version of the C language includes a "double _Complex" type.
It should be possible in that case to do the following:
#define Entry double _Complex
    and remove the Double_Complex struct. The macros below could then be
    replaced with intrinsic operators. Note that the #define Real and
#define Imag should also be removed (they only appear in this file).
For the MULT, MULT_SUB, MULT_SUB_CONJ, and MULT_CONJ macros,
the output argument c cannot be the same as any input argument.
*/
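/* Illustrative note (not part of the original header): because an array of
 * n Double_Complex structs occupies exactly n * sizeof (Double_Complex)
 * bytes, an array Double_Complex z [m] can be viewed as double *d =
 * (double *) z, with d [2*k] the real part and d [2*k+1] the imaginary
 * part of z [k], which is the interleaved layout expected by the complex
 * BLAS.
 */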
typedef struct
{
double component [2] ; /* real and imaginary parts */
} Double_Complex ;
typedef Double_Complex Unit ;
#define Entry Double_Complex
#define Real component [0]
#define Imag component [1]
/* for flop counts */
#define MULTSUB_FLOPS 8. /* c -= a*b */
#define DIV_FLOPS 9. /* c = a/b */
#define ABS_FLOPS 6. /* c = abs (a), count sqrt as one flop */
#define ASSEMBLE_FLOPS 2. /* c += a */
#define DECREMENT_FLOPS 2. /* c -= a */
#define MULT_FLOPS 6. /* c = a*b */
#define SCALE_FLOPS 2. /* c = a/s or c = a*s */
/* -------------------------------------------------------------------------- */
/* real part of c */
#define REAL(c) ((c).Real)
/* -------------------------------------------------------------------------- */
/* imag part of c */
#define IMAG(c) ((c).Imag)
/* -------------------------------------------------------------------------- */
/* Return TRUE if a complex number is in split form, FALSE if in packed form */
#define SPLIT(sz) ((sz) != (double *) NULL)
/* c = (s1) + (s2)*i, if s2 is null, then X is in "packed" format (compatible
* with Entry and ANSI C99 double _Complex type). */
/*#define ASSIGN(c,s1,s2,p,split) \
{ \
if (split) \
{ \
(c).Real = (s1)[p] ; \
(c).Imag = (s2)[p] ; \
} \
else \
{ \
(c) = ((Entry *)(s1))[p] ; \
} \
}*/
/* -------------------------------------------------------------------------- */
#define CONJ(a, x) \
{ \
a.Real = x.Real ; \
a.Imag = -x.Imag ; \
}
/* c = 0 */
#define CLEAR(c) \
{ \
(c).Real = 0. ; \
(c).Imag = 0. ; \
}
/* -------------------------------------------------------------------------- */
/* *p++ = 0 */
#define CLEAR_AND_INCREMENT(p) \
{ \
p->Real = 0. ; \
p->Imag = 0. ; \
p++ ; \
}
/* -------------------------------------------------------------------------- */
/* True if a == 0 */
#define IS_ZERO(a) \
(SCALAR_IS_ZERO ((a).Real) && SCALAR_IS_ZERO ((a).Imag))
/* -------------------------------------------------------------------------- */
/* True if a is NaN */
#define IS_NAN(a) \
(SCALAR_IS_NAN ((a).Real) || SCALAR_IS_NAN ((a).Imag))
/* -------------------------------------------------------------------------- */
/* True if a != 0 */
#define IS_NONZERO(a) \
(SCALAR_IS_NONZERO ((a).Real) || SCALAR_IS_NONZERO ((a).Imag))
/* -------------------------------------------------------------------------- */
/* a = c/s */
#define SCALE_DIV_ASSIGN(a,c,s) \
{ \
a.Real = c.Real / s ; \
a.Imag = c.Imag / s ; \
}
/* c /= s */
#define SCALE_DIV(c,s) \
{ \
(c).Real /= (s) ; \
(c).Imag /= (s) ; \
}
/* -------------------------------------------------------------------------- */
/* c *= s */
#define SCALE(c,s) \
{ \
(c).Real *= (s) ; \
(c).Imag *= (s) ; \
}
/* -------------------------------------------------------------------------- */
/* c += a */
#define ASSEMBLE(c,a) \
{ \
(c).Real += (a).Real ; \
(c).Imag += (a).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c += *p++ */
#define ASSEMBLE_AND_INCREMENT(c,p) \
{ \
(c).Real += p->Real ; \
(c).Imag += p->Imag ; \
p++ ; \
}
/* -------------------------------------------------------------------------- */
/* c -= a */
#define DECREMENT(c,a) \
{ \
(c).Real -= (a).Real ; \
(c).Imag -= (a).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c = a*b, assert because c cannot be the same as a or b */
#define MULT(c,a,b) \
{ \
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
(c).Real = (a).Real * (b).Real - (a).Imag * (b).Imag ; \
(c).Imag = (a).Imag * (b).Real + (a).Real * (b).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c = a*conjugate(b), assert because c cannot be the same as a or b */
#define MULT_CONJ(c,a,b) \
{ \
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
(c).Real = (a).Real * (b).Real + (a).Imag * (b).Imag ; \
(c).Imag = (a).Imag * (b).Real - (a).Real * (b).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c -= a*b, assert because c cannot be the same as a or b */
#define MULT_SUB(c,a,b) \
{ \
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
(c).Real -= (a).Real * (b).Real - (a).Imag * (b).Imag ; \
(c).Imag -= (a).Imag * (b).Real + (a).Real * (b).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c -= a*conjugate(b), assert because c cannot be the same as a or b */
#define MULT_SUB_CONJ(c,a,b) \
{ \
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
(c).Real -= (a).Real * (b).Real + (a).Imag * (b).Imag ; \
(c).Imag -= (a).Imag * (b).Real - (a).Real * (b).Imag ; \
}
/* -------------------------------------------------------------------------- */
/* c = a/b, be careful to avoid underflow and overflow */
#ifdef MATHWORKS
#define DIV(c,a,b) \
{ \
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (b).Imag, \
&((c).Real), &((c).Imag)) ; \
}
#else
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
/* c can be the same variable as a or b. */
/* Ignore NaN case for double relop br>=bi. */
#define DIV(c,a,b) \
{ \
double r, den, ar, ai, br, bi ; \
br = (b).Real ; \
bi = (b).Imag ; \
ar = (a).Real ; \
ai = (a).Imag ; \
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \
{ \
r = bi / br ; \
den = br + r * bi ; \
(c).Real = (ar + ai * r) / den ; \
(c).Imag = (ai - ar * r) / den ; \
} \
else \
{ \
r = br / bi ; \
den = r * br + bi ; \
(c).Real = (ar * r + ai) / den ; \
(c).Imag = (ai * r - ar) / den ; \
} \
}
#endif
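/* Worked example for the scaled division above (illustrative only):
 * (3 + 4i) / (1 + 2i).  Here |br| < |bi|, so the second branch is taken:
 * r = br/bi = 0.5, den = r*br + bi = 2.5,
 * real = (3*0.5 + 4) / 2.5 = 2.2, imag = (4*0.5 - 3) / 2.5 = -0.4,
 * which matches (3 + 4i)(1 - 2i) / 5 = (11 - 2i) / 5.
 */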
/* -------------------------------------------------------------------------- */
/* c = 1/c, be careful to avoid underflow and overflow */
/* Not used if MATHWORKS is defined. */
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
/* Ignore NaN case for double relop cr>=ci. */
#define RECIPROCAL(c) \
{ \
double r, den, cr, ci ; \
cr = (c).Real ; \
ci = (c).Imag ; \
if (SCALAR_ABS (cr) >= SCALAR_ABS (ci)) \
{ \
r = ci / cr ; \
den = cr + r * ci ; \
(c).Real = 1.0 / den ; \
(c).Imag = - r / den ; \
} \
else \
{ \
r = cr / ci ; \
den = r * cr + ci ; \
(c).Real = r / den ; \
(c).Imag = - 1.0 / den ; \
} \
}
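/* Worked example (illustrative only): c = 3 + 4i.  Since |cr| < |ci|, the
 * second branch gives r = cr/ci = 0.75, den = r*cr + ci = 6.25, so
 * 1/c = 0.12 - 0.16i, which matches (3 - 4i) / 25.
 */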
/* -------------------------------------------------------------------------- */
/* c = a/conjugate(b), be careful to avoid underflow and overflow */
#ifdef MATHWORKS
#define DIV_CONJ(c,a,b) \
{ \
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (-(b).Imag), \
&((c).Real), &((c).Imag)) ; \
}
#else
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
/* c can be the same variable as a or b. */
/* Ignore NaN case for double relop br>=bi. */
#define DIV_CONJ(c,a,b) \
{ \
double r, den, ar, ai, br, bi ; \
br = (b).Real ; \
bi = (b).Imag ; \
ar = (a).Real ; \
ai = (a).Imag ; \
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \
{ \
r = (-bi) / br ; \
den = br - r * bi ; \
(c).Real = (ar + ai * r) / den ; \
(c).Imag = (ai - ar * r) / den ; \
} \
else \
{ \
r = br / (-bi) ; \
den = r * br - bi; \
(c).Real = (ar * r + ai) / den ; \
(c).Imag = (ai * r - ar) / den ; \
} \
}
#endif
/* -------------------------------------------------------------------------- */
/* approximate absolute value, s = |r|+|i| */
#define APPROX_ABS(s,a) \
{ \
(s) = SCALAR_ABS ((a).Real) + SCALAR_ABS ((a).Imag) ; \
}
/* -------------------------------------------------------------------------- */
/* exact absolute value, s = sqrt (a.real^2 + a.imag^2) */
#ifdef MATHWORKS
#define ABS(s,a) \
{ \
(s) = utFdlibm_hypot ((a).Real, (a).Imag) ; \
}
#else
/* Ignore NaN case for the double relops ar>=ai and ar+ai==ar. */
#define ABS(s,a) \
{ \
double r, ar, ai ; \
ar = SCALAR_ABS ((a).Real) ; \
ai = SCALAR_ABS ((a).Imag) ; \
if (ar >= ai) \
{ \
if (ar + ai == ar) \
{ \
(s) = ar ; \
} \
else \
{ \
r = ai / ar ; \
(s) = ar * sqrt (1.0 + r*r) ; \
} \
} \
else \
{ \
if (ai + ar == ai) \
{ \
(s) = ai ; \
} \
else \
{ \
r = ar / ai ; \
(s) = ai * sqrt (1.0 + r*r) ; \
} \
} \
}
#endif
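/* Illustrative note (not part of the original header): the scaled form
 * avoids overflow.  For a = 1e200 + 1e200i, the naive
 * sqrt (ar*ar + ai*ai) overflows to +Inf because ar*ar exceeds the double
 * range, while the form above gives ar * sqrt (1 + 1), about 1.414e200.
 */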
/* -------------------------------------------------------------------------- */
/* print an entry (avoid printing "-0" for negative zero). */
#ifdef NPRINT
#define PRINT_ENTRY(a)
#else
#define PRINT_ENTRY(a) \
{ \
if (SCALAR_IS_NONZERO ((a).Real)) \
{ \
PRINTF ((" (%g", (a).Real)) ; \
} \
else \
{ \
PRINTF ((" (0")) ; \
} \
if (SCALAR_IS_LTZERO ((a).Imag)) \
{ \
PRINTF ((" - %gi)", -(a).Imag)) ; \
} \
else if (SCALAR_IS_ZERO ((a).Imag)) \
{ \
PRINTF ((" + 0i)")) ; \
} \
else \
{ \
PRINTF ((" + %gi)", (a).Imag)) ; \
} \
}
#endif
/* -------------------------------------------------------------------------- */
#endif /* #ifndef COMPLEX */
#endif