diff --git a/src/maths/KLU/UFconfig.h b/src/maths/KLU/UFconfig.h new file mode 100644 index 000000000..4b36a9e31 --- /dev/null +++ b/src/maths/KLU/UFconfig.h @@ -0,0 +1,151 @@ +/* ========================================================================== */ +/* === UFconfig.h =========================================================== */ +/* ========================================================================== */ + +/* Configuration file for SuiteSparse: a Suite of Sparse matrix packages + * (AMD, COLAMD, CCOLAMD, CAMD, CHOLMOD, UMFPACK, CXSparse, and others). + * + * UFconfig.h provides the definition of the long integer. On most systems, + * a C program can be compiled in LP64 mode, in which long's and pointers are + * both 64-bits, and int's are 32-bits. Windows 64, however, uses the LLP64 + * model, in which int's and long's are 32-bits, and long long's and pointers + * are 64-bits. + * + * SuiteSparse packages that include long integer versions are + * intended for the LP64 mode. However, as a workaround for Windows 64 + * (and perhaps other systems), the long integer can be redefined. + * + * If _WIN64 is defined, then the __int64 type is used instead of long. + * + * The long integer can also be defined at compile time. For example, this + * could be added to UFconfig.mk: + * + * CFLAGS = -O -D'UF_long=long long' -D'UF_long_max=9223372036854775801' \ + * -D'UF_long_idd="lld"' + * + * This file defines UF_long as either long (on all but _WIN64) or + * __int64 on Windows 64. The intent is that a UF_long is always a 64-bit + * integer in a 64-bit code. ptrdiff_t might be a better choice than long; + * it is always the same size as a pointer. + * + * This file also defines the SUITESPARSE_VERSION and related definitions. + * + * Copyright (c) 2007, University of Florida. No licensing restrictions + * apply to this file or to the UFconfig directory. Author: Timothy A. Davis. + */ + +#ifndef _UFCONFIG_H +#define _UFCONFIG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* ========================================================================== */ +/* === UF_long ============================================================== */ +/* ========================================================================== */ + +#ifndef UF_long + +#ifdef _WIN64 + +#define UF_long __int64 +#define UF_long_max _I64_MAX +#define UF_long_idd "I64d" + +#else + +#define UF_long long +#define UF_long_max LONG_MAX +#define UF_long_idd "ld" + +#endif +#define UF_long_id "%" UF_long_idd +#endif + +/* ========================================================================== */ +/* === UFconfig parameters and functions ==================================== */ +/* ========================================================================== */ + +/* SuiteSparse-wide parameters will be placed in this struct. So far, they + are only used by RBio. */ + +typedef struct UFconfig_struct +{ + void *(*malloc_memory) (size_t) ; /* pointer to malloc */ + void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */ + void (*free_memory) (void *) ; /* pointer to free */ + void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */ + +} UFconfig ; + +void *UFmalloc /* pointer to allocated block of memory */ +( + size_t nitems, /* number of items to malloc (>=1 is enforced) */ + size_t size_of_item, /* sizeof each item */ + int *ok, /* TRUE if successful, FALSE otherwise */ + UFconfig *config /* SuiteSparse-wide configuration */ +) ; + +void *UFfree /* always returns NULL */ +( + void *p, /* block to free */ + UFconfig *config /* SuiteSparse-wide configuration */ +) ; + + +/* ========================================================================== */ +/* === SuiteSparse version ================================================== */ +/* ========================================================================== */ + +/* SuiteSparse is not a package itself, but a collection of packages, some of + * which must be used together (UMFPACK requires AMD, CHOLMOD requires AMD, + * COLAMD, CAMD, and CCOLAMD, etc). A version number is provided here for the + * collection itself. The versions of packages within each version of + * SuiteSparse are meant to work together. Combining one packge from one + * version of SuiteSparse, with another package from another version of + * SuiteSparse, may or may not work. + * + * SuiteSparse Version 3.7.0 contains the following packages: + * + * UFconfig version 3.7.0 (version always the same as SuiteSparse) + * AMD version 2.2.3 + * CAMD version 2.2.3 + * CCOLAMD version 2.7.4 + * COLAMD version 2.7.4 + * BTF version 1.1.3 + * CHOLMOD version 1.7.4 + * CSparse3 version 3.0.2 + * CSparse version 2.2.6 + * CXSparse version 2.2.6 + * KLU version 1.1.3 + * LDL version 2.0.4 + * RBio version 2.0.2 + * SPQR version 1.2.3 (also called SuiteSparseQR) + * UFcollection version 1.6.0 + * UMFPACK version 5.5.2 + * SSMULT version 2.0.3 + * spqr_rank version 1.0.0 + * MATLAB_Tools various packages & M-files. No specific version number. + * + * Other package dependencies: + * BLAS required by CHOLMOD and UMFPACK + * LAPACK required by CHOLMOD + * METIS 4.0.1 required by CHOLMOD (optional) and KLU (optional) + */ + +#define SUITESPARSE_DATE "Dec 15, 2011" +#define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub)) +#define SUITESPARSE_MAIN_VERSION 3 +#define SUITESPARSE_SUB_VERSION 7 +#define SUITESPARSE_SUBSUB_VERSION 0 +#define SUITESPARSE_VERSION \ + SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/maths/KLU/amd.h b/src/maths/KLU/amd.h new file mode 100644 index 000000000..2fcaef117 --- /dev/null +++ b/src/maths/KLU/amd.h @@ -0,0 +1,412 @@ +/* ========================================================================= */ +/* === AMD: approximate minimum degree ordering =========================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD Version 2.2, Copyright (c) 2007 by Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* AMD finds a symmetric ordering P of a matrix A so that the Cholesky + * factorization of P*A*P' has fewer nonzeros and takes less work than the + * Cholesky factorization of A. If A is not symmetric, then it performs its + * ordering on the matrix A+A'. Two sets of user-callable routines are + * provided, one for int integers and the other for UF_long integers. + * + * The method is based on the approximate minimum degree algorithm, discussed + * in Amestoy, Davis, and Duff, "An approximate degree ordering algorithm", + * SIAM Journal of Matrix Analysis and Applications, vol. 17, no. 4, pp. + * 886-905, 1996. This package can perform both the AMD ordering (with + * aggressive absorption), and the AMDBAR ordering (without aggressive + * absorption) discussed in the above paper. This package differs from the + * Fortran codes discussed in the paper: + * + * (1) it can ignore "dense" rows and columns, leading to faster run times + * (2) it computes the ordering of A+A' if A is not symmetric + * (3) it is followed by a depth-first post-ordering of the assembly tree + * (or supernodal elimination tree) + * + * For historical reasons, the Fortran versions, amd.f and amdbar.f, have + * been left (nearly) unchanged. They compute the identical ordering as + * described in the above paper. + */ + +#ifndef AMD_H +#define AMD_H + +/* make it easy for C++ programs to include AMD */ +#ifdef __cplusplus +extern "C" { +#endif + +/* get the definition of size_t: */ +#include + +/* define UF_long */ +#include "UFconfig.h" + +int amd_order /* returns AMD_OK, AMD_OK_BUT_JUMBLED, + * AMD_INVALID, or AMD_OUT_OF_MEMORY */ +( + int n, /* A is n-by-n. n must be >= 0. */ + const int Ap [ ], /* column pointers for A, of size n+1 */ + const int Ai [ ], /* row indices of A, of size nz = Ap [n] */ + int P [ ], /* output permutation, of size n */ + double Control [ ], /* input Control settings, of size AMD_CONTROL */ + double Info [ ] /* output Info statistics, of size AMD_INFO */ +) ; + +UF_long amd_l_order /* see above for description of arguments */ +( + UF_long n, + const UF_long Ap [ ], + const UF_long Ai [ ], + UF_long P [ ], + double Control [ ], + double Info [ ] +) ; + +/* Input arguments (not modified): + * + * n: the matrix A is n-by-n. + * Ap: an int/UF_long array of size n+1, containing column pointers of A. + * Ai: an int/UF_long array of size nz, containing the row indices of A, + * where nz = Ap [n]. + * Control: a double array of size AMD_CONTROL, containing control + * parameters. Defaults are used if Control is NULL. + * + * Output arguments (not defined on input): + * + * P: an int/UF_long array of size n, containing the output permutation. If + * row i is the kth pivot row, then P [k] = i. In MATLAB notation, + * the reordered matrix is A (P,P). + * Info: a double array of size AMD_INFO, containing statistical + * information. Ignored if Info is NULL. + * + * On input, the matrix A is stored in column-oriented form. The row indices + * of nonzero entries in column j are stored in Ai [Ap [j] ... Ap [j+1]-1]. + * + * If the row indices appear in ascending order in each column, and there + * are no duplicate entries, then amd_order is slightly more efficient in + * terms of time and memory usage. If this condition does not hold, a copy + * of the matrix is created (where these conditions do hold), and the copy is + * ordered. This feature is new to v2.0 (v1.2 and earlier required this + * condition to hold for the input matrix). + * + * Row indices must be in the range 0 to + * n-1. Ap [0] must be zero, and thus nz = Ap [n] is the number of nonzeros + * in A. The array Ap is of size n+1, and the array Ai is of size nz = Ap [n]. + * The matrix does not need to be symmetric, and the diagonal does not need to + * be present (if diagonal entries are present, they are ignored except for + * the output statistic Info [AMD_NZDIAG]). The arrays Ai and Ap are not + * modified. This form of the Ap and Ai arrays to represent the nonzero + * pattern of the matrix A is the same as that used internally by MATLAB. + * If you wish to use a more flexible input structure, please see the + * umfpack_*_triplet_to_col routines in the UMFPACK package, at + * http://www.cise.ufl.edu/research/sparse/umfpack. + * + * Restrictions: n >= 0. Ap [0] = 0. Ap [j] <= Ap [j+1] for all j in the + * range 0 to n-1. nz = Ap [n] >= 0. Ai [0..nz-1] must be in the range 0 + * to n-1. Finally, Ai, Ap, and P must not be NULL. If any of these + * restrictions are not met, AMD returns AMD_INVALID. + * + * AMD returns: + * + * AMD_OK if the matrix is valid and sufficient memory can be allocated to + * perform the ordering. + * + * AMD_OUT_OF_MEMORY if not enough memory can be allocated. + * + * AMD_INVALID if the input arguments n, Ap, Ai are invalid, or if P is + * NULL. + * + * AMD_OK_BUT_JUMBLED if the matrix had unsorted columns, and/or duplicate + * entries, but was otherwise valid. + * + * The AMD routine first forms the pattern of the matrix A+A', and then + * computes a fill-reducing ordering, P. If P [k] = i, then row/column i of + * the original is the kth pivotal row. In MATLAB notation, the permuted + * matrix is A (P,P), except that 0-based indexing is used instead of the + * 1-based indexing in MATLAB. + * + * The Control array is used to set various parameters for AMD. If a NULL + * pointer is passed, default values are used. The Control array is not + * modified. + * + * Control [AMD_DENSE]: controls the threshold for "dense" rows/columns. + * A dense row/column in A+A' can cause AMD to spend a lot of time in + * ordering the matrix. If Control [AMD_DENSE] >= 0, rows/columns + * with more than Control [AMD_DENSE] * sqrt (n) entries are ignored + * during the ordering, and placed last in the output order. The + * default value of Control [AMD_DENSE] is 10. If negative, no + * rows/columns are treated as "dense". Rows/columns with 16 or + * fewer off-diagonal entries are never considered "dense". + * + * Control [AMD_AGGRESSIVE]: controls whether or not to use aggressive + * absorption, in which a prior element is absorbed into the current + * element if is a subset of the current element, even if it is not + * adjacent to the current pivot element (refer to Amestoy, Davis, + * & Duff, 1996, for more details). The default value is nonzero, + * which means to perform aggressive absorption. This nearly always + * leads to a better ordering (because the approximate degrees are + * more accurate) and a lower execution time. There are cases where + * it can lead to a slightly worse ordering, however. To turn it off, + * set Control [AMD_AGGRESSIVE] to 0. + * + * Control [2..4] are not used in the current version, but may be used in + * future versions. + * + * The Info array provides statistics about the ordering on output. If it is + * not present, the statistics are not returned. This is not an error + * condition. + * + * Info [AMD_STATUS]: the return value of AMD, either AMD_OK, + * AMD_OK_BUT_JUMBLED, AMD_OUT_OF_MEMORY, or AMD_INVALID. + * + * Info [AMD_N]: n, the size of the input matrix + * + * Info [AMD_NZ]: the number of nonzeros in A, nz = Ap [n] + * + * Info [AMD_SYMMETRY]: the symmetry of the matrix A. It is the number + * of "matched" off-diagonal entries divided by the total number of + * off-diagonal entries. An entry A(i,j) is matched if A(j,i) is also + * an entry, for any pair (i,j) for which i != j. In MATLAB notation, + * S = spones (A) ; + * B = tril (S, -1) + triu (S, 1) ; + * symmetry = nnz (B & B') / nnz (B) ; + * + * Info [AMD_NZDIAG]: the number of entries on the diagonal of A. + * + * Info [AMD_NZ_A_PLUS_AT]: the number of nonzeros in A+A', excluding the + * diagonal. If A is perfectly symmetric (Info [AMD_SYMMETRY] = 1) + * with a fully nonzero diagonal, then Info [AMD_NZ_A_PLUS_AT] = nz-n + * (the smallest possible value). If A is perfectly unsymmetric + * (Info [AMD_SYMMETRY] = 0, for an upper triangular matrix, for + * example) with no diagonal, then Info [AMD_NZ_A_PLUS_AT] = 2*nz + * (the largest possible value). + * + * Info [AMD_NDENSE]: the number of "dense" rows/columns of A+A' that were + * removed from A prior to ordering. These are placed last in the + * output order P. + * + * Info [AMD_MEMORY]: the amount of memory used by AMD, in bytes. In the + * current version, this is 1.2 * Info [AMD_NZ_A_PLUS_AT] + 9*n + * times the size of an integer. This is at most 2.4nz + 9n. This + * excludes the size of the input arguments Ai, Ap, and P, which have + * a total size of nz + 2*n + 1 integers. + * + * Info [AMD_NCMPA]: the number of garbage collections performed. + * + * Info [AMD_LNZ]: the number of nonzeros in L (excluding the diagonal). + * This is a slight upper bound because mass elimination is combined + * with the approximate degree update. It is a rough upper bound if + * there are many "dense" rows/columns. The rest of the statistics, + * below, are also slight or rough upper bounds, for the same reasons. + * The post-ordering of the assembly tree might also not exactly + * correspond to a true elimination tree postordering. + * + * Info [AMD_NDIV]: the number of divide operations for a subsequent LDL' + * or LU factorization of the permuted matrix A (P,P). + * + * Info [AMD_NMULTSUBS_LDL]: the number of multiply-subtract pairs for a + * subsequent LDL' factorization of A (P,P). + * + * Info [AMD_NMULTSUBS_LU]: the number of multiply-subtract pairs for a + * subsequent LU factorization of A (P,P), assuming that no numerical + * pivoting is required. + * + * Info [AMD_DMAX]: the maximum number of nonzeros in any column of L, + * including the diagonal. + * + * Info [14..19] are not used in the current version, but may be used in + * future versions. + */ + +/* ------------------------------------------------------------------------- */ +/* direct interface to AMD */ +/* ------------------------------------------------------------------------- */ + +/* amd_2 is the primary AMD ordering routine. It is not meant to be + * user-callable because of its restrictive inputs and because it destroys + * the user's input matrix. It does not check its inputs for errors, either. + * However, if you can work with these restrictions it can be faster than + * amd_order and use less memory (assuming that you can create your own copy + * of the matrix for AMD to destroy). Refer to AMD/Source/amd_2.c for a + * description of each parameter. */ + +void amd_2 +( + int n, + int Pe [ ], + int Iw [ ], + int Len [ ], + int iwlen, + int pfree, + int Nv [ ], + int Next [ ], + int Last [ ], + int Head [ ], + int Elen [ ], + int Degree [ ], + int W [ ], + double Control [ ], + double Info [ ] +) ; + +void amd_l2 +( + UF_long n, + UF_long Pe [ ], + UF_long Iw [ ], + UF_long Len [ ], + UF_long iwlen, + UF_long pfree, + UF_long Nv [ ], + UF_long Next [ ], + UF_long Last [ ], + UF_long Head [ ], + UF_long Elen [ ], + UF_long Degree [ ], + UF_long W [ ], + double Control [ ], + double Info [ ] +) ; + +/* ------------------------------------------------------------------------- */ +/* amd_valid */ +/* ------------------------------------------------------------------------- */ + +/* Returns AMD_OK or AMD_OK_BUT_JUMBLED if the matrix is valid as input to + * amd_order; the latter is returned if the matrix has unsorted and/or + * duplicate row indices in one or more columns. Returns AMD_INVALID if the + * matrix cannot be passed to amd_order. For amd_order, the matrix must also + * be square. The first two arguments are the number of rows and the number + * of columns of the matrix. For its use in AMD, these must both equal n. + * + * NOTE: this routine returned TRUE/FALSE in v1.2 and earlier. + */ + +int amd_valid +( + int n_row, /* # of rows */ + int n_col, /* # of columns */ + const int Ap [ ], /* column pointers, of size n_col+1 */ + const int Ai [ ] /* row indices, of size Ap [n_col] */ +) ; + +UF_long amd_l_valid +( + UF_long n_row, + UF_long n_col, + const UF_long Ap [ ], + const UF_long Ai [ ] +) ; + +/* ------------------------------------------------------------------------- */ +/* AMD memory manager and printf routines */ +/* ------------------------------------------------------------------------- */ + +/* The user can redefine these to change the malloc, free, and printf routines + * that AMD uses. */ + +#ifndef EXTERN +#define EXTERN extern +#endif + +EXTERN void *(*amd_malloc) (size_t) ; /* pointer to malloc */ +EXTERN void (*amd_free) (void *) ; /* pointer to free */ +EXTERN void *(*amd_realloc) (void *, size_t) ; /* pointer to realloc */ +EXTERN void *(*amd_calloc) (size_t, size_t) ; /* pointer to calloc */ +EXTERN int (*amd_printf) (const char *, ...) ; /* pointer to printf */ + +/* ------------------------------------------------------------------------- */ +/* AMD Control and Info arrays */ +/* ------------------------------------------------------------------------- */ + +/* amd_defaults: sets the default control settings */ +void amd_defaults (double Control [ ]) ; +void amd_l_defaults (double Control [ ]) ; + +/* amd_control: prints the control settings */ +void amd_control (double Control [ ]) ; +void amd_l_control (double Control [ ]) ; + +/* amd_info: prints the statistics */ +void amd_info (double Info [ ]) ; +void amd_l_info (double Info [ ]) ; + +#define AMD_CONTROL 5 /* size of Control array */ +#define AMD_INFO 20 /* size of Info array */ + +/* contents of Control */ +#define AMD_DENSE 0 /* "dense" if degree > Control [0] * sqrt (n) */ +#define AMD_AGGRESSIVE 1 /* do aggressive absorption if Control [1] != 0 */ + +/* default Control settings */ +#define AMD_DEFAULT_DENSE 10.0 /* default "dense" degree 10*sqrt(n) */ +#define AMD_DEFAULT_AGGRESSIVE 1 /* do aggressive absorption by default */ + +/* contents of Info */ +#define AMD_STATUS 0 /* return value of amd_order and amd_l_order */ +#define AMD_N 1 /* A is n-by-n */ +#define AMD_NZ 2 /* number of nonzeros in A */ +#define AMD_SYMMETRY 3 /* symmetry of pattern (1 is sym., 0 is unsym.) */ +#define AMD_NZDIAG 4 /* # of entries on diagonal */ +#define AMD_NZ_A_PLUS_AT 5 /* nz in A+A' */ +#define AMD_NDENSE 6 /* number of "dense" rows/columns in A */ +#define AMD_MEMORY 7 /* amount of memory used by AMD */ +#define AMD_NCMPA 8 /* number of garbage collections in AMD */ +#define AMD_LNZ 9 /* approx. nz in L, excluding the diagonal */ +#define AMD_NDIV 10 /* number of fl. point divides for LU and LDL' */ +#define AMD_NMULTSUBS_LDL 11 /* number of fl. point (*,-) pairs for LDL' */ +#define AMD_NMULTSUBS_LU 12 /* number of fl. point (*,-) pairs for LU */ +#define AMD_DMAX 13 /* max nz. in any column of L, incl. diagonal */ + +/* ------------------------------------------------------------------------- */ +/* return values of AMD */ +/* ------------------------------------------------------------------------- */ + +#define AMD_OK 0 /* success */ +#define AMD_OUT_OF_MEMORY -1 /* malloc failed, or problem too large */ +#define AMD_INVALID -2 /* input arguments are not valid */ +#define AMD_OK_BUT_JUMBLED 1 /* input matrix is OK for amd_order, but + * columns were not sorted, and/or duplicate entries were present. AMD had + * to do extra work before ordering the matrix. This is a warning, not an + * error. */ + +/* ========================================================================== */ +/* === AMD version ========================================================== */ +/* ========================================================================== */ + +/* AMD Version 1.2 and later include the following definitions. + * As an example, to test if the version you are using is 1.2 or later: + * + * #ifdef AMD_VERSION + * if (AMD_VERSION >= AMD_VERSION_CODE (1,2)) ... + * #endif + * + * This also works during compile-time: + * + * #if defined(AMD_VERSION) && (AMD_VERSION >= AMD_VERSION_CODE (1,2)) + * printf ("This is version 1.2 or later\n") ; + * #else + * printf ("This is an early version\n") ; + * #endif + * + * Versions 1.1 and earlier of AMD do not include a #define'd version number. + */ + +#define AMD_DATE "Dec 7, 2011" +#define AMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) +#define AMD_MAIN_VERSION 2 +#define AMD_SUB_VERSION 2 +#define AMD_SUBSUB_VERSION 3 +#define AMD_VERSION AMD_VERSION_CODE(AMD_MAIN_VERSION,AMD_SUB_VERSION) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/maths/KLU/amd_1.c b/src/maths/KLU/amd_1.c new file mode 100644 index 000000000..30cb27724 --- /dev/null +++ b/src/maths/KLU/amd_1.c @@ -0,0 +1,181 @@ +/* ========================================================================= */ +/* === AMD_1 =============================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* AMD_1: Construct A+A' for a sparse matrix A and perform the AMD ordering. + * + * The n-by-n sparse matrix A can be unsymmetric. It is stored in MATLAB-style + * compressed-column form, with sorted row indices in each column, and no + * duplicate entries. Diagonal entries may be present, but they are ignored. + * Row indices of column j of A are stored in Ai [Ap [j] ... Ap [j+1]-1]. + * Ap [0] must be zero, and nz = Ap [n] is the number of entries in A. The + * size of the matrix, n, must be greater than or equal to zero. + * + * This routine must be preceded by a call to AMD_aat, which computes the + * number of entries in each row/column in A+A', excluding the diagonal. + * Len [j], on input, is the number of entries in row/column j of A+A'. This + * routine constructs the matrix A+A' and then calls AMD_2. No error checking + * is performed (this was done in AMD_valid). + */ + +#include "amd_internal.h" + +GLOBAL void AMD_1 +( + Int n, /* n > 0 */ + const Int Ap [ ], /* input of size n+1, not modified */ + const Int Ai [ ], /* input of size nz = Ap [n], not modified */ + Int P [ ], /* size n output permutation */ + Int Pinv [ ], /* size n output inverse permutation */ + Int Len [ ], /* size n input, undefined on output */ + Int slen, /* slen >= sum (Len [0..n-1]) + 7n, + * ideally slen = 1.2 * sum (Len) + 8n */ + Int S [ ], /* size slen workspace */ + double Control [ ], /* input array of size AMD_CONTROL */ + double Info [ ] /* output array of size AMD_INFO */ +) +{ + Int i, j, k, p, pfree, iwlen, pj, p1, p2, pj2, *Iw, *Pe, *Nv, *Head, + *Elen, *Degree, *s, *W, *Sp, *Tp ; + + /* --------------------------------------------------------------------- */ + /* construct the matrix for AMD_2 */ + /* --------------------------------------------------------------------- */ + + ASSERT (n > 0) ; + + iwlen = slen - 6*n ; + s = S ; + Pe = s ; s += n ; + Nv = s ; s += n ; + Head = s ; s += n ; + Elen = s ; s += n ; + Degree = s ; s += n ; + W = s ; s += n ; + Iw = s ; s += iwlen ; + + ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ; + + /* construct the pointers for A+A' */ + Sp = Nv ; /* use Nv and W as workspace for Sp and Tp [ */ + Tp = W ; + pfree = 0 ; + for (j = 0 ; j < n ; j++) + { + Pe [j] = pfree ; + Sp [j] = pfree ; + pfree += Len [j] ; + } + + /* Note that this restriction on iwlen is slightly more restrictive than + * what is strictly required in AMD_2. AMD_2 can operate with no elbow + * room at all, but it will be very slow. For better performance, at + * least size-n elbow room is enforced. */ + ASSERT (iwlen >= pfree + n) ; + +#ifndef NDEBUG + for (p = 0 ; p < iwlen ; p++) Iw [p] = EMPTY ; +#endif + + for (k = 0 ; k < n ; k++) + { + AMD_DEBUG1 (("Construct row/column k= "ID" of A+A'\n", k)) ; + p1 = Ap [k] ; + p2 = Ap [k+1] ; + + /* construct A+A' */ + for (p = p1 ; p < p2 ; ) + { + /* scan the upper triangular part of A */ + j = Ai [p] ; + ASSERT (j >= 0 && j < n) ; + if (j < k) + { + /* entry A (j,k) in the strictly upper triangular part */ + ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; + ASSERT (Sp [k] < (k == n-1 ? pfree : Pe [k+1])) ; + Iw [Sp [j]++] = k ; + Iw [Sp [k]++] = j ; + p++ ; + } + else if (j == k) + { + /* skip the diagonal */ + p++ ; + break ; + } + else /* j > k */ + { + /* first entry below the diagonal */ + break ; + } + /* scan lower triangular part of A, in column j until reaching + * row k. Start where last scan left off. */ + ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ; + pj2 = Ap [j+1] ; + for (pj = Tp [j] ; pj < pj2 ; ) + { + i = Ai [pj] ; + ASSERT (i >= 0 && i < n) ; + if (i < k) + { + /* A (i,j) is only in the lower part, not in upper */ + ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ; + ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; + Iw [Sp [i]++] = j ; + Iw [Sp [j]++] = i ; + pj++ ; + } + else if (i == k) + { + /* entry A (k,j) in lower part and A (j,k) in upper */ + pj++ ; + break ; + } + else /* i > k */ + { + /* consider this entry later, when k advances to i */ + break ; + } + } + Tp [j] = pj ; + } + Tp [k] = p ; + } + + /* clean up, for remaining mismatched entries */ + for (j = 0 ; j < n ; j++) + { + for (pj = Tp [j] ; pj < Ap [j+1] ; pj++) + { + i = Ai [pj] ; + ASSERT (i >= 0 && i < n) ; + /* A (i,j) is only in the lower part, not in upper */ + ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ; + ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; + Iw [Sp [i]++] = j ; + Iw [Sp [j]++] = i ; + } + } + +#ifndef NDEBUG + for (j = 0 ; j < n-1 ; j++) ASSERT (Sp [j] == Pe [j+1]) ; + ASSERT (Sp [n-1] == pfree) ; +#endif + + /* Tp and Sp no longer needed ] */ + + /* --------------------------------------------------------------------- */ + /* order the matrix */ + /* --------------------------------------------------------------------- */ + + AMD_2 (n, Pe, Iw, Len, iwlen, pfree, + Nv, Pinv, P, Head, Elen, Degree, W, Control, Info) ; +} diff --git a/src/maths/KLU/amd_2.c b/src/maths/KLU/amd_2.c new file mode 100644 index 000000000..97b4f7a36 --- /dev/null +++ b/src/maths/KLU/amd_2.c @@ -0,0 +1,1842 @@ +/* ========================================================================= */ +/* === AMD_2 =============================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* AMD_2: performs the AMD ordering on a symmetric sparse matrix A, followed + * by a postordering (via depth-first search) of the assembly tree using the + * AMD_postorder routine. + */ + +#include "amd_internal.h" + +/* ========================================================================= */ +/* === clear_flag ========================================================== */ +/* ========================================================================= */ + +static Int clear_flag (Int wflg, Int wbig, Int W [ ], Int n) +{ + Int x ; + if (wflg < 2 || wflg >= wbig) + { + for (x = 0 ; x < n ; x++) + { + if (W [x] != 0) W [x] = 1 ; + } + wflg = 2 ; + } + /* at this point, W [0..n-1] < wflg holds */ + return (wflg) ; +} + + +/* ========================================================================= */ +/* === AMD_2 =============================================================== */ +/* ========================================================================= */ + +GLOBAL void AMD_2 +( + Int n, /* A is n-by-n, where n > 0 */ + Int Pe [ ], /* Pe [0..n-1]: index in Iw of row i on input */ + Int Iw [ ], /* workspace of size iwlen. Iw [0..pfree-1] + * holds the matrix on input */ + Int Len [ ], /* Len [0..n-1]: length for row/column i on input */ + Int iwlen, /* length of Iw. iwlen >= pfree + n */ + Int pfree, /* Iw [pfree ... iwlen-1] is empty on input */ + + /* 7 size-n workspaces, not defined on input: */ + Int Nv [ ], /* the size of each supernode on output */ + Int Next [ ], /* the output inverse permutation */ + Int Last [ ], /* the output permutation */ + Int Head [ ], + Int Elen [ ], /* the size columns of L for each supernode */ + Int Degree [ ], + Int W [ ], + + /* control parameters and output statistics */ + double Control [ ], /* array of size AMD_CONTROL */ + double Info [ ] /* array of size AMD_INFO */ +) +{ + +/* + * Given a representation of the nonzero pattern of a symmetric matrix, A, + * (excluding the diagonal) perform an approximate minimum (UMFPACK/MA38-style) + * degree ordering to compute a pivot order such that the introduction of + * nonzeros (fill-in) in the Cholesky factors A = LL' is kept low. At each + * step, the pivot selected is the one with the minimum UMFAPACK/MA38-style + * upper-bound on the external degree. This routine can optionally perform + * aggresive absorption (as done by MC47B in the Harwell Subroutine + * Library). + * + * The approximate degree algorithm implemented here is the symmetric analog of + * the degree update algorithm in MA38 and UMFPACK (the Unsymmetric-pattern + * MultiFrontal PACKage, both by Davis and Duff). The routine is based on the + * MA27 minimum degree ordering algorithm by Iain Duff and John Reid. + * + * This routine is a translation of the original AMDBAR and MC47B routines, + * in Fortran, with the following modifications: + * + * (1) dense rows/columns are removed prior to ordering the matrix, and placed + * last in the output order. The presence of a dense row/column can + * increase the ordering time by up to O(n^2), unless they are removed + * prior to ordering. + * + * (2) the minimum degree ordering is followed by a postordering (depth-first + * search) of the assembly tree. Note that mass elimination (discussed + * below) combined with the approximate degree update can lead to the mass + * elimination of nodes with lower exact degree than the current pivot + * element. No additional fill-in is caused in the representation of the + * Schur complement. The mass-eliminated nodes merge with the current + * pivot element. They are ordered prior to the current pivot element. + * Because they can have lower exact degree than the current element, the + * merger of two or more of these nodes in the current pivot element can + * lead to a single element that is not a "fundamental supernode". The + * diagonal block can have zeros in it. Thus, the assembly tree used here + * is not guaranteed to be the precise supernodal elemination tree (with + * "funadmental" supernodes), and the postordering performed by this + * routine is not guaranteed to be a precise postordering of the + * elimination tree. + * + * (3) input parameters are added, to control aggressive absorption and the + * detection of "dense" rows/columns of A. + * + * (4) additional statistical information is returned, such as the number of + * nonzeros in L, and the flop counts for subsequent LDL' and LU + * factorizations. These are slight upper bounds, because of the mass + * elimination issue discussed above. + * + * (5) additional routines are added to interface this routine to MATLAB + * to provide a simple C-callable user-interface, to check inputs for + * errors, compute the symmetry of the pattern of A and the number of + * nonzeros in each row/column of A+A', to compute the pattern of A+A', + * to perform the assembly tree postordering, and to provide debugging + * ouput. Many of these functions are also provided by the Fortran + * Harwell Subroutine Library routine MC47A. + * + * (6) both int and UF_long versions are provided. In the descriptions below + * and integer is and int or UF_long depending on which version is + * being used. + + ********************************************************************** + ***** CAUTION: ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT. ****** + ********************************************************************** + ** If you want error checking, a more versatile input format, and a ** + ** simpler user interface, use amd_order or amd_l_order instead. ** + ** This routine is not meant to be user-callable. ** + ********************************************************************** + + * ---------------------------------------------------------------------------- + * References: + * ---------------------------------------------------------------------------- + * + * [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern multifrontal + * method for sparse LU factorization", SIAM J. Matrix Analysis and + * Applications, vol. 18, no. 1, pp. 140-158. Discusses UMFPACK / MA38, + * which first introduced the approximate minimum degree used by this + * routine. + * + * [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, "An approximate + * minimum degree ordering algorithm," SIAM J. Matrix Analysis and + * Applications, vol. 17, no. 4, pp. 886-905, 1996. Discusses AMDBAR and + * MC47B, which are the Fortran versions of this routine. + * + * [3] Alan George and Joseph Liu, "The evolution of the minimum degree + * ordering algorithm," SIAM Review, vol. 31, no. 1, pp. 1-19, 1989. + * We list below the features mentioned in that paper that this code + * includes: + * + * mass elimination: + * Yes. MA27 relied on supervariable detection for mass elimination. + * + * indistinguishable nodes: + * Yes (we call these "supervariables"). This was also in the MA27 + * code - although we modified the method of detecting them (the + * previous hash was the true degree, which we no longer keep track + * of). A supervariable is a set of rows with identical nonzero + * pattern. All variables in a supervariable are eliminated together. + * Each supervariable has as its numerical name that of one of its + * variables (its principal variable). + * + * quotient graph representation: + * Yes. We use the term "element" for the cliques formed during + * elimination. This was also in the MA27 code. The algorithm can + * operate in place, but it will work more efficiently if given some + * "elbow room." + * + * element absorption: + * Yes. This was also in the MA27 code. + * + * external degree: + * Yes. The MA27 code was based on the true degree. + * + * incomplete degree update and multiple elimination: + * No. This was not in MA27, either. Our method of degree update + * within MC47B is element-based, not variable-based. It is thus + * not well-suited for use with incomplete degree update or multiple + * elimination. + * + * Authors, and Copyright (C) 2004 by: + * Timothy A. Davis, Patrick Amestoy, Iain S. Duff, John K. Reid. + * + * Acknowledgements: This work (and the UMFPACK package) was supported by the + * National Science Foundation (ASC-9111263, DMS-9223088, and CCR-0203270). + * The UMFPACK/MA38 approximate degree update algorithm, the unsymmetric analog + * which forms the basis of AMD, was developed while Tim Davis was supported by + * CERFACS (Toulouse, France) in a post-doctoral position. This C version, and + * the etree postorder, were written while Tim Davis was on sabbatical at + * Stanford University and Lawrence Berkeley National Laboratory. + + * ---------------------------------------------------------------------------- + * INPUT ARGUMENTS (unaltered): + * ---------------------------------------------------------------------------- + + * n: The matrix order. Restriction: n >= 1. + * + * iwlen: The size of the Iw array. On input, the matrix is stored in + * Iw [0..pfree-1]. However, Iw [0..iwlen-1] should be slightly larger + * than what is required to hold the matrix, at least iwlen >= pfree + n. + * Otherwise, excessive compressions will take place. The recommended + * value of iwlen is 1.2 * pfree + n, which is the value used in the + * user-callable interface to this routine (amd_order.c). The algorithm + * will not run at all if iwlen < pfree. Restriction: iwlen >= pfree + n. + * Note that this is slightly more restrictive than the actual minimum + * (iwlen >= pfree), but AMD_2 will be very slow with no elbow room. + * Thus, this routine enforces a bare minimum elbow room of size n. + * + * pfree: On input the tail end of the array, Iw [pfree..iwlen-1], is empty, + * and the matrix is stored in Iw [0..pfree-1]. During execution, + * additional data is placed in Iw, and pfree is modified so that + * Iw [pfree..iwlen-1] is always the unused part of Iw. + * + * Control: A double array of size AMD_CONTROL containing input parameters + * that affect how the ordering is computed. If NULL, then default + * settings are used. + * + * Control [AMD_DENSE] is used to determine whether or not a given input + * row is "dense". A row is "dense" if the number of entries in the row + * exceeds Control [AMD_DENSE] times sqrt (n), except that rows with 16 or + * fewer entries are never considered "dense". To turn off the detection + * of dense rows, set Control [AMD_DENSE] to a negative number, or to a + * number larger than sqrt (n). The default value of Control [AMD_DENSE] + * is AMD_DEFAULT_DENSE, which is defined in amd.h as 10. + * + * Control [AMD_AGGRESSIVE] is used to determine whether or not aggressive + * absorption is to be performed. If nonzero, then aggressive absorption + * is performed (this is the default). + + * ---------------------------------------------------------------------------- + * INPUT/OUPUT ARGUMENTS: + * ---------------------------------------------------------------------------- + * + * Pe: An integer array of size n. On input, Pe [i] is the index in Iw of + * the start of row i. Pe [i] is ignored if row i has no off-diagonal + * entries. Thus Pe [i] must be in the range 0 to pfree-1 for non-empty + * rows. + * + * During execution, it is used for both supervariables and elements: + * + * Principal supervariable i: index into Iw of the description of + * supervariable i. A supervariable represents one or more rows of + * the matrix with identical nonzero pattern. In this case, + * Pe [i] >= 0. + * + * Non-principal supervariable i: if i has been absorbed into another + * supervariable j, then Pe [i] = FLIP (j), where FLIP (j) is defined + * as (-(j)-2). Row j has the same pattern as row i. Note that j + * might later be absorbed into another supervariable j2, in which + * case Pe [i] is still FLIP (j), and Pe [j] = FLIP (j2) which is + * < EMPTY, where EMPTY is defined as (-1) in amd_internal.h. + * + * Unabsorbed element e: the index into Iw of the description of element + * e, if e has not yet been absorbed by a subsequent element. Element + * e is created when the supervariable of the same name is selected as + * the pivot. In this case, Pe [i] >= 0. + * + * Absorbed element e: if element e is absorbed into element e2, then + * Pe [e] = FLIP (e2). This occurs when the pattern of e (which we + * refer to as Le) is found to be a subset of the pattern of e2 (that + * is, Le2). In this case, Pe [i] < EMPTY. If element e is "null" + * (it has no nonzeros outside its pivot block), then Pe [e] = EMPTY, + * and e is the root of an assembly subtree (or the whole tree if + * there is just one such root). + * + * Dense variable i: if i is "dense", then Pe [i] = EMPTY. + * + * On output, Pe holds the assembly tree/forest, which implicitly + * represents a pivot order with identical fill-in as the actual order + * (via a depth-first search of the tree), as follows. If Nv [i] > 0, + * then i represents a node in the assembly tree, and the parent of i is + * Pe [i], or EMPTY if i is a root. If Nv [i] = 0, then (i, Pe [i]) + * represents an edge in a subtree, the root of which is a node in the + * assembly tree. Note that i refers to a row/column in the original + * matrix, not the permuted matrix. + * + * Info: A double array of size AMD_INFO. If present, (that is, not NULL), + * then statistics about the ordering are returned in the Info array. + * See amd.h for a description. + + * ---------------------------------------------------------------------------- + * INPUT/MODIFIED (undefined on output): + * ---------------------------------------------------------------------------- + * + * Len: An integer array of size n. On input, Len [i] holds the number of + * entries in row i of the matrix, excluding the diagonal. The contents + * of Len are undefined on output. + * + * Iw: An integer array of size iwlen. On input, Iw [0..pfree-1] holds the + * description of each row i in the matrix. The matrix must be symmetric, + * and both upper and lower triangular parts must be present. The + * diagonal must not be present. Row i is held as follows: + * + * Len [i]: the length of the row i data structure in the Iw array. + * Iw [Pe [i] ... Pe [i] + Len [i] - 1]: + * the list of column indices for nonzeros in row i (simple + * supervariables), excluding the diagonal. All supervariables + * start with one row/column each (supervariable i is just row i). + * If Len [i] is zero on input, then Pe [i] is ignored on input. + * + * Note that the rows need not be in any particular order, and there + * may be empty space between the rows. + * + * During execution, the supervariable i experiences fill-in. This is + * represented by placing in i a list of the elements that cause fill-in + * in supervariable i: + * + * Len [i]: the length of supervariable i in the Iw array. + * Iw [Pe [i] ... Pe [i] + Elen [i] - 1]: + * the list of elements that contain i. This list is kept short + * by removing absorbed elements. + * Iw [Pe [i] + Elen [i] ... Pe [i] + Len [i] - 1]: + * the list of supervariables in i. This list is kept short by + * removing nonprincipal variables, and any entry j that is also + * contained in at least one of the elements (j in Le) in the list + * for i (e in row i). + * + * When supervariable i is selected as pivot, we create an element e of + * the same name (e=i): + * + * Len [e]: the length of element e in the Iw array. + * Iw [Pe [e] ... Pe [e] + Len [e] - 1]: + * the list of supervariables in element e. + * + * An element represents the fill-in that occurs when supervariable i is + * selected as pivot (which represents the selection of row i and all + * non-principal variables whose principal variable is i). We use the + * term Le to denote the set of all supervariables in element e. Absorbed + * supervariables and elements are pruned from these lists when + * computationally convenient. + * + * CAUTION: THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. + * The contents of Iw are undefined on output. + + * ---------------------------------------------------------------------------- + * OUTPUT (need not be set on input): + * ---------------------------------------------------------------------------- + * + * Nv: An integer array of size n. During execution, ABS (Nv [i]) is equal to + * the number of rows that are represented by the principal supervariable + * i. If i is a nonprincipal or dense variable, then Nv [i] = 0. + * Initially, Nv [i] = 1 for all i. Nv [i] < 0 signifies that i is a + * principal variable in the pattern Lme of the current pivot element me. + * After element me is constructed, Nv [i] is set back to a positive + * value. + * + * On output, Nv [i] holds the number of pivots represented by super + * row/column i of the original matrix, or Nv [i] = 0 for non-principal + * rows/columns. Note that i refers to a row/column in the original + * matrix, not the permuted matrix. + * + * Elen: An integer array of size n. See the description of Iw above. At the + * start of execution, Elen [i] is set to zero for all rows i. During + * execution, Elen [i] is the number of elements in the list for + * supervariable i. When e becomes an element, Elen [e] = FLIP (esize) is + * set, where esize is the size of the element (the number of pivots, plus + * the number of nonpivotal entries). Thus Elen [e] < EMPTY. + * Elen (i) = EMPTY set when variable i becomes nonprincipal. + * + * For variables, Elen (i) >= EMPTY holds until just before the + * postordering and permutation vectors are computed. For elements, + * Elen [e] < EMPTY holds. + * + * On output, Elen [i] is the degree of the row/column in the Cholesky + * factorization of the permuted matrix, corresponding to the original row + * i, if i is a super row/column. It is equal to EMPTY if i is + * non-principal. Note that i refers to a row/column in the original + * matrix, not the permuted matrix. + * + * Note that the contents of Elen on output differ from the Fortran + * version (Elen holds the inverse permutation in the Fortran version, + * which is instead returned in the Next array in this C version, + * described below). + * + * Last: In a degree list, Last [i] is the supervariable preceding i, or EMPTY + * if i is the head of the list. In a hash bucket, Last [i] is the hash + * key for i. + * + * Last [Head [hash]] is also used as the head of a hash bucket if + * Head [hash] contains a degree list (see the description of Head, + * below). + * + * On output, Last [0..n-1] holds the permutation. That is, if + * i = Last [k], then row i is the kth pivot row (where k ranges from 0 to + * n-1). Row Last [k] of A is the kth row in the permuted matrix, PAP'. + * + * Next: Next [i] is the supervariable following i in a link list, or EMPTY if + * i is the last in the list. Used for two kinds of lists: degree lists + * and hash buckets (a supervariable can be in only one kind of list at a + * time). + * + * On output Next [0..n-1] holds the inverse permutation. That is, if + * k = Next [i], then row i is the kth pivot row. Row i of A appears as + * the (Next[i])-th row in the permuted matrix, PAP'. + * + * Note that the contents of Next on output differ from the Fortran + * version (Next is undefined on output in the Fortran version). + + * ---------------------------------------------------------------------------- + * LOCAL WORKSPACE (not input or output - used only during execution): + * ---------------------------------------------------------------------------- + * + * Degree: An integer array of size n. If i is a supervariable, then + * Degree [i] holds the current approximation of the external degree of + * row i (an upper bound). The external degree is the number of nonzeros + * in row i, minus ABS (Nv [i]), the diagonal part. The bound is equal to + * the exact external degree if Elen [i] is less than or equal to two. + * + * We also use the term "external degree" for elements e to refer to + * |Le \ Lme|. If e is an element, then Degree [e] is |Le|, which is the + * degree of the off-diagonal part of the element e (not including the + * diagonal part). + * + * Head: An integer array of size n. Head is used for degree lists. + * Head [deg] is the first supervariable in a degree list. All + * supervariables i in a degree list Head [deg] have the same approximate + * degree, namely, deg = Degree [i]. If the list Head [deg] is empty then + * Head [deg] = EMPTY. + * + * During supervariable detection Head [hash] also serves as a pointer to + * a hash bucket. If Head [hash] >= 0, there is a degree list of degree + * hash. The hash bucket head pointer is Last [Head [hash]]. If + * Head [hash] = EMPTY, then the degree list and hash bucket are both + * empty. If Head [hash] < EMPTY, then the degree list is empty, and + * FLIP (Head [hash]) is the head of the hash bucket. After supervariable + * detection is complete, all hash buckets are empty, and the + * (Last [Head [hash]] = EMPTY) condition is restored for the non-empty + * degree lists. + * + * W: An integer array of size n. The flag array W determines the status of + * elements and variables, and the external degree of elements. + * + * for elements: + * if W [e] = 0, then the element e is absorbed. + * if W [e] >= wflg, then W [e] - wflg is the size of the set + * |Le \ Lme|, in terms of nonzeros (the sum of ABS (Nv [i]) for + * each principal variable i that is both in the pattern of + * element e and NOT in the pattern of the current pivot element, + * me). + * if wflg > W [e] > 0, then e is not absorbed and has not yet been + * seen in the scan of the element lists in the computation of + * |Le\Lme| in Scan 1 below. + * + * for variables: + * during supervariable detection, if W [j] != wflg then j is + * not in the pattern of variable i. + * + * The W array is initialized by setting W [i] = 1 for all i, and by + * setting wflg = 2. It is reinitialized if wflg becomes too large (to + * ensure that wflg+n does not cause integer overflow). + + * ---------------------------------------------------------------------------- + * LOCAL INTEGERS: + * ---------------------------------------------------------------------------- + */ + + Int deg, degme, dext, lemax, e, elenme, eln, i, ilast, inext, j, + jlast, jnext, k, knt1, knt2, knt3, lenj, ln, me, mindeg, nel, nleft, + nvi, nvj, nvpiv, slenme, wbig, we, wflg, wnvi, ok, ndense, ncmpa, + dense, aggressive ; + + unsigned Int hash ; /* unsigned, so that hash % n is well defined.*/ + +/* + * deg: the degree of a variable or element + * degme: size, |Lme|, of the current element, me (= Degree [me]) + * dext: external degree, |Le \ Lme|, of some element e + * lemax: largest |Le| seen so far (called dmax in Fortran version) + * e: an element + * elenme: the length, Elen [me], of element list of pivotal variable + * eln: the length, Elen [...], of an element list + * hash: the computed value of the hash function + * i: a supervariable + * ilast: the entry in a link list preceding i + * inext: the entry in a link list following i + * j: a supervariable + * jlast: the entry in a link list preceding j + * jnext: the entry in a link list, or path, following j + * k: the pivot order of an element or variable + * knt1: loop counter used during element construction + * knt2: loop counter used during element construction + * knt3: loop counter used during compression + * lenj: Len [j] + * ln: length of a supervariable list + * me: current supervariable being eliminated, and the current + * element created by eliminating that supervariable + * mindeg: current minimum degree + * nel: number of pivots selected so far + * nleft: n - nel, the number of nonpivotal rows/columns remaining + * nvi: the number of variables in a supervariable i (= Nv [i]) + * nvj: the number of variables in a supervariable j (= Nv [j]) + * nvpiv: number of pivots in current element + * slenme: number of variables in variable list of pivotal variable + * wbig: = INT_MAX - n for the int version, UF_long_max - n for the + * UF_long version. wflg is not allowed to be >= wbig. + * we: W [e] + * wflg: used for flagging the W array. See description of Iw. + * wnvi: wflg - Nv [i] + * x: either a supervariable or an element + * + * ok: true if supervariable j can be absorbed into i + * ndense: number of "dense" rows/columns + * dense: rows/columns with initial degree > dense are considered "dense" + * aggressive: true if aggressive absorption is being performed + * ncmpa: number of garbage collections + + * ---------------------------------------------------------------------------- + * LOCAL DOUBLES, used for statistical output only (except for alpha): + * ---------------------------------------------------------------------------- + */ + + double f, r, ndiv, s, nms_lu, nms_ldl, dmax, alpha, lnz, lnzme ; + +/* + * f: nvpiv + * r: degme + nvpiv + * ndiv: number of divisions for LU or LDL' factorizations + * s: number of multiply-subtract pairs for LU factorization, for the + * current element me + * nms_lu number of multiply-subtract pairs for LU factorization + * nms_ldl number of multiply-subtract pairs for LDL' factorization + * dmax: the largest number of entries in any column of L, including the + * diagonal + * alpha: "dense" degree ratio + * lnz: the number of nonzeros in L (excluding the diagonal) + * lnzme: the number of nonzeros in L (excl. the diagonal) for the + * current element me + + * ---------------------------------------------------------------------------- + * LOCAL "POINTERS" (indices into the Iw array) + * ---------------------------------------------------------------------------- +*/ + + Int p, p1, p2, p3, p4, pdst, pend, pj, pme, pme1, pme2, pn, psrc ; + +/* + * Any parameter (Pe [...] or pfree) or local variable starting with "p" (for + * Pointer) is an index into Iw, and all indices into Iw use variables starting + * with "p." The only exception to this rule is the iwlen input argument. + * + * p: pointer into lots of things + * p1: Pe [i] for some variable i (start of element list) + * p2: Pe [i] + Elen [i] - 1 for some variable i + * p3: index of first supervariable in clean list + * p4: + * pdst: destination pointer, for compression + * pend: end of memory to compress + * pj: pointer into an element or variable + * pme: pointer into the current element (pme1...pme2) + * pme1: the current element, me, is stored in Iw [pme1...pme2] + * pme2: the end of the current element + * pn: pointer into a "clean" variable, also used to compress + * psrc: source pointer, for compression +*/ + +/* ========================================================================= */ +/* INITIALIZATIONS */ +/* ========================================================================= */ + + /* Note that this restriction on iwlen is slightly more restrictive than + * what is actually required in AMD_2. AMD_2 can operate with no elbow + * room at all, but it will be slow. For better performance, at least + * size-n elbow room is enforced. */ + ASSERT (iwlen >= pfree + n) ; + ASSERT (n > 0) ; + + /* initialize output statistics */ + lnz = 0 ; + ndiv = 0 ; + nms_lu = 0 ; + nms_ldl = 0 ; + dmax = 1 ; + me = EMPTY ; + + mindeg = 0 ; + ncmpa = 0 ; + nel = 0 ; + lemax = 0 ; + + /* get control parameters */ + if (Control != (double *) NULL) + { + alpha = Control [AMD_DENSE] ; + aggressive = (Control [AMD_AGGRESSIVE] != 0) ; + } + else + { + alpha = AMD_DEFAULT_DENSE ; + aggressive = AMD_DEFAULT_AGGRESSIVE ; + } + /* Note: if alpha is NaN, this is undefined: */ + if (alpha < 0) + { + /* only remove completely dense rows/columns */ + dense = n-2 ; + } + else + { + dense = alpha * sqrt ((double) n) ; + } + dense = MAX (16, dense) ; + dense = MIN (n, dense) ; + AMD_DEBUG1 (("\n\nAMD (debug), alpha %g, aggr. "ID"\n", + alpha, aggressive)) ; + + for (i = 0 ; i < n ; i++) + { + Last [i] = EMPTY ; + Head [i] = EMPTY ; + Next [i] = EMPTY ; + /* if separate Hhead array is used for hash buckets: * + Hhead [i] = EMPTY ; + */ + Nv [i] = 1 ; + W [i] = 1 ; + Elen [i] = 0 ; + Degree [i] = Len [i] ; + } + +#ifndef NDEBUG + AMD_DEBUG1 (("\n======Nel "ID" initial\n", nel)) ; + AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, Last, + Head, Elen, Degree, W, -1) ; +#endif + + /* initialize wflg */ + wbig = Int_MAX - n ; + wflg = clear_flag (0, wbig, W, n) ; + + /* --------------------------------------------------------------------- */ + /* initialize degree lists and eliminate dense and empty rows */ + /* --------------------------------------------------------------------- */ + + ndense = 0 ; + + for (i = 0 ; i < n ; i++) + { + deg = Degree [i] ; + ASSERT (deg >= 0 && deg < n) ; + if (deg == 0) + { + + /* ------------------------------------------------------------- + * we have a variable that can be eliminated at once because + * there is no off-diagonal non-zero in its row. Note that + * Nv [i] = 1 for an empty variable i. It is treated just + * the same as an eliminated element i. + * ------------------------------------------------------------- */ + + Elen [i] = FLIP (1) ; + nel++ ; + Pe [i] = EMPTY ; + W [i] = 0 ; + + } + else if (deg > dense) + { + + /* ------------------------------------------------------------- + * Dense variables are not treated as elements, but as unordered, + * non-principal variables that have no parent. They do not take + * part in the postorder, since Nv [i] = 0. Note that the Fortran + * version does not have this option. + * ------------------------------------------------------------- */ + + AMD_DEBUG1 (("Dense node "ID" degree "ID"\n", i, deg)) ; + ndense++ ; + Nv [i] = 0 ; /* do not postorder this node */ + Elen [i] = EMPTY ; + nel++ ; + Pe [i] = EMPTY ; + + } + else + { + + /* ------------------------------------------------------------- + * place i in the degree list corresponding to its degree + * ------------------------------------------------------------- */ + + inext = Head [deg] ; + ASSERT (inext >= EMPTY && inext < n) ; + if (inext != EMPTY) Last [inext] = i ; + Next [i] = inext ; + Head [deg] = i ; + + } + } + +/* ========================================================================= */ +/* WHILE (selecting pivots) DO */ +/* ========================================================================= */ + + while (nel < n) + { + +#ifndef NDEBUG + AMD_DEBUG1 (("\n======Nel "ID"\n", nel)) ; + if (AMD_debug >= 2) + { + AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, + Last, Head, Elen, Degree, W, nel) ; + } +#endif + +/* ========================================================================= */ +/* GET PIVOT OF MINIMUM DEGREE */ +/* ========================================================================= */ + + /* ----------------------------------------------------------------- */ + /* find next supervariable for elimination */ + /* ----------------------------------------------------------------- */ + + ASSERT (mindeg >= 0 && mindeg < n) ; + for (deg = mindeg ; deg < n ; deg++) + { + me = Head [deg] ; + if (me != EMPTY) break ; + } + mindeg = deg ; + ASSERT (me >= 0 && me < n) ; + AMD_DEBUG1 (("=================me: "ID"\n", me)) ; + + /* ----------------------------------------------------------------- */ + /* remove chosen variable from link list */ + /* ----------------------------------------------------------------- */ + + inext = Next [me] ; + ASSERT (inext >= EMPTY && inext < n) ; + if (inext != EMPTY) Last [inext] = EMPTY ; + Head [deg] = inext ; + + /* ----------------------------------------------------------------- */ + /* me represents the elimination of pivots nel to nel+Nv[me]-1. */ + /* place me itself as the first in this set. */ + /* ----------------------------------------------------------------- */ + + elenme = Elen [me] ; + nvpiv = Nv [me] ; + ASSERT (nvpiv > 0) ; + nel += nvpiv ; + +/* ========================================================================= */ +/* CONSTRUCT NEW ELEMENT */ +/* ========================================================================= */ + + /* ----------------------------------------------------------------- + * At this point, me is the pivotal supervariable. It will be + * converted into the current element. Scan list of the pivotal + * supervariable, me, setting tree pointers and constructing new list + * of supervariables for the new element, me. p is a pointer to the + * current position in the old list. + * ----------------------------------------------------------------- */ + + /* flag the variable "me" as being in Lme by negating Nv [me] */ + Nv [me] = -nvpiv ; + degme = 0 ; + ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; + + if (elenme == 0) + { + + /* ------------------------------------------------------------- */ + /* construct the new element in place */ + /* ------------------------------------------------------------- */ + + pme1 = Pe [me] ; + pme2 = pme1 - 1 ; + + for (p = pme1 ; p <= pme1 + Len [me] - 1 ; p++) + { + i = Iw [p] ; + ASSERT (i >= 0 && i < n && Nv [i] >= 0) ; + nvi = Nv [i] ; + if (nvi > 0) + { + + /* ----------------------------------------------------- */ + /* i is a principal variable not yet placed in Lme. */ + /* store i in new list */ + /* ----------------------------------------------------- */ + + /* flag i as being in Lme by negating Nv [i] */ + degme += nvi ; + Nv [i] = -nvi ; + Iw [++pme2] = i ; + + /* ----------------------------------------------------- */ + /* remove variable i from degree list. */ + /* ----------------------------------------------------- */ + + ilast = Last [i] ; + inext = Next [i] ; + ASSERT (ilast >= EMPTY && ilast < n) ; + ASSERT (inext >= EMPTY && inext < n) ; + if (inext != EMPTY) Last [inext] = ilast ; + if (ilast != EMPTY) + { + Next [ilast] = inext ; + } + else + { + /* i is at the head of the degree list */ + ASSERT (Degree [i] >= 0 && Degree [i] < n) ; + Head [Degree [i]] = inext ; + } + } + } + } + else + { + + /* ------------------------------------------------------------- */ + /* construct the new element in empty space, Iw [pfree ...] */ + /* ------------------------------------------------------------- */ + + p = Pe [me] ; + pme1 = pfree ; + slenme = Len [me] - elenme ; + + for (knt1 = 1 ; knt1 <= elenme + 1 ; knt1++) + { + + if (knt1 > elenme) + { + /* search the supervariables in me. */ + e = me ; + pj = p ; + ln = slenme ; + AMD_DEBUG2 (("Search sv: "ID" "ID" "ID"\n", me,pj,ln)) ; + } + else + { + /* search the elements in me. */ + e = Iw [p++] ; + ASSERT (e >= 0 && e < n) ; + pj = Pe [e] ; + ln = Len [e] ; + AMD_DEBUG2 (("Search element e "ID" in me "ID"\n", e,me)) ; + ASSERT (Elen [e] < EMPTY && W [e] > 0 && pj >= 0) ; + } + ASSERT (ln >= 0 && (ln == 0 || (pj >= 0 && pj < iwlen))) ; + + /* --------------------------------------------------------- + * search for different supervariables and add them to the + * new list, compressing when necessary. this loop is + * executed once for each element in the list and once for + * all the supervariables in the list. + * --------------------------------------------------------- */ + + for (knt2 = 1 ; knt2 <= ln ; knt2++) + { + i = Iw [pj++] ; + ASSERT (i >= 0 && i < n && (i == me || Elen [i] >= EMPTY)); + nvi = Nv [i] ; + AMD_DEBUG2 ((": "ID" "ID" "ID" "ID"\n", + i, Elen [i], Nv [i], wflg)) ; + + if (nvi > 0) + { + + /* ------------------------------------------------- */ + /* compress Iw, if necessary */ + /* ------------------------------------------------- */ + + if (pfree >= iwlen) + { + + AMD_DEBUG1 (("GARBAGE COLLECTION\n")) ; + + /* prepare for compressing Iw by adjusting pointers + * and lengths so that the lists being searched in + * the inner and outer loops contain only the + * remaining entries. */ + + Pe [me] = p ; + Len [me] -= knt1 ; + /* check if nothing left of supervariable me */ + if (Len [me] == 0) Pe [me] = EMPTY ; + Pe [e] = pj ; + Len [e] = ln - knt2 ; + /* nothing left of element e */ + if (Len [e] == 0) Pe [e] = EMPTY ; + + ncmpa++ ; /* one more garbage collection */ + + /* store first entry of each object in Pe */ + /* FLIP the first entry in each object */ + for (j = 0 ; j < n ; j++) + { + pn = Pe [j] ; + if (pn >= 0) + { + ASSERT (pn >= 0 && pn < iwlen) ; + Pe [j] = Iw [pn] ; + Iw [pn] = FLIP (j) ; + } + } + + /* psrc/pdst point to source/destination */ + psrc = 0 ; + pdst = 0 ; + pend = pme1 - 1 ; + + while (psrc <= pend) + { + /* search for next FLIP'd entry */ + j = FLIP (Iw [psrc++]) ; + if (j >= 0) + { + AMD_DEBUG2 (("Got object j: "ID"\n", j)) ; + Iw [pdst] = Pe [j] ; + Pe [j] = pdst++ ; + lenj = Len [j] ; + /* copy from source to destination */ + for (knt3 = 0 ; knt3 <= lenj - 2 ; knt3++) + { + Iw [pdst++] = Iw [psrc++] ; + } + } + } + + /* move the new partially-constructed element */ + p1 = pdst ; + for (psrc = pme1 ; psrc <= pfree-1 ; psrc++) + { + Iw [pdst++] = Iw [psrc] ; + } + pme1 = p1 ; + pfree = pdst ; + pj = Pe [e] ; + p = Pe [me] ; + + } + + /* ------------------------------------------------- */ + /* i is a principal variable not yet placed in Lme */ + /* store i in new list */ + /* ------------------------------------------------- */ + + /* flag i as being in Lme by negating Nv [i] */ + degme += nvi ; + Nv [i] = -nvi ; + Iw [pfree++] = i ; + AMD_DEBUG2 ((" s: "ID" nv "ID"\n", i, Nv [i])); + + /* ------------------------------------------------- */ + /* remove variable i from degree link list */ + /* ------------------------------------------------- */ + + ilast = Last [i] ; + inext = Next [i] ; + ASSERT (ilast >= EMPTY && ilast < n) ; + ASSERT (inext >= EMPTY && inext < n) ; + if (inext != EMPTY) Last [inext] = ilast ; + if (ilast != EMPTY) + { + Next [ilast] = inext ; + } + else + { + /* i is at the head of the degree list */ + ASSERT (Degree [i] >= 0 && Degree [i] < n) ; + Head [Degree [i]] = inext ; + } + } + } + + if (e != me) + { + /* set tree pointer and flag to indicate element e is + * absorbed into new element me (the parent of e is me) */ + AMD_DEBUG1 ((" Element "ID" => "ID"\n", e, me)) ; + Pe [e] = FLIP (me) ; + W [e] = 0 ; + } + } + + pme2 = pfree - 1 ; + } + + /* ----------------------------------------------------------------- */ + /* me has now been converted into an element in Iw [pme1..pme2] */ + /* ----------------------------------------------------------------- */ + + /* degme holds the external degree of new element */ + Degree [me] = degme ; + Pe [me] = pme1 ; + Len [me] = pme2 - pme1 + 1 ; + ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; + + Elen [me] = FLIP (nvpiv + degme) ; + /* FLIP (Elen (me)) is now the degree of pivot (including + * diagonal part). */ + +#ifndef NDEBUG + AMD_DEBUG2 (("New element structure: length= "ID"\n", pme2-pme1+1)) ; + for (pme = pme1 ; pme <= pme2 ; pme++) AMD_DEBUG3 ((" "ID"", Iw[pme])); + AMD_DEBUG3 (("\n")) ; +#endif + + /* ----------------------------------------------------------------- */ + /* make sure that wflg is not too large. */ + /* ----------------------------------------------------------------- */ + + /* With the current value of wflg, wflg+n must not cause integer + * overflow */ + + wflg = clear_flag (wflg, wbig, W, n) ; + +/* ========================================================================= */ +/* COMPUTE (W [e] - wflg) = |Le\Lme| FOR ALL ELEMENTS */ +/* ========================================================================= */ + + /* ----------------------------------------------------------------- + * Scan 1: compute the external degrees of previous elements with + * respect to the current element. That is: + * (W [e] - wflg) = |Le \ Lme| + * for each element e that appears in any supervariable in Lme. The + * notation Le refers to the pattern (list of supervariables) of a + * previous element e, where e is not yet absorbed, stored in + * Iw [Pe [e] + 1 ... Pe [e] + Len [e]]. The notation Lme + * refers to the pattern of the current element (stored in + * Iw [pme1..pme2]). If aggressive absorption is enabled, and + * (W [e] - wflg) becomes zero, then the element e will be absorbed + * in Scan 2. + * ----------------------------------------------------------------- */ + + AMD_DEBUG2 (("me: ")) ; + for (pme = pme1 ; pme <= pme2 ; pme++) + { + i = Iw [pme] ; + ASSERT (i >= 0 && i < n) ; + eln = Elen [i] ; + AMD_DEBUG3 ((""ID" Elen "ID": \n", i, eln)) ; + if (eln > 0) + { + /* note that Nv [i] has been negated to denote i in Lme: */ + nvi = -Nv [i] ; + ASSERT (nvi > 0 && Pe [i] >= 0 && Pe [i] < iwlen) ; + wnvi = wflg - nvi ; + for (p = Pe [i] ; p <= Pe [i] + eln - 1 ; p++) + { + e = Iw [p] ; + ASSERT (e >= 0 && e < n) ; + we = W [e] ; + AMD_DEBUG4 ((" e "ID" we "ID" ", e, we)) ; + if (we >= wflg) + { + /* unabsorbed element e has been seen in this loop */ + AMD_DEBUG4 ((" unabsorbed, first time seen")) ; + we -= nvi ; + } + else if (we != 0) + { + /* e is an unabsorbed element */ + /* this is the first we have seen e in all of Scan 1 */ + AMD_DEBUG4 ((" unabsorbed")) ; + we = Degree [e] + wnvi ; + } + AMD_DEBUG4 (("\n")) ; + W [e] = we ; + } + } + } + AMD_DEBUG2 (("\n")) ; + +/* ========================================================================= */ +/* DEGREE UPDATE AND ELEMENT ABSORPTION */ +/* ========================================================================= */ + + /* ----------------------------------------------------------------- + * Scan 2: for each i in Lme, sum up the degree of Lme (which is + * degme), plus the sum of the external degrees of each Le for the + * elements e appearing within i, plus the supervariables in i. + * Place i in hash list. + * ----------------------------------------------------------------- */ + + for (pme = pme1 ; pme <= pme2 ; pme++) + { + i = Iw [pme] ; + ASSERT (i >= 0 && i < n && Nv [i] < 0 && Elen [i] >= 0) ; + AMD_DEBUG2 (("Updating: i "ID" "ID" "ID"\n", i, Elen[i], Len [i])); + p1 = Pe [i] ; + p2 = p1 + Elen [i] - 1 ; + pn = p1 ; + hash = 0 ; + deg = 0 ; + ASSERT (p1 >= 0 && p1 < iwlen && p2 >= -1 && p2 < iwlen) ; + + /* ------------------------------------------------------------- */ + /* scan the element list associated with supervariable i */ + /* ------------------------------------------------------------- */ + + /* UMFPACK/MA38-style approximate degree: */ + if (aggressive) + { + for (p = p1 ; p <= p2 ; p++) + { + e = Iw [p] ; + ASSERT (e >= 0 && e < n) ; + we = W [e] ; + if (we != 0) + { + /* e is an unabsorbed element */ + /* dext = | Le \ Lme | */ + dext = we - wflg ; + if (dext > 0) + { + deg += dext ; + Iw [pn++] = e ; + hash += e ; + AMD_DEBUG4 ((" e: "ID" hash = "ID"\n",e,hash)) ; + } + else + { + /* external degree of e is zero, absorb e into me*/ + AMD_DEBUG1 ((" Element "ID" =>"ID" (aggressive)\n", + e, me)) ; + ASSERT (dext == 0) ; + Pe [e] = FLIP (me) ; + W [e] = 0 ; + } + } + } + } + else + { + for (p = p1 ; p <= p2 ; p++) + { + e = Iw [p] ; + ASSERT (e >= 0 && e < n) ; + we = W [e] ; + if (we != 0) + { + /* e is an unabsorbed element */ + dext = we - wflg ; + ASSERT (dext >= 0) ; + deg += dext ; + Iw [pn++] = e ; + hash += e ; + AMD_DEBUG4 ((" e: "ID" hash = "ID"\n",e,hash)) ; + } + } + } + + /* count the number of elements in i (including me): */ + Elen [i] = pn - p1 + 1 ; + + /* ------------------------------------------------------------- */ + /* scan the supervariables in the list associated with i */ + /* ------------------------------------------------------------- */ + + /* The bulk of the AMD run time is typically spent in this loop, + * particularly if the matrix has many dense rows that are not + * removed prior to ordering. */ + p3 = pn ; + p4 = p1 + Len [i] ; + for (p = p2 + 1 ; p < p4 ; p++) + { + j = Iw [p] ; + ASSERT (j >= 0 && j < n) ; + nvj = Nv [j] ; + if (nvj > 0) + { + /* j is unabsorbed, and not in Lme. */ + /* add to degree and add to new list */ + deg += nvj ; + Iw [pn++] = j ; + hash += j ; + AMD_DEBUG4 ((" s: "ID" hash "ID" Nv[j]= "ID"\n", + j, hash, nvj)) ; + } + } + + /* ------------------------------------------------------------- */ + /* update the degree and check for mass elimination */ + /* ------------------------------------------------------------- */ + + /* with aggressive absorption, deg==0 is identical to the + * Elen [i] == 1 && p3 == pn test, below. */ + ASSERT (IMPLIES (aggressive, (deg==0) == (Elen[i]==1 && p3==pn))) ; + + if (Elen [i] == 1 && p3 == pn) + { + + /* --------------------------------------------------------- */ + /* mass elimination */ + /* --------------------------------------------------------- */ + + /* There is nothing left of this node except for an edge to + * the current pivot element. Elen [i] is 1, and there are + * no variables adjacent to node i. Absorb i into the + * current pivot element, me. Note that if there are two or + * more mass eliminations, fillin due to mass elimination is + * possible within the nvpiv-by-nvpiv pivot block. It is this + * step that causes AMD's analysis to be an upper bound. + * + * The reason is that the selected pivot has a lower + * approximate degree than the true degree of the two mass + * eliminated nodes. There is no edge between the two mass + * eliminated nodes. They are merged with the current pivot + * anyway. + * + * No fillin occurs in the Schur complement, in any case, + * and this effect does not decrease the quality of the + * ordering itself, just the quality of the nonzero and + * flop count analysis. It also means that the post-ordering + * is not an exact elimination tree post-ordering. */ + + AMD_DEBUG1 ((" MASS i "ID" => parent e "ID"\n", i, me)) ; + Pe [i] = FLIP (me) ; + nvi = -Nv [i] ; + degme -= nvi ; + nvpiv += nvi ; + nel += nvi ; + Nv [i] = 0 ; + Elen [i] = EMPTY ; + + } + else + { + + /* --------------------------------------------------------- */ + /* update the upper-bound degree of i */ + /* --------------------------------------------------------- */ + + /* the following degree does not yet include the size + * of the current element, which is added later: */ + + Degree [i] = MIN (Degree [i], deg) ; + + /* --------------------------------------------------------- */ + /* add me to the list for i */ + /* --------------------------------------------------------- */ + + /* move first supervariable to end of list */ + Iw [pn] = Iw [p3] ; + /* move first element to end of element part of list */ + Iw [p3] = Iw [p1] ; + /* add new element, me, to front of list. */ + Iw [p1] = me ; + /* store the new length of the list in Len [i] */ + Len [i] = pn - p1 + 1 ; + + /* --------------------------------------------------------- */ + /* place in hash bucket. Save hash key of i in Last [i]. */ + /* --------------------------------------------------------- */ + + /* NOTE: this can fail if hash is negative, because the ANSI C + * standard does not define a % b when a and/or b are negative. + * That's why hash is defined as an unsigned Int, to avoid this + * problem. */ + hash = hash % n ; + ASSERT (((Int) hash) >= 0 && ((Int) hash) < n) ; + + /* if the Hhead array is not used: */ + j = Head [hash] ; + if (j <= EMPTY) + { + /* degree list is empty, hash head is FLIP (j) */ + Next [i] = FLIP (j) ; + Head [hash] = FLIP (i) ; + } + else + { + /* degree list is not empty, use Last [Head [hash]] as + * hash head. */ + Next [i] = Last [j] ; + Last [j] = i ; + } + + /* if a separate Hhead array is used: * + Next [i] = Hhead [hash] ; + Hhead [hash] = i ; + */ + + Last [i] = hash ; + } + } + + Degree [me] = degme ; + + /* ----------------------------------------------------------------- */ + /* Clear the counter array, W [...], by incrementing wflg. */ + /* ----------------------------------------------------------------- */ + + /* make sure that wflg+n does not cause integer overflow */ + lemax = MAX (lemax, degme) ; + wflg += lemax ; + wflg = clear_flag (wflg, wbig, W, n) ; + /* at this point, W [0..n-1] < wflg holds */ + +/* ========================================================================= */ +/* SUPERVARIABLE DETECTION */ +/* ========================================================================= */ + + AMD_DEBUG1 (("Detecting supervariables:\n")) ; + for (pme = pme1 ; pme <= pme2 ; pme++) + { + i = Iw [pme] ; + ASSERT (i >= 0 && i < n) ; + AMD_DEBUG2 (("Consider i "ID" nv "ID"\n", i, Nv [i])) ; + if (Nv [i] < 0) + { + /* i is a principal variable in Lme */ + + /* --------------------------------------------------------- + * examine all hash buckets with 2 or more variables. We do + * this by examing all unique hash keys for supervariables in + * the pattern Lme of the current element, me + * --------------------------------------------------------- */ + + /* let i = head of hash bucket, and empty the hash bucket */ + ASSERT (Last [i] >= 0 && Last [i] < n) ; + hash = Last [i] ; + + /* if Hhead array is not used: */ + j = Head [hash] ; + if (j == EMPTY) + { + /* hash bucket and degree list are both empty */ + i = EMPTY ; + } + else if (j < EMPTY) + { + /* degree list is empty */ + i = FLIP (j) ; + Head [hash] = EMPTY ; + } + else + { + /* degree list is not empty, restore Last [j] of head j */ + i = Last [j] ; + Last [j] = EMPTY ; + } + + /* if separate Hhead array is used: * + i = Hhead [hash] ; + Hhead [hash] = EMPTY ; + */ + + ASSERT (i >= EMPTY && i < n) ; + AMD_DEBUG2 (("----i "ID" hash "ID"\n", i, hash)) ; + + while (i != EMPTY && Next [i] != EMPTY) + { + + /* ----------------------------------------------------- + * this bucket has one or more variables following i. + * scan all of them to see if i can absorb any entries + * that follow i in hash bucket. Scatter i into w. + * ----------------------------------------------------- */ + + ln = Len [i] ; + eln = Elen [i] ; + ASSERT (ln >= 0 && eln >= 0) ; + ASSERT (Pe [i] >= 0 && Pe [i] < iwlen) ; + /* do not flag the first element in the list (me) */ + for (p = Pe [i] + 1 ; p <= Pe [i] + ln - 1 ; p++) + { + ASSERT (Iw [p] >= 0 && Iw [p] < n) ; + W [Iw [p]] = wflg ; + } + + /* ----------------------------------------------------- */ + /* scan every other entry j following i in bucket */ + /* ----------------------------------------------------- */ + + jlast = i ; + j = Next [i] ; + ASSERT (j >= EMPTY && j < n) ; + + while (j != EMPTY) + { + /* ------------------------------------------------- */ + /* check if j and i have identical nonzero pattern */ + /* ------------------------------------------------- */ + + AMD_DEBUG3 (("compare i "ID" and j "ID"\n", i,j)) ; + + /* check if i and j have the same Len and Elen */ + ASSERT (Len [j] >= 0 && Elen [j] >= 0) ; + ASSERT (Pe [j] >= 0 && Pe [j] < iwlen) ; + ok = (Len [j] == ln) && (Elen [j] == eln) ; + /* skip the first element in the list (me) */ + for (p = Pe [j] + 1 ; ok && p <= Pe [j] + ln - 1 ; p++) + { + ASSERT (Iw [p] >= 0 && Iw [p] < n) ; + if (W [Iw [p]] != wflg) ok = 0 ; + } + if (ok) + { + /* --------------------------------------------- */ + /* found it! j can be absorbed into i */ + /* --------------------------------------------- */ + + AMD_DEBUG1 (("found it! j "ID" => i "ID"\n", j,i)); + Pe [j] = FLIP (i) ; + /* both Nv [i] and Nv [j] are negated since they */ + /* are in Lme, and the absolute values of each */ + /* are the number of variables in i and j: */ + Nv [i] += Nv [j] ; + Nv [j] = 0 ; + Elen [j] = EMPTY ; + /* delete j from hash bucket */ + ASSERT (j != Next [j]) ; + j = Next [j] ; + Next [jlast] = j ; + + } + else + { + /* j cannot be absorbed into i */ + jlast = j ; + ASSERT (j != Next [j]) ; + j = Next [j] ; + } + ASSERT (j >= EMPTY && j < n) ; + } + + /* ----------------------------------------------------- + * no more variables can be absorbed into i + * go to next i in bucket and clear flag array + * ----------------------------------------------------- */ + + wflg++ ; + i = Next [i] ; + ASSERT (i >= EMPTY && i < n) ; + + } + } + } + AMD_DEBUG2 (("detect done\n")) ; + +/* ========================================================================= */ +/* RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVARIABLES FROM ELEMENT */ +/* ========================================================================= */ + + p = pme1 ; + nleft = n - nel ; + for (pme = pme1 ; pme <= pme2 ; pme++) + { + i = Iw [pme] ; + ASSERT (i >= 0 && i < n) ; + nvi = -Nv [i] ; + AMD_DEBUG3 (("Restore i "ID" "ID"\n", i, nvi)) ; + if (nvi > 0) + { + /* i is a principal variable in Lme */ + /* restore Nv [i] to signify that i is principal */ + Nv [i] = nvi ; + + /* --------------------------------------------------------- */ + /* compute the external degree (add size of current element) */ + /* --------------------------------------------------------- */ + + deg = Degree [i] + degme - nvi ; + deg = MIN (deg, nleft - nvi) ; + ASSERT (IMPLIES (aggressive, deg > 0) && deg >= 0 && deg < n) ; + + /* --------------------------------------------------------- */ + /* place the supervariable at the head of the degree list */ + /* --------------------------------------------------------- */ + + inext = Head [deg] ; + ASSERT (inext >= EMPTY && inext < n) ; + if (inext != EMPTY) Last [inext] = i ; + Next [i] = inext ; + Last [i] = EMPTY ; + Head [deg] = i ; + + /* --------------------------------------------------------- */ + /* save the new degree, and find the minimum degree */ + /* --------------------------------------------------------- */ + + mindeg = MIN (mindeg, deg) ; + Degree [i] = deg ; + + /* --------------------------------------------------------- */ + /* place the supervariable in the element pattern */ + /* --------------------------------------------------------- */ + + Iw [p++] = i ; + + } + } + AMD_DEBUG2 (("restore done\n")) ; + +/* ========================================================================= */ +/* FINALIZE THE NEW ELEMENT */ +/* ========================================================================= */ + + AMD_DEBUG2 (("ME = "ID" DONE\n", me)) ; + Nv [me] = nvpiv ; + /* save the length of the list for the new element me */ + Len [me] = p - pme1 ; + if (Len [me] == 0) + { + /* there is nothing left of the current pivot element */ + /* it is a root of the assembly tree */ + Pe [me] = EMPTY ; + W [me] = 0 ; + } + if (elenme != 0) + { + /* element was not constructed in place: deallocate part of */ + /* it since newly nonprincipal variables may have been removed */ + pfree = p ; + } + + /* The new element has nvpiv pivots and the size of the contribution + * block for a multifrontal method is degme-by-degme, not including + * the "dense" rows/columns. If the "dense" rows/columns are included, + * the frontal matrix is no larger than + * (degme+ndense)-by-(degme+ndense). + */ + + if (Info != (double *) NULL) + { + f = nvpiv ; + r = degme + ndense ; + dmax = MAX (dmax, f + r) ; + + /* number of nonzeros in L (excluding the diagonal) */ + lnzme = f*r + (f-1)*f/2 ; + lnz += lnzme ; + + /* number of divide operations for LDL' and for LU */ + ndiv += lnzme ; + + /* number of multiply-subtract pairs for LU */ + s = f*r*r + r*(f-1)*f + (f-1)*f*(2*f-1)/6 ; + nms_lu += s ; + + /* number of multiply-subtract pairs for LDL' */ + nms_ldl += (s + lnzme)/2 ; + } + +#ifndef NDEBUG + AMD_DEBUG2 (("finalize done nel "ID" n "ID"\n ::::\n", nel, n)) ; + for (pme = Pe [me] ; pme <= Pe [me] + Len [me] - 1 ; pme++) + { + AMD_DEBUG3 ((" "ID"", Iw [pme])) ; + } + AMD_DEBUG3 (("\n")) ; +#endif + + } + +/* ========================================================================= */ +/* DONE SELECTING PIVOTS */ +/* ========================================================================= */ + + if (Info != (double *) NULL) + { + + /* count the work to factorize the ndense-by-ndense submatrix */ + f = ndense ; + dmax = MAX (dmax, (double) ndense) ; + + /* number of nonzeros in L (excluding the diagonal) */ + lnzme = (f-1)*f/2 ; + lnz += lnzme ; + + /* number of divide operations for LDL' and for LU */ + ndiv += lnzme ; + + /* number of multiply-subtract pairs for LU */ + s = (f-1)*f*(2*f-1)/6 ; + nms_lu += s ; + + /* number of multiply-subtract pairs for LDL' */ + nms_ldl += (s + lnzme)/2 ; + + /* number of nz's in L (excl. diagonal) */ + Info [AMD_LNZ] = lnz ; + + /* number of divide ops for LU and LDL' */ + Info [AMD_NDIV] = ndiv ; + + /* number of multiply-subtract pairs for LDL' */ + Info [AMD_NMULTSUBS_LDL] = nms_ldl ; + + /* number of multiply-subtract pairs for LU */ + Info [AMD_NMULTSUBS_LU] = nms_lu ; + + /* number of "dense" rows/columns */ + Info [AMD_NDENSE] = ndense ; + + /* largest front is dmax-by-dmax */ + Info [AMD_DMAX] = dmax ; + + /* number of garbage collections in AMD */ + Info [AMD_NCMPA] = ncmpa ; + + /* successful ordering */ + Info [AMD_STATUS] = AMD_OK ; + } + +/* ========================================================================= */ +/* POST-ORDERING */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- + * Variables at this point: + * + * Pe: holds the elimination tree. The parent of j is FLIP (Pe [j]), + * or EMPTY if j is a root. The tree holds both elements and + * non-principal (unordered) variables absorbed into them. + * Dense variables are non-principal and unordered. + * + * Elen: holds the size of each element, including the diagonal part. + * FLIP (Elen [e]) > 0 if e is an element. For unordered + * variables i, Elen [i] is EMPTY. + * + * Nv: Nv [e] > 0 is the number of pivots represented by the element e. + * For unordered variables i, Nv [i] is zero. + * + * Contents no longer needed: + * W, Iw, Len, Degree, Head, Next, Last. + * + * The matrix itself has been destroyed. + * + * n: the size of the matrix. + * No other scalars needed (pfree, iwlen, etc.) + * ------------------------------------------------------------------------- */ + + /* restore Pe */ + for (i = 0 ; i < n ; i++) + { + Pe [i] = FLIP (Pe [i]) ; + } + + /* restore Elen, for output information, and for postordering */ + for (i = 0 ; i < n ; i++) + { + Elen [i] = FLIP (Elen [i]) ; + } + +/* Now the parent of j is Pe [j], or EMPTY if j is a root. Elen [e] > 0 + * is the size of element e. Elen [i] is EMPTY for unordered variable i. */ + +#ifndef NDEBUG + AMD_DEBUG2 (("\nTree:\n")) ; + for (i = 0 ; i < n ; i++) + { + AMD_DEBUG2 ((" "ID" parent: "ID" ", i, Pe [i])) ; + ASSERT (Pe [i] >= EMPTY && Pe [i] < n) ; + if (Nv [i] > 0) + { + /* this is an element */ + e = i ; + AMD_DEBUG2 ((" element, size is "ID"\n", Elen [i])) ; + ASSERT (Elen [e] > 0) ; + } + AMD_DEBUG2 (("\n")) ; + } + AMD_DEBUG2 (("\nelements:\n")) ; + for (e = 0 ; e < n ; e++) + { + if (Nv [e] > 0) + { + AMD_DEBUG3 (("Element e= "ID" size "ID" nv "ID" \n", e, + Elen [e], Nv [e])) ; + } + } + AMD_DEBUG2 (("\nvariables:\n")) ; + for (i = 0 ; i < n ; i++) + { + Int cnt ; + if (Nv [i] == 0) + { + AMD_DEBUG3 (("i unordered: "ID"\n", i)) ; + j = Pe [i] ; + cnt = 0 ; + AMD_DEBUG3 ((" j: "ID"\n", j)) ; + if (j == EMPTY) + { + AMD_DEBUG3 ((" i is a dense variable\n")) ; + } + else + { + ASSERT (j >= 0 && j < n) ; + while (Nv [j] == 0) + { + AMD_DEBUG3 ((" j : "ID"\n", j)) ; + j = Pe [j] ; + AMD_DEBUG3 ((" j:: "ID"\n", j)) ; + cnt++ ; + if (cnt > n) break ; + } + e = j ; + AMD_DEBUG3 ((" got to e: "ID"\n", e)) ; + } + } + } +#endif + +/* ========================================================================= */ +/* compress the paths of the variables */ +/* ========================================================================= */ + + for (i = 0 ; i < n ; i++) + { + if (Nv [i] == 0) + { + + /* ------------------------------------------------------------- + * i is an un-ordered row. Traverse the tree from i until + * reaching an element, e. The element, e, was the principal + * supervariable of i and all nodes in the path from i to when e + * was selected as pivot. + * ------------------------------------------------------------- */ + + AMD_DEBUG1 (("Path compression, i unordered: "ID"\n", i)) ; + j = Pe [i] ; + ASSERT (j >= EMPTY && j < n) ; + AMD_DEBUG3 ((" j: "ID"\n", j)) ; + if (j == EMPTY) + { + /* Skip a dense variable. It has no parent. */ + AMD_DEBUG3 ((" i is a dense variable\n")) ; + continue ; + } + + /* while (j is a variable) */ + while (Nv [j] == 0) + { + AMD_DEBUG3 ((" j : "ID"\n", j)) ; + j = Pe [j] ; + AMD_DEBUG3 ((" j:: "ID"\n", j)) ; + ASSERT (j >= 0 && j < n) ; + } + /* got to an element e */ + e = j ; + AMD_DEBUG3 (("got to e: "ID"\n", e)) ; + + /* ------------------------------------------------------------- + * traverse the path again from i to e, and compress the path + * (all nodes point to e). Path compression allows this code to + * compute in O(n) time. + * ------------------------------------------------------------- */ + + j = i ; + /* while (j is a variable) */ + while (Nv [j] == 0) + { + jnext = Pe [j] ; + AMD_DEBUG3 (("j "ID" jnext "ID"\n", j, jnext)) ; + Pe [j] = e ; + j = jnext ; + ASSERT (j >= 0 && j < n) ; + } + } + } + +/* ========================================================================= */ +/* postorder the assembly tree */ +/* ========================================================================= */ + + AMD_postorder (n, Pe, Nv, Elen, + W, /* output order */ + Head, Next, Last) ; /* workspace */ + +/* ========================================================================= */ +/* compute output permutation and inverse permutation */ +/* ========================================================================= */ + + /* W [e] = k means that element e is the kth element in the new + * order. e is in the range 0 to n-1, and k is in the range 0 to + * the number of elements. Use Head for inverse order. */ + + for (k = 0 ; k < n ; k++) + { + Head [k] = EMPTY ; + Next [k] = EMPTY ; + } + for (e = 0 ; e < n ; e++) + { + k = W [e] ; + ASSERT ((k == EMPTY) == (Nv [e] == 0)) ; + if (k != EMPTY) + { + ASSERT (k >= 0 && k < n) ; + Head [k] = e ; + } + } + + /* construct output inverse permutation in Next, + * and permutation in Last */ + nel = 0 ; + for (k = 0 ; k < n ; k++) + { + e = Head [k] ; + if (e == EMPTY) break ; + ASSERT (e >= 0 && e < n && Nv [e] > 0) ; + Next [e] = nel ; + nel += Nv [e] ; + } + ASSERT (nel == n - ndense) ; + + /* order non-principal variables (dense, & those merged into supervar's) */ + for (i = 0 ; i < n ; i++) + { + if (Nv [i] == 0) + { + e = Pe [i] ; + ASSERT (e >= EMPTY && e < n) ; + if (e != EMPTY) + { + /* This is an unordered variable that was merged + * into element e via supernode detection or mass + * elimination of i when e became the pivot element. + * Place i in order just before e. */ + ASSERT (Next [i] == EMPTY && Nv [e] > 0) ; + Next [i] = Next [e] ; + Next [e]++ ; + } + else + { + /* This is a dense unordered variable, with no parent. + * Place it last in the output order. */ + Next [i] = nel++ ; + } + } + } + ASSERT (nel == n) ; + + AMD_DEBUG2 (("\n\nPerm:\n")) ; + for (i = 0 ; i < n ; i++) + { + k = Next [i] ; + ASSERT (k >= 0 && k < n) ; + Last [k] = i ; + AMD_DEBUG2 ((" perm ["ID"] = "ID"\n", k, i)) ; + } +} diff --git a/src/maths/KLU/amd_aat.c b/src/maths/KLU/amd_aat.c new file mode 100644 index 000000000..4f02b755f --- /dev/null +++ b/src/maths/KLU/amd_aat.c @@ -0,0 +1,185 @@ +/* ========================================================================= */ +/* === AMD_aat ============================================================= */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* AMD_aat: compute the symmetry of the pattern of A, and count the number of + * nonzeros each column of A+A' (excluding the diagonal). Assumes the input + * matrix has no errors, with sorted columns and no duplicates + * (AMD_valid (n, n, Ap, Ai) must be AMD_OK, but this condition is not + * checked). + */ + +#include "amd_internal.h" + +GLOBAL size_t AMD_aat /* returns nz in A+A' */ +( + Int n, + const Int Ap [ ], + const Int Ai [ ], + Int Len [ ], /* Len [j]: length of column j of A+A', excl diagonal*/ + Int Tp [ ], /* workspace of size n */ + double Info [ ] +) +{ + Int p1, p2, p, i, j, pj, pj2, k, nzdiag, nzboth, nz ; + double sym ; + size_t nzaat ; + +#ifndef NDEBUG + AMD_debug_init ("AMD AAT") ; + for (k = 0 ; k < n ; k++) Tp [k] = EMPTY ; + ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ; +#endif + + if (Info != (double *) NULL) + { + /* clear the Info array, if it exists */ + for (i = 0 ; i < AMD_INFO ; i++) + { + Info [i] = EMPTY ; + } + Info [AMD_STATUS] = AMD_OK ; + } + + for (k = 0 ; k < n ; k++) + { + Len [k] = 0 ; + } + + nzdiag = 0 ; + nzboth = 0 ; + nz = Ap [n] ; + + for (k = 0 ; k < n ; k++) + { + p1 = Ap [k] ; + p2 = Ap [k+1] ; + AMD_DEBUG2 (("\nAAT Column: "ID" p1: "ID" p2: "ID"\n", k, p1, p2)) ; + + /* construct A+A' */ + for (p = p1 ; p < p2 ; ) + { + /* scan the upper triangular part of A */ + j = Ai [p] ; + if (j < k) + { + /* entry A (j,k) is in the strictly upper triangular part, + * add both A (j,k) and A (k,j) to the matrix A+A' */ + Len [j]++ ; + Len [k]++ ; + AMD_DEBUG3 ((" upper ("ID","ID") ("ID","ID")\n", j,k, k,j)); + p++ ; + } + else if (j == k) + { + /* skip the diagonal */ + p++ ; + nzdiag++ ; + break ; + } + else /* j > k */ + { + /* first entry below the diagonal */ + break ; + } + /* scan lower triangular part of A, in column j until reaching + * row k. Start where last scan left off. */ + ASSERT (Tp [j] != EMPTY) ; + ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ; + pj2 = Ap [j+1] ; + for (pj = Tp [j] ; pj < pj2 ; ) + { + i = Ai [pj] ; + if (i < k) + { + /* A (i,j) is only in the lower part, not in upper. + * add both A (i,j) and A (j,i) to the matrix A+A' */ + Len [i]++ ; + Len [j]++ ; + AMD_DEBUG3 ((" lower ("ID","ID") ("ID","ID")\n", + i,j, j,i)) ; + pj++ ; + } + else if (i == k) + { + /* entry A (k,j) in lower part and A (j,k) in upper */ + pj++ ; + nzboth++ ; + break ; + } + else /* i > k */ + { + /* consider this entry later, when k advances to i */ + break ; + } + } + Tp [j] = pj ; + } + /* Tp [k] points to the entry just below the diagonal in column k */ + Tp [k] = p ; + } + + /* clean up, for remaining mismatched entries */ + for (j = 0 ; j < n ; j++) + { + for (pj = Tp [j] ; pj < Ap [j+1] ; pj++) + { + i = Ai [pj] ; + /* A (i,j) is only in the lower part, not in upper. + * add both A (i,j) and A (j,i) to the matrix A+A' */ + Len [i]++ ; + Len [j]++ ; + AMD_DEBUG3 ((" lower cleanup ("ID","ID") ("ID","ID")\n", + i,j, j,i)) ; + } + } + + /* --------------------------------------------------------------------- */ + /* compute the symmetry of the nonzero pattern of A */ + /* --------------------------------------------------------------------- */ + + /* Given a matrix A, the symmetry of A is: + * B = tril (spones (A), -1) + triu (spones (A), 1) ; + * sym = nnz (B & B') / nnz (B) ; + * or 1 if nnz (B) is zero. + */ + + if (nz == nzdiag) + { + sym = 1 ; + } + else + { + sym = (2 * (double) nzboth) / ((double) (nz - nzdiag)) ; + } + + nzaat = 0 ; + for (k = 0 ; k < n ; k++) + { + nzaat += Len [k] ; + } + + AMD_DEBUG1 (("AMD nz in A+A', excluding diagonal (nzaat) = %g\n", + (double) nzaat)) ; + AMD_DEBUG1 ((" nzboth: "ID" nz: "ID" nzdiag: "ID" symmetry: %g\n", + nzboth, nz, nzdiag, sym)) ; + + if (Info != (double *) NULL) + { + Info [AMD_STATUS] = AMD_OK ; + Info [AMD_N] = n ; + Info [AMD_NZ] = nz ; + Info [AMD_SYMMETRY] = sym ; /* symmetry of pattern of A */ + Info [AMD_NZDIAG] = nzdiag ; /* nonzeros on diagonal of A */ + Info [AMD_NZ_A_PLUS_AT] = nzaat ; /* nonzeros in A+A' */ + } + + return (nzaat) ; +} diff --git a/src/maths/KLU/amd_control.c b/src/maths/KLU/amd_control.c new file mode 100644 index 000000000..c2aec9f0b --- /dev/null +++ b/src/maths/KLU/amd_control.c @@ -0,0 +1,64 @@ +/* ========================================================================= */ +/* === AMD_control ========================================================= */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* User-callable. Prints the control parameters for AMD. See amd.h + * for details. If the Control array is not present, the defaults are + * printed instead. + */ + +#include "amd_internal.h" + +GLOBAL void AMD_control +( + double Control [ ] +) +{ + double alpha ; + Int aggressive ; + + if (Control != (double *) NULL) + { + alpha = Control [AMD_DENSE] ; + aggressive = Control [AMD_AGGRESSIVE] != 0 ; + } + else + { + alpha = AMD_DEFAULT_DENSE ; + aggressive = AMD_DEFAULT_AGGRESSIVE ; + } + + PRINTF (("\nAMD version %d.%d.%d, %s: approximate minimum degree ordering\n" + " dense row parameter: %g\n", AMD_MAIN_VERSION, AMD_SUB_VERSION, + AMD_SUBSUB_VERSION, AMD_DATE, alpha)) ; + + if (alpha < 0) + { + PRINTF ((" no rows treated as dense\n")) ; + } + else + { + PRINTF (( + " (rows with more than max (%g * sqrt (n), 16) entries are\n" + " considered \"dense\", and placed last in output permutation)\n", + alpha)) ; + } + + if (aggressive) + { + PRINTF ((" aggressive absorption: yes\n")) ; + } + else + { + PRINTF ((" aggressive absorption: no\n")) ; + } + + PRINTF ((" size of AMD integer: %d\n\n", sizeof (Int))) ; +} diff --git a/src/maths/KLU/amd_defaults.c b/src/maths/KLU/amd_defaults.c new file mode 100644 index 000000000..ffe3f4bd5 --- /dev/null +++ b/src/maths/KLU/amd_defaults.c @@ -0,0 +1,38 @@ +/* ========================================================================= */ +/* === AMD_defaults ======================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* User-callable. Sets default control parameters for AMD. See amd.h + * for details. + */ + +#include "amd_internal.h" + +/* ========================================================================= */ +/* === AMD defaults ======================================================== */ +/* ========================================================================= */ + +GLOBAL void AMD_defaults +( + double Control [ ] +) +{ + Int i ; + + if (Control != (double *) NULL) + { + for (i = 0 ; i < AMD_CONTROL ; i++) + { + Control [i] = 0 ; + } + Control [AMD_DENSE] = AMD_DEFAULT_DENSE ; + Control [AMD_AGGRESSIVE] = AMD_DEFAULT_AGGRESSIVE ; + } +} diff --git a/src/maths/KLU/amd_dump.c b/src/maths/KLU/amd_dump.c new file mode 100644 index 000000000..89d67b875 --- /dev/null +++ b/src/maths/KLU/amd_dump.c @@ -0,0 +1,180 @@ +/* ========================================================================= */ +/* === AMD_dump ============================================================ */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* Debugging routines for AMD. Not used if NDEBUG is not defined at compile- + * time (the default). See comments in amd_internal.h on how to enable + * debugging. Not user-callable. + */ + +#include "amd_internal.h" + +#ifndef NDEBUG + +/* This global variable is present only when debugging */ +GLOBAL Int AMD_debug = -999 ; /* default is no debug printing */ + +/* ========================================================================= */ +/* === AMD_debug_init ====================================================== */ +/* ========================================================================= */ + +/* Sets the debug print level, by reading the file debug.amd (if it exists) */ + +GLOBAL void AMD_debug_init ( char *s ) +{ + FILE *f ; + f = fopen ("debug.amd", "r") ; + if (f == (FILE *) NULL) + { + AMD_debug = -999 ; + } + else + { + fscanf (f, ID, &AMD_debug) ; + fclose (f) ; + } + if (AMD_debug >= 0) + { + printf ("%s: AMD_debug_init, D= "ID"\n", s, AMD_debug) ; + } +} + +/* ========================================================================= */ +/* === AMD_dump ============================================================ */ +/* ========================================================================= */ + +/* Dump AMD's data structure, except for the hash buckets. This routine + * cannot be called when the hash buckets are non-empty. + */ + +GLOBAL void AMD_dump ( + Int n, /* A is n-by-n */ + Int Pe [ ], /* pe [0..n-1]: index in iw of start of row i */ + Int Iw [ ], /* workspace of size iwlen, iwlen [0..pfree-1] + * holds the matrix on input */ + Int Len [ ], /* len [0..n-1]: length for row i */ + Int iwlen, /* length of iw */ + Int pfree, /* iw [pfree ... iwlen-1] is empty on input */ + Int Nv [ ], /* nv [0..n-1] */ + Int Next [ ], /* next [0..n-1] */ + Int Last [ ], /* last [0..n-1] */ + Int Head [ ], /* head [0..n-1] */ + Int Elen [ ], /* size n */ + Int Degree [ ], /* size n */ + Int W [ ], /* size n */ + Int nel +) +{ + Int i, pe, elen, nv, len, e, p, k, j, deg, w, cnt, ilast ; + + if (AMD_debug < 0) return ; + ASSERT (pfree <= iwlen) ; + AMD_DEBUG3 (("\nAMD dump, pfree: "ID"\n", pfree)) ; + for (i = 0 ; i < n ; i++) + { + pe = Pe [i] ; + elen = Elen [i] ; + nv = Nv [i] ; + len = Len [i] ; + w = W [i] ; + + if (elen >= EMPTY) + { + if (nv == 0) + { + AMD_DEBUG3 (("\nI "ID": nonprincipal: ", i)) ; + ASSERT (elen == EMPTY) ; + if (pe == EMPTY) + { + AMD_DEBUG3 ((" dense node\n")) ; + ASSERT (w == 1) ; + } + else + { + ASSERT (pe < EMPTY) ; + AMD_DEBUG3 ((" i "ID" -> parent "ID"\n", i, FLIP (Pe[i]))); + } + } + else + { + AMD_DEBUG3 (("\nI "ID": active principal supervariable:\n",i)); + AMD_DEBUG3 ((" nv(i): "ID" Flag: %d\n", nv, (nv < 0))) ; + ASSERT (elen >= 0) ; + ASSERT (nv > 0 && pe >= 0) ; + p = pe ; + AMD_DEBUG3 ((" e/s: ")) ; + if (elen == 0) AMD_DEBUG3 ((" : ")) ; + ASSERT (pe + len <= pfree) ; + for (k = 0 ; k < len ; k++) + { + j = Iw [p] ; + AMD_DEBUG3 ((" "ID"", j)) ; + ASSERT (j >= 0 && j < n) ; + if (k == elen-1) AMD_DEBUG3 ((" : ")) ; + p++ ; + } + AMD_DEBUG3 (("\n")) ; + } + } + else + { + e = i ; + if (w == 0) + { + AMD_DEBUG3 (("\nE "ID": absorbed element: w "ID"\n", e, w)) ; + ASSERT (nv > 0 && pe < 0) ; + AMD_DEBUG3 ((" e "ID" -> parent "ID"\n", e, FLIP (Pe [e]))) ; + } + else + { + AMD_DEBUG3 (("\nE "ID": unabsorbed element: w "ID"\n", e, w)) ; + ASSERT (nv > 0 && pe >= 0) ; + p = pe ; + AMD_DEBUG3 ((" : ")) ; + ASSERT (pe + len <= pfree) ; + for (k = 0 ; k < len ; k++) + { + j = Iw [p] ; + AMD_DEBUG3 ((" "ID"", j)) ; + ASSERT (j >= 0 && j < n) ; + p++ ; + } + AMD_DEBUG3 (("\n")) ; + } + } + } + + /* this routine cannot be called when the hash buckets are non-empty */ + AMD_DEBUG3 (("\nDegree lists:\n")) ; + if (nel >= 0) + { + cnt = 0 ; + for (deg = 0 ; deg < n ; deg++) + { + if (Head [deg] == EMPTY) continue ; + ilast = EMPTY ; + AMD_DEBUG3 ((ID": \n", deg)) ; + for (i = Head [deg] ; i != EMPTY ; i = Next [i]) + { + AMD_DEBUG3 ((" "ID" : next "ID" last "ID" deg "ID"\n", + i, Next [i], Last [i], Degree [i])) ; + ASSERT (i >= 0 && i < n && ilast == Last [i] && + deg == Degree [i]) ; + cnt += Nv [i] ; + ilast = i ; + } + AMD_DEBUG3 (("\n")) ; + } + ASSERT (cnt == n - nel) ; + } + +} + +#endif diff --git a/src/maths/KLU/amd_global.c b/src/maths/KLU/amd_global.c new file mode 100644 index 000000000..93f2b4518 --- /dev/null +++ b/src/maths/KLU/amd_global.c @@ -0,0 +1,84 @@ +/* ========================================================================= */ +/* === amd_global ========================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +#include + +#ifdef MATLAB_MEX_FILE +#include "mex.h" +#include "matrix.h" +#endif + +#ifndef NULL +#define NULL 0 +#endif + +/* ========================================================================= */ +/* === Default AMD memory manager ========================================== */ +/* ========================================================================= */ + +/* The user can redefine these global pointers at run-time to change the memory + * manager used by AMD. AMD only uses malloc and free; realloc and calloc are + * include for completeness, in case another package wants to use the same + * memory manager as AMD. + * + * If compiling as a MATLAB mexFunction, the default memory manager is mxMalloc. + * You can also compile AMD as a standard ANSI-C library and link a mexFunction + * against it, and then redefine these pointers at run-time, in your + * mexFunction. + * + * If -DNMALLOC is defined at compile-time, no memory manager is specified at + * compile-time. You must then define these functions at run-time, before + * calling AMD, for AMD to work properly. + */ + +#ifndef NMALLOC +#ifdef MATLAB_MEX_FILE +/* MATLAB mexFunction: */ +void *(*amd_malloc) (size_t) = mxMalloc ; +void (*amd_free) (void *) = mxFree ; +void *(*amd_realloc) (void *, size_t) = mxRealloc ; +void *(*amd_calloc) (size_t, size_t) = mxCalloc ; +#else +/* standard ANSI-C: */ +void *(*amd_malloc) (size_t) = malloc ; +void (*amd_free) (void *) = free ; +void *(*amd_realloc) (void *, size_t) = realloc ; +void *(*amd_calloc) (size_t, size_t) = calloc ; +#endif +#else +/* no memory manager defined at compile-time; you MUST define one at run-time */ +void *(*amd_malloc) (size_t) = NULL ; +void (*amd_free) (void *) = NULL ; +void *(*amd_realloc) (void *, size_t) = NULL ; +void *(*amd_calloc) (size_t, size_t) = NULL ; +#endif + +/* ========================================================================= */ +/* === Default AMD printf routine ========================================== */ +/* ========================================================================= */ + +/* The user can redefine this global pointer at run-time to change the printf + * routine used by AMD. If NULL, no printing occurs. + * + * If -DNPRINT is defined at compile-time, stdio.h is not included. Printing + * can then be enabled at run-time by setting amd_printf to a non-NULL function. + */ + +#ifndef NPRINT +#ifdef MATLAB_MEX_FILE +int (*amd_printf) (const char *, ...) = mexPrintf ; +#else +#include +int (*amd_printf) (const char *, ...) = printf ; +#endif +#else +int (*amd_printf) (const char *, ...) = NULL ; +#endif diff --git a/src/maths/KLU/amd_info.c b/src/maths/KLU/amd_info.c new file mode 100644 index 000000000..0a842adf0 --- /dev/null +++ b/src/maths/KLU/amd_info.c @@ -0,0 +1,120 @@ +/* ========================================================================= */ +/* === AMD_info ============================================================ */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* User-callable. Prints the output statistics for AMD. See amd.h + * for details. If the Info array is not present, nothing is printed. + */ + +#include "amd_internal.h" + +#define PRI(format,x) { if (x >= 0) { PRINTF ((format, x)) ; }} + +GLOBAL void AMD_info +( + double Info [ ] +) +{ + double n, ndiv, nmultsubs_ldl, nmultsubs_lu, lnz, lnzd ; + + PRINTF (("\nAMD version %d.%d.%d, %s, results:\n", + AMD_MAIN_VERSION, AMD_SUB_VERSION, AMD_SUBSUB_VERSION, AMD_DATE)) ; + + if (!Info) + { + return ; + } + + n = Info [AMD_N] ; + ndiv = Info [AMD_NDIV] ; + nmultsubs_ldl = Info [AMD_NMULTSUBS_LDL] ; + nmultsubs_lu = Info [AMD_NMULTSUBS_LU] ; + lnz = Info [AMD_LNZ] ; + lnzd = (n >= 0 && lnz >= 0) ? (n + lnz) : (-1) ; + + /* AMD return status */ + PRINTF ((" status: ")) ; + if (Info [AMD_STATUS] == AMD_OK) + { + PRINTF (("OK\n")) ; + } + else if (Info [AMD_STATUS] == AMD_OUT_OF_MEMORY) + { + PRINTF (("out of memory\n")) ; + } + else if (Info [AMD_STATUS] == AMD_INVALID) + { + PRINTF (("invalid matrix\n")) ; + } + else if (Info [AMD_STATUS] == AMD_OK_BUT_JUMBLED) + { + PRINTF (("OK, but jumbled\n")) ; + } + else + { + PRINTF (("unknown\n")) ; + } + + /* statistics about the input matrix */ + PRI (" n, dimension of A: %.20g\n", n); + PRI (" nz, number of nonzeros in A: %.20g\n", + Info [AMD_NZ]) ; + PRI (" symmetry of A: %.4f\n", + Info [AMD_SYMMETRY]) ; + PRI (" number of nonzeros on diagonal: %.20g\n", + Info [AMD_NZDIAG]) ; + PRI (" nonzeros in pattern of A+A' (excl. diagonal): %.20g\n", + Info [AMD_NZ_A_PLUS_AT]) ; + PRI (" # dense rows/columns of A+A': %.20g\n", + Info [AMD_NDENSE]) ; + + /* statistics about AMD's behavior */ + PRI (" memory used, in bytes: %.20g\n", + Info [AMD_MEMORY]) ; + PRI (" # of memory compactions: %.20g\n", + Info [AMD_NCMPA]) ; + + /* statistics about the ordering quality */ + PRINTF (("\n" + " The following approximate statistics are for a subsequent\n" + " factorization of A(P,P) + A(P,P)'. They are slight upper\n" + " bounds if there are no dense rows/columns in A+A', and become\n" + " looser if dense rows/columns exist.\n\n")) ; + + PRI (" nonzeros in L (excluding diagonal): %.20g\n", + lnz) ; + PRI (" nonzeros in L (including diagonal): %.20g\n", + lnzd) ; + PRI (" # divide operations for LDL' or LU: %.20g\n", + ndiv) ; + PRI (" # multiply-subtract operations for LDL': %.20g\n", + nmultsubs_ldl) ; + PRI (" # multiply-subtract operations for LU: %.20g\n", + nmultsubs_lu) ; + PRI (" max nz. in any column of L (incl. diagonal): %.20g\n", + Info [AMD_DMAX]) ; + + /* total flop counts for various factorizations */ + + if (n >= 0 && ndiv >= 0 && nmultsubs_ldl >= 0 && nmultsubs_lu >= 0) + { + PRINTF (("\n" + " chol flop count for real A, sqrt counted as 1 flop: %.20g\n" + " LDL' flop count for real A: %.20g\n" + " LDL' flop count for complex A: %.20g\n" + " LU flop count for real A (with no pivoting): %.20g\n" + " LU flop count for complex A (with no pivoting): %.20g\n\n", + n + ndiv + 2*nmultsubs_ldl, + ndiv + 2*nmultsubs_ldl, + 9*ndiv + 8*nmultsubs_ldl, + ndiv + 2*nmultsubs_lu, + 9*ndiv + 8*nmultsubs_lu)) ; + } +} diff --git a/src/maths/KLU/amd_internal.h b/src/maths/KLU/amd_internal.h new file mode 100644 index 000000000..750aee82b --- /dev/null +++ b/src/maths/KLU/amd_internal.h @@ -0,0 +1,350 @@ +/* ========================================================================= */ +/* === amd_internal.h ====================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* This file is for internal use in AMD itself, and does not normally need to + * be included in user code (it is included in UMFPACK, however). All others + * should use amd.h instead. + * + * The following compile-time definitions affect how AMD is compiled. + * + * -DNPRINT + * + * Disable all printing. stdio.h will not be included. Printing can + * be re-enabled at run-time by setting the global pointer amd_printf + * to printf (or mexPrintf for a MATLAB mexFunction). + * + * -DNMALLOC + * + * No memory manager is defined at compile-time. You MUST define the + * function pointers amd_malloc, amd_free, amd_realloc, and + * amd_calloc at run-time for AMD to work properly. + */ + +/* ========================================================================= */ +/* === NDEBUG ============================================================== */ +/* ========================================================================= */ + +/* + * Turning on debugging takes some work (see below). If you do not edit this + * file, then debugging is always turned off, regardless of whether or not + * -DNDEBUG is specified in your compiler options. + * + * If AMD is being compiled as a mexFunction, then MATLAB_MEX_FILE is defined, + * and mxAssert is used instead of assert. If debugging is not enabled, no + * MATLAB include files or functions are used. Thus, the AMD library libamd.a + * can be safely used in either a stand-alone C program or in another + * mexFunction, without any change. + */ + +/* + AMD will be exceedingly slow when running in debug mode. The next three + lines ensure that debugging is turned off. +*/ +#ifndef NDEBUG +#define NDEBUG +#endif + +/* + To enable debugging, uncomment the following line: +#undef NDEBUG +*/ + +/* ------------------------------------------------------------------------- */ +/* ANSI include files */ +/* ------------------------------------------------------------------------- */ + +/* from stdlib.h: size_t, malloc, free, realloc, and calloc */ +#include + +#if !defined(NPRINT) || !defined(NDEBUG) +/* from stdio.h: printf. Not included if NPRINT is defined at compile time. + * fopen and fscanf are used when debugging. */ +#include +#endif + +/* from limits.h: INT_MAX and LONG_MAX */ +#include + +/* from math.h: sqrt */ +#include + +/* ------------------------------------------------------------------------- */ +/* MATLAB include files (only if being used in or via MATLAB) */ +/* ------------------------------------------------------------------------- */ + +#ifdef MATLAB_MEX_FILE +#include "matrix.h" +#include "mex.h" +#endif + +/* ------------------------------------------------------------------------- */ +/* basic definitions */ +/* ------------------------------------------------------------------------- */ + +#ifdef FLIP +#undef FLIP +#endif + +#ifdef MAX +#undef MAX +#endif + +#ifdef MIN +#undef MIN +#endif + +#ifdef EMPTY +#undef EMPTY +#endif + +#ifdef GLOBAL +#undef GLOBAL +#endif + +#ifdef PRIVATE +#undef PRIVATE +#endif + +/* FLIP is a "negation about -1", and is used to mark an integer i that is + * normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY + * is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i + * for all integers i. UNFLIP (i) is >= EMPTY. */ +#define EMPTY (-1) +#define FLIP(i) (-(i)-2) +#define UNFLIP(i) ((i < EMPTY) ? FLIP (i) : (i)) + +/* for integer MAX/MIN, or for doubles when we don't care how NaN's behave: */ +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +/* logical expression of p implies q: */ +#define IMPLIES(p,q) (!(p) || (q)) + +/* Note that the IBM RS 6000 xlc predefines TRUE and FALSE in . */ +/* The Compaq Alpha also predefines TRUE and FALSE. */ +#ifdef TRUE +#undef TRUE +#endif +#ifdef FALSE +#undef FALSE +#endif + +#define TRUE (1) +#define FALSE (0) +#define PRIVATE static +#define GLOBAL +#define EMPTY (-1) + +/* Note that Linux's gcc 2.96 defines NULL as ((void *) 0), but other */ +/* compilers (even gcc 2.95.2 on Solaris) define NULL as 0 or (0). We */ +/* need to use the ANSI standard value of 0. */ +#ifdef NULL +#undef NULL +#endif + +#define NULL 0 + +/* largest value of size_t */ +#ifndef SIZE_T_MAX +#define SIZE_T_MAX ((size_t) (-1)) +#endif + +/* ------------------------------------------------------------------------- */ +/* integer type for AMD: int or UF_long */ +/* ------------------------------------------------------------------------- */ + +/* define UF_long */ +#include "UFconfig.h" + +#if defined (DLONG) || defined (ZLONG) + +#define Int UF_long +#define ID UF_long_id +#define Int_MAX UF_long_max + +#define AMD_order amd_l_order +#define AMD_defaults amd_l_defaults +#define AMD_control amd_l_control +#define AMD_info amd_l_info +#define AMD_1 amd_l1 +#define AMD_2 amd_l2 +#define AMD_valid amd_l_valid +#define AMD_aat amd_l_aat +#define AMD_postorder amd_l_postorder +#define AMD_post_tree amd_l_post_tree +#define AMD_dump amd_l_dump +#define AMD_debug amd_l_debug +#define AMD_debug_init amd_l_debug_init +#define AMD_preprocess amd_l_preprocess + +#else + +#define Int int +#define ID "%d" +#define Int_MAX INT_MAX + +#define AMD_order amd_order +#define AMD_defaults amd_defaults +#define AMD_control amd_control +#define AMD_info amd_info +#define AMD_1 amd_1 +#define AMD_2 amd_2 +#define AMD_valid amd_valid +#define AMD_aat amd_aat +#define AMD_postorder amd_postorder +#define AMD_post_tree amd_post_tree +#define AMD_dump amd_dump +#define AMD_debug amd_debug +#define AMD_debug_init amd_debug_init +#define AMD_preprocess amd_preprocess + +#endif + +/* ========================================================================= */ +/* === PRINTF macro ======================================================== */ +/* ========================================================================= */ + +/* All output goes through the PRINTF macro. */ +#define PRINTF(params) { if (amd_printf != NULL) (void) amd_printf params ; } + +/* ------------------------------------------------------------------------- */ +/* AMD routine definitions (user-callable) */ +/* ------------------------------------------------------------------------- */ + +#include "amd.h" + +/* ------------------------------------------------------------------------- */ +/* AMD routine definitions (not user-callable) */ +/* ------------------------------------------------------------------------- */ + +GLOBAL size_t AMD_aat +( + Int n, + const Int Ap [ ], + const Int Ai [ ], + Int Len [ ], + Int Tp [ ], + double Info [ ] +) ; + +GLOBAL void AMD_1 +( + Int n, + const Int Ap [ ], + const Int Ai [ ], + Int P [ ], + Int Pinv [ ], + Int Len [ ], + Int slen, + Int S [ ], + double Control [ ], + double Info [ ] +) ; + +GLOBAL void AMD_postorder +( + Int nn, + Int Parent [ ], + Int Npiv [ ], + Int Fsize [ ], + Int Order [ ], + Int Child [ ], + Int Sibling [ ], + Int Stack [ ] +) ; + +GLOBAL Int AMD_post_tree +( + Int root, + Int k, + Int Child [ ], + const Int Sibling [ ], + Int Order [ ], + Int Stack [ ] +#ifndef NDEBUG + , Int nn +#endif +) ; + +GLOBAL void AMD_preprocess +( + Int n, + const Int Ap [ ], + const Int Ai [ ], + Int Rp [ ], + Int Ri [ ], + Int W [ ], + Int Flag [ ] +) ; + +/* ------------------------------------------------------------------------- */ +/* debugging definitions */ +/* ------------------------------------------------------------------------- */ + +#ifndef NDEBUG + +/* from assert.h: assert macro */ +#include + +#ifndef EXTERN +#define EXTERN extern +#endif + +EXTERN Int AMD_debug ; + +GLOBAL void AMD_debug_init ( char *s ) ; + +GLOBAL void AMD_dump +( + Int n, + Int Pe [ ], + Int Iw [ ], + Int Len [ ], + Int iwlen, + Int pfree, + Int Nv [ ], + Int Next [ ], + Int Last [ ], + Int Head [ ], + Int Elen [ ], + Int Degree [ ], + Int W [ ], + Int nel +) ; + +#ifdef ASSERT +#undef ASSERT +#endif + +/* Use mxAssert if AMD is compiled into a mexFunction */ +#ifdef MATLAB_MEX_FILE +#define ASSERT(expression) (mxAssert ((expression), "")) +#else +#define ASSERT(expression) (assert (expression)) +#endif + +#define AMD_DEBUG0(params) { PRINTF (params) ; } +#define AMD_DEBUG1(params) { if (AMD_debug >= 1) PRINTF (params) ; } +#define AMD_DEBUG2(params) { if (AMD_debug >= 2) PRINTF (params) ; } +#define AMD_DEBUG3(params) { if (AMD_debug >= 3) PRINTF (params) ; } +#define AMD_DEBUG4(params) { if (AMD_debug >= 4) PRINTF (params) ; } + +#else + +/* no debugging */ +#define ASSERT(expression) +#define AMD_DEBUG0(params) +#define AMD_DEBUG1(params) +#define AMD_DEBUG2(params) +#define AMD_DEBUG3(params) +#define AMD_DEBUG4(params) + +#endif diff --git a/src/maths/KLU/amd_order.c b/src/maths/KLU/amd_order.c new file mode 100644 index 000000000..d3f6853d1 --- /dev/null +++ b/src/maths/KLU/amd_order.c @@ -0,0 +1,200 @@ +/* ========================================================================= */ +/* === AMD_order =========================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* User-callable AMD minimum degree ordering routine. See amd.h for + * documentation. + */ + +#include "amd_internal.h" + +/* ========================================================================= */ +/* === AMD_order =========================================================== */ +/* ========================================================================= */ + +GLOBAL Int AMD_order +( + Int n, + const Int Ap [ ], + const Int Ai [ ], + Int P [ ], + double Control [ ], + double Info [ ] +) +{ + Int *Len, *S, nz, i, *Pinv, info, status, *Rp, *Ri, *Cp, *Ci, ok ; + size_t nzaat, slen ; + double mem = 0 ; + +#ifndef NDEBUG + AMD_debug_init ("amd") ; +#endif + + /* clear the Info array, if it exists */ + info = Info != (double *) NULL ; + if (info) + { + for (i = 0 ; i < AMD_INFO ; i++) + { + Info [i] = EMPTY ; + } + Info [AMD_N] = n ; + Info [AMD_STATUS] = AMD_OK ; + } + + /* make sure inputs exist and n is >= 0 */ + if (Ai == (Int *) NULL || Ap == (Int *) NULL || P == (Int *) NULL || n < 0) + { + if (info) Info [AMD_STATUS] = AMD_INVALID ; + return (AMD_INVALID) ; /* arguments are invalid */ + } + + if (n == 0) + { + return (AMD_OK) ; /* n is 0 so there's nothing to do */ + } + + nz = Ap [n] ; + if (info) + { + Info [AMD_NZ] = nz ; + } + if (nz < 0) + { + if (info) Info [AMD_STATUS] = AMD_INVALID ; + return (AMD_INVALID) ; + } + + /* check if n or nz will cause size_t overflow */ + if (((size_t) n) >= SIZE_T_MAX / sizeof (Int) + || ((size_t) nz) >= SIZE_T_MAX / sizeof (Int)) + { + if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; + return (AMD_OUT_OF_MEMORY) ; /* problem too large */ + } + + /* check the input matrix: AMD_OK, AMD_INVALID, or AMD_OK_BUT_JUMBLED */ + status = AMD_valid (n, n, Ap, Ai) ; + + if (status == AMD_INVALID) + { + if (info) Info [AMD_STATUS] = AMD_INVALID ; + return (AMD_INVALID) ; /* matrix is invalid */ + } + + /* allocate two size-n integer workspaces */ + Len = amd_malloc (n * sizeof (Int)) ; + Pinv = amd_malloc (n * sizeof (Int)) ; + mem += n ; + mem += n ; + if (!Len || !Pinv) + { + /* :: out of memory :: */ + amd_free (Len) ; + amd_free (Pinv) ; + if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; + return (AMD_OUT_OF_MEMORY) ; + } + + if (status == AMD_OK_BUT_JUMBLED) + { + /* sort the input matrix and remove duplicate entries */ + AMD_DEBUG1 (("Matrix is jumbled\n")) ; + Rp = amd_malloc ((n+1) * sizeof (Int)) ; + Ri = amd_malloc (MAX (nz,1) * sizeof (Int)) ; + mem += (n+1) ; + mem += MAX (nz,1) ; + if (!Rp || !Ri) + { + /* :: out of memory :: */ + amd_free (Rp) ; + amd_free (Ri) ; + amd_free (Len) ; + amd_free (Pinv) ; + if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; + return (AMD_OUT_OF_MEMORY) ; + } + /* use Len and Pinv as workspace to create R = A' */ + AMD_preprocess (n, Ap, Ai, Rp, Ri, Len, Pinv) ; + Cp = Rp ; + Ci = Ri ; + } + else + { + /* order the input matrix as-is. No need to compute R = A' first */ + Rp = NULL ; + Ri = NULL ; + Cp = (Int *) Ap ; + Ci = (Int *) Ai ; + } + + /* --------------------------------------------------------------------- */ + /* determine the symmetry and count off-diagonal nonzeros in A+A' */ + /* --------------------------------------------------------------------- */ + + nzaat = AMD_aat (n, Cp, Ci, Len, P, Info) ; + AMD_DEBUG1 (("nzaat: %g\n", (double) nzaat)) ; + ASSERT ((MAX (nz-n, 0) <= nzaat) && (nzaat <= 2 * (size_t) nz)) ; + + /* --------------------------------------------------------------------- */ + /* allocate workspace for matrix, elbow room, and 6 size-n vectors */ + /* --------------------------------------------------------------------- */ + + S = NULL ; + slen = nzaat ; /* space for matrix */ + ok = ((slen + nzaat/5) >= slen) ; /* check for size_t overflow */ + slen += nzaat/5 ; /* add elbow room */ + for (i = 0 ; ok && i < 7 ; i++) + { + ok = ((slen + n) > slen) ; /* check for size_t overflow */ + slen += n ; /* size-n elbow room, 6 size-n work */ + } + mem += slen ; + ok = ok && (slen < SIZE_T_MAX / sizeof (Int)) ; /* check for overflow */ + ok = ok && (slen < Int_MAX) ; /* S[i] for Int i must be OK */ + if (ok) + { + S = amd_malloc (slen * sizeof (Int)) ; + } + AMD_DEBUG1 (("slen %g\n", (double) slen)) ; + if (!S) + { + /* :: out of memory :: (or problem too large) */ + amd_free (Rp) ; + amd_free (Ri) ; + amd_free (Len) ; + amd_free (Pinv) ; + if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; + return (AMD_OUT_OF_MEMORY) ; + } + if (info) + { + /* memory usage, in bytes. */ + Info [AMD_MEMORY] = mem * sizeof (Int) ; + } + + /* --------------------------------------------------------------------- */ + /* order the matrix */ + /* --------------------------------------------------------------------- */ + + AMD_1 (n, Cp, Ci, P, Pinv, Len, slen, S, Control, Info) ; + + /* --------------------------------------------------------------------- */ + /* free the workspace */ + /* --------------------------------------------------------------------- */ + + amd_free (Rp) ; + amd_free (Ri) ; + amd_free (Len) ; + amd_free (Pinv) ; + amd_free (S) ; + if (info) Info [AMD_STATUS] = status ; + return (status) ; /* successful ordering */ +} diff --git a/src/maths/KLU/amd_post_tree.c b/src/maths/KLU/amd_post_tree.c new file mode 100644 index 000000000..b4e063d52 --- /dev/null +++ b/src/maths/KLU/amd_post_tree.c @@ -0,0 +1,121 @@ +/* ========================================================================= */ +/* === AMD_post_tree ======================================================= */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* Post-ordering of a supernodal elimination tree. */ + +#include "amd_internal.h" + +GLOBAL Int AMD_post_tree +( + Int root, /* root of the tree */ + Int k, /* start numbering at k */ + Int Child [ ], /* input argument of size nn, undefined on + * output. Child [i] is the head of a link + * list of all nodes that are children of node + * i in the tree. */ + const Int Sibling [ ], /* input argument of size nn, not modified. + * If f is a node in the link list of the + * children of node i, then Sibling [f] is the + * next child of node i. + */ + Int Order [ ], /* output order, of size nn. Order [i] = k + * if node i is the kth node of the reordered + * tree. */ + Int Stack [ ] /* workspace of size nn */ +#ifndef NDEBUG + , Int nn /* nodes are in the range 0..nn-1. */ +#endif +) +{ + Int f, head, h, i ; + +#if 0 + /* --------------------------------------------------------------------- */ + /* recursive version (Stack [ ] is not used): */ + /* --------------------------------------------------------------------- */ + + /* this is simple, but can caouse stack overflow if nn is large */ + i = root ; + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + k = AMD_post_tree (f, k, Child, Sibling, Order, Stack, nn) ; + } + Order [i] = k++ ; + return (k) ; +#endif + + /* --------------------------------------------------------------------- */ + /* non-recursive version, using an explicit stack */ + /* --------------------------------------------------------------------- */ + + /* push root on the stack */ + head = 0 ; + Stack [0] = root ; + + while (head >= 0) + { + /* get head of stack */ + ASSERT (head < nn) ; + i = Stack [head] ; + AMD_DEBUG1 (("head of stack "ID" \n", i)) ; + ASSERT (i >= 0 && i < nn) ; + + if (Child [i] != EMPTY) + { + /* the children of i are not yet ordered */ + /* push each child onto the stack in reverse order */ + /* so that small ones at the head of the list get popped first */ + /* and the biggest one at the end of the list gets popped last */ + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + head++ ; + ASSERT (head < nn) ; + ASSERT (f >= 0 && f < nn) ; + } + h = head ; + ASSERT (head < nn) ; + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + ASSERT (h > 0) ; + Stack [h--] = f ; + AMD_DEBUG1 (("push "ID" on stack\n", f)) ; + ASSERT (f >= 0 && f < nn) ; + } + ASSERT (Stack [h] == i) ; + + /* delete child list so that i gets ordered next time we see it */ + Child [i] = EMPTY ; + } + else + { + /* the children of i (if there were any) are already ordered */ + /* remove i from the stack and order it. Front i is kth front */ + head-- ; + AMD_DEBUG1 (("pop "ID" order "ID"\n", i, k)) ; + Order [i] = k++ ; + ASSERT (k <= nn) ; + } + +#ifndef NDEBUG + AMD_DEBUG1 (("\nStack:")) ; + for (h = head ; h >= 0 ; h--) + { + Int j = Stack [h] ; + AMD_DEBUG1 ((" "ID, j)) ; + ASSERT (j >= 0 && j < nn) ; + } + AMD_DEBUG1 (("\n\n")) ; + ASSERT (head < nn) ; +#endif + + } + return (k) ; +} diff --git a/src/maths/KLU/amd_postorder.c b/src/maths/KLU/amd_postorder.c new file mode 100644 index 000000000..4adcea3c0 --- /dev/null +++ b/src/maths/KLU/amd_postorder.c @@ -0,0 +1,207 @@ +/* ========================================================================= */ +/* === AMD_postorder ======================================================= */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* Perform a postordering (via depth-first search) of an assembly tree. */ + +#include "amd_internal.h" + +GLOBAL void AMD_postorder +( + /* inputs, not modified on output: */ + Int nn, /* nodes are in the range 0..nn-1 */ + Int Parent [ ], /* Parent [j] is the parent of j, or EMPTY if root */ + Int Nv [ ], /* Nv [j] > 0 number of pivots represented by node j, + * or zero if j is not a node. */ + Int Fsize [ ], /* Fsize [j]: size of node j */ + + /* output, not defined on input: */ + Int Order [ ], /* output post-order */ + + /* workspaces of size nn: */ + Int Child [ ], + Int Sibling [ ], + Int Stack [ ] +) +{ + Int i, j, k, parent, frsize, f, fprev, maxfrsize, bigfprev, bigf, fnext ; + + for (j = 0 ; j < nn ; j++) + { + Child [j] = EMPTY ; + Sibling [j] = EMPTY ; + } + + /* --------------------------------------------------------------------- */ + /* place the children in link lists - bigger elements tend to be last */ + /* --------------------------------------------------------------------- */ + + for (j = nn-1 ; j >= 0 ; j--) + { + if (Nv [j] > 0) + { + /* this is an element */ + parent = Parent [j] ; + if (parent != EMPTY) + { + /* place the element in link list of the children its parent */ + /* bigger elements will tend to be at the end of the list */ + Sibling [j] = Child [parent] ; + Child [parent] = j ; + } + } + } + +#ifndef NDEBUG + { + Int nels, ff, nchild ; + AMD_DEBUG1 (("\n\n================================ AMD_postorder:\n")); + nels = 0 ; + for (j = 0 ; j < nn ; j++) + { + if (Nv [j] > 0) + { + AMD_DEBUG1 (( ""ID" : nels "ID" npiv "ID" size "ID + " parent "ID" maxfr "ID"\n", j, nels, + Nv [j], Fsize [j], Parent [j], Fsize [j])) ; + /* this is an element */ + /* dump the link list of children */ + nchild = 0 ; + AMD_DEBUG1 ((" Children: ")) ; + for (ff = Child [j] ; ff != EMPTY ; ff = Sibling [ff]) + { + AMD_DEBUG1 ((ID" ", ff)) ; + ASSERT (Parent [ff] == j) ; + nchild++ ; + ASSERT (nchild < nn) ; + } + AMD_DEBUG1 (("\n")) ; + parent = Parent [j] ; + if (parent != EMPTY) + { + ASSERT (Nv [parent] > 0) ; + } + nels++ ; + } + } + } + AMD_DEBUG1 (("\n\nGo through the children of each node, and put\n" + "the biggest child last in each list:\n")) ; +#endif + + /* --------------------------------------------------------------------- */ + /* place the largest child last in the list of children for each node */ + /* --------------------------------------------------------------------- */ + + for (i = 0 ; i < nn ; i++) + { + if (Nv [i] > 0 && Child [i] != EMPTY) + { + +#ifndef NDEBUG + Int nchild ; + AMD_DEBUG1 (("Before partial sort, element "ID"\n", i)) ; + nchild = 0 ; + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + ASSERT (f >= 0 && f < nn) ; + AMD_DEBUG1 ((" f: "ID" size: "ID"\n", f, Fsize [f])) ; + nchild++ ; + ASSERT (nchild <= nn) ; + } +#endif + + /* find the biggest element in the child list */ + fprev = EMPTY ; + maxfrsize = EMPTY ; + bigfprev = EMPTY ; + bigf = EMPTY ; + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + ASSERT (f >= 0 && f < nn) ; + frsize = Fsize [f] ; + if (frsize >= maxfrsize) + { + /* this is the biggest seen so far */ + maxfrsize = frsize ; + bigfprev = fprev ; + bigf = f ; + } + fprev = f ; + } + ASSERT (bigf != EMPTY) ; + + fnext = Sibling [bigf] ; + + AMD_DEBUG1 (("bigf "ID" maxfrsize "ID" bigfprev "ID" fnext "ID + " fprev " ID"\n", bigf, maxfrsize, bigfprev, fnext, fprev)) ; + + if (fnext != EMPTY) + { + /* if fnext is EMPTY then bigf is already at the end of list */ + + if (bigfprev == EMPTY) + { + /* delete bigf from the element of the list */ + Child [i] = fnext ; + } + else + { + /* delete bigf from the middle of the list */ + Sibling [bigfprev] = fnext ; + } + + /* put bigf at the end of the list */ + Sibling [bigf] = EMPTY ; + ASSERT (Child [i] != EMPTY) ; + ASSERT (fprev != bigf) ; + ASSERT (fprev != EMPTY) ; + Sibling [fprev] = bigf ; + } + +#ifndef NDEBUG + AMD_DEBUG1 (("After partial sort, element "ID"\n", i)) ; + for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) + { + ASSERT (f >= 0 && f < nn) ; + AMD_DEBUG1 ((" "ID" "ID"\n", f, Fsize [f])) ; + ASSERT (Nv [f] > 0) ; + nchild-- ; + } + ASSERT (nchild == 0) ; +#endif + + } + } + + /* --------------------------------------------------------------------- */ + /* postorder the assembly tree */ + /* --------------------------------------------------------------------- */ + + for (i = 0 ; i < nn ; i++) + { + Order [i] = EMPTY ; + } + + k = 0 ; + + for (i = 0 ; i < nn ; i++) + { + if (Parent [i] == EMPTY && Nv [i] > 0) + { + AMD_DEBUG1 (("Root of assembly tree "ID"\n", i)) ; + k = AMD_post_tree (i, k, Child, Sibling, Order, Stack +#ifndef NDEBUG + , nn +#endif + ) ; + } + } +} diff --git a/src/maths/KLU/amd_preprocess.c b/src/maths/KLU/amd_preprocess.c new file mode 100644 index 000000000..86ea07f86 --- /dev/null +++ b/src/maths/KLU/amd_preprocess.c @@ -0,0 +1,119 @@ +/* ========================================================================= */ +/* === AMD_preprocess ====================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* Sorts, removes duplicate entries, and transposes from the nonzero pattern of + * a column-form matrix A, to obtain the matrix R. The input matrix can have + * duplicate entries and/or unsorted columns (AMD_valid (n,Ap,Ai) must not be + * AMD_INVALID). + * + * This input condition is NOT checked. This routine is not user-callable. + */ + +#include "amd_internal.h" + +/* ========================================================================= */ +/* === AMD_preprocess ====================================================== */ +/* ========================================================================= */ + +/* AMD_preprocess does not check its input for errors or allocate workspace. + * On input, the condition (AMD_valid (n,n,Ap,Ai) != AMD_INVALID) must hold. + */ + +GLOBAL void AMD_preprocess +( + Int n, /* input matrix: A is n-by-n */ + const Int Ap [ ], /* size n+1 */ + const Int Ai [ ], /* size nz = Ap [n] */ + + /* output matrix R: */ + Int Rp [ ], /* size n+1 */ + Int Ri [ ], /* size nz (or less, if duplicates present) */ + + Int W [ ], /* workspace of size n */ + Int Flag [ ] /* workspace of size n */ +) +{ + + /* --------------------------------------------------------------------- */ + /* local variables */ + /* --------------------------------------------------------------------- */ + + Int i, j, p, p2 ; + + ASSERT (AMD_valid (n, n, Ap, Ai) != AMD_INVALID) ; + + /* --------------------------------------------------------------------- */ + /* count the entries in each row of A (excluding duplicates) */ + /* --------------------------------------------------------------------- */ + + for (i = 0 ; i < n ; i++) + { + W [i] = 0 ; /* # of nonzeros in row i (excl duplicates) */ + Flag [i] = EMPTY ; /* Flag [i] = j if i appears in column j */ + } + for (j = 0 ; j < n ; j++) + { + p2 = Ap [j+1] ; + for (p = Ap [j] ; p < p2 ; p++) + { + i = Ai [p] ; + if (Flag [i] != j) + { + /* row index i has not yet appeared in column j */ + W [i]++ ; /* one more entry in row i */ + Flag [i] = j ; /* flag row index i as appearing in col j*/ + } + } + } + + /* --------------------------------------------------------------------- */ + /* compute the row pointers for R */ + /* --------------------------------------------------------------------- */ + + Rp [0] = 0 ; + for (i = 0 ; i < n ; i++) + { + Rp [i+1] = Rp [i] + W [i] ; + } + for (i = 0 ; i < n ; i++) + { + W [i] = Rp [i] ; + Flag [i] = EMPTY ; + } + + /* --------------------------------------------------------------------- */ + /* construct the row form matrix R */ + /* --------------------------------------------------------------------- */ + + /* R = row form of pattern of A */ + for (j = 0 ; j < n ; j++) + { + p2 = Ap [j+1] ; + for (p = Ap [j] ; p < p2 ; p++) + { + i = Ai [p] ; + if (Flag [i] != j) + { + /* row index i has not yet appeared in column j */ + Ri [W [i]++] = j ; /* put col j in row i */ + Flag [i] = j ; /* flag row index i as appearing in col j*/ + } + } + } + +#ifndef NDEBUG + ASSERT (AMD_valid (n, n, Rp, Ri) == AMD_OK) ; + for (j = 0 ; j < n ; j++) + { + ASSERT (W [j] == Rp [j+1]) ; + } +#endif +} diff --git a/src/maths/KLU/amd_valid.c b/src/maths/KLU/amd_valid.c new file mode 100644 index 000000000..4d05925c2 --- /dev/null +++ b/src/maths/KLU/amd_valid.c @@ -0,0 +1,93 @@ +/* ========================================================================= */ +/* === AMD_valid =========================================================== */ +/* ========================================================================= */ + +/* ------------------------------------------------------------------------- */ +/* AMD, Copyright (c) Timothy A. Davis, */ +/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ +/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ +/* web: http://www.cise.ufl.edu/research/sparse/amd */ +/* ------------------------------------------------------------------------- */ + +/* Check if a column-form matrix is valid or not. The matrix A is + * n_row-by-n_col. The row indices of entries in column j are in + * Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are: + * + * n_row >= 0 + * n_col >= 0 + * nz = Ap [n_col] >= 0 number of entries in the matrix + * Ap [0] == 0 + * Ap [j] <= Ap [j+1] for all j in the range 0 to n_col. + * Ai [0 ... nz-1] must be in the range 0 to n_row-1. + * + * If any of the above conditions hold, AMD_INVALID is returned. If the + * following condition holds, AMD_OK_BUT_JUMBLED is returned (a warning, + * not an error): + * + * row indices in Ai [Ap [j] ... Ap [j+1]-1] are not sorted in ascending + * order, and/or duplicate entries exist. + * + * Otherwise, AMD_OK is returned. + * + * In v1.2 and earlier, this function returned TRUE if the matrix was valid + * (now returns AMD_OK), or FALSE otherwise (now returns AMD_INVALID or + * AMD_OK_BUT_JUMBLED). + */ + +#include "amd_internal.h" + +GLOBAL Int AMD_valid +( + /* inputs, not modified on output: */ + Int n_row, /* A is n_row-by-n_col */ + Int n_col, + const Int Ap [ ], /* column pointers of A, of size n_col+1 */ + const Int Ai [ ] /* row indices of A, of size nz = Ap [n_col] */ +) +{ + Int nz, j, p1, p2, ilast, i, p, result = AMD_OK ; + + if (n_row < 0 || n_col < 0 || Ap == NULL || Ai == NULL) + { + return (AMD_INVALID) ; + } + nz = Ap [n_col] ; + if (Ap [0] != 0 || nz < 0) + { + /* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */ + AMD_DEBUG0 (("column 0 pointer bad or nz < 0\n")) ; + return (AMD_INVALID) ; + } + for (j = 0 ; j < n_col ; j++) + { + p1 = Ap [j] ; + p2 = Ap [j+1] ; + AMD_DEBUG2 (("\nColumn: "ID" p1: "ID" p2: "ID"\n", j, p1, p2)) ; + if (p1 > p2) + { + /* column pointers must be ascending */ + AMD_DEBUG0 (("column "ID" pointer bad\n", j)) ; + return (AMD_INVALID) ; + } + ilast = EMPTY ; + for (p = p1 ; p < p2 ; p++) + { + i = Ai [p] ; + AMD_DEBUG3 (("row: "ID"\n", i)) ; + if (i < 0 || i >= n_row) + { + /* row index out of range */ + AMD_DEBUG0 (("index out of range, col "ID" row "ID"\n", j, i)); + return (AMD_INVALID) ; + } + if (i <= ilast) + { + /* row index unsorted, or duplicate entry present */ + AMD_DEBUG1 (("index unsorted/dupl col "ID" row "ID"\n", j, i)); + result = AMD_OK_BUT_JUMBLED ; + } + ilast = i ; + } + } + return (result) ; +} diff --git a/src/maths/KLU/btf.h b/src/maths/KLU/btf.h new file mode 100644 index 000000000..32be79cf9 --- /dev/null +++ b/src/maths/KLU/btf.h @@ -0,0 +1,263 @@ +/* ========================================================================== */ +/* === BTF package ========================================================== */ +/* ========================================================================== */ + +/* BTF_MAXTRANS: find a column permutation Q to give A*Q a zero-free diagonal + * BTF_STRONGCOMP: find a symmetric permutation P to put P*A*P' into block + * upper triangular form. + * BTF_ORDER: do both of the above (btf_maxtrans then btf_strongcomp). + * + * Copyright (c) 2004-2007. Tim Davis, University of Florida, + * with support from Sandia National Laboratories. All Rights Reserved. + */ + + +/* ========================================================================== */ +/* === BTF_MAXTRANS ========================================================= */ +/* ========================================================================== */ + +/* BTF_MAXTRANS: finds a permutation of the columns of a matrix so that it has a + * zero-free diagonal. The input is an m-by-n sparse matrix in compressed + * column form. The array Ap of size n+1 gives the starting and ending + * positions of the columns in the array Ai. Ap[0] must be zero. The array Ai + * contains the row indices of the nonzeros of the matrix A, and is of size + * Ap[n]. The row indices of column j are located in Ai[Ap[j] ... Ap[j+1]-1]. + * Row indices must be in the range 0 to m-1. Duplicate entries may be present + * in any given column. The input matrix is not checked for validity (row + * indices out of the range 0 to m-1 will lead to an undeterminate result - + * possibly a core dump, for example). Row indices in any given column need + * not be in sorted order. However, if they are sorted and the matrix already + * has a zero-free diagonal, then the identity permutation is returned. + * + * The output of btf_maxtrans is an array Match of size n. If row i is matched + * with column j, then A(i,j) is nonzero, and then Match[i] = j. If the matrix + * is structurally nonsingular, all entries in the Match array are unique, and + * Match can be viewed as a column permutation if A is square. That is, column + * k of the original matrix becomes column Match[k] of the permuted matrix. In + * MATLAB, this can be expressed as (for non-structurally singular matrices): + * + * Match = maxtrans (A) ; + * B = A (:, Match) ; + * + * except of course here the A matrix and Match vector are all 0-based (rows + * and columns in the range 0 to n-1), not 1-based (rows/cols in range 1 to n). + * The MATLAB dmperm routine returns a row permutation. See the maxtrans + * mexFunction for more details. + * + * If row i is not matched to any column, then Match[i] is == -1. The + * btf_maxtrans routine returns the number of nonzeros on diagonal of the + * permuted matrix. + * + * In the MATLAB mexFunction interface to btf_maxtrans, 1 is added to the Match + * array to obtain a 1-based permutation. Thus, in MATLAB where A is m-by-n: + * + * q = maxtrans (A) ; % has entries in the range 0:n + * q % a column permutation (only if sprank(A)==n) + * B = A (:, q) ; % permuted matrix (only if sprank(A)==n) + * sum (q > 0) ; % same as "sprank (A)" + * + * This behaviour differs from p = dmperm (A) in MATLAB, which returns the + * matching as p(j)=i if row i and column j are matched, and p(j)=0 if column j + * is unmatched. + * + * p = dmperm (A) ; % has entries in the range 0:m + * p % a row permutation (only if sprank(A)==m) + * B = A (p, :) ; % permuted matrix (only if sprank(A)==m) + * sum (p > 0) ; % definition of sprank (A) + * + * This algorithm is based on the paper "On Algorithms for obtaining a maximum + * transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1, + * pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal", + * same issue, pp. 387-390. Algorithm 575 is MC21A in the Harwell Subroutine + * Library. This code is not merely a translation of the Fortran code into C. + * It is a completely new implementation of the basic underlying method (depth + * first search over a subgraph with nodes corresponding to columns matched so + * far, and cheap matching). This code was written with minimal observation of + * the MC21A/B code itself. See comments below for a comparison between the + * maxtrans and MC21A/B codes. + * + * This routine operates on a column-form matrix and produces a column + * permutation. MC21A uses a row-form matrix and produces a row permutation. + * The difference is merely one of convention in the comments and interpretation + * of the inputs and outputs. If you want a row permutation, simply pass a + * compressed-row sparse matrix to this routine and you will get a row + * permutation (just like MC21A). Similarly, you can pass a column-oriented + * matrix to MC21A and it will happily return a column permutation. + */ + +#ifndef _BTF_H +#define _BTF_H + +/* make it easy for C++ programs to include BTF */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "UFconfig.h" + +int btf_maxtrans /* returns # of columns matched */ +( + /* --- input, not modified: --- */ + int nrow, /* A is nrow-by-ncol in compressed column form */ + int ncol, + int Ap [ ], /* size ncol+1 */ + int Ai [ ], /* size nz = Ap [ncol] */ + double maxwork, /* maximum amount of work to do is maxwork*nnz(A); no limit + * if <= 0 */ + + /* --- output, not defined on input --- */ + double *work, /* work = -1 if maxwork > 0 and the total work performed + * reached the maximum of maxwork*nnz(A). + * Otherwise, work = the total work performed. */ + + int Match [ ], /* size nrow. Match [i] = j if column j matched to row i + * (see above for the singular-matrix case) */ + + /* --- workspace, not defined on input or output --- */ + int Work [ ] /* size 5*ncol */ +) ; + +/* long integer version (all "int" parameters become "UF_long") */ +UF_long btf_l_maxtrans (UF_long, UF_long, UF_long *, UF_long *, double, + double *, UF_long *, UF_long *) ; + + +/* ========================================================================== */ +/* === BTF_STRONGCOMP ======================================================= */ +/* ========================================================================== */ + +/* BTF_STRONGCOMP finds the strongly connected components of a graph, returning + * a symmetric permutation. The matrix A must be square, and is provided on + * input in compressed-column form (see BTF_MAXTRANS, above). The diagonal of + * the input matrix A (or A*Q if Q is provided on input) is ignored. + * + * If Q is not NULL on input, then the strongly connected components of A*Q are + * found. Q may be flagged on input, where Q[k] < 0 denotes a flagged column k. + * The permutation is j = BTF_UNFLIP (Q [k]). On output, Q is modified (the + * flags are preserved) so that P*A*Q is in block upper triangular form. + * + * If Q is NULL, then the permutation P is returned so that P*A*P' is in upper + * block triangular form. + * + * The vector R gives the block boundaries, where block b is in rows/columns + * R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the + * number of strongly connected components found. + */ + +int btf_strongcomp /* return # of strongly connected components */ +( + /* input, not modified: */ + int n, /* A is n-by-n in compressed column form */ + int Ap [ ], /* size n+1 */ + int Ai [ ], /* size nz = Ap [n] */ + + /* optional input, modified (if present) on output: */ + int Q [ ], /* size n, input column permutation */ + + /* output, not defined on input */ + int P [ ], /* size n. P [k] = j if row and column j are kth row/col + * in permuted matrix. */ + + int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ + + /* workspace, not defined on input or output */ + int Work [ ] /* size 4n */ +) ; + +UF_long btf_l_strongcomp (UF_long, UF_long *, UF_long *, UF_long *, UF_long *, + UF_long *, UF_long *) ; + + +/* ========================================================================== */ +/* === BTF_ORDER ============================================================ */ +/* ========================================================================== */ + +/* BTF_ORDER permutes a square matrix into upper block triangular form. It + * does this by first finding a maximum matching (or perhaps a limited matching + * if the work is limited), via the btf_maxtrans function. If a complete + * matching is not found, BTF_ORDER completes the permutation, but flags the + * columns of P*A*Q to denote which columns are not matched. If the matrix is + * structurally rank deficient, some of the entries on the diagonal of the + * permuted matrix will be zero. BTF_ORDER then calls btf_strongcomp to find + * the strongly-connected components. + * + * On output, P and Q are the row and column permutations, where i = P[k] if + * row i of A is the kth row of P*A*Q, and j = BTF_UNFLIP(Q[k]) if column j of + * A is the kth column of P*A*Q. If Q[k] < 0, then the (k,k)th entry in P*A*Q + * is structurally zero. + * + * The vector R gives the block boundaries, where block b is in rows/columns + * R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the + * number of strongly connected components found. + */ + +int btf_order /* returns number of blocks found */ +( + /* --- input, not modified: --- */ + int n, /* A is n-by-n in compressed column form */ + int Ap [ ], /* size n+1 */ + int Ai [ ], /* size nz = Ap [n] */ + double maxwork, /* do at most maxwork*nnz(A) work in the maximum + * transversal; no limit if <= 0 */ + + /* --- output, not defined on input --- */ + double *work, /* return value from btf_maxtrans */ + int P [ ], /* size n, row permutation */ + int Q [ ], /* size n, column permutation */ + int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ + int *nmatch, /* # nonzeros on diagonal of P*A*Q */ + + /* --- workspace, not defined on input or output --- */ + int Work [ ] /* size 5n */ +) ; + +UF_long btf_l_order (UF_long, UF_long *, UF_long *, double , double *, + UF_long *, UF_long *, UF_long *, UF_long *, UF_long *) ; + + +/* ========================================================================== */ +/* === BTF marking of singular columns ====================================== */ +/* ========================================================================== */ + +/* BTF_FLIP is a "negation about -1", and is used to mark an integer j + * that is normally non-negative. BTF_FLIP (-1) is -1. BTF_FLIP of + * a number > -1 is negative, and BTF_FLIP of a number < -1 is positive. + * BTF_FLIP (BTF_FLIP (j)) = j for all integers j. UNFLIP (j) acts + * like an "absolute value" operation, and is always >= -1. You can test + * whether or not an integer j is "flipped" with the BTF_ISFLIPPED (j) + * macro. + */ + +#define BTF_FLIP(j) (-(j)-2) +#define BTF_ISFLIPPED(j) ((j) < -1) +#define BTF_UNFLIP(j) ((BTF_ISFLIPPED (j)) ? BTF_FLIP (j) : (j)) + +/* ========================================================================== */ +/* === BTF version ========================================================== */ +/* ========================================================================== */ + +/* All versions of BTF include these definitions. + * As an example, to test if the version you are using is 1.2 or later: + * + * if (BTF_VERSION >= BTF_VERSION_CODE (1,2)) ... + * + * This also works during compile-time: + * + * #if (BTF >= BTF_VERSION_CODE (1,2)) + * printf ("This is version 1.2 or later\n") ; + * #else + * printf ("This is an early version\n") ; + * #endif + */ + +#define BTF_DATE "Dec 7, 2011" +#define BTF_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) +#define BTF_MAIN_VERSION 1 +#define BTF_SUB_VERSION 1 +#define BTF_SUBSUB_VERSION 3 +#define BTF_VERSION BTF_VERSION_CODE(BTF_MAIN_VERSION,BTF_SUB_VERSION) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/maths/KLU/btf_internal.h b/src/maths/KLU/btf_internal.h new file mode 100644 index 000000000..83f76f1ec --- /dev/null +++ b/src/maths/KLU/btf_internal.h @@ -0,0 +1,64 @@ +/* ========================================================================== */ +/* === btf_internal include file ============================================ */ +/* ========================================================================== */ + +#ifndef _BTF_INTERNAL_H +#define _BTF_INTERNAL_H + +/* + * Copyright (c) 2004-2007. Tim Davis, University of Florida, + * with support from Sandia National Laboratories. All Rights Reserved. + */ + +/* Not to be included in any user program. */ + +#ifdef DLONG +#define Int UF_long +#define Int_id UF_long_id +#define BTF(name) btf_l_ ## name +#else +#define Int int +#define Int_id "%d" +#define BTF(name) btf_ ## name +#endif + +/* ========================================================================== */ +/* make sure debugging and printing is turned off */ + +#ifndef NDEBUG +#define NDEBUG +#endif +#ifndef NPRINT +#define NPRINT +#endif + +/* To enable debugging and assertions, uncomment this line: + #undef NDEBUG +*/ +/* To enable diagnostic printing, uncomment this line: + #undef NPRINT +*/ + +/* ========================================================================== */ + +#include +#include +#define ASSERT(a) assert(a) + +#undef TRUE +#undef FALSE +#undef PRINTF +#undef MIN + +#ifndef NPRINT +#define PRINTF(s) { printf s ; } ; +#else +#define PRINTF(s) +#endif + +#define TRUE 1 +#define FALSE 0 +#define EMPTY (-1) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#endif diff --git a/src/maths/KLU/btf_maxtrans.c b/src/maths/KLU/btf_maxtrans.c new file mode 100644 index 000000000..ab3a26ea0 --- /dev/null +++ b/src/maths/KLU/btf_maxtrans.c @@ -0,0 +1,387 @@ +/* ========================================================================== */ +/* === BTF_MAXTRANS ========================================================= */ +/* ========================================================================== */ + +/* Finds a column permutation that maximizes the number of entries on the + * diagonal of a sparse matrix. See btf.h for more information. + * + * This function is identical to cs_maxtrans in CSparse, with the following + * exceptions: + * + * (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and + * imatch [j] = i if row i is matched to column j. This function returns + * just jmatch (the Match array). The MATLAB interface to cs_maxtrans + * (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB + * caller. + * + * (2) cs_maxtrans includes a pre-pass that counts the number of non-empty + * rows and columns (m2 and n2, respectively), and computes the matching + * using the transpose of A if m2 < n2. cs_maxtrans also returns quickly + * if the diagonal of the matrix is already zero-free. This pre-pass + * allows cs_maxtrans to be much faster than maxtrans, if the use of the + * transpose is warranted. + * + * However, for square structurally non-singular matrices with one or more + * zeros on the diagonal, the pre-pass is a waste of time, and for these + * matrices, maxtrans can be twice as fast as cs_maxtrans. Since the + * maxtrans function is intended primarily for square matrices that are + * typically structurally nonsingular, the pre-pass is not included here. + * If this maxtrans function is used on a matrix with many more columns + * than rows, consider passing the transpose to this function, or use + * cs_maxtrans instead. + * + * (3) cs_maxtrans can operate as a randomized algorithm, to help avoid + * rare cases of excessive run-time. + * + * (4) this maxtrans function includes an option that limits the total work + * performed. If this limit is reached, the maximum transveral might not + * be found. + * + * Thus, for general usage, cs_maxtrans is preferred. For square matrices that + * are typically structurally non-singular, maxtrans is preferred. A partial + * maxtrans can still be very useful when solving a sparse linear system. + * + * Copyright (c) 2004-2007. Tim Davis, University of Florida, + * with support from Sandia National Laboratories. All Rights Reserved. + */ + +#include "btf.h" +#include "btf_internal.h" + + +/* ========================================================================== */ +/* === augment ============================================================== */ +/* ========================================================================== */ + +/* Perform a depth-first-search starting at column k, to find an augmenting + * path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1), + * (i3,j2), ..., (i(s+1), js), such that all of the following properties hold: + * + * * column k is not matched to any row + * * entries in the path are nonzero + * * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been + * previously matched to each other + * * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column + * + * Once this path is found, the matching can be changed to the set of pairs + * path. An augmenting path is a sequence of row/column pairs + * + * (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js) + * + * Once a row is matched with a column it remains matched with some column, but + * not necessarily the column it was first matched with. + * + * In the worst case, this function can examine every nonzero in A. Since it + * is called n times by maxtrans, the total time of maxtrans can be as high as + * O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at + * most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might + * not be found, however. + * + * This routine is very similar to the dfs routine in klu_kernel.c, in the + * KLU sparse LU factorization package. It is essentially identical to the + * cs_augment routine in CSparse, and its recursive version (augment function + * in cs_maxtransr_mex.c), except that this routine allows for the search to be + * terminated early if too much work is being performed. + * + * The algorithm is based on the paper "On Algorithms for obtaining a maximum + * transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1, + * pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal", + * same issue, pp. 387-390. The code here is a new implementation of that + * algorithm, with different data structures and control flow. After writing + * this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575) + * Some of the comparisons are partial because I didn't dig deeply into all of + * the details of MC21A/B, such as how the stack is maintained. The following + * arguments are essentially identical between this code and MC21A: + * + * maxtrans MC21A,B + * -------- ------- + * n N identical + * k JORD identical + * Ap IP column / row pointers + * Ai ICN row / column indices + * Ap[n] LICN length of index array (# of nonzeros in A) + * Match IPERM output column / row permutation + * nmatch NUMNZ # of nonzeros on diagonal of permuted matrix + * Flag CV mark a node as visited by the depth-first-search + * + * The following are different, but analogous: + * + * Cheap ARP indicates what part of the a column / row has + * already been matched. + * + * The following arguments are very different: + * + * - LENR # of entries in each row/column (unused in maxtrans) + * Pstack OUT Pstack keeps track of where we are in the depth- + * first-search scan of column j. I think that OUT + * plays a similar role in MC21B, but I'm unsure. + * Istack PR keeps track of the rows in the path. PR is a link + * list, though, whereas Istack is a stack. Maxtrans + * does not use any link lists. + * Jstack OUT? PR? the stack for nodes in the path (unsure) + * + * The following control structures are roughly comparable: + * + * maxtrans MC21B + * -------- ----- + * for (k = 0 ; k < n ; k++) DO 100 JORD=1,N + * while (head >= 0) DO 70 K=1,JORD + * for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2 + * for (p = head ; ...) DO 90 K=1,JORD + */ + +static Int augment +( + Int k, /* which stage of the main loop we're in */ + Int Ap [ ], /* column pointers, size n+1 */ + Int Ai [ ], /* row indices, size nz = Ap [n] */ + Int Match [ ], /* size n, Match [i] = j if col j matched to i */ + Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */ + Int Flag [ ], /* Flag [j] = k if j already visited this stage */ + Int Istack [ ], /* size n. Row index stack. */ + Int Jstack [ ], /* size n. Column index stack. */ + Int Pstack [ ], /* size n. Keeps track of position in adjacency list */ + double *work, /* work performed by the depth-first-search */ + double maxwork /* maximum work allowed */ +) +{ + /* local variables, but "global" to all DFS levels: */ + Int found ; /* true if match found. */ + Int head ; /* top of stack */ + + /* variables that are purely local to any one DFS level: */ + Int j2 ; /* the next DFS goes to node j2 */ + Int pend ; /* one past the end of the adjacency list for node j */ + Int pstart ; + Int quick ; + + /* variables that need to be pushed then popped from the stack: */ + Int i ; /* the row tentatively matched to i if DFS successful */ + Int j ; /* the DFS is at the current node j */ + Int p ; /* current index into the adj. list for node j */ + /* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */ + + quick = (maxwork > 0) ; + + /* start a DFS to find a match for column k */ + found = FALSE ; + i = EMPTY ; + head = 0 ; + Jstack [0] = k ; + ASSERT (Flag [k] != k) ; + + while (head >= 0) + { + j = Jstack [head] ; + pend = Ap [j+1] ; + + if (Flag [j] != k) /* a node is not yet visited */ + { + + /* -------------------------------------------------------------- */ + /* prework for node j */ + /* -------------------------------------------------------------- */ + + /* first time that j has been visited */ + Flag [j] = k ; + /* cheap assignment: find the next unmatched row in col j. This + * loop takes at most O(nnz(A)) time for the sum total of all + * calls to augment. */ + for (p = Cheap [j] ; p < pend && !found ; p++) + { + i = Ai [p] ; + found = (Match [i] == EMPTY) ; + } + Cheap [j] = p ; + + /* -------------------------------------------------------------- */ + + /* prepare for DFS */ + if (found) + { + /* end of augmenting path, column j matched with row i */ + Istack [head] = i ; + break ; + } + /* set Pstack [head] to the first entry in column j to scan */ + Pstack [head] = Ap [j] ; + } + + /* ------------------------------------------------------------------ */ + /* quick return if too much work done */ + /* ------------------------------------------------------------------ */ + + if (quick && *work > maxwork) + { + /* too much work has been performed; abort the search */ + return (EMPTY) ; + } + + /* ------------------------------------------------------------------ */ + /* DFS for nodes adjacent to j */ + /* ------------------------------------------------------------------ */ + + /* If cheap assignment not made, continue the depth-first search. All + * rows in column j are already matched. Add the adjacent nodes to the + * stack by iterating through until finding another non-visited node. + * + * It is the following loop that can force maxtrans to take + * O(n*nnz(A)) time. */ + + pstart = Pstack [head] ; + for (p = pstart ; p < pend ; p++) + { + i = Ai [p] ; + j2 = Match [i] ; + ASSERT (j2 != EMPTY) ; + if (Flag [j2] != k) + { + /* Node j2 is not yet visited, start a depth-first search on + * node j2. Keep track of where we left off in the scan of adj + * list of node j so we can restart j where we left off. */ + Pstack [head] = p + 1 ; + /* Push j2 onto the stack and immediately break so we can + * recurse on node j2. Also keep track of row i which (if this + * search for an augmenting path works) will be matched with the + * current node j. */ + Istack [head] = i ; + Jstack [++head] = j2 ; + break ; + } + } + + /* ------------------------------------------------------------------ */ + /* determine how much work was just performed */ + /* ------------------------------------------------------------------ */ + + *work += (p - pstart + 1) ; + + /* ------------------------------------------------------------------ */ + /* node j is done, but the postwork is postponed - see below */ + /* ------------------------------------------------------------------ */ + + if (p == pend) + { + /* If all adjacent nodes of j are already visited, pop j from + * stack and continue. We failed to find a match. */ + head-- ; + } + } + + /* postwork for all nodes j in the stack */ + /* unwind the path and make the corresponding matches */ + if (found) + { + for (p = head ; p >= 0 ; p--) + { + j = Jstack [p] ; + i = Istack [p] ; + + /* -------------------------------------------------------------- */ + /* postwork for node j */ + /* -------------------------------------------------------------- */ + /* if found, match row i with column j */ + Match [i] = j ; + } + } + return (found) ; +} + + +/* ========================================================================== */ +/* === maxtrans ============================================================= */ +/* ========================================================================== */ + +Int BTF(maxtrans) /* returns # of columns in the matching */ +( + /* --- input --- */ + Int nrow, /* A is nrow-by-ncol in compressed column form */ + Int ncol, + Int Ap [ ], /* size ncol+1 */ + Int Ai [ ], /* size nz = Ap [ncol] */ + double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This + * work limit excludes the O(nnz(A)) cheap-match phase. */ + + /* --- output --- */ + double *work, /* work = -1 if maxwork > 0 and the total work performed + * reached the maximum of maxwork*nnz(A)). + * Otherwise, work = the total work performed. */ + + Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */ + + /* --- workspace --- */ + Int Work [ ] /* size 5*ncol */ +) +{ + Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ; + Int i, j, k, nmatch, work_limit_reached, result ; + + /* ---------------------------------------------------------------------- */ + /* get workspace and initialize */ + /* ---------------------------------------------------------------------- */ + + Cheap = Work ; Work += ncol ; + Flag = Work ; Work += ncol ; + + /* stack for non-recursive depth-first search in augment function */ + Istack = Work ; Work += ncol ; + Jstack = Work ; Work += ncol ; + Pstack = Work ; + + /* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */ + for (j = 0 ; j < ncol ; j++) + { + Cheap [j] = Ap [j] ; + Flag [j] = EMPTY ; + } + + /* all rows and columns are currently unmatched */ + for (i = 0 ; i < nrow ; i++) + { + Match [i] = EMPTY ; + } + + if (maxwork > 0) + { + maxwork *= Ap [ncol] ; + } + *work = 0 ; + + /* ---------------------------------------------------------------------- */ + /* find a matching row for each column k */ + /* ---------------------------------------------------------------------- */ + + nmatch = 0 ; + work_limit_reached = FALSE ; + for (k = 0 ; k < ncol ; k++) + { + /* find an augmenting path to match some row i to column k */ + result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack, + work, maxwork) ; + if (result == TRUE) + { + /* we found it. Match [i] = k for some row i has been done. */ + nmatch++ ; + } + else if (result == EMPTY) + { + /* augment gave up because of too much work, and no match found */ + work_limit_reached = TRUE ; + } + } + + /* ---------------------------------------------------------------------- */ + /* return the Match, and the # of matches made */ + /* ---------------------------------------------------------------------- */ + + /* At this point, row i is matched to j = Match [i] if j >= 0. i is an + * unmatched row if Match [i] == EMPTY. */ + + if (work_limit_reached) + { + /* return -1 if the work limit of maxwork*nnz(A) was reached */ + *work = EMPTY ; + } + + return (nmatch) ; +} diff --git a/src/maths/KLU/btf_order.c b/src/maths/KLU/btf_order.c new file mode 100644 index 000000000..7198d0cdd --- /dev/null +++ b/src/maths/KLU/btf_order.c @@ -0,0 +1,132 @@ +/* ========================================================================== */ +/* === BTF_ORDER ============================================================ */ +/* ========================================================================== */ + +/* Find a permutation P and Q to permute a square sparse matrix into upper block + * triangular form. A(P,Q) will contain a zero-free diagonal if A has + * structural full-rank. Otherwise, the number of nonzeros on the diagonal of + * A(P,Q) will be maximized, and will equal the structural rank of A. + * + * Q[k] will be "flipped" if a zero-free diagonal was not found. Q[k] will be + * negative, and j = BTF_UNFLIP (Q [k]) gives the corresponding permutation. + * + * R defines the block boundaries of A(P,Q). The kth block consists of rows + * and columns R[k] to R[k+1]-1. + * + * If maxwork > 0 on input, then the work performed in btf_maxtrans is limited + * to maxwork*nnz(A) (excluding the "cheap match" phase, which can take another + * nnz(A) work). On output, the work parameter gives the actual work performed, + * or -1 if the limit was reached. In the latter case, the diagonal of A(P,Q) + * might not be zero-free, and the number of nonzeros on the diagonal of A(P,Q) + * might not be equal to the structural rank. + * + * See btf.h for more details. + * + * Copyright (c) 2004-2007. Tim Davis, University of Florida, + * with support from Sandia National Laboratories. All Rights Reserved. + */ + +#include "btf.h" +#include "btf_internal.h" + +/* This function only operates on square matrices (either structurally full- + * rank, or structurally rank deficient). */ + +Int BTF(order) /* returns number of blocks found */ +( + /* input, not modified: */ + Int n, /* A is n-by-n in compressed column form */ + Int Ap [ ], /* size n+1 */ + Int Ai [ ], /* size nz = Ap [n] */ + double maxwork, /* do at most maxwork*nnz(A) work in the maximum + * transversal; no limit if <= 0 */ + + /* output, not defined on input */ + double *work, /* work performed in maxtrans, or -1 if limit reached */ + Int P [ ], /* size n, row permutation */ + Int Q [ ], /* size n, column permutation */ + Int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ + Int *nmatch, /* # nonzeros on diagonal of P*A*Q */ + + /* workspace, not defined on input or output */ + Int Work [ ] /* size 5n */ +) +{ + Int *Flag ; + Int nblocks, i, j, nbadcol ; + + /* ---------------------------------------------------------------------- */ + /* compute the maximum matching */ + /* ---------------------------------------------------------------------- */ + + /* if maxwork > 0, then a maximum matching might not be found */ + + *nmatch = BTF(maxtrans) (n, n, Ap, Ai, maxwork, work, Q, Work) ; + + /* ---------------------------------------------------------------------- */ + /* complete permutation if the matrix is structurally singular */ + /* ---------------------------------------------------------------------- */ + + /* Since the matrix is square, ensure BTF_UNFLIP(Q[0..n-1]) is a + * permutation of the columns of A so that A has as many nonzeros on the + * diagonal as possible. + */ + + if (*nmatch < n) + { + /* get a size-n work array */ + Flag = Work + n ; + for (j = 0 ; j < n ; j++) + { + Flag [j] = 0 ; + } + + /* flag all matched columns */ + for (i = 0 ; i < n ; i++) + { + j = Q [i] ; + if (j != EMPTY) + { + /* row i and column j are matched to each other */ + Flag [j] = 1 ; + } + } + + /* make a list of all unmatched columns, in Work [0..nbadcol-1] */ + nbadcol = 0 ; + for (j = n-1 ; j >= 0 ; j--) + { + if (!Flag [j]) + { + /* j is matched to nobody */ + Work [nbadcol++] = j ; + } + } + ASSERT (*nmatch + nbadcol == n) ; + + /* make an assignment for each unmatched row */ + for (i = 0 ; i < n ; i++) + { + if (Q [i] == EMPTY && nbadcol > 0) + { + /* get an unmatched column j */ + j = Work [--nbadcol] ; + /* assign j to row i and flag the entry by "flipping" it */ + Q [i] = BTF_FLIP (j) ; + } + } + } + + /* The permutation of a square matrix can be recovered as follows: Row i is + * matched with column j, where j = BTF_UNFLIP (Q [i]) and where j + * will always be in the valid range 0 to n-1. The entry A(i,j) is zero + * if BTF_ISFLIPPED (Q [i]) is true, and nonzero otherwise. nmatch + * is the number of entries in the Q array that are non-negative. */ + + /* ---------------------------------------------------------------------- */ + /* find the strongly connected components */ + /* ---------------------------------------------------------------------- */ + + nblocks = BTF(strongcomp) (n, Ap, Ai, Q, P, R, Work) ; + return (nblocks) ; +} diff --git a/src/maths/KLU/btf_strongcomp.c b/src/maths/KLU/btf_strongcomp.c new file mode 100644 index 000000000..aeb555ad8 --- /dev/null +++ b/src/maths/KLU/btf_strongcomp.c @@ -0,0 +1,593 @@ +/* ========================================================================== */ +/* === BTF_STRONGCOMP ======================================================= */ +/* ========================================================================== */ + +/* Finds the strongly connected components of a graph, or equivalently, permutes + * the matrix into upper block triangular form. See btf.h for more details. + * Input matrix and Q are not checked on input. + * + * Copyright (c) 2004-2007. Tim Davis, University of Florida, + * with support from Sandia National Laboratories. All Rights Reserved. + */ + +#include "btf.h" +#include "btf_internal.h" + +#define UNVISITED (-2) /* Flag [j] = UNVISITED if node j not visited yet */ +#define UNASSIGNED (-1) /* Flag [j] = UNASSIGNED if node j has been visited, + * but not yet assigned to a strongly-connected + * component (aka block). Flag [j] = k (k in the + * range 0 to nblocks-1) if node j has been visited + * (and completed, with its postwork done) and + * assigned to component k. */ + +/* This file contains two versions of the depth-first-search, a recursive one + * and a non-recursive one. By default, the non-recursive one is used. */ + +#ifndef RECURSIVE + +/* ========================================================================== */ +/* === dfs: non-recursive version (default) ================================= */ +/* ========================================================================== */ + +/* Perform a depth-first-search of a graph, stored in an adjacency-list form. + * The row indices of column j (equivalently, the out-adjacency list of node j) + * are stored in Ai [Ap[j] ... Ap[j+1]-1]. Self-edge (diagonal entries) are + * ignored. Ap[0] must be zero, and thus nz = Ap[n] is the number of entries + * in the matrix (or edges in the graph). The row indices in each column need + * not be in any particular order. If an input column permutation is given, + * node j (in the permuted matrix A*Q) is located in + * Ai [Ap[Q[j]] ... Ap[Q[j]+1]-1]. This Q can be the same as the Match array + * output from the maxtrans routine, for a square matrix that is structurally + * full rank. + * + * The algorithm is from the paper by Robert E. Tarjan, "Depth-first search and + * linear graph algorithms," SIAM Journal on Computing, vol. 1, no. 2, + * pp. 146-160, 1972. The time taken by strongcomp is O(nnz(A)). + * + * See also MC13A/B in the Harwell subroutine library (Iain S. Duff and John + * K. Reid, "Algorithm 529: permutations to block triangular form," ACM Trans. + * on Mathematical Software, vol. 4, no. 2, pp. 189-192, 1978, and "An + * implementation of Tarjan's algorithm for the block triangular form of a + * matrix," same journal, pp. 137-147. This code is implements the same + * algorithm as MC13A/B, except that the data structures are very different. + * Also, unlike MC13A/B, the output permutation preserves the natural ordering + * within each block. + */ + +static void dfs +( + /* inputs, not modified on output: */ + Int j, /* start the DFS at node j */ + Int Ap [ ], /* size n+1, column pointers for the matrix A */ + Int Ai [ ], /* row indices, size nz = Ap [n] */ + Int Q [ ], /* input column permutation */ + + /* inputs, modified on output (each array is of size n): */ + Int Time [ ], /* Time [j] = "time" that node j was first visited */ + Int Flag [ ], /* Flag [j]: see above */ + Int Low [ ], /* Low [j]: see definition below */ + Int *p_nblocks, /* number of blocks (aka strongly-connected-comp.)*/ + Int *p_timestamp, /* current "time" */ + + /* workspace, not defined on input or output: */ + Int Cstack [ ], /* size n, output stack to hold nodes of components */ + Int Jstack [ ], /* size n, stack for the variable j */ + Int Pstack [ ] /* size n, stack for the variable p */ +) +{ + /* ---------------------------------------------------------------------- */ + /* local variables, and initializations */ + /* ---------------------------------------------------------------------- */ + + /* local variables, but "global" to all DFS levels: */ + Int chead ; /* top of Cstack */ + Int jhead ; /* top of Jstack and Pstack */ + + /* variables that are purely local to any one DFS level: */ + Int i ; /* edge (j,i) considered; i can be next node to traverse */ + Int parent ; /* parent of node j in the DFS tree */ + Int pend ; /* one past the end of the adjacency list for node j */ + Int jj ; /* column j of A*Q is column jj of the input matrix A */ + + /* variables that need to be pushed then popped from the stack: */ + Int p ; /* current index into the adj. list for node j */ + /* the variables j and p are stacked in Jstack and Pstack */ + + /* local copies of variables in the calling routine */ + Int nblocks = *p_nblocks ; + Int timestamp = *p_timestamp ; + + /* ---------------------------------------------------------------------- */ + /* start a DFS at node j (same as the recursive call dfs (EMPTY, j)) */ + /* ---------------------------------------------------------------------- */ + + chead = 0 ; /* component stack is empty */ + jhead = 0 ; /* Jstack and Pstack are empty */ + Jstack [0] = j ; /* put the first node j on the Jstack */ + ASSERT (Flag [j] == UNVISITED) ; + + while (jhead >= 0) + { + j = Jstack [jhead] ; /* grab the node j from the top of Jstack */ + + /* determine which column jj of the A is column j of A*Q */ + jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ; + pend = Ap [jj+1] ; /* j's row index list ends at Ai [pend-1] */ + + if (Flag [j] == UNVISITED) + { + + /* -------------------------------------------------------------- */ + /* prework at node j */ + /* -------------------------------------------------------------- */ + + /* node j is being visited for the first time */ + Cstack [++chead] = j ; /* push j onto the stack */ + timestamp++ ; /* get a timestamp */ + Time [j] = timestamp ; /* give the timestamp to node j */ + Low [j] = timestamp ; + Flag [j] = UNASSIGNED ; /* flag node j as visited */ + + /* -------------------------------------------------------------- */ + /* set Pstack [jhead] to the first entry in column j to scan */ + /* -------------------------------------------------------------- */ + + Pstack [jhead] = Ap [jj] ; + } + + /* ------------------------------------------------------------------ */ + /* DFS rooted at node j (start it, or continue where left off) */ + /* ------------------------------------------------------------------ */ + + for (p = Pstack [jhead] ; p < pend ; p++) + { + i = Ai [p] ; /* examine the edge from node j to node i */ + if (Flag [i] == UNVISITED) + { + /* Node i has not been visited - start a DFS at node i. + * Keep track of where we left off in the scan of adjacency list + * of node j so we can restart j where we left off. */ + Pstack [jhead] = p + 1 ; + /* Push i onto the stack and immediately break + * so we can recurse on node i. */ + Jstack [++jhead] = i ; + ASSERT (Time [i] == EMPTY) ; + ASSERT (Low [i] == EMPTY) ; + /* break here to do what the recursive call dfs (j,i) does */ + break ; + } + else if (Flag [i] == UNASSIGNED) + { + /* Node i has been visited, but still unassigned to a block + * this is a back or cross edge if Time [i] < Time [j]. + * Note that i might equal j, in which case this code does + * nothing. */ + ASSERT (Time [i] > 0) ; + ASSERT (Low [i] > 0) ; + Low [j] = MIN (Low [j], Time [i]) ; + } + } + + if (p == pend) + { + /* If all adjacent nodes of j are already visited, pop j from + * Jstack and do the post work for node j. This also pops p + * from the Pstack. */ + jhead-- ; + + /* -------------------------------------------------------------- */ + /* postwork at node j */ + /* -------------------------------------------------------------- */ + + /* determine if node j is the head of a component */ + if (Low [j] == Time [j]) + { + /* pop all nodes in this SCC from Cstack */ + while (TRUE) + { + ASSERT (chead >= 0) ; /* stack not empty (j in it) */ + i = Cstack [chead--] ; /* pop a node from the Cstack */ + ASSERT (i >= 0) ; + ASSERT (Flag [i] == UNASSIGNED) ; + Flag [i] = nblocks ; /* assign i to current block */ + if (i == j) break ; /* current block ends at j */ + } + nblocks++ ; /* one more block has been found */ + } + /* update Low [parent], if the parent exists */ + if (jhead >= 0) + { + parent = Jstack [jhead] ; + Low [parent] = MIN (Low [parent], Low [j]) ; + } + } + } + + /* ---------------------------------------------------------------------- */ + /* cleanup: update timestamp and nblocks */ + /* ---------------------------------------------------------------------- */ + + *p_timestamp = timestamp ; + *p_nblocks = nblocks ; +} + +#else + +/* ========================================================================== */ +/* === dfs: recursive version (only for illustration) ======================= */ +/* ========================================================================== */ + +/* The following is a recursive version of dfs, which computes identical results + * as the non-recursive dfs. It is included here because it is easier to read. + * Compare the comments in the code below with the identical comments in the + * non-recursive code above, and that will help you see the correlation between + * the two routines. + * + * This routine can cause stack overflow, and is thus not recommended for heavy + * usage, particularly for large matrices. To help in delaying stack overflow, + * global variables are used, reducing the amount of information each call to + * dfs places on the call/return stack (the integers i, j, p, parent, and the + * return address). Note that this means the recursive code is not thread-safe. + * To try this version, compile the code with -DRECURSIVE or include the + * following line at the top of this file: + +#define RECURSIVE + + */ + +static Int /* for recursive illustration only, not for production use */ + chead, timestamp, nblocks, n, *Ap, *Ai, *Flag, *Cstack, *Time, *Low, + *P, *R, *Q ; + +static void dfs +( + Int parent, /* came from parent node */ + Int j /* at node j in the DFS */ +) +{ + Int p ; /* current index into the adj. list for node j */ + Int i ; /* edge (j,i) considered; i can be next node to traverse */ + Int jj ; /* column j of A*Q is column jj of the input matrix A */ + + /* ---------------------------------------------------------------------- */ + /* prework at node j */ + /* ---------------------------------------------------------------------- */ + + /* node j is being visited for the first time */ + Cstack [++chead] = j ; /* push j onto the stack */ + timestamp++ ; /* get a timestamp */ + Time [j] = timestamp ; /* give the timestamp to node j */ + Low [j] = timestamp ; + Flag [j] = UNASSIGNED ; /* flag node j as visited */ + + /* ---------------------------------------------------------------------- */ + /* DFS rooted at node j */ + /* ---------------------------------------------------------------------- */ + + /* determine which column jj of the A is column j of A*Q */ + jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ; + for (p = Ap [jj] ; p < Ap [jj+1] ; p++) + { + i = Ai [p] ; /* examine the edge from node j to node i */ + if (Flag [i] == UNVISITED) + { + /* Node i has not been visited - start a DFS at node i. */ + dfs (j, i) ; + } + else if (Flag [i] == UNASSIGNED) + { + /* Node i has been visited, but still unassigned to a block + * this is a back or cross edge if Time [i] < Time [j]. + * Note that i might equal j, in which case this code does + * nothing. */ + Low [j] = MIN (Low [j], Time [i]) ; + } + } + + /* ---------------------------------------------------------------------- */ + /* postwork at node j */ + /* ---------------------------------------------------------------------- */ + + /* determine if node j is the head of a component */ + if (Low [j] == Time [j]) + { + /* pop all nodes in this strongly connected component from Cstack */ + while (TRUE) + { + i = Cstack [chead--] ; /* pop a node from the Cstack */ + Flag [i] = nblocks ; /* assign node i to current block */ + if (i == j) break ; /* current block ends at node j */ + } + nblocks++ ; /* one more block has been found */ + } + /* update Low [parent] */ + if (parent != EMPTY) + { + /* Note that this could be done with Low[j] = MIN(Low[j],Low[i]) just + * after the dfs (j,i) statement above, and then parent would not have + * to be an input argument. Putting it here places all the postwork + * for node j in one place, thus making the non-recursive DFS easier. */ + Low [parent] = MIN (Low [parent], Low [j]) ; + } +} + +#endif + +/* ========================================================================== */ +/* === btf_strongcomp ======================================================= */ +/* ========================================================================== */ + +#ifndef RECURSIVE + +Int BTF(strongcomp) /* return # of strongly connected components */ +( + /* input, not modified: */ + Int n, /* A is n-by-n in compressed column form */ + Int Ap [ ], /* size n+1 */ + Int Ai [ ], /* size nz = Ap [n] */ + + /* optional input, modified (if present) on output: */ + Int Q [ ], /* size n, input column permutation. The permutation Q can + * include a flag which indicates an unmatched row. + * jold = BTF_UNFLIP (Q [jnew]) is the permutation; + * this function ingnores these flags. On output, it is + * modified according to the permutation P. */ + + /* output, not defined on input: */ + Int P [ ], /* size n. P [k] = j if row and column j are kth row/col + * in permuted matrix. */ + Int R [ ], /* size n+1. kth block is in rows/cols R[k] ... R[k+1]-1 + * of the permuted matrix. */ + + /* workspace, not defined on input or output: */ + Int Work [ ] /* size 4n */ +) + +#else + +Int BTF(strongcomp) /* recursive version - same as above except for Work size */ +( + Int n_in, + Int Ap_in [ ], + Int Ai_in [ ], + Int Q_in [ ], + Int P_in [ ], + Int R_in [ ], + Int Work [ ] /* size 2n */ +) + +#endif + +{ + Int j, k, b ; + +#ifndef RECURSIVE + Int timestamp, nblocks, *Flag, *Cstack, *Time, *Low, *Jstack, *Pstack ; +#else + n = n_in ; + Ap = Ap_in ; + Ai = Ai_in ; + Q = Q_in ; + P = P_in ; + R = R_in ; + chead = EMPTY ; +#endif + + /* ---------------------------------------------------------------------- */ + /* get and initialize workspace */ + /* ---------------------------------------------------------------------- */ + + /* timestamp is incremented each time a new node is visited. + * + * Time [j] is the timestamp given to node j. + * + * Low [j] is the lowest timestamp of any node reachable from j via either + * a path to any descendent of j in the DFS tree, or via a single edge to + * an either an ancestor (a back edge) or another node that's neither an + * ancestor nor a descendant (a cross edge). If Low [j] is equal to + * the timestamp of node j (Time [j]), then node j is the "head" of a + * strongly connected component (SCC). That is, it is the first node + * visited in its strongly connected component, and the DFS subtree rooted + * at node j spans all the nodes of the strongly connected component. + * + * The term "block" and "component" are used interchangebly in this code; + * "block" being a matrix term and "component" being a graph term for the + * same thing. + * + * When a node is visited, it is placed on the Cstack (for "component" + * stack). When node j is found to be an SCC head, all the nodes from the + * top of the stack to node j itself form the nodes in the SCC. This Cstack + * is used for both the recursive and non-recursive versions. + */ + + Time = Work ; Work += n ; + Flag = Work ; Work += n ; + Low = P ; /* use output array P as workspace for Low */ + Cstack = R ; /* use output array R as workspace for Cstack */ + +#ifndef RECURSIVE + /* stack for non-recursive dfs */ + Jstack = Work ; Work += n ; /* stack for j */ + Pstack = Work ; /* stack for p */ +#endif + + for (j = 0 ; j < n ; j++) + { + Flag [j] = UNVISITED ; + Low [j] = EMPTY ; + Time [j] = EMPTY ; +#ifndef NDEBUG + Cstack [j] = EMPTY ; +#ifndef RECURSIVE + Jstack [j] = EMPTY ; + Pstack [j] = EMPTY ; +#endif +#endif + } + + timestamp = 0 ; /* each node given a timestamp when it is visited */ + nblocks = 0 ; /* number of blocks found so far */ + + /* ---------------------------------------------------------------------- */ + /* find the connected components via a depth-first-search */ + /* ---------------------------------------------------------------------- */ + + for (j = 0 ; j < n ; j++) + { + /* node j is unvisited or assigned to a block. Cstack is empty. */ + ASSERT (Flag [j] == UNVISITED || (Flag [j] >= 0 && Flag [j] < nblocks)); + if (Flag [j] == UNVISITED) + { +#ifndef RECURSIVE + /* non-recursive dfs (default) */ + dfs (j, Ap, Ai, Q, Time, Flag, Low, &nblocks, ×tamp, + Cstack, Jstack, Pstack) ; +#else + /* recursive dfs (for illustration only) */ + ASSERT (chead == EMPTY) ; + dfs (EMPTY, j) ; + ASSERT (chead == EMPTY) ; +#endif + } + } + ASSERT (timestamp == n) ; + + /* ---------------------------------------------------------------------- */ + /* construct the block boundary array, R */ + /* ---------------------------------------------------------------------- */ + + for (b = 0 ; b < nblocks ; b++) + { + R [b] = 0 ; + } + for (j = 0 ; j < n ; j++) + { + /* node j has been assigned to block b = Flag [j] */ + ASSERT (Time [j] > 0 && Time [j] <= n) ; + ASSERT (Low [j] > 0 && Low [j] <= n) ; + ASSERT (Flag [j] >= 0 && Flag [j] < nblocks) ; + R [Flag [j]]++ ; + } + /* R [b] is now the number of nodes in block b. Compute cumulative sum + * of R, using Time [0 ... nblocks-1] as workspace. */ + Time [0] = 0 ; + for (b = 1 ; b < nblocks ; b++) + { + Time [b] = Time [b-1] + R [b-1] ; + } + for (b = 0 ; b < nblocks ; b++) + { + R [b] = Time [b] ; + } + R [nblocks] = n ; + + /* ---------------------------------------------------------------------- */ + /* construct the permutation, preserving the natural order */ + /* ---------------------------------------------------------------------- */ + +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + P [k] = EMPTY ; + } +#endif + + for (j = 0 ; j < n ; j++) + { + /* place column j in the permutation */ + P [Time [Flag [j]]++] = j ; + } + +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + ASSERT (P [k] != EMPTY) ; + } +#endif + + /* Now block b consists of the nodes k1 to k2-1 in the permuted matrix, + * where k1 = R [b] and k2 = R [b+1]. Row and column j of the original + * matrix becomes row and column P [k] of the permuted matrix. The set of + * of rows/columns (nodes) in block b is given by P [k1 ... k2-1], and this + * set is sorted in ascending order. Thus, if the matrix consists of just + * one block, P is the identity permutation. */ + + /* ---------------------------------------------------------------------- */ + /* if Q is present on input, set Q = Q*P' */ + /* ---------------------------------------------------------------------- */ + + if (Q != (Int *) NULL) + { + /* We found a symmetric permutation P for the matrix A*Q. The overall + * permutation is thus P*(A*Q)*P'. Set Q=Q*P' so that the final + * permutation is P*A*Q. Use Time as workspace. Note that this + * preserves the negative values of Q if the matrix is structurally + * singular. */ + for (k = 0 ; k < n ; k++) + { + Time [k] = Q [P [k]] ; + } + for (k = 0 ; k < n ; k++) + { + Q [k] = Time [k] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* how to traverse the permuted matrix */ + /* ---------------------------------------------------------------------- */ + + /* If Q is not present, the following code can be used to traverse the + * permuted matrix P*A*P' + * + * // compute the inverse of P + * for (knew = 0 ; knew < n ; knew++) + * { + * // row and column kold in the old matrix is row/column knew + * // in the permuted matrix P*A*P' + * kold = P [knew] ; + * Pinv [kold] = knew ; + * } + * for (b = 0 ; b < nblocks ; b++) + * { + * // traverse block b of the permuted matrix P*A*P' + * k1 = R [b] ; + * k2 = R [b+1] ; + * nk = k2 - k1 ; + * for (jnew = k1 ; jnew < k2 ; jnew++) + * { + * jold = P [jnew] ; + * for (p = Ap [jold] ; p < Ap [jold+1] ; p++) + * { + * iold = Ai [p] ; + * inew = Pinv [iold] ; + * // Entry in the old matrix is A (iold, jold), and its + * // position in the new matrix P*A*P' is (inew, jnew). + * // Let B be the bth diagonal block of the permuted + * // matrix. If inew >= k1, then this entry is in row/ + * // column (inew-k1, jnew-k1) of the nk-by-nk matrix B. + * // Otherwise, the entry is in the upper block triangular + * // part, not in any diagonal block. + * } + * } + * } + * + * If Q is present replace the above statement + * jold = P [jnew] ; + * with + * jold = Q [jnew] ; + * or + * jold = BTF_UNFLIP (Q [jnew]) ; + * + * then entry A (iold,jold) in the old (unpermuted) matrix is at (inew,jnew) + * in the permuted matrix P*A*Q. Everything else remains the same as the + * above (simply replace P*A*P' with P*A*Q in the above comments). + */ + + /* ---------------------------------------------------------------------- */ + /* return # of blocks / # of strongly connected components */ + /* ---------------------------------------------------------------------- */ + + return (nblocks) ; +} diff --git a/src/maths/KLU/colamd.c b/src/maths/KLU/colamd.c new file mode 100644 index 000000000..5fe20d628 --- /dev/null +++ b/src/maths/KLU/colamd.c @@ -0,0 +1,3611 @@ +/* ========================================================================== */ +/* === colamd/symamd - a sparse matrix column ordering algorithm ============ */ +/* ========================================================================== */ + +/* COLAMD / SYMAMD + + colamd: an approximate minimum degree column ordering algorithm, + for LU factorization of symmetric or unsymmetric matrices, + QR factorization, least squares, interior point methods for + linear programming problems, and other related problems. + + symamd: an approximate minimum degree ordering algorithm for Cholesky + factorization of symmetric matrices. + + Purpose: + + Colamd computes a permutation Q such that the Cholesky factorization of + (AQ)'(AQ) has less fill-in and requires fewer floating point operations + than A'A. This also provides a good ordering for sparse partial + pivoting methods, P(AQ) = LU, where Q is computed prior to numerical + factorization, and P is computed during numerical factorization via + conventional partial pivoting with row interchanges. Colamd is the + column ordering method used in SuperLU, part of the ScaLAPACK library. + It is also available as built-in function in MATLAB Version 6, + available from MathWorks, Inc. (http://www.mathworks.com). This + routine can be used in place of colmmd in MATLAB. + + Symamd computes a permutation P of a symmetric matrix A such that the + Cholesky factorization of PAP' has less fill-in and requires fewer + floating point operations than A. Symamd constructs a matrix M such + that M'M has the same nonzero pattern of A, and then orders the columns + of M using colmmd. The column ordering of M is then returned as the + row and column ordering P of A. + + Authors: + + The authors of the code itself are Stefan I. Larimore and Timothy A. + Davis (davis at cise.ufl.edu), University of Florida. The algorithm was + developed in collaboration with John Gilbert, Xerox PARC, and Esmond + Ng, Oak Ridge National Laboratory. + + Acknowledgements: + + This work was supported by the National Science Foundation, under + grants DMS-9504974 and DMS-9803599. + + Copyright and License: + + Copyright (c) 1998-2007, Timothy A. Davis, All Rights Reserved. + COLAMD is also available under alternate licenses, contact T. Davis + for details. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + USA + + Permission is hereby granted to use or copy this program under the + terms of the GNU LGPL, provided that the Copyright, this License, + and the Availability of the original version is retained on all copies. + User documentation of any code that uses this code or any modified + version of this code must cite the Copyright, this License, the + Availability note, and "Used by permission." Permission to modify + the code and to distribute modified code is granted, provided the + Copyright, this License, and the Availability note are retained, + and a notice that the code was modified is included. + + Availability: + + The colamd/symamd library is available at + + http://www.cise.ufl.edu/research/sparse/colamd/ + + This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c + file. It requires the colamd.h file. It is required by the colamdmex.c + and symamdmex.c files, for the MATLAB interface to colamd and symamd. + Appears as ACM Algorithm 836. + + See the ChangeLog file for changes since Version 1.0. + + References: + + T. A. Davis, J. R. Gilbert, S. Larimore, E. Ng, An approximate column + minimum degree ordering algorithm, ACM Transactions on Mathematical + Software, vol. 30, no. 3., pp. 353-376, 2004. + + T. A. Davis, J. R. Gilbert, S. Larimore, E. Ng, Algorithm 836: COLAMD, + an approximate column minimum degree ordering algorithm, ACM + Transactions on Mathematical Software, vol. 30, no. 3., pp. 377-380, + 2004. + +*/ + +/* ========================================================================== */ +/* === Description of user-callable routines ================================ */ +/* ========================================================================== */ + +/* COLAMD includes both int and UF_long versions of all its routines. The + * description below is for the int version. For UF_long, all int arguments + * become UF_long. UF_long is normally defined as long, except for WIN64. + + ---------------------------------------------------------------------------- + colamd_recommended: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + size_t colamd_recommended (int nnz, int n_row, int n_col) ; + size_t colamd_l_recommended (UF_long nnz, UF_long n_row, + UF_long n_col) ; + + Purpose: + + Returns recommended value of Alen for use by colamd. Returns 0 + if any input argument is negative. The use of this routine + is optional. Not needed for symamd, which dynamically allocates + its own memory. + + Note that in v2.4 and earlier, these routines returned int or long. + They now return a value of type size_t. + + Arguments (all input arguments): + + int nnz ; Number of nonzeros in the matrix A. This must + be the same value as p [n_col] in the call to + colamd - otherwise you will get a wrong value + of the recommended memory to use. + + int n_row ; Number of rows in the matrix A. + + int n_col ; Number of columns in the matrix A. + + ---------------------------------------------------------------------------- + colamd_set_defaults: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + colamd_set_defaults (double knobs [COLAMD_KNOBS]) ; + colamd_l_set_defaults (double knobs [COLAMD_KNOBS]) ; + + Purpose: + + Sets the default parameters. The use of this routine is optional. + + Arguments: + + double knobs [COLAMD_KNOBS] ; Output only. + + NOTE: the meaning of the dense row/col knobs has changed in v2.4 + + knobs [0] and knobs [1] control dense row and col detection: + + Colamd: rows with more than + max (16, knobs [COLAMD_DENSE_ROW] * sqrt (n_col)) + entries are removed prior to ordering. Columns with more than + max (16, knobs [COLAMD_DENSE_COL] * sqrt (MIN (n_row,n_col))) + entries are removed prior to + ordering, and placed last in the output column ordering. + + Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0]. + Rows and columns with more than + max (16, knobs [COLAMD_DENSE_ROW] * sqrt (n)) + entries are removed prior to ordering, and placed last in the + output ordering. + + COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, + respectively, in colamd.h. Default values of these two knobs + are both 10. Currently, only knobs [0] and knobs [1] are + used, but future versions may use more knobs. If so, they will + be properly set to their defaults by the future version of + colamd_set_defaults, so that the code that calls colamd will + not need to change, assuming that you either use + colamd_set_defaults, or pass a (double *) NULL pointer as the + knobs array to colamd or symamd. + + knobs [2]: aggressive absorption + + knobs [COLAMD_AGGRESSIVE] controls whether or not to do + aggressive absorption during the ordering. Default is TRUE. + + + ---------------------------------------------------------------------------- + colamd: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + int colamd (int n_row, int n_col, int Alen, int *A, int *p, + double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ; + UF_long colamd_l (UF_long n_row, UF_long n_col, UF_long Alen, + UF_long *A, UF_long *p, double knobs [COLAMD_KNOBS], + UF_long stats [COLAMD_STATS]) ; + + Purpose: + + Computes a column ordering (Q) of A such that P(AQ)=LU or + (AQ)'AQ=LL' have less fill-in and require fewer floating point + operations than factorizing the unpermuted matrix A or A'A, + respectively. + + Returns: + + TRUE (1) if successful, FALSE (0) otherwise. + + Arguments: + + int n_row ; Input argument. + + Number of rows in the matrix A. + Restriction: n_row >= 0. + Colamd returns FALSE if n_row is negative. + + int n_col ; Input argument. + + Number of columns in the matrix A. + Restriction: n_col >= 0. + Colamd returns FALSE if n_col is negative. + + int Alen ; Input argument. + + Restriction (see note): + Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + Colamd returns FALSE if these conditions are not met. + + Note: this restriction makes an modest assumption regarding + the size of the two typedef's structures in colamd.h. + We do, however, guarantee that + + Alen >= colamd_recommended (nnz, n_row, n_col) + + will be sufficient. Note: the macro version does not check + for integer overflow, and thus is not recommended. Use + the colamd_recommended routine instead. + + int A [Alen] ; Input argument, undefined on output. + + A is an integer array of size Alen. Alen must be at least as + large as the bare minimum value given above, but this is very + low, and can result in excessive run time. For best + performance, we recommend that Alen be greater than or equal to + colamd_recommended (nnz, n_row, n_col), which adds + nnz/5 to the bare minimum value given above. + + On input, the row indices of the entries in column c of the + matrix are held in A [(p [c]) ... (p [c+1]-1)]. The row indices + in a given column c need not be in ascending order, and + duplicate row indices may be be present. However, colamd will + work a little faster if both of these conditions are met + (Colamd puts the matrix into this format, if it finds that the + the conditions are not met). + + The matrix is 0-based. That is, rows are in the range 0 to + n_row-1, and columns are in the range 0 to n_col-1. Colamd + returns FALSE if any row index is out of range. + + The contents of A are modified during ordering, and are + undefined on output. + + int p [n_col+1] ; Both input and output argument. + + p is an integer array of size n_col+1. On input, it holds the + "pointers" for the column form of the matrix A. Column c of + the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first + entry, p [0], must be zero, and p [c] <= p [c+1] must hold + for all c in the range 0 to n_col-1. The value p [n_col] is + thus the total number of entries in the pattern of the matrix A. + Colamd returns FALSE if these conditions are not met. + + On output, if colamd returns TRUE, the array p holds the column + permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is + the first column index in the new ordering, and p [n_col-1] is + the last. That is, p [k] = j means that column j of A is the + kth pivot column, in AQ, where k is in the range 0 to n_col-1 + (p [0] = j means that column j of A is the first column in AQ). + + If colamd returns FALSE, then no permutation is returned, and + p is undefined on output. + + double knobs [COLAMD_KNOBS] ; Input argument. + + See colamd_set_defaults for a description. + + int stats [COLAMD_STATS] ; Output argument. + + Statistics on the ordering, and error status. + See colamd.h for related definitions. + Colamd returns FALSE if stats is not present. + + stats [0]: number of dense or empty rows ignored. + + stats [1]: number of dense or empty columns ignored (and + ordered last in the output permutation p) + Note that a row can become "empty" if it + contains only "dense" and/or "empty" columns, + and similarly a column can become "empty" if it + only contains "dense" and/or "empty" rows. + + stats [2]: number of garbage collections performed. + This can be excessively high if Alen is close + to the minimum required value. + + stats [3]: status code. < 0 is an error code. + > 1 is a warning or notice. + + 0 OK. Each column of the input matrix contained + row indices in increasing order, with no + duplicates. + + 1 OK, but columns of input matrix were jumbled + (unsorted columns or duplicate entries). Colamd + had to do some extra work to sort the matrix + first and remove duplicate entries, but it + still was able to return a valid permutation + (return value of colamd was TRUE). + + stats [4]: highest numbered column that + is unsorted or has duplicate + entries. + stats [5]: last seen duplicate or + unsorted row index. + stats [6]: number of duplicate or + unsorted row indices. + + -1 A is a null pointer + + -2 p is a null pointer + + -3 n_row is negative + + stats [4]: n_row + + -4 n_col is negative + + stats [4]: n_col + + -5 number of nonzeros in matrix is negative + + stats [4]: number of nonzeros, p [n_col] + + -6 p [0] is nonzero + + stats [4]: p [0] + + -7 A is too small + + stats [4]: required size + stats [5]: actual size (Alen) + + -8 a column has a negative number of entries + + stats [4]: column with < 0 entries + stats [5]: number of entries in col + + -9 a row index is out of bounds + + stats [4]: column with bad row index + stats [5]: bad row index + stats [6]: n_row, # of rows of matrx + + -10 (unused; see symamd.c) + + -999 (unused; see symamd.c) + + Future versions may return more statistics in the stats array. + + Example: + + See http://www.cise.ufl.edu/research/sparse/colamd/example.c + for a complete example. + + To order the columns of a 5-by-4 matrix with 11 nonzero entries in + the following nonzero pattern + + x 0 x 0 + x 0 x x + 0 x x 0 + 0 0 x x + x x 0 0 + + with default knobs and no output statistics, do the following: + + #include "colamd.h" + #define ALEN 100 + int A [ALEN] = {0, 1, 4, 2, 4, 0, 1, 2, 3, 1, 3} ; + int p [ ] = {0, 3, 5, 9, 11} ; + int stats [COLAMD_STATS] ; + colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ; + + The permutation is returned in the array p, and A is destroyed. + + ---------------------------------------------------------------------------- + symamd: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + int symamd (int n, int *A, int *p, int *perm, + double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS], + void (*allocate) (size_t, size_t), void (*release) (void *)) ; + UF_long symamd_l (UF_long n, UF_long *A, UF_long *p, UF_long *perm, + double knobs [COLAMD_KNOBS], UF_long stats [COLAMD_STATS], + void (*allocate) (size_t, size_t), void (*release) (void *)) ; + + Purpose: + + The symamd routine computes an ordering P of a symmetric sparse + matrix A such that the Cholesky factorization PAP' = LL' remains + sparse. It is based on a column ordering of a matrix M constructed + so that the nonzero pattern of M'M is the same as A. The matrix A + is assumed to be symmetric; only the strictly lower triangular part + is accessed. You must pass your selected memory allocator (usually + calloc/free or mxCalloc/mxFree) to symamd, for it to allocate + memory for the temporary matrix M. + + Returns: + + TRUE (1) if successful, FALSE (0) otherwise. + + Arguments: + + int n ; Input argument. + + Number of rows and columns in the symmetrix matrix A. + Restriction: n >= 0. + Symamd returns FALSE if n is negative. + + int A [nnz] ; Input argument. + + A is an integer array of size nnz, where nnz = p [n]. + + The row indices of the entries in column c of the matrix are + held in A [(p [c]) ... (p [c+1]-1)]. The row indices in a + given column c need not be in ascending order, and duplicate + row indices may be present. However, symamd will run faster + if the columns are in sorted order with no duplicate entries. + + The matrix is 0-based. That is, rows are in the range 0 to + n-1, and columns are in the range 0 to n-1. Symamd + returns FALSE if any row index is out of range. + + The contents of A are not modified. + + int p [n+1] ; Input argument. + + p is an integer array of size n+1. On input, it holds the + "pointers" for the column form of the matrix A. Column c of + the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first + entry, p [0], must be zero, and p [c] <= p [c+1] must hold + for all c in the range 0 to n-1. The value p [n] is + thus the total number of entries in the pattern of the matrix A. + Symamd returns FALSE if these conditions are not met. + + The contents of p are not modified. + + int perm [n+1] ; Output argument. + + On output, if symamd returns TRUE, the array perm holds the + permutation P, where perm [0] is the first index in the new + ordering, and perm [n-1] is the last. That is, perm [k] = j + means that row and column j of A is the kth column in PAP', + where k is in the range 0 to n-1 (perm [0] = j means + that row and column j of A are the first row and column in + PAP'). The array is used as a workspace during the ordering, + which is why it must be of length n+1, not just n. + + double knobs [COLAMD_KNOBS] ; Input argument. + + See colamd_set_defaults for a description. + + int stats [COLAMD_STATS] ; Output argument. + + Statistics on the ordering, and error status. + See colamd.h for related definitions. + Symamd returns FALSE if stats is not present. + + stats [0]: number of dense or empty row and columns ignored + (and ordered last in the output permutation + perm). Note that a row/column can become + "empty" if it contains only "dense" and/or + "empty" columns/rows. + + stats [1]: (same as stats [0]) + + stats [2]: number of garbage collections performed. + + stats [3]: status code. < 0 is an error code. + > 1 is a warning or notice. + + 0 OK. Each column of the input matrix contained + row indices in increasing order, with no + duplicates. + + 1 OK, but columns of input matrix were jumbled + (unsorted columns or duplicate entries). Symamd + had to do some extra work to sort the matrix + first and remove duplicate entries, but it + still was able to return a valid permutation + (return value of symamd was TRUE). + + stats [4]: highest numbered column that + is unsorted or has duplicate + entries. + stats [5]: last seen duplicate or + unsorted row index. + stats [6]: number of duplicate or + unsorted row indices. + + -1 A is a null pointer + + -2 p is a null pointer + + -3 (unused, see colamd.c) + + -4 n is negative + + stats [4]: n + + -5 number of nonzeros in matrix is negative + + stats [4]: # of nonzeros (p [n]). + + -6 p [0] is nonzero + + stats [4]: p [0] + + -7 (unused) + + -8 a column has a negative number of entries + + stats [4]: column with < 0 entries + stats [5]: number of entries in col + + -9 a row index is out of bounds + + stats [4]: column with bad row index + stats [5]: bad row index + stats [6]: n_row, # of rows of matrx + + -10 out of memory (unable to allocate temporary + workspace for M or count arrays using the + "allocate" routine passed into symamd). + + Future versions may return more statistics in the stats array. + + void * (*allocate) (size_t, size_t) + + A pointer to a function providing memory allocation. The + allocated memory must be returned initialized to zero. For a + C application, this argument should normally be a pointer to + calloc. For a MATLAB mexFunction, the routine mxCalloc is + passed instead. + + void (*release) (size_t, size_t) + + A pointer to a function that frees memory allocated by the + memory allocation routine above. For a C application, this + argument should normally be a pointer to free. For a MATLAB + mexFunction, the routine mxFree is passed instead. + + + ---------------------------------------------------------------------------- + colamd_report: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + colamd_report (int stats [COLAMD_STATS]) ; + colamd_l_report (UF_long stats [COLAMD_STATS]) ; + + Purpose: + + Prints the error status and statistics recorded in the stats + array on the standard error output (for a standard C routine) + or on the MATLAB output (for a mexFunction). + + Arguments: + + int stats [COLAMD_STATS] ; Input only. Statistics from colamd. + + + ---------------------------------------------------------------------------- + symamd_report: + ---------------------------------------------------------------------------- + + C syntax: + + #include "colamd.h" + symamd_report (int stats [COLAMD_STATS]) ; + symamd_l_report (UF_long stats [COLAMD_STATS]) ; + + Purpose: + + Prints the error status and statistics recorded in the stats + array on the standard error output (for a standard C routine) + or on the MATLAB output (for a mexFunction). + + Arguments: + + int stats [COLAMD_STATS] ; Input only. Statistics from symamd. + + +*/ + +/* ========================================================================== */ +/* === Scaffolding code definitions ======================================== */ +/* ========================================================================== */ + +/* Ensure that debugging is turned off: */ +#ifndef NDEBUG +#define NDEBUG +#endif + +/* turn on debugging by uncommenting the following line + #undef NDEBUG +*/ + +/* + Our "scaffolding code" philosophy: In our opinion, well-written library + code should keep its "debugging" code, and just normally have it turned off + by the compiler so as not to interfere with performance. This serves + several purposes: + + (1) assertions act as comments to the reader, telling you what the code + expects at that point. All assertions will always be true (unless + there really is a bug, of course). + + (2) leaving in the scaffolding code assists anyone who would like to modify + the code, or understand the algorithm (by reading the debugging output, + one can get a glimpse into what the code is doing). + + (3) (gasp!) for actually finding bugs. This code has been heavily tested + and "should" be fully functional and bug-free ... but you never know... + + The code will become outrageously slow when debugging is + enabled. To control the level of debugging output, set an environment + variable D to 0 (little), 1 (some), 2, 3, or 4 (lots). When debugging, + you should see the following message on the standard output: + + colamd: debug version, D = 1 (THIS WILL BE SLOW!) + + or a similar message for symamd. If you don't, then debugging has not + been enabled. + +*/ + +/* ========================================================================== */ +/* === Include files ======================================================== */ +/* ========================================================================== */ + +#include "colamd.h" +#include +#include + +#ifdef MATLAB_MEX_FILE +#include "mex.h" +#include "matrix.h" +#endif /* MATLAB_MEX_FILE */ + +#if !defined (NPRINT) || !defined (NDEBUG) +#include +#endif + +#ifndef NULL +#define NULL ((void *) 0) +#endif + +/* ========================================================================== */ +/* === int or UF_long ======================================================= */ +/* ========================================================================== */ + +/* define UF_long */ +#include "UFconfig.h" + +#ifdef DLONG + +#define Int UF_long +#define ID UF_long_id +#define Int_MAX UF_long_max + +#define COLAMD_recommended colamd_l_recommended +#define COLAMD_set_defaults colamd_l_set_defaults +#define COLAMD_MAIN colamd_l +#define SYMAMD_MAIN symamd_l +#define COLAMD_report colamd_l_report +#define SYMAMD_report symamd_l_report + +#else + +#define Int int +#define ID "%d" +#define Int_MAX INT_MAX + +#define COLAMD_recommended colamd_recommended +#define COLAMD_set_defaults colamd_set_defaults +#define COLAMD_MAIN colamd +#define SYMAMD_MAIN symamd +#define COLAMD_report colamd_report +#define SYMAMD_report symamd_report + +#endif + +/* ========================================================================== */ +/* === Row and Column structures ============================================ */ +/* ========================================================================== */ + +/* User code that makes use of the colamd/symamd routines need not directly */ +/* reference these structures. They are used only for colamd_recommended. */ + +typedef struct Colamd_Col_struct +{ + Int start ; /* index for A of first row in this column, or DEAD */ + /* if column is dead */ + Int length ; /* number of rows in this column */ + union + { + Int thickness ; /* number of original columns represented by this */ + /* col, if the column is alive */ + Int parent ; /* parent in parent tree super-column structure, if */ + /* the column is dead */ + } shared1 ; + union + { + Int score ; /* the score used to maintain heap, if col is alive */ + Int order ; /* pivot ordering of this column, if col is dead */ + } shared2 ; + union + { + Int headhash ; /* head of a hash bucket, if col is at the head of */ + /* a degree list */ + Int hash ; /* hash value, if col is not in a degree list */ + Int prev ; /* previous column in degree list, if col is in a */ + /* degree list (but not at the head of a degree list) */ + } shared3 ; + union + { + Int degree_next ; /* next column, if col is in a degree list */ + Int hash_next ; /* next column, if col is in a hash list */ + } shared4 ; + +} Colamd_Col ; + +typedef struct Colamd_Row_struct +{ + Int start ; /* index for A of first col in this row */ + Int length ; /* number of principal columns in this row */ + union + { + Int degree ; /* number of principal & non-principal columns in row */ + Int p ; /* used as a row pointer in init_rows_cols () */ + } shared1 ; + union + { + Int mark ; /* for computing set differences and marking dead rows*/ + Int first_column ;/* first column in row (used in garbage collection) */ + } shared2 ; + +} Colamd_Row ; + +/* ========================================================================== */ +/* === Definitions ========================================================== */ +/* ========================================================================== */ + +/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */ +#define PUBLIC +#define PRIVATE static + +#define DENSE_DEGREE(alpha,n) \ + ((Int) MAX (16.0, (alpha) * sqrt ((double) (n)))) + +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#define ONES_COMPLEMENT(r) (-(r)-1) + +/* -------------------------------------------------------------------------- */ +/* Change for version 2.1: define TRUE and FALSE only if not yet defined */ +/* -------------------------------------------------------------------------- */ + +#ifndef TRUE +#define TRUE (1) +#endif + +#ifndef FALSE +#define FALSE (0) +#endif + +/* -------------------------------------------------------------------------- */ + +#define EMPTY (-1) + +/* Row and column status */ +#define ALIVE (0) +#define DEAD (-1) + +/* Column status */ +#define DEAD_PRINCIPAL (-1) +#define DEAD_NON_PRINCIPAL (-2) + +/* Macros for row and column status update and checking. */ +#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) +#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) +#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) +#define COL_IS_DEAD(c) (Col [c].start < ALIVE) +#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) +#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) +#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } +#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } +#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } + +/* ========================================================================== */ +/* === Colamd reporting mechanism =========================================== */ +/* ========================================================================== */ + +#if defined (MATLAB_MEX_FILE) || defined (MATHWORKS) +/* In MATLAB, matrices are 1-based to the user, but 0-based internally */ +#define INDEX(i) ((i)+1) +#else +/* In C, matrices are 0-based and indices are reported as such in *_report */ +#define INDEX(i) (i) +#endif + +/* All output goes through the PRINTF macro. */ +#define PRINTF(params) { if (colamd_printf != NULL) (void) colamd_printf params ; } + +/* ========================================================================== */ +/* === Prototypes of PRIVATE routines ======================================= */ +/* ========================================================================== */ + +PRIVATE Int init_rows_cols +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int p [], + Int stats [COLAMD_STATS] +) ; + +PRIVATE void init_scoring +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int head [], + double knobs [COLAMD_KNOBS], + Int *p_n_row2, + Int *p_n_col2, + Int *p_max_deg +) ; + +PRIVATE Int find_ordering +( + Int n_row, + Int n_col, + Int Alen, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int head [], + Int n_col2, + Int max_deg, + Int pfree, + Int aggressive +) ; + +PRIVATE void order_children +( + Int n_col, + Colamd_Col Col [], + Int p [] +) ; + +PRIVATE void detect_super_cols +( + +#ifndef NDEBUG + Int n_col, + Colamd_Row Row [], +#endif /* NDEBUG */ + + Colamd_Col Col [], + Int A [], + Int head [], + Int row_start, + Int row_length +) ; + +PRIVATE Int garbage_collection +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int *pfree +) ; + +PRIVATE Int clear_mark +( + Int tag_mark, + Int max_mark, + Int n_row, + Colamd_Row Row [] +) ; + +PRIVATE void print_report +( + char *method, + Int stats [COLAMD_STATS] +) ; + +/* ========================================================================== */ +/* === Debugging prototypes and definitions ================================= */ +/* ========================================================================== */ + +#ifndef NDEBUG + +#include + +/* colamd_debug is the *ONLY* global variable, and is only */ +/* present when debugging */ + +PRIVATE Int colamd_debug = 0 ; /* debug print level */ + +#define DEBUG0(params) { PRINTF (params) ; } +#define DEBUG1(params) { if (colamd_debug >= 1) PRINTF (params) ; } +#define DEBUG2(params) { if (colamd_debug >= 2) PRINTF (params) ; } +#define DEBUG3(params) { if (colamd_debug >= 3) PRINTF (params) ; } +#define DEBUG4(params) { if (colamd_debug >= 4) PRINTF (params) ; } + +#ifdef MATLAB_MEX_FILE +#define ASSERT(expression) (mxAssert ((expression), "")) +#else +#define ASSERT(expression) (assert (expression)) +#endif /* MATLAB_MEX_FILE */ + +PRIVATE void colamd_get_debug /* gets the debug print level from getenv */ +( + char *method +) ; + +PRIVATE void debug_deg_lists +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int head [], + Int min_score, + Int should, + Int max_deg +) ; + +PRIVATE void debug_mark +( + Int n_row, + Colamd_Row Row [], + Int tag_mark, + Int max_mark +) ; + +PRIVATE void debug_matrix +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [] +) ; + +PRIVATE void debug_structures +( + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int n_col2 +) ; + +#else /* NDEBUG */ + +/* === No debugging ========================================================= */ + +#define DEBUG0(params) ; +#define DEBUG1(params) ; +#define DEBUG2(params) ; +#define DEBUG3(params) ; +#define DEBUG4(params) ; + +#define ASSERT(expression) + +#endif /* NDEBUG */ + +/* ========================================================================== */ +/* === USER-CALLABLE ROUTINES: ============================================== */ +/* ========================================================================== */ + +/* ========================================================================== */ +/* === colamd_recommended =================================================== */ +/* ========================================================================== */ + +/* + The colamd_recommended routine returns the suggested size for Alen. This + value has been determined to provide good balance between the number of + garbage collections and the memory requirements for colamd. If any + argument is negative, or if integer overflow occurs, a 0 is returned as an + error condition. 2*nnz space is required for the row and column + indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is + required for the Col and Row arrays, respectively, which are internal to + colamd (roughly 6*n_col + 4*n_row). An additional n_col space is the + minimal amount of "elbow room", and nnz/5 more space is recommended for + run time efficiency. + + Alen is approximately 2.2*nnz + 7*n_col + 4*n_row + 10. + + This function is not needed when using symamd. +*/ + +/* add two values of type size_t, and check for integer overflow */ +static size_t t_add (size_t a, size_t b, int *ok) +{ + (*ok) = (*ok) && ((a + b) >= MAX (a,b)) ; + return ((*ok) ? (a + b) : 0) ; +} + +/* compute a*k where k is a small integer, and check for integer overflow */ +static size_t t_mult (size_t a, size_t k, int *ok) +{ + size_t i, s = 0 ; + for (i = 0 ; i < k ; i++) + { + s = t_add (s, a, ok) ; + } + return (s) ; +} + +/* size of the Col and Row structures */ +#define COLAMD_C(n_col,ok) \ + ((t_mult (t_add (n_col, 1, ok), sizeof (Colamd_Col), ok) / sizeof (Int))) + +#define COLAMD_R(n_row,ok) \ + ((t_mult (t_add (n_row, 1, ok), sizeof (Colamd_Row), ok) / sizeof (Int))) + + +PUBLIC size_t COLAMD_recommended /* returns recommended value of Alen. */ +( + /* === Parameters ======================================================= */ + + Int nnz, /* number of nonzeros in A */ + Int n_row, /* number of rows in A */ + Int n_col /* number of columns in A */ +) +{ + size_t s, c, r ; + int ok = TRUE ; + if (nnz < 0 || n_row < 0 || n_col < 0) + { + return (0) ; + } + s = t_mult (nnz, 2, &ok) ; /* 2*nnz */ + c = COLAMD_C (n_col, &ok) ; /* size of column structures */ + r = COLAMD_R (n_row, &ok) ; /* size of row structures */ + s = t_add (s, c, &ok) ; + s = t_add (s, r, &ok) ; + s = t_add (s, n_col, &ok) ; /* elbow room */ + s = t_add (s, nnz/5, &ok) ; /* elbow room */ + ok = ok && (s < Int_MAX) ; + return (ok ? s : 0) ; +} + + +/* ========================================================================== */ +/* === colamd_set_defaults ================================================== */ +/* ========================================================================== */ + +/* + The colamd_set_defaults routine sets the default values of the user- + controllable parameters for colamd and symamd: + + Colamd: rows with more than max (16, knobs [0] * sqrt (n_col)) + entries are removed prior to ordering. Columns with more than + max (16, knobs [1] * sqrt (MIN (n_row,n_col))) entries are removed + prior to ordering, and placed last in the output column ordering. + + Symamd: Rows and columns with more than max (16, knobs [0] * sqrt (n)) + entries are removed prior to ordering, and placed last in the + output ordering. + + knobs [0] dense row control + + knobs [1] dense column control + + knobs [2] if nonzero, do aggresive absorption + + knobs [3..19] unused, but future versions might use this + +*/ + +PUBLIC void COLAMD_set_defaults +( + /* === Parameters ======================================================= */ + + double knobs [COLAMD_KNOBS] /* knob array */ +) +{ + /* === Local variables ================================================== */ + + Int i ; + + if (!knobs) + { + return ; /* no knobs to initialize */ + } + for (i = 0 ; i < COLAMD_KNOBS ; i++) + { + knobs [i] = 0 ; + } + knobs [COLAMD_DENSE_ROW] = 10 ; + knobs [COLAMD_DENSE_COL] = 10 ; + knobs [COLAMD_AGGRESSIVE] = TRUE ; /* default: do aggressive absorption*/ +} + + +/* ========================================================================== */ +/* === symamd =============================================================== */ +/* ========================================================================== */ + +PUBLIC Int SYMAMD_MAIN /* return TRUE if OK, FALSE otherwise */ +( + /* === Parameters ======================================================= */ + + Int n, /* number of rows and columns of A */ + Int A [], /* row indices of A */ + Int p [], /* column pointers of A */ + Int perm [], /* output permutation, size n+1 */ + double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ + Int stats [COLAMD_STATS], /* output statistics and error codes */ + void * (*allocate) (size_t, size_t), + /* pointer to calloc (ANSI C) or */ + /* mxCalloc (for MATLAB mexFunction) */ + void (*release) (void *) + /* pointer to free (ANSI C) or */ + /* mxFree (for MATLAB mexFunction) */ +) +{ + /* === Local variables ================================================== */ + + Int *count ; /* length of each column of M, and col pointer*/ + Int *mark ; /* mark array for finding duplicate entries */ + Int *M ; /* row indices of matrix M */ + size_t Mlen ; /* length of M */ + Int n_row ; /* number of rows in M */ + Int nnz ; /* number of entries in A */ + Int i ; /* row index of A */ + Int j ; /* column index of A */ + Int k ; /* row index of M */ + Int mnz ; /* number of nonzeros in M */ + Int pp ; /* index into a column of A */ + Int last_row ; /* last row seen in the current column */ + Int length ; /* number of nonzeros in a column */ + + double cknobs [COLAMD_KNOBS] ; /* knobs for colamd */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs for colamd */ + +#ifndef NDEBUG + colamd_get_debug ("symamd") ; +#endif /* NDEBUG */ + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + DEBUG0 (("symamd: stats not present\n")) ; + return (FALSE) ; + } + for (i = 0 ; i < COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; + + if (!A) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + DEBUG0 (("symamd: A not present\n")) ; + return (FALSE) ; + } + + if (!p) /* p is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + DEBUG0 (("symamd: p not present\n")) ; + return (FALSE) ; + } + + if (n < 0) /* n must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n ; + DEBUG0 (("symamd: n negative %d\n", n)) ; + return (FALSE) ; + } + + nnz = p [n] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ; + return (FALSE) ; + } + + if (p [0] != 0) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ; + return (FALSE) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + COLAMD_set_defaults (default_knobs) ; + knobs = default_knobs ; + } + + /* === Allocate count and mark ========================================== */ + + count = (Int *) ((*allocate) (n+1, sizeof (Int))) ; + if (!count) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ; + return (FALSE) ; + } + + mark = (Int *) ((*allocate) (n+1, sizeof (Int))) ; + if (!mark) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + (*release) ((void *) count) ; + DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ; + return (FALSE) ; + } + + /* === Compute column counts of M, check if A is valid ================== */ + + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (i = 0 ; i < n ; i++) + { + mark [i] = -1 ; + } + + for (j = 0 ; j < n ; j++) + { + last_row = -1 ; + + length = p [j+1] - p [j] ; + if (length < 0) + { + /* column pointers must be non-decreasing */ + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = length ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ; + return (FALSE) ; + } + + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + if (i < 0 || i >= n) + { + /* row index i, in column j, is out of bounds */ + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = i ; + stats [COLAMD_INFO3] = n ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ; + return (FALSE) ; + } + + if (i <= last_row || mark [i] == j) + { + /* row index is unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = j ; + stats [COLAMD_INFO2] = i ; + (stats [COLAMD_INFO3]) ++ ; + DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ; + } + + if (i > j && mark [i] != j) + { + /* row k of M will contain column indices i and j */ + count [i]++ ; + count [j]++ ; + } + + /* mark the row as having been seen in this column */ + mark [i] = j ; + + last_row = i ; + } + } + + /* v2.4: removed free(mark) */ + + /* === Compute column pointers of M ===================================== */ + + /* use output permutation, perm, for column pointers of M */ + perm [0] = 0 ; + for (j = 1 ; j <= n ; j++) + { + perm [j] = perm [j-1] + count [j-1] ; + } + for (j = 0 ; j < n ; j++) + { + count [j] = perm [j] ; + } + + /* === Construct M ====================================================== */ + + mnz = perm [n] ; + n_row = mnz / 2 ; + Mlen = COLAMD_recommended (mnz, n_row, n) ; + M = (Int *) ((*allocate) (Mlen, sizeof (Int))) ; + DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %g\n", + n_row, n, mnz, (double) Mlen)) ; + + if (!M) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; + (*release) ((void *) count) ; + (*release) ((void *) mark) ; + DEBUG0 (("symamd: allocate M (size %g) failed\n", (double) Mlen)) ; + return (FALSE) ; + } + + k = 0 ; + + if (stats [COLAMD_STATUS] == COLAMD_OK) + { + /* Matrix is OK */ + for (j = 0 ; j < n ; j++) + { + ASSERT (p [j+1] - p [j] >= 0) ; + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + ASSERT (i >= 0 && i < n) ; + if (i > j) + { + /* row k of M contains column indices i and j */ + M [count [i]++] = k ; + M [count [j]++] = k ; + k++ ; + } + } + } + } + else + { + /* Matrix is jumbled. Do not add duplicates to M. Unsorted cols OK. */ + DEBUG0 (("symamd: Duplicates in A.\n")) ; + for (i = 0 ; i < n ; i++) + { + mark [i] = -1 ; + } + for (j = 0 ; j < n ; j++) + { + ASSERT (p [j+1] - p [j] >= 0) ; + for (pp = p [j] ; pp < p [j+1] ; pp++) + { + i = A [pp] ; + ASSERT (i >= 0 && i < n) ; + if (i > j && mark [i] != j) + { + /* row k of M contains column indices i and j */ + M [count [i]++] = k ; + M [count [j]++] = k ; + k++ ; + mark [i] = j ; + } + } + } + /* v2.4: free(mark) moved below */ + } + + /* count and mark no longer needed */ + (*release) ((void *) count) ; + (*release) ((void *) mark) ; /* v2.4: free (mark) moved here */ + ASSERT (k == n_row) ; + + /* === Adjust the knobs for M =========================================== */ + + for (i = 0 ; i < COLAMD_KNOBS ; i++) + { + cknobs [i] = knobs [i] ; + } + + /* there are no dense rows in M */ + cknobs [COLAMD_DENSE_ROW] = -1 ; + cknobs [COLAMD_DENSE_COL] = knobs [COLAMD_DENSE_ROW] ; + + /* === Order the columns of M =========================================== */ + + /* v2.4: colamd cannot fail here, so the error check is removed */ + (void) COLAMD_MAIN (n_row, n, (Int) Mlen, M, perm, cknobs, stats) ; + + /* Note that the output permutation is now in perm */ + + /* === get the statistics for symamd from colamd ======================== */ + + /* a dense column in colamd means a dense row and col in symamd */ + stats [COLAMD_DENSE_ROW] = stats [COLAMD_DENSE_COL] ; + + /* === Free M =========================================================== */ + + (*release) ((void *) M) ; + DEBUG0 (("symamd: done.\n")) ; + return (TRUE) ; + +} + +/* ========================================================================== */ +/* === colamd =============================================================== */ +/* ========================================================================== */ + +/* + The colamd routine computes a column ordering Q of a sparse matrix + A such that the LU factorization P(AQ) = LU remains sparse, where P is + selected via partial pivoting. The routine can also be viewed as + providing a permutation Q such that the Cholesky factorization + (AQ)'(AQ) = LL' remains sparse. +*/ + +PUBLIC Int COLAMD_MAIN /* returns TRUE if successful, FALSE otherwise*/ +( + /* === Parameters ======================================================= */ + + Int n_row, /* number of rows in A */ + Int n_col, /* number of columns in A */ + Int Alen, /* length of A */ + Int A [], /* row indices of A */ + Int p [], /* pointers to columns in A */ + double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */ + Int stats [COLAMD_STATS] /* output statistics and error codes */ +) +{ + /* === Local variables ================================================== */ + + Int i ; /* loop index */ + Int nnz ; /* nonzeros in A */ + size_t Row_size ; /* size of Row [], in integers */ + size_t Col_size ; /* size of Col [], in integers */ + size_t need ; /* minimum required length of A */ + Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ + Int n_col2 ; /* number of non-dense, non-empty columns */ + Int n_row2 ; /* number of non-dense, non-empty rows */ + Int ngarbage ; /* number of garbage collections performed */ + Int max_deg ; /* maximum row degree */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs array */ + Int aggressive ; /* do aggressive absorption */ + int ok ; + +#ifndef NDEBUG + colamd_get_debug ("colamd") ; +#endif /* NDEBUG */ + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + DEBUG0 (("colamd: stats not present\n")) ; + return (FALSE) ; + } + for (i = 0 ; i < COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + DEBUG0 (("colamd: A not present\n")) ; + return (FALSE) ; + } + + if (!p) /* p is not present */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + DEBUG0 (("colamd: p not present\n")) ; + return (FALSE) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; + stats [COLAMD_INFO1] = n_row ; + DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; + return (FALSE) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n_col ; + DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; + return (FALSE) ; + } + + nnz = p [n_col] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; + return (FALSE) ; + } + + if (p [0] != 0) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; + return (FALSE) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + COLAMD_set_defaults (default_knobs) ; + knobs = default_knobs ; + } + + aggressive = (knobs [COLAMD_AGGRESSIVE] != FALSE) ; + + /* === Allocate the Row and Col arrays from array A ===================== */ + + ok = TRUE ; + Col_size = COLAMD_C (n_col, &ok) ; /* size of Col array of structs */ + Row_size = COLAMD_R (n_row, &ok) ; /* size of Row array of structs */ + + /* need = 2*nnz + n_col + Col_size + Row_size ; */ + need = t_mult (nnz, 2, &ok) ; + need = t_add (need, n_col, &ok) ; + need = t_add (need, Col_size, &ok) ; + need = t_add (need, Row_size, &ok) ; + + if (!ok || need > (size_t) Alen || need > Int_MAX) + { + /* not enough space in array A to perform the ordering */ + stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; + stats [COLAMD_INFO1] = need ; + stats [COLAMD_INFO2] = Alen ; + DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); + return (FALSE) ; + } + + Alen -= Col_size + Row_size ; + Col = (Colamd_Col *) &A [Alen] ; + Row = (Colamd_Row *) &A [Alen + Col_size] ; + + /* === Construct the row and column data structures ===================== */ + + if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + { + /* input matrix is invalid */ + DEBUG0 (("colamd: Matrix invalid\n")) ; + return (FALSE) ; + } + + /* === Initialize scores, kill dense rows/columns ======================= */ + + init_scoring (n_row, n_col, Row, Col, A, p, knobs, + &n_row2, &n_col2, &max_deg) ; + + /* === Order the supercolumns =========================================== */ + + ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p, + n_col2, max_deg, 2*nnz, aggressive) ; + + /* === Order the non-principal columns ================================== */ + + order_children (n_col, Col, p) ; + + /* === Return statistics in stats ======================================= */ + + stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [COLAMD_DEFRAG_COUNT] = ngarbage ; + DEBUG0 (("colamd: done.\n")) ; + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === colamd_report ======================================================== */ +/* ========================================================================== */ + +PUBLIC void COLAMD_report +( + Int stats [COLAMD_STATS] +) +{ + print_report ("colamd", stats) ; +} + + +/* ========================================================================== */ +/* === symamd_report ======================================================== */ +/* ========================================================================== */ + +PUBLIC void SYMAMD_report +( + Int stats [COLAMD_STATS] +) +{ + print_report ("symamd", stats) ; +} + + + +/* ========================================================================== */ +/* === NON-USER-CALLABLE ROUTINES: ========================================== */ +/* ========================================================================== */ + +/* There are no user-callable routines beyond this point in the file */ + + +/* ========================================================================== */ +/* === init_rows_cols ======================================================= */ +/* ========================================================================== */ + +/* + Takes the column form of the matrix in A and creates the row form of the + matrix. Also, row and column attributes are stored in the Col and Row + structs. If the columns are un-sorted or contain duplicate row indices, + this routine will also sort and remove duplicate row indices from the + column form of the matrix. Returns FALSE if the matrix is invalid, + TRUE otherwise. Not user-callable. +*/ + +PRIVATE Int init_rows_cols /* returns TRUE if OK, or FALSE otherwise */ +( + /* === Parameters ======================================================= */ + + Int n_row, /* number of rows of A */ + Int n_col, /* number of columns of A */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ + Int A [], /* row indices of A, of size Alen */ + Int p [], /* pointers to columns in A, of size n_col+1 */ + Int stats [COLAMD_STATS] /* colamd statistics */ +) +{ + /* === Local variables ================================================== */ + + Int col ; /* a column index */ + Int row ; /* a row index */ + Int *cp ; /* a column pointer */ + Int *cp_end ; /* a pointer to the end of a column */ + Int *rp ; /* a row pointer */ + Int *rp_end ; /* a pointer to the end of a row */ + Int last_row ; /* previous row */ + + /* === Initialize columns, and check column pointers ==================== */ + + for (col = 0 ; col < n_col ; col++) + { + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) + { + /* column pointers must be non-decreasing */ + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = Col [col].length ; + DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (FALSE) ; + } + + Col [col].shared1.thickness = 1 ; + Col [col].shared2.score = 0 ; + Col [col].shared3.prev = EMPTY ; + Col [col].shared4.degree_next = EMPTY ; + } + + /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ + + /* === Scan columns, compute row degrees, and check row indices ========= */ + + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].length = 0 ; + Row [row].shared2.mark = -1 ; + } + + for (col = 0 ; col < n_col ; col++) + { + last_row = -1 ; + + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + + while (cp < cp_end) + { + row = *cp++ ; + + /* make sure row indices within range */ + if (row < 0 || row >= n_row) + { + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + stats [COLAMD_INFO3] = n_row ; + DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ; + return (FALSE) ; + } + + if (row <= last_row || Row [row].shared2.mark == col) + { + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + (stats [COLAMD_INFO3]) ++ ; + DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col)); + } + + if (Row [row].shared2.mark != col) + { + Row [row].length++ ; + } + else + { + /* this is a repeated entry in the column, */ + /* it will be removed */ + Col [col].length-- ; + } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; + } + } + + /* === Compute row pointers ============================================= */ + + /* row form of the matrix starts directly after the column */ + /* form of matrix in A */ + Row [0].start = p [n_col] ; + Row [0].shared1.p = Row [0].start ; + Row [0].shared2.mark = -1 ; + for (row = 1 ; row < n_row ; row++) + { + Row [row].start = Row [row-1].start + Row [row-1].length ; + Row [row].shared1.p = Row [row].start ; + Row [row].shared2.mark = -1 ; + } + + /* === Create row form ================================================== */ + + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) + { + /* if cols jumbled, watch for repeated row indices */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + row = *cp++ ; + if (Row [row].shared2.mark != col) + { + A [(Row [row].shared1.p)++] = col ; + Row [row].shared2.mark = col ; + } + } + } + } + else + { + /* if cols not jumbled, we don't need the mark (this is faster) */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + A [(Row [*cp++].shared1.p)++] = col ; + } + } + } + + /* === Clear the row marks and set row degrees ========================== */ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].shared2.mark = 0 ; + Row [row].shared1.degree = Row [row].length ; + } + + /* === See if we need to re-create columns ============================== */ + + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) + { + DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ; + +#ifndef NDEBUG + /* make sure column lengths are correct */ + for (col = 0 ; col < n_col ; col++) + { + p [col] = Col [col].length ; + } + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + p [*rp++]-- ; + } + } + for (col = 0 ; col < n_col ; col++) + { + ASSERT (p [col] == 0) ; + } + /* now p is all zero (different than when debugging is turned off) */ +#endif /* NDEBUG */ + + /* === Compute col pointers ========================================= */ + + /* col form of the matrix starts at A [0]. */ + /* Note, we may have a gap between the col form and the row */ + /* form if there were duplicate entries, if so, it will be */ + /* removed upon the first garbage collection */ + Col [0].start = 0 ; + p [0] = Col [0].start ; + for (col = 1 ; col < n_col ; col++) + { + /* note that the lengths here are for pruned columns, i.e. */ + /* no duplicate row indices will exist for these columns */ + Col [col].start = Col [col-1].start + Col [col-1].length ; + p [col] = Col [col].start ; + } + + /* === Re-create col form =========================================== */ + + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + A [(p [*rp++])++] = row ; + } + } + } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === init_scoring ========================================================= */ +/* ========================================================================== */ + +/* + Kills dense or empty columns and rows, calculates an initial score for + each column, and places all columns in the degree lists. Not user-callable. +*/ + +PRIVATE void init_scoring +( + /* === Parameters ======================================================= */ + + Int n_row, /* number of rows of A */ + Int n_col, /* number of columns of A */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ + Int A [], /* column form and row form of A */ + Int head [], /* of size n_col+1 */ + double knobs [COLAMD_KNOBS],/* parameters */ + Int *p_n_row2, /* number of non-dense, non-empty rows */ + Int *p_n_col2, /* number of non-dense, non-empty columns */ + Int *p_max_deg /* maximum row degree */ +) +{ + /* === Local variables ================================================== */ + + Int c ; /* a column index */ + Int r, row ; /* a row index */ + Int *cp ; /* a column pointer */ + Int deg ; /* degree of a row or column */ + Int *cp_end ; /* a pointer to the end of a column */ + Int *new_cp ; /* new column pointer */ + Int col_length ; /* length of pruned column */ + Int score ; /* current column score */ + Int n_col2 ; /* number of non-dense, non-empty columns */ + Int n_row2 ; /* number of non-dense, non-empty rows */ + Int dense_row_count ; /* remove rows with more entries than this */ + Int dense_col_count ; /* remove cols with more entries than this */ + Int min_score ; /* smallest column score */ + Int max_deg ; /* maximum row degree */ + Int next_col ; /* Used to add to degree list.*/ + +#ifndef NDEBUG + Int debug_count ; /* debug only. */ +#endif /* NDEBUG */ + + /* === Extract knobs ==================================================== */ + + /* Note: if knobs contains a NaN, this is undefined: */ + if (knobs [COLAMD_DENSE_ROW] < 0) + { + /* only remove completely dense rows */ + dense_row_count = n_col-1 ; + } + else + { + dense_row_count = DENSE_DEGREE (knobs [COLAMD_DENSE_ROW], n_col) ; + } + if (knobs [COLAMD_DENSE_COL] < 0) + { + /* only remove completely dense columns */ + dense_col_count = n_row-1 ; + } + else + { + dense_col_count = + DENSE_DEGREE (knobs [COLAMD_DENSE_COL], MIN (n_row, n_col)) ; + } + + DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + max_deg = 0 ; + n_col2 = n_col ; + n_row2 = n_row ; + + /* === Kill empty columns =============================================== */ + + /* Put the empty columns at the end in their natural order, so that LU */ + /* factorization can proceed as far as possible. */ + for (c = n_col-1 ; c >= 0 ; c--) + { + deg = Col [c].length ; + if (deg == 0) + { + /* this is a empty column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + KILL_PRINCIPAL_COL (c) ; + } + } + DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense columns =============================================== */ + + /* Put the dense columns at the end, in their natural order */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip any dead columns */ + if (COL_IS_DEAD (c)) + { + continue ; + } + deg = Col [c].length ; + if (deg > dense_col_count) + { + /* this is a dense column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + /* decrement the row degrees */ + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + Row [*cp++].shared1.degree-- ; + } + KILL_PRINCIPAL_COL (c) ; + } + } + DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense and empty rows ======================================== */ + + for (r = 0 ; r < n_row ; r++) + { + deg = Row [r].shared1.degree ; + ASSERT (deg >= 0 && deg <= n_col) ; + if (deg > dense_row_count || deg == 0) + { + /* kill a dense or empty row */ + KILL_ROW (r) ; + --n_row2 ; + } + else + { + /* keep track of max degree of remaining rows */ + max_deg = MAX (max_deg, deg) ; + } + } + DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + + /* === Compute initial column scores ==================================== */ + + /* At this point the row degrees are accurate. They reflect the number */ + /* of "live" (non-dense) columns in each row. No empty rows exist. */ + /* Some "live" columns may contain only dead rows, however. These are */ + /* pruned in the code below. */ + + /* now find the initial matlab score for each column */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip dead column */ + if (COL_IS_DEAD (c)) + { + continue ; + } + score = 0 ; + cp = &A [Col [c].start] ; + new_cp = cp ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + /* skip if dead */ + if (ROW_IS_DEAD (row)) + { + continue ; + } + /* compact the column */ + *new_cp++ = row ; + /* add row's external degree */ + score += Row [row].shared1.degree - 1 ; + /* guard against integer overflow */ + score = MIN (score, n_col) ; + } + /* determine pruned column length */ + col_length = (Int) (new_cp - &A [Col [c].start]) ; + if (col_length == 0) + { + /* a newly-made null column (all rows in this col are "dense" */ + /* and have already been killed) */ + DEBUG2 (("Newly null killed: %d\n", c)) ; + Col [c].shared2.order = --n_col2 ; + KILL_PRINCIPAL_COL (c) ; + } + else + { + /* set column length and set score */ + ASSERT (score >= 0) ; + ASSERT (score <= n_col) ; + Col [c].length = col_length ; + Col [c].shared2.score = score ; + } + } + DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; + + /* At this point, all empty rows and columns are dead. All live columns */ + /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ + /* yet). Rows may contain dead columns, but all live rows contain at */ + /* least one live column. */ + +#ifndef NDEBUG + debug_structures (n_row, n_col, Row, Col, A, n_col2) ; +#endif /* NDEBUG */ + + /* === Initialize degree lists ========================================== */ + +#ifndef NDEBUG + debug_count = 0 ; +#endif /* NDEBUG */ + + /* clear the hash buckets */ + for (c = 0 ; c <= n_col ; c++) + { + head [c] = EMPTY ; + } + min_score = n_col ; + /* place in reverse order, so low column indices are at the front */ + /* of the lists. This is to encourage natural tie-breaking */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* only add principal columns to degree lists */ + if (COL_IS_ALIVE (c)) + { + DEBUG4 (("place %d score %d minscore %d ncol %d\n", + c, Col [c].shared2.score, min_score, n_col)) ; + + /* === Add columns score to DList =============================== */ + + score = Col [c].shared2.score ; + + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (score >= 0) ; + ASSERT (score <= n_col) ; + ASSERT (head [score] >= EMPTY) ; + + /* now add this column to dList at proper score location */ + next_col = head [score] ; + Col [c].shared3.prev = EMPTY ; + Col [c].shared4.degree_next = next_col ; + + /* if there already was a column with the same score, set its */ + /* previous pointer to this new column */ + if (next_col != EMPTY) + { + Col [next_col].shared3.prev = c ; + } + head [score] = c ; + + /* see if this score is less than current min */ + min_score = MIN (min_score, score) ; + +#ifndef NDEBUG + debug_count++ ; +#endif /* NDEBUG */ + + } + } + +#ifndef NDEBUG + DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n", + debug_count, n_col, n_col-debug_count)) ; + ASSERT (debug_count == n_col2) ; + debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; +#endif /* NDEBUG */ + + /* === Return number of remaining columns, and max row degree =========== */ + + *p_n_col2 = n_col2 ; + *p_n_row2 = n_row2 ; + *p_max_deg = max_deg ; +} + + +/* ========================================================================== */ +/* === find_ordering ======================================================== */ +/* ========================================================================== */ + +/* + Order the principal columns of the supercolumn form of the matrix + (no supercolumns on input). Uses a minimum approximate column minimum + degree ordering method. Not user-callable. +*/ + +PRIVATE Int find_ordering /* return the number of garbage collections */ +( + /* === Parameters ======================================================= */ + + Int n_row, /* number of rows of A */ + Int n_col, /* number of columns of A */ + Int Alen, /* size of A, 2*nnz + n_col or larger */ + Colamd_Row Row [], /* of size n_row+1 */ + Colamd_Col Col [], /* of size n_col+1 */ + Int A [], /* column form and row form of A */ + Int head [], /* of size n_col+1 */ + Int n_col2, /* Remaining columns to order */ + Int max_deg, /* Maximum row degree */ + Int pfree, /* index of first free slot (2*nnz on entry) */ + Int aggressive +) +{ + /* === Local variables ================================================== */ + + Int k ; /* current pivot ordering step */ + Int pivot_col ; /* current pivot column */ + Int *cp ; /* a column pointer */ + Int *rp ; /* a row pointer */ + Int pivot_row ; /* current pivot row */ + Int *new_cp ; /* modified column pointer */ + Int *new_rp ; /* modified row pointer */ + Int pivot_row_start ; /* pointer to start of pivot row */ + Int pivot_row_degree ; /* number of columns in pivot row */ + Int pivot_row_length ; /* number of supercolumns in pivot row */ + Int pivot_col_score ; /* score of pivot column */ + Int needed_memory ; /* free space needed for pivot row */ + Int *cp_end ; /* pointer to the end of a column */ + Int *rp_end ; /* pointer to the end of a row */ + Int row ; /* a row index */ + Int col ; /* a column index */ + Int max_score ; /* maximum possible score */ + Int cur_score ; /* score of current column */ + unsigned Int hash ; /* hash value for supernode detection */ + Int head_column ; /* head of hash bucket */ + Int first_col ; /* first column in hash bucket */ + Int tag_mark ; /* marker value for mark array */ + Int row_mark ; /* Row [row].shared2.mark */ + Int set_difference ; /* set difference size of row with pivot row */ + Int min_score ; /* smallest column score */ + Int col_thickness ; /* "thickness" (no. of columns in a supercol) */ + Int max_mark ; /* maximum value of tag_mark */ + Int pivot_col_thickness ; /* number of columns represented by pivot col */ + Int prev_col ; /* Used by Dlist operations. */ + Int next_col ; /* Used by Dlist operations. */ + Int ngarbage ; /* number of garbage collections performed */ + +#ifndef NDEBUG + Int debug_d ; /* debug loop counter */ + Int debug_step = 0 ; /* debug loop counter */ +#endif /* NDEBUG */ + + /* === Initialization and clear mark ==================================== */ + + max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ + tag_mark = clear_mark (0, max_mark, n_row, Row) ; + min_score = 0 ; + ngarbage = 0 ; + DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ; + + /* === Order the columns ================================================ */ + + for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) + { + +#ifndef NDEBUG + if (debug_step % 100 == 0) + { + DEBUG2 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + else + { + DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + debug_step++ ; + debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; + debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* NDEBUG */ + + /* === Select pivot column, and order it ============================ */ + + /* make sure degree list isn't empty */ + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (head [min_score] >= EMPTY) ; + +#ifndef NDEBUG + for (debug_d = 0 ; debug_d < min_score ; debug_d++) + { + ASSERT (head [debug_d] == EMPTY) ; + } +#endif /* NDEBUG */ + + /* get pivot column from head of minimum degree list */ + while (head [min_score] == EMPTY && min_score < n_col) + { + min_score++ ; + } + pivot_col = head [min_score] ; + ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; + next_col = Col [pivot_col].shared4.degree_next ; + head [min_score] = next_col ; + if (next_col != EMPTY) + { + Col [next_col].shared3.prev = EMPTY ; + } + + ASSERT (COL_IS_ALIVE (pivot_col)) ; + + /* remember score for defrag check */ + pivot_col_score = Col [pivot_col].shared2.score ; + + /* the pivot column is the kth column in the pivot order */ + Col [pivot_col].shared2.order = k ; + + /* increment order count by column thickness */ + pivot_col_thickness = Col [pivot_col].shared1.thickness ; + k += pivot_col_thickness ; + ASSERT (pivot_col_thickness > 0) ; + DEBUG3 (("Pivot col: %d thick %d\n", pivot_col, pivot_col_thickness)) ; + + /* === Garbage_collection, if necessary ============================= */ + + needed_memory = MIN (pivot_col_score, n_col - k) ; + if (pfree + needed_memory >= Alen) + { + pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + ngarbage++ ; + /* after garbage collection we will have enough */ + ASSERT (pfree + needed_memory < Alen) ; + /* garbage collection has wiped out the Row[].shared2.mark array */ + tag_mark = clear_mark (0, max_mark, n_row, Row) ; + +#ifndef NDEBUG + debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* NDEBUG */ + } + + /* === Compute pivot row pattern ==================================== */ + + /* get starting location for this new merged row */ + pivot_row_start = pfree ; + + /* initialize new row counts to zero */ + pivot_row_degree = 0 ; + + /* tag pivot column as having been visited so it isn't included */ + /* in merged pivot row */ + Col [pivot_col].shared1.thickness = -pivot_col_thickness ; + + /* pivot row is the union of all rows in the pivot column pattern */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ; + /* skip if row is dead */ + if (ROW_IS_ALIVE (row)) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + /* add the column, if alive and untagged */ + col_thickness = Col [col].shared1.thickness ; + if (col_thickness > 0 && COL_IS_ALIVE (col)) + { + /* tag column in pivot row */ + Col [col].shared1.thickness = -col_thickness ; + ASSERT (pfree < Alen) ; + /* place column in pivot row */ + A [pfree++] = col ; + pivot_row_degree += col_thickness ; + } + } + } + } + + /* clear tag on pivot column */ + Col [pivot_col].shared1.thickness = pivot_col_thickness ; + max_deg = MAX (max_deg, pivot_row_degree) ; + +#ifndef NDEBUG + DEBUG3 (("check2\n")) ; + debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* NDEBUG */ + + /* === Kill all rows used to construct pivot row ==================== */ + + /* also kill pivot row, temporarily */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* may be killing an already dead row */ + row = *cp++ ; + DEBUG3 (("Kill row in pivot col: %d\n", row)) ; + KILL_ROW (row) ; + } + + /* === Select a row index to use as the new pivot row =============== */ + + pivot_row_length = pfree - pivot_row_start ; + if (pivot_row_length > 0) + { + /* pick the "pivot" row arbitrarily (first row in col) */ + pivot_row = A [Col [pivot_col].start] ; + DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; + } + else + { + /* there is no pivot row, since it is of zero length */ + pivot_row = EMPTY ; + ASSERT (pivot_row_length == 0) ; + } + ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + + /* === Approximate degree computation =============================== */ + + /* Here begins the computation of the approximate degree. The column */ + /* score is the sum of the pivot row "length", plus the size of the */ + /* set differences of each row in the column minus the pattern of the */ + /* pivot row itself. The column ("thickness") itself is also */ + /* excluded from the column score (we thus use an approximate */ + /* external degree). */ + + /* The time taken by the following code (compute set differences, and */ + /* add them up) is proportional to the size of the data structure */ + /* being scanned - that is, the sum of the sizes of each column in */ + /* the pivot row. Thus, the amortized time to compute a column score */ + /* is proportional to the size of that column (where size, in this */ + /* context, is the column "length", or the number of row indices */ + /* in that column). The number of row indices in a column is */ + /* monotonically non-decreasing, from the length of the original */ + /* column on input to colamd. */ + + /* === Compute set differences ====================================== */ + + DEBUG3 (("** Computing set differences phase. **\n")) ; + + /* pivot row is currently dead - it will be revived later. */ + + DEBUG3 (("Pivot row: ")) ; + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; + DEBUG3 (("Col: %d\n", col)) ; + + /* clear tags used to construct pivot row pattern */ + col_thickness = -Col [col].shared1.thickness ; + ASSERT (col_thickness > 0) ; + Col [col].shared1.thickness = col_thickness ; + + /* === Remove column from degree list =========================== */ + + cur_score = Col [col].shared2.score ; + prev_col = Col [col].shared3.prev ; + next_col = Col [col].shared4.degree_next ; + ASSERT (cur_score >= 0) ; + ASSERT (cur_score <= n_col) ; + ASSERT (cur_score >= EMPTY) ; + if (prev_col == EMPTY) + { + head [cur_score] = next_col ; + } + else + { + Col [prev_col].shared4.degree_next = next_col ; + } + if (next_col != EMPTY) + { + Col [next_col].shared3.prev = prev_col ; + } + + /* === Scan the column ========================================== */ + + cp = &A [Col [col].start] ; + cp_end = cp + Col [col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + ASSERT (row != pivot_row) ; + set_difference = row_mark - tag_mark ; + /* check if the row has been seen yet */ + if (set_difference < 0) + { + ASSERT (Row [row].shared1.degree <= max_deg) ; + set_difference = Row [row].shared1.degree ; + } + /* subtract column thickness from this row's set difference */ + set_difference -= col_thickness ; + ASSERT (set_difference >= 0) ; + /* absorb this row if the set difference becomes zero */ + if (set_difference == 0 && aggressive) + { + DEBUG3 (("aggressive absorption. Row: %d\n", row)) ; + KILL_ROW (row) ; + } + else + { + /* save the new mark */ + Row [row].shared2.mark = set_difference + tag_mark ; + } + } + } + +#ifndef NDEBUG + debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k-pivot_row_degree, max_deg) ; +#endif /* NDEBUG */ + + /* === Add up set differences for each column ======================= */ + + DEBUG3 (("** Adding set differences phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; + hash = 0 ; + cur_score = 0 ; + cp = &A [Col [col].start] ; + /* compact the column */ + new_cp = cp ; + cp_end = cp + Col [col].length ; + + DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; + + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + ASSERT(row >= 0 && row < n_row) ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (ROW_IS_MARKED_DEAD (row_mark)) + { + DEBUG4 ((" Row %d, dead\n", row)) ; + continue ; + } + DEBUG4 ((" Row %d, set diff %d\n", row, row_mark-tag_mark)); + ASSERT (row_mark >= tag_mark) ; + /* compact the column */ + *new_cp++ = row ; + /* compute hash function */ + hash += row ; + /* add set difference */ + cur_score += row_mark - tag_mark ; + /* integer overflow... */ + cur_score = MIN (cur_score, n_col) ; + } + + /* recompute the column's length */ + Col [col].length = (Int) (new_cp - &A [Col [col].start]) ; + + /* === Further mass elimination ================================= */ + + if (Col [col].length == 0) + { + DEBUG4 (("further mass elimination. Col: %d\n", col)) ; + /* nothing left but the pivot row in this column */ + KILL_PRINCIPAL_COL (col) ; + pivot_row_degree -= Col [col].shared1.thickness ; + ASSERT (pivot_row_degree >= 0) ; + /* order it */ + Col [col].shared2.order = k ; + /* increment order count by column thickness */ + k += Col [col].shared1.thickness ; + } + else + { + /* === Prepare for supercolumn detection ==================== */ + + DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; + + /* save score so far */ + Col [col].shared2.score = cur_score ; + + /* add column to hash table, for supercolumn detection */ + hash %= n_col + 1 ; + + DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + ASSERT (((Int) hash) <= n_col) ; + + head_column = head [hash] ; + if (head_column > EMPTY) + { + /* degree list "hash" is non-empty, use prev (shared3) of */ + /* first column in degree list as head of hash bucket */ + first_col = Col [head_column].shared3.headhash ; + Col [head_column].shared3.headhash = col ; + } + else + { + /* degree list "hash" is empty, use head as hash bucket */ + first_col = - (head_column + 2) ; + head [hash] = - (col + 2) ; + } + Col [col].shared4.hash_next = first_col ; + + /* save hash function in Col [col].shared3.hash */ + Col [col].shared3.hash = (Int) hash ; + ASSERT (COL_IS_ALIVE (col)) ; + } + } + + /* The approximate external column degree is now computed. */ + + /* === Supercolumn detection ======================================== */ + + DEBUG3 (("** Supercolumn detection phase. **\n")) ; + + detect_super_cols ( + +#ifndef NDEBUG + n_col, Row, +#endif /* NDEBUG */ + + Col, A, head, pivot_row_start, pivot_row_length) ; + + /* === Kill the pivotal column ====================================== */ + + KILL_PRINCIPAL_COL (pivot_col) ; + + /* === Clear mark =================================================== */ + + tag_mark = clear_mark (tag_mark+max_deg+1, max_mark, n_row, Row) ; + +#ifndef NDEBUG + DEBUG3 (("check3\n")) ; + debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* NDEBUG */ + + /* === Finalize the new pivot row, and column scores ================ */ + + DEBUG3 (("** Finalize scores phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + /* compact the pivot row */ + new_rp = rp ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + /* skip dead columns */ + if (COL_IS_DEAD (col)) + { + continue ; + } + *new_rp++ = col ; + /* add new pivot row to column */ + A [Col [col].start + (Col [col].length++)] = pivot_row ; + + /* retrieve score so far and add on pivot row's degree. */ + /* (we wait until here for this in case the pivot */ + /* row's degree was reduced due to mass elimination). */ + cur_score = Col [col].shared2.score + pivot_row_degree ; + + /* calculate the max possible score as the number of */ + /* external columns minus the 'k' value minus the */ + /* columns thickness */ + max_score = n_col - k - Col [col].shared1.thickness ; + + /* make the score the external degree of the union-of-rows */ + cur_score -= Col [col].shared1.thickness ; + + /* make sure score is less or equal than the max score */ + cur_score = MIN (cur_score, max_score) ; + ASSERT (cur_score >= 0) ; + + /* store updated score */ + Col [col].shared2.score = cur_score ; + + /* === Place column back in degree list ========================= */ + + ASSERT (min_score >= 0) ; + ASSERT (min_score <= n_col) ; + ASSERT (cur_score >= 0) ; + ASSERT (cur_score <= n_col) ; + ASSERT (head [cur_score] >= EMPTY) ; + next_col = head [cur_score] ; + Col [col].shared4.degree_next = next_col ; + Col [col].shared3.prev = EMPTY ; + if (next_col != EMPTY) + { + Col [next_col].shared3.prev = col ; + } + head [cur_score] = col ; + + /* see if this score is less than current min */ + min_score = MIN (min_score, cur_score) ; + + } + +#ifndef NDEBUG + debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; +#endif /* NDEBUG */ + + /* === Resurrect the new pivot row ================================== */ + + if (pivot_row_degree > 0) + { + /* update pivot row length to reflect any cols that were killed */ + /* during super-col detection and mass elimination */ + Row [pivot_row].start = pivot_row_start ; + Row [pivot_row].length = (Int) (new_rp - &A[pivot_row_start]) ; + ASSERT (Row [pivot_row].length > 0) ; + Row [pivot_row].shared1.degree = pivot_row_degree ; + Row [pivot_row].shared2.mark = 0 ; + /* pivot row is no longer dead */ + + DEBUG1 (("Resurrect Pivot_row %d deg: %d\n", + pivot_row, pivot_row_degree)) ; + } + } + + /* === All principal columns have now been ordered ====================== */ + + return (ngarbage) ; +} + + +/* ========================================================================== */ +/* === order_children ======================================================= */ +/* ========================================================================== */ + +/* + The find_ordering routine has ordered all of the principal columns (the + representatives of the supercolumns). The non-principal columns have not + yet been ordered. This routine orders those columns by walking up the + parent tree (a column is a child of the column which absorbed it). The + final permutation vector is then placed in p [0 ... n_col-1], with p [0] + being the first column, and p [n_col-1] being the last. It doesn't look + like it at first glance, but be assured that this routine takes time linear + in the number of columns. Although not immediately obvious, the time + taken by this routine is O (n_col), that is, linear in the number of + columns. Not user-callable. +*/ + +PRIVATE void order_children +( + /* === Parameters ======================================================= */ + + Int n_col, /* number of columns of A */ + Colamd_Col Col [], /* of size n_col+1 */ + Int p [] /* p [0 ... n_col-1] is the column permutation*/ +) +{ + /* === Local variables ================================================== */ + + Int i ; /* loop counter for all columns */ + Int c ; /* column index */ + Int parent ; /* index of column's parent */ + Int order ; /* column's order */ + + /* === Order each non-principal column ================================== */ + + for (i = 0 ; i < n_col ; i++) + { + /* find an un-ordered non-principal column */ + ASSERT (COL_IS_DEAD (i)) ; + if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY) + { + parent = i ; + /* once found, find its principal parent */ + do + { + parent = Col [parent].shared1.parent ; + } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; + + /* now, order all un-ordered non-principal columns along path */ + /* to this parent. collapse tree at the same time */ + c = i ; + /* get order of parent */ + order = Col [parent].shared2.order ; + + do + { + ASSERT (Col [c].shared2.order == EMPTY) ; + + /* order this column */ + Col [c].shared2.order = order++ ; + /* collaps tree */ + Col [c].shared1.parent = parent ; + + /* get immediate parent of this column */ + c = Col [c].shared1.parent ; + + /* continue until we hit an ordered column. There are */ + /* guarranteed not to be anymore unordered columns */ + /* above an ordered column */ + } while (Col [c].shared2.order == EMPTY) ; + + /* re-order the super_col parent to largest order for this group */ + Col [parent].shared2.order = order ; + } + } + + /* === Generate the permutation ========================================= */ + + for (c = 0 ; c < n_col ; c++) + { + p [Col [c].shared2.order] = c ; + } +} + + +/* ========================================================================== */ +/* === detect_super_cols ==================================================== */ +/* ========================================================================== */ + +/* + Detects supercolumns by finding matches between columns in the hash buckets. + Check amongst columns in the set A [row_start ... row_start + row_length-1]. + The columns under consideration are currently *not* in the degree lists, + and have already been placed in the hash buckets. + + The hash bucket for columns whose hash function is equal to h is stored + as follows: + + if head [h] is >= 0, then head [h] contains a degree list, so: + + head [h] is the first column in degree bucket h. + Col [head [h]].headhash gives the first column in hash bucket h. + + otherwise, the degree list is empty, and: + + -(head [h] + 2) is the first column in hash bucket h. + + For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous + column" pointer. Col [c].shared3.hash is used instead as the hash number + for that column. The value of Col [c].shared4.hash_next is the next column + in the same hash bucket. + + Assuming no, or "few" hash collisions, the time taken by this routine is + linear in the sum of the sizes (lengths) of each column whose score has + just been computed in the approximate degree computation. + Not user-callable. +*/ + +PRIVATE void detect_super_cols +( + /* === Parameters ======================================================= */ + +#ifndef NDEBUG + /* these two parameters are only needed when debugging is enabled: */ + Int n_col, /* number of columns of A */ + Colamd_Row Row [], /* of size n_row+1 */ +#endif /* NDEBUG */ + + Colamd_Col Col [], /* of size n_col+1 */ + Int A [], /* row indices of A */ + Int head [], /* head of degree lists and hash buckets */ + Int row_start, /* pointer to set of columns to check */ + Int row_length /* number of columns to check */ +) +{ + /* === Local variables ================================================== */ + + Int hash ; /* hash value for a column */ + Int *rp ; /* pointer to a row */ + Int c ; /* a column index */ + Int super_c ; /* column index of the column to absorb into */ + Int *cp1 ; /* column pointer for column super_c */ + Int *cp2 ; /* column pointer for column c */ + Int length ; /* length of column super_c */ + Int prev_c ; /* column preceding c in hash bucket */ + Int i ; /* loop counter */ + Int *rp_end ; /* pointer to the end of the row */ + Int col ; /* a column index in the row to check */ + Int head_column ; /* first column in hash bucket or degree list */ + Int first_col ; /* first column in hash bucket */ + + /* === Consider each column in the row ================================== */ + + rp = &A [row_start] ; + rp_end = rp + row_length ; + while (rp < rp_end) + { + col = *rp++ ; + if (COL_IS_DEAD (col)) + { + continue ; + } + + /* get hash number for this column */ + hash = Col [col].shared3.hash ; + ASSERT (hash <= n_col) ; + + /* === Get the first column in this hash bucket ===================== */ + + head_column = head [hash] ; + if (head_column > EMPTY) + { + first_col = Col [head_column].shared3.headhash ; + } + else + { + first_col = - (head_column + 2) ; + } + + /* === Consider each column in the hash bucket ====================== */ + + for (super_c = first_col ; super_c != EMPTY ; + super_c = Col [super_c].shared4.hash_next) + { + ASSERT (COL_IS_ALIVE (super_c)) ; + ASSERT (Col [super_c].shared3.hash == hash) ; + length = Col [super_c].length ; + + /* prev_c is the column preceding column c in the hash bucket */ + prev_c = super_c ; + + /* === Compare super_c with all columns after it ================ */ + + for (c = Col [super_c].shared4.hash_next ; + c != EMPTY ; c = Col [c].shared4.hash_next) + { + ASSERT (c != super_c) ; + ASSERT (COL_IS_ALIVE (c)) ; + ASSERT (Col [c].shared3.hash == hash) ; + + /* not identical if lengths or scores are different */ + if (Col [c].length != length || + Col [c].shared2.score != Col [super_c].shared2.score) + { + prev_c = c ; + continue ; + } + + /* compare the two columns */ + cp1 = &A [Col [super_c].start] ; + cp2 = &A [Col [c].start] ; + + for (i = 0 ; i < length ; i++) + { + /* the columns are "clean" (no dead rows) */ + ASSERT (ROW_IS_ALIVE (*cp1)) ; + ASSERT (ROW_IS_ALIVE (*cp2)) ; + /* row indices will same order for both supercols, */ + /* no gather scatter nessasary */ + if (*cp1++ != *cp2++) + { + break ; + } + } + + /* the two columns are different if the for-loop "broke" */ + if (i != length) + { + prev_c = c ; + continue ; + } + + /* === Got it! two columns are identical =================== */ + + ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; + + Col [super_c].shared1.thickness += Col [c].shared1.thickness ; + Col [c].shared1.parent = super_c ; + KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in order_children() */ + Col [c].shared2.order = EMPTY ; + /* remove c from hash bucket */ + Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; + } + } + + /* === Empty this hash bucket ======================================= */ + + if (head_column > EMPTY) + { + /* corresponding degree list "hash" is not empty */ + Col [head_column].shared3.headhash = EMPTY ; + } + else + { + /* corresponding degree list "hash" is empty */ + head [hash] = EMPTY ; + } + } +} + + +/* ========================================================================== */ +/* === garbage_collection =================================================== */ +/* ========================================================================== */ + +/* + Defragments and compacts columns and rows in the workspace A. Used when + all avaliable memory has been used while performing row merging. Returns + the index of the first free position in A, after garbage collection. The + time taken by this routine is linear is the size of the array A, which is + itself linear in the number of nonzeros in the input matrix. + Not user-callable. +*/ + +PRIVATE Int garbage_collection /* returns the new value of pfree */ +( + /* === Parameters ======================================================= */ + + Int n_row, /* number of rows */ + Int n_col, /* number of columns */ + Colamd_Row Row [], /* row info */ + Colamd_Col Col [], /* column info */ + Int A [], /* A [0 ... Alen-1] holds the matrix */ + Int *pfree /* &A [0] ... pfree is in use */ +) +{ + /* === Local variables ================================================== */ + + Int *psrc ; /* source pointer */ + Int *pdest ; /* destination pointer */ + Int j ; /* counter */ + Int r ; /* a row index */ + Int c ; /* a column index */ + Int length ; /* length of a row or column */ + +#ifndef NDEBUG + Int debug_rows ; + DEBUG2 (("Defrag..\n")) ; + for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ; + debug_rows = 0 ; +#endif /* NDEBUG */ + + /* === Defragment the columns =========================================== */ + + pdest = &A[0] ; + for (c = 0 ; c < n_col ; c++) + { + if (COL_IS_ALIVE (c)) + { + psrc = &A [Col [c].start] ; + + /* move and compact the column */ + ASSERT (pdest <= psrc) ; + Col [c].start = (Int) (pdest - &A [0]) ; + length = Col [c].length ; + for (j = 0 ; j < length ; j++) + { + r = *psrc++ ; + if (ROW_IS_ALIVE (r)) + { + *pdest++ = r ; + } + } + Col [c].length = (Int) (pdest - &A [Col [c].start]) ; + } + } + + /* === Prepare to defragment the rows =================================== */ + + for (r = 0 ; r < n_row ; r++) + { + if (ROW_IS_DEAD (r) || (Row [r].length == 0)) + { + /* This row is already dead, or is of zero length. Cannot compact + * a row of zero length, so kill it. NOTE: in the current version, + * there are no zero-length live rows. Kill the row (for the first + * time, or again) just to be safe. */ + KILL_ROW (r) ; + } + else + { + /* save first column index in Row [r].shared2.first_column */ + psrc = &A [Row [r].start] ; + Row [r].shared2.first_column = *psrc ; + ASSERT (ROW_IS_ALIVE (r)) ; + /* flag the start of the row with the one's complement of row */ + *psrc = ONES_COMPLEMENT (r) ; +#ifndef NDEBUG + debug_rows++ ; +#endif /* NDEBUG */ + } + } + + /* === Defragment the rows ============================================== */ + + psrc = pdest ; + while (psrc < pfree) + { + /* find a negative number ... the start of a row */ + if (*psrc++ < 0) + { + psrc-- ; + /* get the row index */ + r = ONES_COMPLEMENT (*psrc) ; + ASSERT (r >= 0 && r < n_row) ; + /* restore first column index */ + *psrc = Row [r].shared2.first_column ; + ASSERT (ROW_IS_ALIVE (r)) ; + ASSERT (Row [r].length > 0) ; + /* move and compact the row */ + ASSERT (pdest <= psrc) ; + Row [r].start = (Int) (pdest - &A [0]) ; + length = Row [r].length ; + for (j = 0 ; j < length ; j++) + { + c = *psrc++ ; + if (COL_IS_ALIVE (c)) + { + *pdest++ = c ; + } + } + Row [r].length = (Int) (pdest - &A [Row [r].start]) ; + ASSERT (Row [r].length > 0) ; +#ifndef NDEBUG + debug_rows-- ; +#endif /* NDEBUG */ + } + } + /* ensure we found all the rows */ + ASSERT (debug_rows == 0) ; + + /* === Return the new value of pfree ==================================== */ + + return ((Int) (pdest - &A [0])) ; +} + + +/* ========================================================================== */ +/* === clear_mark =========================================================== */ +/* ========================================================================== */ + +/* + Clears the Row [].shared2.mark array, and returns the new tag_mark. + Return value is the new tag_mark. Not user-callable. +*/ + +PRIVATE Int clear_mark /* return the new value for tag_mark */ +( + /* === Parameters ======================================================= */ + + Int tag_mark, /* new value of tag_mark */ + Int max_mark, /* max allowed value of tag_mark */ + + Int n_row, /* number of rows in A */ + Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ +) +{ + /* === Local variables ================================================== */ + + Int r ; + + if (tag_mark <= 0 || tag_mark >= max_mark) + { + for (r = 0 ; r < n_row ; r++) + { + if (ROW_IS_ALIVE (r)) + { + Row [r].shared2.mark = 0 ; + } + } + tag_mark = 1 ; + } + + return (tag_mark) ; +} + + +/* ========================================================================== */ +/* === print_report ========================================================= */ +/* ========================================================================== */ + +PRIVATE void print_report +( + char *method, + Int stats [COLAMD_STATS] +) +{ + + Int i1, i2, i3 ; + + PRINTF (("\n%s version %d.%d, %s: ", method, + COLAMD_MAIN_VERSION, COLAMD_SUB_VERSION, COLAMD_DATE)) ; + + if (!stats) + { + PRINTF (("No statistics available.\n")) ; + return ; + } + + i1 = stats [COLAMD_INFO1] ; + i2 = stats [COLAMD_INFO2] ; + i3 = stats [COLAMD_INFO3] ; + + if (stats [COLAMD_STATUS] >= 0) + { + PRINTF (("OK. ")) ; + } + else + { + PRINTF (("ERROR. ")) ; + } + + switch (stats [COLAMD_STATUS]) + { + + case COLAMD_OK_BUT_JUMBLED: + + PRINTF(("Matrix has unsorted or duplicate row indices.\n")) ; + + PRINTF(("%s: number of duplicate or out-of-order row indices: %d\n", + method, i3)) ; + + PRINTF(("%s: last seen duplicate or out-of-order row index: %d\n", + method, INDEX (i2))) ; + + PRINTF(("%s: last seen in column: %d", + method, INDEX (i1))) ; + + /* no break - fall through to next case instead */ + + case COLAMD_OK: + + PRINTF(("\n")) ; + + PRINTF(("%s: number of dense or empty rows ignored: %d\n", + method, stats [COLAMD_DENSE_ROW])) ; + + PRINTF(("%s: number of dense or empty columns ignored: %d\n", + method, stats [COLAMD_DENSE_COL])) ; + + PRINTF(("%s: number of garbage collections performed: %d\n", + method, stats [COLAMD_DEFRAG_COUNT])) ; + break ; + + case COLAMD_ERROR_A_not_present: + + PRINTF(("Array A (row indices of matrix) not present.\n")) ; + break ; + + case COLAMD_ERROR_p_not_present: + + PRINTF(("Array p (column pointers for matrix) not present.\n")) ; + break ; + + case COLAMD_ERROR_nrow_negative: + + PRINTF(("Invalid number of rows (%d).\n", i1)) ; + break ; + + case COLAMD_ERROR_ncol_negative: + + PRINTF(("Invalid number of columns (%d).\n", i1)) ; + break ; + + case COLAMD_ERROR_nnz_negative: + + PRINTF(("Invalid number of nonzero entries (%d).\n", i1)) ; + break ; + + case COLAMD_ERROR_p0_nonzero: + + PRINTF(("Invalid column pointer, p [0] = %d, must be zero.\n", i1)); + break ; + + case COLAMD_ERROR_A_too_small: + + PRINTF(("Array A too small.\n")) ; + PRINTF((" Need Alen >= %d, but given only Alen = %d.\n", + i1, i2)) ; + break ; + + case COLAMD_ERROR_col_length_negative: + + PRINTF + (("Column %d has a negative number of nonzero entries (%d).\n", + INDEX (i1), i2)) ; + break ; + + case COLAMD_ERROR_row_index_out_of_bounds: + + PRINTF + (("Row index (row %d) out of bounds (%d to %d) in column %d.\n", + INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1))) ; + break ; + + case COLAMD_ERROR_out_of_memory: + + PRINTF(("Out of memory.\n")) ; + break ; + + /* v2.4: internal-error case deleted */ + } +} + + + + +/* ========================================================================== */ +/* === colamd debugging routines ============================================ */ +/* ========================================================================== */ + +/* When debugging is disabled, the remainder of this file is ignored. */ + +#ifndef NDEBUG + + +/* ========================================================================== */ +/* === debug_structures ===================================================== */ +/* ========================================================================== */ + +/* + At this point, all empty rows and columns are dead. All live columns + are "clean" (containing no dead rows) and simplicial (no supercolumns + yet). Rows may contain dead columns, but all live rows contain at + least one live column. +*/ + +PRIVATE void debug_structures +( + /* === Parameters ======================================================= */ + + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [], + Int n_col2 +) +{ + /* === Local variables ================================================== */ + + Int i ; + Int c ; + Int *cp ; + Int *cp_end ; + Int len ; + Int score ; + Int r ; + Int *rp ; + Int *rp_end ; + Int deg ; + + /* === Check A, Row, and Col ============================================ */ + + for (c = 0 ; c < n_col ; c++) + { + if (COL_IS_ALIVE (c)) + { + len = Col [c].length ; + score = Col [c].shared2.score ; + DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; + ASSERT (len > 0) ; + ASSERT (score >= 0) ; + ASSERT (Col [c].shared1.thickness == 1) ; + cp = &A [Col [c].start] ; + cp_end = cp + len ; + while (cp < cp_end) + { + r = *cp++ ; + ASSERT (ROW_IS_ALIVE (r)) ; + } + } + else + { + i = Col [c].shared2.order ; + ASSERT (i >= n_col2 && i < n_col) ; + } + } + + for (r = 0 ; r < n_row ; r++) + { + if (ROW_IS_ALIVE (r)) + { + i = 0 ; + len = Row [r].length ; + deg = Row [r].shared1.degree ; + ASSERT (len > 0) ; + ASSERT (deg > 0) ; + rp = &A [Row [r].start] ; + rp_end = rp + len ; + while (rp < rp_end) + { + c = *rp++ ; + if (COL_IS_ALIVE (c)) + { + i++ ; + } + } + ASSERT (i > 0) ; + } + } +} + + +/* ========================================================================== */ +/* === debug_deg_lists ====================================================== */ +/* ========================================================================== */ + +/* + Prints the contents of the degree lists. Counts the number of columns + in the degree list and compares it to the total it should have. Also + checks the row degrees. +*/ + +PRIVATE void debug_deg_lists +( + /* === Parameters ======================================================= */ + + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int head [], + Int min_score, + Int should, + Int max_deg +) +{ + /* === Local variables ================================================== */ + + Int deg ; + Int col ; + Int have ; + Int row ; + + /* === Check the degree lists =========================================== */ + + if (n_col > 10000 && colamd_debug <= 0) + { + return ; + } + have = 0 ; + DEBUG4 (("Degree lists: %d\n", min_score)) ; + for (deg = 0 ; deg <= n_col ; deg++) + { + col = head [deg] ; + if (col == EMPTY) + { + continue ; + } + DEBUG4 (("%d:", deg)) ; + while (col != EMPTY) + { + DEBUG4 ((" %d", col)) ; + have += Col [col].shared1.thickness ; + ASSERT (COL_IS_ALIVE (col)) ; + col = Col [col].shared4.degree_next ; + } + DEBUG4 (("\n")) ; + } + DEBUG4 (("should %d have %d\n", should, have)) ; + ASSERT (should == have) ; + + /* === Check the row degrees ============================================ */ + + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (row = 0 ; row < n_row ; row++) + { + if (ROW_IS_ALIVE (row)) + { + ASSERT (Row [row].shared1.degree <= max_deg) ; + } + } +} + + +/* ========================================================================== */ +/* === debug_mark =========================================================== */ +/* ========================================================================== */ + +/* + Ensures that the tag_mark is less that the maximum and also ensures that + each entry in the mark array is less than the tag mark. +*/ + +PRIVATE void debug_mark +( + /* === Parameters ======================================================= */ + + Int n_row, + Colamd_Row Row [], + Int tag_mark, + Int max_mark +) +{ + /* === Local variables ================================================== */ + + Int r ; + + /* === Check the Row marks ============================================== */ + + ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (r = 0 ; r < n_row ; r++) + { + ASSERT (Row [r].shared2.mark < tag_mark) ; + } +} + + +/* ========================================================================== */ +/* === debug_matrix ========================================================= */ +/* ========================================================================== */ + +/* + Prints out the contents of the columns and the rows. +*/ + +PRIVATE void debug_matrix +( + /* === Parameters ======================================================= */ + + Int n_row, + Int n_col, + Colamd_Row Row [], + Colamd_Col Col [], + Int A [] +) +{ + /* === Local variables ================================================== */ + + Int r ; + Int c ; + Int *rp ; + Int *rp_end ; + Int *cp ; + Int *cp_end ; + + /* === Dump the rows and columns of the matrix ========================== */ + + if (colamd_debug < 3) + { + return ; + } + DEBUG3 (("DUMP MATRIX:\n")) ; + for (r = 0 ; r < n_row ; r++) + { + DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ; + if (ROW_IS_DEAD (r)) + { + continue ; + } + DEBUG3 (("start %d length %d degree %d\n", + Row [r].start, Row [r].length, Row [r].shared1.degree)) ; + rp = &A [Row [r].start] ; + rp_end = rp + Row [r].length ; + while (rp < rp_end) + { + c = *rp++ ; + DEBUG4 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ; + } + } + + for (c = 0 ; c < n_col ; c++) + { + DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ; + if (COL_IS_DEAD (c)) + { + continue ; + } + DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", + Col [c].start, Col [c].length, + Col [c].shared1.thickness, Col [c].shared2.score)) ; + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + r = *cp++ ; + DEBUG4 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ; + } + } +} + +PRIVATE void colamd_get_debug +( + char *method +) +{ + FILE *f ; + colamd_debug = 0 ; /* no debug printing */ + f = fopen ("debug", "r") ; + if (f == (FILE *) NULL) + { + colamd_debug = 0 ; + } + else + { + fscanf (f, "%d", &colamd_debug) ; + fclose (f) ; + } + DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", + method, colamd_debug)) ; +} + +#endif /* NDEBUG */ diff --git a/src/maths/KLU/colamd.h b/src/maths/KLU/colamd.h new file mode 100644 index 000000000..91665cef7 --- /dev/null +++ b/src/maths/KLU/colamd.h @@ -0,0 +1,255 @@ +/* ========================================================================== */ +/* === colamd/symamd prototypes and definitions ============================= */ +/* ========================================================================== */ + +/* COLAMD / SYMAMD include file + + You must include this file (colamd.h) in any routine that uses colamd, + symamd, or the related macros and definitions. + + Authors: + + The authors of the code itself are Stefan I. Larimore and Timothy A. + Davis (davis at cise.ufl.edu), University of Florida. The algorithm was + developed in collaboration with John Gilbert, Xerox PARC, and Esmond + Ng, Oak Ridge National Laboratory. + + Acknowledgements: + + This work was supported by the National Science Foundation, under + grants DMS-9504974 and DMS-9803599. + + Notice: + + Copyright (c) 1998-2007, Timothy A. Davis, All Rights Reserved. + + THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + + Permission is hereby granted to use, copy, modify, and/or distribute + this program, provided that the Copyright, this License, and the + Availability of the original version is retained on all copies and made + accessible to the end-user of any code or package that includes COLAMD + or any modified version of COLAMD. + + Availability: + + The colamd/symamd library is available at + + http://www.cise.ufl.edu/research/sparse/colamd/ + + This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h + file. It is required by the colamd.c, colamdmex.c, and symamdmex.c + files, and by any C code that calls the routines whose prototypes are + listed below, or that uses the colamd/symamd definitions listed below. + +*/ + +#ifndef COLAMD_H +#define COLAMD_H + +/* make it easy for C++ programs to include COLAMD */ +#ifdef __cplusplus +extern "C" { +#endif + +/* ========================================================================== */ +/* === Include files ======================================================== */ +/* ========================================================================== */ + +#include + +/* ========================================================================== */ +/* === COLAMD version ======================================================= */ +/* ========================================================================== */ + +/* COLAMD Version 2.4 and later will include the following definitions. + * As an example, to test if the version you are using is 2.4 or later: + * + * #ifdef COLAMD_VERSION + * if (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) ... + * #endif + * + * This also works during compile-time: + * + * #if defined(COLAMD_VERSION) && (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) + * printf ("This is version 2.4 or later\n") ; + * #else + * printf ("This is an early version\n") ; + * #endif + * + * Versions 2.3 and earlier of COLAMD do not include a #define'd version number. + */ + +#define COLAMD_DATE "Dec 7, 2011" +#define COLAMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) +#define COLAMD_MAIN_VERSION 2 +#define COLAMD_SUB_VERSION 7 +#define COLAMD_SUBSUB_VERSION 4 +#define COLAMD_VERSION \ + COLAMD_VERSION_CODE(COLAMD_MAIN_VERSION,COLAMD_SUB_VERSION) + +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + +/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ +#define COLAMD_KNOBS 20 + +/* number of output statistics. Only stats [0..6] are currently used. */ +#define COLAMD_STATS 20 + +/* knobs [0] and stats [0]: dense row knob and output statistic. */ +#define COLAMD_DENSE_ROW 0 + +/* knobs [1] and stats [1]: dense column knob and output statistic. */ +#define COLAMD_DENSE_COL 1 + +/* knobs [2]: aggressive absorption */ +#define COLAMD_AGGRESSIVE 2 + +/* stats [2]: memory defragmentation count output statistic */ +#define COLAMD_DEFRAG_COUNT 2 + +/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define COLAMD_STATUS 3 + +/* stats [4..6]: error info, or info on jumbled columns */ +#define COLAMD_INFO1 4 +#define COLAMD_INFO2 5 +#define COLAMD_INFO3 6 + +/* error codes returned in stats [3]: */ +#define COLAMD_OK (0) +#define COLAMD_OK_BUT_JUMBLED (1) +#define COLAMD_ERROR_A_not_present (-1) +#define COLAMD_ERROR_p_not_present (-2) +#define COLAMD_ERROR_nrow_negative (-3) +#define COLAMD_ERROR_ncol_negative (-4) +#define COLAMD_ERROR_nnz_negative (-5) +#define COLAMD_ERROR_p0_nonzero (-6) +#define COLAMD_ERROR_A_too_small (-7) +#define COLAMD_ERROR_col_length_negative (-8) +#define COLAMD_ERROR_row_index_out_of_bounds (-9) +#define COLAMD_ERROR_out_of_memory (-10) +#define COLAMD_ERROR_internal_error (-999) + + +/* ========================================================================== */ +/* === Prototypes of user-callable routines ================================= */ +/* ========================================================================== */ + +/* define UF_long */ +#include "UFconfig.h" + +size_t colamd_recommended /* returns recommended value of Alen, */ + /* or 0 if input arguments are erroneous */ +( + int nnz, /* nonzeros in A */ + int n_row, /* number of rows in A */ + int n_col /* number of columns in A */ +) ; + +size_t colamd_l_recommended /* returns recommended value of Alen, */ + /* or 0 if input arguments are erroneous */ +( + UF_long nnz, /* nonzeros in A */ + UF_long n_row, /* number of rows in A */ + UF_long n_col /* number of columns in A */ +) ; + +void colamd_set_defaults /* sets default parameters */ +( /* knobs argument is modified on output */ + double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ +) ; + +void colamd_l_set_defaults /* sets default parameters */ +( /* knobs argument is modified on output */ + double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ +) ; + +int colamd /* returns (1) if successful, (0) otherwise*/ +( /* A and p arguments are modified on output */ + int n_row, /* number of rows in A */ + int n_col, /* number of columns in A */ + int Alen, /* size of the array A */ + int A [], /* row indices of A, of size Alen */ + int p [], /* column pointers of A, of size n_col+1 */ + double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ + int stats [COLAMD_STATS] /* colamd output statistics and error codes */ +) ; + +UF_long colamd_l /* returns (1) if successful, (0) otherwise*/ +( /* A and p arguments are modified on output */ + UF_long n_row, /* number of rows in A */ + UF_long n_col, /* number of columns in A */ + UF_long Alen, /* size of the array A */ + UF_long A [], /* row indices of A, of size Alen */ + UF_long p [], /* column pointers of A, of size n_col+1 */ + double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ + UF_long stats [COLAMD_STATS]/* colamd output statistics and error codes */ +) ; + +int symamd /* return (1) if OK, (0) otherwise */ +( + int n, /* number of rows and columns of A */ + int A [], /* row indices of A */ + int p [], /* column pointers of A */ + int perm [], /* output permutation, size n_col+1 */ + double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ + int stats [COLAMD_STATS], /* output statistics and error codes */ + void * (*allocate) (size_t, size_t), + /* pointer to calloc (ANSI C) or */ + /* mxCalloc (for MATLAB mexFunction) */ + void (*release) (void *) + /* pointer to free (ANSI C) or */ + /* mxFree (for MATLAB mexFunction) */ +) ; + +UF_long symamd_l /* return (1) if OK, (0) otherwise */ +( + UF_long n, /* number of rows and columns of A */ + UF_long A [], /* row indices of A */ + UF_long p [], /* column pointers of A */ + UF_long perm [], /* output permutation, size n_col+1 */ + double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ + UF_long stats [COLAMD_STATS], /* output statistics and error codes */ + void * (*allocate) (size_t, size_t), + /* pointer to calloc (ANSI C) or */ + /* mxCalloc (for MATLAB mexFunction) */ + void (*release) (void *) + /* pointer to free (ANSI C) or */ + /* mxFree (for MATLAB mexFunction) */ +) ; + +void colamd_report +( + int stats [COLAMD_STATS] +) ; + +void colamd_l_report +( + UF_long stats [COLAMD_STATS] +) ; + +void symamd_report +( + int stats [COLAMD_STATS] +) ; + +void symamd_l_report +( + UF_long stats [COLAMD_STATS] +) ; + +#ifndef EXTERN +#define EXTERN extern +#endif + +EXTERN int (*colamd_printf) (const char *, ...) ; + +#ifdef __cplusplus +} +#endif + +#endif /* COLAMD_H */ diff --git a/src/maths/KLU/colamd_global.c b/src/maths/KLU/colamd_global.c new file mode 100644 index 000000000..4d1ae2230 --- /dev/null +++ b/src/maths/KLU/colamd_global.c @@ -0,0 +1,24 @@ +/* ========================================================================== */ +/* === colamd_global.c ====================================================== */ +/* ========================================================================== */ + +/* ---------------------------------------------------------------------------- + * COLAMD, Copyright (C) 2007, Timothy A. Davis. + * See License.txt for the Version 2.1 of the GNU Lesser General Public License + * http://www.cise.ufl.edu/research/sparse + * -------------------------------------------------------------------------- */ + +/* Global variables for COLAMD */ + +#ifndef NPRINT +#ifdef MATLAB_MEX_FILE +#include "mex.h" +int (*colamd_printf) (const char *, ...) = mexPrintf ; +#else +#include +int (*colamd_printf) (const char *, ...) = printf ; +#endif +#else +int (*colamd_printf) (const char *, ...) = ((void *) 0) ; +#endif + diff --git a/src/maths/KLU/klu.c b/src/maths/KLU/klu.c new file mode 100644 index 000000000..eb85884c3 --- /dev/null +++ b/src/maths/KLU/klu.c @@ -0,0 +1,773 @@ +/* ========================================================================== */ +/* === klu ================================================================== */ +/* ========================================================================== */ + +/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with + * optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim + * Davis. This algorithm is what appears as the default sparse LU routine in + * MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A). + * Note that no column ordering is provided (see COLAMD or AMD for suitable + * orderings). SuperLU is based on this algorithm, except that it adds the + * use of dense matrix operations on "supernodes" (adjacent columns with + * identical). This code doesn't use supernodes, thus its name ("Kent" LU, + * as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower + * than SuperLU and UMFPACK for large matrices with lots of nonzeros in their + * factors (such as for most finite-element problems). However, for matrices + * with very sparse LU factors, this algorithm is typically faster than both + * SuperLU and UMFPACK, since in this case there is little chance to exploit + * dense matrix kernels (the BLAS). + * + * Only one block of A is factorized, in the BTF form. The input n is the + * size of the block; k1 is the first row and column in the block. + * + * NOTE: no error checking is done on the inputs. This version is not meant to + * be called directly by the user. Use klu_factor instead. + * + * No fill-reducing ordering is provided. The ordering quality of + * klu_kernel_factor is the responsibility of the caller. The input A must + * pre-permuted to reduce fill-in, or fill-reducing input permutation Q must + * be provided. + * + * The input matrix A must be in compressed-column form, with either sorted + * or unsorted row indices. Row indices for column j of A is in + * Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the + * numerical values. No duplicate entries are allowed. + * + * Copyright 2004-2009, Tim Davis. All rights reserved. See the README + * file for details on permitted use. Note that no code from The MathWorks, + * Inc, or from SuperLU, or from any other source appears here. The code is + * written from scratch, from the algorithmic description in Gilbert & Peierls' + * and Eisenstat & Liu's journal papers [1,2]. + * + * If an input permutation Q is provided, the factorization L*U = A (P,Q) + * is computed, where P is determined by partial pivoting, and Q is the input + * ordering. If the pivot tolerance is less than 1, the "diagonal" entry that + * KLU attempts to choose is the diagonal of A (Q,Q). In other words, the + * input permutation is applied symmetrically to the input matrix. The output + * permutation P includes both the partial pivoting ordering and the input + * permutation. If Q is NULL, then it is assumed to be the identity + * permutation. Q is not modified. + * + * [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time + * Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp., + * vol 9, pp. 862-874, 1988. + * [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in + * Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis & + * Applic., vol 13, pp. 202-211, 1992. + */ + +/* ========================================================================== */ + +#include "klu_internal.h" + +size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n. n must be > 0. */ + Int Ap [ ], /* size n+1, column pointers for A */ + Int Ai [ ], /* size nz = Ap [n], row indices for A */ + Entry Ax [ ], /* size nz, values of A */ + Int Q [ ], /* size n, optional column permutation */ + double Lsize, /* estimate of number of nonzeros in L */ + + /* outputs, not defined on input */ + Unit **p_LU, /* row indices and values of L and U */ + Entry Udiag [ ], /* size n, diagonal of U */ + Int Llen [ ], /* size n, column length of L */ + Int Ulen [ ], /* size n, column length of U */ + Int Lip [ ], /* size n, column pointers for L */ + Int Uip [ ], /* size n, column pointers for U */ + Int P [ ], /* row permutation, size n */ + Int *lnz, /* size of L */ + Int *unz, /* size of U */ + + /* workspace, undefined on input */ + Entry *X, /* size n double's, zero on output */ + Int *Work, /* size 5n Int's */ + + /* inputs, not modified on output */ + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ], /* inverse of P from symbolic factorization */ + double Rs [ ], /* scale factors for A */ + + /* inputs, modified on output */ + Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ + Int Offi [ ], + Entry Offx [ ], + /* --------------- */ + KLU_common *Common +) +{ + double maxlnz, dunits ; + Unit *LU ; + Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ; + Int lsize, usize, anz, ok ; + size_t lusize ; + ASSERT (Common != NULL) ; + + /* ---------------------------------------------------------------------- */ + /* get control parameters, or use defaults */ + /* ---------------------------------------------------------------------- */ + + n = MAX (1, n) ; + anz = Ap [n+k1] - Ap [k1] ; + + if (Lsize <= 0) + { + Lsize = -Lsize ; + Lsize = MAX (Lsize, 1.0) ; + lsize = Lsize * anz + n ; + } + else + { + lsize = Lsize ; + } + + usize = lsize ; + + lsize = MAX (n+1, lsize) ; + usize = MAX (n+1, usize) ; + + maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ; + maxlnz = MIN (maxlnz, ((double) INT_MAX)) ; + lsize = MIN (maxlnz, lsize) ; + usize = MIN (maxlnz, usize) ; + + PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n", + n, anz, k1, lsize, usize, maxlnz)) ; + + /* ---------------------------------------------------------------------- */ + /* allocate workspace and outputs */ + /* ---------------------------------------------------------------------- */ + + /* return arguments are not yet assigned */ + *p_LU = (Unit *) NULL ; + + /* these computations are safe from size_t overflow */ + W = Work ; + Pinv = (Int *) W ; W += n ; + Stack = (Int *) W ; W += n ; + Flag = (Int *) W ; W += n ; + Lpend = (Int *) W ; W += n ; + Ap_pos = (Int *) W ; W += n ; + + dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) + + DUNITS (Int, usize) + DUNITS (Entry, usize) ; + lusize = (size_t) dunits ; + ok = !INT_OVERFLOW (dunits) ; + LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ; + if (LU == NULL) + { + /* out of memory, or problem too large */ + Common->status = KLU_OUT_OF_MEMORY ; + lusize = 0 ; + return (lusize) ; + } + + /* ---------------------------------------------------------------------- */ + /* factorize */ + /* ---------------------------------------------------------------------- */ + + /* with pruning, and non-recursive depth-first-search */ + lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize, + Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz, + X, Stack, Flag, Ap_pos, Lpend, + k1, PSinv, Rs, Offp, Offi, Offx, Common) ; + + /* ---------------------------------------------------------------------- */ + /* return LU factors, or return nothing if an error occurred */ + /* ---------------------------------------------------------------------- */ + + if (Common->status < KLU_OK) + { + LU = KLU_free (LU, lusize, sizeof (Unit), Common) ; + lusize = 0 ; + } + *p_LU = LU ; + PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ; + return (lusize) ; +} + + +/* ========================================================================== */ +/* === KLU_lsolve =========================================================== */ +/* ========================================================================== */ + +/* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal + * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs + * and is stored in ROW form with row dimension nrhs. nrhs must be in the + * range 1 to 4. */ + +void KLU_lsolve +( + /* inputs, not modified: */ + Int n, + Int Lip [ ], + Int Llen [ ], + Unit LU [ ], + Int nrhs, + /* right-hand-side on input, solution to Lx=b on output */ + Entry X [ ] +) +{ + Entry x [4], lik ; + Int *Li ; + Entry *Lx ; + Int k, p, len, i ; + + switch (nrhs) + { + + case 1: + for (k = 0 ; k < n ; k++) + { + x [0] = X [k] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + /* unit diagonal of L is not stored*/ + for (p = 0 ; p < len ; p++) + { + /* X [Li [p]] -= Lx [p] * x [0] ; */ + MULT_SUB (X [Li [p]], Lx [p], x [0]) ; + } + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + x [0] = X [2*k ] ; + x [1] = X [2*k + 1] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; + lik = Lx [p] ; + MULT_SUB (X [2*i], lik, x [0]) ; + MULT_SUB (X [2*i + 1], lik, x [1]) ; + } + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + x [0] = X [3*k ] ; + x [1] = X [3*k + 1] ; + x [2] = X [3*k + 2] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; + lik = Lx [p] ; + MULT_SUB (X [3*i], lik, x [0]) ; + MULT_SUB (X [3*i + 1], lik, x [1]) ; + MULT_SUB (X [3*i + 2], lik, x [2]) ; + } + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + x [0] = X [4*k ] ; + x [1] = X [4*k + 1] ; + x [2] = X [4*k + 2] ; + x [3] = X [4*k + 3] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; + lik = Lx [p] ; + MULT_SUB (X [4*i], lik, x [0]) ; + MULT_SUB (X [4*i + 1], lik, x [1]) ; + MULT_SUB (X [4*i + 2], lik, x [2]) ; + MULT_SUB (X [4*i + 3], lik, x [3]) ; + } + } + break ; + + } +} + +/* ========================================================================== */ +/* === KLU_usolve =========================================================== */ +/* ========================================================================== */ + +/* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal + * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs + * and is stored in ROW form with row dimension nrhs. nrhs must be in the + * range 1 to 4. */ + +void KLU_usolve +( + /* inputs, not modified: */ + Int n, + Int Uip [ ], + Int Ulen [ ], + Unit LU [ ], + Entry Udiag [ ], + Int nrhs, + /* right-hand-side on input, solution to Ux=b on output */ + Entry X [ ] +) +{ + Entry x [4], uik, ukk ; + Int *Ui ; + Entry *Ux ; + Int k, p, len, i ; + + switch (nrhs) + { + + case 1: + + for (k = n-1 ; k >= 0 ; k--) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + /* x [0] = X [k] / Udiag [k] ; */ + DIV (x [0], X [k], Udiag [k]) ; + X [k] = x [0] ; + for (p = 0 ; p < len ; p++) + { + /* X [Ui [p]] -= Ux [p] * x [0] ; */ + MULT_SUB (X [Ui [p]], Ux [p], x [0]) ; + + } + } + + break ; + + case 2: + + for (k = n-1 ; k >= 0 ; k--) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + ukk = Udiag [k] ; + /* x [0] = X [2*k ] / ukk ; + x [1] = X [2*k + 1] / ukk ; */ + DIV (x [0], X [2*k], ukk) ; + DIV (x [1], X [2*k + 1], ukk) ; + + X [2*k ] = x [0] ; + X [2*k + 1] = x [1] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; + uik = Ux [p] ; + /* X [2*i ] -= uik * x [0] ; + X [2*i + 1] -= uik * x [1] ; */ + MULT_SUB (X [2*i], uik, x [0]) ; + MULT_SUB (X [2*i + 1], uik, x [1]) ; + } + } + + break ; + + case 3: + + for (k = n-1 ; k >= 0 ; k--) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + ukk = Udiag [k] ; + + DIV (x [0], X [3*k], ukk) ; + DIV (x [1], X [3*k + 1], ukk) ; + DIV (x [2], X [3*k + 2], ukk) ; + + X [3*k ] = x [0] ; + X [3*k + 1] = x [1] ; + X [3*k + 2] = x [2] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; + uik = Ux [p] ; + MULT_SUB (X [3*i], uik, x [0]) ; + MULT_SUB (X [3*i + 1], uik, x [1]) ; + MULT_SUB (X [3*i + 2], uik, x [2]) ; + } + } + + break ; + + case 4: + + for (k = n-1 ; k >= 0 ; k--) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + ukk = Udiag [k] ; + + DIV (x [0], X [4*k], ukk) ; + DIV (x [1], X [4*k + 1], ukk) ; + DIV (x [2], X [4*k + 2], ukk) ; + DIV (x [3], X [4*k + 3], ukk) ; + + X [4*k ] = x [0] ; + X [4*k + 1] = x [1] ; + X [4*k + 2] = x [2] ; + X [4*k + 3] = x [3] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; + uik = Ux [p] ; + + MULT_SUB (X [4*i], uik, x [0]) ; + MULT_SUB (X [4*i + 1], uik, x [1]) ; + MULT_SUB (X [4*i + 2], uik, x [2]) ; + MULT_SUB (X [4*i + 3], uik, x [3]) ; + } + } + + break ; + + } +} + + +/* ========================================================================== */ +/* === KLU_ltsolve ========================================================== */ +/* ========================================================================== */ + +/* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal + * entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs + * and is stored in ROW form with row dimension nrhs. nrhs must in the + * range 1 to 4. */ + +void KLU_ltsolve +( + /* inputs, not modified: */ + Int n, + Int Lip [ ], + Int Llen [ ], + Unit LU [ ], + Int nrhs, +#ifdef COMPLEX + Int conj_solve, +#endif + /* right-hand-side on input, solution to L'x=b on output */ + Entry X [ ] +) +{ + Entry x [4], lik ; + Int *Li ; + Entry *Lx ; + Int k, p, len, i ; + + switch (nrhs) + { + + case 1: + + for (k = n-1 ; k >= 0 ; k--) + { + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + x [0] = X [k] ; + for (p = 0 ; p < len ; p++) + { +#ifdef COMPLEX + if (conj_solve) + { + /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */ + MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ; + } + else +#endif + { + /*x [0] -= Lx [p] * X [Li [p]] ;*/ + MULT_SUB (x [0], Lx [p], X [Li [p]]) ; + } + } + X [k] = x [0] ; + } + break ; + + case 2: + + for (k = n-1 ; k >= 0 ; k--) + { + x [0] = X [2*k ] ; + x [1] = X [2*k + 1] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (lik, Lx [p]) ; + } + else +#endif + { + lik = Lx [p] ; + } + MULT_SUB (x [0], lik, X [2*i]) ; + MULT_SUB (x [1], lik, X [2*i + 1]) ; + } + X [2*k ] = x [0] ; + X [2*k + 1] = x [1] ; + } + break ; + + case 3: + + for (k = n-1 ; k >= 0 ; k--) + { + x [0] = X [3*k ] ; + x [1] = X [3*k + 1] ; + x [2] = X [3*k + 2] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (lik, Lx [p]) ; + } + else +#endif + { + lik = Lx [p] ; + } + MULT_SUB (x [0], lik, X [3*i]) ; + MULT_SUB (x [1], lik, X [3*i + 1]) ; + MULT_SUB (x [2], lik, X [3*i + 2]) ; + } + X [3*k ] = x [0] ; + X [3*k + 1] = x [1] ; + X [3*k + 2] = x [2] ; + } + break ; + + case 4: + + for (k = n-1 ; k >= 0 ; k--) + { + x [0] = X [4*k ] ; + x [1] = X [4*k + 1] ; + x [2] = X [4*k + 2] ; + x [3] = X [4*k + 3] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + i = Li [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (lik, Lx [p]) ; + } + else +#endif + { + lik = Lx [p] ; + } + MULT_SUB (x [0], lik, X [4*i]) ; + MULT_SUB (x [1], lik, X [4*i + 1]) ; + MULT_SUB (x [2], lik, X [4*i + 2]) ; + MULT_SUB (x [3], lik, X [4*i + 3]) ; + } + X [4*k ] = x [0] ; + X [4*k + 1] = x [1] ; + X [4*k + 2] = x [2] ; + X [4*k + 3] = x [3] ; + } + break ; + } +} + + +/* ========================================================================== */ +/* === KLU_utsolve ========================================================== */ +/* ========================================================================== */ + +/* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal + * entry is stored (and appears last in each column of U). Overwrites B + * with the solution X. B is n-by-nrhs and is stored in ROW form with row + * dimension nrhs. nrhs must be in the range 1 to 4. */ + +void KLU_utsolve +( + /* inputs, not modified: */ + Int n, + Int Uip [ ], + Int Ulen [ ], + Unit LU [ ], + Entry Udiag [ ], + Int nrhs, +#ifdef COMPLEX + Int conj_solve, +#endif + /* right-hand-side on input, solution to Ux=b on output */ + Entry X [ ] +) +{ + Entry x [4], uik, ukk ; + Int k, p, len, i ; + Int *Ui ; + Entry *Ux ; + + switch (nrhs) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + x [0] = X [k] ; + for (p = 0 ; p < len ; p++) + { +#ifdef COMPLEX + if (conj_solve) + { + /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */ + MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ; + } + else +#endif + { + /* x [0] -= Ux [p] * X [Ui [p]] ; */ + MULT_SUB (x [0], Ux [p], X [Ui [p]]) ; + } + } +#ifdef COMPLEX + if (conj_solve) + { + CONJ (ukk, Udiag [k]) ; + } + else +#endif + { + ukk = Udiag [k] ; + } + DIV (X [k], x [0], ukk) ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + x [0] = X [2*k ] ; + x [1] = X [2*k + 1] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (uik, Ux [p]) ; + } + else +#endif + { + uik = Ux [p] ; + } + MULT_SUB (x [0], uik, X [2*i]) ; + MULT_SUB (x [1], uik, X [2*i + 1]) ; + } +#ifdef COMPLEX + if (conj_solve) + { + CONJ (ukk, Udiag [k]) ; + } + else +#endif + { + ukk = Udiag [k] ; + } + DIV (X [2*k], x [0], ukk) ; + DIV (X [2*k + 1], x [1], ukk) ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + x [0] = X [3*k ] ; + x [1] = X [3*k + 1] ; + x [2] = X [3*k + 2] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (uik, Ux [p]) ; + } + else +#endif + { + uik = Ux [p] ; + } + MULT_SUB (x [0], uik, X [3*i]) ; + MULT_SUB (x [1], uik, X [3*i + 1]) ; + MULT_SUB (x [2], uik, X [3*i + 2]) ; + } +#ifdef COMPLEX + if (conj_solve) + { + CONJ (ukk, Udiag [k]) ; + } + else +#endif + { + ukk = Udiag [k] ; + } + DIV (X [3*k], x [0], ukk) ; + DIV (X [3*k + 1], x [1], ukk) ; + DIV (X [3*k + 2], x [2], ukk) ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + x [0] = X [4*k ] ; + x [1] = X [4*k + 1] ; + x [2] = X [4*k + 2] ; + x [3] = X [4*k + 3] ; + for (p = 0 ; p < len ; p++) + { + i = Ui [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (uik, Ux [p]) ; + } + else +#endif + { + uik = Ux [p] ; + } + MULT_SUB (x [0], uik, X [4*i]) ; + MULT_SUB (x [1], uik, X [4*i + 1]) ; + MULT_SUB (x [2], uik, X [4*i + 2]) ; + MULT_SUB (x [3], uik, X [4*i + 3]) ; + } +#ifdef COMPLEX + if (conj_solve) + { + CONJ (ukk, Udiag [k]) ; + } + else +#endif + { + ukk = Udiag [k] ; + } + DIV (X [4*k], x [0], ukk) ; + DIV (X [4*k + 1], x [1], ukk) ; + DIV (X [4*k + 2], x [2], ukk) ; + DIV (X [4*k + 3], x [3], ukk) ; + } + break ; + } +} diff --git a/src/maths/KLU/klu.h b/src/maths/KLU/klu.h new file mode 100644 index 000000000..78d6c5776 --- /dev/null +++ b/src/maths/KLU/klu.h @@ -0,0 +1,831 @@ +/* ========================================================================== */ +/* === klu include file ===================================================== */ +/* ========================================================================== */ + +/* Include file for user programs that call klu_* routines */ + +#ifndef _KLU_H +#define _KLU_H + +/* make it easy for C++ programs to include KLU */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "amd.h" +#include "colamd.h" +#include "btf.h" + +/* -------------------------------------------------------------------------- */ +/* Symbolic object - contains the pre-ordering computed by klu_analyze */ +/* -------------------------------------------------------------------------- */ + +typedef struct +{ + /* A (P,Q) is in upper block triangular form. The kth block goes from + * row/col index R [k] to R [k+1]-1. The estimated number of nonzeros + * in the L factor of the kth block is Lnz [k]. + */ + + /* only computed if the AMD ordering is chosen: */ + double symmetry ; /* symmetry of largest block */ + double est_flops ; /* est. factorization flop count */ + double lnz, unz ; /* estimated nz in L and U, including diagonals */ + double *Lnz ; /* size n, but only Lnz [0..nblocks-1] is used */ + + /* computed for all orderings: */ + int + n, /* input matrix A is n-by-n */ + nz, /* # entries in input matrix */ + *P, /* size n */ + *Q, /* size n */ + *R, /* size n+1, but only R [0..nblocks] is used */ + nzoff, /* nz in off-diagonal blocks */ + nblocks, /* number of blocks */ + maxblock, /* size of largest block */ + ordering, /* ordering used (AMD, COLAMD, or GIVEN) */ + do_btf ; /* whether or not BTF preordering was requested */ + + /* only computed if BTF preordering requested */ + int structural_rank ; /* 0 to n-1 if the matrix is structurally rank + * deficient. -1 if not computed. n if the matrix has + * full structural rank */ + +} klu_symbolic ; + +typedef struct /* 64-bit version (otherwise same as above) */ +{ + double symmetry, est_flops, lnz, unz ; + double *Lnz ; + UF_long n, nz, *P, *Q, *R, nzoff, nblocks, maxblock, ordering, do_btf, + structural_rank ; + +} klu_l_symbolic ; + +/* -------------------------------------------------------------------------- */ +/* Numeric object - contains the factors computed by klu_factor */ +/* -------------------------------------------------------------------------- */ + +typedef struct +{ + /* LU factors of each block, the pivot row permutation, and the + * entries in the off-diagonal blocks */ + + int n ; /* A is n-by-n */ + int nblocks ; /* number of diagonal blocks */ + int lnz ; /* actual nz in L, including diagonal */ + int unz ; /* actual nz in U, including diagonal */ + int max_lnz_block ; /* max actual nz in L in any one block, incl. diag */ + int max_unz_block ; /* max actual nz in U in any one block, incl. diag */ + int *Pnum ; /* size n. final pivot permutation */ + int *Pinv ; /* size n. inverse of final pivot permutation */ + + /* LU factors of each block */ + int *Lip ; /* size n. pointers into LUbx[block] for L */ + int *Uip ; /* size n. pointers into LUbx[block] for U */ + int *Llen ; /* size n. Llen [k] = # of entries in kth column of L */ + int *Ulen ; /* size n. Ulen [k] = # of entries in kth column of U */ + void **LUbx ; /* L and U indices and entries (excl. diagonal of U) */ + size_t *LUsize ; /* size of each LUbx [block], in sizeof (Unit) */ + void *Udiag ; /* diagonal of U */ + + /* scale factors; can be NULL if no scaling */ + double *Rs ; /* size n. Rs [i] is scale factor for row i */ + + /* permanent workspace for factorization and solve */ + size_t worksize ; /* size (in bytes) of Work */ + void *Work ; /* workspace */ + void *Xwork ; /* alias into Numeric->Work */ + int *Iwork ; /* alias into Numeric->Work */ + + /* off-diagonal entries in a conventional compressed-column sparse matrix */ + int *Offp ; /* size n+1, column pointers */ + int *Offi ; /* size nzoff, row indices */ + void *Offx ; /* size nzoff, numerical values */ + int nzoff ; + +} klu_numeric ; + +typedef struct /* 64-bit version (otherwise same as above) */ +{ + UF_long n, nblocks, lnz, unz, max_lnz_block, max_unz_block, *Pnum, *Pinv, + *Lip, *Uip, *Llen, *Ulen ; + void **LUbx ; + size_t *LUsize ; + void *Udiag ; + double *Rs ; + size_t worksize ; + void *Work, *Xwork ; + UF_long *Iwork ; + UF_long *Offp, *Offi ; + void *Offx ; + UF_long nzoff ; + +} klu_l_numeric ; + +/* -------------------------------------------------------------------------- */ +/* KLU control parameters and statistics */ +/* -------------------------------------------------------------------------- */ + +/* Common->status values */ +#define KLU_OK 0 +#define KLU_SINGULAR (1) /* status > 0 is a warning, not an error */ +#define KLU_OUT_OF_MEMORY (-2) +#define KLU_INVALID (-3) +#define KLU_TOO_LARGE (-4) /* integer overflow has occured */ + +typedef struct klu_common_struct +{ + + /* ---------------------------------------------------------------------- */ + /* parameters */ + /* ---------------------------------------------------------------------- */ + + double tol ; /* pivot tolerance for diagonal preference */ + double memgrow ; /* realloc memory growth size for LU factors */ + double initmem_amd ; /* init. memory size with AMD: c*nnz(L) + n */ + double initmem ; /* init. memory size: c*nnz(A) + n */ + double maxwork ; /* maxwork for BTF, <= 0 if no limit */ + + int btf ; /* use BTF pre-ordering, or not */ + int ordering ; /* 0: AMD, 1: COLAMD, 2: user P and Q, + * 3: user function */ + int scale ; /* row scaling: -1: none (and no error check), + * 0: none, 1: sum, 2: max */ + + /* memory management routines */ + void *(*malloc_memory) (size_t) ; /* pointer to malloc */ + void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */ + void (*free_memory) (void *) ; /* pointer to free */ + void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */ + + /* pointer to user ordering function */ + int (*user_order) (int, int *, int *, int *, struct klu_common_struct *) ; + + /* pointer to user data, passed unchanged as the last parameter to the + * user ordering function (optional, the user function need not use this + * information). */ + void *user_data ; + + int halt_if_singular ; /* how to handle a singular matrix: + * FALSE: keep going. Return a Numeric object with a zero U(k,k). A + * divide-by-zero may occur when computing L(:,k). The Numeric object + * can be passed to klu_solve (a divide-by-zero will occur). It can + * also be safely passed to klu_refactor. + * TRUE: stop quickly. klu_factor will free the partially-constructed + * Numeric object. klu_refactor will not free it, but will leave the + * numerical values only partially defined. This is the default. */ + + /* ---------------------------------------------------------------------- */ + /* statistics */ + /* ---------------------------------------------------------------------- */ + + int status ; /* KLU_OK if OK, < 0 if error */ + int nrealloc ; /* # of reallocations of L and U */ + + int structural_rank ; /* 0 to n-1 if the matrix is structurally rank + * deficient (as determined by maxtrans). -1 if not computed. n if the + * matrix has full structural rank. This is computed by klu_analyze + * if a BTF preordering is requested. */ + + int numerical_rank ; /* First k for which a zero U(k,k) was found, + * if the matrix was singular (in the range 0 to n-1). n if the matrix + * has full rank. This is not a true rank-estimation. It just reports + * where the first zero pivot was found. -1 if not computed. + * Computed by klu_factor and klu_refactor. */ + + int singular_col ; /* n if the matrix is not singular. If in the + * range 0 to n-1, this is the column index of the original matrix A that + * corresponds to the column of U that contains a zero diagonal entry. + * -1 if not computed. Computed by klu_factor and klu_refactor. */ + + int noffdiag ; /* # of off-diagonal pivots, -1 if not computed */ + + double flops ; /* actual factorization flop count, from klu_flops */ + double rcond ; /* crude reciprocal condition est., from klu_rcond */ + double condest ; /* accurate condition est., from klu_condest */ + double rgrowth ; /* reciprocal pivot rgrowth, from klu_rgrowth */ + double work ; /* actual work done in BTF, in klu_analyze */ + + size_t memusage ; /* current memory usage, in bytes */ + size_t mempeak ; /* peak memory usage, in bytes */ + +} klu_common ; + +typedef struct klu_l_common_struct /* 64-bit version (otherwise same as above)*/ +{ + + double tol, memgrow, initmem_amd, initmem, maxwork ; + UF_long btf, ordering, scale ; + void *(*malloc_memory) (size_t) ; + void *(*realloc_memory) (void *, size_t) ; + void (*free_memory) (void *) ; + void *(*calloc_memory) (size_t, size_t) ; + UF_long (*user_order) (UF_long, UF_long *, UF_long *, UF_long *, + struct klu_l_common_struct *) ; + void *user_data ; + UF_long halt_if_singular ; + UF_long status, nrealloc, structural_rank, numerical_rank, singular_col, + noffdiag ; + double flops, rcond, condest, rgrowth, work ; + size_t memusage, mempeak ; + +} klu_l_common ; + +/* -------------------------------------------------------------------------- */ +/* klu_defaults: sets default control parameters */ +/* -------------------------------------------------------------------------- */ + +int klu_defaults +( + klu_common *Common +) ; + +UF_long klu_l_defaults (klu_l_common *Common) ; + +/* -------------------------------------------------------------------------- */ +/* klu_analyze: orders and analyzes a matrix */ +/* -------------------------------------------------------------------------- */ + +/* Order the matrix with BTF (or not), then order each block with AMD, COLAMD, + * a natural ordering, or with a user-provided ordering function */ + +klu_symbolic *klu_analyze +( + /* inputs, not modified */ + int n, /* A is n-by-n */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + klu_common *Common +) ; + +klu_l_symbolic *klu_l_analyze (UF_long, UF_long *, UF_long *, + klu_l_common *Common) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_analyze_given: analyzes a matrix using given P and Q */ +/* -------------------------------------------------------------------------- */ + +/* Order the matrix with BTF (or not), then use natural or given ordering + * P and Q on the blocks. P and Q are interpretted as identity + * if NULL. */ + +klu_symbolic *klu_analyze_given +( + /* inputs, not modified */ + int n, /* A is n-by-n */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + int P [ ], /* size n, user's row permutation (may be NULL) */ + int Q [ ], /* size n, user's column permutation (may be NULL) */ + klu_common *Common +) ; + +klu_l_symbolic *klu_l_analyze_given (UF_long, UF_long *, UF_long *, UF_long *, + UF_long *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_factor: factors a matrix using the klu_analyze results */ +/* -------------------------------------------------------------------------- */ + +klu_numeric *klu_factor /* returns KLU_OK if OK, < 0 if error */ +( + /* inputs, not modified */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], /* size nz, numerical values */ + klu_symbolic *Symbolic, + klu_common *Common +) ; + +klu_numeric *klu_z_factor /* returns KLU_OK if OK, < 0 if error */ +( + /* inputs, not modified */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], /* size 2*nz, numerical values (real,imag pairs) */ + klu_symbolic *Symbolic, + klu_common *Common +) ; + +/* long / real version */ +klu_l_numeric *klu_l_factor (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_common *) ; + +/* long / complex version */ +klu_l_numeric *klu_zl_factor (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_solve: solves Ax=b using the Symbolic and Numeric objects */ +/* -------------------------------------------------------------------------- */ + +int klu_solve +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + int ldim, /* leading dimension of B */ + int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size ldim*nrhs */ + klu_common *Common +) ; + +int klu_z_solve +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + int ldim, /* leading dimension of B */ + int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size 2*ldim*nrhs */ + klu_common *Common +) ; + +UF_long klu_l_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, + double *, klu_l_common *) ; + +UF_long klu_zl_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, + double *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_tsolve: solves A'x=b using the Symbolic and Numeric objects */ +/* -------------------------------------------------------------------------- */ + +int klu_tsolve +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + int ldim, /* leading dimension of B */ + int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size ldim*nrhs */ + klu_common *Common +) ; + +int klu_z_tsolve +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + int ldim, /* leading dimension of B */ + int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size 2*ldim*nrhs */ + int conj_solve, /* TRUE: conjugate solve, FALSE: solve A.'x=b */ + klu_common *Common + +) ; + +UF_long klu_l_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, + double *, klu_l_common *) ; + +UF_long klu_zl_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, + double *, UF_long, klu_l_common * ) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_refactor: refactorizes matrix with same ordering as klu_factor */ +/* -------------------------------------------------------------------------- */ + +int klu_refactor /* return TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], /* size nz, numerical values */ + klu_symbolic *Symbolic, + /* input, and numerical values modified on output */ + klu_numeric *Numeric, + klu_common *Common +) ; + +int klu_z_refactor /* return TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], /* size 2*nz, numerical values */ + klu_symbolic *Symbolic, + /* input, and numerical values modified on output */ + klu_numeric *Numeric, + klu_common *Common +) ; + +UF_long klu_l_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_numeric *, klu_l_common *) ; + +UF_long klu_zl_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_numeric *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_free_symbolic: destroys the Symbolic object */ +/* -------------------------------------------------------------------------- */ + +int klu_free_symbolic +( + klu_symbolic **Symbolic, + klu_common *Common +) ; + +UF_long klu_l_free_symbolic (klu_l_symbolic **, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_free_numeric: destroys the Numeric object */ +/* -------------------------------------------------------------------------- */ + +/* Note that klu_free_numeric and klu_z_free_numeric are identical; each can + * free both kinds of Numeric objects (real and complex) */ + +int klu_free_numeric +( + klu_numeric **Numeric, + klu_common *Common +) ; + +int klu_z_free_numeric +( + klu_numeric **Numeric, + klu_common *Common +) ; + +UF_long klu_l_free_numeric (klu_l_numeric **, klu_l_common *) ; +UF_long klu_zl_free_numeric (klu_l_numeric **, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_sort: sorts the columns of the LU factorization */ +/* -------------------------------------------------------------------------- */ + +/* this is not needed except for the MATLAB interface */ + +int klu_sort +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + /* input/output */ + klu_numeric *Numeric, + klu_common *Common +) ; + +int klu_z_sort +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + /* input/output */ + klu_numeric *Numeric, + klu_common *Common +) ; + +UF_long klu_l_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; +UF_long klu_zl_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_flops: determines # of flops performed in numeric factorzation */ +/* -------------------------------------------------------------------------- */ + +int klu_flops +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + /* input/output */ + klu_common *Common +) ; + +int klu_z_flops +( + /* inputs, not modified */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + /* input/output */ + klu_common *Common +) ; + +UF_long klu_l_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; +UF_long klu_zl_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; + + + +/* -------------------------------------------------------------------------- */ +/* klu_rgrowth : compute the reciprocal pivot growth */ +/* -------------------------------------------------------------------------- */ + +/* Pivot growth is computed after the input matrix is permuted, scaled, and + * off-diagonal entries pruned. This is because the LU factorization of each + * block takes as input the scaled diagonal blocks of the BTF form. The + * reciprocal pivot growth in column j of an LU factorization of a matrix C + * is the largest entry in C divided by the largest entry in U; then the overall + * reciprocal pivot growth is the smallest such value for all columns j. Note + * that the off-diagonal entries are not scaled, since they do not take part in + * the LU factorization of the diagonal blocks. + * + * In MATLAB notation: + * + * rgrowth = min (max (abs ((R \ A(p,q)) - F)) ./ max (abs (U))) */ + +int klu_rgrowth +( + int Ap [ ], + int Ai [ ], + double Ax [ ], + klu_symbolic *Symbolic, + klu_numeric *Numeric, + klu_common *Common /* Common->rgrowth = reciprocal pivot growth */ +) ; + +int klu_z_rgrowth +( + int Ap [ ], + int Ai [ ], + double Ax [ ], + klu_symbolic *Symbolic, + klu_numeric *Numeric, + klu_common *Common /* Common->rgrowth = reciprocal pivot growth */ +) ; + +UF_long klu_l_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_numeric *, klu_l_common *) ; + +UF_long klu_zl_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *, + klu_l_numeric *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_condest */ +/* -------------------------------------------------------------------------- */ + +/* Computes a reasonably accurate estimate of the 1-norm condition number, using + * Hager's method, as modified by Higham and Tisseur (same method as used in + * MATLAB's condest */ + +int klu_condest +( + int Ap [ ], /* size n+1, column pointers, not modified */ + double Ax [ ], /* size nz = Ap[n], numerical values, not modified*/ + klu_symbolic *Symbolic, /* symbolic analysis, not modified */ + klu_numeric *Numeric, /* numeric factorization, not modified */ + klu_common *Common /* result returned in Common->condest */ +) ; + +int klu_z_condest +( + int Ap [ ], + double Ax [ ], /* size 2*nz */ + klu_symbolic *Symbolic, + klu_numeric *Numeric, + klu_common *Common /* result returned in Common->condest */ +) ; + +UF_long klu_l_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *, + klu_l_common *) ; + +UF_long klu_zl_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *, + klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_rcond: compute min(abs(diag(U))) / max(abs(diag(U))) */ +/* -------------------------------------------------------------------------- */ + +int klu_rcond +( + klu_symbolic *Symbolic, /* input, not modified */ + klu_numeric *Numeric, /* input, not modified */ + klu_common *Common /* result in Common->rcond */ +) ; + +int klu_z_rcond +( + klu_symbolic *Symbolic, /* input, not modified */ + klu_numeric *Numeric, /* input, not modified */ + klu_common *Common /* result in Common->rcond */ +) ; + +UF_long klu_l_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; + +UF_long klu_zl_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; + + + +/* -------------------------------------------------------------------------- */ +/* klu_scale */ +/* -------------------------------------------------------------------------- */ + +int klu_scale /* return TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */ + int n, + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], + /* outputs, not defined on input */ + double Rs [ ], + /* workspace, not defined on input or output */ + int W [ ], /* size n, can be NULL */ + klu_common *Common +) ; + +int klu_z_scale /* return TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */ + int n, + int Ap [ ], /* size n+1, column pointers */ + int Ai [ ], /* size nz, row indices */ + double Ax [ ], + /* outputs, not defined on input */ + double Rs [ ], + /* workspace, not defined on input or output */ + int W [ ], /* size n, can be NULL */ + klu_common *Common +) ; + +UF_long klu_l_scale (UF_long, UF_long, UF_long *, UF_long *, double *, + double *, UF_long *, klu_l_common *) ; + +UF_long klu_zl_scale (UF_long, UF_long, UF_long *, UF_long *, double *, + double *, UF_long *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* klu_extract */ +/* -------------------------------------------------------------------------- */ + +int klu_extract /* returns TRUE if successful, FALSE otherwise */ +( + /* inputs: */ + klu_numeric *Numeric, + klu_symbolic *Symbolic, + + /* outputs, either allocated on input, or ignored otherwise */ + + /* L */ + int *Lp, /* size n+1 */ + int *Li, /* size Numeric->lnz */ + double *Lx, /* size Numeric->lnz */ + + /* U */ + int *Up, /* size n+1 */ + int *Ui, /* size Numeric->unz */ + double *Ux, /* size Numeric->unz */ + + /* F */ + int *Fp, /* size n+1 */ + int *Fi, /* size Numeric->nzoff */ + double *Fx, /* size Numeric->nzoff */ + + /* P, row permutation */ + int *P, /* size n */ + + /* Q, column permutation */ + int *Q, /* size n */ + + /* Rs, scale factors */ + double *Rs, /* size n */ + + /* R, block boundaries */ + int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */ + + klu_common *Common +) ; + + +int klu_z_extract /* returns TRUE if successful, FALSE otherwise */ +( + /* inputs: */ + klu_numeric *Numeric, + klu_symbolic *Symbolic, + + /* outputs, all of which must be allocated on input */ + + /* L */ + int *Lp, /* size n+1 */ + int *Li, /* size nnz(L) */ + double *Lx, /* size nnz(L) */ + double *Lz, /* size nnz(L) for the complex case, ignored if real */ + + /* U */ + int *Up, /* size n+1 */ + int *Ui, /* size nnz(U) */ + double *Ux, /* size nnz(U) */ + double *Uz, /* size nnz(U) for the complex case, ignored if real */ + + /* F */ + int *Fp, /* size n+1 */ + int *Fi, /* size nnz(F) */ + double *Fx, /* size nnz(F) */ + double *Fz, /* size nnz(F) for the complex case, ignored if real */ + + /* P, row permutation */ + int *P, /* size n */ + + /* Q, column permutation */ + int *Q, /* size n */ + + /* Rs, scale factors */ + double *Rs, /* size n */ + + /* R, block boundaries */ + int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */ + + klu_common *Common +) ; + +UF_long klu_l_extract (klu_l_numeric *, klu_l_symbolic *, + UF_long *, UF_long *, double *, + UF_long *, UF_long *, double *, + UF_long *, UF_long *, double *, + UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ; + +UF_long klu_zl_extract (klu_l_numeric *, klu_l_symbolic *, + UF_long *, UF_long *, double *, double *, + UF_long *, UF_long *, double *, double *, + UF_long *, UF_long *, double *, double *, + UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ; + + +/* -------------------------------------------------------------------------- */ +/* KLU memory management routines */ +/* -------------------------------------------------------------------------- */ + +void *klu_malloc /* returns pointer to the newly malloc'd block */ +( + /* ---- input ---- */ + size_t n, /* number of items */ + size_t size, /* size of each item */ + /* --------------- */ + klu_common *Common +) ; + +void *klu_free /* always returns NULL */ +( + /* ---- in/out --- */ + void *p, /* block of memory to free */ + size_t n, /* number of items */ + size_t size, /* size of each item */ + /* --------------- */ + klu_common *Common +) ; + +void *klu_realloc /* returns pointer to reallocated block */ +( + /* ---- input ---- */ + size_t nnew, /* requested # of items in reallocated block */ + size_t nold, /* current size of block, in # of items */ + size_t size, /* size of each item */ + /* ---- in/out --- */ + void *p, /* block of memory to realloc */ + /* --------------- */ + klu_common *Common +) ; + +void *klu_l_malloc (size_t, size_t, klu_l_common *) ; +void *klu_l_free (void *, size_t, size_t, klu_l_common *) ; +void *klu_l_realloc (size_t, size_t, size_t, void *, klu_l_common *) ; + + +/* ========================================================================== */ +/* === KLU version ========================================================== */ +/* ========================================================================== */ + +/* All versions of KLU include these definitions. + * As an example, to test if the version you are using is 1.2 or later: + * + * if (KLU_VERSION >= KLU_VERSION_CODE (1,2)) ... + * + * This also works during compile-time: + * + * #if (KLU >= KLU_VERSION_CODE (1,2)) + * printf ("This is version 1.2 or later\n") ; + * #else + * printf ("This is an early version\n") ; + * #endif + */ + +#define KLU_DATE "Dec 7, 2011" +#define KLU_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) +#define KLU_MAIN_VERSION 1 +#define KLU_SUB_VERSION 1 +#define KLU_SUBSUB_VERSION 3 +#define KLU_VERSION KLU_VERSION_CODE(KLU_MAIN_VERSION,KLU_SUB_VERSION) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/maths/KLU/klu_analyze.c b/src/maths/KLU/klu_analyze.c new file mode 100644 index 000000000..504b3a1af --- /dev/null +++ b/src/maths/KLU/klu_analyze.c @@ -0,0 +1,488 @@ +/* ========================================================================== */ +/* === klu_analyze ========================================================== */ +/* ========================================================================== */ + +/* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural + * ordering, or the user-provided-function on the blocks. Does not support + * using a given ordering (use klu_analyze_given for that case). */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === analyze_worker ======================================================= */ +/* ========================================================================== */ + +static Int analyze_worker /* returns KLU_OK or < 0 if error */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + Int nblocks, /* # of blocks */ + Int Pbtf [ ], /* BTF row permutation */ + Int Qbtf [ ], /* BTF col permutation */ + Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */ + Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */ + + /* output only, not defined on input */ + Int P [ ], /* size n */ + Int Q [ ], /* size n */ + double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */ + + /* workspace, not defined on input or output */ + Int Pblk [ ], /* size maxblock */ + Int Cp [ ], /* size maxblock+1 */ + Int Ci [ ], /* size MAX (nz+1, Cilen) */ + Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */ + Int Pinv [ ], /* size maxblock */ + + /* input/output */ + KLU_symbolic *Symbolic, + KLU_common *Common +) +{ + double amd_Info [AMD_INFO], lnz, lnz1, flops, flops1 ; + Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow, + maxnz, nzoff, cstats [COLAMD_STATS], ok, err = KLU_INVALID ; + + /* ---------------------------------------------------------------------- */ + /* initializations */ + /* ---------------------------------------------------------------------- */ + + /* compute the inverse of Pbtf */ +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + P [k] = EMPTY ; + Q [k] = EMPTY ; + Pinv [k] = EMPTY ; + } +#endif + for (k = 0 ; k < n ; k++) + { + ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ; + Pinv [Pbtf [k]] = k ; + } +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; +#endif + nzoff = 0 ; + lnz = 0 ; + maxnz = 0 ; + flops = 0 ; + Symbolic->symmetry = EMPTY ; /* only computed by AMD */ + + /* ---------------------------------------------------------------------- */ + /* order each block */ + /* ---------------------------------------------------------------------- */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* ------------------------------------------------------------------ */ + /* the block is from rows/columns k1 to k2-1 */ + /* ------------------------------------------------------------------ */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ; + + /* ------------------------------------------------------------------ */ + /* construct the kth block, C */ + /* ------------------------------------------------------------------ */ + + Lnz [block] = EMPTY ; + pc = 0 ; + for (k = k1 ; k < k2 ; k++) + { + newcol = k-k1 ; + Cp [newcol] = pc ; + oldcol = Qbtf [k] ; + pend = Ap [oldcol+1] ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + newrow = Pinv [Ai [p]] ; + if (newrow < k1) + { + nzoff++ ; + } + else + { + /* (newrow,newcol) is an entry in the block */ + ASSERT (newrow < k2) ; + newrow -= k1 ; + Ci [pc++] = newrow ; + } + } + } + Cp [nk] = pc ; + maxnz = MAX (maxnz, pc) ; + ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ; + + /* ------------------------------------------------------------------ */ + /* order the block C */ + /* ------------------------------------------------------------------ */ + + if (nk <= 3) + { + + /* -------------------------------------------------------------- */ + /* use natural ordering for tiny blocks (3-by-3 or less) */ + /* -------------------------------------------------------------- */ + + for (k = 0 ; k < nk ; k++) + { + Pblk [k] = k ; + } + lnz1 = nk * (nk + 1) / 2 ; + flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ; + ok = TRUE ; + + } + else if (ordering == 0) + { + + /* -------------------------------------------------------------- */ + /* order the block with AMD (C+C') */ + /* -------------------------------------------------------------- */ + + result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ; + ok = (result >= AMD_OK) ; + if (result == AMD_OUT_OF_MEMORY) + { + err = KLU_OUT_OF_MEMORY ; + } + + /* account for memory usage in AMD */ + Common->mempeak = MAX (Common->mempeak, + Common->memusage + amd_Info [AMD_MEMORY]) ; + + /* get the ordering statistics from AMD */ + lnz1 = (Int) (amd_Info [AMD_LNZ]) + nk ; + flops1 = 2 * amd_Info [AMD_NMULTSUBS_LU] + amd_Info [AMD_NDIV] ; + if (pc == maxnz) + { + /* get the symmetry of the biggest block */ + Symbolic->symmetry = amd_Info [AMD_SYMMETRY] ; + } + + } + else if (ordering == 1) + { + + /* -------------------------------------------------------------- */ + /* order the block with COLAMD (C) */ + /* -------------------------------------------------------------- */ + + /* order (and destroy) Ci, returning column permutation in Cp. + * COLAMD "cannot" fail since the matrix has already been checked, + * and Ci allocated. */ + + ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ; + lnz1 = EMPTY ; + flops1 = EMPTY ; + + /* copy the permutation from Cp to Pblk */ + for (k = 0 ; k < nk ; k++) + { + Pblk [k] = Cp [k] ; + } + + } + else + { + + /* -------------------------------------------------------------- */ + /* pass the block to the user-provided ordering function */ + /* -------------------------------------------------------------- */ + + lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ; + flops1 = EMPTY ; + ok = (lnz1 != 0) ; + } + + if (!ok) + { + return (err) ; /* ordering method failed */ + } + + /* ------------------------------------------------------------------ */ + /* keep track of nnz(L) and flops statistics */ + /* ------------------------------------------------------------------ */ + + Lnz [block] = lnz1 ; + lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ; + flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ; + + /* ------------------------------------------------------------------ */ + /* combine the preordering with the BTF ordering */ + /* ------------------------------------------------------------------ */ + + PRINTF (("Pblk, 1-based:\n")) ; + for (k = 0 ; k < nk ; k++) + { + ASSERT (k + k1 < n) ; + ASSERT (Pblk [k] + k1 < n) ; + Q [k + k1] = Qbtf [Pblk [k] + k1] ; + } + for (k = 0 ; k < nk ; k++) + { + ASSERT (k + k1 < n) ; + ASSERT (Pblk [k] + k1 < n) ; + P [k + k1] = Pbtf [Pblk [k] + k1] ; + } + } + + PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ; + ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ; + + /* return estimates of # of nonzeros in L including diagonal */ + Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */ + Symbolic->unz = lnz ; + Symbolic->nzoff = nzoff ; + Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */ + return (KLU_OK) ; +} + + +/* ========================================================================== */ +/* === order_and_analyze ==================================================== */ +/* ========================================================================== */ + +/* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD, + * or the user ordering function. Does not handle the natural or given + * ordering cases. */ + +static KLU_symbolic *order_and_analyze /* returns NULL if error, or a valid + KLU_symbolic object if successful */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + /* --------------------- */ + KLU_common *Common +) +{ + double work ; + KLU_symbolic *Symbolic ; + double *Lnz ; + Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ; + Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen, + *Work ; + + /* ---------------------------------------------------------------------- */ + /* allocate the Symbolic object, and check input matrix */ + /* ---------------------------------------------------------------------- */ + + Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ; + if (Symbolic == NULL) + { + return (NULL) ; + } + P = Symbolic->P ; + Q = Symbolic->Q ; + R = Symbolic->R ; + Lnz = Symbolic->Lnz ; + nz = Symbolic->nz ; + + ordering = Common->ordering ; + if (ordering == 1) + { + /* COLAMD */ + Cilen = COLAMD_recommended (nz, n, n) ; + } + else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL)) + { + /* AMD or user ordering function */ + Cilen = nz+1 ; + } + else + { + /* invalid ordering */ + Common->status = KLU_INVALID ; + KLU_free_symbolic (&Symbolic, Common) ; + return (NULL) ; + } + + /* AMD memory management routines */ + amd_malloc = Common->malloc_memory ; + amd_free = Common->free_memory ; + amd_calloc = Common->calloc_memory ; + amd_realloc = Common->realloc_memory ; + + /* ---------------------------------------------------------------------- */ + /* allocate workspace for BTF permutation */ + /* ---------------------------------------------------------------------- */ + + Pbtf = KLU_malloc (n, sizeof (Int), Common) ; + Qbtf = KLU_malloc (n, sizeof (Int), Common) ; + if (Common->status < KLU_OK) + { + KLU_free (Pbtf, n, sizeof (Int), Common) ; + KLU_free (Qbtf, n, sizeof (Int), Common) ; + KLU_free_symbolic (&Symbolic, Common) ; + return (NULL) ; + } + + /* ---------------------------------------------------------------------- */ + /* get the common parameters for BTF and ordering method */ + /* ---------------------------------------------------------------------- */ + + do_btf = Common->btf ; + do_btf = (do_btf) ? TRUE : FALSE ; + Symbolic->ordering = ordering ; + Symbolic->do_btf = do_btf ; + Symbolic->structural_rank = EMPTY ; + + /* ---------------------------------------------------------------------- */ + /* find the block triangular form (if requested) */ + /* ---------------------------------------------------------------------- */ + + Common->work = 0 ; + + if (do_btf) + { + Work = KLU_malloc (5*n, sizeof (Int), Common) ; + if (Common->status < KLU_OK) + { + /* out of memory */ + KLU_free (Pbtf, n, sizeof (Int), Common) ; + KLU_free (Qbtf, n, sizeof (Int), Common) ; + KLU_free_symbolic (&Symbolic, Common) ; + return (NULL) ; + } + + nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R, + &(Symbolic->structural_rank), Work) ; + Common->structural_rank = Symbolic->structural_rank ; + Common->work += work ; + + KLU_free (Work, 5*n, sizeof (Int), Common) ; + + /* unflip Qbtf if the matrix does not have full structural rank */ + if (Symbolic->structural_rank < n) + { + for (k = 0 ; k < n ; k++) + { + Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ; + } + } + + /* find the size of the largest block */ + maxblock = 1 ; + for (block = 0 ; block < nblocks ; block++) + { + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("block %d size %d\n", block, nk)) ; + maxblock = MAX (maxblock, nk) ; + } + } + else + { + /* BTF not requested */ + nblocks = 1 ; + maxblock = n ; + R [0] = 0 ; + R [1] = n ; + for (k = 0 ; k < n ; k++) + { + Pbtf [k] = k ; + Qbtf [k] = k ; + } + } + + Symbolic->nblocks = nblocks ; + + PRINTF (("maxblock size %d\n", maxblock)) ; + Symbolic->maxblock = maxblock ; + + /* ---------------------------------------------------------------------- */ + /* allocate more workspace, for analyze_worker */ + /* ---------------------------------------------------------------------- */ + + Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ; + Cp = KLU_malloc (maxblock + 1, sizeof (Int), Common) ; + Ci = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ; + Pinv = KLU_malloc (n, sizeof (Int), Common) ; + + /* ---------------------------------------------------------------------- */ + /* order each block of the BTF ordering, and a fill-reducing ordering */ + /* ---------------------------------------------------------------------- */ + + if (Common->status == KLU_OK) + { + PRINTF (("calling analyze_worker\n")) ; + Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R, + ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ; + PRINTF (("analyze_worker done\n")) ; + } + + /* ---------------------------------------------------------------------- */ + /* free all workspace */ + /* ---------------------------------------------------------------------- */ + + KLU_free (Pblk, maxblock, sizeof (Int), Common) ; + KLU_free (Cp, maxblock+1, sizeof (Int), Common) ; + KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ; + KLU_free (Pinv, n, sizeof (Int), Common) ; + KLU_free (Pbtf, n, sizeof (Int), Common) ; + KLU_free (Qbtf, n, sizeof (Int), Common) ; + + /* ---------------------------------------------------------------------- */ + /* return the symbolic object */ + /* ---------------------------------------------------------------------- */ + + if (Common->status < KLU_OK) + { + KLU_free_symbolic (&Symbolic, Common) ; + } + return (Symbolic) ; +} + + +/* ========================================================================== */ +/* === KLU_analyze ========================================================== */ +/* ========================================================================== */ + +KLU_symbolic *KLU_analyze /* returns NULL if error, or a valid + KLU_symbolic object if successful */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + /* -------------------- */ + KLU_common *Common +) +{ + + /* ---------------------------------------------------------------------- */ + /* get the control parameters for BTF and ordering method */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (NULL) ; + } + Common->status = KLU_OK ; + Common->structural_rank = EMPTY ; + + /* ---------------------------------------------------------------------- */ + /* order and analyze */ + /* ---------------------------------------------------------------------- */ + + if (Common->ordering == 2) + { + /* natural ordering */ + return (KLU_analyze_given (n, Ap, Ai, NULL, NULL, Common)) ; + } + else + { + /* order with P and Q */ + return (order_and_analyze (n, Ap, Ai, Common)) ; + } +} diff --git a/src/maths/KLU/klu_analyze_given.c b/src/maths/KLU/klu_analyze_given.c new file mode 100644 index 000000000..bee547345 --- /dev/null +++ b/src/maths/KLU/klu_analyze_given.c @@ -0,0 +1,369 @@ +/* ========================================================================== */ +/* === klu_analyze_given ==================================================== */ +/* ========================================================================== */ + +/* Given an input permutation P and Q, create the Symbolic object. BTF can + * be done to modify the user's P and Q (does not perform the max transversal; + * just finds the strongly-connected components). */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === klu_alloc_symbolic =================================================== */ +/* ========================================================================== */ + +/* Allocate Symbolic object, and check input matrix. Not user callable. */ + +KLU_symbolic *KLU_alloc_symbolic +( + Int n, + Int *Ap, + Int *Ai, + KLU_common *Common +) +{ + KLU_symbolic *Symbolic ; + Int *P, *Q, *R ; + double *Lnz ; + Int nz, i, j, p, pend ; + + if (Common == NULL) + { + return (NULL) ; + } + Common->status = KLU_OK ; + + /* A is n-by-n, with n > 0. Ap [0] = 0 and nz = Ap [n] >= 0 required. + * Ap [j] <= Ap [j+1] must hold for all j = 0 to n-1. Row indices in Ai + * must be in the range 0 to n-1, and no duplicate entries can be present. + * The list of row indices in each column of A need not be sorted. + */ + + if (n <= 0 || Ap == NULL || Ai == NULL) + { + /* Ap and Ai must be present, and n must be > 0 */ + Common->status = KLU_INVALID ; + return (NULL) ; + } + + nz = Ap [n] ; + if (Ap [0] != 0 || nz < 0) + { + /* nz must be >= 0 and Ap [0] must equal zero */ + Common->status = KLU_INVALID ; + return (NULL) ; + } + + for (j = 0 ; j < n ; j++) + { + if (Ap [j] > Ap [j+1]) + { + /* column pointers must be non-decreasing */ + Common->status = KLU_INVALID ; + return (NULL) ; + } + } + P = KLU_malloc (n, sizeof (Int), Common) ; + if (Common->status < KLU_OK) + { + /* out of memory */ + Common->status = KLU_OUT_OF_MEMORY ; + return (NULL) ; + } + for (i = 0 ; i < n ; i++) + { + P [i] = EMPTY ; + } + for (j = 0 ; j < n ; j++) + { + pend = Ap [j+1] ; + for (p = Ap [j] ; p < pend ; p++) + { + i = Ai [p] ; + if (i < 0 || i >= n || P [i] == j) + { + /* row index out of range, or duplicate entry */ + KLU_free (P, n, sizeof (Int), Common) ; + Common->status = KLU_INVALID ; + return (NULL) ; + } + /* flag row i as appearing in column j */ + P [i] = j ; + } + } + + /* ---------------------------------------------------------------------- */ + /* allocate the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + Symbolic = KLU_malloc (sizeof (KLU_symbolic), 1, Common) ; + if (Common->status < KLU_OK) + { + /* out of memory */ + KLU_free (P, n, sizeof (Int), Common) ; + Common->status = KLU_OUT_OF_MEMORY ; + return (NULL) ; + } + + Q = KLU_malloc (n, sizeof (Int), Common) ; + R = KLU_malloc (n+1, sizeof (Int), Common) ; + Lnz = KLU_malloc (n, sizeof (double), Common) ; + + Symbolic->n = n ; + Symbolic->nz = nz ; + Symbolic->P = P ; + Symbolic->Q = Q ; + Symbolic->R = R ; + Symbolic->Lnz = Lnz ; + + if (Common->status < KLU_OK) + { + /* out of memory */ + KLU_free_symbolic (&Symbolic, Common) ; + Common->status = KLU_OUT_OF_MEMORY ; + return (NULL) ; + } + + return (Symbolic) ; +} + + +/* ========================================================================== */ +/* === KLU_analyze_given ==================================================== */ +/* ========================================================================== */ + +KLU_symbolic *KLU_analyze_given /* returns NULL if error, or a valid + KLU_symbolic object if successful */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + Int Puser [ ], /* size n, user's row permutation (may be NULL) */ + Int Quser [ ], /* size n, user's column permutation (may be NULL) */ + /* -------------------- */ + KLU_common *Common +) +{ + KLU_symbolic *Symbolic ; + double *Lnz ; + Int nblocks, nz, block, maxblock, *P, *Q, *R, nzoff, p, pend, do_btf, k ; + + /* ---------------------------------------------------------------------- */ + /* determine if input matrix is valid, and get # of nonzeros */ + /* ---------------------------------------------------------------------- */ + + Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ; + if (Symbolic == NULL) + { + return (NULL) ; + } + P = Symbolic->P ; + Q = Symbolic->Q ; + R = Symbolic->R ; + Lnz = Symbolic->Lnz ; + nz = Symbolic->nz ; + + /* ---------------------------------------------------------------------- */ + /* Q = Quser, or identity if Quser is NULL */ + /* ---------------------------------------------------------------------- */ + + if (Quser == (Int *) NULL) + { + for (k = 0 ; k < n ; k++) + { + Q [k] = k ; + } + } + else + { + for (k = 0 ; k < n ; k++) + { + Q [k] = Quser [k] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* get the control parameters for BTF and ordering method */ + /* ---------------------------------------------------------------------- */ + + do_btf = Common->btf ; + do_btf = (do_btf) ? TRUE : FALSE ; + Symbolic->ordering = 2 ; + Symbolic->do_btf = do_btf ; + + /* ---------------------------------------------------------------------- */ + /* find the block triangular form, if requested */ + /* ---------------------------------------------------------------------- */ + + if (do_btf) + { + + /* ------------------------------------------------------------------ */ + /* get workspace for BTF_strongcomp */ + /* ------------------------------------------------------------------ */ + + Int *Pinv, *Work, *Bi, k1, k2, nk, oldcol ; + + Work = KLU_malloc (4*n, sizeof (Int), Common) ; + Pinv = KLU_malloc (n, sizeof (Int), Common) ; + if (Puser != (Int *) NULL) + { + Bi = KLU_malloc (nz+1, sizeof (Int), Common) ; + } + else + { + Bi = Ai ; + } + + if (Common->status < KLU_OK) + { + /* out of memory */ + KLU_free (Work, 4*n, sizeof (Int), Common) ; + KLU_free (Pinv, n, sizeof (Int), Common) ; + if (Puser != (Int *) NULL) + { + KLU_free (Bi, nz+1, sizeof (Int), Common) ; + } + KLU_free_symbolic (&Symbolic, Common) ; + Common->status = KLU_OUT_OF_MEMORY ; + return (NULL) ; + } + + /* ------------------------------------------------------------------ */ + /* B = Puser * A */ + /* ------------------------------------------------------------------ */ + + if (Puser != (Int *) NULL) + { + for (k = 0 ; k < n ; k++) + { + Pinv [Puser [k]] = k ; + } + for (p = 0 ; p < nz ; p++) + { + Bi [p] = Pinv [Ai [p]] ; + } + } + + /* ------------------------------------------------------------------ */ + /* find the strongly-connected components */ + /* ------------------------------------------------------------------ */ + + /* modifies Q, and determines P and R */ + nblocks = BTF_strongcomp (n, Ap, Bi, Q, P, R, Work) ; + + /* ------------------------------------------------------------------ */ + /* P = P * Puser */ + /* ------------------------------------------------------------------ */ + + if (Puser != (Int *) NULL) + { + for (k = 0 ; k < n ; k++) + { + Work [k] = Puser [P [k]] ; + } + for (k = 0 ; k < n ; k++) + { + P [k] = Work [k] ; + } + } + + /* ------------------------------------------------------------------ */ + /* Pinv = inverse of P */ + /* ------------------------------------------------------------------ */ + + for (k = 0 ; k < n ; k++) + { + Pinv [P [k]] = k ; + } + + /* ------------------------------------------------------------------ */ + /* analyze each block */ + /* ------------------------------------------------------------------ */ + + nzoff = 0 ; /* nz in off-diagonal part */ + maxblock = 1 ; /* size of the largest block */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* -------------------------------------------------------------- */ + /* the block is from rows/columns k1 to k2-1 */ + /* -------------------------------------------------------------- */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ; + maxblock = MAX (maxblock, nk) ; + + /* -------------------------------------------------------------- */ + /* scan the kth block, C */ + /* -------------------------------------------------------------- */ + + for (k = k1 ; k < k2 ; k++) + { + oldcol = Q [k] ; + pend = Ap [oldcol+1] ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + if (Pinv [Ai [p]] < k1) + { + nzoff++ ; + } + } + } + + /* fill-in not estimated */ + Lnz [block] = EMPTY ; + } + + /* ------------------------------------------------------------------ */ + /* free all workspace */ + /* ------------------------------------------------------------------ */ + + KLU_free (Work, 4*n, sizeof (Int), Common) ; + KLU_free (Pinv, n, sizeof (Int), Common) ; + if (Puser != (Int *) NULL) + { + KLU_free (Bi, nz+1, sizeof (Int), Common) ; + } + + } + else + { + + /* ------------------------------------------------------------------ */ + /* BTF not requested */ + /* ------------------------------------------------------------------ */ + + nzoff = 0 ; + nblocks = 1 ; + maxblock = n ; + R [0] = 0 ; + R [1] = n ; + Lnz [0] = EMPTY ; + + /* ------------------------------------------------------------------ */ + /* P = Puser, or identity if Puser is NULL */ + /* ------------------------------------------------------------------ */ + + for (k = 0 ; k < n ; k++) + { + P [k] = (Puser == NULL) ? k : Puser [k] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* return the symbolic object */ + /* ---------------------------------------------------------------------- */ + + Symbolic->nblocks = nblocks ; + Symbolic->maxblock = maxblock ; + Symbolic->lnz = EMPTY ; + Symbolic->unz = EMPTY ; + Symbolic->nzoff = nzoff ; + + return (Symbolic) ; +} diff --git a/src/maths/KLU/klu_defaults.c b/src/maths/KLU/klu_defaults.c new file mode 100644 index 000000000..0369a37f3 --- /dev/null +++ b/src/maths/KLU/klu_defaults.c @@ -0,0 +1,60 @@ +/* ========================================================================== */ +/* === KLU_defaults ========================================================= */ +/* ========================================================================== */ + +/* Sets default parameters for KLU */ + +#include "klu_internal.h" + +Int KLU_defaults +( + KLU_common *Common +) +{ + if (Common == NULL) + { + return (FALSE) ; + } + + /* parameters */ + Common->tol = 0.001 ; /* pivot tolerance for diagonal */ + Common->memgrow = 1.2; /* realloc size ratio increase for LU factors */ + Common->initmem_amd = 1.2 ; /* init. mem with AMD: c*nnz(L) + n */ + Common->initmem = 10 ; /* init. mem otherwise: c*nnz(A) + n */ + Common->btf = TRUE ; /* use BTF pre-ordering, or not */ + Common->maxwork = 0 ; /* no limit to work done by btf_order */ + Common->ordering = 0 ; /* 0: AMD, 1: COLAMD, 2: user-provided P and Q, + * 3: user-provided function */ + Common->scale = 2 ; /* scale: -1: none, and do not check for errors + * in the input matrix in KLU_refactor. + * 0: none, but check for errors, + * 1: sum, 2: max */ + Common->halt_if_singular = TRUE ; /* quick halt if matrix is singular */ + + /* memory management routines */ + Common->malloc_memory = malloc ; + Common->calloc_memory = calloc ; + Common->free_memory = free ; + Common->realloc_memory = realloc ; + + /* user ordering function and optional argument */ + Common->user_order = NULL ; + Common->user_data = NULL ; + + /* statistics */ + Common->status = KLU_OK ; + Common->nrealloc = 0 ; + Common->structural_rank = EMPTY ; + Common->numerical_rank = EMPTY ; + Common->noffdiag = EMPTY ; + Common->flops = EMPTY ; + Common->rcond = EMPTY ; + Common->condest = EMPTY ; + Common->rgrowth = EMPTY ; + Common->work = 0 ; /* work done by btf_order */ + + Common->memusage = 0 ; + Common->mempeak = 0 ; + + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_diagnostics.c b/src/maths/KLU/klu_diagnostics.c new file mode 100644 index 000000000..45cb62764 --- /dev/null +++ b/src/maths/KLU/klu_diagnostics.c @@ -0,0 +1,570 @@ +/* ========================================================================== */ +/* === KLU_diagnostics ====================================================== */ +/* ========================================================================== */ + +/* Linear algebraic diagnostics: + * KLU_rgrowth: reciprocal pivot growth, takes O(|A|+|U|) time + * KLU_condest: condition number estimator, takes about O(|A|+5*(|L|+|U|)) time + * KLU_flops: compute # flops required to factorize A into L*U + * KLU_rcond: compute a really cheap estimate of the reciprocal of the + * condition number, min(abs(diag(U))) / max(abs(diag(U))). + * Takes O(n) time. + */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === KLU_rgrowth ========================================================== */ +/* ========================================================================== */ + +/* Compute the reciprocal pivot growth factor. In MATLAB notation: + * + * rgrowth = min (max (abs ((R \ A (p,q)) - F))) ./ max (abs (U))) + */ + +Int KLU_rgrowth /* return TRUE if successful, FALSE otherwise */ +( + Int *Ap, + Int *Ai, + double *Ax, + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + KLU_common *Common +) +{ + double temp, max_ai, max_ui, min_block_rgrowth ; + Entry aik ; + Int *Q, *Ui, *Uip, *Ulen, *Pinv ; + Unit *LU ; + Entry *Aentry, *Ux, *Ukk ; + double *Rs ; + Int i, newrow, oldrow, k1, k2, nk, j, oldcol, k, pend, len ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + + if (Symbolic == NULL || Ap == NULL || Ai == NULL || Ax == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + + if (Numeric == NULL) + { + /* treat this as a singular matrix */ + Common->rgrowth = 0 ; + Common->status = KLU_SINGULAR ; + return (TRUE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* compute the reciprocal pivot growth */ + /* ---------------------------------------------------------------------- */ + + Aentry = (Entry *) Ax ; + Pinv = Numeric->Pinv ; + Rs = Numeric->Rs ; + Q = Symbolic->Q ; + Common->rgrowth = 1 ; + + for (i = 0 ; i < Symbolic->nblocks ; i++) + { + k1 = Symbolic->R[i] ; + k2 = Symbolic->R[i+1] ; + nk = k2 - k1 ; + if (nk == 1) + { + continue ; /* skip singleton blocks */ + } + LU = (Unit *) Numeric->LUbx[i] ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + Ukk = ((Entry *) Numeric->Udiag) + k1 ; + min_block_rgrowth = 1 ; + for (j = 0 ; j < nk ; j++) + { + max_ai = 0 ; + max_ui = 0 ; + oldcol = Q[j + k1] ; + pend = Ap [oldcol + 1] ; + for (k = Ap [oldcol] ; k < pend ; k++) + { + oldrow = Ai [k] ; + newrow = Pinv [oldrow] ; + if (newrow < k1) + { + continue ; /* skip entry outside the block */ + } + ASSERT (newrow < k2) ; + if (Rs != NULL) + { + /* aik = Aentry [k] / Rs [oldrow] */ + SCALE_DIV_ASSIGN (aik, Aentry [k], Rs [newrow]) ; + } + else + { + aik = Aentry [k] ; + } + /* temp = ABS (aik) */ + ABS (temp, aik) ; + if (temp > max_ai) + { + max_ai = temp ; + } + } + + GET_POINTER (LU, Uip, Ulen, Ui, Ux, j, len) ; + for (k = 0 ; k < len ; k++) + { + /* temp = ABS (Ux [k]) */ + ABS (temp, Ux [k]) ; + if (temp > max_ui) + { + max_ui = temp ; + } + } + /* consider the diagonal element */ + ABS (temp, Ukk [j]) ; + if (temp > max_ui) + { + max_ui = temp ; + } + + /* if max_ui is 0, skip the column */ + if (SCALAR_IS_ZERO (max_ui)) + { + continue ; + } + temp = max_ai / max_ui ; + if (temp < min_block_rgrowth) + { + min_block_rgrowth = temp ; + } + } + + if (min_block_rgrowth < Common->rgrowth) + { + Common->rgrowth = min_block_rgrowth ; + } + } + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === KLU_condest ========================================================== */ +/* ========================================================================== */ + +/* Estimate the condition number. Uses Higham and Tisseur's algorithm + * (A block algorithm for matrix 1-norm estimation, with applications to + * 1-norm pseudospectra, SIAM J. Matrix Anal. Appl., 21(4):1185-1201, 2000. + */ + +Int KLU_condest /* return TRUE if successful, FALSE otherwise */ +( + Int Ap [ ], + double Ax [ ], + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + KLU_common *Common +) +{ + double xj, Xmax, csum, anorm, ainv_norm, est_old, est_new, abs_value ; + Entry *Udiag, *Aentry, *X, *S ; + Int *R ; + Int nblocks, i, j, jmax, jnew, pend, n ; +#ifndef COMPLEX + Int unchanged ; +#endif + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + if (Symbolic == NULL || Ap == NULL || Ax == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + abs_value = 0 ; + if (Numeric == NULL) + { + /* treat this as a singular matrix */ + Common->condest = 1 / abs_value ; + Common->status = KLU_SINGULAR ; + return (TRUE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* get inputs */ + /* ---------------------------------------------------------------------- */ + + n = Symbolic->n ; + nblocks = Symbolic->nblocks ; + R = Symbolic->R ; + Udiag = Numeric->Udiag ; + + /* ---------------------------------------------------------------------- */ + /* check if diagonal of U has a zero on it */ + /* ---------------------------------------------------------------------- */ + + for (i = 0 ; i < n ; i++) + { + ABS (abs_value, Udiag [i]) ; + if (SCALAR_IS_ZERO (abs_value)) + { + Common->condest = 1 / abs_value ; + Common->status = KLU_SINGULAR ; + return (TRUE) ; + } + } + + /* ---------------------------------------------------------------------- */ + /* compute 1-norm (maximum column sum) of the matrix */ + /* ---------------------------------------------------------------------- */ + + anorm = 0.0 ; + Aentry = (Entry *) Ax ; + for (i = 0 ; i < n ; i++) + { + pend = Ap [i + 1] ; + csum = 0.0 ; + for (j = Ap [i] ; j < pend ; j++) + { + ABS (abs_value, Aentry [j]) ; + csum += abs_value ; + } + if (csum > anorm) + { + anorm = csum ; + } + } + + /* ---------------------------------------------------------------------- */ + /* compute estimate of 1-norm of inv (A) */ + /* ---------------------------------------------------------------------- */ + + /* get workspace (size 2*n Entry's) */ + X = Numeric->Xwork ; /* size n space used in KLU_solve, tsolve */ + X += n ; /* X is size n */ + S = X + n ; /* S is size n */ + + for (i = 0 ; i < n ; i++) + { + CLEAR (S [i]) ; + CLEAR (X [i]) ; + REAL (X [i]) = 1.0 / ((double) n) ; + } + jmax = 0 ; + + ainv_norm = 0.0 ; + for (i = 0 ; i < 5 ; i++) + { + if (i > 0) + { + /* X [jmax] is the largest entry in X */ + for (j = 0 ; j < n ; j++) + { + /* X [j] = 0 ;*/ + CLEAR (X [j]) ; + } + REAL (X [jmax]) = 1 ; + } + + KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ; + est_old = ainv_norm ; + ainv_norm = 0.0 ; + + for (j = 0 ; j < n ; j++) + { + /* ainv_norm += ABS (X [j]) ;*/ + ABS (abs_value, X [j]) ; + ainv_norm += abs_value ; + } + +#ifndef COMPLEX + unchanged = TRUE ; + + for (j = 0 ; j < n ; j++) + { + double s = (X [j] >= 0) ? 1 : -1 ; + if (s != (Int) REAL (S [j])) + { + S [j] = s ; + unchanged = FALSE ; + } + } + + if (i > 0 && (ainv_norm <= est_old || unchanged)) + { + break ; + } +#else + for (j = 0 ; j < n ; j++) + { + if (IS_NONZERO (X [j])) + { + ABS (abs_value, X [j]) ; + SCALE_DIV_ASSIGN (S [j], X [j], abs_value) ; + } + else + { + CLEAR (S [j]) ; + REAL (S [j]) = 1 ; + } + } + + if (i > 0 && ainv_norm <= est_old) + { + break ; + } +#endif + + for (j = 0 ; j < n ; j++) + { + X [j] = S [j] ; + } + +#ifndef COMPLEX + /* do a transpose solve */ + KLU_tsolve (Symbolic, Numeric, n, 1, X, Common) ; +#else + /* do a conjugate transpose solve */ + KLU_tsolve (Symbolic, Numeric, n, 1, (double *) X, 1, Common) ; +#endif + + /* jnew = the position of the largest entry in X */ + jnew = 0 ; + Xmax = 0 ; + for (j = 0 ; j < n ; j++) + { + /* xj = ABS (X [j]) ;*/ + ABS (xj, X [j]) ; + if (xj > Xmax) + { + Xmax = xj ; + jnew = j ; + } + } + if (i > 0 && jnew == jmax) + { + /* the position of the largest entry did not change + * from the previous iteration */ + break ; + } + jmax = jnew ; + } + + /* ---------------------------------------------------------------------- */ + /* compute another estimate of norm(inv(A),1), and take the largest one */ + /* ---------------------------------------------------------------------- */ + + for (j = 0 ; j < n ; j++) + { + CLEAR (X [j]) ; + if (j % 2) + { + REAL (X [j]) = 1 + ((double) j) / ((double) (n-1)) ; + } + else + { + REAL (X [j]) = -1 - ((double) j) / ((double) (n-1)) ; + } + } + + KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ; + + est_new = 0.0 ; + for (j = 0 ; j < n ; j++) + { + /* est_new += ABS (X [j]) ;*/ + ABS (abs_value, X [j]) ; + est_new += abs_value ; + } + est_new = 2 * est_new / (3 * n) ; + ainv_norm = MAX (est_new, ainv_norm) ; + + /* ---------------------------------------------------------------------- */ + /* compute estimate of condition number */ + /* ---------------------------------------------------------------------- */ + + Common->condest = ainv_norm * anorm ; + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === KLU_flops ============================================================ */ +/* ========================================================================== */ + +/* Compute the flop count for the LU factorization (in Common->flops) */ + +Int KLU_flops /* return TRUE if successful, FALSE otherwise */ +( + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + KLU_common *Common +) +{ + double flops = 0 ; + Int *R, *Ui, *Uip, *Llen, *Ulen ; + Unit **LUbx ; + Unit *LU ; + Int k, ulen, p, n, nk, block, nblocks, k1 ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + Common->flops = EMPTY ; + if (Numeric == NULL || Symbolic == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + n = Symbolic->n ; + R = Symbolic->R ; + nblocks = Symbolic->nblocks ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Numeric object */ + /* ---------------------------------------------------------------------- */ + + LUbx = (Unit **) Numeric->LUbx ; + + /* ---------------------------------------------------------------------- */ + /* compute the flop count */ + /* ---------------------------------------------------------------------- */ + + for (block = 0 ; block < nblocks ; block++) + { + k1 = R [block] ; + nk = R [block+1] - k1 ; + if (nk > 1) + { + Llen = Numeric->Llen + k1 ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + LU = LUbx [block] ; + for (k = 0 ; k < nk ; k++) + { + /* compute kth column of U, and update kth column of A */ + GET_I_POINTER (LU, Uip, Ui, k) ; + ulen = Ulen [k] ; + for (p = 0 ; p < ulen ; p++) + { + flops += 2 * Llen [Ui [p]] ; + } + /* gather and divide by pivot to get kth column of L */ + flops += Llen [k] ; + } + } + } + Common->flops = flops ; + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === KLU_rcond ============================================================ */ +/* ========================================================================== */ + +/* Compute a really cheap estimate of the reciprocal of the condition number, + * condition number, min(abs(diag(U))) / max(abs(diag(U))). If U has a zero + * pivot, or a NaN pivot, rcond will be zero. Takes O(n) time. + */ + +Int KLU_rcond /* return TRUE if successful, FALSE otherwise */ +( + KLU_symbolic *Symbolic, /* input, not modified */ + KLU_numeric *Numeric, /* input, not modified */ + KLU_common *Common /* result in Common->rcond */ +) +{ + double ukk, umin = 0, umax = 0 ; + Entry *Udiag ; + Int j, n ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + if (Symbolic == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + if (Numeric == NULL) + { + Common->rcond = 0 ; + Common->status = KLU_SINGULAR ; + return (TRUE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* compute rcond */ + /* ---------------------------------------------------------------------- */ + + n = Symbolic->n ; + Udiag = Numeric->Udiag ; + for (j = 0 ; j < n ; j++) + { + /* get the magnitude of the pivot */ + ABS (ukk, Udiag [j]) ; + if (SCALAR_IS_NAN (ukk) || SCALAR_IS_ZERO (ukk)) + { + /* if NaN, or zero, the rcond is zero */ + Common->rcond = 0 ; + Common->status = KLU_SINGULAR ; + return (TRUE) ; + } + if (j == 0) + { + /* first pivot entry */ + umin = ukk ; + umax = ukk ; + } + else + { + /* subsequent pivots */ + umin = MIN (umin, ukk) ; + umax = MAX (umax, ukk) ; + } + } + + Common->rcond = umin / umax ; + if (SCALAR_IS_NAN (Common->rcond) || SCALAR_IS_ZERO (Common->rcond)) + { + /* this can occur if umin or umax are Inf or NaN */ + Common->rcond = 0 ; + Common->status = KLU_SINGULAR ; + } + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_dump.c b/src/maths/KLU/klu_dump.c new file mode 100644 index 000000000..63170bc4c --- /dev/null +++ b/src/maths/KLU/klu_dump.c @@ -0,0 +1,142 @@ +/* ========================================================================== */ +/* === KLU_dump ============================================================= */ +/* ========================================================================== */ + +/* Debug routines for klu. Only used when NDEBUG is not defined at + * compile-time. + */ + +#include "klu_internal.h" + +#ifndef NDEBUG + +/* ========================================================================== */ +/* === KLU_valid ============================================================ */ +/* ========================================================================== */ + +/* Check if a column-form matrix is valid or not. The matrix A is + * n-by-n. The row indices of entries in column j are in + * Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are: + * + * n >= 0 + * nz = Ap [n_col] >= 0 number of entries in the matrix + * Ap [0] == 0 + * Ap [j] <= Ap [j+1] for all j in the range 0 to n_col. + * row indices in Ai [Ap [j] ... Ap [j+1]-1] + * must be in the range 0 to n_row-1, + * and no duplicate entries can exist (duplicates not checked here). + * + * Not user-callable. Only used when debugging. + */ + +Int KLU_valid (Int n, Int Ap [ ], Int Ai [ ], Entry Ax [ ]) +{ + Int nz, j, p1, p2, i, p ; + PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ; + if (n <= 0) + { + PRINTF (("n must be >= 0: %d\n", n)) ; + return (FALSE) ; + } + nz = Ap [n] ; + if (Ap [0] != 0 || nz < 0) + { + /* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */ + PRINTF (("column 0 pointer bad or nz < 0\n")) ; + return (FALSE) ; + } + for (j = 0 ; j < n ; j++) + { + p1 = Ap [j] ; + p2 = Ap [j+1] ; + PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ; + if (p1 > p2) + { + /* column pointers must be ascending */ + PRINTF (("column %d pointer bad\n", j)) ; + return (FALSE) ; + } + for (p = p1 ; p < p2 ; p++) + { + i = Ai [p] ; + PRINTF (("row: %d", i)) ; + if (i < 0 || i >= n) + { + /* row index out of range */ + PRINTF (("index out of range, col %d row %d\n", j, i)) ; + return (FALSE) ; + } + if (Ax != (Entry *) NULL) + { + PRINT_ENTRY (Ax [p]) ; + } + PRINTF (("\n")) ; + } + } + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === KLU_valid_LU ========================================================= */ +/* ========================================================================== */ + +/* This function does the same validity tests as KLU_valid but for the + * LU factor storage format. The flag flag_test_start_ptr is used to + * test if Xip [0] = 0. This is not applicable for U. So when calling this + * function for U, the flag should be set to false. Only used when debugging. + */ + +Int KLU_valid_LU (Int n, Int flag_test_start_ptr, Int Xip [ ], + Int Xlen [ ], Unit LU [ ]) +{ + Int *Xi ; + Entry *Xx ; + Int j, p1, p2, i, p, len ; + + PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ; + if (n <= 0) + { + PRINTF (("n must be >= 0: %d\n", n)) ; + return (FALSE) ; + } + if (flag_test_start_ptr && Xip [0] != 0) + { + /* column pointers must start at Xip [0] = 0*/ + PRINTF (("column 0 pointer bad\n")) ; + return (FALSE) ; + } + + for (j = 0 ; j < n ; j++) + { + p1 = Xip [j] ; + p2 = Xip [j+1] ; + PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ; + if (p1 > p2) + { + /* column pointers must be ascending */ + PRINTF (("column %d pointer bad\n", j)) ; + return (FALSE) ; + } + GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; + for (p = 0 ; p < len ; p++) + { + i = Xi [p] ; + PRINTF (("row: %d", i)) ; + if (i < 0 || i >= n) + { + /* row index out of range */ + PRINTF (("index out of range, col %d row %d\n", j, i)) ; + return (FALSE) ; + } + if (Xx != (Entry *) NULL) + { + PRINT_ENTRY (Xx [p]) ; + } + PRINTF (("\n")) ; + } + } + + return (TRUE) ; +} +#endif diff --git a/src/maths/KLU/klu_extract.c b/src/maths/KLU/klu_extract.c new file mode 100644 index 000000000..b009828a7 --- /dev/null +++ b/src/maths/KLU/klu_extract.c @@ -0,0 +1,290 @@ +/* ========================================================================== */ +/* === KLU_extract ========================================================== */ +/* ========================================================================== */ + +/* Extract KLU factorization into conventional compressed-column matrices. + * If any output array is NULL, that part of the LU factorization is not + * extracted (this is not an error condition). + * + * nnz(L) = Numeric->lnz, nnz(U) = Numeric->unz, and nnz(F) = Numeric->Offp [n] + */ + +#include "klu_internal.h" + +Int KLU_extract /* returns TRUE if successful, FALSE otherwise */ +( + /* inputs: */ + KLU_numeric *Numeric, + KLU_symbolic *Symbolic, + + /* outputs, all of which must be allocated on input */ + + /* L */ + Int *Lp, /* size n+1 */ + Int *Li, /* size nnz(L) */ + double *Lx, /* size nnz(L) */ +#ifdef COMPLEX + double *Lz, /* size nnz(L) for the complex case, ignored if real */ +#endif + + /* U */ + Int *Up, /* size n+1 */ + Int *Ui, /* size nnz(U) */ + double *Ux, /* size nnz(U) */ +#ifdef COMPLEX + double *Uz, /* size nnz(U) for the complex case, ignored if real */ +#endif + + /* F */ + Int *Fp, /* size n+1 */ + Int *Fi, /* size nnz(F) */ + double *Fx, /* size nnz(F) */ +#ifdef COMPLEX + double *Fz, /* size nnz(F) for the complex case, ignored if real */ +#endif + + /* P, row permutation */ + Int *P, /* size n */ + + /* Q, column permutation */ + Int *Q, /* size n */ + + /* Rs, scale factors */ + double *Rs, /* size n */ + + /* R, block boundaries */ + Int *R, /* size nblocks+1 */ + + KLU_common *Common +) +{ + Int *Lip, *Llen, *Uip, *Ulen, *Li2, *Ui2 ; + Unit *LU ; + Entry *Lx2, *Ux2, *Ukk ; + Int i, k, block, nblocks, n, nz, k1, k2, nk, len, kk, p ; + + if (Common == NULL) + { + return (FALSE) ; + } + + if (Symbolic == NULL || Numeric == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + + Common->status = KLU_OK ; + n = Symbolic->n ; + nblocks = Symbolic->nblocks ; + + /* ---------------------------------------------------------------------- */ + /* extract scale factors */ + /* ---------------------------------------------------------------------- */ + + if (Rs != NULL) + { + if (Numeric->Rs != NULL) + { + for (i = 0 ; i < n ; i++) + { + Rs [i] = Numeric->Rs [i] ; + } + } + else + { + /* no scaling */ + for (i = 0 ; i < n ; i++) + { + Rs [i] = 1 ; + } + } + } + + /* ---------------------------------------------------------------------- */ + /* extract block boundaries */ + /* ---------------------------------------------------------------------- */ + + if (R != NULL) + { + for (block = 0 ; block <= nblocks ; block++) + { + R [block] = Symbolic->R [block] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* extract final row permutation */ + /* ---------------------------------------------------------------------- */ + + if (P != NULL) + { + for (k = 0 ; k < n ; k++) + { + P [k] = Numeric->Pnum [k] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* extract column permutation */ + /* ---------------------------------------------------------------------- */ + + if (Q != NULL) + { + for (k = 0 ; k < n ; k++) + { + Q [k] = Symbolic->Q [k] ; + } + } + + /* ---------------------------------------------------------------------- */ + /* extract each block of L */ + /* ---------------------------------------------------------------------- */ + + if (Lp != NULL && Li != NULL && Lx != NULL +#ifdef COMPLEX + && Lz != NULL +#endif + ) + { + nz = 0 ; + for (block = 0 ; block < nblocks ; block++) + { + k1 = Symbolic->R [block] ; + k2 = Symbolic->R [block+1] ; + nk = k2 - k1 ; + if (nk == 1) + { + /* singleton block */ + Lp [k1] = nz ; + Li [nz] = k1 ; + Lx [nz] = 1 ; +#ifdef COMPLEX + Lz [nz] = 0 ; +#endif + nz++ ; + } + else + { + /* non-singleton block */ + LU = Numeric->LUbx [block] ; + Lip = Numeric->Lip + k1 ; + Llen = Numeric->Llen + k1 ; + for (kk = 0 ; kk < nk ; kk++) + { + Lp [k1+kk] = nz ; + /* add the unit diagonal entry */ + Li [nz] = k1 + kk ; + Lx [nz] = 1 ; +#ifdef COMPLEX + Lz [nz] = 0 ; +#endif + nz++ ; + GET_POINTER (LU, Lip, Llen, Li2, Lx2, kk, len) ; + for (p = 0 ; p < len ; p++) + { + Li [nz] = k1 + Li2 [p] ; + Lx [nz] = REAL (Lx2 [p]) ; +#ifdef COMPLEX + Lz [nz] = IMAG (Lx2 [p]) ; +#endif + nz++ ; + } + } + } + } + Lp [n] = nz ; + ASSERT (nz == Numeric->lnz) ; + } + + /* ---------------------------------------------------------------------- */ + /* extract each block of U */ + /* ---------------------------------------------------------------------- */ + + if (Up != NULL && Ui != NULL && Ux != NULL +#ifdef COMPLEX + && Uz != NULL +#endif + ) + { + nz = 0 ; + for (block = 0 ; block < nblocks ; block++) + { + k1 = Symbolic->R [block] ; + k2 = Symbolic->R [block+1] ; + nk = k2 - k1 ; + Ukk = ((Entry *) Numeric->Udiag) + k1 ; + if (nk == 1) + { + /* singleton block */ + Up [k1] = nz ; + Ui [nz] = k1 ; + Ux [nz] = REAL (Ukk [0]) ; +#ifdef COMPLEX + Uz [nz] = IMAG (Ukk [0]) ; +#endif + nz++ ; + } + else + { + /* non-singleton block */ + LU = Numeric->LUbx [block] ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + for (kk = 0 ; kk < nk ; kk++) + { + Up [k1+kk] = nz ; + GET_POINTER (LU, Uip, Ulen, Ui2, Ux2, kk, len) ; + for (p = 0 ; p < len ; p++) + { + Ui [nz] = k1 + Ui2 [p] ; + Ux [nz] = REAL (Ux2 [p]) ; +#ifdef COMPLEX + Uz [nz] = IMAG (Ux2 [p]) ; +#endif + nz++ ; + } + /* add the diagonal entry */ + Ui [nz] = k1 + kk ; + Ux [nz] = REAL (Ukk [kk]) ; +#ifdef COMPLEX + Uz [nz] = IMAG (Ukk [kk]) ; +#endif + nz++ ; + } + } + } + Up [n] = nz ; + ASSERT (nz == Numeric->unz) ; + } + + /* ---------------------------------------------------------------------- */ + /* extract the off-diagonal blocks, F */ + /* ---------------------------------------------------------------------- */ + + if (Fp != NULL && Fi != NULL && Fx != NULL +#ifdef COMPLEX + && Fz != NULL +#endif + ) + { + for (k = 0 ; k <= n ; k++) + { + Fp [k] = Numeric->Offp [k] ; + } + nz = Fp [n] ; + for (k = 0 ; k < nz ; k++) + { + Fi [k] = Numeric->Offi [k] ; + } + for (k = 0 ; k < nz ; k++) + { + Fx [k] = REAL (((Entry *) Numeric->Offx) [k]) ; +#ifdef COMPLEX + Fz [k] = IMAG (((Entry *) Numeric->Offx) [k]) ; +#endif + } + } + + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_factor.c b/src/maths/KLU/klu_factor.c new file mode 100644 index 000000000..50b52ea6d --- /dev/null +++ b/src/maths/KLU/klu_factor.c @@ -0,0 +1,545 @@ +/* ========================================================================== */ +/* === KLU_factor =========================================================== */ +/* ========================================================================== */ + +/* Factor the matrix, after ordering and analyzing it with KLU_analyze + * or KLU_analyze_given. + */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === KLU_factor2 ========================================================== */ +/* ========================================================================== */ + +static void factor2 +( + /* inputs, not modified */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + Entry Ax [ ], + KLU_symbolic *Symbolic, + + /* inputs, modified on output: */ + KLU_numeric *Numeric, + KLU_common *Common +) +{ + double lsize ; + double *Lnz, *Rs ; + Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork, + *Lip, *Uip, *Llen, *Ulen ; + Entry *Offx, *X, s, *Udiag ; + Unit **LUbx ; + Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow, + nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block, + max_unz_block ; + + /* ---------------------------------------------------------------------- */ + /* initializations */ + /* ---------------------------------------------------------------------- */ + + /* get the contents of the Symbolic object */ + n = Symbolic->n ; + P = Symbolic->P ; + Q = Symbolic->Q ; + R = Symbolic->R ; + Lnz = Symbolic->Lnz ; + nblocks = Symbolic->nblocks ; + nzoff = Symbolic->nzoff ; + + Pnum = Numeric->Pnum ; + Offp = Numeric->Offp ; + Offi = Numeric->Offi ; + Offx = (Entry *) Numeric->Offx ; + + Lip = Numeric->Lip ; + Uip = Numeric->Uip ; + Llen = Numeric->Llen ; + Ulen = Numeric->Ulen ; + LUbx = (Unit **) Numeric->LUbx ; + Udiag = Numeric->Udiag ; + + Rs = Numeric->Rs ; + Pinv = Numeric->Pinv ; + X = (Entry *) Numeric->Xwork ; /* X is of size n */ + Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */ + /* 1*maxblock for Pblock */ + Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ; + Common->nrealloc = 0 ; + scale = Common->scale ; + max_lnz_block = 1 ; + max_unz_block = 1 ; + + /* compute the inverse of P from symbolic analysis. Will be updated to + * become the inverse of the numerical factorization when the factorization + * is done, for use in KLU_refactor */ +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + Pinv [k] = EMPTY ; + } +#endif + for (k = 0 ; k < n ; k++) + { + ASSERT (P [k] >= 0 && P [k] < n) ; + Pinv [P [k]] = k ; + } +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; +#endif + + lnz = 0 ; + unz = 0 ; + Common->noffdiag = 0 ; + Offp [0] = 0 ; + + /* ---------------------------------------------------------------------- */ + /* optionally check input matrix and compute scale factors */ + /* ---------------------------------------------------------------------- */ + + if (scale >= 0) + { + /* use Pnum as workspace. NOTE: scale factors are not yet permuted + * according to the final pivot row ordering, so Rs [oldrow] is the + * scale factor for A (oldrow,:), for the user's matrix A. Pnum is + * used as workspace in KLU_scale. When the factorization is done, + * the scale factors are permuted according to the final pivot row + * permutation, so that Rs [k] is the scale factor for the kth row of + * A(p,q) where p and q are the final row and column permutations. */ + KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ; + if (Common->status < KLU_OK) + { + /* matrix is invalid */ + return ; + } + } + +#ifndef NDEBUG + if (scale > 0) + { + for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ; + } +#endif + + /* ---------------------------------------------------------------------- */ + /* factor each block using klu */ + /* ---------------------------------------------------------------------- */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* ------------------------------------------------------------------ */ + /* the block is from rows/columns k1 to k2-1 */ + /* ------------------------------------------------------------------ */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; + + if (nk == 1) + { + + /* -------------------------------------------------------------- */ + /* singleton case */ + /* -------------------------------------------------------------- */ + + poff = Offp [k1] ; + oldcol = Q [k1] ; + pend = Ap [oldcol+1] ; + CLEAR (s) ; + + if (scale <= 0) + { + /* no scaling */ + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + newrow = Pinv [oldrow] ; + if (newrow < k1) + { + Offi [poff] = oldrow ; + Offx [poff] = Ax [p] ; + poff++ ; + } + else + { + ASSERT (newrow == k1) ; + PRINTF (("singleton block %d", block)) ; + PRINT_ENTRY (Ax [p]) ; + s = Ax [p] ; + } + } + } + else + { + /* row scaling. NOTE: scale factors are not yet permuted + * according to the pivot row permutation, so Rs [oldrow] is + * used below. When the factorization is done, the scale + * factors are permuted, so that Rs [newrow] will be used in + * klu_solve, klu_tsolve, and klu_rgrowth */ + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + newrow = Pinv [oldrow] ; + if (newrow < k1) + { + Offi [poff] = oldrow ; + /* Offx [poff] = Ax [p] / Rs [oldrow] ; */ + SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ; + poff++ ; + } + else + { + ASSERT (newrow == k1) ; + PRINTF (("singleton block %d ", block)) ; + PRINT_ENTRY (Ax[p]) ; + SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ; + } + } + } + + Udiag [k1] = s ; + + if (IS_ZERO (s)) + { + /* singular singleton */ + Common->status = KLU_SINGULAR ; + Common->numerical_rank = k1 ; + Common->singular_col = oldcol ; + if (Common->halt_if_singular) + { + return ; + } + } + + Offp [k1+1] = poff ; + Pnum [k1] = P [k1] ; + lnz++ ; + unz++ ; + + } + else + { + + /* -------------------------------------------------------------- */ + /* construct and factorize the kth block */ + /* -------------------------------------------------------------- */ + + if (Lnz [block] < 0) + { + /* COLAMD was used - no estimate of fill-in */ + /* use 10 times the nnz in A, plus n */ + lsize = -(Common->initmem) ; + } + else + { + lsize = Common->initmem_amd * Lnz [block] + nk ; + } + + /* allocates 1 arrays: LUbx [block] */ + Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q, + lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1, + Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block, + X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ; + + if (Common->status < KLU_OK || + (Common->status == KLU_SINGULAR && Common->halt_if_singular)) + { + /* out of memory, invalid inputs, or singular */ + return ; + } + + PRINTF (("\n----------------------- L %d:\n", block)) ; + ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ; + PRINTF (("\n----------------------- U %d:\n", block)) ; + ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ; + + /* -------------------------------------------------------------- */ + /* get statistics */ + /* -------------------------------------------------------------- */ + + lnz += lnz_block ; + unz += unz_block ; + max_lnz_block = MAX (max_lnz_block, lnz_block) ; + max_unz_block = MAX (max_unz_block, unz_block) ; + + if (Lnz [block] == EMPTY) + { + /* revise estimate for subsequent factorization */ + Lnz [block] = MAX (lnz_block, unz_block) ; + } + + /* -------------------------------------------------------------- */ + /* combine the klu row ordering with the symbolic pre-ordering */ + /* -------------------------------------------------------------- */ + + PRINTF (("Pnum, 1-based:\n")) ; + for (k = 0 ; k < nk ; k++) + { + ASSERT (k + k1 < n) ; + ASSERT (Pblock [k] + k1 < n) ; + Pnum [k + k1] = P [Pblock [k] + k1] ; + PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n", + k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ; + } + + /* the local pivot row permutation Pblock is no longer needed */ + } + } + ASSERT (nzoff == Offp [n]) ; + PRINTF (("\n------------------- Off diagonal entries:\n")) ; + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + + Numeric->lnz = lnz ; + Numeric->unz = unz ; + Numeric->max_lnz_block = max_lnz_block ; + Numeric->max_unz_block = max_unz_block ; + + /* compute the inverse of Pnum */ +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + Pinv [k] = EMPTY ; + } +#endif + for (k = 0 ; k < n ; k++) + { + ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ; + Pinv [Pnum [k]] = k ; + } +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; +#endif + + /* permute scale factors Rs according to pivotal row order */ + if (scale > 0) + { + for (k = 0 ; k < n ; k++) + { + REAL (X [k]) = Rs [Pnum [k]] ; + } + for (k = 0 ; k < n ; k++) + { + Rs [k] = REAL (X [k]) ; + } + } + + PRINTF (("\n------------------- Off diagonal entries, old:\n")) ; + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + + /* apply the pivot row permutations to the off-diagonal entries */ + for (p = 0 ; p < nzoff ; p++) + { + ASSERT (Offi [p] >= 0 && Offi [p] < n) ; + Offi [p] = Pinv [Offi [p]] ; + } + + PRINTF (("\n------------------- Off diagonal entries, new:\n")) ; + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + +#ifndef NDEBUG + { + PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks)); + Entry ss, *Udiag = Numeric->Udiag ; + for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++) + { + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ; + if (nk == 1) + { + PRINTF (("singleton ")) ; + /* ENTRY_PRINT (singleton [block]) ; */ + ss = Udiag [k1] ; + PRINT_ENTRY (ss) ; + } + else + { + Int *Lip, *Uip, *Llen, *Ulen ; + Unit *LU ; + Lip = Numeric->Lip + k1 ; + Llen = Numeric->Llen + k1 ; + LU = (Unit *) Numeric->LUbx [block] ; + PRINTF (("\n---- L block %d\n", block)); + ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + PRINTF (("\n---- U block %d\n", block)) ; + ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ; + } + } + } +#endif +} + + + +/* ========================================================================== */ +/* === KLU_factor =========================================================== */ +/* ========================================================================== */ + +KLU_numeric *KLU_factor /* returns NULL if error, or a valid + KLU_numeric object if successful */ +( + /* --- inputs --- */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + double Ax [ ], + KLU_symbolic *Symbolic, + /* -------------- */ + KLU_common *Common +) +{ + Int n, nzoff, nblocks, maxblock, k, ok = TRUE ; + Int *R ; + KLU_numeric *Numeric ; + size_t n1, nzoff1, s, b6, n3 ; + + if (Common == NULL) + { + return (NULL) ; + } + Common->status = KLU_OK ; + Common->numerical_rank = EMPTY ; + Common->singular_col = EMPTY ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + /* check for a valid Symbolic object */ + if (Symbolic == NULL) + { + Common->status = KLU_INVALID ; + return (NULL) ; + } + + n = Symbolic->n ; + nzoff = Symbolic->nzoff ; + nblocks = Symbolic->nblocks ; + maxblock = Symbolic->maxblock ; + R = Symbolic->R ; + PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n", + n, nzoff, nblocks, maxblock)) ; + + /* ---------------------------------------------------------------------- */ + /* get control parameters and make sure they are in the proper range */ + /* ---------------------------------------------------------------------- */ + + Common->initmem_amd = MAX (1.0, Common->initmem_amd) ; + Common->initmem = MAX (1.0, Common->initmem) ; + Common->tol = MIN (Common->tol, 1.0) ; + Common->tol = MAX (0.0, Common->tol) ; + Common->memgrow = MAX (1.0, Common->memgrow) ; + + /* ---------------------------------------------------------------------- */ + /* allocate the Numeric object */ + /* ---------------------------------------------------------------------- */ + + /* this will not cause size_t overflow (already checked by KLU_symbolic) */ + n1 = ((size_t) n) + 1 ; + nzoff1 = ((size_t) nzoff) + 1 ; + + Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ; + if (Common->status < KLU_OK) + { + /* out of memory */ + Common->status = KLU_OUT_OF_MEMORY ; + return (NULL) ; + } + Numeric->n = n ; + Numeric->nblocks = nblocks ; + Numeric->nzoff = nzoff ; + Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ; + Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ; + Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ; + Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ; + + Numeric->Lip = KLU_malloc (n, sizeof (Int), Common) ; + Numeric->Uip = KLU_malloc (n, sizeof (Int), Common) ; + Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ; + Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ; + + Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ; + + Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ; + if (Numeric->LUbx != NULL) + { + for (k = 0 ; k < nblocks ; k++) + { + Numeric->LUbx [k] = NULL ; + } + } + + Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ; + + if (Common->scale > 0) + { + Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ; + } + else + { + /* no scaling */ + Numeric->Rs = NULL ; + } + + Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ; + + /* allocate permanent workspace for factorization and solve. Note that the + * solver will use an Xwork of size 4n, whereas the factorization codes use + * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest + * uses an Xwork of size 2n. Total size is: + * + * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry)) + */ + s = KLU_mult_size_t (n, sizeof (Entry), &ok) ; + n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ; + b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ; + Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ; + Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ; + Numeric->Xwork = Numeric->Work ; + Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ; + if (!ok || Common->status < KLU_OK) + { + /* out of memory or problem too large */ + Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ; + KLU_free_numeric (&Numeric, Common) ; + return (NULL) ; + } + + /* ---------------------------------------------------------------------- */ + /* factorize the blocks */ + /* ---------------------------------------------------------------------- */ + + factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ; + + /* ---------------------------------------------------------------------- */ + /* return or free the Numeric object */ + /* ---------------------------------------------------------------------- */ + + if (Common->status < KLU_OK) + { + /* out of memory or inputs invalid */ + KLU_free_numeric (&Numeric, Common) ; + } + else if (Common->status == KLU_SINGULAR) + { + if (Common->halt_if_singular) + { + /* Matrix is singular, and the Numeric object is only partially + * defined because we halted early. This is the default case for + * a singular matrix. */ + KLU_free_numeric (&Numeric, Common) ; + } + } + else if (Common->status == KLU_OK) + { + /* successful non-singular factorization */ + Common->numerical_rank = n ; + Common->singular_col = n ; + } + return (Numeric) ; +} diff --git a/src/maths/KLU/klu_free_numeric.c b/src/maths/KLU/klu_free_numeric.c new file mode 100644 index 000000000..cd4f3bdcd --- /dev/null +++ b/src/maths/KLU/klu_free_numeric.c @@ -0,0 +1,71 @@ +/* ========================================================================== */ +/* === KLU_free_numeric ===================================================== */ +/* ========================================================================== */ + +/* Free the KLU Numeric object. */ + +#include "klu_internal.h" + +Int KLU_free_numeric +( + KLU_numeric **NumericHandle, + KLU_common *Common +) +{ + KLU_numeric *Numeric ; + Unit **LUbx ; + size_t *LUsize ; + Int block, n, nzoff, nblocks ; + + if (Common == NULL) + { + return (FALSE) ; + } + if (NumericHandle == NULL || *NumericHandle == NULL) + { + return (TRUE) ; + } + + Numeric = *NumericHandle ; + + n = Numeric->n ; + nzoff = Numeric->nzoff ; + nblocks = Numeric->nblocks ; + LUsize = Numeric->LUsize ; + + LUbx = (Unit **) Numeric->LUbx ; + if (LUbx != NULL) + { + for (block = 0 ; block < nblocks ; block++) + { + KLU_free (LUbx [block], LUsize ? LUsize [block] : 0, + sizeof (Unit), Common) ; + } + } + + KLU_free (Numeric->Pnum, n, sizeof (Int), Common) ; + KLU_free (Numeric->Offp, n+1, sizeof (Int), Common) ; + KLU_free (Numeric->Offi, nzoff+1, sizeof (Int), Common) ; + KLU_free (Numeric->Offx, nzoff+1, sizeof (Entry), Common) ; + + KLU_free (Numeric->Lip, n, sizeof (Int), Common) ; + KLU_free (Numeric->Llen, n, sizeof (Int), Common) ; + KLU_free (Numeric->Uip, n, sizeof (Int), Common) ; + KLU_free (Numeric->Ulen, n, sizeof (Int), Common) ; + + KLU_free (Numeric->LUsize, nblocks, sizeof (size_t), Common) ; + + KLU_free (Numeric->LUbx, nblocks, sizeof (Unit *), Common) ; + + KLU_free (Numeric->Udiag, n, sizeof (Entry), Common) ; + + KLU_free (Numeric->Rs, n, sizeof (double), Common) ; + KLU_free (Numeric->Pinv, n, sizeof (Int), Common) ; + + KLU_free (Numeric->Work, Numeric->worksize, 1, Common) ; + + KLU_free (Numeric, 1, sizeof (KLU_numeric), Common) ; + + *NumericHandle = NULL ; + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_free_symbolic.c b/src/maths/KLU/klu_free_symbolic.c new file mode 100644 index 000000000..20b4000e6 --- /dev/null +++ b/src/maths/KLU/klu_free_symbolic.c @@ -0,0 +1,34 @@ +/* ========================================================================== */ +/* === KLU_free_symbolic ==================================================== */ +/* ========================================================================== */ + +/* Free the KLU Symbolic object. */ + +#include "klu_internal.h" + +Int KLU_free_symbolic +( + KLU_symbolic **SymbolicHandle, + KLU_common *Common +) +{ + KLU_symbolic *Symbolic ; + Int n ; + if (Common == NULL) + { + return (FALSE) ; + } + if (SymbolicHandle == NULL || *SymbolicHandle == NULL) + { + return (TRUE) ; + } + Symbolic = *SymbolicHandle ; + n = Symbolic->n ; + KLU_free (Symbolic->P, n, sizeof (Int), Common) ; + KLU_free (Symbolic->Q, n, sizeof (Int), Common) ; + KLU_free (Symbolic->R, n+1, sizeof (Int), Common) ; + KLU_free (Symbolic->Lnz, n, sizeof (double), Common) ; + KLU_free (Symbolic, 1, sizeof (KLU_symbolic), Common) ; + *SymbolicHandle = NULL ; + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_internal.h b/src/maths/KLU/klu_internal.h new file mode 100644 index 000000000..662fa8ca2 --- /dev/null +++ b/src/maths/KLU/klu_internal.h @@ -0,0 +1,243 @@ +/* ========================================================================== */ +/* === KLU/Include/klu_internal.h =========================================== */ +/* ========================================================================== */ + +/* For internal use in KLU routines only, not for user programs */ + +#ifndef _KLU_INTERNAL_H +#define _KLU_INTERNAL_H + +#include "klu.h" +#include "btf.h" +#include "klu_version.h" + +/* ========================================================================== */ +/* make sure debugging and printing is turned off */ + +#ifndef NDEBUG +#define NDEBUG +#endif +#ifndef NPRINT +#define NPRINT +#endif + +/* To enable debugging and assertions, uncomment this line: + #undef NDEBUG + */ + +/* To enable diagnostic printing, uncomment this line: + #undef NPRINT + */ + +/* ========================================================================== */ + +#include +#include +#include +#include +#include + +#undef ASSERT +#ifndef NDEBUG +#define ASSERT(a) assert(a) +#else +#define ASSERT(a) +#endif + +#define SCALAR_IS_NAN(x) ((x) != (x)) + +/* true if an integer (stored in double x) would overflow (or if x is NaN) */ +#define INT_OVERFLOW(x) ((!((x) * (1.0+1e-8) <= (double) INT_MAX)) \ + || SCALAR_IS_NAN (x)) + +#undef TRUE +#undef FALSE +#undef MAX +#undef MIN +#undef PRINTF +#undef FLIP + +#ifndef NPRINT +#define PRINTF(s) { printf s ; } ; +#else +#define PRINTF(s) +#endif + +#define TRUE 1 +#define FALSE 0 +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +/* FLIP is a "negation about -1", and is used to mark an integer i that is + * normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY + * is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i + * for all integers i. UNFLIP (i) is >= EMPTY. */ +#define EMPTY (-1) +#define FLIP(i) (-(i)-2) +#define UNFLIP(i) (((i) < EMPTY) ? FLIP (i) : (i)) + + +size_t KLU_kernel /* final size of LU on output */ +( + /* input, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers for A */ + Int Ai [ ], /* size nz = Ap [n], row indices for A */ + Entry Ax [ ], /* size nz, values of A */ + Int Q [ ], /* size n, optional input permutation */ + size_t lusize, /* initial size of LU */ + + /* output, not defined on input */ + Int Pinv [ ], /* size n */ + Int P [ ], /* size n */ + Unit **p_LU, /* size lusize on input, size Uxp[n] on output*/ + Entry Udiag [ ], /* size n, diagonal of U */ + Int Llen [ ], /* size n, column length of L */ + Int Ulen [ ], /* size n, column length of U */ + Int Lip [ ], /* size n+1 */ + Int Uip [ ], /* size n+1 */ + Int *lnz, /* size of L */ + Int *unz, /* size of U */ + + /* workspace, not defined on input */ + Entry X [ ], /* size n, zero on output */ + + /* workspace, not defined on input or output */ + Int Stack [ ], /* size n */ + Int Flag [ ], /* size n */ + Int adj_pos [ ], /* size n */ + + /* workspace for pruning only */ + Int Lpend [ ], /* size n workspace */ + + /* inputs, not modified on output */ + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ], /* inverse of P from symbolic factorization */ + double Rs [ ], /* scale factors for A */ + + /* inputs, modified on output */ + Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ + Int Offi [ ], + Entry Offx [ ], + KLU_common *Common /* the control input/output structure */ +) ; + + +size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */ +( + /* inputs, not modified */ + Int n, /* A is n-by-n. n must be > 0. */ + Int Ap [ ], /* size n+1, column pointers for A */ + Int Ai [ ], /* size nz = Ap [n], row indices for A */ + Entry Ax [ ], /* size nz, values of A */ + Int Q [ ], /* size n, optional column permutation */ + double Lsize, /* initial size of L and U */ + + /* outputs, not defined on input */ + Unit **p_LU, /* row indices and values of L and U */ + Entry Udiag [ ], /* size n, diagonal of U */ + Int Llen [ ], /* size n, column length of L */ + Int Ulen [ ], /* size n, column length of U */ + Int Lip [ ], /* size n+1, column pointers of L */ + Int Uip [ ], /* size n+1, column pointers of U */ + Int P [ ], /* row permutation, size n */ + Int *lnz, /* size of L */ + Int *unz, /* size of U */ + + /* workspace, undefined on input */ + Entry *X, /* size n entries. Zero on output */ + Int *Work, /* size 5n Int's */ + + /* inputs, not modified on output */ + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ], /* inverse of P from symbolic factorization */ + double Rs [ ], /* scale factors for A */ + + /* inputs, modified on output */ + Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ + Int Offi [ ], + Entry Offx [ ], + KLU_common *Common /* the control input/output structure */ +) ; + +void KLU_lsolve +( + /* inputs, not modified: */ + Int n, + Int Lp [ ], + Int Li [ ], + Unit LU [ ], + Int nrhs, + /* right-hand-side on input, solution to Lx=b on output */ + Entry X [ ] +) ; + +void KLU_ltsolve +( + /* inputs, not modified: */ + Int n, + Int Lp [ ], + Int Li [ ], + Unit LU [ ], + Int nrhs, +#ifdef COMPLEX + Int conj_solve, +#endif + /* right-hand-side on input, solution to L'x=b on output */ + Entry X [ ] +) ; + + +void KLU_usolve +( + /* inputs, not modified: */ + Int n, + Int Up [ ], + Int Ui [ ], + Unit LU [ ], + Entry Udiag [ ], + Int nrhs, + /* right-hand-side on input, solution to Ux=b on output */ + Entry X [ ] +) ; + +void KLU_utsolve +( + /* inputs, not modified: */ + Int n, + Int Up [ ], + Int Ui [ ], + Unit LU [ ], + Entry Udiag [ ], + Int nrhs, +#ifdef COMPLEX + Int conj_solve, +#endif + /* right-hand-side on input, solution to U'x=b on output */ + Entry X [ ] +) ; + +Int KLU_valid +( + Int n, + Int Ap [ ], + Int Ai [ ], + Entry Ax [ ] +) ; + +Int KLU_valid_LU +( + Int n, + Int flag_test_start_ptr, + Int Xip [ ], + Int Xlen [ ], + Unit LU [ ] +); + +size_t KLU_add_size_t (size_t a, size_t b, Int *ok) ; + +size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) ; + +KLU_symbolic *KLU_alloc_symbolic (Int n, Int *Ap, Int *Ai, KLU_common *Common) ; + +#endif diff --git a/src/maths/KLU/klu_kernel.c b/src/maths/KLU/klu_kernel.c new file mode 100644 index 000000000..bfdbe80fe --- /dev/null +++ b/src/maths/KLU/klu_kernel.c @@ -0,0 +1,1009 @@ +/* ========================================================================== */ +/* === KLU_kernel =========================================================== */ +/* ========================================================================== */ + +/* Sparse left-looking LU factorization, with partial pivoting. Based on + * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat & + * Liu's symmetric pruning. No user-callable routines are in this file. + */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === dfs ================================================================== */ +/* ========================================================================== */ + +/* Does a depth-first-search, starting at node j. */ + +static Int dfs +( + /* input, not modified on output: */ + Int j, /* node at which to start the DFS */ + Int k, /* mark value, for the Flag array */ + Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if + * row i is not yet pivotal. */ + Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */ + Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */ + + /* workspace, not defined on input or output */ + Int Stack [ ], /* size n */ + + /* input/output: */ + Int Flag [ ], /* Flag [i] == k means i is marked */ + Int Lpend [ ], /* for symmetric pruning */ + Int top, /* top of stack on input*/ + Unit LU [], + Int *Lik, /* Li row index array of the kth column */ + Int *plength, + + /* other, not defined on input or output */ + Int Ap_pos [ ] /* keeps track of position in adj list during DFS */ +) +{ + Int i, pos, jnew, head, l_length ; + Int *Li ; + + l_length = *plength ; + + head = 0 ; + Stack [0] = j ; + ASSERT (Flag [j] != k) ; + + while (head >= 0) + { + j = Stack [head] ; + jnew = Pinv [j] ; + ASSERT (jnew >= 0 && jnew < k) ; /* j is pivotal */ + + if (Flag [j] != k) /* a node is not yet visited */ + { + /* first time that j has been visited */ + Flag [j] = k ; + PRINTF (("[ start dfs at %d : new %d\n", j, jnew)) ; + /* set Ap_pos [head] to one past the last entry in col j to scan */ + Ap_pos [head] = + (Lpend [jnew] == EMPTY) ? Llen [jnew] : Lpend [jnew] ; + } + + /* add the adjacent nodes to the recursive stack by iterating through + * until finding another non-visited pivotal node */ + Li = (Int *) (LU + Lip [jnew]) ; + for (pos = --Ap_pos [head] ; pos >= 0 ; --pos) + { + i = Li [pos] ; + if (Flag [i] != k) + { + /* node i is not yet visited */ + if (Pinv [i] >= 0) + { + /* keep track of where we left off in the scan of the + * adjacency list of node j so we can restart j where we + * left off. */ + Ap_pos [head] = pos ; + + /* node i is pivotal; push it onto the recursive stack + * and immediately break so we can recurse on node i. */ + Stack [++head] = i ; + break ; + } + else + { + /* node i is not pivotal (no outgoing edges). */ + /* Flag as visited and store directly into L, + * and continue with current node j. */ + Flag [i] = k ; + Lik [l_length] = i ; + l_length++ ; + } + } + } + + if (pos == -1) + { + /* if all adjacent nodes of j are already visited, pop j from + * recursive stack and push j onto output stack */ + head-- ; + Stack[--top] = j ; + PRINTF ((" end dfs at %d ] head : %d\n", j, head)) ; + } + } + + *plength = l_length ; + return (top) ; +} + + +/* ========================================================================== */ +/* === lsolve_symbolic ====================================================== */ +/* ========================================================================== */ + +/* Finds the pattern of x, for the solution of Lx=b */ + +static Int lsolve_symbolic +( + /* input, not modified on output: */ + Int n, /* L is n-by-n, where n >= 0 */ + Int k, /* also used as the mark value, for the Flag array */ + Int Ap [ ], + Int Ai [ ], + Int Q [ ], + Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i + * is not yet pivotal. */ + + /* workspace, not defined on input or output */ + Int Stack [ ], /* size n */ + + /* workspace, defined on input and output */ + Int Flag [ ], /* size n. Initially, all of Flag [0..n-1] < k. After + * lsolve_symbolic is done, Flag [i] == k if i is in + * the pattern of the output, and Flag [0..n-1] <= k. */ + + /* other */ + Int Lpend [ ], /* for symmetric pruning */ + Int Ap_pos [ ], /* workspace used in dfs */ + + Unit LU [ ], /* LU factors (pattern and values) */ + Int lup, /* pointer to free space in LU */ + Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */ + Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */ + + /* ---- the following are only used in the BTF case --- */ + + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ] /* inverse of P from symbolic factorization */ +) +{ + Int *Lik ; + Int i, p, pend, oldcol, kglobal, top, l_length ; + + top = n ; + l_length = 0 ; + Lik = (Int *) (LU + lup); + + /* ---------------------------------------------------------------------- */ + /* BTF factorization of A (k1:k2-1, k1:k2-1) */ + /* ---------------------------------------------------------------------- */ + + kglobal = k + k1 ; /* column k of the block is col kglobal of A */ + oldcol = Q [kglobal] ; /* Q must be present for BTF case */ + pend = Ap [oldcol+1] ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + i = PSinv [Ai [p]] - k1 ; + if (i < 0) continue ; /* skip entry outside the block */ + + /* (i,k) is an entry in the block. start a DFS at node i */ + PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ; + if (Flag [i] != k) + { + if (Pinv [i] >= 0) + { + top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag, + Lpend, top, LU, Lik, &l_length, Ap_pos) ; + } + else + { + /* i is not pivotal, and not flagged. Flag and put in L */ + Flag [i] = k ; + Lik [l_length] = i ; + l_length++; + } + } + } + + /* If Llen [k] is zero, the matrix is structurally singular */ + Llen [k] = l_length ; + return (top) ; +} + + +/* ========================================================================== */ +/* === construct_column ===================================================== */ +/* ========================================================================== */ + +/* Construct the kth column of A, and the off-diagonal part, if requested. + * Scatter the numerical values into the workspace X, and construct the + * corresponding column of the off-diagonal matrix. */ + +static void construct_column +( + /* inputs, not modified on output */ + Int k, /* the column of A (or the column of the block) to get */ + Int Ap [ ], + Int Ai [ ], + Entry Ax [ ], + Int Q [ ], /* column pre-ordering */ + + /* zero on input, modified on output */ + Entry X [ ], + + /* ---- the following are only used in the BTF case --- */ + + /* inputs, not modified on output */ + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ], /* inverse of P from symbolic factorization */ + double Rs [ ], /* scale factors for A */ + Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */ + + /* inputs, modified on output */ + Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ + Int Offi [ ], + Entry Offx [ ] +) +{ + Entry aik ; + Int i, p, pend, oldcol, kglobal, poff, oldrow ; + + /* ---------------------------------------------------------------------- */ + /* Scale and scatter the column into X. */ + /* ---------------------------------------------------------------------- */ + + kglobal = k + k1 ; /* column k of the block is col kglobal of A */ + poff = Offp [kglobal] ; /* start of off-diagonal column */ + oldcol = Q [kglobal] ; + pend = Ap [oldcol+1] ; + + if (scale <= 0) + { + /* no scaling */ + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + i = PSinv [oldrow] - k1 ; + aik = Ax [p] ; + if (i < 0) + { + /* this is an entry in the off-diagonal part */ + Offi [poff] = oldrow ; + Offx [poff] = aik ; + poff++ ; + } + else + { + /* (i,k) is an entry in the block. scatter into X */ + X [i] = aik ; + } + } + } + else + { + /* row scaling */ + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + i = PSinv [oldrow] - k1 ; + aik = Ax [p] ; + SCALE_DIV (aik, Rs [oldrow]) ; + if (i < 0) + { + /* this is an entry in the off-diagonal part */ + Offi [poff] = oldrow ; + Offx [poff] = aik ; + poff++ ; + } + else + { + /* (i,k) is an entry in the block. scatter into X */ + X [i] = aik ; + } + } + } + + Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */ +} + + +/* ========================================================================== */ +/* === lsolve_numeric ======================================================= */ +/* ========================================================================== */ + +/* Computes the numerical values of x, for the solution of Lx=b. Note that x + * may include explicit zeros if numerical cancelation occurs. L is assumed + * to be unit-diagonal, with possibly unsorted columns (but the first entry in + * the column must always be the diagonal entry). */ + +static void lsolve_numeric +( + /* input, not modified on output: */ + Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or EMPTY if row i + * is not yet pivotal. */ + Unit *LU, /* LU factors (pattern and values) */ + Int Stack [ ], /* stack for dfs */ + Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */ + Int top, /* top of stack on input */ + Int n, /* A is n-by-n */ + Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */ + + /* output, must be zero on input: */ + Entry X [ ] /* size n, initially zero. On output, + * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]] + * contains the solution. */ + +) +{ + Entry xj ; + Entry *Lx ; + Int *Li ; + Int p, s, j, jnew, len ; + + /* solve Lx=b */ + for (s = top ; s < n ; s++) + { + /* forward solve with column j of L */ + j = Stack [s] ; + jnew = Pinv [j] ; + ASSERT (jnew >= 0) ; + xj = X [j] ; + GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ; + ASSERT (Lip [jnew] <= Lip [jnew+1]) ; + for (p = 0 ; p < len ; p++) + { + /*X [Li [p]] -= Lx [p] * xj ; */ + MULT_SUB (X [Li [p]], Lx [p], xj) ; + } + } +} + + +/* ========================================================================== */ +/* === lpivot =============================================================== */ +/* ========================================================================== */ + +/* Find a pivot via partial pivoting, and scale the column of L. */ + +static Int lpivot +( + Int diagrow, + Int *p_pivrow, + Entry *p_pivot, + double *p_abs_pivot, + double tol, + Entry X [ ], + Unit *LU, /* LU factors (pattern and values) */ + Int Lip [ ], + Int Llen [ ], + Int k, + Int n, + + Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if + * row i is not yet pivotal. */ + + Int *p_firstrow, + KLU_common *Common +) +{ + Entry x, pivot, *Lx ; + double abs_pivot, xabs ; + Int p, i, ppivrow, pdiag, pivrow, *Li, last_row_index, firstrow, len ; + + pivrow = EMPTY ; + if (Llen [k] == 0) + { + /* matrix is structurally singular */ + if (Common->halt_if_singular) + { + return (FALSE) ; + } + for (firstrow = *p_firstrow ; firstrow < n ; firstrow++) + { + PRINTF (("check %d\n", firstrow)) ; + if (Pinv [firstrow] < 0) + { + /* found the lowest-numbered non-pivotal row. Pick it. */ + pivrow = firstrow ; + PRINTF (("Got pivotal row: %d\n", pivrow)) ; + break ; + } + } + ASSERT (pivrow >= 0 && pivrow < n) ; + CLEAR (pivot) ; + *p_pivrow = pivrow ; + *p_pivot = pivot ; + *p_abs_pivot = 0 ; + *p_firstrow = firstrow ; + return (FALSE) ; + } + + pdiag = EMPTY ; + ppivrow = EMPTY ; + abs_pivot = EMPTY ; + i = Llen [k] - 1 ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + last_row_index = Li [i] ; + + /* decrement the length by 1 */ + Llen [k] = i ; + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + + /* look in Li [0 ..Llen [k] - 1 ] for a pivot row */ + for (p = 0 ; p < len ; p++) + { + /* gather the entry from X and store in L */ + i = Li [p] ; + x = X [i] ; + CLEAR (X [i]) ; + + Lx [p] = x ; + /* xabs = ABS (x) ; */ + ABS (xabs, x) ; + + /* find the diagonal */ + if (i == diagrow) + { + pdiag = p ; + } + + /* find the partial-pivoting choice */ + if (xabs > abs_pivot) + { + abs_pivot = xabs ; + ppivrow = p ; + } + } + + /* xabs = ABS (X [last_row_index]) ;*/ + ABS (xabs, X [last_row_index]) ; + if (xabs > abs_pivot) + { + abs_pivot = xabs ; + ppivrow = EMPTY ; + } + + /* compare the diagonal with the largest entry */ + if (last_row_index == diagrow) + { + if (xabs >= tol * abs_pivot) + { + abs_pivot = xabs ; + ppivrow = EMPTY ; + } + } + else if (pdiag != EMPTY) + { + /* xabs = ABS (Lx [pdiag]) ;*/ + ABS (xabs, Lx [pdiag]) ; + if (xabs >= tol * abs_pivot) + { + /* the diagonal is large enough */ + abs_pivot = xabs ; + ppivrow = pdiag ; + } + } + + if (ppivrow != EMPTY) + { + pivrow = Li [ppivrow] ; + pivot = Lx [ppivrow] ; + /* overwrite the ppivrow values with last index values */ + Li [ppivrow] = last_row_index ; + Lx [ppivrow] = X [last_row_index] ; + } + else + { + pivrow = last_row_index ; + pivot = X [last_row_index] ; + } + CLEAR (X [last_row_index]) ; + + *p_pivrow = pivrow ; + *p_pivot = pivot ; + *p_abs_pivot = abs_pivot ; + ASSERT (pivrow >= 0 && pivrow < n) ; + + if (IS_ZERO (pivot) && Common->halt_if_singular) + { + /* numerically singular case */ + return (FALSE) ; + } + + /* divide L by the pivot value */ + for (p = 0 ; p < Llen [k] ; p++) + { + /* Lx [p] /= pivot ; */ + DIV (Lx [p], Lx [p], pivot) ; + } + + return (TRUE) ; +} + + +/* ========================================================================== */ +/* === prune ================================================================ */ +/* ========================================================================== */ + +/* Prune the columns of L to reduce work in subsequent depth-first searches */ +static void prune +( + /* input/output: */ + Int Lpend [ ], /* Lpend [j] marks symmetric pruning point for L(:,j) */ + + /* input: */ + Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or EMPTY if + * row i is not yet pivotal. */ + Int k, /* prune using column k of U */ + Int pivrow, /* current pivot row */ + + /* input/output: */ + Unit *LU, /* LU factors (pattern and values) */ + + /* input */ + Int Uip [ ], /* size n, column pointers for U */ + Int Lip [ ], /* size n, column pointers for L */ + Int Ulen [ ], /* size n, column length of U */ + Int Llen [ ] /* size n, column length of L */ +) +{ + Entry x ; + Entry *Lx, *Ux ; + Int *Li, *Ui ; + Int p, i, j, p2, phead, ptail, llen, ulen ; + + /* check to see if any column of L can be pruned */ + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; + for (p = 0 ; p < ulen ; p++) + { + j = Ui [p] ; + ASSERT (j < k) ; + PRINTF (("%d is pruned: %d. Lpend[j] %d Lip[j+1] %d\n", + j, Lpend [j] != EMPTY, Lpend [j], Lip [j+1])) ; + if (Lpend [j] == EMPTY) + { + /* scan column j of L for the pivot row */ + GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; + for (p2 = 0 ; p2 < llen ; p2++) + { + if (pivrow == Li [p2]) + { + /* found it! This column can be pruned */ +#ifndef NDEBUG + PRINTF (("==== PRUNE: col j %d of L\n", j)) ; + { + Int p3 ; + for (p3 = 0 ; p3 < Llen [j] ; p3++) + { + PRINTF (("before: %i pivotal: %d\n", Li [p3], + Pinv [Li [p3]] >= 0)) ; + } + } +#endif + + /* partition column j of L. The unit diagonal of L + * is not stored in the column of L. */ + phead = 0 ; + ptail = Llen [j] ; + while (phead < ptail) + { + i = Li [phead] ; + if (Pinv [i] >= 0) + { + /* leave at the head */ + phead++ ; + } + else + { + /* swap with the tail */ + ptail-- ; + Li [phead] = Li [ptail] ; + Li [ptail] = i ; + x = Lx [phead] ; + Lx [phead] = Lx [ptail] ; + Lx [ptail] = x ; + } + } + + /* set Lpend to one past the last entry in the + * first part of the column of L. Entries in + * Li [0 ... Lpend [j]-1] are the only part of + * column j of L that needs to be scanned in the DFS. + * Lpend [j] was EMPTY; setting it >= 0 also flags + * column j as pruned. */ + Lpend [j] = ptail ; + +#ifndef NDEBUG + { + Int p3 ; + for (p3 = 0 ; p3 < Llen [j] ; p3++) + { + if (p3 == Lpend [j]) PRINTF (("----\n")) ; + PRINTF (("after: %i pivotal: %d\n", Li [p3], + Pinv [Li [p3]] >= 0)) ; + } + } +#endif + + break ; + } + } + } + } +} + + +/* ========================================================================== */ +/* === KLU_kernel =========================================================== */ +/* ========================================================================== */ + +size_t KLU_kernel /* final size of LU on output */ +( + /* input, not modified */ + Int n, /* A is n-by-n */ + Int Ap [ ], /* size n+1, column pointers for A */ + Int Ai [ ], /* size nz = Ap [n], row indices for A */ + Entry Ax [ ], /* size nz, values of A */ + Int Q [ ], /* size n, optional input permutation */ + size_t lusize, /* initial size of LU on input */ + + /* output, not defined on input */ + Int Pinv [ ], /* size n, inverse row permutation, where Pinv [i] = k if + * row i is the kth pivot row */ + Int P [ ], /* size n, row permutation, where P [k] = i if row i is the + * kth pivot row. */ + Unit **p_LU, /* LU array, size lusize on input */ + Entry Udiag [ ], /* size n, diagonal of U */ + Int Llen [ ], /* size n, column length of L */ + Int Ulen [ ], /* size n, column length of U */ + Int Lip [ ], /* size n, column pointers for L */ + Int Uip [ ], /* size n, column pointers for U */ + Int *lnz, /* size of L*/ + Int *unz, /* size of U*/ + /* workspace, not defined on input */ + Entry X [ ], /* size n, undefined on input, zero on output */ + + /* workspace, not defined on input or output */ + Int Stack [ ], /* size n */ + Int Flag [ ], /* size n */ + Int Ap_pos [ ], /* size n */ + + /* other workspace: */ + Int Lpend [ ], /* size n workspace, for pruning only */ + + /* inputs, not modified on output */ + Int k1, /* the block of A is from k1 to k2-1 */ + Int PSinv [ ], /* inverse of P from symbolic factorization */ + double Rs [ ], /* scale factors for A */ + + /* inputs, modified on output */ + Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ + Int Offi [ ], + Entry Offx [ ], + /* --------------- */ + KLU_common *Common +) +{ + Entry pivot ; + double abs_pivot, xsize, nunits, tol, memgrow ; + Entry *Ux ; + Int *Li, *Ui ; + Unit *LU ; /* LU factors (pattern and values) */ + Int k, p, i, j, pivrow = 0, kbar, diagrow, firstrow, lup, top, scale, len ; + size_t newlusize ; + +#ifndef NDEBUG + Entry *Lx ; +#endif + + ASSERT (Common != NULL) ; + scale = Common->scale ; + tol = Common->tol ; + memgrow = Common->memgrow ; + *lnz = 0 ; + *unz = 0 ; + CLEAR (pivot) ; + + /* ---------------------------------------------------------------------- */ + /* get initial Li, Lx, Ui, and Ux */ + /* ---------------------------------------------------------------------- */ + + PRINTF (("input: lusize %d \n", lusize)) ; + ASSERT (lusize > 0) ; + LU = *p_LU ; + + /* ---------------------------------------------------------------------- */ + /* initializations */ + /* ---------------------------------------------------------------------- */ + + firstrow = 0 ; + lup = 0 ; + + for (k = 0 ; k < n ; k++) + { + /* X [k] = 0 ; */ + CLEAR (X [k]) ; + Flag [k] = EMPTY ; + Lpend [k] = EMPTY ; /* flag k as not pruned */ + } + + /* ---------------------------------------------------------------------- */ + /* mark all rows as non-pivotal and determine initial diagonal mapping */ + /* ---------------------------------------------------------------------- */ + + /* PSinv does the symmetric permutation, so don't do it here */ + for (k = 0 ; k < n ; k++) + { + P [k] = k ; + Pinv [k] = FLIP (k) ; /* mark all rows as non-pivotal */ + } + /* initialize the construction of the off-diagonal matrix */ + Offp [0] = 0 ; + + /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa. + * If row is pivotal, then Pinv [row] >= 0. A row is initially "flipped" + * (Pinv [k] < EMPTY), and then marked "unflipped" when it becomes + * pivotal. */ + +#ifndef NDEBUG + for (k = 0 ; k < n ; k++) + { + PRINTF (("Initial P [%d] = %d\n", k, P [k])) ; + } +#endif + + /* ---------------------------------------------------------------------- */ + /* factorize */ + /* ---------------------------------------------------------------------- */ + + for (k = 0 ; k < n ; k++) + { + + PRINTF (("\n\n==================================== k: %d\n", k)) ; + + /* ------------------------------------------------------------------ */ + /* determine if LU factors have grown too big */ + /* ------------------------------------------------------------------ */ + + /* (n - k) entries for L and k entries for U */ + nunits = DUNITS (Int, n - k) + DUNITS (Int, k) + + DUNITS (Entry, n - k) + DUNITS (Entry, k) ; + + /* LU can grow by at most 'nunits' entries if the column is dense */ + PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize, + lup+nunits)); + xsize = ((double) lup) + nunits ; + if (xsize > (double) lusize) + { + /* check here how much to grow */ + xsize = (memgrow * ((double) lusize) + 4*n + 1) ; + if (INT_OVERFLOW (xsize)) + { + PRINTF (("Matrix is too large (Int overflow)\n")) ; + Common->status = KLU_TOO_LARGE ; + return (lusize) ; + } + newlusize = memgrow * lusize + 2*n + 1 ; + /* Future work: retry mechanism in case of malloc failure */ + LU = KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ; + Common->nrealloc++ ; + *p_LU = LU ; + if (Common->status == KLU_OUT_OF_MEMORY) + { + PRINTF (("Matrix is too large (LU)\n")) ; + return (lusize) ; + } + lusize = newlusize ; + PRINTF (("inc LU to %d done\n", lusize)) ; + } + + /* ------------------------------------------------------------------ */ + /* start the kth column of L and U */ + /* ------------------------------------------------------------------ */ + + Lip [k] = lup ; + + /* ------------------------------------------------------------------ */ + /* compute the nonzero pattern of the kth column of L and U */ + /* ------------------------------------------------------------------ */ + +#ifndef NDEBUG + for (i = 0 ; i < n ; i++) + { + ASSERT (Flag [i] < k) ; + /* ASSERT (X [i] == 0) ; */ + ASSERT (IS_ZERO (X [i])) ; + } +#endif + + top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag, + Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ; + +#ifndef NDEBUG + PRINTF (("--- in U:\n")) ; + for (p = top ; p < n ; p++) + { + PRINTF (("pattern of X for U: %d : %d pivot row: %d\n", + p, Stack [p], Pinv [Stack [p]])) ; + ASSERT (Flag [Stack [p]] == k) ; + } + PRINTF (("--- in L:\n")) ; + Li = (Int *) (LU + Lip [k]); + for (p = 0 ; p < Llen [k] ; p++) + { + PRINTF (("pattern of X in L: %d : %d pivot row: %d\n", + p, Li [p], Pinv [Li [p]])) ; + ASSERT (Flag [Li [p]] == k) ; + } + p = 0 ; + for (i = 0 ; i < n ; i++) + { + ASSERT (Flag [i] <= k) ; + if (Flag [i] == k) p++ ; + } +#endif + + /* ------------------------------------------------------------------ */ + /* get the column of the matrix to factorize and scatter into X */ + /* ------------------------------------------------------------------ */ + + construct_column (k, Ap, Ai, Ax, Q, X, + k1, PSinv, Rs, scale, Offp, Offi, Offx) ; + + /* ------------------------------------------------------------------ */ + /* compute the numerical values of the kth column (s = L \ A (:,k)) */ + /* ------------------------------------------------------------------ */ + + lsolve_numeric (Pinv, LU, Stack, Lip, top, n, Llen, X) ; + +#ifndef NDEBUG + for (p = top ; p < n ; p++) + { + PRINTF (("X for U %d : ", Stack [p])) ; + PRINT_ENTRY (X [Stack [p]]) ; + } + Li = (Int *) (LU + Lip [k]) ; + for (p = 0 ; p < Llen [k] ; p++) + { + PRINTF (("X for L %d : ", Li [p])) ; + PRINT_ENTRY (X [Li [p]]) ; + } +#endif + + /* ------------------------------------------------------------------ */ + /* partial pivoting with diagonal preference */ + /* ------------------------------------------------------------------ */ + + /* determine what the "diagonal" is */ + diagrow = P [k] ; /* might already be pivotal */ + PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n", + k, diagrow, UNFLIP (diagrow))) ; + + /* find a pivot and scale the pivot column */ + if (!lpivot (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip, + Llen, k, n, Pinv, &firstrow, Common)) + { + /* matrix is structurally or numerically singular */ + Common->status = KLU_SINGULAR ; + if (Common->numerical_rank == EMPTY) + { + Common->numerical_rank = k+k1 ; + Common->singular_col = Q [k+k1] ; + } + if (Common->halt_if_singular) + { + /* do not continue the factorization */ + return (lusize) ; + } + } + + /* we now have a valid pivot row, even if the column has NaN's or + * has no entries on or below the diagonal at all. */ + PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ; + PRINT_ENTRY (pivot) ; + ASSERT (pivrow >= 0 && pivrow < n) ; + ASSERT (Pinv [pivrow] < 0) ; + + /* set the Uip pointer */ + Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ; + + /* move the lup pointer to the position where indices of U + * should be stored */ + lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ; + + Ulen [k] = n - top ; + + /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */ + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + for (p = top, i = 0 ; p < n ; p++, i++) + { + j = Stack [p] ; + Ui [i] = Pinv [j] ; + Ux [i] = X [j] ; + CLEAR (X [j]) ; + } + + /* position the lu index at the starting point for next column */ + lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ; + + /* U(k,k) = pivot */ + Udiag [k] = pivot ; + + /* ------------------------------------------------------------------ */ + /* log the pivot permutation */ + /* ------------------------------------------------------------------ */ + + ASSERT (UNFLIP (Pinv [diagrow]) < n) ; + ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ; + + if (pivrow != diagrow) + { + /* an off-diagonal pivot has been chosen */ + Common->noffdiag++ ; + PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n", + pivrow, k)) ; + if (Pinv [diagrow] < 0) + { + /* the former diagonal row index, diagrow, has not yet been + * chosen as a pivot row. Log this diagrow as the "diagonal" + * entry in the column kbar for which the chosen pivot row, + * pivrow, was originally logged as the "diagonal" */ + kbar = FLIP (Pinv [pivrow]) ; + P [kbar] = diagrow ; + Pinv [diagrow] = FLIP (kbar) ; + } + } + P [k] = pivrow ; + Pinv [pivrow] = k ; + +#ifndef NDEBUG + for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;} + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; + for (p = 0 ; p < len ; p++) + { + PRINTF (("Column %d of U: %d : ", k, Ui [p])) ; + PRINT_ENTRY (Ux [p]) ; + } + GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; + for (p = 0 ; p < len ; p++) + { + PRINTF (("Column %d of L: %d : ", k, Li [p])) ; + PRINT_ENTRY (Lx [p]) ; + } +#endif + + /* ------------------------------------------------------------------ */ + /* symmetric pruning */ + /* ------------------------------------------------------------------ */ + + prune (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ; + + *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */ + *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */ + } + + /* ---------------------------------------------------------------------- */ + /* finalize column pointers for L and U, and put L in the pivotal order */ + /* ---------------------------------------------------------------------- */ + + for (p = 0 ; p < n ; p++) + { + Li = (Int *) (LU + Lip [p]) ; + for (i = 0 ; i < Llen [p] ; i++) + { + Li [i] = Pinv [Li [i]] ; + } + } + +#ifndef NDEBUG + for (i = 0 ; i < n ; i++) + { + PRINTF (("P [%d] = %d Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ; + } + for (i = 0 ; i < n ; i++) + { + ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ; + ASSERT (P [i] >= 0 && P [i] < n) ; + ASSERT (P [Pinv [i]] == i) ; + ASSERT (IS_ZERO (X [i])) ; + } +#endif + + /* ---------------------------------------------------------------------- */ + /* shrink the LU factors to just the required size */ + /* ---------------------------------------------------------------------- */ + + newlusize = lup ; + ASSERT ((size_t) newlusize <= lusize) ; + + /* this cannot fail, since the block is descreasing in size */ + LU = KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ; + *p_LU = LU ; + return (newlusize) ; +} diff --git a/src/maths/KLU/klu_memory.c b/src/maths/KLU/klu_memory.c new file mode 100644 index 000000000..6359941ad --- /dev/null +++ b/src/maths/KLU/klu_memory.c @@ -0,0 +1,225 @@ +/* ========================================================================== */ +/* === KLU_memory =========================================================== */ +/* ========================================================================== */ + +/* KLU memory management routines: + * + * KLU_malloc malloc wrapper + * KLU_free free wrapper + * KLU_realloc realloc wrapper + */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === KLU_add_size_t ======================================================= */ +/* ========================================================================== */ + +/* Safely compute a+b, and check for size_t overflow */ + +size_t KLU_add_size_t (size_t a, size_t b, Int *ok) +{ + (*ok) = (*ok) && ((a + b) >= MAX (a,b)) ; + return ((*ok) ? (a + b) : ((size_t) -1)) ; +} + +/* ========================================================================== */ +/* === KLU_mult_size_t ====================================================== */ +/* ========================================================================== */ + +/* Safely compute a*k, where k should be small, and check for size_t overflow */ + +size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) +{ + size_t i, s = 0 ; + for (i = 0 ; i < k ; i++) + { + s = KLU_add_size_t (s, a, ok) ; + } + return ((*ok) ? s : ((size_t) -1)) ; +} + +/* ========================================================================== */ +/* === KLU_malloc =========================================================== */ +/* ========================================================================== */ + +/* Wrapper around malloc routine (mxMalloc for a mexFunction). Allocates + * space of size MAX(1,n)*size, where size is normally a sizeof (...). + * + * This routine and KLU_realloc do not set Common->status to KLU_OK on success, + * so that a sequence of KLU_malloc's or KLU_realloc's can be used. If any of + * them fails, the Common->status will hold the most recent error status. + * + * Usage, for a pointer to Int: + * + * p = KLU_malloc (n, sizeof (Int), Common) + * + * Uses a pointer to the malloc routine (or its equivalent) defined in Common. + */ + +void *KLU_malloc /* returns pointer to the newly malloc'd block */ +( + /* ---- input ---- */ + size_t n, /* number of items */ + size_t size, /* size of each item */ + /* --------------- */ + KLU_common *Common +) +{ + void *p ; + size_t s ; + Int ok = TRUE ; + + if (Common == NULL) + { + p = NULL ; + } + else if (size == 0) + { + /* size must be > 0 */ + Common->status = KLU_INVALID ; + p = NULL ; + } + else if (n >= INT_MAX) + { + /* object is too big to allocate; p[i] where i is an Int will not + * be enough. */ + Common->status = KLU_TOO_LARGE ; + p = NULL ; + } + else + { + /* call malloc, or its equivalent */ + s = KLU_mult_size_t (MAX (1,n), size, &ok) ; + p = ok ? ((Common->malloc_memory) (s)) : NULL ; + if (p == NULL) + { + /* failure: out of memory */ + Common->status = KLU_OUT_OF_MEMORY ; + } + else + { + Common->memusage += s ; + Common->mempeak = MAX (Common->mempeak, Common->memusage) ; + } + } + return (p) ; +} + + +/* ========================================================================== */ +/* === KLU_free ============================================================= */ +/* ========================================================================== */ + +/* Wrapper around free routine (mxFree for a mexFunction). Returns NULL, + * which can be assigned to the pointer being freed, as in: + * + * p = KLU_free (p, n, sizeof (int), Common) ; + */ + +void *KLU_free /* always returns NULL */ +( + /* ---- in/out --- */ + void *p, /* block of memory to free */ + /* ---- input --- */ + size_t n, /* size of block to free, in # of items */ + size_t size, /* size of each item */ + /* --------------- */ + KLU_common *Common +) +{ + size_t s ; + Int ok = TRUE ; + if (p != NULL && Common != NULL) + { + /* only free the object if the pointer is not NULL */ + /* call free, or its equivalent */ + (Common->free_memory) (p) ; + s = KLU_mult_size_t (MAX (1,n), size, &ok) ; + Common->memusage -= s ; + } + /* return NULL, and the caller should assign this to p. This avoids + * freeing the same pointer twice. */ + return (NULL) ; +} + + +/* ========================================================================== */ +/* === KLU_realloc ========================================================== */ +/* ========================================================================== */ + +/* Wrapper around realloc routine (mxRealloc for a mexFunction). Given a + * pointer p to a block allocated by KLU_malloc, it changes the size of the + * block pointed to by p to be MAX(1,nnew)*size in size. It may return a + * pointer different than p. This should be used as (for a pointer to Int): + * + * p = KLU_realloc (nnew, nold, sizeof (Int), p, Common) ; + * + * If p is NULL, this is the same as p = KLU_malloc (...). + * A size of nnew=0 is treated as nnew=1. + * + * If the realloc fails, p is returned unchanged and Common->status is set + * to KLU_OUT_OF_MEMORY. If successful, Common->status is not modified, + * and p is returned (possibly changed) and pointing to a large block of memory. + * + * Uses a pointer to the realloc routine (or its equivalent) defined in Common. + */ + +void *KLU_realloc /* returns pointer to reallocated block */ +( + /* ---- input ---- */ + size_t nnew, /* requested # of items in reallocated block */ + size_t nold, /* old # of items */ + size_t size, /* size of each item */ + /* ---- in/out --- */ + void *p, /* block of memory to realloc */ + /* --------------- */ + KLU_common *Common +) +{ + void *pnew ; + size_t snew, sold ; + Int ok = TRUE ; + + if (Common == NULL) + { + p = NULL ; + } + else if (size == 0) + { + /* size must be > 0 */ + Common->status = KLU_INVALID ; + p = NULL ; + } + else if (p == NULL) + { + /* A fresh object is being allocated. */ + p = KLU_malloc (nnew, size, Common) ; + } + else if (nnew >= INT_MAX) + { + /* failure: nnew is too big. Do not change p */ + Common->status = KLU_TOO_LARGE ; + } + else + { + /* The object exists, and is changing to some other nonzero size. */ + /* call realloc, or its equivalent */ + snew = KLU_mult_size_t (MAX (1,nnew), size, &ok) ; + sold = KLU_mult_size_t (MAX (1,nold), size, &ok) ; + pnew = ok ? ((Common->realloc_memory) (p, snew)) : NULL ; + if (pnew == NULL) + { + /* Do not change p, since it still points to allocated memory */ + Common->status = KLU_OUT_OF_MEMORY ; + } + else + { + /* success: return the new p and change the size of the block */ + Common->memusage += (snew - sold) ; + Common->mempeak = MAX (Common->mempeak, Common->memusage) ; + p = pnew ; + } + } + return (p) ; +} diff --git a/src/maths/KLU/klu_refactor.c b/src/maths/KLU/klu_refactor.c new file mode 100644 index 000000000..7eb6fe8a9 --- /dev/null +++ b/src/maths/KLU/klu_refactor.c @@ -0,0 +1,478 @@ +/* ========================================================================== */ +/* === KLU_refactor ========================================================= */ +/* ========================================================================== */ + +/* Factor the matrix, after ordering and analyzing it with KLU_analyze, and + * factoring it once with KLU_factor. This routine cannot do any numerical + * pivoting. The pattern of the input matrix (Ap, Ai) must be identical to + * the pattern given to KLU_factor. + */ + +#include "klu_internal.h" + + +/* ========================================================================== */ +/* === KLU_refactor ========================================================= */ +/* ========================================================================== */ + +Int KLU_refactor /* returns TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + double Ax [ ], + KLU_symbolic *Symbolic, + + /* input/output */ + KLU_numeric *Numeric, + KLU_common *Common +) +{ + Entry ukk, ujk, s ; + Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ; + double *Rs ; + Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen, + *Ulen ; + Unit **LUbx ; + Unit *LU ; + Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale, + nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + Common->status = KLU_OK ; + + if (Numeric == NULL) + { + /* invalid Numeric object */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + + Common->numerical_rank = EMPTY ; + Common->singular_col = EMPTY ; + + Az = (Entry *) Ax ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + n = Symbolic->n ; + P = Symbolic->P ; + Q = Symbolic->Q ; + R = Symbolic->R ; + nblocks = Symbolic->nblocks ; + maxblock = Symbolic->maxblock ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Numeric object */ + /* ---------------------------------------------------------------------- */ + + Pnum = Numeric->Pnum ; + Offp = Numeric->Offp ; + Offi = Numeric->Offi ; + Offx = (Entry *) Numeric->Offx ; + + LUbx = (Unit **) Numeric->LUbx ; + + scale = Common->scale ; + if (scale > 0) + { + /* factorization was not scaled, but refactorization is scaled */ + if (Numeric->Rs == NULL) + { + Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ; + if (Common->status < KLU_OK) + { + Common->status = KLU_OUT_OF_MEMORY ; + return (FALSE) ; + } + } + } + else + { + /* no scaling for refactorization; ensure Numeric->Rs is freed. This + * does nothing if Numeric->Rs is already NULL. */ + Numeric->Rs = KLU_free (Numeric->Rs, n, sizeof (double), Common) ; + } + Rs = Numeric->Rs ; + + Pinv = Numeric->Pinv ; + X = (Entry *) Numeric->Xwork ; + Common->nrealloc = 0 ; + Udiag = Numeric->Udiag ; + nzoff = Symbolic->nzoff ; + + /* ---------------------------------------------------------------------- */ + /* check the input matrix compute the row scale factors, Rs */ + /* ---------------------------------------------------------------------- */ + + /* do no scale, or check the input matrix, if scale < 0 */ + if (scale >= 0) + { + /* check for out-of-range indices, but do not check for duplicates */ + if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common)) + { + return (FALSE) ; + } + } + + /* ---------------------------------------------------------------------- */ + /* clear workspace X */ + /* ---------------------------------------------------------------------- */ + + for (k = 0 ; k < maxblock ; k++) + { + /* X [k] = 0 */ + CLEAR (X [k]) ; + } + + poff = 0 ; + + /* ---------------------------------------------------------------------- */ + /* factor each block */ + /* ---------------------------------------------------------------------- */ + + if (scale <= 0) + { + + /* ------------------------------------------------------------------ */ + /* no scaling */ + /* ------------------------------------------------------------------ */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* -------------------------------------------------------------- */ + /* the block is from rows/columns k1 to k2-1 */ + /* -------------------------------------------------------------- */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + + if (nk == 1) + { + + /* ---------------------------------------------------------- */ + /* singleton case */ + /* ---------------------------------------------------------- */ + + oldcol = Q [k1] ; + pend = Ap [oldcol+1] ; + CLEAR (s) ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + newrow = Pinv [Ai [p]] - k1 ; + if (newrow < 0 && poff < nzoff) + { + /* entry in off-diagonal block */ + Offx [poff] = Az [p] ; + poff++ ; + } + else + { + /* singleton */ + s = Az [p] ; + } + } + Udiag [k1] = s ; + + } + else + { + + /* ---------------------------------------------------------- */ + /* construct and factor the kth block */ + /* ---------------------------------------------------------- */ + + Lip = Numeric->Lip + k1 ; + Llen = Numeric->Llen + k1 ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + LU = LUbx [block] ; + + for (k = 0 ; k < nk ; k++) + { + + /* ------------------------------------------------------ */ + /* scatter kth column of the block into workspace X */ + /* ------------------------------------------------------ */ + + oldcol = Q [k+k1] ; + pend = Ap [oldcol+1] ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + newrow = Pinv [Ai [p]] - k1 ; + if (newrow < 0 && poff < nzoff) + { + /* entry in off-diagonal block */ + Offx [poff] = Az [p] ; + poff++ ; + } + else + { + /* (newrow,k) is an entry in the block */ + X [newrow] = Az [p] ; + } + } + + /* ------------------------------------------------------ */ + /* compute kth column of U, and update kth column of A */ + /* ------------------------------------------------------ */ + + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; + for (up = 0 ; up < ulen ; up++) + { + j = Ui [up] ; + ujk = X [j] ; + /* X [j] = 0 */ + CLEAR (X [j]) ; + Ux [up] = ujk ; + GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; + for (p = 0 ; p < llen ; p++) + { + /* X [Li [p]] -= Lx [p] * ujk */ + MULT_SUB (X [Li [p]], Lx [p], ujk) ; + } + } + /* get the diagonal entry of U */ + ukk = X [k] ; + /* X [k] = 0 */ + CLEAR (X [k]) ; + /* singular case */ + if (IS_ZERO (ukk)) + { + /* matrix is numerically singular */ + Common->status = KLU_SINGULAR ; + if (Common->numerical_rank == EMPTY) + { + Common->numerical_rank = k+k1 ; + Common->singular_col = Q [k+k1] ; + } + if (Common->halt_if_singular) + { + /* do not continue the factorization */ + return (FALSE) ; + } + } + Udiag [k+k1] = ukk ; + /* gather and divide by pivot to get kth column of L */ + GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; + for (p = 0 ; p < llen ; p++) + { + i = Li [p] ; + DIV (Lx [p], X [i], ukk) ; + CLEAR (X [i]) ; + } + + } + } + } + + } + else + { + + /* ------------------------------------------------------------------ */ + /* scaling */ + /* ------------------------------------------------------------------ */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* -------------------------------------------------------------- */ + /* the block is from rows/columns k1 to k2-1 */ + /* -------------------------------------------------------------- */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + + if (nk == 1) + { + + /* ---------------------------------------------------------- */ + /* singleton case */ + /* ---------------------------------------------------------- */ + + oldcol = Q [k1] ; + pend = Ap [oldcol+1] ; + CLEAR (s) ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + newrow = Pinv [oldrow] - k1 ; + if (newrow < 0 && poff < nzoff) + { + /* entry in off-diagonal block */ + /* Offx [poff] = Az [p] / Rs [oldrow] */ + SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ; + poff++ ; + } + else + { + /* singleton */ + /* s = Az [p] / Rs [oldrow] */ + SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ; + } + } + Udiag [k1] = s ; + + } + else + { + + /* ---------------------------------------------------------- */ + /* construct and factor the kth block */ + /* ---------------------------------------------------------- */ + + Lip = Numeric->Lip + k1 ; + Llen = Numeric->Llen + k1 ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + LU = LUbx [block] ; + + for (k = 0 ; k < nk ; k++) + { + + /* ------------------------------------------------------ */ + /* scatter kth column of the block into workspace X */ + /* ------------------------------------------------------ */ + + oldcol = Q [k+k1] ; + pend = Ap [oldcol+1] ; + for (p = Ap [oldcol] ; p < pend ; p++) + { + oldrow = Ai [p] ; + newrow = Pinv [oldrow] - k1 ; + if (newrow < 0 && poff < nzoff) + { + /* entry in off-diagonal part */ + /* Offx [poff] = Az [p] / Rs [oldrow] */ + SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]); + poff++ ; + } + else + { + /* (newrow,k) is an entry in the block */ + /* X [newrow] = Az [p] / Rs [oldrow] */ + SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ; + } + } + + /* ------------------------------------------------------ */ + /* compute kth column of U, and update kth column of A */ + /* ------------------------------------------------------ */ + + GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; + for (up = 0 ; up < ulen ; up++) + { + j = Ui [up] ; + ujk = X [j] ; + /* X [j] = 0 */ + CLEAR (X [j]) ; + Ux [up] = ujk ; + GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; + for (p = 0 ; p < llen ; p++) + { + /* X [Li [p]] -= Lx [p] * ujk */ + MULT_SUB (X [Li [p]], Lx [p], ujk) ; + } + } + /* get the diagonal entry of U */ + ukk = X [k] ; + /* X [k] = 0 */ + CLEAR (X [k]) ; + /* singular case */ + if (IS_ZERO (ukk)) + { + /* matrix is numerically singular */ + Common->status = KLU_SINGULAR ; + if (Common->numerical_rank == EMPTY) + { + Common->numerical_rank = k+k1 ; + Common->singular_col = Q [k+k1] ; + } + if (Common->halt_if_singular) + { + /* do not continue the factorization */ + return (FALSE) ; + } + } + Udiag [k+k1] = ukk ; + /* gather and divide by pivot to get kth column of L */ + GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; + for (p = 0 ; p < llen ; p++) + { + i = Li [p] ; + DIV (Lx [p], X [i], ukk) ; + CLEAR (X [i]) ; + } + } + } + } + } + + /* ---------------------------------------------------------------------- */ + /* permute scale factors Rs according to pivotal row order */ + /* ---------------------------------------------------------------------- */ + + if (scale > 0) + { + for (k = 0 ; k < n ; k++) + { + REAL (X [k]) = Rs [Pnum [k]] ; + } + for (k = 0 ; k < n ; k++) + { + Rs [k] = REAL (X [k]) ; + } + } + +#ifndef NDEBUG + ASSERT (Offp [n] == poff) ; + ASSERT (Symbolic->nzoff == poff) ; + PRINTF (("\n------------------- Off diagonal entries, new:\n")) ; + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + if (Common->status == KLU_OK) + { + PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks)); + for (block = 0 ; block < nblocks ; block++) + { + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (( + "\n================KLU_refactor output: k1 %d k2 %d nk %d\n", + k1, k2, nk)) ; + if (nk == 1) + { + PRINTF (("singleton ")) ; + PRINT_ENTRY (Udiag [k1]) ; + } + else + { + Lip = Numeric->Lip + k1 ; + Llen = Numeric->Llen + k1 ; + LU = (Unit *) Numeric->LUbx [block] ; + PRINTF (("\n---- L block %d\n", block)) ; + ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ; + Uip = Numeric->Uip + k1 ; + Ulen = Numeric->Ulen + k1 ; + PRINTF (("\n---- U block %d\n", block)) ; + ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ; + } + } + } +#endif + + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_scale.c b/src/maths/KLU/klu_scale.c new file mode 100644 index 000000000..479612006 --- /dev/null +++ b/src/maths/KLU/klu_scale.c @@ -0,0 +1,159 @@ +/* ========================================================================== */ +/* === KLU_scale ============================================================ */ +/* ========================================================================== */ + +/* Scale a matrix and check to see if it is valid. Can be called by the user. + * This is called by KLU_factor and KLU_refactor. Returns TRUE if the input + * matrix is valid, FALSE otherwise. If the W input argument is non-NULL, + * then the input matrix is checked for duplicate entries. + * + * scaling methods: + * <0: no scaling, do not compute Rs, and do not check input matrix. + * 0: no scaling + * 1: the scale factor for row i is sum (abs (A (i,:))) + * 2 or more: the scale factor for row i is max (abs (A (i,:))) + */ + +#include "klu_internal.h" + +Int KLU_scale /* return TRUE if successful, FALSE otherwise */ +( + /* inputs, not modified */ + Int scale, /* 0: none, 1: sum, 2: max */ + Int n, + Int Ap [ ], /* size n+1, column pointers */ + Int Ai [ ], /* size nz, row indices */ + double Ax [ ], + /* outputs, not defined on input */ + double Rs [ ], /* size n, can be NULL if scale <= 0 */ + /* workspace, not defined on input or output */ + Int W [ ], /* size n, can be NULL */ + /* --------------- */ + KLU_common *Common +) +{ + double a ; + Entry *Az ; + Int row, col, p, pend, check_duplicates ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + Common->status = KLU_OK ; + + if (scale < 0) + { + /* return without checking anything and without computing the + * scale factors */ + return (TRUE) ; + } + + Az = (Entry *) Ax ; + + if (n <= 0 || Ap == NULL || Ai == NULL || Az == NULL || + (scale > 0 && Rs == NULL)) + { + /* Ap, Ai, Ax and Rs must be present, and n must be > 0 */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + if (Ap [0] != 0 || Ap [n] < 0) + { + /* nz = Ap [n] must be >= 0 and Ap [0] must equal zero */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + for (col = 0 ; col < n ; col++) + { + if (Ap [col] > Ap [col+1]) + { + /* column pointers must be non-decreasing */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + } + + /* ---------------------------------------------------------------------- */ + /* scale */ + /* ---------------------------------------------------------------------- */ + + if (scale > 0) + { + /* initialize row sum or row max */ + for (row = 0 ; row < n ; row++) + { + Rs [row] = 0 ; + } + } + + /* check for duplicates only if W is present */ + check_duplicates = (W != (Int *) NULL) ; + if (check_duplicates) + { + for (row = 0 ; row < n ; row++) + { + W [row] = EMPTY ; + } + } + + for (col = 0 ; col < n ; col++) + { + pend = Ap [col+1] ; + for (p = Ap [col] ; p < pend ; p++) + { + row = Ai [p] ; + if (row < 0 || row >= n) + { + /* row index out of range, or duplicate entry */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + if (check_duplicates) + { + if (W [row] == col) + { + /* duplicate entry */ + Common->status = KLU_INVALID ; + return (FALSE) ; + } + /* flag row i as appearing in column col */ + W [row] = col ; + } + /* a = ABS (Az [p]) ;*/ + ABS (a, Az [p]) ; + if (scale == 1) + { + /* accumulate the abs. row sum */ + Rs [row] += a ; + } + else if (scale > 1) + { + /* find the max abs. value in the row */ + Rs [row] = MAX (Rs [row], a) ; + } + } + } + + if (scale > 0) + { + /* do not scale empty rows */ + for (row = 0 ; row < n ; row++) + { + /* matrix is singular */ + PRINTF (("Rs [%d] = %g\n", row, Rs [row])) ; + + if (Rs [row] == 0.0) + { + PRINTF (("Row %d of A is all zero\n", row)) ; + Rs [row] = 1.0 ; + } + } + } + + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_solve.c b/src/maths/KLU/klu_solve.c new file mode 100644 index 000000000..d23a14095 --- /dev/null +++ b/src/maths/KLU/klu_solve.c @@ -0,0 +1,396 @@ +/* ========================================================================== */ +/* === KLU_solve ============================================================ */ +/* ========================================================================== */ + +/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze + * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is + * performed. Uses Numeric->Xwork as workspace (undefined on input and output), + * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with + * Numeric->Iwork). + */ + +#include "klu_internal.h" + +Int KLU_solve +( + /* inputs, not modified */ + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + Int d, /* leading dimension of B */ + Int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size n*nrhs, in column-oriented form, with + * leading dimension d. */ + /* --------------- */ + KLU_common *Common +) +{ + Entry x [4], offik, s ; + double rs, *Rs ; + Entry *Offx, *X, *Bz, *Udiag ; + Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; + Unit **LUbx ; + Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || + B == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + Bz = (Entry *) B ; + n = Symbolic->n ; + nblocks = Symbolic->nblocks ; + Q = Symbolic->Q ; + R = Symbolic->R ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Numeric object */ + /* ---------------------------------------------------------------------- */ + + ASSERT (nblocks == Numeric->nblocks) ; + Pnum = Numeric->Pnum ; + Offp = Numeric->Offp ; + Offi = Numeric->Offi ; + Offx = (Entry *) Numeric->Offx ; + + Lip = Numeric->Lip ; + Llen = Numeric->Llen ; + Uip = Numeric->Uip ; + Ulen = Numeric->Ulen ; + LUbx = (Unit **) Numeric->LUbx ; + Udiag = Numeric->Udiag ; + + Rs = Numeric->Rs ; + X = (Entry *) Numeric->Xwork ; + + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + + /* ---------------------------------------------------------------------- */ + /* solve in chunks of 4 columns at a time */ + /* ---------------------------------------------------------------------- */ + + for (chunk = 0 ; chunk < nrhs ; chunk += 4) + { + + /* ------------------------------------------------------------------ */ + /* get the size of the current chunk */ + /* ------------------------------------------------------------------ */ + + nr = MIN (nrhs - chunk, 4) ; + + /* ------------------------------------------------------------------ */ + /* scale and permute the right hand side, X = P*(R\B) */ + /* ------------------------------------------------------------------ */ + + if (Rs == NULL) + { + + /* no scaling */ + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + X [k] = Bz [Pnum [k]] ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + X [2*k ] = Bz [i ] ; + X [2*k + 1] = Bz [i + d ] ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + X [3*k ] = Bz [i ] ; + X [3*k + 1] = Bz [i + d ] ; + X [3*k + 2] = Bz [i + d*2] ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + X [4*k ] = Bz [i ] ; + X [4*k + 1] = Bz [i + d ] ; + X [4*k + 2] = Bz [i + d*2] ; + X [4*k + 3] = Bz [i + d*3] ; + } + break ; + } + + } + else + { + + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ; + SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ; + SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ; + SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ; + SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ; + SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ; + SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ; + } + break ; + } + } + + /* ------------------------------------------------------------------ */ + /* solve X = (L*U + Off)\X */ + /* ------------------------------------------------------------------ */ + + for (block = nblocks-1 ; block >= 0 ; block--) + { + + /* -------------------------------------------------------------- */ + /* the block of size nk is from rows/columns k1 to k2-1 */ + /* -------------------------------------------------------------- */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; + + /* solve the block system */ + if (nk == 1) + { + s = Udiag [k1] ; + switch (nr) + { + + case 1: + DIV (X [k1], X [k1], s) ; + break ; + + case 2: + DIV (X [2*k1], X [2*k1], s) ; + DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; + break ; + + case 3: + DIV (X [3*k1], X [3*k1], s) ; + DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; + DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; + break ; + + case 4: + DIV (X [4*k1], X [4*k1], s) ; + DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; + DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; + DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; + break ; + + } + } + else + { + KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, + X + nr*k1) ; + KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block], + Udiag + k1, nr, X + nr*k1) ; + } + + /* -------------------------------------------------------------- */ + /* block back-substitution for the off-diagonal-block entries */ + /* -------------------------------------------------------------- */ + + if (block > 0) + { + switch (nr) + { + + case 1: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [k] ; + for (p = Offp [k] ; p < pend ; p++) + { + MULT_SUB (X [Offi [p]], Offx [p], x [0]) ; + } + } + break ; + + case 2: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [2*k ] ; + x [1] = X [2*k + 1] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; + offik = Offx [p] ; + MULT_SUB (X [2*i], offik, x [0]) ; + MULT_SUB (X [2*i + 1], offik, x [1]) ; + } + } + break ; + + case 3: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [3*k ] ; + x [1] = X [3*k + 1] ; + x [2] = X [3*k + 2] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; + offik = Offx [p] ; + MULT_SUB (X [3*i], offik, x [0]) ; + MULT_SUB (X [3*i + 1], offik, x [1]) ; + MULT_SUB (X [3*i + 2], offik, x [2]) ; + } + } + break ; + + case 4: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [4*k ] ; + x [1] = X [4*k + 1] ; + x [2] = X [4*k + 2] ; + x [3] = X [4*k + 3] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; + offik = Offx [p] ; + MULT_SUB (X [4*i], offik, x [0]) ; + MULT_SUB (X [4*i + 1], offik, x [1]) ; + MULT_SUB (X [4*i + 2], offik, x [2]) ; + MULT_SUB (X [4*i + 3], offik, x [3]) ; + } + } + break ; + } + } + } + + /* ------------------------------------------------------------------ */ + /* permute the result, Bz = Q*X */ + /* ------------------------------------------------------------------ */ + + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + Bz [Q [k]] = X [k] ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + Bz [i ] = X [2*k ] ; + Bz [i + d ] = X [2*k + 1] ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + Bz [i ] = X [3*k ] ; + Bz [i + d ] = X [3*k + 1] ; + Bz [i + d*2] = X [3*k + 2] ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + Bz [i ] = X [4*k ] ; + Bz [i + d ] = X [4*k + 1] ; + Bz [i + d*2] = X [4*k + 2] ; + Bz [i + d*3] = X [4*k + 3] ; + } + break ; + } + + /* ------------------------------------------------------------------ */ + /* go to the next chunk of B */ + /* ------------------------------------------------------------------ */ + + Bz += d*4 ; + } + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_sort.c b/src/maths/KLU/klu_sort.c new file mode 100644 index 000000000..a3ce98f46 --- /dev/null +++ b/src/maths/KLU/klu_sort.c @@ -0,0 +1,156 @@ +/* ========================================================================== */ +/* === KLU_sort ============================================================= */ +/* ========================================================================== */ + +/* sorts the columns of L and U so that the row indices appear in strictly + * increasing order. + */ + +#include "klu_internal.h" + +/* ========================================================================== */ +/* === sort ================================================================= */ +/* ========================================================================== */ + +/* Sort L or U using a double-transpose */ + +static void sort (Int n, Int *Xip, Int *Xlen, Unit *LU, Int *Tp, Int *Tj, + Entry *Tx, Int *W) +{ + Int *Xi ; + Entry *Xx ; + Int p, i, j, len, nz, tp, xlen, pend ; + + ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ; + + /* count the number of entries in each row of L or U */ + for (i = 0 ; i < n ; i++) + { + W [i] = 0 ; + } + for (j = 0 ; j < n ; j++) + { + GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; + for (p = 0 ; p < len ; p++) + { + W [Xi [p]]++ ; + } + } + + /* construct the row pointers for T */ + nz = 0 ; + for (i = 0 ; i < n ; i++) + { + Tp [i] = nz ; + nz += W [i] ; + } + Tp [n] = nz ; + for (i = 0 ; i < n ; i++) + { + W [i] = Tp [i] ; + } + + /* transpose the matrix into Tp, Ti, Tx */ + for (j = 0 ; j < n ; j++) + { + GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; + for (p = 0 ; p < len ; p++) + { + tp = W [Xi [p]]++ ; + Tj [tp] = j ; + Tx [tp] = Xx [p] ; + } + } + + /* transpose the matrix back into Xip, Xlen, Xi, Xx */ + for (j = 0 ; j < n ; j++) + { + W [j] = 0 ; + } + for (i = 0 ; i < n ; i++) + { + pend = Tp [i+1] ; + for (p = Tp [i] ; p < pend ; p++) + { + j = Tj [p] ; + GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; + xlen = W [j]++ ; + Xi [xlen] = i ; + Xx [xlen] = Tx [p] ; + } + } + + ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ; +} + + +/* ========================================================================== */ +/* === KLU_sort ============================================================= */ +/* ========================================================================== */ + +Int KLU_sort +( + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + KLU_common *Common +) +{ + Int *R, *W, *Tp, *Ti, *Lip, *Uip, *Llen, *Ulen ; + Entry *Tx ; + Unit **LUbx ; + Int n, nk, nz, block, nblocks, maxblock, k1 ; + size_t m1 ; + + if (Common == NULL) + { + return (FALSE) ; + } + Common->status = KLU_OK ; + + n = Symbolic->n ; + R = Symbolic->R ; + nblocks = Symbolic->nblocks ; + maxblock = Symbolic->maxblock ; + + Lip = Numeric->Lip ; + Llen = Numeric->Llen ; + Uip = Numeric->Uip ; + Ulen = Numeric->Ulen ; + LUbx = (Unit **) Numeric->LUbx ; + + m1 = ((size_t) maxblock) + 1 ; + + /* allocate workspace */ + nz = MAX (Numeric->max_lnz_block, Numeric->max_unz_block) ; + W = KLU_malloc (maxblock, sizeof (Int), Common) ; + Tp = KLU_malloc (m1, sizeof (Int), Common) ; + Ti = KLU_malloc (nz, sizeof (Int), Common) ; + Tx = KLU_malloc (nz, sizeof (Entry), Common) ; + + PRINTF (("\n======================= Start sort:\n")) ; + + if (Common->status == KLU_OK) + { + /* sort each block of L and U */ + for (block = 0 ; block < nblocks ; block++) + { + k1 = R [block] ; + nk = R [block+1] - k1 ; + if (nk > 1) + { + PRINTF (("\n-------------------block: %d nk %d\n", block, nk)) ; + sort (nk, Lip + k1, Llen + k1, LUbx [block], Tp, Ti, Tx, W) ; + sort (nk, Uip + k1, Ulen + k1, LUbx [block], Tp, Ti, Tx, W) ; + } + } + } + + PRINTF (("\n======================= sort done.\n")) ; + + /* free workspace */ + KLU_free (W, maxblock, sizeof (Int), Common) ; + KLU_free (Tp, m1, sizeof (Int), Common) ; + KLU_free (Ti, nz, sizeof (Int), Common) ; + KLU_free (Tx, nz, sizeof (Entry), Common) ; + return (Common->status == KLU_OK) ; +} diff --git a/src/maths/KLU/klu_tsolve.c b/src/maths/KLU/klu_tsolve.c new file mode 100644 index 000000000..c1f10f708 --- /dev/null +++ b/src/maths/KLU/klu_tsolve.c @@ -0,0 +1,465 @@ +/* ========================================================================== */ +/* === KLU_tsolve =========================================================== */ +/* ========================================================================== */ + +/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze + * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is + * performed. Uses Numeric->Xwork as workspace (undefined on input and output), + * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with + * Numeric->Iwork). + */ + +#include "klu_internal.h" + +Int KLU_tsolve +( + /* inputs, not modified */ + KLU_symbolic *Symbolic, + KLU_numeric *Numeric, + Int d, /* leading dimension of B */ + Int nrhs, /* number of right-hand-sides */ + + /* right-hand-side on input, overwritten with solution to Ax=b on output */ + double B [ ], /* size n*nrhs, in column-oriented form, with + * leading dimension d. */ +#ifdef COMPLEX + Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for + * array transpose solve. Used for the complex + * case only. */ +#endif + /* --------------- */ + KLU_common *Common +) +{ + Entry x [4], offik, s ; + double rs, *Rs ; + Entry *Offx, *X, *Bz, *Udiag ; + Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; + Unit **LUbx ; + Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; + + /* ---------------------------------------------------------------------- */ + /* check inputs */ + /* ---------------------------------------------------------------------- */ + + if (Common == NULL) + { + return (FALSE) ; + } + if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || + B == NULL) + { + Common->status = KLU_INVALID ; + return (FALSE) ; + } + Common->status = KLU_OK ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Symbolic object */ + /* ---------------------------------------------------------------------- */ + + Bz = (Entry *) B ; + n = Symbolic->n ; + nblocks = Symbolic->nblocks ; + Q = Symbolic->Q ; + R = Symbolic->R ; + + /* ---------------------------------------------------------------------- */ + /* get the contents of the Numeric object */ + /* ---------------------------------------------------------------------- */ + + ASSERT (nblocks == Numeric->nblocks) ; + Pnum = Numeric->Pnum ; + Offp = Numeric->Offp ; + Offi = Numeric->Offi ; + Offx = (Entry *) Numeric->Offx ; + + Lip = Numeric->Lip ; + Llen = Numeric->Llen ; + Uip = Numeric->Uip ; + Ulen = Numeric->Ulen ; + LUbx = (Unit **) Numeric->LUbx ; + Udiag = Numeric->Udiag ; + + Rs = Numeric->Rs ; + X = (Entry *) Numeric->Xwork ; + ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; + + /* ---------------------------------------------------------------------- */ + /* solve in chunks of 4 columns at a time */ + /* ---------------------------------------------------------------------- */ + + for (chunk = 0 ; chunk < nrhs ; chunk += 4) + { + + /* ------------------------------------------------------------------ */ + /* get the size of the current chunk */ + /* ------------------------------------------------------------------ */ + + nr = MIN (nrhs - chunk, 4) ; + + /* ------------------------------------------------------------------ */ + /* permute the right hand side, X = Q'*B */ + /* ------------------------------------------------------------------ */ + + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + X [k] = Bz [Q [k]] ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + X [2*k ] = Bz [i ] ; + X [2*k + 1] = Bz [i + d ] ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + X [3*k ] = Bz [i ] ; + X [3*k + 1] = Bz [i + d ] ; + X [3*k + 2] = Bz [i + d*2] ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Q [k] ; + X [4*k ] = Bz [i ] ; + X [4*k + 1] = Bz [i + d ] ; + X [4*k + 2] = Bz [i + d*2] ; + X [4*k + 3] = Bz [i + d*3] ; + } + break ; + + } + + /* ------------------------------------------------------------------ */ + /* solve X = (L*U + Off)'\X */ + /* ------------------------------------------------------------------ */ + + for (block = 0 ; block < nblocks ; block++) + { + + /* -------------------------------------------------------------- */ + /* the block of size nk is from rows/columns k1 to k2-1 */ + /* -------------------------------------------------------------- */ + + k1 = R [block] ; + k2 = R [block+1] ; + nk = k2 - k1 ; + PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; + + /* -------------------------------------------------------------- */ + /* block back-substitution for the off-diagonal-block entries */ + /* -------------------------------------------------------------- */ + + if (block > 0) + { + switch (nr) + { + + case 1: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + for (p = Offp [k] ; p < pend ; p++) + { +#ifdef COMPLEX + if (conj_solve) + { + MULT_SUB_CONJ (X [k], X [Offi [p]], + Offx [p]) ; + } + else +#endif + { + MULT_SUB (X [k], Offx [p], X [Offi [p]]) ; + } + } + } + break ; + + case 2: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [2*k ] ; + x [1] = X [2*k + 1] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (offik, Offx [p]) ; + } + else +#endif + { + offik = Offx [p] ; + } + MULT_SUB (x [0], offik, X [2*i]) ; + MULT_SUB (x [1], offik, X [2*i + 1]) ; + } + X [2*k ] = x [0] ; + X [2*k + 1] = x [1] ; + } + break ; + + case 3: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [3*k ] ; + x [1] = X [3*k + 1] ; + x [2] = X [3*k + 2] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ (offik, Offx [p]) ; + } + else +#endif + { + offik = Offx [p] ; + } + MULT_SUB (x [0], offik, X [3*i]) ; + MULT_SUB (x [1], offik, X [3*i + 1]) ; + MULT_SUB (x [2], offik, X [3*i + 2]) ; + } + X [3*k ] = x [0] ; + X [3*k + 1] = x [1] ; + X [3*k + 2] = x [2] ; + } + break ; + + case 4: + + for (k = k1 ; k < k2 ; k++) + { + pend = Offp [k+1] ; + x [0] = X [4*k ] ; + x [1] = X [4*k + 1] ; + x [2] = X [4*k + 2] ; + x [3] = X [4*k + 3] ; + for (p = Offp [k] ; p < pend ; p++) + { + i = Offi [p] ; +#ifdef COMPLEX + if (conj_solve) + { + CONJ(offik, Offx [p]) ; + } + else +#endif + { + offik = Offx [p] ; + } + MULT_SUB (x [0], offik, X [4*i]) ; + MULT_SUB (x [1], offik, X [4*i + 1]) ; + MULT_SUB (x [2], offik, X [4*i + 2]) ; + MULT_SUB (x [3], offik, X [4*i + 3]) ; + } + X [4*k ] = x [0] ; + X [4*k + 1] = x [1] ; + X [4*k + 2] = x [2] ; + X [4*k + 3] = x [3] ; + } + break ; + } + } + + /* -------------------------------------------------------------- */ + /* solve the block system */ + /* -------------------------------------------------------------- */ + + if (nk == 1) + { +#ifdef COMPLEX + if (conj_solve) + { + CONJ (s, Udiag [k1]) ; + } + else +#endif + { + s = Udiag [k1] ; + } + switch (nr) + { + + case 1: + DIV (X [k1], X [k1], s) ; + break ; + + case 2: + DIV (X [2*k1], X [2*k1], s) ; + DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; + break ; + + case 3: + DIV (X [3*k1], X [3*k1], s) ; + DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; + DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; + break ; + + case 4: + DIV (X [4*k1], X [4*k1], s) ; + DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; + DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; + DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; + break ; + + } + } + else + { + KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block], + Udiag + k1, nr, +#ifdef COMPLEX + conj_solve, +#endif + X + nr*k1) ; + KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, +#ifdef COMPLEX + conj_solve, +#endif + X + nr*k1) ; + } + } + + /* ------------------------------------------------------------------ */ + /* scale and permute the result, Bz = P'(R\X) */ + /* ------------------------------------------------------------------ */ + + if (Rs == NULL) + { + + /* no scaling */ + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + Bz [Pnum [k]] = X [k] ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + Bz [i ] = X [2*k ] ; + Bz [i + d ] = X [2*k + 1] ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + Bz [i ] = X [3*k ] ; + Bz [i + d ] = X [3*k + 1] ; + Bz [i + d*2] = X [3*k + 2] ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + Bz [i ] = X [4*k ] ; + Bz [i + d ] = X [4*k + 1] ; + Bz [i + d*2] = X [4*k + 2] ; + Bz [i + d*3] = X [4*k + 3] ; + } + break ; + } + + } + else + { + + switch (nr) + { + + case 1: + + for (k = 0 ; k < n ; k++) + { + SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ; + } + break ; + + case 2: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ; + } + break ; + + case 3: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ; + } + break ; + + case 4: + + for (k = 0 ; k < n ; k++) + { + i = Pnum [k] ; + rs = Rs [k] ; + SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ; + SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ; + } + break ; + } + } + + /* ------------------------------------------------------------------ */ + /* go to the next chunk of B */ + /* ------------------------------------------------------------------ */ + + Bz += d*4 ; + } + return (TRUE) ; +} diff --git a/src/maths/KLU/klu_version.h b/src/maths/KLU/klu_version.h new file mode 100644 index 000000000..9e8812107 --- /dev/null +++ b/src/maths/KLU/klu_version.h @@ -0,0 +1,694 @@ +#ifndef _KLU_VERSION_H +#define _KLU_VERSION_H + +#ifdef DLONG +#define Int UF_long +#define Int_id UF_long_id +#define Int_MAX UF_long_max +#else +#define Int int +#define Int_id "%d" +#define Int_MAX INT_MAX +#endif + +#define NPRINT + +#define BYTES(type,n) (sizeof (type) * (n)) +#define CEILING(b,u) (((b)+(u)-1) / (u)) +#define UNITS(type,n) (CEILING (BYTES (type,n), sizeof (Unit))) +#define DUNITS(type,n) (ceil (BYTES (type, (double) n) / sizeof (Unit))) + +#define GET_I_POINTER(LU, Xip, Xi, k) \ +{ \ + Xi = (Int *) (LU + Xip [k]) ; \ +} + +#define GET_X_POINTER(LU, Xip, Xlen, Xx, k) \ +{ \ + Xx = (Entry *) (LU + Xip [k] + UNITS (Int, Xlen [k])) ; \ +} + +#define GET_POINTER(LU, Xip, Xlen, Xi, Xx, k, xlen) \ +{ \ + Unit *xp = LU + Xip [k] ; \ + xlen = Xlen [k] ; \ + Xi = (Int *) xp ; \ + Xx = (Entry *) (xp + UNITS (Int, xlen)) ; \ +} + +/* function names */ +#ifdef COMPLEX + +#ifdef DLONG + +#define KLU_scale klu_zl_scale +#define KLU_solve klu_zl_solve +#define KLU_tsolve klu_zl_tsolve +#define KLU_free_numeric klu_zl_free_numeric +#define KLU_factor klu_zl_factor +#define KLU_refactor klu_zl_refactor +#define KLU_kernel_factor klu_zl_kernel_factor +#define KLU_lsolve klu_zl_lsolve +#define KLU_ltsolve klu_zl_ltsolve +#define KLU_usolve klu_zl_usolve +#define KLU_utsolve klu_zl_utsolve +#define KLU_kernel klu_zl_kernel +#define KLU_valid klu_zl_valid +#define KLU_valid_LU klu_zl_valid_LU +#define KLU_sort klu_zl_sort +#define KLU_rgrowth klu_zl_rgrowth +#define KLU_rcond klu_zl_rcond +#define KLU_extract klu_zl_extract +#define KLU_condest klu_zl_condest +#define KLU_flops klu_zl_flops + +#else + +#define KLU_scale klu_z_scale +#define KLU_solve klu_z_solve +#define KLU_tsolve klu_z_tsolve +#define KLU_free_numeric klu_z_free_numeric +#define KLU_factor klu_z_factor +#define KLU_refactor klu_z_refactor +#define KLU_kernel_factor klu_z_kernel_factor +#define KLU_lsolve klu_z_lsolve +#define KLU_ltsolve klu_z_ltsolve +#define KLU_usolve klu_z_usolve +#define KLU_utsolve klu_z_utsolve +#define KLU_kernel klu_z_kernel +#define KLU_valid klu_z_valid +#define KLU_valid_LU klu_z_valid_LU +#define KLU_sort klu_z_sort +#define KLU_rgrowth klu_z_rgrowth +#define KLU_rcond klu_z_rcond +#define KLU_extract klu_z_extract +#define KLU_condest klu_z_condest +#define KLU_flops klu_z_flops + +#endif + +#else + +#ifdef DLONG + +#define KLU_scale klu_l_scale +#define KLU_solve klu_l_solve +#define KLU_tsolve klu_l_tsolve +#define KLU_free_numeric klu_l_free_numeric +#define KLU_factor klu_l_factor +#define KLU_refactor klu_l_refactor +#define KLU_kernel_factor klu_l_kernel_factor +#define KLU_lsolve klu_l_lsolve +#define KLU_ltsolve klu_l_ltsolve +#define KLU_usolve klu_l_usolve +#define KLU_utsolve klu_l_utsolve +#define KLU_kernel klu_l_kernel +#define KLU_valid klu_l_valid +#define KLU_valid_LU klu_l_valid_LU +#define KLU_sort klu_l_sort +#define KLU_rgrowth klu_l_rgrowth +#define KLU_rcond klu_l_rcond +#define KLU_extract klu_l_extract +#define KLU_condest klu_l_condest +#define KLU_flops klu_l_flops + +#else + +#define KLU_scale klu_scale +#define KLU_solve klu_solve +#define KLU_tsolve klu_tsolve +#define KLU_free_numeric klu_free_numeric +#define KLU_factor klu_factor +#define KLU_refactor klu_refactor +#define KLU_kernel_factor klu_kernel_factor +#define KLU_lsolve klu_lsolve +#define KLU_ltsolve klu_ltsolve +#define KLU_usolve klu_usolve +#define KLU_utsolve klu_utsolve +#define KLU_kernel klu_kernel +#define KLU_valid klu_valid +#define KLU_valid_LU klu_valid_LU +#define KLU_sort klu_sort +#define KLU_rgrowth klu_rgrowth +#define KLU_rcond klu_rcond +#define KLU_extract klu_extract +#define KLU_condest klu_condest +#define KLU_flops klu_flops + +#endif + +#endif + + +#ifdef DLONG + +#define KLU_analyze klu_l_analyze +#define KLU_analyze_given klu_l_analyze_given +#define KLU_alloc_symbolic klu_l_alloc_symbolic +#define KLU_free_symbolic klu_l_free_symbolic +#define KLU_defaults klu_l_defaults +#define KLU_free klu_l_free +#define KLU_malloc klu_l_malloc +#define KLU_realloc klu_l_realloc +#define KLU_add_size_t klu_l_add_size_t +#define KLU_mult_size_t klu_l_mult_size_t + +#define KLU_symbolic klu_l_symbolic +#define KLU_numeric klu_l_numeric +#define KLU_common klu_l_common + +#define BTF_order btf_l_order +#define BTF_strongcomp btf_l_strongcomp + +#define AMD_order amd_l_order +#define COLAMD colamd_l +#define COLAMD_recommended colamd_l_recommended + +#else + +#define KLU_analyze klu_analyze +#define KLU_analyze_given klu_analyze_given +#define KLU_alloc_symbolic klu_alloc_symbolic +#define KLU_free_symbolic klu_free_symbolic +#define KLU_defaults klu_defaults +#define KLU_free klu_free +#define KLU_malloc klu_malloc +#define KLU_realloc klu_realloc +#define KLU_add_size_t klu_add_size_t +#define KLU_mult_size_t klu_mult_size_t + +#define KLU_symbolic klu_symbolic +#define KLU_numeric klu_numeric +#define KLU_common klu_common + +#define BTF_order btf_order +#define BTF_strongcomp btf_strongcomp + +#define AMD_order amd_order +#define COLAMD colamd +#define COLAMD_recommended colamd_recommended + +#endif + + +/* -------------------------------------------------------------------------- */ +/* Numerical relop macros for correctly handling the NaN case */ +/* -------------------------------------------------------------------------- */ + +/* +SCALAR_IS_NAN(x): + True if x is NaN. False otherwise. The commonly-existing isnan(x) + function could be used, but it's not in Kernighan & Ritchie 2nd edition + (ANSI C). It may appear in , but I'm not certain about + portability. The expression x != x is true if and only if x is NaN, + according to the IEEE 754 floating-point standard. + +SCALAR_IS_ZERO(x): + True if x is zero. False if x is nonzero, NaN, or +/- Inf. + This is (x == 0) if the compiler is IEEE 754 compliant. + +SCALAR_IS_NONZERO(x): + True if x is nonzero, NaN, or +/- Inf. False if x zero. + This is (x != 0) if the compiler is IEEE 754 compliant. + +SCALAR_IS_LTZERO(x): + True if x is < zero or -Inf. False if x is >= 0, NaN, or +Inf. + This is (x < 0) if the compiler is IEEE 754 compliant. +*/ + +/* These all work properly, according to the IEEE 754 standard ... except on */ +/* a PC with windows. Works fine in Linux on the same PC... */ +#define SCALAR_IS_NAN(x) ((x) != (x)) +#define SCALAR_IS_ZERO(x) ((x) == 0.) +#define SCALAR_IS_NONZERO(x) ((x) != 0.) +#define SCALAR_IS_LTZERO(x) ((x) < 0.) + + +/* scalar absolute value macro. If x is NaN, the result is NaN: */ +#define SCALAR_ABS(x) ((SCALAR_IS_LTZERO (x)) ? -(x) : (x)) + +/* print a scalar (avoid printing "-0" for negative zero). */ +#ifdef NPRINT +#define PRINT_SCALAR(a) +#else +#define PRINT_SCALAR(a) \ +{ \ + if (SCALAR_IS_NONZERO (a)) \ + { \ + PRINTF ((" (%g)", (a))) ; \ + } \ + else \ + { \ + PRINTF ((" (0)")) ; \ + } \ +} +#endif + +/* -------------------------------------------------------------------------- */ +/* Real floating-point arithmetic */ +/* -------------------------------------------------------------------------- */ + +#ifndef COMPLEX + +typedef double Unit ; +#define Entry double + +#define SPLIT(s) (1) +#define REAL(c) (c) +#define IMAG(c) (0.) +#define ASSIGN(c,s1,s2,p,split) { (c) = (s1)[p] ; } +#define CLEAR(c) { (c) = 0. ; } +#define CLEAR_AND_INCREMENT(p) { *p++ = 0. ; } +#define IS_NAN(a) SCALAR_IS_NAN (a) +#define IS_ZERO(a) SCALAR_IS_ZERO (a) +#define IS_NONZERO(a) SCALAR_IS_NONZERO (a) +#define SCALE_DIV(c,s) { (c) /= (s) ; } +#define SCALE_DIV_ASSIGN(a,c,s) { a = c / s ; } +#define SCALE(c,s) { (c) *= (s) ; } +#define ASSEMBLE(c,a) { (c) += (a) ; } +#define ASSEMBLE_AND_INCREMENT(c,p) { (c) += *p++ ; } +#define DECREMENT(c,a) { (c) -= (a) ; } +#define MULT(c,a,b) { (c) = (a) * (b) ; } +#define MULT_CONJ(c,a,b) { (c) = (a) * (b) ; } +#define MULT_SUB(c,a,b) { (c) -= (a) * (b) ; } +#define MULT_SUB_CONJ(c,a,b) { (c) -= (a) * (b) ; } +#define DIV(c,a,b) { (c) = (a) / (b) ; } +#define RECIPROCAL(c) { (c) = 1.0 / (c) ; } +#define DIV_CONJ(c,a,b) { (c) = (a) / (b) ; } +#define APPROX_ABS(s,a) { (s) = SCALAR_ABS (a) ; } +#define ABS(s,a) { (s) = SCALAR_ABS (a) ; } +#define PRINT_ENTRY(a) PRINT_SCALAR (a) +#define CONJ(a,x) a = x + +/* for flop counts */ +#define MULTSUB_FLOPS 2. /* c -= a*b */ +#define DIV_FLOPS 1. /* c = a/b */ +#define ABS_FLOPS 0. /* c = abs (a) */ +#define ASSEMBLE_FLOPS 1. /* c += a */ +#define DECREMENT_FLOPS 1. /* c -= a */ +#define MULT_FLOPS 1. /* c = a*b */ +#define SCALE_FLOPS 1. /* c = a/s */ + +#else + +/* -------------------------------------------------------------------------- */ +/* Complex floating-point arithmetic */ +/* -------------------------------------------------------------------------- */ + +/* + Note: An alternative to this Double_Complex type would be to use a + struct { double r ; double i ; }. The problem with that method + (used by the Sun Performance Library, for example) is that ANSI C provides + no guarantee about the layout of a struct. It is possible that the sizeof + the struct above would be greater than 2 * sizeof (double). This would + mean that the complex BLAS could not be used. The method used here avoids + that possibility. ANSI C *does* guarantee that an array of structs has + the same size as n times the size of one struct. + + The ANSI C99 version of the C language includes a "double _Complex" type. + It should be possible in that case to do the following: + + #define Entry double _Complex + + and remove the Double_Complex struct. The macros, below, could then be + replaced with instrinsic operators. Note that the #define Real and + #define Imag should also be removed (they only appear in this file). + + For the MULT, MULT_SUB, MULT_SUB_CONJ, and MULT_CONJ macros, + the output argument c cannot be the same as any input argument. + +*/ + +typedef struct +{ + double component [2] ; /* real and imaginary parts */ + +} Double_Complex ; + +typedef Double_Complex Unit ; +#define Entry Double_Complex +#define Real component [0] +#define Imag component [1] + +/* for flop counts */ +#define MULTSUB_FLOPS 8. /* c -= a*b */ +#define DIV_FLOPS 9. /* c = a/b */ +#define ABS_FLOPS 6. /* c = abs (a), count sqrt as one flop */ +#define ASSEMBLE_FLOPS 2. /* c += a */ +#define DECREMENT_FLOPS 2. /* c -= a */ +#define MULT_FLOPS 6. /* c = a*b */ +#define SCALE_FLOPS 2. /* c = a/s or c = a*s */ + +/* -------------------------------------------------------------------------- */ + +/* real part of c */ +#define REAL(c) ((c).Real) + +/* -------------------------------------------------------------------------- */ + +/* imag part of c */ +#define IMAG(c) ((c).Imag) + +/* -------------------------------------------------------------------------- */ + +/* Return TRUE if a complex number is in split form, FALSE if in packed form */ +#define SPLIT(sz) ((sz) != (double *) NULL) + +/* c = (s1) + (s2)*i, if s2 is null, then X is in "packed" format (compatible + * with Entry and ANSI C99 double _Complex type). */ +/*#define ASSIGN(c,s1,s2,p,split) \ +{ \ + if (split) \ + { \ + (c).Real = (s1)[p] ; \ + (c).Imag = (s2)[p] ; \ + } \ + else \ + { \ + (c) = ((Entry *)(s1))[p] ; \ + } \ +}*/ + +/* -------------------------------------------------------------------------- */ +#define CONJ(a, x) \ +{ \ + a.Real = x.Real ; \ + a.Imag = -x.Imag ; \ +} + +/* c = 0 */ +#define CLEAR(c) \ +{ \ + (c).Real = 0. ; \ + (c).Imag = 0. ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* *p++ = 0 */ +#define CLEAR_AND_INCREMENT(p) \ +{ \ + p->Real = 0. ; \ + p->Imag = 0. ; \ + p++ ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* True if a == 0 */ +#define IS_ZERO(a) \ + (SCALAR_IS_ZERO ((a).Real) && SCALAR_IS_ZERO ((a).Imag)) + +/* -------------------------------------------------------------------------- */ + +/* True if a is NaN */ +#define IS_NAN(a) \ + (SCALAR_IS_NAN ((a).Real) || SCALAR_IS_NAN ((a).Imag)) + +/* -------------------------------------------------------------------------- */ + +/* True if a != 0 */ +#define IS_NONZERO(a) \ + (SCALAR_IS_NONZERO ((a).Real) || SCALAR_IS_NONZERO ((a).Imag)) + +/* -------------------------------------------------------------------------- */ + +/* a = c/s */ +#define SCALE_DIV_ASSIGN(a,c,s) \ +{ \ + a.Real = c.Real / s ; \ + a.Imag = c.Imag / s ; \ +} + +/* c /= s */ +#define SCALE_DIV(c,s) \ +{ \ + (c).Real /= (s) ; \ + (c).Imag /= (s) ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c *= s */ +#define SCALE(c,s) \ +{ \ + (c).Real *= (s) ; \ + (c).Imag *= (s) ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c += a */ +#define ASSEMBLE(c,a) \ +{ \ + (c).Real += (a).Real ; \ + (c).Imag += (a).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c += *p++ */ +#define ASSEMBLE_AND_INCREMENT(c,p) \ +{ \ + (c).Real += p->Real ; \ + (c).Imag += p->Imag ; \ + p++ ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c -= a */ +#define DECREMENT(c,a) \ +{ \ + (c).Real -= (a).Real ; \ + (c).Imag -= (a).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c = a*b, assert because c cannot be the same as a or b */ +#define MULT(c,a,b) \ +{ \ + ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ + (c).Real = (a).Real * (b).Real - (a).Imag * (b).Imag ; \ + (c).Imag = (a).Imag * (b).Real + (a).Real * (b).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c = a*conjugate(b), assert because c cannot be the same as a or b */ +#define MULT_CONJ(c,a,b) \ +{ \ + ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ + (c).Real = (a).Real * (b).Real + (a).Imag * (b).Imag ; \ + (c).Imag = (a).Imag * (b).Real - (a).Real * (b).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c -= a*b, assert because c cannot be the same as a or b */ +#define MULT_SUB(c,a,b) \ +{ \ + ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ + (c).Real -= (a).Real * (b).Real - (a).Imag * (b).Imag ; \ + (c).Imag -= (a).Imag * (b).Real + (a).Real * (b).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c -= a*conjugate(b), assert because c cannot be the same as a or b */ +#define MULT_SUB_CONJ(c,a,b) \ +{ \ + ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ + (c).Real -= (a).Real * (b).Real + (a).Imag * (b).Imag ; \ + (c).Imag -= (a).Imag * (b).Real - (a).Real * (b).Imag ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* c = a/b, be careful to avoid underflow and overflow */ +#ifdef MATHWORKS +#define DIV(c,a,b) \ +{ \ + (void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (b).Imag, \ + &((c).Real), &((c).Imag)) ; \ +} +#else +/* This uses ACM Algo 116, by R. L. Smith, 1962. */ +/* c can be the same variable as a or b. */ +/* Ignore NaN case for double relop br>=bi. */ +#define DIV(c,a,b) \ +{ \ + double r, den, ar, ai, br, bi ; \ + br = (b).Real ; \ + bi = (b).Imag ; \ + ar = (a).Real ; \ + ai = (a).Imag ; \ + if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \ + { \ + r = bi / br ; \ + den = br + r * bi ; \ + (c).Real = (ar + ai * r) / den ; \ + (c).Imag = (ai - ar * r) / den ; \ + } \ + else \ + { \ + r = br / bi ; \ + den = r * br + bi ; \ + (c).Real = (ar * r + ai) / den ; \ + (c).Imag = (ai * r - ar) / den ; \ + } \ +} +#endif + +/* -------------------------------------------------------------------------- */ + +/* c = 1/c, be careful to avoid underflow and overflow */ +/* Not used if MATHWORKS is defined. */ +/* This uses ACM Algo 116, by R. L. Smith, 1962. */ +/* Ignore NaN case for double relop cr>=ci. */ +#define RECIPROCAL(c) \ +{ \ + double r, den, cr, ci ; \ + cr = (c).Real ; \ + ci = (c).Imag ; \ + if (SCALAR_ABS (cr) >= SCALAR_ABS (ci)) \ + { \ + r = ci / cr ; \ + den = cr + r * ci ; \ + (c).Real = 1.0 / den ; \ + (c).Imag = - r / den ; \ + } \ + else \ + { \ + r = cr / ci ; \ + den = r * cr + ci ; \ + (c).Real = r / den ; \ + (c).Imag = - 1.0 / den ; \ + } \ +} + + +/* -------------------------------------------------------------------------- */ + +/* c = a/conjugate(b), be careful to avoid underflow and overflow */ +#ifdef MATHWORKS +#define DIV_CONJ(c,a,b) \ +{ \ + (void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (-(b).Imag), \ + &((c).Real), &((c).Imag)) ; \ +} +#else +/* This uses ACM Algo 116, by R. L. Smith, 1962. */ +/* c can be the same variable as a or b. */ +/* Ignore NaN case for double relop br>=bi. */ +#define DIV_CONJ(c,a,b) \ +{ \ + double r, den, ar, ai, br, bi ; \ + br = (b).Real ; \ + bi = (b).Imag ; \ + ar = (a).Real ; \ + ai = (a).Imag ; \ + if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \ + { \ + r = (-bi) / br ; \ + den = br - r * bi ; \ + (c).Real = (ar + ai * r) / den ; \ + (c).Imag = (ai - ar * r) / den ; \ + } \ + else \ + { \ + r = br / (-bi) ; \ + den = r * br - bi; \ + (c).Real = (ar * r + ai) / den ; \ + (c).Imag = (ai * r - ar) / den ; \ + } \ +} +#endif + +/* -------------------------------------------------------------------------- */ + +/* approximate absolute value, s = |r|+|i| */ +#define APPROX_ABS(s,a) \ +{ \ + (s) = SCALAR_ABS ((a).Real) + SCALAR_ABS ((a).Imag) ; \ +} + +/* -------------------------------------------------------------------------- */ + +/* exact absolute value, s = sqrt (a.real^2 + amag^2) */ +#ifdef MATHWORKS +#define ABS(s,a) \ +{ \ + (s) = utFdlibm_hypot ((a).Real, (a).Imag) ; \ +} +#else +/* Ignore NaN case for the double relops ar>=ai and ar+ai==ar. */ +#define ABS(s,a) \ +{ \ + double r, ar, ai ; \ + ar = SCALAR_ABS ((a).Real) ; \ + ai = SCALAR_ABS ((a).Imag) ; \ + if (ar >= ai) \ + { \ + if (ar + ai == ar) \ + { \ + (s) = ar ; \ + } \ + else \ + { \ + r = ai / ar ; \ + (s) = ar * sqrt (1.0 + r*r) ; \ + } \ + } \ + else \ + { \ + if (ai + ar == ai) \ + { \ + (s) = ai ; \ + } \ + else \ + { \ + r = ar / ai ; \ + (s) = ai * sqrt (1.0 + r*r) ; \ + } \ + } \ +} +#endif + +/* -------------------------------------------------------------------------- */ + +/* print an entry (avoid printing "-0" for negative zero). */ +#ifdef NPRINT +#define PRINT_ENTRY(a) +#else +#define PRINT_ENTRY(a) \ +{ \ + if (SCALAR_IS_NONZERO ((a).Real)) \ + { \ + PRINTF ((" (%g", (a).Real)) ; \ + } \ + else \ + { \ + PRINTF ((" (0")) ; \ + } \ + if (SCALAR_IS_LTZERO ((a).Imag)) \ + { \ + PRINTF ((" - %gi)", -(a).Imag)) ; \ + } \ + else if (SCALAR_IS_ZERO ((a).Imag)) \ + { \ + PRINTF ((" + 0i)")) ; \ + } \ + else \ + { \ + PRINTF ((" + %gi)", (a).Imag)) ; \ + } \ +} +#endif + +/* -------------------------------------------------------------------------- */ + +#endif /* #ifndef COMPLEX */ + +#endif