incorporate KLU source files from SuiteSparse-3.7.0.tar.gz
This commit is contained in:
parent
f65715242d
commit
691f4b14f5
|
|
@ -0,0 +1,151 @@
|
|||
/* ========================================================================== */
|
||||
/* === UFconfig.h =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Configuration file for SuiteSparse: a Suite of Sparse matrix packages
|
||||
* (AMD, COLAMD, CCOLAMD, CAMD, CHOLMOD, UMFPACK, CXSparse, and others).
|
||||
*
|
||||
* UFconfig.h provides the definition of the long integer. On most systems,
|
||||
* a C program can be compiled in LP64 mode, in which long's and pointers are
|
||||
* both 64-bits, and int's are 32-bits. Windows 64, however, uses the LLP64
|
||||
* model, in which int's and long's are 32-bits, and long long's and pointers
|
||||
* are 64-bits.
|
||||
*
|
||||
* SuiteSparse packages that include long integer versions are
|
||||
* intended for the LP64 mode. However, as a workaround for Windows 64
|
||||
* (and perhaps other systems), the long integer can be redefined.
|
||||
*
|
||||
* If _WIN64 is defined, then the __int64 type is used instead of long.
|
||||
*
|
||||
* The long integer can also be defined at compile time. For example, this
|
||||
* could be added to UFconfig.mk:
|
||||
*
|
||||
* CFLAGS = -O -D'UF_long=long long' -D'UF_long_max=9223372036854775801' \
|
||||
* -D'UF_long_idd="lld"'
|
||||
*
|
||||
* This file defines UF_long as either long (on all but _WIN64) or
|
||||
* __int64 on Windows 64. The intent is that a UF_long is always a 64-bit
|
||||
* integer in a 64-bit code. ptrdiff_t might be a better choice than long;
|
||||
* it is always the same size as a pointer.
|
||||
*
|
||||
* This file also defines the SUITESPARSE_VERSION and related definitions.
|
||||
*
|
||||
* Copyright (c) 2007, University of Florida. No licensing restrictions
|
||||
* apply to this file or to the UFconfig directory. Author: Timothy A. Davis.
|
||||
*/
|
||||
|
||||
#ifndef _UFCONFIG_H
|
||||
#define _UFCONFIG_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === UF_long ============================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
#ifndef UF_long
|
||||
|
||||
#ifdef _WIN64
|
||||
|
||||
#define UF_long __int64
|
||||
#define UF_long_max _I64_MAX
|
||||
#define UF_long_idd "I64d"
|
||||
|
||||
#else
|
||||
|
||||
#define UF_long long
|
||||
#define UF_long_max LONG_MAX
|
||||
#define UF_long_idd "ld"
|
||||
|
||||
#endif
|
||||
#define UF_long_id "%" UF_long_idd
|
||||
#endif
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === UFconfig parameters and functions ==================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* SuiteSparse-wide parameters will be placed in this struct. So far, they
|
||||
are only used by RBio. */
|
||||
|
||||
typedef struct UFconfig_struct
|
||||
{
|
||||
void *(*malloc_memory) (size_t) ; /* pointer to malloc */
|
||||
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */
|
||||
void (*free_memory) (void *) ; /* pointer to free */
|
||||
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */
|
||||
|
||||
} UFconfig ;
|
||||
|
||||
void *UFmalloc /* pointer to allocated block of memory */
|
||||
(
|
||||
size_t nitems, /* number of items to malloc (>=1 is enforced) */
|
||||
size_t size_of_item, /* sizeof each item */
|
||||
int *ok, /* TRUE if successful, FALSE otherwise */
|
||||
UFconfig *config /* SuiteSparse-wide configuration */
|
||||
) ;
|
||||
|
||||
void *UFfree /* always returns NULL */
|
||||
(
|
||||
void *p, /* block to free */
|
||||
UFconfig *config /* SuiteSparse-wide configuration */
|
||||
) ;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === SuiteSparse version ================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* SuiteSparse is not a package itself, but a collection of packages, some of
|
||||
* which must be used together (UMFPACK requires AMD, CHOLMOD requires AMD,
|
||||
* COLAMD, CAMD, and CCOLAMD, etc). A version number is provided here for the
|
||||
* collection itself. The versions of packages within each version of
|
||||
* SuiteSparse are meant to work together. Combining one packge from one
|
||||
* version of SuiteSparse, with another package from another version of
|
||||
* SuiteSparse, may or may not work.
|
||||
*
|
||||
* SuiteSparse Version 3.7.0 contains the following packages:
|
||||
*
|
||||
* UFconfig version 3.7.0 (version always the same as SuiteSparse)
|
||||
* AMD version 2.2.3
|
||||
* CAMD version 2.2.3
|
||||
* CCOLAMD version 2.7.4
|
||||
* COLAMD version 2.7.4
|
||||
* BTF version 1.1.3
|
||||
* CHOLMOD version 1.7.4
|
||||
* CSparse3 version 3.0.2
|
||||
* CSparse version 2.2.6
|
||||
* CXSparse version 2.2.6
|
||||
* KLU version 1.1.3
|
||||
* LDL version 2.0.4
|
||||
* RBio version 2.0.2
|
||||
* SPQR version 1.2.3 (also called SuiteSparseQR)
|
||||
* UFcollection version 1.6.0
|
||||
* UMFPACK version 5.5.2
|
||||
* SSMULT version 2.0.3
|
||||
* spqr_rank version 1.0.0
|
||||
* MATLAB_Tools various packages & M-files. No specific version number.
|
||||
*
|
||||
* Other package dependencies:
|
||||
* BLAS required by CHOLMOD and UMFPACK
|
||||
* LAPACK required by CHOLMOD
|
||||
* METIS 4.0.1 required by CHOLMOD (optional) and KLU (optional)
|
||||
*/
|
||||
|
||||
#define SUITESPARSE_DATE "Dec 15, 2011"
|
||||
#define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub))
|
||||
#define SUITESPARSE_MAIN_VERSION 3
|
||||
#define SUITESPARSE_SUB_VERSION 7
|
||||
#define SUITESPARSE_SUBSUB_VERSION 0
|
||||
#define SUITESPARSE_VERSION \
|
||||
SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,412 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD: approximate minimum degree ordering =========================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD Version 2.2, Copyright (c) 2007 by Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* AMD finds a symmetric ordering P of a matrix A so that the Cholesky
|
||||
* factorization of P*A*P' has fewer nonzeros and takes less work than the
|
||||
* Cholesky factorization of A. If A is not symmetric, then it performs its
|
||||
* ordering on the matrix A+A'. Two sets of user-callable routines are
|
||||
* provided, one for int integers and the other for UF_long integers.
|
||||
*
|
||||
* The method is based on the approximate minimum degree algorithm, discussed
|
||||
* in Amestoy, Davis, and Duff, "An approximate degree ordering algorithm",
|
||||
* SIAM Journal of Matrix Analysis and Applications, vol. 17, no. 4, pp.
|
||||
* 886-905, 1996. This package can perform both the AMD ordering (with
|
||||
* aggressive absorption), and the AMDBAR ordering (without aggressive
|
||||
* absorption) discussed in the above paper. This package differs from the
|
||||
* Fortran codes discussed in the paper:
|
||||
*
|
||||
* (1) it can ignore "dense" rows and columns, leading to faster run times
|
||||
* (2) it computes the ordering of A+A' if A is not symmetric
|
||||
* (3) it is followed by a depth-first post-ordering of the assembly tree
|
||||
* (or supernodal elimination tree)
|
||||
*
|
||||
* For historical reasons, the Fortran versions, amd.f and amdbar.f, have
|
||||
* been left (nearly) unchanged. They compute the identical ordering as
|
||||
* described in the above paper.
|
||||
*/
|
||||
|
||||
#ifndef AMD_H
|
||||
#define AMD_H
|
||||
|
||||
/* make it easy for C++ programs to include AMD */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* get the definition of size_t: */
|
||||
#include <stddef.h>
|
||||
|
||||
/* define UF_long */
|
||||
#include "UFconfig.h"
|
||||
|
||||
int amd_order /* returns AMD_OK, AMD_OK_BUT_JUMBLED,
|
||||
* AMD_INVALID, or AMD_OUT_OF_MEMORY */
|
||||
(
|
||||
int n, /* A is n-by-n. n must be >= 0. */
|
||||
const int Ap [ ], /* column pointers for A, of size n+1 */
|
||||
const int Ai [ ], /* row indices of A, of size nz = Ap [n] */
|
||||
int P [ ], /* output permutation, of size n */
|
||||
double Control [ ], /* input Control settings, of size AMD_CONTROL */
|
||||
double Info [ ] /* output Info statistics, of size AMD_INFO */
|
||||
) ;
|
||||
|
||||
UF_long amd_l_order /* see above for description of arguments */
|
||||
(
|
||||
UF_long n,
|
||||
const UF_long Ap [ ],
|
||||
const UF_long Ai [ ],
|
||||
UF_long P [ ],
|
||||
double Control [ ],
|
||||
double Info [ ]
|
||||
) ;
|
||||
|
||||
/* Input arguments (not modified):
|
||||
*
|
||||
* n: the matrix A is n-by-n.
|
||||
* Ap: an int/UF_long array of size n+1, containing column pointers of A.
|
||||
* Ai: an int/UF_long array of size nz, containing the row indices of A,
|
||||
* where nz = Ap [n].
|
||||
* Control: a double array of size AMD_CONTROL, containing control
|
||||
* parameters. Defaults are used if Control is NULL.
|
||||
*
|
||||
* Output arguments (not defined on input):
|
||||
*
|
||||
* P: an int/UF_long array of size n, containing the output permutation. If
|
||||
* row i is the kth pivot row, then P [k] = i. In MATLAB notation,
|
||||
* the reordered matrix is A (P,P).
|
||||
* Info: a double array of size AMD_INFO, containing statistical
|
||||
* information. Ignored if Info is NULL.
|
||||
*
|
||||
* On input, the matrix A is stored in column-oriented form. The row indices
|
||||
* of nonzero entries in column j are stored in Ai [Ap [j] ... Ap [j+1]-1].
|
||||
*
|
||||
* If the row indices appear in ascending order in each column, and there
|
||||
* are no duplicate entries, then amd_order is slightly more efficient in
|
||||
* terms of time and memory usage. If this condition does not hold, a copy
|
||||
* of the matrix is created (where these conditions do hold), and the copy is
|
||||
* ordered. This feature is new to v2.0 (v1.2 and earlier required this
|
||||
* condition to hold for the input matrix).
|
||||
*
|
||||
* Row indices must be in the range 0 to
|
||||
* n-1. Ap [0] must be zero, and thus nz = Ap [n] is the number of nonzeros
|
||||
* in A. The array Ap is of size n+1, and the array Ai is of size nz = Ap [n].
|
||||
* The matrix does not need to be symmetric, and the diagonal does not need to
|
||||
* be present (if diagonal entries are present, they are ignored except for
|
||||
* the output statistic Info [AMD_NZDIAG]). The arrays Ai and Ap are not
|
||||
* modified. This form of the Ap and Ai arrays to represent the nonzero
|
||||
* pattern of the matrix A is the same as that used internally by MATLAB.
|
||||
* If you wish to use a more flexible input structure, please see the
|
||||
* umfpack_*_triplet_to_col routines in the UMFPACK package, at
|
||||
* http://www.cise.ufl.edu/research/sparse/umfpack.
|
||||
*
|
||||
* Restrictions: n >= 0. Ap [0] = 0. Ap [j] <= Ap [j+1] for all j in the
|
||||
* range 0 to n-1. nz = Ap [n] >= 0. Ai [0..nz-1] must be in the range 0
|
||||
* to n-1. Finally, Ai, Ap, and P must not be NULL. If any of these
|
||||
* restrictions are not met, AMD returns AMD_INVALID.
|
||||
*
|
||||
* AMD returns:
|
||||
*
|
||||
* AMD_OK if the matrix is valid and sufficient memory can be allocated to
|
||||
* perform the ordering.
|
||||
*
|
||||
* AMD_OUT_OF_MEMORY if not enough memory can be allocated.
|
||||
*
|
||||
* AMD_INVALID if the input arguments n, Ap, Ai are invalid, or if P is
|
||||
* NULL.
|
||||
*
|
||||
* AMD_OK_BUT_JUMBLED if the matrix had unsorted columns, and/or duplicate
|
||||
* entries, but was otherwise valid.
|
||||
*
|
||||
* The AMD routine first forms the pattern of the matrix A+A', and then
|
||||
* computes a fill-reducing ordering, P. If P [k] = i, then row/column i of
|
||||
* the original is the kth pivotal row. In MATLAB notation, the permuted
|
||||
* matrix is A (P,P), except that 0-based indexing is used instead of the
|
||||
* 1-based indexing in MATLAB.
|
||||
*
|
||||
* The Control array is used to set various parameters for AMD. If a NULL
|
||||
* pointer is passed, default values are used. The Control array is not
|
||||
* modified.
|
||||
*
|
||||
* Control [AMD_DENSE]: controls the threshold for "dense" rows/columns.
|
||||
* A dense row/column in A+A' can cause AMD to spend a lot of time in
|
||||
* ordering the matrix. If Control [AMD_DENSE] >= 0, rows/columns
|
||||
* with more than Control [AMD_DENSE] * sqrt (n) entries are ignored
|
||||
* during the ordering, and placed last in the output order. The
|
||||
* default value of Control [AMD_DENSE] is 10. If negative, no
|
||||
* rows/columns are treated as "dense". Rows/columns with 16 or
|
||||
* fewer off-diagonal entries are never considered "dense".
|
||||
*
|
||||
* Control [AMD_AGGRESSIVE]: controls whether or not to use aggressive
|
||||
* absorption, in which a prior element is absorbed into the current
|
||||
* element if is a subset of the current element, even if it is not
|
||||
* adjacent to the current pivot element (refer to Amestoy, Davis,
|
||||
* & Duff, 1996, for more details). The default value is nonzero,
|
||||
* which means to perform aggressive absorption. This nearly always
|
||||
* leads to a better ordering (because the approximate degrees are
|
||||
* more accurate) and a lower execution time. There are cases where
|
||||
* it can lead to a slightly worse ordering, however. To turn it off,
|
||||
* set Control [AMD_AGGRESSIVE] to 0.
|
||||
*
|
||||
* Control [2..4] are not used in the current version, but may be used in
|
||||
* future versions.
|
||||
*
|
||||
* The Info array provides statistics about the ordering on output. If it is
|
||||
* not present, the statistics are not returned. This is not an error
|
||||
* condition.
|
||||
*
|
||||
* Info [AMD_STATUS]: the return value of AMD, either AMD_OK,
|
||||
* AMD_OK_BUT_JUMBLED, AMD_OUT_OF_MEMORY, or AMD_INVALID.
|
||||
*
|
||||
* Info [AMD_N]: n, the size of the input matrix
|
||||
*
|
||||
* Info [AMD_NZ]: the number of nonzeros in A, nz = Ap [n]
|
||||
*
|
||||
* Info [AMD_SYMMETRY]: the symmetry of the matrix A. It is the number
|
||||
* of "matched" off-diagonal entries divided by the total number of
|
||||
* off-diagonal entries. An entry A(i,j) is matched if A(j,i) is also
|
||||
* an entry, for any pair (i,j) for which i != j. In MATLAB notation,
|
||||
* S = spones (A) ;
|
||||
* B = tril (S, -1) + triu (S, 1) ;
|
||||
* symmetry = nnz (B & B') / nnz (B) ;
|
||||
*
|
||||
* Info [AMD_NZDIAG]: the number of entries on the diagonal of A.
|
||||
*
|
||||
* Info [AMD_NZ_A_PLUS_AT]: the number of nonzeros in A+A', excluding the
|
||||
* diagonal. If A is perfectly symmetric (Info [AMD_SYMMETRY] = 1)
|
||||
* with a fully nonzero diagonal, then Info [AMD_NZ_A_PLUS_AT] = nz-n
|
||||
* (the smallest possible value). If A is perfectly unsymmetric
|
||||
* (Info [AMD_SYMMETRY] = 0, for an upper triangular matrix, for
|
||||
* example) with no diagonal, then Info [AMD_NZ_A_PLUS_AT] = 2*nz
|
||||
* (the largest possible value).
|
||||
*
|
||||
* Info [AMD_NDENSE]: the number of "dense" rows/columns of A+A' that were
|
||||
* removed from A prior to ordering. These are placed last in the
|
||||
* output order P.
|
||||
*
|
||||
* Info [AMD_MEMORY]: the amount of memory used by AMD, in bytes. In the
|
||||
* current version, this is 1.2 * Info [AMD_NZ_A_PLUS_AT] + 9*n
|
||||
* times the size of an integer. This is at most 2.4nz + 9n. This
|
||||
* excludes the size of the input arguments Ai, Ap, and P, which have
|
||||
* a total size of nz + 2*n + 1 integers.
|
||||
*
|
||||
* Info [AMD_NCMPA]: the number of garbage collections performed.
|
||||
*
|
||||
* Info [AMD_LNZ]: the number of nonzeros in L (excluding the diagonal).
|
||||
* This is a slight upper bound because mass elimination is combined
|
||||
* with the approximate degree update. It is a rough upper bound if
|
||||
* there are many "dense" rows/columns. The rest of the statistics,
|
||||
* below, are also slight or rough upper bounds, for the same reasons.
|
||||
* The post-ordering of the assembly tree might also not exactly
|
||||
* correspond to a true elimination tree postordering.
|
||||
*
|
||||
* Info [AMD_NDIV]: the number of divide operations for a subsequent LDL'
|
||||
* or LU factorization of the permuted matrix A (P,P).
|
||||
*
|
||||
* Info [AMD_NMULTSUBS_LDL]: the number of multiply-subtract pairs for a
|
||||
* subsequent LDL' factorization of A (P,P).
|
||||
*
|
||||
* Info [AMD_NMULTSUBS_LU]: the number of multiply-subtract pairs for a
|
||||
* subsequent LU factorization of A (P,P), assuming that no numerical
|
||||
* pivoting is required.
|
||||
*
|
||||
* Info [AMD_DMAX]: the maximum number of nonzeros in any column of L,
|
||||
* including the diagonal.
|
||||
*
|
||||
* Info [14..19] are not used in the current version, but may be used in
|
||||
* future versions.
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* direct interface to AMD */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* amd_2 is the primary AMD ordering routine. It is not meant to be
|
||||
* user-callable because of its restrictive inputs and because it destroys
|
||||
* the user's input matrix. It does not check its inputs for errors, either.
|
||||
* However, if you can work with these restrictions it can be faster than
|
||||
* amd_order and use less memory (assuming that you can create your own copy
|
||||
* of the matrix for AMD to destroy). Refer to AMD/Source/amd_2.c for a
|
||||
* description of each parameter. */
|
||||
|
||||
void amd_2
|
||||
(
|
||||
int n,
|
||||
int Pe [ ],
|
||||
int Iw [ ],
|
||||
int Len [ ],
|
||||
int iwlen,
|
||||
int pfree,
|
||||
int Nv [ ],
|
||||
int Next [ ],
|
||||
int Last [ ],
|
||||
int Head [ ],
|
||||
int Elen [ ],
|
||||
int Degree [ ],
|
||||
int W [ ],
|
||||
double Control [ ],
|
||||
double Info [ ]
|
||||
) ;
|
||||
|
||||
void amd_l2
|
||||
(
|
||||
UF_long n,
|
||||
UF_long Pe [ ],
|
||||
UF_long Iw [ ],
|
||||
UF_long Len [ ],
|
||||
UF_long iwlen,
|
||||
UF_long pfree,
|
||||
UF_long Nv [ ],
|
||||
UF_long Next [ ],
|
||||
UF_long Last [ ],
|
||||
UF_long Head [ ],
|
||||
UF_long Elen [ ],
|
||||
UF_long Degree [ ],
|
||||
UF_long W [ ],
|
||||
double Control [ ],
|
||||
double Info [ ]
|
||||
) ;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* amd_valid */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Returns AMD_OK or AMD_OK_BUT_JUMBLED if the matrix is valid as input to
|
||||
* amd_order; the latter is returned if the matrix has unsorted and/or
|
||||
* duplicate row indices in one or more columns. Returns AMD_INVALID if the
|
||||
* matrix cannot be passed to amd_order. For amd_order, the matrix must also
|
||||
* be square. The first two arguments are the number of rows and the number
|
||||
* of columns of the matrix. For its use in AMD, these must both equal n.
|
||||
*
|
||||
* NOTE: this routine returned TRUE/FALSE in v1.2 and earlier.
|
||||
*/
|
||||
|
||||
int amd_valid
|
||||
(
|
||||
int n_row, /* # of rows */
|
||||
int n_col, /* # of columns */
|
||||
const int Ap [ ], /* column pointers, of size n_col+1 */
|
||||
const int Ai [ ] /* row indices, of size Ap [n_col] */
|
||||
) ;
|
||||
|
||||
UF_long amd_l_valid
|
||||
(
|
||||
UF_long n_row,
|
||||
UF_long n_col,
|
||||
const UF_long Ap [ ],
|
||||
const UF_long Ai [ ]
|
||||
) ;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD memory manager and printf routines */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* The user can redefine these to change the malloc, free, and printf routines
|
||||
* that AMD uses. */
|
||||
|
||||
#ifndef EXTERN
|
||||
#define EXTERN extern
|
||||
#endif
|
||||
|
||||
EXTERN void *(*amd_malloc) (size_t) ; /* pointer to malloc */
|
||||
EXTERN void (*amd_free) (void *) ; /* pointer to free */
|
||||
EXTERN void *(*amd_realloc) (void *, size_t) ; /* pointer to realloc */
|
||||
EXTERN void *(*amd_calloc) (size_t, size_t) ; /* pointer to calloc */
|
||||
EXTERN int (*amd_printf) (const char *, ...) ; /* pointer to printf */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD Control and Info arrays */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* amd_defaults: sets the default control settings */
|
||||
void amd_defaults (double Control [ ]) ;
|
||||
void amd_l_defaults (double Control [ ]) ;
|
||||
|
||||
/* amd_control: prints the control settings */
|
||||
void amd_control (double Control [ ]) ;
|
||||
void amd_l_control (double Control [ ]) ;
|
||||
|
||||
/* amd_info: prints the statistics */
|
||||
void amd_info (double Info [ ]) ;
|
||||
void amd_l_info (double Info [ ]) ;
|
||||
|
||||
#define AMD_CONTROL 5 /* size of Control array */
|
||||
#define AMD_INFO 20 /* size of Info array */
|
||||
|
||||
/* contents of Control */
|
||||
#define AMD_DENSE 0 /* "dense" if degree > Control [0] * sqrt (n) */
|
||||
#define AMD_AGGRESSIVE 1 /* do aggressive absorption if Control [1] != 0 */
|
||||
|
||||
/* default Control settings */
|
||||
#define AMD_DEFAULT_DENSE 10.0 /* default "dense" degree 10*sqrt(n) */
|
||||
#define AMD_DEFAULT_AGGRESSIVE 1 /* do aggressive absorption by default */
|
||||
|
||||
/* contents of Info */
|
||||
#define AMD_STATUS 0 /* return value of amd_order and amd_l_order */
|
||||
#define AMD_N 1 /* A is n-by-n */
|
||||
#define AMD_NZ 2 /* number of nonzeros in A */
|
||||
#define AMD_SYMMETRY 3 /* symmetry of pattern (1 is sym., 0 is unsym.) */
|
||||
#define AMD_NZDIAG 4 /* # of entries on diagonal */
|
||||
#define AMD_NZ_A_PLUS_AT 5 /* nz in A+A' */
|
||||
#define AMD_NDENSE 6 /* number of "dense" rows/columns in A */
|
||||
#define AMD_MEMORY 7 /* amount of memory used by AMD */
|
||||
#define AMD_NCMPA 8 /* number of garbage collections in AMD */
|
||||
#define AMD_LNZ 9 /* approx. nz in L, excluding the diagonal */
|
||||
#define AMD_NDIV 10 /* number of fl. point divides for LU and LDL' */
|
||||
#define AMD_NMULTSUBS_LDL 11 /* number of fl. point (*,-) pairs for LDL' */
|
||||
#define AMD_NMULTSUBS_LU 12 /* number of fl. point (*,-) pairs for LU */
|
||||
#define AMD_DMAX 13 /* max nz. in any column of L, incl. diagonal */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* return values of AMD */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#define AMD_OK 0 /* success */
|
||||
#define AMD_OUT_OF_MEMORY -1 /* malloc failed, or problem too large */
|
||||
#define AMD_INVALID -2 /* input arguments are not valid */
|
||||
#define AMD_OK_BUT_JUMBLED 1 /* input matrix is OK for amd_order, but
|
||||
* columns were not sorted, and/or duplicate entries were present. AMD had
|
||||
* to do extra work before ordering the matrix. This is a warning, not an
|
||||
* error. */
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === AMD version ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* AMD Version 1.2 and later include the following definitions.
|
||||
* As an example, to test if the version you are using is 1.2 or later:
|
||||
*
|
||||
* #ifdef AMD_VERSION
|
||||
* if (AMD_VERSION >= AMD_VERSION_CODE (1,2)) ...
|
||||
* #endif
|
||||
*
|
||||
* This also works during compile-time:
|
||||
*
|
||||
* #if defined(AMD_VERSION) && (AMD_VERSION >= AMD_VERSION_CODE (1,2))
|
||||
* printf ("This is version 1.2 or later\n") ;
|
||||
* #else
|
||||
* printf ("This is an early version\n") ;
|
||||
* #endif
|
||||
*
|
||||
* Versions 1.1 and earlier of AMD do not include a #define'd version number.
|
||||
*/
|
||||
|
||||
#define AMD_DATE "Dec 7, 2011"
|
||||
#define AMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
|
||||
#define AMD_MAIN_VERSION 2
|
||||
#define AMD_SUB_VERSION 2
|
||||
#define AMD_SUBSUB_VERSION 3
|
||||
#define AMD_VERSION AMD_VERSION_CODE(AMD_MAIN_VERSION,AMD_SUB_VERSION)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_1 =============================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* AMD_1: Construct A+A' for a sparse matrix A and perform the AMD ordering.
|
||||
*
|
||||
* The n-by-n sparse matrix A can be unsymmetric. It is stored in MATLAB-style
|
||||
* compressed-column form, with sorted row indices in each column, and no
|
||||
* duplicate entries. Diagonal entries may be present, but they are ignored.
|
||||
* Row indices of column j of A are stored in Ai [Ap [j] ... Ap [j+1]-1].
|
||||
* Ap [0] must be zero, and nz = Ap [n] is the number of entries in A. The
|
||||
* size of the matrix, n, must be greater than or equal to zero.
|
||||
*
|
||||
* This routine must be preceded by a call to AMD_aat, which computes the
|
||||
* number of entries in each row/column in A+A', excluding the diagonal.
|
||||
* Len [j], on input, is the number of entries in row/column j of A+A'. This
|
||||
* routine constructs the matrix A+A' and then calls AMD_2. No error checking
|
||||
* is performed (this was done in AMD_valid).
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL void AMD_1
|
||||
(
|
||||
Int n, /* n > 0 */
|
||||
const Int Ap [ ], /* input of size n+1, not modified */
|
||||
const Int Ai [ ], /* input of size nz = Ap [n], not modified */
|
||||
Int P [ ], /* size n output permutation */
|
||||
Int Pinv [ ], /* size n output inverse permutation */
|
||||
Int Len [ ], /* size n input, undefined on output */
|
||||
Int slen, /* slen >= sum (Len [0..n-1]) + 7n,
|
||||
* ideally slen = 1.2 * sum (Len) + 8n */
|
||||
Int S [ ], /* size slen workspace */
|
||||
double Control [ ], /* input array of size AMD_CONTROL */
|
||||
double Info [ ] /* output array of size AMD_INFO */
|
||||
)
|
||||
{
|
||||
Int i, j, k, p, pfree, iwlen, pj, p1, p2, pj2, *Iw, *Pe, *Nv, *Head,
|
||||
*Elen, *Degree, *s, *W, *Sp, *Tp ;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* construct the matrix for AMD_2 */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ASSERT (n > 0) ;
|
||||
|
||||
iwlen = slen - 6*n ;
|
||||
s = S ;
|
||||
Pe = s ; s += n ;
|
||||
Nv = s ; s += n ;
|
||||
Head = s ; s += n ;
|
||||
Elen = s ; s += n ;
|
||||
Degree = s ; s += n ;
|
||||
W = s ; s += n ;
|
||||
Iw = s ; s += iwlen ;
|
||||
|
||||
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ;
|
||||
|
||||
/* construct the pointers for A+A' */
|
||||
Sp = Nv ; /* use Nv and W as workspace for Sp and Tp [ */
|
||||
Tp = W ;
|
||||
pfree = 0 ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
Pe [j] = pfree ;
|
||||
Sp [j] = pfree ;
|
||||
pfree += Len [j] ;
|
||||
}
|
||||
|
||||
/* Note that this restriction on iwlen is slightly more restrictive than
|
||||
* what is strictly required in AMD_2. AMD_2 can operate with no elbow
|
||||
* room at all, but it will be very slow. For better performance, at
|
||||
* least size-n elbow room is enforced. */
|
||||
ASSERT (iwlen >= pfree + n) ;
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (p = 0 ; p < iwlen ; p++) Iw [p] = EMPTY ;
|
||||
#endif
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
AMD_DEBUG1 (("Construct row/column k= "ID" of A+A'\n", k)) ;
|
||||
p1 = Ap [k] ;
|
||||
p2 = Ap [k+1] ;
|
||||
|
||||
/* construct A+A' */
|
||||
for (p = p1 ; p < p2 ; )
|
||||
{
|
||||
/* scan the upper triangular part of A */
|
||||
j = Ai [p] ;
|
||||
ASSERT (j >= 0 && j < n) ;
|
||||
if (j < k)
|
||||
{
|
||||
/* entry A (j,k) in the strictly upper triangular part */
|
||||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
|
||||
ASSERT (Sp [k] < (k == n-1 ? pfree : Pe [k+1])) ;
|
||||
Iw [Sp [j]++] = k ;
|
||||
Iw [Sp [k]++] = j ;
|
||||
p++ ;
|
||||
}
|
||||
else if (j == k)
|
||||
{
|
||||
/* skip the diagonal */
|
||||
p++ ;
|
||||
break ;
|
||||
}
|
||||
else /* j > k */
|
||||
{
|
||||
/* first entry below the diagonal */
|
||||
break ;
|
||||
}
|
||||
/* scan lower triangular part of A, in column j until reaching
|
||||
* row k. Start where last scan left off. */
|
||||
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ;
|
||||
pj2 = Ap [j+1] ;
|
||||
for (pj = Tp [j] ; pj < pj2 ; )
|
||||
{
|
||||
i = Ai [pj] ;
|
||||
ASSERT (i >= 0 && i < n) ;
|
||||
if (i < k)
|
||||
{
|
||||
/* A (i,j) is only in the lower part, not in upper */
|
||||
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
|
||||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
|
||||
Iw [Sp [i]++] = j ;
|
||||
Iw [Sp [j]++] = i ;
|
||||
pj++ ;
|
||||
}
|
||||
else if (i == k)
|
||||
{
|
||||
/* entry A (k,j) in lower part and A (j,k) in upper */
|
||||
pj++ ;
|
||||
break ;
|
||||
}
|
||||
else /* i > k */
|
||||
{
|
||||
/* consider this entry later, when k advances to i */
|
||||
break ;
|
||||
}
|
||||
}
|
||||
Tp [j] = pj ;
|
||||
}
|
||||
Tp [k] = p ;
|
||||
}
|
||||
|
||||
/* clean up, for remaining mismatched entries */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++)
|
||||
{
|
||||
i = Ai [pj] ;
|
||||
ASSERT (i >= 0 && i < n) ;
|
||||
/* A (i,j) is only in the lower part, not in upper */
|
||||
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ;
|
||||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ;
|
||||
Iw [Sp [i]++] = j ;
|
||||
Iw [Sp [j]++] = i ;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (j = 0 ; j < n-1 ; j++) ASSERT (Sp [j] == Pe [j+1]) ;
|
||||
ASSERT (Sp [n-1] == pfree) ;
|
||||
#endif
|
||||
|
||||
/* Tp and Sp no longer needed ] */
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* order the matrix */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
AMD_2 (n, Pe, Iw, Len, iwlen, pfree,
|
||||
Nv, Pinv, P, Head, Elen, Degree, W, Control, Info) ;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,185 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_aat ============================================================= */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* AMD_aat: compute the symmetry of the pattern of A, and count the number of
|
||||
* nonzeros each column of A+A' (excluding the diagonal). Assumes the input
|
||||
* matrix has no errors, with sorted columns and no duplicates
|
||||
* (AMD_valid (n, n, Ap, Ai) must be AMD_OK, but this condition is not
|
||||
* checked).
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL size_t AMD_aat /* returns nz in A+A' */
|
||||
(
|
||||
Int n,
|
||||
const Int Ap [ ],
|
||||
const Int Ai [ ],
|
||||
Int Len [ ], /* Len [j]: length of column j of A+A', excl diagonal*/
|
||||
Int Tp [ ], /* workspace of size n */
|
||||
double Info [ ]
|
||||
)
|
||||
{
|
||||
Int p1, p2, p, i, j, pj, pj2, k, nzdiag, nzboth, nz ;
|
||||
double sym ;
|
||||
size_t nzaat ;
|
||||
|
||||
#ifndef NDEBUG
|
||||
AMD_debug_init ("AMD AAT") ;
|
||||
for (k = 0 ; k < n ; k++) Tp [k] = EMPTY ;
|
||||
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ;
|
||||
#endif
|
||||
|
||||
if (Info != (double *) NULL)
|
||||
{
|
||||
/* clear the Info array, if it exists */
|
||||
for (i = 0 ; i < AMD_INFO ; i++)
|
||||
{
|
||||
Info [i] = EMPTY ;
|
||||
}
|
||||
Info [AMD_STATUS] = AMD_OK ;
|
||||
}
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Len [k] = 0 ;
|
||||
}
|
||||
|
||||
nzdiag = 0 ;
|
||||
nzboth = 0 ;
|
||||
nz = Ap [n] ;
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
p1 = Ap [k] ;
|
||||
p2 = Ap [k+1] ;
|
||||
AMD_DEBUG2 (("\nAAT Column: "ID" p1: "ID" p2: "ID"\n", k, p1, p2)) ;
|
||||
|
||||
/* construct A+A' */
|
||||
for (p = p1 ; p < p2 ; )
|
||||
{
|
||||
/* scan the upper triangular part of A */
|
||||
j = Ai [p] ;
|
||||
if (j < k)
|
||||
{
|
||||
/* entry A (j,k) is in the strictly upper triangular part,
|
||||
* add both A (j,k) and A (k,j) to the matrix A+A' */
|
||||
Len [j]++ ;
|
||||
Len [k]++ ;
|
||||
AMD_DEBUG3 ((" upper ("ID","ID") ("ID","ID")\n", j,k, k,j));
|
||||
p++ ;
|
||||
}
|
||||
else if (j == k)
|
||||
{
|
||||
/* skip the diagonal */
|
||||
p++ ;
|
||||
nzdiag++ ;
|
||||
break ;
|
||||
}
|
||||
else /* j > k */
|
||||
{
|
||||
/* first entry below the diagonal */
|
||||
break ;
|
||||
}
|
||||
/* scan lower triangular part of A, in column j until reaching
|
||||
* row k. Start where last scan left off. */
|
||||
ASSERT (Tp [j] != EMPTY) ;
|
||||
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ;
|
||||
pj2 = Ap [j+1] ;
|
||||
for (pj = Tp [j] ; pj < pj2 ; )
|
||||
{
|
||||
i = Ai [pj] ;
|
||||
if (i < k)
|
||||
{
|
||||
/* A (i,j) is only in the lower part, not in upper.
|
||||
* add both A (i,j) and A (j,i) to the matrix A+A' */
|
||||
Len [i]++ ;
|
||||
Len [j]++ ;
|
||||
AMD_DEBUG3 ((" lower ("ID","ID") ("ID","ID")\n",
|
||||
i,j, j,i)) ;
|
||||
pj++ ;
|
||||
}
|
||||
else if (i == k)
|
||||
{
|
||||
/* entry A (k,j) in lower part and A (j,k) in upper */
|
||||
pj++ ;
|
||||
nzboth++ ;
|
||||
break ;
|
||||
}
|
||||
else /* i > k */
|
||||
{
|
||||
/* consider this entry later, when k advances to i */
|
||||
break ;
|
||||
}
|
||||
}
|
||||
Tp [j] = pj ;
|
||||
}
|
||||
/* Tp [k] points to the entry just below the diagonal in column k */
|
||||
Tp [k] = p ;
|
||||
}
|
||||
|
||||
/* clean up, for remaining mismatched entries */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++)
|
||||
{
|
||||
i = Ai [pj] ;
|
||||
/* A (i,j) is only in the lower part, not in upper.
|
||||
* add both A (i,j) and A (j,i) to the matrix A+A' */
|
||||
Len [i]++ ;
|
||||
Len [j]++ ;
|
||||
AMD_DEBUG3 ((" lower cleanup ("ID","ID") ("ID","ID")\n",
|
||||
i,j, j,i)) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* compute the symmetry of the nonzero pattern of A */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* Given a matrix A, the symmetry of A is:
|
||||
* B = tril (spones (A), -1) + triu (spones (A), 1) ;
|
||||
* sym = nnz (B & B') / nnz (B) ;
|
||||
* or 1 if nnz (B) is zero.
|
||||
*/
|
||||
|
||||
if (nz == nzdiag)
|
||||
{
|
||||
sym = 1 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
sym = (2 * (double) nzboth) / ((double) (nz - nzdiag)) ;
|
||||
}
|
||||
|
||||
nzaat = 0 ;
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
nzaat += Len [k] ;
|
||||
}
|
||||
|
||||
AMD_DEBUG1 (("AMD nz in A+A', excluding diagonal (nzaat) = %g\n",
|
||||
(double) nzaat)) ;
|
||||
AMD_DEBUG1 ((" nzboth: "ID" nz: "ID" nzdiag: "ID" symmetry: %g\n",
|
||||
nzboth, nz, nzdiag, sym)) ;
|
||||
|
||||
if (Info != (double *) NULL)
|
||||
{
|
||||
Info [AMD_STATUS] = AMD_OK ;
|
||||
Info [AMD_N] = n ;
|
||||
Info [AMD_NZ] = nz ;
|
||||
Info [AMD_SYMMETRY] = sym ; /* symmetry of pattern of A */
|
||||
Info [AMD_NZDIAG] = nzdiag ; /* nonzeros on diagonal of A */
|
||||
Info [AMD_NZ_A_PLUS_AT] = nzaat ; /* nonzeros in A+A' */
|
||||
}
|
||||
|
||||
return (nzaat) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_control ========================================================= */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* User-callable. Prints the control parameters for AMD. See amd.h
|
||||
* for details. If the Control array is not present, the defaults are
|
||||
* printed instead.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL void AMD_control
|
||||
(
|
||||
double Control [ ]
|
||||
)
|
||||
{
|
||||
double alpha ;
|
||||
Int aggressive ;
|
||||
|
||||
if (Control != (double *) NULL)
|
||||
{
|
||||
alpha = Control [AMD_DENSE] ;
|
||||
aggressive = Control [AMD_AGGRESSIVE] != 0 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
alpha = AMD_DEFAULT_DENSE ;
|
||||
aggressive = AMD_DEFAULT_AGGRESSIVE ;
|
||||
}
|
||||
|
||||
PRINTF (("\nAMD version %d.%d.%d, %s: approximate minimum degree ordering\n"
|
||||
" dense row parameter: %g\n", AMD_MAIN_VERSION, AMD_SUB_VERSION,
|
||||
AMD_SUBSUB_VERSION, AMD_DATE, alpha)) ;
|
||||
|
||||
if (alpha < 0)
|
||||
{
|
||||
PRINTF ((" no rows treated as dense\n")) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINTF ((
|
||||
" (rows with more than max (%g * sqrt (n), 16) entries are\n"
|
||||
" considered \"dense\", and placed last in output permutation)\n",
|
||||
alpha)) ;
|
||||
}
|
||||
|
||||
if (aggressive)
|
||||
{
|
||||
PRINTF ((" aggressive absorption: yes\n")) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINTF ((" aggressive absorption: no\n")) ;
|
||||
}
|
||||
|
||||
PRINTF ((" size of AMD integer: %d\n\n", sizeof (Int))) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_defaults ======================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* User-callable. Sets default control parameters for AMD. See amd.h
|
||||
* for details.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === AMD defaults ======================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
GLOBAL void AMD_defaults
|
||||
(
|
||||
double Control [ ]
|
||||
)
|
||||
{
|
||||
Int i ;
|
||||
|
||||
if (Control != (double *) NULL)
|
||||
{
|
||||
for (i = 0 ; i < AMD_CONTROL ; i++)
|
||||
{
|
||||
Control [i] = 0 ;
|
||||
}
|
||||
Control [AMD_DENSE] = AMD_DEFAULT_DENSE ;
|
||||
Control [AMD_AGGRESSIVE] = AMD_DEFAULT_AGGRESSIVE ;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,180 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_dump ============================================================ */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Debugging routines for AMD. Not used if NDEBUG is not defined at compile-
|
||||
* time (the default). See comments in amd_internal.h on how to enable
|
||||
* debugging. Not user-callable.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
/* This global variable is present only when debugging */
|
||||
GLOBAL Int AMD_debug = -999 ; /* default is no debug printing */
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === AMD_debug_init ====================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* Sets the debug print level, by reading the file debug.amd (if it exists) */
|
||||
|
||||
GLOBAL void AMD_debug_init ( char *s )
|
||||
{
|
||||
FILE *f ;
|
||||
f = fopen ("debug.amd", "r") ;
|
||||
if (f == (FILE *) NULL)
|
||||
{
|
||||
AMD_debug = -999 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
fscanf (f, ID, &AMD_debug) ;
|
||||
fclose (f) ;
|
||||
}
|
||||
if (AMD_debug >= 0)
|
||||
{
|
||||
printf ("%s: AMD_debug_init, D= "ID"\n", s, AMD_debug) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === AMD_dump ============================================================ */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* Dump AMD's data structure, except for the hash buckets. This routine
|
||||
* cannot be called when the hash buckets are non-empty.
|
||||
*/
|
||||
|
||||
GLOBAL void AMD_dump (
|
||||
Int n, /* A is n-by-n */
|
||||
Int Pe [ ], /* pe [0..n-1]: index in iw of start of row i */
|
||||
Int Iw [ ], /* workspace of size iwlen, iwlen [0..pfree-1]
|
||||
* holds the matrix on input */
|
||||
Int Len [ ], /* len [0..n-1]: length for row i */
|
||||
Int iwlen, /* length of iw */
|
||||
Int pfree, /* iw [pfree ... iwlen-1] is empty on input */
|
||||
Int Nv [ ], /* nv [0..n-1] */
|
||||
Int Next [ ], /* next [0..n-1] */
|
||||
Int Last [ ], /* last [0..n-1] */
|
||||
Int Head [ ], /* head [0..n-1] */
|
||||
Int Elen [ ], /* size n */
|
||||
Int Degree [ ], /* size n */
|
||||
Int W [ ], /* size n */
|
||||
Int nel
|
||||
)
|
||||
{
|
||||
Int i, pe, elen, nv, len, e, p, k, j, deg, w, cnt, ilast ;
|
||||
|
||||
if (AMD_debug < 0) return ;
|
||||
ASSERT (pfree <= iwlen) ;
|
||||
AMD_DEBUG3 (("\nAMD dump, pfree: "ID"\n", pfree)) ;
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
pe = Pe [i] ;
|
||||
elen = Elen [i] ;
|
||||
nv = Nv [i] ;
|
||||
len = Len [i] ;
|
||||
w = W [i] ;
|
||||
|
||||
if (elen >= EMPTY)
|
||||
{
|
||||
if (nv == 0)
|
||||
{
|
||||
AMD_DEBUG3 (("\nI "ID": nonprincipal: ", i)) ;
|
||||
ASSERT (elen == EMPTY) ;
|
||||
if (pe == EMPTY)
|
||||
{
|
||||
AMD_DEBUG3 ((" dense node\n")) ;
|
||||
ASSERT (w == 1) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT (pe < EMPTY) ;
|
||||
AMD_DEBUG3 ((" i "ID" -> parent "ID"\n", i, FLIP (Pe[i])));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
AMD_DEBUG3 (("\nI "ID": active principal supervariable:\n",i));
|
||||
AMD_DEBUG3 ((" nv(i): "ID" Flag: %d\n", nv, (nv < 0))) ;
|
||||
ASSERT (elen >= 0) ;
|
||||
ASSERT (nv > 0 && pe >= 0) ;
|
||||
p = pe ;
|
||||
AMD_DEBUG3 ((" e/s: ")) ;
|
||||
if (elen == 0) AMD_DEBUG3 ((" : ")) ;
|
||||
ASSERT (pe + len <= pfree) ;
|
||||
for (k = 0 ; k < len ; k++)
|
||||
{
|
||||
j = Iw [p] ;
|
||||
AMD_DEBUG3 ((" "ID"", j)) ;
|
||||
ASSERT (j >= 0 && j < n) ;
|
||||
if (k == elen-1) AMD_DEBUG3 ((" : ")) ;
|
||||
p++ ;
|
||||
}
|
||||
AMD_DEBUG3 (("\n")) ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
e = i ;
|
||||
if (w == 0)
|
||||
{
|
||||
AMD_DEBUG3 (("\nE "ID": absorbed element: w "ID"\n", e, w)) ;
|
||||
ASSERT (nv > 0 && pe < 0) ;
|
||||
AMD_DEBUG3 ((" e "ID" -> parent "ID"\n", e, FLIP (Pe [e]))) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
AMD_DEBUG3 (("\nE "ID": unabsorbed element: w "ID"\n", e, w)) ;
|
||||
ASSERT (nv > 0 && pe >= 0) ;
|
||||
p = pe ;
|
||||
AMD_DEBUG3 ((" : ")) ;
|
||||
ASSERT (pe + len <= pfree) ;
|
||||
for (k = 0 ; k < len ; k++)
|
||||
{
|
||||
j = Iw [p] ;
|
||||
AMD_DEBUG3 ((" "ID"", j)) ;
|
||||
ASSERT (j >= 0 && j < n) ;
|
||||
p++ ;
|
||||
}
|
||||
AMD_DEBUG3 (("\n")) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* this routine cannot be called when the hash buckets are non-empty */
|
||||
AMD_DEBUG3 (("\nDegree lists:\n")) ;
|
||||
if (nel >= 0)
|
||||
{
|
||||
cnt = 0 ;
|
||||
for (deg = 0 ; deg < n ; deg++)
|
||||
{
|
||||
if (Head [deg] == EMPTY) continue ;
|
||||
ilast = EMPTY ;
|
||||
AMD_DEBUG3 ((ID": \n", deg)) ;
|
||||
for (i = Head [deg] ; i != EMPTY ; i = Next [i])
|
||||
{
|
||||
AMD_DEBUG3 ((" "ID" : next "ID" last "ID" deg "ID"\n",
|
||||
i, Next [i], Last [i], Degree [i])) ;
|
||||
ASSERT (i >= 0 && i < n && ilast == Last [i] &&
|
||||
deg == Degree [i]) ;
|
||||
cnt += Nv [i] ;
|
||||
ilast = i ;
|
||||
}
|
||||
AMD_DEBUG3 (("\n")) ;
|
||||
}
|
||||
ASSERT (cnt == n - nel) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/* ========================================================================= */
|
||||
/* === amd_global ========================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
#include "mex.h"
|
||||
#include "matrix.h"
|
||||
#endif
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === Default AMD memory manager ========================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* The user can redefine these global pointers at run-time to change the memory
|
||||
* manager used by AMD. AMD only uses malloc and free; realloc and calloc are
|
||||
* include for completeness, in case another package wants to use the same
|
||||
* memory manager as AMD.
|
||||
*
|
||||
* If compiling as a MATLAB mexFunction, the default memory manager is mxMalloc.
|
||||
* You can also compile AMD as a standard ANSI-C library and link a mexFunction
|
||||
* against it, and then redefine these pointers at run-time, in your
|
||||
* mexFunction.
|
||||
*
|
||||
* If -DNMALLOC is defined at compile-time, no memory manager is specified at
|
||||
* compile-time. You must then define these functions at run-time, before
|
||||
* calling AMD, for AMD to work properly.
|
||||
*/
|
||||
|
||||
#ifndef NMALLOC
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
/* MATLAB mexFunction: */
|
||||
void *(*amd_malloc) (size_t) = mxMalloc ;
|
||||
void (*amd_free) (void *) = mxFree ;
|
||||
void *(*amd_realloc) (void *, size_t) = mxRealloc ;
|
||||
void *(*amd_calloc) (size_t, size_t) = mxCalloc ;
|
||||
#else
|
||||
/* standard ANSI-C: */
|
||||
void *(*amd_malloc) (size_t) = malloc ;
|
||||
void (*amd_free) (void *) = free ;
|
||||
void *(*amd_realloc) (void *, size_t) = realloc ;
|
||||
void *(*amd_calloc) (size_t, size_t) = calloc ;
|
||||
#endif
|
||||
#else
|
||||
/* no memory manager defined at compile-time; you MUST define one at run-time */
|
||||
void *(*amd_malloc) (size_t) = NULL ;
|
||||
void (*amd_free) (void *) = NULL ;
|
||||
void *(*amd_realloc) (void *, size_t) = NULL ;
|
||||
void *(*amd_calloc) (size_t, size_t) = NULL ;
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === Default AMD printf routine ========================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* The user can redefine this global pointer at run-time to change the printf
|
||||
* routine used by AMD. If NULL, no printing occurs.
|
||||
*
|
||||
* If -DNPRINT is defined at compile-time, stdio.h is not included. Printing
|
||||
* can then be enabled at run-time by setting amd_printf to a non-NULL function.
|
||||
*/
|
||||
|
||||
#ifndef NPRINT
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
int (*amd_printf) (const char *, ...) = mexPrintf ;
|
||||
#else
|
||||
#include <stdio.h>
|
||||
int (*amd_printf) (const char *, ...) = printf ;
|
||||
#endif
|
||||
#else
|
||||
int (*amd_printf) (const char *, ...) = NULL ;
|
||||
#endif
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_info ============================================================ */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* User-callable. Prints the output statistics for AMD. See amd.h
|
||||
* for details. If the Info array is not present, nothing is printed.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
#define PRI(format,x) { if (x >= 0) { PRINTF ((format, x)) ; }}
|
||||
|
||||
GLOBAL void AMD_info
|
||||
(
|
||||
double Info [ ]
|
||||
)
|
||||
{
|
||||
double n, ndiv, nmultsubs_ldl, nmultsubs_lu, lnz, lnzd ;
|
||||
|
||||
PRINTF (("\nAMD version %d.%d.%d, %s, results:\n",
|
||||
AMD_MAIN_VERSION, AMD_SUB_VERSION, AMD_SUBSUB_VERSION, AMD_DATE)) ;
|
||||
|
||||
if (!Info)
|
||||
{
|
||||
return ;
|
||||
}
|
||||
|
||||
n = Info [AMD_N] ;
|
||||
ndiv = Info [AMD_NDIV] ;
|
||||
nmultsubs_ldl = Info [AMD_NMULTSUBS_LDL] ;
|
||||
nmultsubs_lu = Info [AMD_NMULTSUBS_LU] ;
|
||||
lnz = Info [AMD_LNZ] ;
|
||||
lnzd = (n >= 0 && lnz >= 0) ? (n + lnz) : (-1) ;
|
||||
|
||||
/* AMD return status */
|
||||
PRINTF ((" status: ")) ;
|
||||
if (Info [AMD_STATUS] == AMD_OK)
|
||||
{
|
||||
PRINTF (("OK\n")) ;
|
||||
}
|
||||
else if (Info [AMD_STATUS] == AMD_OUT_OF_MEMORY)
|
||||
{
|
||||
PRINTF (("out of memory\n")) ;
|
||||
}
|
||||
else if (Info [AMD_STATUS] == AMD_INVALID)
|
||||
{
|
||||
PRINTF (("invalid matrix\n")) ;
|
||||
}
|
||||
else if (Info [AMD_STATUS] == AMD_OK_BUT_JUMBLED)
|
||||
{
|
||||
PRINTF (("OK, but jumbled\n")) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINTF (("unknown\n")) ;
|
||||
}
|
||||
|
||||
/* statistics about the input matrix */
|
||||
PRI (" n, dimension of A: %.20g\n", n);
|
||||
PRI (" nz, number of nonzeros in A: %.20g\n",
|
||||
Info [AMD_NZ]) ;
|
||||
PRI (" symmetry of A: %.4f\n",
|
||||
Info [AMD_SYMMETRY]) ;
|
||||
PRI (" number of nonzeros on diagonal: %.20g\n",
|
||||
Info [AMD_NZDIAG]) ;
|
||||
PRI (" nonzeros in pattern of A+A' (excl. diagonal): %.20g\n",
|
||||
Info [AMD_NZ_A_PLUS_AT]) ;
|
||||
PRI (" # dense rows/columns of A+A': %.20g\n",
|
||||
Info [AMD_NDENSE]) ;
|
||||
|
||||
/* statistics about AMD's behavior */
|
||||
PRI (" memory used, in bytes: %.20g\n",
|
||||
Info [AMD_MEMORY]) ;
|
||||
PRI (" # of memory compactions: %.20g\n",
|
||||
Info [AMD_NCMPA]) ;
|
||||
|
||||
/* statistics about the ordering quality */
|
||||
PRINTF (("\n"
|
||||
" The following approximate statistics are for a subsequent\n"
|
||||
" factorization of A(P,P) + A(P,P)'. They are slight upper\n"
|
||||
" bounds if there are no dense rows/columns in A+A', and become\n"
|
||||
" looser if dense rows/columns exist.\n\n")) ;
|
||||
|
||||
PRI (" nonzeros in L (excluding diagonal): %.20g\n",
|
||||
lnz) ;
|
||||
PRI (" nonzeros in L (including diagonal): %.20g\n",
|
||||
lnzd) ;
|
||||
PRI (" # divide operations for LDL' or LU: %.20g\n",
|
||||
ndiv) ;
|
||||
PRI (" # multiply-subtract operations for LDL': %.20g\n",
|
||||
nmultsubs_ldl) ;
|
||||
PRI (" # multiply-subtract operations for LU: %.20g\n",
|
||||
nmultsubs_lu) ;
|
||||
PRI (" max nz. in any column of L (incl. diagonal): %.20g\n",
|
||||
Info [AMD_DMAX]) ;
|
||||
|
||||
/* total flop counts for various factorizations */
|
||||
|
||||
if (n >= 0 && ndiv >= 0 && nmultsubs_ldl >= 0 && nmultsubs_lu >= 0)
|
||||
{
|
||||
PRINTF (("\n"
|
||||
" chol flop count for real A, sqrt counted as 1 flop: %.20g\n"
|
||||
" LDL' flop count for real A: %.20g\n"
|
||||
" LDL' flop count for complex A: %.20g\n"
|
||||
" LU flop count for real A (with no pivoting): %.20g\n"
|
||||
" LU flop count for complex A (with no pivoting): %.20g\n\n",
|
||||
n + ndiv + 2*nmultsubs_ldl,
|
||||
ndiv + 2*nmultsubs_ldl,
|
||||
9*ndiv + 8*nmultsubs_ldl,
|
||||
ndiv + 2*nmultsubs_lu,
|
||||
9*ndiv + 8*nmultsubs_lu)) ;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,350 @@
|
|||
/* ========================================================================= */
|
||||
/* === amd_internal.h ====================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* This file is for internal use in AMD itself, and does not normally need to
|
||||
* be included in user code (it is included in UMFPACK, however). All others
|
||||
* should use amd.h instead.
|
||||
*
|
||||
* The following compile-time definitions affect how AMD is compiled.
|
||||
*
|
||||
* -DNPRINT
|
||||
*
|
||||
* Disable all printing. stdio.h will not be included. Printing can
|
||||
* be re-enabled at run-time by setting the global pointer amd_printf
|
||||
* to printf (or mexPrintf for a MATLAB mexFunction).
|
||||
*
|
||||
* -DNMALLOC
|
||||
*
|
||||
* No memory manager is defined at compile-time. You MUST define the
|
||||
* function pointers amd_malloc, amd_free, amd_realloc, and
|
||||
* amd_calloc at run-time for AMD to work properly.
|
||||
*/
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === NDEBUG ============================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/*
|
||||
* Turning on debugging takes some work (see below). If you do not edit this
|
||||
* file, then debugging is always turned off, regardless of whether or not
|
||||
* -DNDEBUG is specified in your compiler options.
|
||||
*
|
||||
* If AMD is being compiled as a mexFunction, then MATLAB_MEX_FILE is defined,
|
||||
* and mxAssert is used instead of assert. If debugging is not enabled, no
|
||||
* MATLAB include files or functions are used. Thus, the AMD library libamd.a
|
||||
* can be safely used in either a stand-alone C program or in another
|
||||
* mexFunction, without any change.
|
||||
*/
|
||||
|
||||
/*
|
||||
AMD will be exceedingly slow when running in debug mode. The next three
|
||||
lines ensure that debugging is turned off.
|
||||
*/
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG
|
||||
#endif
|
||||
|
||||
/*
|
||||
To enable debugging, uncomment the following line:
|
||||
#undef NDEBUG
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* ANSI include files */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* from stdlib.h: size_t, malloc, free, realloc, and calloc */
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(NPRINT) || !defined(NDEBUG)
|
||||
/* from stdio.h: printf. Not included if NPRINT is defined at compile time.
|
||||
* fopen and fscanf are used when debugging. */
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/* from limits.h: INT_MAX and LONG_MAX */
|
||||
#include <limits.h>
|
||||
|
||||
/* from math.h: sqrt */
|
||||
#include <math.h>
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* MATLAB include files (only if being used in or via MATLAB) */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
#include "matrix.h"
|
||||
#include "mex.h"
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* basic definitions */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FLIP
|
||||
#undef FLIP
|
||||
#endif
|
||||
|
||||
#ifdef MAX
|
||||
#undef MAX
|
||||
#endif
|
||||
|
||||
#ifdef MIN
|
||||
#undef MIN
|
||||
#endif
|
||||
|
||||
#ifdef EMPTY
|
||||
#undef EMPTY
|
||||
#endif
|
||||
|
||||
#ifdef GLOBAL
|
||||
#undef GLOBAL
|
||||
#endif
|
||||
|
||||
#ifdef PRIVATE
|
||||
#undef PRIVATE
|
||||
#endif
|
||||
|
||||
/* FLIP is a "negation about -1", and is used to mark an integer i that is
|
||||
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY
|
||||
* is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i
|
||||
* for all integers i. UNFLIP (i) is >= EMPTY. */
|
||||
#define EMPTY (-1)
|
||||
#define FLIP(i) (-(i)-2)
|
||||
#define UNFLIP(i) ((i < EMPTY) ? FLIP (i) : (i))
|
||||
|
||||
/* for integer MAX/MIN, or for doubles when we don't care how NaN's behave: */
|
||||
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
/* logical expression of p implies q: */
|
||||
#define IMPLIES(p,q) (!(p) || (q))
|
||||
|
||||
/* Note that the IBM RS 6000 xlc predefines TRUE and FALSE in <types.h>. */
|
||||
/* The Compaq Alpha also predefines TRUE and FALSE. */
|
||||
#ifdef TRUE
|
||||
#undef TRUE
|
||||
#endif
|
||||
#ifdef FALSE
|
||||
#undef FALSE
|
||||
#endif
|
||||
|
||||
#define TRUE (1)
|
||||
#define FALSE (0)
|
||||
#define PRIVATE static
|
||||
#define GLOBAL
|
||||
#define EMPTY (-1)
|
||||
|
||||
/* Note that Linux's gcc 2.96 defines NULL as ((void *) 0), but other */
|
||||
/* compilers (even gcc 2.95.2 on Solaris) define NULL as 0 or (0). We */
|
||||
/* need to use the ANSI standard value of 0. */
|
||||
#ifdef NULL
|
||||
#undef NULL
|
||||
#endif
|
||||
|
||||
#define NULL 0
|
||||
|
||||
/* largest value of size_t */
|
||||
#ifndef SIZE_T_MAX
|
||||
#define SIZE_T_MAX ((size_t) (-1))
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* integer type for AMD: int or UF_long */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* define UF_long */
|
||||
#include "UFconfig.h"
|
||||
|
||||
#if defined (DLONG) || defined (ZLONG)
|
||||
|
||||
#define Int UF_long
|
||||
#define ID UF_long_id
|
||||
#define Int_MAX UF_long_max
|
||||
|
||||
#define AMD_order amd_l_order
|
||||
#define AMD_defaults amd_l_defaults
|
||||
#define AMD_control amd_l_control
|
||||
#define AMD_info amd_l_info
|
||||
#define AMD_1 amd_l1
|
||||
#define AMD_2 amd_l2
|
||||
#define AMD_valid amd_l_valid
|
||||
#define AMD_aat amd_l_aat
|
||||
#define AMD_postorder amd_l_postorder
|
||||
#define AMD_post_tree amd_l_post_tree
|
||||
#define AMD_dump amd_l_dump
|
||||
#define AMD_debug amd_l_debug
|
||||
#define AMD_debug_init amd_l_debug_init
|
||||
#define AMD_preprocess amd_l_preprocess
|
||||
|
||||
#else
|
||||
|
||||
#define Int int
|
||||
#define ID "%d"
|
||||
#define Int_MAX INT_MAX
|
||||
|
||||
#define AMD_order amd_order
|
||||
#define AMD_defaults amd_defaults
|
||||
#define AMD_control amd_control
|
||||
#define AMD_info amd_info
|
||||
#define AMD_1 amd_1
|
||||
#define AMD_2 amd_2
|
||||
#define AMD_valid amd_valid
|
||||
#define AMD_aat amd_aat
|
||||
#define AMD_postorder amd_postorder
|
||||
#define AMD_post_tree amd_post_tree
|
||||
#define AMD_dump amd_dump
|
||||
#define AMD_debug amd_debug
|
||||
#define AMD_debug_init amd_debug_init
|
||||
#define AMD_preprocess amd_preprocess
|
||||
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === PRINTF macro ======================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* All output goes through the PRINTF macro. */
|
||||
#define PRINTF(params) { if (amd_printf != NULL) (void) amd_printf params ; }
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD routine definitions (user-callable) */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#include "amd.h"
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD routine definitions (not user-callable) */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
GLOBAL size_t AMD_aat
|
||||
(
|
||||
Int n,
|
||||
const Int Ap [ ],
|
||||
const Int Ai [ ],
|
||||
Int Len [ ],
|
||||
Int Tp [ ],
|
||||
double Info [ ]
|
||||
) ;
|
||||
|
||||
GLOBAL void AMD_1
|
||||
(
|
||||
Int n,
|
||||
const Int Ap [ ],
|
||||
const Int Ai [ ],
|
||||
Int P [ ],
|
||||
Int Pinv [ ],
|
||||
Int Len [ ],
|
||||
Int slen,
|
||||
Int S [ ],
|
||||
double Control [ ],
|
||||
double Info [ ]
|
||||
) ;
|
||||
|
||||
GLOBAL void AMD_postorder
|
||||
(
|
||||
Int nn,
|
||||
Int Parent [ ],
|
||||
Int Npiv [ ],
|
||||
Int Fsize [ ],
|
||||
Int Order [ ],
|
||||
Int Child [ ],
|
||||
Int Sibling [ ],
|
||||
Int Stack [ ]
|
||||
) ;
|
||||
|
||||
GLOBAL Int AMD_post_tree
|
||||
(
|
||||
Int root,
|
||||
Int k,
|
||||
Int Child [ ],
|
||||
const Int Sibling [ ],
|
||||
Int Order [ ],
|
||||
Int Stack [ ]
|
||||
#ifndef NDEBUG
|
||||
, Int nn
|
||||
#endif
|
||||
) ;
|
||||
|
||||
GLOBAL void AMD_preprocess
|
||||
(
|
||||
Int n,
|
||||
const Int Ap [ ],
|
||||
const Int Ai [ ],
|
||||
Int Rp [ ],
|
||||
Int Ri [ ],
|
||||
Int W [ ],
|
||||
Int Flag [ ]
|
||||
) ;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* debugging definitions */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
/* from assert.h: assert macro */
|
||||
#include <assert.h>
|
||||
|
||||
#ifndef EXTERN
|
||||
#define EXTERN extern
|
||||
#endif
|
||||
|
||||
EXTERN Int AMD_debug ;
|
||||
|
||||
GLOBAL void AMD_debug_init ( char *s ) ;
|
||||
|
||||
GLOBAL void AMD_dump
|
||||
(
|
||||
Int n,
|
||||
Int Pe [ ],
|
||||
Int Iw [ ],
|
||||
Int Len [ ],
|
||||
Int iwlen,
|
||||
Int pfree,
|
||||
Int Nv [ ],
|
||||
Int Next [ ],
|
||||
Int Last [ ],
|
||||
Int Head [ ],
|
||||
Int Elen [ ],
|
||||
Int Degree [ ],
|
||||
Int W [ ],
|
||||
Int nel
|
||||
) ;
|
||||
|
||||
#ifdef ASSERT
|
||||
#undef ASSERT
|
||||
#endif
|
||||
|
||||
/* Use mxAssert if AMD is compiled into a mexFunction */
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
#define ASSERT(expression) (mxAssert ((expression), ""))
|
||||
#else
|
||||
#define ASSERT(expression) (assert (expression))
|
||||
#endif
|
||||
|
||||
#define AMD_DEBUG0(params) { PRINTF (params) ; }
|
||||
#define AMD_DEBUG1(params) { if (AMD_debug >= 1) PRINTF (params) ; }
|
||||
#define AMD_DEBUG2(params) { if (AMD_debug >= 2) PRINTF (params) ; }
|
||||
#define AMD_DEBUG3(params) { if (AMD_debug >= 3) PRINTF (params) ; }
|
||||
#define AMD_DEBUG4(params) { if (AMD_debug >= 4) PRINTF (params) ; }
|
||||
|
||||
#else
|
||||
|
||||
/* no debugging */
|
||||
#define ASSERT(expression)
|
||||
#define AMD_DEBUG0(params)
|
||||
#define AMD_DEBUG1(params)
|
||||
#define AMD_DEBUG2(params)
|
||||
#define AMD_DEBUG3(params)
|
||||
#define AMD_DEBUG4(params)
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,200 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_order =========================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* User-callable AMD minimum degree ordering routine. See amd.h for
|
||||
* documentation.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === AMD_order =========================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
GLOBAL Int AMD_order
|
||||
(
|
||||
Int n,
|
||||
const Int Ap [ ],
|
||||
const Int Ai [ ],
|
||||
Int P [ ],
|
||||
double Control [ ],
|
||||
double Info [ ]
|
||||
)
|
||||
{
|
||||
Int *Len, *S, nz, i, *Pinv, info, status, *Rp, *Ri, *Cp, *Ci, ok ;
|
||||
size_t nzaat, slen ;
|
||||
double mem = 0 ;
|
||||
|
||||
#ifndef NDEBUG
|
||||
AMD_debug_init ("amd") ;
|
||||
#endif
|
||||
|
||||
/* clear the Info array, if it exists */
|
||||
info = Info != (double *) NULL ;
|
||||
if (info)
|
||||
{
|
||||
for (i = 0 ; i < AMD_INFO ; i++)
|
||||
{
|
||||
Info [i] = EMPTY ;
|
||||
}
|
||||
Info [AMD_N] = n ;
|
||||
Info [AMD_STATUS] = AMD_OK ;
|
||||
}
|
||||
|
||||
/* make sure inputs exist and n is >= 0 */
|
||||
if (Ai == (Int *) NULL || Ap == (Int *) NULL || P == (Int *) NULL || n < 0)
|
||||
{
|
||||
if (info) Info [AMD_STATUS] = AMD_INVALID ;
|
||||
return (AMD_INVALID) ; /* arguments are invalid */
|
||||
}
|
||||
|
||||
if (n == 0)
|
||||
{
|
||||
return (AMD_OK) ; /* n is 0 so there's nothing to do */
|
||||
}
|
||||
|
||||
nz = Ap [n] ;
|
||||
if (info)
|
||||
{
|
||||
Info [AMD_NZ] = nz ;
|
||||
}
|
||||
if (nz < 0)
|
||||
{
|
||||
if (info) Info [AMD_STATUS] = AMD_INVALID ;
|
||||
return (AMD_INVALID) ;
|
||||
}
|
||||
|
||||
/* check if n or nz will cause size_t overflow */
|
||||
if (((size_t) n) >= SIZE_T_MAX / sizeof (Int)
|
||||
|| ((size_t) nz) >= SIZE_T_MAX / sizeof (Int))
|
||||
{
|
||||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
|
||||
return (AMD_OUT_OF_MEMORY) ; /* problem too large */
|
||||
}
|
||||
|
||||
/* check the input matrix: AMD_OK, AMD_INVALID, or AMD_OK_BUT_JUMBLED */
|
||||
status = AMD_valid (n, n, Ap, Ai) ;
|
||||
|
||||
if (status == AMD_INVALID)
|
||||
{
|
||||
if (info) Info [AMD_STATUS] = AMD_INVALID ;
|
||||
return (AMD_INVALID) ; /* matrix is invalid */
|
||||
}
|
||||
|
||||
/* allocate two size-n integer workspaces */
|
||||
Len = amd_malloc (n * sizeof (Int)) ;
|
||||
Pinv = amd_malloc (n * sizeof (Int)) ;
|
||||
mem += n ;
|
||||
mem += n ;
|
||||
if (!Len || !Pinv)
|
||||
{
|
||||
/* :: out of memory :: */
|
||||
amd_free (Len) ;
|
||||
amd_free (Pinv) ;
|
||||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
|
||||
return (AMD_OUT_OF_MEMORY) ;
|
||||
}
|
||||
|
||||
if (status == AMD_OK_BUT_JUMBLED)
|
||||
{
|
||||
/* sort the input matrix and remove duplicate entries */
|
||||
AMD_DEBUG1 (("Matrix is jumbled\n")) ;
|
||||
Rp = amd_malloc ((n+1) * sizeof (Int)) ;
|
||||
Ri = amd_malloc (MAX (nz,1) * sizeof (Int)) ;
|
||||
mem += (n+1) ;
|
||||
mem += MAX (nz,1) ;
|
||||
if (!Rp || !Ri)
|
||||
{
|
||||
/* :: out of memory :: */
|
||||
amd_free (Rp) ;
|
||||
amd_free (Ri) ;
|
||||
amd_free (Len) ;
|
||||
amd_free (Pinv) ;
|
||||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
|
||||
return (AMD_OUT_OF_MEMORY) ;
|
||||
}
|
||||
/* use Len and Pinv as workspace to create R = A' */
|
||||
AMD_preprocess (n, Ap, Ai, Rp, Ri, Len, Pinv) ;
|
||||
Cp = Rp ;
|
||||
Ci = Ri ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* order the input matrix as-is. No need to compute R = A' first */
|
||||
Rp = NULL ;
|
||||
Ri = NULL ;
|
||||
Cp = (Int *) Ap ;
|
||||
Ci = (Int *) Ai ;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* determine the symmetry and count off-diagonal nonzeros in A+A' */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
nzaat = AMD_aat (n, Cp, Ci, Len, P, Info) ;
|
||||
AMD_DEBUG1 (("nzaat: %g\n", (double) nzaat)) ;
|
||||
ASSERT ((MAX (nz-n, 0) <= nzaat) && (nzaat <= 2 * (size_t) nz)) ;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* allocate workspace for matrix, elbow room, and 6 size-n vectors */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
S = NULL ;
|
||||
slen = nzaat ; /* space for matrix */
|
||||
ok = ((slen + nzaat/5) >= slen) ; /* check for size_t overflow */
|
||||
slen += nzaat/5 ; /* add elbow room */
|
||||
for (i = 0 ; ok && i < 7 ; i++)
|
||||
{
|
||||
ok = ((slen + n) > slen) ; /* check for size_t overflow */
|
||||
slen += n ; /* size-n elbow room, 6 size-n work */
|
||||
}
|
||||
mem += slen ;
|
||||
ok = ok && (slen < SIZE_T_MAX / sizeof (Int)) ; /* check for overflow */
|
||||
ok = ok && (slen < Int_MAX) ; /* S[i] for Int i must be OK */
|
||||
if (ok)
|
||||
{
|
||||
S = amd_malloc (slen * sizeof (Int)) ;
|
||||
}
|
||||
AMD_DEBUG1 (("slen %g\n", (double) slen)) ;
|
||||
if (!S)
|
||||
{
|
||||
/* :: out of memory :: (or problem too large) */
|
||||
amd_free (Rp) ;
|
||||
amd_free (Ri) ;
|
||||
amd_free (Len) ;
|
||||
amd_free (Pinv) ;
|
||||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ;
|
||||
return (AMD_OUT_OF_MEMORY) ;
|
||||
}
|
||||
if (info)
|
||||
{
|
||||
/* memory usage, in bytes. */
|
||||
Info [AMD_MEMORY] = mem * sizeof (Int) ;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* order the matrix */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
AMD_1 (n, Cp, Ci, P, Pinv, Len, slen, S, Control, Info) ;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* free the workspace */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
amd_free (Rp) ;
|
||||
amd_free (Ri) ;
|
||||
amd_free (Len) ;
|
||||
amd_free (Pinv) ;
|
||||
amd_free (S) ;
|
||||
if (info) Info [AMD_STATUS] = status ;
|
||||
return (status) ; /* successful ordering */
|
||||
}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_post_tree ======================================================= */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Post-ordering of a supernodal elimination tree. */
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL Int AMD_post_tree
|
||||
(
|
||||
Int root, /* root of the tree */
|
||||
Int k, /* start numbering at k */
|
||||
Int Child [ ], /* input argument of size nn, undefined on
|
||||
* output. Child [i] is the head of a link
|
||||
* list of all nodes that are children of node
|
||||
* i in the tree. */
|
||||
const Int Sibling [ ], /* input argument of size nn, not modified.
|
||||
* If f is a node in the link list of the
|
||||
* children of node i, then Sibling [f] is the
|
||||
* next child of node i.
|
||||
*/
|
||||
Int Order [ ], /* output order, of size nn. Order [i] = k
|
||||
* if node i is the kth node of the reordered
|
||||
* tree. */
|
||||
Int Stack [ ] /* workspace of size nn */
|
||||
#ifndef NDEBUG
|
||||
, Int nn /* nodes are in the range 0..nn-1. */
|
||||
#endif
|
||||
)
|
||||
{
|
||||
Int f, head, h, i ;
|
||||
|
||||
#if 0
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* recursive version (Stack [ ] is not used): */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* this is simple, but can caouse stack overflow if nn is large */
|
||||
i = root ;
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
k = AMD_post_tree (f, k, Child, Sibling, Order, Stack, nn) ;
|
||||
}
|
||||
Order [i] = k++ ;
|
||||
return (k) ;
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* non-recursive version, using an explicit stack */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* push root on the stack */
|
||||
head = 0 ;
|
||||
Stack [0] = root ;
|
||||
|
||||
while (head >= 0)
|
||||
{
|
||||
/* get head of stack */
|
||||
ASSERT (head < nn) ;
|
||||
i = Stack [head] ;
|
||||
AMD_DEBUG1 (("head of stack "ID" \n", i)) ;
|
||||
ASSERT (i >= 0 && i < nn) ;
|
||||
|
||||
if (Child [i] != EMPTY)
|
||||
{
|
||||
/* the children of i are not yet ordered */
|
||||
/* push each child onto the stack in reverse order */
|
||||
/* so that small ones at the head of the list get popped first */
|
||||
/* and the biggest one at the end of the list gets popped last */
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
head++ ;
|
||||
ASSERT (head < nn) ;
|
||||
ASSERT (f >= 0 && f < nn) ;
|
||||
}
|
||||
h = head ;
|
||||
ASSERT (head < nn) ;
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
ASSERT (h > 0) ;
|
||||
Stack [h--] = f ;
|
||||
AMD_DEBUG1 (("push "ID" on stack\n", f)) ;
|
||||
ASSERT (f >= 0 && f < nn) ;
|
||||
}
|
||||
ASSERT (Stack [h] == i) ;
|
||||
|
||||
/* delete child list so that i gets ordered next time we see it */
|
||||
Child [i] = EMPTY ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* the children of i (if there were any) are already ordered */
|
||||
/* remove i from the stack and order it. Front i is kth front */
|
||||
head-- ;
|
||||
AMD_DEBUG1 (("pop "ID" order "ID"\n", i, k)) ;
|
||||
Order [i] = k++ ;
|
||||
ASSERT (k <= nn) ;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
AMD_DEBUG1 (("\nStack:")) ;
|
||||
for (h = head ; h >= 0 ; h--)
|
||||
{
|
||||
Int j = Stack [h] ;
|
||||
AMD_DEBUG1 ((" "ID, j)) ;
|
||||
ASSERT (j >= 0 && j < nn) ;
|
||||
}
|
||||
AMD_DEBUG1 (("\n\n")) ;
|
||||
ASSERT (head < nn) ;
|
||||
#endif
|
||||
|
||||
}
|
||||
return (k) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,207 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_postorder ======================================================= */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Perform a postordering (via depth-first search) of an assembly tree. */
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL void AMD_postorder
|
||||
(
|
||||
/* inputs, not modified on output: */
|
||||
Int nn, /* nodes are in the range 0..nn-1 */
|
||||
Int Parent [ ], /* Parent [j] is the parent of j, or EMPTY if root */
|
||||
Int Nv [ ], /* Nv [j] > 0 number of pivots represented by node j,
|
||||
* or zero if j is not a node. */
|
||||
Int Fsize [ ], /* Fsize [j]: size of node j */
|
||||
|
||||
/* output, not defined on input: */
|
||||
Int Order [ ], /* output post-order */
|
||||
|
||||
/* workspaces of size nn: */
|
||||
Int Child [ ],
|
||||
Int Sibling [ ],
|
||||
Int Stack [ ]
|
||||
)
|
||||
{
|
||||
Int i, j, k, parent, frsize, f, fprev, maxfrsize, bigfprev, bigf, fnext ;
|
||||
|
||||
for (j = 0 ; j < nn ; j++)
|
||||
{
|
||||
Child [j] = EMPTY ;
|
||||
Sibling [j] = EMPTY ;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* place the children in link lists - bigger elements tend to be last */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
for (j = nn-1 ; j >= 0 ; j--)
|
||||
{
|
||||
if (Nv [j] > 0)
|
||||
{
|
||||
/* this is an element */
|
||||
parent = Parent [j] ;
|
||||
if (parent != EMPTY)
|
||||
{
|
||||
/* place the element in link list of the children its parent */
|
||||
/* bigger elements will tend to be at the end of the list */
|
||||
Sibling [j] = Child [parent] ;
|
||||
Child [parent] = j ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
Int nels, ff, nchild ;
|
||||
AMD_DEBUG1 (("\n\n================================ AMD_postorder:\n"));
|
||||
nels = 0 ;
|
||||
for (j = 0 ; j < nn ; j++)
|
||||
{
|
||||
if (Nv [j] > 0)
|
||||
{
|
||||
AMD_DEBUG1 (( ""ID" : nels "ID" npiv "ID" size "ID
|
||||
" parent "ID" maxfr "ID"\n", j, nels,
|
||||
Nv [j], Fsize [j], Parent [j], Fsize [j])) ;
|
||||
/* this is an element */
|
||||
/* dump the link list of children */
|
||||
nchild = 0 ;
|
||||
AMD_DEBUG1 ((" Children: ")) ;
|
||||
for (ff = Child [j] ; ff != EMPTY ; ff = Sibling [ff])
|
||||
{
|
||||
AMD_DEBUG1 ((ID" ", ff)) ;
|
||||
ASSERT (Parent [ff] == j) ;
|
||||
nchild++ ;
|
||||
ASSERT (nchild < nn) ;
|
||||
}
|
||||
AMD_DEBUG1 (("\n")) ;
|
||||
parent = Parent [j] ;
|
||||
if (parent != EMPTY)
|
||||
{
|
||||
ASSERT (Nv [parent] > 0) ;
|
||||
}
|
||||
nels++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
AMD_DEBUG1 (("\n\nGo through the children of each node, and put\n"
|
||||
"the biggest child last in each list:\n")) ;
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* place the largest child last in the list of children for each node */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
for (i = 0 ; i < nn ; i++)
|
||||
{
|
||||
if (Nv [i] > 0 && Child [i] != EMPTY)
|
||||
{
|
||||
|
||||
#ifndef NDEBUG
|
||||
Int nchild ;
|
||||
AMD_DEBUG1 (("Before partial sort, element "ID"\n", i)) ;
|
||||
nchild = 0 ;
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
ASSERT (f >= 0 && f < nn) ;
|
||||
AMD_DEBUG1 ((" f: "ID" size: "ID"\n", f, Fsize [f])) ;
|
||||
nchild++ ;
|
||||
ASSERT (nchild <= nn) ;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* find the biggest element in the child list */
|
||||
fprev = EMPTY ;
|
||||
maxfrsize = EMPTY ;
|
||||
bigfprev = EMPTY ;
|
||||
bigf = EMPTY ;
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
ASSERT (f >= 0 && f < nn) ;
|
||||
frsize = Fsize [f] ;
|
||||
if (frsize >= maxfrsize)
|
||||
{
|
||||
/* this is the biggest seen so far */
|
||||
maxfrsize = frsize ;
|
||||
bigfprev = fprev ;
|
||||
bigf = f ;
|
||||
}
|
||||
fprev = f ;
|
||||
}
|
||||
ASSERT (bigf != EMPTY) ;
|
||||
|
||||
fnext = Sibling [bigf] ;
|
||||
|
||||
AMD_DEBUG1 (("bigf "ID" maxfrsize "ID" bigfprev "ID" fnext "ID
|
||||
" fprev " ID"\n", bigf, maxfrsize, bigfprev, fnext, fprev)) ;
|
||||
|
||||
if (fnext != EMPTY)
|
||||
{
|
||||
/* if fnext is EMPTY then bigf is already at the end of list */
|
||||
|
||||
if (bigfprev == EMPTY)
|
||||
{
|
||||
/* delete bigf from the element of the list */
|
||||
Child [i] = fnext ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* delete bigf from the middle of the list */
|
||||
Sibling [bigfprev] = fnext ;
|
||||
}
|
||||
|
||||
/* put bigf at the end of the list */
|
||||
Sibling [bigf] = EMPTY ;
|
||||
ASSERT (Child [i] != EMPTY) ;
|
||||
ASSERT (fprev != bigf) ;
|
||||
ASSERT (fprev != EMPTY) ;
|
||||
Sibling [fprev] = bigf ;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
AMD_DEBUG1 (("After partial sort, element "ID"\n", i)) ;
|
||||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f])
|
||||
{
|
||||
ASSERT (f >= 0 && f < nn) ;
|
||||
AMD_DEBUG1 ((" "ID" "ID"\n", f, Fsize [f])) ;
|
||||
ASSERT (Nv [f] > 0) ;
|
||||
nchild-- ;
|
||||
}
|
||||
ASSERT (nchild == 0) ;
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* postorder the assembly tree */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
for (i = 0 ; i < nn ; i++)
|
||||
{
|
||||
Order [i] = EMPTY ;
|
||||
}
|
||||
|
||||
k = 0 ;
|
||||
|
||||
for (i = 0 ; i < nn ; i++)
|
||||
{
|
||||
if (Parent [i] == EMPTY && Nv [i] > 0)
|
||||
{
|
||||
AMD_DEBUG1 (("Root of assembly tree "ID"\n", i)) ;
|
||||
k = AMD_post_tree (i, k, Child, Sibling, Order, Stack
|
||||
#ifndef NDEBUG
|
||||
, nn
|
||||
#endif
|
||||
) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_preprocess ====================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Sorts, removes duplicate entries, and transposes from the nonzero pattern of
|
||||
* a column-form matrix A, to obtain the matrix R. The input matrix can have
|
||||
* duplicate entries and/or unsorted columns (AMD_valid (n,Ap,Ai) must not be
|
||||
* AMD_INVALID).
|
||||
*
|
||||
* This input condition is NOT checked. This routine is not user-callable.
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
/* === AMD_preprocess ====================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* AMD_preprocess does not check its input for errors or allocate workspace.
|
||||
* On input, the condition (AMD_valid (n,n,Ap,Ai) != AMD_INVALID) must hold.
|
||||
*/
|
||||
|
||||
GLOBAL void AMD_preprocess
|
||||
(
|
||||
Int n, /* input matrix: A is n-by-n */
|
||||
const Int Ap [ ], /* size n+1 */
|
||||
const Int Ai [ ], /* size nz = Ap [n] */
|
||||
|
||||
/* output matrix R: */
|
||||
Int Rp [ ], /* size n+1 */
|
||||
Int Ri [ ], /* size nz (or less, if duplicates present) */
|
||||
|
||||
Int W [ ], /* workspace of size n */
|
||||
Int Flag [ ] /* workspace of size n */
|
||||
)
|
||||
{
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* local variables */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
Int i, j, p, p2 ;
|
||||
|
||||
ASSERT (AMD_valid (n, n, Ap, Ai) != AMD_INVALID) ;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* count the entries in each row of A (excluding duplicates) */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
W [i] = 0 ; /* # of nonzeros in row i (excl duplicates) */
|
||||
Flag [i] = EMPTY ; /* Flag [i] = j if i appears in column j */
|
||||
}
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
p2 = Ap [j+1] ;
|
||||
for (p = Ap [j] ; p < p2 ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
if (Flag [i] != j)
|
||||
{
|
||||
/* row index i has not yet appeared in column j */
|
||||
W [i]++ ; /* one more entry in row i */
|
||||
Flag [i] = j ; /* flag row index i as appearing in col j*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* compute the row pointers for R */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
Rp [0] = 0 ;
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
Rp [i+1] = Rp [i] + W [i] ;
|
||||
}
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
W [i] = Rp [i] ;
|
||||
Flag [i] = EMPTY ;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* construct the row form matrix R */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* R = row form of pattern of A */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
p2 = Ap [j+1] ;
|
||||
for (p = Ap [j] ; p < p2 ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
if (Flag [i] != j)
|
||||
{
|
||||
/* row index i has not yet appeared in column j */
|
||||
Ri [W [i]++] = j ; /* put col j in row i */
|
||||
Flag [i] = j ; /* flag row index i as appearing in col j*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
ASSERT (AMD_valid (n, n, Rp, Ri) == AMD_OK) ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
ASSERT (W [j] == Rp [j+1]) ;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
/* ========================================================================= */
|
||||
/* === AMD_valid =========================================================== */
|
||||
/* ========================================================================= */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* AMD, Copyright (c) Timothy A. Davis, */
|
||||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */
|
||||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */
|
||||
/* web: http://www.cise.ufl.edu/research/sparse/amd */
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/* Check if a column-form matrix is valid or not. The matrix A is
|
||||
* n_row-by-n_col. The row indices of entries in column j are in
|
||||
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are:
|
||||
*
|
||||
* n_row >= 0
|
||||
* n_col >= 0
|
||||
* nz = Ap [n_col] >= 0 number of entries in the matrix
|
||||
* Ap [0] == 0
|
||||
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col.
|
||||
* Ai [0 ... nz-1] must be in the range 0 to n_row-1.
|
||||
*
|
||||
* If any of the above conditions hold, AMD_INVALID is returned. If the
|
||||
* following condition holds, AMD_OK_BUT_JUMBLED is returned (a warning,
|
||||
* not an error):
|
||||
*
|
||||
* row indices in Ai [Ap [j] ... Ap [j+1]-1] are not sorted in ascending
|
||||
* order, and/or duplicate entries exist.
|
||||
*
|
||||
* Otherwise, AMD_OK is returned.
|
||||
*
|
||||
* In v1.2 and earlier, this function returned TRUE if the matrix was valid
|
||||
* (now returns AMD_OK), or FALSE otherwise (now returns AMD_INVALID or
|
||||
* AMD_OK_BUT_JUMBLED).
|
||||
*/
|
||||
|
||||
#include "amd_internal.h"
|
||||
|
||||
GLOBAL Int AMD_valid
|
||||
(
|
||||
/* inputs, not modified on output: */
|
||||
Int n_row, /* A is n_row-by-n_col */
|
||||
Int n_col,
|
||||
const Int Ap [ ], /* column pointers of A, of size n_col+1 */
|
||||
const Int Ai [ ] /* row indices of A, of size nz = Ap [n_col] */
|
||||
)
|
||||
{
|
||||
Int nz, j, p1, p2, ilast, i, p, result = AMD_OK ;
|
||||
|
||||
if (n_row < 0 || n_col < 0 || Ap == NULL || Ai == NULL)
|
||||
{
|
||||
return (AMD_INVALID) ;
|
||||
}
|
||||
nz = Ap [n_col] ;
|
||||
if (Ap [0] != 0 || nz < 0)
|
||||
{
|
||||
/* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */
|
||||
AMD_DEBUG0 (("column 0 pointer bad or nz < 0\n")) ;
|
||||
return (AMD_INVALID) ;
|
||||
}
|
||||
for (j = 0 ; j < n_col ; j++)
|
||||
{
|
||||
p1 = Ap [j] ;
|
||||
p2 = Ap [j+1] ;
|
||||
AMD_DEBUG2 (("\nColumn: "ID" p1: "ID" p2: "ID"\n", j, p1, p2)) ;
|
||||
if (p1 > p2)
|
||||
{
|
||||
/* column pointers must be ascending */
|
||||
AMD_DEBUG0 (("column "ID" pointer bad\n", j)) ;
|
||||
return (AMD_INVALID) ;
|
||||
}
|
||||
ilast = EMPTY ;
|
||||
for (p = p1 ; p < p2 ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
AMD_DEBUG3 (("row: "ID"\n", i)) ;
|
||||
if (i < 0 || i >= n_row)
|
||||
{
|
||||
/* row index out of range */
|
||||
AMD_DEBUG0 (("index out of range, col "ID" row "ID"\n", j, i));
|
||||
return (AMD_INVALID) ;
|
||||
}
|
||||
if (i <= ilast)
|
||||
{
|
||||
/* row index unsorted, or duplicate entry present */
|
||||
AMD_DEBUG1 (("index unsorted/dupl col "ID" row "ID"\n", j, i));
|
||||
result = AMD_OK_BUT_JUMBLED ;
|
||||
}
|
||||
ilast = i ;
|
||||
}
|
||||
}
|
||||
return (result) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,263 @@
|
|||
/* ========================================================================== */
|
||||
/* === BTF package ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* BTF_MAXTRANS: find a column permutation Q to give A*Q a zero-free diagonal
|
||||
* BTF_STRONGCOMP: find a symmetric permutation P to put P*A*P' into block
|
||||
* upper triangular form.
|
||||
* BTF_ORDER: do both of the above (btf_maxtrans then btf_strongcomp).
|
||||
*
|
||||
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
|
||||
* with support from Sandia National Laboratories. All Rights Reserved.
|
||||
*/
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === BTF_MAXTRANS ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* BTF_MAXTRANS: finds a permutation of the columns of a matrix so that it has a
|
||||
* zero-free diagonal. The input is an m-by-n sparse matrix in compressed
|
||||
* column form. The array Ap of size n+1 gives the starting and ending
|
||||
* positions of the columns in the array Ai. Ap[0] must be zero. The array Ai
|
||||
* contains the row indices of the nonzeros of the matrix A, and is of size
|
||||
* Ap[n]. The row indices of column j are located in Ai[Ap[j] ... Ap[j+1]-1].
|
||||
* Row indices must be in the range 0 to m-1. Duplicate entries may be present
|
||||
* in any given column. The input matrix is not checked for validity (row
|
||||
* indices out of the range 0 to m-1 will lead to an undeterminate result -
|
||||
* possibly a core dump, for example). Row indices in any given column need
|
||||
* not be in sorted order. However, if they are sorted and the matrix already
|
||||
* has a zero-free diagonal, then the identity permutation is returned.
|
||||
*
|
||||
* The output of btf_maxtrans is an array Match of size n. If row i is matched
|
||||
* with column j, then A(i,j) is nonzero, and then Match[i] = j. If the matrix
|
||||
* is structurally nonsingular, all entries in the Match array are unique, and
|
||||
* Match can be viewed as a column permutation if A is square. That is, column
|
||||
* k of the original matrix becomes column Match[k] of the permuted matrix. In
|
||||
* MATLAB, this can be expressed as (for non-structurally singular matrices):
|
||||
*
|
||||
* Match = maxtrans (A) ;
|
||||
* B = A (:, Match) ;
|
||||
*
|
||||
* except of course here the A matrix and Match vector are all 0-based (rows
|
||||
* and columns in the range 0 to n-1), not 1-based (rows/cols in range 1 to n).
|
||||
* The MATLAB dmperm routine returns a row permutation. See the maxtrans
|
||||
* mexFunction for more details.
|
||||
*
|
||||
* If row i is not matched to any column, then Match[i] is == -1. The
|
||||
* btf_maxtrans routine returns the number of nonzeros on diagonal of the
|
||||
* permuted matrix.
|
||||
*
|
||||
* In the MATLAB mexFunction interface to btf_maxtrans, 1 is added to the Match
|
||||
* array to obtain a 1-based permutation. Thus, in MATLAB where A is m-by-n:
|
||||
*
|
||||
* q = maxtrans (A) ; % has entries in the range 0:n
|
||||
* q % a column permutation (only if sprank(A)==n)
|
||||
* B = A (:, q) ; % permuted matrix (only if sprank(A)==n)
|
||||
* sum (q > 0) ; % same as "sprank (A)"
|
||||
*
|
||||
* This behaviour differs from p = dmperm (A) in MATLAB, which returns the
|
||||
* matching as p(j)=i if row i and column j are matched, and p(j)=0 if column j
|
||||
* is unmatched.
|
||||
*
|
||||
* p = dmperm (A) ; % has entries in the range 0:m
|
||||
* p % a row permutation (only if sprank(A)==m)
|
||||
* B = A (p, :) ; % permuted matrix (only if sprank(A)==m)
|
||||
* sum (p > 0) ; % definition of sprank (A)
|
||||
*
|
||||
* This algorithm is based on the paper "On Algorithms for obtaining a maximum
|
||||
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
|
||||
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
|
||||
* same issue, pp. 387-390. Algorithm 575 is MC21A in the Harwell Subroutine
|
||||
* Library. This code is not merely a translation of the Fortran code into C.
|
||||
* It is a completely new implementation of the basic underlying method (depth
|
||||
* first search over a subgraph with nodes corresponding to columns matched so
|
||||
* far, and cheap matching). This code was written with minimal observation of
|
||||
* the MC21A/B code itself. See comments below for a comparison between the
|
||||
* maxtrans and MC21A/B codes.
|
||||
*
|
||||
* This routine operates on a column-form matrix and produces a column
|
||||
* permutation. MC21A uses a row-form matrix and produces a row permutation.
|
||||
* The difference is merely one of convention in the comments and interpretation
|
||||
* of the inputs and outputs. If you want a row permutation, simply pass a
|
||||
* compressed-row sparse matrix to this routine and you will get a row
|
||||
* permutation (just like MC21A). Similarly, you can pass a column-oriented
|
||||
* matrix to MC21A and it will happily return a column permutation.
|
||||
*/
|
||||
|
||||
#ifndef _BTF_H
|
||||
#define _BTF_H
|
||||
|
||||
/* make it easy for C++ programs to include BTF */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "UFconfig.h"
|
||||
|
||||
int btf_maxtrans /* returns # of columns matched */
|
||||
(
|
||||
/* --- input, not modified: --- */
|
||||
int nrow, /* A is nrow-by-ncol in compressed column form */
|
||||
int ncol,
|
||||
int Ap [ ], /* size ncol+1 */
|
||||
int Ai [ ], /* size nz = Ap [ncol] */
|
||||
double maxwork, /* maximum amount of work to do is maxwork*nnz(A); no limit
|
||||
* if <= 0 */
|
||||
|
||||
/* --- output, not defined on input --- */
|
||||
double *work, /* work = -1 if maxwork > 0 and the total work performed
|
||||
* reached the maximum of maxwork*nnz(A).
|
||||
* Otherwise, work = the total work performed. */
|
||||
|
||||
int Match [ ], /* size nrow. Match [i] = j if column j matched to row i
|
||||
* (see above for the singular-matrix case) */
|
||||
|
||||
/* --- workspace, not defined on input or output --- */
|
||||
int Work [ ] /* size 5*ncol */
|
||||
) ;
|
||||
|
||||
/* long integer version (all "int" parameters become "UF_long") */
|
||||
UF_long btf_l_maxtrans (UF_long, UF_long, UF_long *, UF_long *, double,
|
||||
double *, UF_long *, UF_long *) ;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === BTF_STRONGCOMP ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* BTF_STRONGCOMP finds the strongly connected components of a graph, returning
|
||||
* a symmetric permutation. The matrix A must be square, and is provided on
|
||||
* input in compressed-column form (see BTF_MAXTRANS, above). The diagonal of
|
||||
* the input matrix A (or A*Q if Q is provided on input) is ignored.
|
||||
*
|
||||
* If Q is not NULL on input, then the strongly connected components of A*Q are
|
||||
* found. Q may be flagged on input, where Q[k] < 0 denotes a flagged column k.
|
||||
* The permutation is j = BTF_UNFLIP (Q [k]). On output, Q is modified (the
|
||||
* flags are preserved) so that P*A*Q is in block upper triangular form.
|
||||
*
|
||||
* If Q is NULL, then the permutation P is returned so that P*A*P' is in upper
|
||||
* block triangular form.
|
||||
*
|
||||
* The vector R gives the block boundaries, where block b is in rows/columns
|
||||
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the
|
||||
* number of strongly connected components found.
|
||||
*/
|
||||
|
||||
int btf_strongcomp /* return # of strongly connected components */
|
||||
(
|
||||
/* input, not modified: */
|
||||
int n, /* A is n-by-n in compressed column form */
|
||||
int Ap [ ], /* size n+1 */
|
||||
int Ai [ ], /* size nz = Ap [n] */
|
||||
|
||||
/* optional input, modified (if present) on output: */
|
||||
int Q [ ], /* size n, input column permutation */
|
||||
|
||||
/* output, not defined on input */
|
||||
int P [ ], /* size n. P [k] = j if row and column j are kth row/col
|
||||
* in permuted matrix. */
|
||||
|
||||
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
|
||||
|
||||
/* workspace, not defined on input or output */
|
||||
int Work [ ] /* size 4n */
|
||||
) ;
|
||||
|
||||
UF_long btf_l_strongcomp (UF_long, UF_long *, UF_long *, UF_long *, UF_long *,
|
||||
UF_long *, UF_long *) ;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === BTF_ORDER ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* BTF_ORDER permutes a square matrix into upper block triangular form. It
|
||||
* does this by first finding a maximum matching (or perhaps a limited matching
|
||||
* if the work is limited), via the btf_maxtrans function. If a complete
|
||||
* matching is not found, BTF_ORDER completes the permutation, but flags the
|
||||
* columns of P*A*Q to denote which columns are not matched. If the matrix is
|
||||
* structurally rank deficient, some of the entries on the diagonal of the
|
||||
* permuted matrix will be zero. BTF_ORDER then calls btf_strongcomp to find
|
||||
* the strongly-connected components.
|
||||
*
|
||||
* On output, P and Q are the row and column permutations, where i = P[k] if
|
||||
* row i of A is the kth row of P*A*Q, and j = BTF_UNFLIP(Q[k]) if column j of
|
||||
* A is the kth column of P*A*Q. If Q[k] < 0, then the (k,k)th entry in P*A*Q
|
||||
* is structurally zero.
|
||||
*
|
||||
* The vector R gives the block boundaries, where block b is in rows/columns
|
||||
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the
|
||||
* number of strongly connected components found.
|
||||
*/
|
||||
|
||||
int btf_order /* returns number of blocks found */
|
||||
(
|
||||
/* --- input, not modified: --- */
|
||||
int n, /* A is n-by-n in compressed column form */
|
||||
int Ap [ ], /* size n+1 */
|
||||
int Ai [ ], /* size nz = Ap [n] */
|
||||
double maxwork, /* do at most maxwork*nnz(A) work in the maximum
|
||||
* transversal; no limit if <= 0 */
|
||||
|
||||
/* --- output, not defined on input --- */
|
||||
double *work, /* return value from btf_maxtrans */
|
||||
int P [ ], /* size n, row permutation */
|
||||
int Q [ ], /* size n, column permutation */
|
||||
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
|
||||
int *nmatch, /* # nonzeros on diagonal of P*A*Q */
|
||||
|
||||
/* --- workspace, not defined on input or output --- */
|
||||
int Work [ ] /* size 5n */
|
||||
) ;
|
||||
|
||||
UF_long btf_l_order (UF_long, UF_long *, UF_long *, double , double *,
|
||||
UF_long *, UF_long *, UF_long *, UF_long *, UF_long *) ;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === BTF marking of singular columns ====================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* BTF_FLIP is a "negation about -1", and is used to mark an integer j
|
||||
* that is normally non-negative. BTF_FLIP (-1) is -1. BTF_FLIP of
|
||||
* a number > -1 is negative, and BTF_FLIP of a number < -1 is positive.
|
||||
* BTF_FLIP (BTF_FLIP (j)) = j for all integers j. UNFLIP (j) acts
|
||||
* like an "absolute value" operation, and is always >= -1. You can test
|
||||
* whether or not an integer j is "flipped" with the BTF_ISFLIPPED (j)
|
||||
* macro.
|
||||
*/
|
||||
|
||||
#define BTF_FLIP(j) (-(j)-2)
|
||||
#define BTF_ISFLIPPED(j) ((j) < -1)
|
||||
#define BTF_UNFLIP(j) ((BTF_ISFLIPPED (j)) ? BTF_FLIP (j) : (j))
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === BTF version ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* All versions of BTF include these definitions.
|
||||
* As an example, to test if the version you are using is 1.2 or later:
|
||||
*
|
||||
* if (BTF_VERSION >= BTF_VERSION_CODE (1,2)) ...
|
||||
*
|
||||
* This also works during compile-time:
|
||||
*
|
||||
* #if (BTF >= BTF_VERSION_CODE (1,2))
|
||||
* printf ("This is version 1.2 or later\n") ;
|
||||
* #else
|
||||
* printf ("This is an early version\n") ;
|
||||
* #endif
|
||||
*/
|
||||
|
||||
#define BTF_DATE "Dec 7, 2011"
|
||||
#define BTF_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
|
||||
#define BTF_MAIN_VERSION 1
|
||||
#define BTF_SUB_VERSION 1
|
||||
#define BTF_SUBSUB_VERSION 3
|
||||
#define BTF_VERSION BTF_VERSION_CODE(BTF_MAIN_VERSION,BTF_SUB_VERSION)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/* ========================================================================== */
|
||||
/* === btf_internal include file ============================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
#ifndef _BTF_INTERNAL_H
|
||||
#define _BTF_INTERNAL_H
|
||||
|
||||
/*
|
||||
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
|
||||
* with support from Sandia National Laboratories. All Rights Reserved.
|
||||
*/
|
||||
|
||||
/* Not to be included in any user program. */
|
||||
|
||||
#ifdef DLONG
|
||||
#define Int UF_long
|
||||
#define Int_id UF_long_id
|
||||
#define BTF(name) btf_l_ ## name
|
||||
#else
|
||||
#define Int int
|
||||
#define Int_id "%d"
|
||||
#define BTF(name) btf_ ## name
|
||||
#endif
|
||||
|
||||
/* ========================================================================== */
|
||||
/* make sure debugging and printing is turned off */
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG
|
||||
#endif
|
||||
#ifndef NPRINT
|
||||
#define NPRINT
|
||||
#endif
|
||||
|
||||
/* To enable debugging and assertions, uncomment this line:
|
||||
#undef NDEBUG
|
||||
*/
|
||||
/* To enable diagnostic printing, uncomment this line:
|
||||
#undef NPRINT
|
||||
*/
|
||||
|
||||
/* ========================================================================== */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#define ASSERT(a) assert(a)
|
||||
|
||||
#undef TRUE
|
||||
#undef FALSE
|
||||
#undef PRINTF
|
||||
#undef MIN
|
||||
|
||||
#ifndef NPRINT
|
||||
#define PRINTF(s) { printf s ; } ;
|
||||
#else
|
||||
#define PRINTF(s)
|
||||
#endif
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
#define EMPTY (-1)
|
||||
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,387 @@
|
|||
/* ========================================================================== */
|
||||
/* === BTF_MAXTRANS ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Finds a column permutation that maximizes the number of entries on the
|
||||
* diagonal of a sparse matrix. See btf.h for more information.
|
||||
*
|
||||
* This function is identical to cs_maxtrans in CSparse, with the following
|
||||
* exceptions:
|
||||
*
|
||||
* (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and
|
||||
* imatch [j] = i if row i is matched to column j. This function returns
|
||||
* just jmatch (the Match array). The MATLAB interface to cs_maxtrans
|
||||
* (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB
|
||||
* caller.
|
||||
*
|
||||
* (2) cs_maxtrans includes a pre-pass that counts the number of non-empty
|
||||
* rows and columns (m2 and n2, respectively), and computes the matching
|
||||
* using the transpose of A if m2 < n2. cs_maxtrans also returns quickly
|
||||
* if the diagonal of the matrix is already zero-free. This pre-pass
|
||||
* allows cs_maxtrans to be much faster than maxtrans, if the use of the
|
||||
* transpose is warranted.
|
||||
*
|
||||
* However, for square structurally non-singular matrices with one or more
|
||||
* zeros on the diagonal, the pre-pass is a waste of time, and for these
|
||||
* matrices, maxtrans can be twice as fast as cs_maxtrans. Since the
|
||||
* maxtrans function is intended primarily for square matrices that are
|
||||
* typically structurally nonsingular, the pre-pass is not included here.
|
||||
* If this maxtrans function is used on a matrix with many more columns
|
||||
* than rows, consider passing the transpose to this function, or use
|
||||
* cs_maxtrans instead.
|
||||
*
|
||||
* (3) cs_maxtrans can operate as a randomized algorithm, to help avoid
|
||||
* rare cases of excessive run-time.
|
||||
*
|
||||
* (4) this maxtrans function includes an option that limits the total work
|
||||
* performed. If this limit is reached, the maximum transveral might not
|
||||
* be found.
|
||||
*
|
||||
* Thus, for general usage, cs_maxtrans is preferred. For square matrices that
|
||||
* are typically structurally non-singular, maxtrans is preferred. A partial
|
||||
* maxtrans can still be very useful when solving a sparse linear system.
|
||||
*
|
||||
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
|
||||
* with support from Sandia National Laboratories. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "btf.h"
|
||||
#include "btf_internal.h"
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === augment ============================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Perform a depth-first-search starting at column k, to find an augmenting
|
||||
* path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1),
|
||||
* (i3,j2), ..., (i(s+1), js), such that all of the following properties hold:
|
||||
*
|
||||
* * column k is not matched to any row
|
||||
* * entries in the path are nonzero
|
||||
* * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been
|
||||
* previously matched to each other
|
||||
* * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column
|
||||
*
|
||||
* Once this path is found, the matching can be changed to the set of pairs
|
||||
* path. An augmenting path is a sequence of row/column pairs
|
||||
*
|
||||
* (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js)
|
||||
*
|
||||
* Once a row is matched with a column it remains matched with some column, but
|
||||
* not necessarily the column it was first matched with.
|
||||
*
|
||||
* In the worst case, this function can examine every nonzero in A. Since it
|
||||
* is called n times by maxtrans, the total time of maxtrans can be as high as
|
||||
* O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at
|
||||
* most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might
|
||||
* not be found, however.
|
||||
*
|
||||
* This routine is very similar to the dfs routine in klu_kernel.c, in the
|
||||
* KLU sparse LU factorization package. It is essentially identical to the
|
||||
* cs_augment routine in CSparse, and its recursive version (augment function
|
||||
* in cs_maxtransr_mex.c), except that this routine allows for the search to be
|
||||
* terminated early if too much work is being performed.
|
||||
*
|
||||
* The algorithm is based on the paper "On Algorithms for obtaining a maximum
|
||||
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1,
|
||||
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal",
|
||||
* same issue, pp. 387-390. The code here is a new implementation of that
|
||||
* algorithm, with different data structures and control flow. After writing
|
||||
* this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575)
|
||||
* Some of the comparisons are partial because I didn't dig deeply into all of
|
||||
* the details of MC21A/B, such as how the stack is maintained. The following
|
||||
* arguments are essentially identical between this code and MC21A:
|
||||
*
|
||||
* maxtrans MC21A,B
|
||||
* -------- -------
|
||||
* n N identical
|
||||
* k JORD identical
|
||||
* Ap IP column / row pointers
|
||||
* Ai ICN row / column indices
|
||||
* Ap[n] LICN length of index array (# of nonzeros in A)
|
||||
* Match IPERM output column / row permutation
|
||||
* nmatch NUMNZ # of nonzeros on diagonal of permuted matrix
|
||||
* Flag CV mark a node as visited by the depth-first-search
|
||||
*
|
||||
* The following are different, but analogous:
|
||||
*
|
||||
* Cheap ARP indicates what part of the a column / row has
|
||||
* already been matched.
|
||||
*
|
||||
* The following arguments are very different:
|
||||
*
|
||||
* - LENR # of entries in each row/column (unused in maxtrans)
|
||||
* Pstack OUT Pstack keeps track of where we are in the depth-
|
||||
* first-search scan of column j. I think that OUT
|
||||
* plays a similar role in MC21B, but I'm unsure.
|
||||
* Istack PR keeps track of the rows in the path. PR is a link
|
||||
* list, though, whereas Istack is a stack. Maxtrans
|
||||
* does not use any link lists.
|
||||
* Jstack OUT? PR? the stack for nodes in the path (unsure)
|
||||
*
|
||||
* The following control structures are roughly comparable:
|
||||
*
|
||||
* maxtrans MC21B
|
||||
* -------- -----
|
||||
* for (k = 0 ; k < n ; k++) DO 100 JORD=1,N
|
||||
* while (head >= 0) DO 70 K=1,JORD
|
||||
* for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2
|
||||
* for (p = head ; ...) DO 90 K=1,JORD
|
||||
*/
|
||||
|
||||
static Int augment
|
||||
(
|
||||
Int k, /* which stage of the main loop we're in */
|
||||
Int Ap [ ], /* column pointers, size n+1 */
|
||||
Int Ai [ ], /* row indices, size nz = Ap [n] */
|
||||
Int Match [ ], /* size n, Match [i] = j if col j matched to i */
|
||||
Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */
|
||||
Int Flag [ ], /* Flag [j] = k if j already visited this stage */
|
||||
Int Istack [ ], /* size n. Row index stack. */
|
||||
Int Jstack [ ], /* size n. Column index stack. */
|
||||
Int Pstack [ ], /* size n. Keeps track of position in adjacency list */
|
||||
double *work, /* work performed by the depth-first-search */
|
||||
double maxwork /* maximum work allowed */
|
||||
)
|
||||
{
|
||||
/* local variables, but "global" to all DFS levels: */
|
||||
Int found ; /* true if match found. */
|
||||
Int head ; /* top of stack */
|
||||
|
||||
/* variables that are purely local to any one DFS level: */
|
||||
Int j2 ; /* the next DFS goes to node j2 */
|
||||
Int pend ; /* one past the end of the adjacency list for node j */
|
||||
Int pstart ;
|
||||
Int quick ;
|
||||
|
||||
/* variables that need to be pushed then popped from the stack: */
|
||||
Int i ; /* the row tentatively matched to i if DFS successful */
|
||||
Int j ; /* the DFS is at the current node j */
|
||||
Int p ; /* current index into the adj. list for node j */
|
||||
/* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */
|
||||
|
||||
quick = (maxwork > 0) ;
|
||||
|
||||
/* start a DFS to find a match for column k */
|
||||
found = FALSE ;
|
||||
i = EMPTY ;
|
||||
head = 0 ;
|
||||
Jstack [0] = k ;
|
||||
ASSERT (Flag [k] != k) ;
|
||||
|
||||
while (head >= 0)
|
||||
{
|
||||
j = Jstack [head] ;
|
||||
pend = Ap [j+1] ;
|
||||
|
||||
if (Flag [j] != k) /* a node is not yet visited */
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* prework for node j */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
/* first time that j has been visited */
|
||||
Flag [j] = k ;
|
||||
/* cheap assignment: find the next unmatched row in col j. This
|
||||
* loop takes at most O(nnz(A)) time for the sum total of all
|
||||
* calls to augment. */
|
||||
for (p = Cheap [j] ; p < pend && !found ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
found = (Match [i] == EMPTY) ;
|
||||
}
|
||||
Cheap [j] = p ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
/* prepare for DFS */
|
||||
if (found)
|
||||
{
|
||||
/* end of augmenting path, column j matched with row i */
|
||||
Istack [head] = i ;
|
||||
break ;
|
||||
}
|
||||
/* set Pstack [head] to the first entry in column j to scan */
|
||||
Pstack [head] = Ap [j] ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* quick return if too much work done */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (quick && *work > maxwork)
|
||||
{
|
||||
/* too much work has been performed; abort the search */
|
||||
return (EMPTY) ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* DFS for nodes adjacent to j */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* If cheap assignment not made, continue the depth-first search. All
|
||||
* rows in column j are already matched. Add the adjacent nodes to the
|
||||
* stack by iterating through until finding another non-visited node.
|
||||
*
|
||||
* It is the following loop that can force maxtrans to take
|
||||
* O(n*nnz(A)) time. */
|
||||
|
||||
pstart = Pstack [head] ;
|
||||
for (p = pstart ; p < pend ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
j2 = Match [i] ;
|
||||
ASSERT (j2 != EMPTY) ;
|
||||
if (Flag [j2] != k)
|
||||
{
|
||||
/* Node j2 is not yet visited, start a depth-first search on
|
||||
* node j2. Keep track of where we left off in the scan of adj
|
||||
* list of node j so we can restart j where we left off. */
|
||||
Pstack [head] = p + 1 ;
|
||||
/* Push j2 onto the stack and immediately break so we can
|
||||
* recurse on node j2. Also keep track of row i which (if this
|
||||
* search for an augmenting path works) will be matched with the
|
||||
* current node j. */
|
||||
Istack [head] = i ;
|
||||
Jstack [++head] = j2 ;
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* determine how much work was just performed */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
*work += (p - pstart + 1) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* node j is done, but the postwork is postponed - see below */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (p == pend)
|
||||
{
|
||||
/* If all adjacent nodes of j are already visited, pop j from
|
||||
* stack and continue. We failed to find a match. */
|
||||
head-- ;
|
||||
}
|
||||
}
|
||||
|
||||
/* postwork for all nodes j in the stack */
|
||||
/* unwind the path and make the corresponding matches */
|
||||
if (found)
|
||||
{
|
||||
for (p = head ; p >= 0 ; p--)
|
||||
{
|
||||
j = Jstack [p] ;
|
||||
i = Istack [p] ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* postwork for node j */
|
||||
/* -------------------------------------------------------------- */
|
||||
/* if found, match row i with column j */
|
||||
Match [i] = j ;
|
||||
}
|
||||
}
|
||||
return (found) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === maxtrans ============================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
Int BTF(maxtrans) /* returns # of columns in the matching */
|
||||
(
|
||||
/* --- input --- */
|
||||
Int nrow, /* A is nrow-by-ncol in compressed column form */
|
||||
Int ncol,
|
||||
Int Ap [ ], /* size ncol+1 */
|
||||
Int Ai [ ], /* size nz = Ap [ncol] */
|
||||
double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This
|
||||
* work limit excludes the O(nnz(A)) cheap-match phase. */
|
||||
|
||||
/* --- output --- */
|
||||
double *work, /* work = -1 if maxwork > 0 and the total work performed
|
||||
* reached the maximum of maxwork*nnz(A)).
|
||||
* Otherwise, work = the total work performed. */
|
||||
|
||||
Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */
|
||||
|
||||
/* --- workspace --- */
|
||||
Int Work [ ] /* size 5*ncol */
|
||||
)
|
||||
{
|
||||
Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ;
|
||||
Int i, j, k, nmatch, work_limit_reached, result ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get workspace and initialize */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Cheap = Work ; Work += ncol ;
|
||||
Flag = Work ; Work += ncol ;
|
||||
|
||||
/* stack for non-recursive depth-first search in augment function */
|
||||
Istack = Work ; Work += ncol ;
|
||||
Jstack = Work ; Work += ncol ;
|
||||
Pstack = Work ;
|
||||
|
||||
/* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */
|
||||
for (j = 0 ; j < ncol ; j++)
|
||||
{
|
||||
Cheap [j] = Ap [j] ;
|
||||
Flag [j] = EMPTY ;
|
||||
}
|
||||
|
||||
/* all rows and columns are currently unmatched */
|
||||
for (i = 0 ; i < nrow ; i++)
|
||||
{
|
||||
Match [i] = EMPTY ;
|
||||
}
|
||||
|
||||
if (maxwork > 0)
|
||||
{
|
||||
maxwork *= Ap [ncol] ;
|
||||
}
|
||||
*work = 0 ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* find a matching row for each column k */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
nmatch = 0 ;
|
||||
work_limit_reached = FALSE ;
|
||||
for (k = 0 ; k < ncol ; k++)
|
||||
{
|
||||
/* find an augmenting path to match some row i to column k */
|
||||
result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack,
|
||||
work, maxwork) ;
|
||||
if (result == TRUE)
|
||||
{
|
||||
/* we found it. Match [i] = k for some row i has been done. */
|
||||
nmatch++ ;
|
||||
}
|
||||
else if (result == EMPTY)
|
||||
{
|
||||
/* augment gave up because of too much work, and no match found */
|
||||
work_limit_reached = TRUE ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return the Match, and the # of matches made */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* At this point, row i is matched to j = Match [i] if j >= 0. i is an
|
||||
* unmatched row if Match [i] == EMPTY. */
|
||||
|
||||
if (work_limit_reached)
|
||||
{
|
||||
/* return -1 if the work limit of maxwork*nnz(A) was reached */
|
||||
*work = EMPTY ;
|
||||
}
|
||||
|
||||
return (nmatch) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
/* ========================================================================== */
|
||||
/* === BTF_ORDER ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Find a permutation P and Q to permute a square sparse matrix into upper block
|
||||
* triangular form. A(P,Q) will contain a zero-free diagonal if A has
|
||||
* structural full-rank. Otherwise, the number of nonzeros on the diagonal of
|
||||
* A(P,Q) will be maximized, and will equal the structural rank of A.
|
||||
*
|
||||
* Q[k] will be "flipped" if a zero-free diagonal was not found. Q[k] will be
|
||||
* negative, and j = BTF_UNFLIP (Q [k]) gives the corresponding permutation.
|
||||
*
|
||||
* R defines the block boundaries of A(P,Q). The kth block consists of rows
|
||||
* and columns R[k] to R[k+1]-1.
|
||||
*
|
||||
* If maxwork > 0 on input, then the work performed in btf_maxtrans is limited
|
||||
* to maxwork*nnz(A) (excluding the "cheap match" phase, which can take another
|
||||
* nnz(A) work). On output, the work parameter gives the actual work performed,
|
||||
* or -1 if the limit was reached. In the latter case, the diagonal of A(P,Q)
|
||||
* might not be zero-free, and the number of nonzeros on the diagonal of A(P,Q)
|
||||
* might not be equal to the structural rank.
|
||||
*
|
||||
* See btf.h for more details.
|
||||
*
|
||||
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
|
||||
* with support from Sandia National Laboratories. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "btf.h"
|
||||
#include "btf_internal.h"
|
||||
|
||||
/* This function only operates on square matrices (either structurally full-
|
||||
* rank, or structurally rank deficient). */
|
||||
|
||||
Int BTF(order) /* returns number of blocks found */
|
||||
(
|
||||
/* input, not modified: */
|
||||
Int n, /* A is n-by-n in compressed column form */
|
||||
Int Ap [ ], /* size n+1 */
|
||||
Int Ai [ ], /* size nz = Ap [n] */
|
||||
double maxwork, /* do at most maxwork*nnz(A) work in the maximum
|
||||
* transversal; no limit if <= 0 */
|
||||
|
||||
/* output, not defined on input */
|
||||
double *work, /* work performed in maxtrans, or -1 if limit reached */
|
||||
Int P [ ], /* size n, row permutation */
|
||||
Int Q [ ], /* size n, column permutation */
|
||||
Int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */
|
||||
Int *nmatch, /* # nonzeros on diagonal of P*A*Q */
|
||||
|
||||
/* workspace, not defined on input or output */
|
||||
Int Work [ ] /* size 5n */
|
||||
)
|
||||
{
|
||||
Int *Flag ;
|
||||
Int nblocks, i, j, nbadcol ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute the maximum matching */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* if maxwork > 0, then a maximum matching might not be found */
|
||||
|
||||
*nmatch = BTF(maxtrans) (n, n, Ap, Ai, maxwork, work, Q, Work) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* complete permutation if the matrix is structurally singular */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Since the matrix is square, ensure BTF_UNFLIP(Q[0..n-1]) is a
|
||||
* permutation of the columns of A so that A has as many nonzeros on the
|
||||
* diagonal as possible.
|
||||
*/
|
||||
|
||||
if (*nmatch < n)
|
||||
{
|
||||
/* get a size-n work array */
|
||||
Flag = Work + n ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
Flag [j] = 0 ;
|
||||
}
|
||||
|
||||
/* flag all matched columns */
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
j = Q [i] ;
|
||||
if (j != EMPTY)
|
||||
{
|
||||
/* row i and column j are matched to each other */
|
||||
Flag [j] = 1 ;
|
||||
}
|
||||
}
|
||||
|
||||
/* make a list of all unmatched columns, in Work [0..nbadcol-1] */
|
||||
nbadcol = 0 ;
|
||||
for (j = n-1 ; j >= 0 ; j--)
|
||||
{
|
||||
if (!Flag [j])
|
||||
{
|
||||
/* j is matched to nobody */
|
||||
Work [nbadcol++] = j ;
|
||||
}
|
||||
}
|
||||
ASSERT (*nmatch + nbadcol == n) ;
|
||||
|
||||
/* make an assignment for each unmatched row */
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
if (Q [i] == EMPTY && nbadcol > 0)
|
||||
{
|
||||
/* get an unmatched column j */
|
||||
j = Work [--nbadcol] ;
|
||||
/* assign j to row i and flag the entry by "flipping" it */
|
||||
Q [i] = BTF_FLIP (j) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* The permutation of a square matrix can be recovered as follows: Row i is
|
||||
* matched with column j, where j = BTF_UNFLIP (Q [i]) and where j
|
||||
* will always be in the valid range 0 to n-1. The entry A(i,j) is zero
|
||||
* if BTF_ISFLIPPED (Q [i]) is true, and nonzero otherwise. nmatch
|
||||
* is the number of entries in the Q array that are non-negative. */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* find the strongly connected components */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
nblocks = BTF(strongcomp) (n, Ap, Ai, Q, P, R, Work) ;
|
||||
return (nblocks) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,593 @@
|
|||
/* ========================================================================== */
|
||||
/* === BTF_STRONGCOMP ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Finds the strongly connected components of a graph, or equivalently, permutes
|
||||
* the matrix into upper block triangular form. See btf.h for more details.
|
||||
* Input matrix and Q are not checked on input.
|
||||
*
|
||||
* Copyright (c) 2004-2007. Tim Davis, University of Florida,
|
||||
* with support from Sandia National Laboratories. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "btf.h"
|
||||
#include "btf_internal.h"
|
||||
|
||||
#define UNVISITED (-2) /* Flag [j] = UNVISITED if node j not visited yet */
|
||||
#define UNASSIGNED (-1) /* Flag [j] = UNASSIGNED if node j has been visited,
|
||||
* but not yet assigned to a strongly-connected
|
||||
* component (aka block). Flag [j] = k (k in the
|
||||
* range 0 to nblocks-1) if node j has been visited
|
||||
* (and completed, with its postwork done) and
|
||||
* assigned to component k. */
|
||||
|
||||
/* This file contains two versions of the depth-first-search, a recursive one
|
||||
* and a non-recursive one. By default, the non-recursive one is used. */
|
||||
|
||||
#ifndef RECURSIVE
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === dfs: non-recursive version (default) ================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Perform a depth-first-search of a graph, stored in an adjacency-list form.
|
||||
* The row indices of column j (equivalently, the out-adjacency list of node j)
|
||||
* are stored in Ai [Ap[j] ... Ap[j+1]-1]. Self-edge (diagonal entries) are
|
||||
* ignored. Ap[0] must be zero, and thus nz = Ap[n] is the number of entries
|
||||
* in the matrix (or edges in the graph). The row indices in each column need
|
||||
* not be in any particular order. If an input column permutation is given,
|
||||
* node j (in the permuted matrix A*Q) is located in
|
||||
* Ai [Ap[Q[j]] ... Ap[Q[j]+1]-1]. This Q can be the same as the Match array
|
||||
* output from the maxtrans routine, for a square matrix that is structurally
|
||||
* full rank.
|
||||
*
|
||||
* The algorithm is from the paper by Robert E. Tarjan, "Depth-first search and
|
||||
* linear graph algorithms," SIAM Journal on Computing, vol. 1, no. 2,
|
||||
* pp. 146-160, 1972. The time taken by strongcomp is O(nnz(A)).
|
||||
*
|
||||
* See also MC13A/B in the Harwell subroutine library (Iain S. Duff and John
|
||||
* K. Reid, "Algorithm 529: permutations to block triangular form," ACM Trans.
|
||||
* on Mathematical Software, vol. 4, no. 2, pp. 189-192, 1978, and "An
|
||||
* implementation of Tarjan's algorithm for the block triangular form of a
|
||||
* matrix," same journal, pp. 137-147. This code is implements the same
|
||||
* algorithm as MC13A/B, except that the data structures are very different.
|
||||
* Also, unlike MC13A/B, the output permutation preserves the natural ordering
|
||||
* within each block.
|
||||
*/
|
||||
|
||||
static void dfs
|
||||
(
|
||||
/* inputs, not modified on output: */
|
||||
Int j, /* start the DFS at node j */
|
||||
Int Ap [ ], /* size n+1, column pointers for the matrix A */
|
||||
Int Ai [ ], /* row indices, size nz = Ap [n] */
|
||||
Int Q [ ], /* input column permutation */
|
||||
|
||||
/* inputs, modified on output (each array is of size n): */
|
||||
Int Time [ ], /* Time [j] = "time" that node j was first visited */
|
||||
Int Flag [ ], /* Flag [j]: see above */
|
||||
Int Low [ ], /* Low [j]: see definition below */
|
||||
Int *p_nblocks, /* number of blocks (aka strongly-connected-comp.)*/
|
||||
Int *p_timestamp, /* current "time" */
|
||||
|
||||
/* workspace, not defined on input or output: */
|
||||
Int Cstack [ ], /* size n, output stack to hold nodes of components */
|
||||
Int Jstack [ ], /* size n, stack for the variable j */
|
||||
Int Pstack [ ] /* size n, stack for the variable p */
|
||||
)
|
||||
{
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* local variables, and initializations */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* local variables, but "global" to all DFS levels: */
|
||||
Int chead ; /* top of Cstack */
|
||||
Int jhead ; /* top of Jstack and Pstack */
|
||||
|
||||
/* variables that are purely local to any one DFS level: */
|
||||
Int i ; /* edge (j,i) considered; i can be next node to traverse */
|
||||
Int parent ; /* parent of node j in the DFS tree */
|
||||
Int pend ; /* one past the end of the adjacency list for node j */
|
||||
Int jj ; /* column j of A*Q is column jj of the input matrix A */
|
||||
|
||||
/* variables that need to be pushed then popped from the stack: */
|
||||
Int p ; /* current index into the adj. list for node j */
|
||||
/* the variables j and p are stacked in Jstack and Pstack */
|
||||
|
||||
/* local copies of variables in the calling routine */
|
||||
Int nblocks = *p_nblocks ;
|
||||
Int timestamp = *p_timestamp ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* start a DFS at node j (same as the recursive call dfs (EMPTY, j)) */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
chead = 0 ; /* component stack is empty */
|
||||
jhead = 0 ; /* Jstack and Pstack are empty */
|
||||
Jstack [0] = j ; /* put the first node j on the Jstack */
|
||||
ASSERT (Flag [j] == UNVISITED) ;
|
||||
|
||||
while (jhead >= 0)
|
||||
{
|
||||
j = Jstack [jhead] ; /* grab the node j from the top of Jstack */
|
||||
|
||||
/* determine which column jj of the A is column j of A*Q */
|
||||
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ;
|
||||
pend = Ap [jj+1] ; /* j's row index list ends at Ai [pend-1] */
|
||||
|
||||
if (Flag [j] == UNVISITED)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* prework at node j */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
/* node j is being visited for the first time */
|
||||
Cstack [++chead] = j ; /* push j onto the stack */
|
||||
timestamp++ ; /* get a timestamp */
|
||||
Time [j] = timestamp ; /* give the timestamp to node j */
|
||||
Low [j] = timestamp ;
|
||||
Flag [j] = UNASSIGNED ; /* flag node j as visited */
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* set Pstack [jhead] to the first entry in column j to scan */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
Pstack [jhead] = Ap [jj] ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* DFS rooted at node j (start it, or continue where left off) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (p = Pstack [jhead] ; p < pend ; p++)
|
||||
{
|
||||
i = Ai [p] ; /* examine the edge from node j to node i */
|
||||
if (Flag [i] == UNVISITED)
|
||||
{
|
||||
/* Node i has not been visited - start a DFS at node i.
|
||||
* Keep track of where we left off in the scan of adjacency list
|
||||
* of node j so we can restart j where we left off. */
|
||||
Pstack [jhead] = p + 1 ;
|
||||
/* Push i onto the stack and immediately break
|
||||
* so we can recurse on node i. */
|
||||
Jstack [++jhead] = i ;
|
||||
ASSERT (Time [i] == EMPTY) ;
|
||||
ASSERT (Low [i] == EMPTY) ;
|
||||
/* break here to do what the recursive call dfs (j,i) does */
|
||||
break ;
|
||||
}
|
||||
else if (Flag [i] == UNASSIGNED)
|
||||
{
|
||||
/* Node i has been visited, but still unassigned to a block
|
||||
* this is a back or cross edge if Time [i] < Time [j].
|
||||
* Note that i might equal j, in which case this code does
|
||||
* nothing. */
|
||||
ASSERT (Time [i] > 0) ;
|
||||
ASSERT (Low [i] > 0) ;
|
||||
Low [j] = MIN (Low [j], Time [i]) ;
|
||||
}
|
||||
}
|
||||
|
||||
if (p == pend)
|
||||
{
|
||||
/* If all adjacent nodes of j are already visited, pop j from
|
||||
* Jstack and do the post work for node j. This also pops p
|
||||
* from the Pstack. */
|
||||
jhead-- ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* postwork at node j */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
/* determine if node j is the head of a component */
|
||||
if (Low [j] == Time [j])
|
||||
{
|
||||
/* pop all nodes in this SCC from Cstack */
|
||||
while (TRUE)
|
||||
{
|
||||
ASSERT (chead >= 0) ; /* stack not empty (j in it) */
|
||||
i = Cstack [chead--] ; /* pop a node from the Cstack */
|
||||
ASSERT (i >= 0) ;
|
||||
ASSERT (Flag [i] == UNASSIGNED) ;
|
||||
Flag [i] = nblocks ; /* assign i to current block */
|
||||
if (i == j) break ; /* current block ends at j */
|
||||
}
|
||||
nblocks++ ; /* one more block has been found */
|
||||
}
|
||||
/* update Low [parent], if the parent exists */
|
||||
if (jhead >= 0)
|
||||
{
|
||||
parent = Jstack [jhead] ;
|
||||
Low [parent] = MIN (Low [parent], Low [j]) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* cleanup: update timestamp and nblocks */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
*p_timestamp = timestamp ;
|
||||
*p_nblocks = nblocks ;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === dfs: recursive version (only for illustration) ======================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* The following is a recursive version of dfs, which computes identical results
|
||||
* as the non-recursive dfs. It is included here because it is easier to read.
|
||||
* Compare the comments in the code below with the identical comments in the
|
||||
* non-recursive code above, and that will help you see the correlation between
|
||||
* the two routines.
|
||||
*
|
||||
* This routine can cause stack overflow, and is thus not recommended for heavy
|
||||
* usage, particularly for large matrices. To help in delaying stack overflow,
|
||||
* global variables are used, reducing the amount of information each call to
|
||||
* dfs places on the call/return stack (the integers i, j, p, parent, and the
|
||||
* return address). Note that this means the recursive code is not thread-safe.
|
||||
* To try this version, compile the code with -DRECURSIVE or include the
|
||||
* following line at the top of this file:
|
||||
|
||||
#define RECURSIVE
|
||||
|
||||
*/
|
||||
|
||||
static Int /* for recursive illustration only, not for production use */
|
||||
chead, timestamp, nblocks, n, *Ap, *Ai, *Flag, *Cstack, *Time, *Low,
|
||||
*P, *R, *Q ;
|
||||
|
||||
static void dfs
|
||||
(
|
||||
Int parent, /* came from parent node */
|
||||
Int j /* at node j in the DFS */
|
||||
)
|
||||
{
|
||||
Int p ; /* current index into the adj. list for node j */
|
||||
Int i ; /* edge (j,i) considered; i can be next node to traverse */
|
||||
Int jj ; /* column j of A*Q is column jj of the input matrix A */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* prework at node j */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* node j is being visited for the first time */
|
||||
Cstack [++chead] = j ; /* push j onto the stack */
|
||||
timestamp++ ; /* get a timestamp */
|
||||
Time [j] = timestamp ; /* give the timestamp to node j */
|
||||
Low [j] = timestamp ;
|
||||
Flag [j] = UNASSIGNED ; /* flag node j as visited */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* DFS rooted at node j */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* determine which column jj of the A is column j of A*Q */
|
||||
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ;
|
||||
for (p = Ap [jj] ; p < Ap [jj+1] ; p++)
|
||||
{
|
||||
i = Ai [p] ; /* examine the edge from node j to node i */
|
||||
if (Flag [i] == UNVISITED)
|
||||
{
|
||||
/* Node i has not been visited - start a DFS at node i. */
|
||||
dfs (j, i) ;
|
||||
}
|
||||
else if (Flag [i] == UNASSIGNED)
|
||||
{
|
||||
/* Node i has been visited, but still unassigned to a block
|
||||
* this is a back or cross edge if Time [i] < Time [j].
|
||||
* Note that i might equal j, in which case this code does
|
||||
* nothing. */
|
||||
Low [j] = MIN (Low [j], Time [i]) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* postwork at node j */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* determine if node j is the head of a component */
|
||||
if (Low [j] == Time [j])
|
||||
{
|
||||
/* pop all nodes in this strongly connected component from Cstack */
|
||||
while (TRUE)
|
||||
{
|
||||
i = Cstack [chead--] ; /* pop a node from the Cstack */
|
||||
Flag [i] = nblocks ; /* assign node i to current block */
|
||||
if (i == j) break ; /* current block ends at node j */
|
||||
}
|
||||
nblocks++ ; /* one more block has been found */
|
||||
}
|
||||
/* update Low [parent] */
|
||||
if (parent != EMPTY)
|
||||
{
|
||||
/* Note that this could be done with Low[j] = MIN(Low[j],Low[i]) just
|
||||
* after the dfs (j,i) statement above, and then parent would not have
|
||||
* to be an input argument. Putting it here places all the postwork
|
||||
* for node j in one place, thus making the non-recursive DFS easier. */
|
||||
Low [parent] = MIN (Low [parent], Low [j]) ;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === btf_strongcomp ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
#ifndef RECURSIVE
|
||||
|
||||
Int BTF(strongcomp) /* return # of strongly connected components */
|
||||
(
|
||||
/* input, not modified: */
|
||||
Int n, /* A is n-by-n in compressed column form */
|
||||
Int Ap [ ], /* size n+1 */
|
||||
Int Ai [ ], /* size nz = Ap [n] */
|
||||
|
||||
/* optional input, modified (if present) on output: */
|
||||
Int Q [ ], /* size n, input column permutation. The permutation Q can
|
||||
* include a flag which indicates an unmatched row.
|
||||
* jold = BTF_UNFLIP (Q [jnew]) is the permutation;
|
||||
* this function ingnores these flags. On output, it is
|
||||
* modified according to the permutation P. */
|
||||
|
||||
/* output, not defined on input: */
|
||||
Int P [ ], /* size n. P [k] = j if row and column j are kth row/col
|
||||
* in permuted matrix. */
|
||||
Int R [ ], /* size n+1. kth block is in rows/cols R[k] ... R[k+1]-1
|
||||
* of the permuted matrix. */
|
||||
|
||||
/* workspace, not defined on input or output: */
|
||||
Int Work [ ] /* size 4n */
|
||||
)
|
||||
|
||||
#else
|
||||
|
||||
Int BTF(strongcomp) /* recursive version - same as above except for Work size */
|
||||
(
|
||||
Int n_in,
|
||||
Int Ap_in [ ],
|
||||
Int Ai_in [ ],
|
||||
Int Q_in [ ],
|
||||
Int P_in [ ],
|
||||
Int R_in [ ],
|
||||
Int Work [ ] /* size 2n */
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
{
|
||||
Int j, k, b ;
|
||||
|
||||
#ifndef RECURSIVE
|
||||
Int timestamp, nblocks, *Flag, *Cstack, *Time, *Low, *Jstack, *Pstack ;
|
||||
#else
|
||||
n = n_in ;
|
||||
Ap = Ap_in ;
|
||||
Ai = Ai_in ;
|
||||
Q = Q_in ;
|
||||
P = P_in ;
|
||||
R = R_in ;
|
||||
chead = EMPTY ;
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get and initialize workspace */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* timestamp is incremented each time a new node is visited.
|
||||
*
|
||||
* Time [j] is the timestamp given to node j.
|
||||
*
|
||||
* Low [j] is the lowest timestamp of any node reachable from j via either
|
||||
* a path to any descendent of j in the DFS tree, or via a single edge to
|
||||
* an either an ancestor (a back edge) or another node that's neither an
|
||||
* ancestor nor a descendant (a cross edge). If Low [j] is equal to
|
||||
* the timestamp of node j (Time [j]), then node j is the "head" of a
|
||||
* strongly connected component (SCC). That is, it is the first node
|
||||
* visited in its strongly connected component, and the DFS subtree rooted
|
||||
* at node j spans all the nodes of the strongly connected component.
|
||||
*
|
||||
* The term "block" and "component" are used interchangebly in this code;
|
||||
* "block" being a matrix term and "component" being a graph term for the
|
||||
* same thing.
|
||||
*
|
||||
* When a node is visited, it is placed on the Cstack (for "component"
|
||||
* stack). When node j is found to be an SCC head, all the nodes from the
|
||||
* top of the stack to node j itself form the nodes in the SCC. This Cstack
|
||||
* is used for both the recursive and non-recursive versions.
|
||||
*/
|
||||
|
||||
Time = Work ; Work += n ;
|
||||
Flag = Work ; Work += n ;
|
||||
Low = P ; /* use output array P as workspace for Low */
|
||||
Cstack = R ; /* use output array R as workspace for Cstack */
|
||||
|
||||
#ifndef RECURSIVE
|
||||
/* stack for non-recursive dfs */
|
||||
Jstack = Work ; Work += n ; /* stack for j */
|
||||
Pstack = Work ; /* stack for p */
|
||||
#endif
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
Flag [j] = UNVISITED ;
|
||||
Low [j] = EMPTY ;
|
||||
Time [j] = EMPTY ;
|
||||
#ifndef NDEBUG
|
||||
Cstack [j] = EMPTY ;
|
||||
#ifndef RECURSIVE
|
||||
Jstack [j] = EMPTY ;
|
||||
Pstack [j] = EMPTY ;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
timestamp = 0 ; /* each node given a timestamp when it is visited */
|
||||
nblocks = 0 ; /* number of blocks found so far */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* find the connected components via a depth-first-search */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* node j is unvisited or assigned to a block. Cstack is empty. */
|
||||
ASSERT (Flag [j] == UNVISITED || (Flag [j] >= 0 && Flag [j] < nblocks));
|
||||
if (Flag [j] == UNVISITED)
|
||||
{
|
||||
#ifndef RECURSIVE
|
||||
/* non-recursive dfs (default) */
|
||||
dfs (j, Ap, Ai, Q, Time, Flag, Low, &nblocks, ×tamp,
|
||||
Cstack, Jstack, Pstack) ;
|
||||
#else
|
||||
/* recursive dfs (for illustration only) */
|
||||
ASSERT (chead == EMPTY) ;
|
||||
dfs (EMPTY, j) ;
|
||||
ASSERT (chead == EMPTY) ;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ASSERT (timestamp == n) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* construct the block boundary array, R */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (b = 0 ; b < nblocks ; b++)
|
||||
{
|
||||
R [b] = 0 ;
|
||||
}
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* node j has been assigned to block b = Flag [j] */
|
||||
ASSERT (Time [j] > 0 && Time [j] <= n) ;
|
||||
ASSERT (Low [j] > 0 && Low [j] <= n) ;
|
||||
ASSERT (Flag [j] >= 0 && Flag [j] < nblocks) ;
|
||||
R [Flag [j]]++ ;
|
||||
}
|
||||
/* R [b] is now the number of nodes in block b. Compute cumulative sum
|
||||
* of R, using Time [0 ... nblocks-1] as workspace. */
|
||||
Time [0] = 0 ;
|
||||
for (b = 1 ; b < nblocks ; b++)
|
||||
{
|
||||
Time [b] = Time [b-1] + R [b-1] ;
|
||||
}
|
||||
for (b = 0 ; b < nblocks ; b++)
|
||||
{
|
||||
R [b] = Time [b] ;
|
||||
}
|
||||
R [nblocks] = n ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* construct the permutation, preserving the natural order */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
P [k] = EMPTY ;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* place column j in the permutation */
|
||||
P [Time [Flag [j]]++] = j ;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
ASSERT (P [k] != EMPTY) ;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Now block b consists of the nodes k1 to k2-1 in the permuted matrix,
|
||||
* where k1 = R [b] and k2 = R [b+1]. Row and column j of the original
|
||||
* matrix becomes row and column P [k] of the permuted matrix. The set of
|
||||
* of rows/columns (nodes) in block b is given by P [k1 ... k2-1], and this
|
||||
* set is sorted in ascending order. Thus, if the matrix consists of just
|
||||
* one block, P is the identity permutation. */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* if Q is present on input, set Q = Q*P' */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Q != (Int *) NULL)
|
||||
{
|
||||
/* We found a symmetric permutation P for the matrix A*Q. The overall
|
||||
* permutation is thus P*(A*Q)*P'. Set Q=Q*P' so that the final
|
||||
* permutation is P*A*Q. Use Time as workspace. Note that this
|
||||
* preserves the negative values of Q if the matrix is structurally
|
||||
* singular. */
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Time [k] = Q [P [k]] ;
|
||||
}
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Q [k] = Time [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* how to traverse the permuted matrix */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* If Q is not present, the following code can be used to traverse the
|
||||
* permuted matrix P*A*P'
|
||||
*
|
||||
* // compute the inverse of P
|
||||
* for (knew = 0 ; knew < n ; knew++)
|
||||
* {
|
||||
* // row and column kold in the old matrix is row/column knew
|
||||
* // in the permuted matrix P*A*P'
|
||||
* kold = P [knew] ;
|
||||
* Pinv [kold] = knew ;
|
||||
* }
|
||||
* for (b = 0 ; b < nblocks ; b++)
|
||||
* {
|
||||
* // traverse block b of the permuted matrix P*A*P'
|
||||
* k1 = R [b] ;
|
||||
* k2 = R [b+1] ;
|
||||
* nk = k2 - k1 ;
|
||||
* for (jnew = k1 ; jnew < k2 ; jnew++)
|
||||
* {
|
||||
* jold = P [jnew] ;
|
||||
* for (p = Ap [jold] ; p < Ap [jold+1] ; p++)
|
||||
* {
|
||||
* iold = Ai [p] ;
|
||||
* inew = Pinv [iold] ;
|
||||
* // Entry in the old matrix is A (iold, jold), and its
|
||||
* // position in the new matrix P*A*P' is (inew, jnew).
|
||||
* // Let B be the bth diagonal block of the permuted
|
||||
* // matrix. If inew >= k1, then this entry is in row/
|
||||
* // column (inew-k1, jnew-k1) of the nk-by-nk matrix B.
|
||||
* // Otherwise, the entry is in the upper block triangular
|
||||
* // part, not in any diagonal block.
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* If Q is present replace the above statement
|
||||
* jold = P [jnew] ;
|
||||
* with
|
||||
* jold = Q [jnew] ;
|
||||
* or
|
||||
* jold = BTF_UNFLIP (Q [jnew]) ;
|
||||
*
|
||||
* then entry A (iold,jold) in the old (unpermuted) matrix is at (inew,jnew)
|
||||
* in the permuted matrix P*A*Q. Everything else remains the same as the
|
||||
* above (simply replace P*A*P' with P*A*Q in the above comments).
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return # of blocks / # of strongly connected components */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
return (nblocks) ;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,255 @@
|
|||
/* ========================================================================== */
|
||||
/* === colamd/symamd prototypes and definitions ============================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* COLAMD / SYMAMD include file
|
||||
|
||||
You must include this file (colamd.h) in any routine that uses colamd,
|
||||
symamd, or the related macros and definitions.
|
||||
|
||||
Authors:
|
||||
|
||||
The authors of the code itself are Stefan I. Larimore and Timothy A.
|
||||
Davis (davis at cise.ufl.edu), University of Florida. The algorithm was
|
||||
developed in collaboration with John Gilbert, Xerox PARC, and Esmond
|
||||
Ng, Oak Ridge National Laboratory.
|
||||
|
||||
Acknowledgements:
|
||||
|
||||
This work was supported by the National Science Foundation, under
|
||||
grants DMS-9504974 and DMS-9803599.
|
||||
|
||||
Notice:
|
||||
|
||||
Copyright (c) 1998-2007, Timothy A. Davis, All Rights Reserved.
|
||||
|
||||
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
|
||||
Permission is hereby granted to use, copy, modify, and/or distribute
|
||||
this program, provided that the Copyright, this License, and the
|
||||
Availability of the original version is retained on all copies and made
|
||||
accessible to the end-user of any code or package that includes COLAMD
|
||||
or any modified version of COLAMD.
|
||||
|
||||
Availability:
|
||||
|
||||
The colamd/symamd library is available at
|
||||
|
||||
http://www.cise.ufl.edu/research/sparse/colamd/
|
||||
|
||||
This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
|
||||
file. It is required by the colamd.c, colamdmex.c, and symamdmex.c
|
||||
files, and by any C code that calls the routines whose prototypes are
|
||||
listed below, or that uses the colamd/symamd definitions listed below.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef COLAMD_H
|
||||
#define COLAMD_H
|
||||
|
||||
/* make it easy for C++ programs to include COLAMD */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === Include files ======================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === COLAMD version ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* COLAMD Version 2.4 and later will include the following definitions.
|
||||
* As an example, to test if the version you are using is 2.4 or later:
|
||||
*
|
||||
* #ifdef COLAMD_VERSION
|
||||
* if (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) ...
|
||||
* #endif
|
||||
*
|
||||
* This also works during compile-time:
|
||||
*
|
||||
* #if defined(COLAMD_VERSION) && (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4))
|
||||
* printf ("This is version 2.4 or later\n") ;
|
||||
* #else
|
||||
* printf ("This is an early version\n") ;
|
||||
* #endif
|
||||
*
|
||||
* Versions 2.3 and earlier of COLAMD do not include a #define'd version number.
|
||||
*/
|
||||
|
||||
#define COLAMD_DATE "Dec 7, 2011"
|
||||
#define COLAMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
|
||||
#define COLAMD_MAIN_VERSION 2
|
||||
#define COLAMD_SUB_VERSION 7
|
||||
#define COLAMD_SUBSUB_VERSION 4
|
||||
#define COLAMD_VERSION \
|
||||
COLAMD_VERSION_CODE(COLAMD_MAIN_VERSION,COLAMD_SUB_VERSION)
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === Knob and statistics definitions ====================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
|
||||
#define COLAMD_KNOBS 20
|
||||
|
||||
/* number of output statistics. Only stats [0..6] are currently used. */
|
||||
#define COLAMD_STATS 20
|
||||
|
||||
/* knobs [0] and stats [0]: dense row knob and output statistic. */
|
||||
#define COLAMD_DENSE_ROW 0
|
||||
|
||||
/* knobs [1] and stats [1]: dense column knob and output statistic. */
|
||||
#define COLAMD_DENSE_COL 1
|
||||
|
||||
/* knobs [2]: aggressive absorption */
|
||||
#define COLAMD_AGGRESSIVE 2
|
||||
|
||||
/* stats [2]: memory defragmentation count output statistic */
|
||||
#define COLAMD_DEFRAG_COUNT 2
|
||||
|
||||
/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */
|
||||
#define COLAMD_STATUS 3
|
||||
|
||||
/* stats [4..6]: error info, or info on jumbled columns */
|
||||
#define COLAMD_INFO1 4
|
||||
#define COLAMD_INFO2 5
|
||||
#define COLAMD_INFO3 6
|
||||
|
||||
/* error codes returned in stats [3]: */
|
||||
#define COLAMD_OK (0)
|
||||
#define COLAMD_OK_BUT_JUMBLED (1)
|
||||
#define COLAMD_ERROR_A_not_present (-1)
|
||||
#define COLAMD_ERROR_p_not_present (-2)
|
||||
#define COLAMD_ERROR_nrow_negative (-3)
|
||||
#define COLAMD_ERROR_ncol_negative (-4)
|
||||
#define COLAMD_ERROR_nnz_negative (-5)
|
||||
#define COLAMD_ERROR_p0_nonzero (-6)
|
||||
#define COLAMD_ERROR_A_too_small (-7)
|
||||
#define COLAMD_ERROR_col_length_negative (-8)
|
||||
#define COLAMD_ERROR_row_index_out_of_bounds (-9)
|
||||
#define COLAMD_ERROR_out_of_memory (-10)
|
||||
#define COLAMD_ERROR_internal_error (-999)
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === Prototypes of user-callable routines ================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* define UF_long */
|
||||
#include "UFconfig.h"
|
||||
|
||||
size_t colamd_recommended /* returns recommended value of Alen, */
|
||||
/* or 0 if input arguments are erroneous */
|
||||
(
|
||||
int nnz, /* nonzeros in A */
|
||||
int n_row, /* number of rows in A */
|
||||
int n_col /* number of columns in A */
|
||||
) ;
|
||||
|
||||
size_t colamd_l_recommended /* returns recommended value of Alen, */
|
||||
/* or 0 if input arguments are erroneous */
|
||||
(
|
||||
UF_long nnz, /* nonzeros in A */
|
||||
UF_long n_row, /* number of rows in A */
|
||||
UF_long n_col /* number of columns in A */
|
||||
) ;
|
||||
|
||||
void colamd_set_defaults /* sets default parameters */
|
||||
( /* knobs argument is modified on output */
|
||||
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
|
||||
) ;
|
||||
|
||||
void colamd_l_set_defaults /* sets default parameters */
|
||||
( /* knobs argument is modified on output */
|
||||
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
|
||||
) ;
|
||||
|
||||
int colamd /* returns (1) if successful, (0) otherwise*/
|
||||
( /* A and p arguments are modified on output */
|
||||
int n_row, /* number of rows in A */
|
||||
int n_col, /* number of columns in A */
|
||||
int Alen, /* size of the array A */
|
||||
int A [], /* row indices of A, of size Alen */
|
||||
int p [], /* column pointers of A, of size n_col+1 */
|
||||
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
|
||||
int stats [COLAMD_STATS] /* colamd output statistics and error codes */
|
||||
) ;
|
||||
|
||||
UF_long colamd_l /* returns (1) if successful, (0) otherwise*/
|
||||
( /* A and p arguments are modified on output */
|
||||
UF_long n_row, /* number of rows in A */
|
||||
UF_long n_col, /* number of columns in A */
|
||||
UF_long Alen, /* size of the array A */
|
||||
UF_long A [], /* row indices of A, of size Alen */
|
||||
UF_long p [], /* column pointers of A, of size n_col+1 */
|
||||
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
|
||||
UF_long stats [COLAMD_STATS]/* colamd output statistics and error codes */
|
||||
) ;
|
||||
|
||||
int symamd /* return (1) if OK, (0) otherwise */
|
||||
(
|
||||
int n, /* number of rows and columns of A */
|
||||
int A [], /* row indices of A */
|
||||
int p [], /* column pointers of A */
|
||||
int perm [], /* output permutation, size n_col+1 */
|
||||
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
|
||||
int stats [COLAMD_STATS], /* output statistics and error codes */
|
||||
void * (*allocate) (size_t, size_t),
|
||||
/* pointer to calloc (ANSI C) or */
|
||||
/* mxCalloc (for MATLAB mexFunction) */
|
||||
void (*release) (void *)
|
||||
/* pointer to free (ANSI C) or */
|
||||
/* mxFree (for MATLAB mexFunction) */
|
||||
) ;
|
||||
|
||||
UF_long symamd_l /* return (1) if OK, (0) otherwise */
|
||||
(
|
||||
UF_long n, /* number of rows and columns of A */
|
||||
UF_long A [], /* row indices of A */
|
||||
UF_long p [], /* column pointers of A */
|
||||
UF_long perm [], /* output permutation, size n_col+1 */
|
||||
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
|
||||
UF_long stats [COLAMD_STATS], /* output statistics and error codes */
|
||||
void * (*allocate) (size_t, size_t),
|
||||
/* pointer to calloc (ANSI C) or */
|
||||
/* mxCalloc (for MATLAB mexFunction) */
|
||||
void (*release) (void *)
|
||||
/* pointer to free (ANSI C) or */
|
||||
/* mxFree (for MATLAB mexFunction) */
|
||||
) ;
|
||||
|
||||
void colamd_report
|
||||
(
|
||||
int stats [COLAMD_STATS]
|
||||
) ;
|
||||
|
||||
void colamd_l_report
|
||||
(
|
||||
UF_long stats [COLAMD_STATS]
|
||||
) ;
|
||||
|
||||
void symamd_report
|
||||
(
|
||||
int stats [COLAMD_STATS]
|
||||
) ;
|
||||
|
||||
void symamd_l_report
|
||||
(
|
||||
UF_long stats [COLAMD_STATS]
|
||||
) ;
|
||||
|
||||
#ifndef EXTERN
|
||||
#define EXTERN extern
|
||||
#endif
|
||||
|
||||
EXTERN int (*colamd_printf) (const char *, ...) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* COLAMD_H */
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
/* ========================================================================== */
|
||||
/* === colamd_global.c ====================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
* COLAMD, Copyright (C) 2007, Timothy A. Davis.
|
||||
* See License.txt for the Version 2.1 of the GNU Lesser General Public License
|
||||
* http://www.cise.ufl.edu/research/sparse
|
||||
* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Global variables for COLAMD */
|
||||
|
||||
#ifndef NPRINT
|
||||
#ifdef MATLAB_MEX_FILE
|
||||
#include "mex.h"
|
||||
int (*colamd_printf) (const char *, ...) = mexPrintf ;
|
||||
#else
|
||||
#include <stdio.h>
|
||||
int (*colamd_printf) (const char *, ...) = printf ;
|
||||
#endif
|
||||
#else
|
||||
int (*colamd_printf) (const char *, ...) = ((void *) 0) ;
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,773 @@
|
|||
/* ========================================================================== */
|
||||
/* === klu ================================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with
|
||||
* optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim
|
||||
* Davis. This algorithm is what appears as the default sparse LU routine in
|
||||
* MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A).
|
||||
* Note that no column ordering is provided (see COLAMD or AMD for suitable
|
||||
* orderings). SuperLU is based on this algorithm, except that it adds the
|
||||
* use of dense matrix operations on "supernodes" (adjacent columns with
|
||||
* identical). This code doesn't use supernodes, thus its name ("Kent" LU,
|
||||
* as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower
|
||||
* than SuperLU and UMFPACK for large matrices with lots of nonzeros in their
|
||||
* factors (such as for most finite-element problems). However, for matrices
|
||||
* with very sparse LU factors, this algorithm is typically faster than both
|
||||
* SuperLU and UMFPACK, since in this case there is little chance to exploit
|
||||
* dense matrix kernels (the BLAS).
|
||||
*
|
||||
* Only one block of A is factorized, in the BTF form. The input n is the
|
||||
* size of the block; k1 is the first row and column in the block.
|
||||
*
|
||||
* NOTE: no error checking is done on the inputs. This version is not meant to
|
||||
* be called directly by the user. Use klu_factor instead.
|
||||
*
|
||||
* No fill-reducing ordering is provided. The ordering quality of
|
||||
* klu_kernel_factor is the responsibility of the caller. The input A must
|
||||
* pre-permuted to reduce fill-in, or fill-reducing input permutation Q must
|
||||
* be provided.
|
||||
*
|
||||
* The input matrix A must be in compressed-column form, with either sorted
|
||||
* or unsorted row indices. Row indices for column j of A is in
|
||||
* Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the
|
||||
* numerical values. No duplicate entries are allowed.
|
||||
*
|
||||
* Copyright 2004-2009, Tim Davis. All rights reserved. See the README
|
||||
* file for details on permitted use. Note that no code from The MathWorks,
|
||||
* Inc, or from SuperLU, or from any other source appears here. The code is
|
||||
* written from scratch, from the algorithmic description in Gilbert & Peierls'
|
||||
* and Eisenstat & Liu's journal papers [1,2].
|
||||
*
|
||||
* If an input permutation Q is provided, the factorization L*U = A (P,Q)
|
||||
* is computed, where P is determined by partial pivoting, and Q is the input
|
||||
* ordering. If the pivot tolerance is less than 1, the "diagonal" entry that
|
||||
* KLU attempts to choose is the diagonal of A (Q,Q). In other words, the
|
||||
* input permutation is applied symmetrically to the input matrix. The output
|
||||
* permutation P includes both the partial pivoting ordering and the input
|
||||
* permutation. If Q is NULL, then it is assumed to be the identity
|
||||
* permutation. Q is not modified.
|
||||
*
|
||||
* [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time
|
||||
* Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp.,
|
||||
* vol 9, pp. 862-874, 1988.
|
||||
* [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in
|
||||
* Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis &
|
||||
* Applic., vol 13, pp. 202-211, 1992.
|
||||
*/
|
||||
|
||||
/* ========================================================================== */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n. n must be > 0. */
|
||||
Int Ap [ ], /* size n+1, column pointers for A */
|
||||
Int Ai [ ], /* size nz = Ap [n], row indices for A */
|
||||
Entry Ax [ ], /* size nz, values of A */
|
||||
Int Q [ ], /* size n, optional column permutation */
|
||||
double Lsize, /* estimate of number of nonzeros in L */
|
||||
|
||||
/* outputs, not defined on input */
|
||||
Unit **p_LU, /* row indices and values of L and U */
|
||||
Entry Udiag [ ], /* size n, diagonal of U */
|
||||
Int Llen [ ], /* size n, column length of L */
|
||||
Int Ulen [ ], /* size n, column length of U */
|
||||
Int Lip [ ], /* size n, column pointers for L */
|
||||
Int Uip [ ], /* size n, column pointers for U */
|
||||
Int P [ ], /* row permutation, size n */
|
||||
Int *lnz, /* size of L */
|
||||
Int *unz, /* size of U */
|
||||
|
||||
/* workspace, undefined on input */
|
||||
Entry *X, /* size n double's, zero on output */
|
||||
Int *Work, /* size 5n Int's */
|
||||
|
||||
/* inputs, not modified on output */
|
||||
Int k1, /* the block of A is from k1 to k2-1 */
|
||||
Int PSinv [ ], /* inverse of P from symbolic factorization */
|
||||
double Rs [ ], /* scale factors for A */
|
||||
|
||||
/* inputs, modified on output */
|
||||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
|
||||
Int Offi [ ],
|
||||
Entry Offx [ ],
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double maxlnz, dunits ;
|
||||
Unit *LU ;
|
||||
Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ;
|
||||
Int lsize, usize, anz, ok ;
|
||||
size_t lusize ;
|
||||
ASSERT (Common != NULL) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get control parameters, or use defaults */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
n = MAX (1, n) ;
|
||||
anz = Ap [n+k1] - Ap [k1] ;
|
||||
|
||||
if (Lsize <= 0)
|
||||
{
|
||||
Lsize = -Lsize ;
|
||||
Lsize = MAX (Lsize, 1.0) ;
|
||||
lsize = Lsize * anz + n ;
|
||||
}
|
||||
else
|
||||
{
|
||||
lsize = Lsize ;
|
||||
}
|
||||
|
||||
usize = lsize ;
|
||||
|
||||
lsize = MAX (n+1, lsize) ;
|
||||
usize = MAX (n+1, usize) ;
|
||||
|
||||
maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ;
|
||||
maxlnz = MIN (maxlnz, ((double) INT_MAX)) ;
|
||||
lsize = MIN (maxlnz, lsize) ;
|
||||
usize = MIN (maxlnz, usize) ;
|
||||
|
||||
PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n",
|
||||
n, anz, k1, lsize, usize, maxlnz)) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate workspace and outputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* return arguments are not yet assigned */
|
||||
*p_LU = (Unit *) NULL ;
|
||||
|
||||
/* these computations are safe from size_t overflow */
|
||||
W = Work ;
|
||||
Pinv = (Int *) W ; W += n ;
|
||||
Stack = (Int *) W ; W += n ;
|
||||
Flag = (Int *) W ; W += n ;
|
||||
Lpend = (Int *) W ; W += n ;
|
||||
Ap_pos = (Int *) W ; W += n ;
|
||||
|
||||
dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) +
|
||||
DUNITS (Int, usize) + DUNITS (Entry, usize) ;
|
||||
lusize = (size_t) dunits ;
|
||||
ok = !INT_OVERFLOW (dunits) ;
|
||||
LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ;
|
||||
if (LU == NULL)
|
||||
{
|
||||
/* out of memory, or problem too large */
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
lusize = 0 ;
|
||||
return (lusize) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* factorize */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* with pruning, and non-recursive depth-first-search */
|
||||
lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize,
|
||||
Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz,
|
||||
X, Stack, Flag, Ap_pos, Lpend,
|
||||
k1, PSinv, Rs, Offp, Offi, Offx, Common) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return LU factors, or return nothing if an error occurred */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
LU = KLU_free (LU, lusize, sizeof (Unit), Common) ;
|
||||
lusize = 0 ;
|
||||
}
|
||||
*p_LU = LU ;
|
||||
PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ;
|
||||
return (lusize) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_lsolve =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal
|
||||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
|
||||
* and is stored in ROW form with row dimension nrhs. nrhs must be in the
|
||||
* range 1 to 4. */
|
||||
|
||||
void KLU_lsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Lip [ ],
|
||||
Int Llen [ ],
|
||||
Unit LU [ ],
|
||||
Int nrhs,
|
||||
/* right-hand-side on input, solution to Lx=b on output */
|
||||
Entry X [ ]
|
||||
)
|
||||
{
|
||||
Entry x [4], lik ;
|
||||
Int *Li ;
|
||||
Entry *Lx ;
|
||||
Int k, p, len, i ;
|
||||
|
||||
switch (nrhs)
|
||||
{
|
||||
|
||||
case 1:
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
x [0] = X [k] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
/* unit diagonal of L is not stored*/
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
/* X [Li [p]] -= Lx [p] * x [0] ; */
|
||||
MULT_SUB (X [Li [p]], Lx [p], x [0]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
x [0] = X [2*k ] ;
|
||||
x [1] = X [2*k + 1] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
lik = Lx [p] ;
|
||||
MULT_SUB (X [2*i], lik, x [0]) ;
|
||||
MULT_SUB (X [2*i + 1], lik, x [1]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
x [0] = X [3*k ] ;
|
||||
x [1] = X [3*k + 1] ;
|
||||
x [2] = X [3*k + 2] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
lik = Lx [p] ;
|
||||
MULT_SUB (X [3*i], lik, x [0]) ;
|
||||
MULT_SUB (X [3*i + 1], lik, x [1]) ;
|
||||
MULT_SUB (X [3*i + 2], lik, x [2]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
x [0] = X [4*k ] ;
|
||||
x [1] = X [4*k + 1] ;
|
||||
x [2] = X [4*k + 2] ;
|
||||
x [3] = X [4*k + 3] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
lik = Lx [p] ;
|
||||
MULT_SUB (X [4*i], lik, x [0]) ;
|
||||
MULT_SUB (X [4*i + 1], lik, x [1]) ;
|
||||
MULT_SUB (X [4*i + 2], lik, x [2]) ;
|
||||
MULT_SUB (X [4*i + 3], lik, x [3]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_usolve =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal
|
||||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
|
||||
* and is stored in ROW form with row dimension nrhs. nrhs must be in the
|
||||
* range 1 to 4. */
|
||||
|
||||
void KLU_usolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Uip [ ],
|
||||
Int Ulen [ ],
|
||||
Unit LU [ ],
|
||||
Entry Udiag [ ],
|
||||
Int nrhs,
|
||||
/* right-hand-side on input, solution to Ux=b on output */
|
||||
Entry X [ ]
|
||||
)
|
||||
{
|
||||
Entry x [4], uik, ukk ;
|
||||
Int *Ui ;
|
||||
Entry *Ux ;
|
||||
Int k, p, len, i ;
|
||||
|
||||
switch (nrhs)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
/* x [0] = X [k] / Udiag [k] ; */
|
||||
DIV (x [0], X [k], Udiag [k]) ;
|
||||
X [k] = x [0] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
/* X [Ui [p]] -= Ux [p] * x [0] ; */
|
||||
MULT_SUB (X [Ui [p]], Ux [p], x [0]) ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
ukk = Udiag [k] ;
|
||||
/* x [0] = X [2*k ] / ukk ;
|
||||
x [1] = X [2*k + 1] / ukk ; */
|
||||
DIV (x [0], X [2*k], ukk) ;
|
||||
DIV (x [1], X [2*k + 1], ukk) ;
|
||||
|
||||
X [2*k ] = x [0] ;
|
||||
X [2*k + 1] = x [1] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
uik = Ux [p] ;
|
||||
/* X [2*i ] -= uik * x [0] ;
|
||||
X [2*i + 1] -= uik * x [1] ; */
|
||||
MULT_SUB (X [2*i], uik, x [0]) ;
|
||||
MULT_SUB (X [2*i + 1], uik, x [1]) ;
|
||||
}
|
||||
}
|
||||
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
ukk = Udiag [k] ;
|
||||
|
||||
DIV (x [0], X [3*k], ukk) ;
|
||||
DIV (x [1], X [3*k + 1], ukk) ;
|
||||
DIV (x [2], X [3*k + 2], ukk) ;
|
||||
|
||||
X [3*k ] = x [0] ;
|
||||
X [3*k + 1] = x [1] ;
|
||||
X [3*k + 2] = x [2] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
uik = Ux [p] ;
|
||||
MULT_SUB (X [3*i], uik, x [0]) ;
|
||||
MULT_SUB (X [3*i + 1], uik, x [1]) ;
|
||||
MULT_SUB (X [3*i + 2], uik, x [2]) ;
|
||||
}
|
||||
}
|
||||
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
ukk = Udiag [k] ;
|
||||
|
||||
DIV (x [0], X [4*k], ukk) ;
|
||||
DIV (x [1], X [4*k + 1], ukk) ;
|
||||
DIV (x [2], X [4*k + 2], ukk) ;
|
||||
DIV (x [3], X [4*k + 3], ukk) ;
|
||||
|
||||
X [4*k ] = x [0] ;
|
||||
X [4*k + 1] = x [1] ;
|
||||
X [4*k + 2] = x [2] ;
|
||||
X [4*k + 3] = x [3] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
uik = Ux [p] ;
|
||||
|
||||
MULT_SUB (X [4*i], uik, x [0]) ;
|
||||
MULT_SUB (X [4*i + 1], uik, x [1]) ;
|
||||
MULT_SUB (X [4*i + 2], uik, x [2]) ;
|
||||
MULT_SUB (X [4*i + 3], uik, x [3]) ;
|
||||
}
|
||||
}
|
||||
|
||||
break ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_ltsolve ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal
|
||||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs
|
||||
* and is stored in ROW form with row dimension nrhs. nrhs must in the
|
||||
* range 1 to 4. */
|
||||
|
||||
void KLU_ltsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Lip [ ],
|
||||
Int Llen [ ],
|
||||
Unit LU [ ],
|
||||
Int nrhs,
|
||||
#ifdef COMPLEX
|
||||
Int conj_solve,
|
||||
#endif
|
||||
/* right-hand-side on input, solution to L'x=b on output */
|
||||
Entry X [ ]
|
||||
)
|
||||
{
|
||||
Entry x [4], lik ;
|
||||
Int *Li ;
|
||||
Entry *Lx ;
|
||||
Int k, p, len, i ;
|
||||
|
||||
switch (nrhs)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
x [0] = X [k] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
/* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
|
||||
MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/*x [0] -= Lx [p] * X [Li [p]] ;*/
|
||||
MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
|
||||
}
|
||||
}
|
||||
X [k] = x [0] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
x [0] = X [2*k ] ;
|
||||
x [1] = X [2*k + 1] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (lik, Lx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
lik = Lx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], lik, X [2*i]) ;
|
||||
MULT_SUB (x [1], lik, X [2*i + 1]) ;
|
||||
}
|
||||
X [2*k ] = x [0] ;
|
||||
X [2*k + 1] = x [1] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
x [0] = X [3*k ] ;
|
||||
x [1] = X [3*k + 1] ;
|
||||
x [2] = X [3*k + 2] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (lik, Lx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
lik = Lx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], lik, X [3*i]) ;
|
||||
MULT_SUB (x [1], lik, X [3*i + 1]) ;
|
||||
MULT_SUB (x [2], lik, X [3*i + 2]) ;
|
||||
}
|
||||
X [3*k ] = x [0] ;
|
||||
X [3*k + 1] = x [1] ;
|
||||
X [3*k + 2] = x [2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = n-1 ; k >= 0 ; k--)
|
||||
{
|
||||
x [0] = X [4*k ] ;
|
||||
x [1] = X [4*k + 1] ;
|
||||
x [2] = X [4*k + 2] ;
|
||||
x [3] = X [4*k + 3] ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (lik, Lx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
lik = Lx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], lik, X [4*i]) ;
|
||||
MULT_SUB (x [1], lik, X [4*i + 1]) ;
|
||||
MULT_SUB (x [2], lik, X [4*i + 2]) ;
|
||||
MULT_SUB (x [3], lik, X [4*i + 3]) ;
|
||||
}
|
||||
X [4*k ] = x [0] ;
|
||||
X [4*k + 1] = x [1] ;
|
||||
X [4*k + 2] = x [2] ;
|
||||
X [4*k + 3] = x [3] ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_utsolve ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal
|
||||
* entry is stored (and appears last in each column of U). Overwrites B
|
||||
* with the solution X. B is n-by-nrhs and is stored in ROW form with row
|
||||
* dimension nrhs. nrhs must be in the range 1 to 4. */
|
||||
|
||||
void KLU_utsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Uip [ ],
|
||||
Int Ulen [ ],
|
||||
Unit LU [ ],
|
||||
Entry Udiag [ ],
|
||||
Int nrhs,
|
||||
#ifdef COMPLEX
|
||||
Int conj_solve,
|
||||
#endif
|
||||
/* right-hand-side on input, solution to Ux=b on output */
|
||||
Entry X [ ]
|
||||
)
|
||||
{
|
||||
Entry x [4], uik, ukk ;
|
||||
Int k, p, len, i ;
|
||||
Int *Ui ;
|
||||
Entry *Ux ;
|
||||
|
||||
switch (nrhs)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
x [0] = X [k] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
/* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
|
||||
MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/* x [0] -= Ux [p] * X [Ui [p]] ; */
|
||||
MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
|
||||
}
|
||||
}
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (ukk, Udiag [k]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
ukk = Udiag [k] ;
|
||||
}
|
||||
DIV (X [k], x [0], ukk) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
x [0] = X [2*k ] ;
|
||||
x [1] = X [2*k + 1] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (uik, Ux [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
uik = Ux [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], uik, X [2*i]) ;
|
||||
MULT_SUB (x [1], uik, X [2*i + 1]) ;
|
||||
}
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (ukk, Udiag [k]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
ukk = Udiag [k] ;
|
||||
}
|
||||
DIV (X [2*k], x [0], ukk) ;
|
||||
DIV (X [2*k + 1], x [1], ukk) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
x [0] = X [3*k ] ;
|
||||
x [1] = X [3*k + 1] ;
|
||||
x [2] = X [3*k + 2] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (uik, Ux [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
uik = Ux [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], uik, X [3*i]) ;
|
||||
MULT_SUB (x [1], uik, X [3*i + 1]) ;
|
||||
MULT_SUB (x [2], uik, X [3*i + 2]) ;
|
||||
}
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (ukk, Udiag [k]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
ukk = Udiag [k] ;
|
||||
}
|
||||
DIV (X [3*k], x [0], ukk) ;
|
||||
DIV (X [3*k + 1], x [1], ukk) ;
|
||||
DIV (X [3*k + 2], x [2], ukk) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
|
||||
x [0] = X [4*k ] ;
|
||||
x [1] = X [4*k + 1] ;
|
||||
x [2] = X [4*k + 2] ;
|
||||
x [3] = X [4*k + 3] ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Ui [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (uik, Ux [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
uik = Ux [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], uik, X [4*i]) ;
|
||||
MULT_SUB (x [1], uik, X [4*i + 1]) ;
|
||||
MULT_SUB (x [2], uik, X [4*i + 2]) ;
|
||||
MULT_SUB (x [3], uik, X [4*i + 3]) ;
|
||||
}
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (ukk, Udiag [k]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
ukk = Udiag [k] ;
|
||||
}
|
||||
DIV (X [4*k], x [0], ukk) ;
|
||||
DIV (X [4*k + 1], x [1], ukk) ;
|
||||
DIV (X [4*k + 2], x [2], ukk) ;
|
||||
DIV (X [4*k + 3], x [3], ukk) ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,831 @@
|
|||
/* ========================================================================== */
|
||||
/* === klu include file ===================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Include file for user programs that call klu_* routines */
|
||||
|
||||
#ifndef _KLU_H
|
||||
#define _KLU_H
|
||||
|
||||
/* make it easy for C++ programs to include KLU */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "amd.h"
|
||||
#include "colamd.h"
|
||||
#include "btf.h"
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Symbolic object - contains the pre-ordering computed by klu_analyze */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* A (P,Q) is in upper block triangular form. The kth block goes from
|
||||
* row/col index R [k] to R [k+1]-1. The estimated number of nonzeros
|
||||
* in the L factor of the kth block is Lnz [k].
|
||||
*/
|
||||
|
||||
/* only computed if the AMD ordering is chosen: */
|
||||
double symmetry ; /* symmetry of largest block */
|
||||
double est_flops ; /* est. factorization flop count */
|
||||
double lnz, unz ; /* estimated nz in L and U, including diagonals */
|
||||
double *Lnz ; /* size n, but only Lnz [0..nblocks-1] is used */
|
||||
|
||||
/* computed for all orderings: */
|
||||
int
|
||||
n, /* input matrix A is n-by-n */
|
||||
nz, /* # entries in input matrix */
|
||||
*P, /* size n */
|
||||
*Q, /* size n */
|
||||
*R, /* size n+1, but only R [0..nblocks] is used */
|
||||
nzoff, /* nz in off-diagonal blocks */
|
||||
nblocks, /* number of blocks */
|
||||
maxblock, /* size of largest block */
|
||||
ordering, /* ordering used (AMD, COLAMD, or GIVEN) */
|
||||
do_btf ; /* whether or not BTF preordering was requested */
|
||||
|
||||
/* only computed if BTF preordering requested */
|
||||
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank
|
||||
* deficient. -1 if not computed. n if the matrix has
|
||||
* full structural rank */
|
||||
|
||||
} klu_symbolic ;
|
||||
|
||||
typedef struct /* 64-bit version (otherwise same as above) */
|
||||
{
|
||||
double symmetry, est_flops, lnz, unz ;
|
||||
double *Lnz ;
|
||||
UF_long n, nz, *P, *Q, *R, nzoff, nblocks, maxblock, ordering, do_btf,
|
||||
structural_rank ;
|
||||
|
||||
} klu_l_symbolic ;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Numeric object - contains the factors computed by klu_factor */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* LU factors of each block, the pivot row permutation, and the
|
||||
* entries in the off-diagonal blocks */
|
||||
|
||||
int n ; /* A is n-by-n */
|
||||
int nblocks ; /* number of diagonal blocks */
|
||||
int lnz ; /* actual nz in L, including diagonal */
|
||||
int unz ; /* actual nz in U, including diagonal */
|
||||
int max_lnz_block ; /* max actual nz in L in any one block, incl. diag */
|
||||
int max_unz_block ; /* max actual nz in U in any one block, incl. diag */
|
||||
int *Pnum ; /* size n. final pivot permutation */
|
||||
int *Pinv ; /* size n. inverse of final pivot permutation */
|
||||
|
||||
/* LU factors of each block */
|
||||
int *Lip ; /* size n. pointers into LUbx[block] for L */
|
||||
int *Uip ; /* size n. pointers into LUbx[block] for U */
|
||||
int *Llen ; /* size n. Llen [k] = # of entries in kth column of L */
|
||||
int *Ulen ; /* size n. Ulen [k] = # of entries in kth column of U */
|
||||
void **LUbx ; /* L and U indices and entries (excl. diagonal of U) */
|
||||
size_t *LUsize ; /* size of each LUbx [block], in sizeof (Unit) */
|
||||
void *Udiag ; /* diagonal of U */
|
||||
|
||||
/* scale factors; can be NULL if no scaling */
|
||||
double *Rs ; /* size n. Rs [i] is scale factor for row i */
|
||||
|
||||
/* permanent workspace for factorization and solve */
|
||||
size_t worksize ; /* size (in bytes) of Work */
|
||||
void *Work ; /* workspace */
|
||||
void *Xwork ; /* alias into Numeric->Work */
|
||||
int *Iwork ; /* alias into Numeric->Work */
|
||||
|
||||
/* off-diagonal entries in a conventional compressed-column sparse matrix */
|
||||
int *Offp ; /* size n+1, column pointers */
|
||||
int *Offi ; /* size nzoff, row indices */
|
||||
void *Offx ; /* size nzoff, numerical values */
|
||||
int nzoff ;
|
||||
|
||||
} klu_numeric ;
|
||||
|
||||
typedef struct /* 64-bit version (otherwise same as above) */
|
||||
{
|
||||
UF_long n, nblocks, lnz, unz, max_lnz_block, max_unz_block, *Pnum, *Pinv,
|
||||
*Lip, *Uip, *Llen, *Ulen ;
|
||||
void **LUbx ;
|
||||
size_t *LUsize ;
|
||||
void *Udiag ;
|
||||
double *Rs ;
|
||||
size_t worksize ;
|
||||
void *Work, *Xwork ;
|
||||
UF_long *Iwork ;
|
||||
UF_long *Offp, *Offi ;
|
||||
void *Offx ;
|
||||
UF_long nzoff ;
|
||||
|
||||
} klu_l_numeric ;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* KLU control parameters and statistics */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Common->status values */
|
||||
#define KLU_OK 0
|
||||
#define KLU_SINGULAR (1) /* status > 0 is a warning, not an error */
|
||||
#define KLU_OUT_OF_MEMORY (-2)
|
||||
#define KLU_INVALID (-3)
|
||||
#define KLU_TOO_LARGE (-4) /* integer overflow has occured */
|
||||
|
||||
typedef struct klu_common_struct
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* parameters */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double tol ; /* pivot tolerance for diagonal preference */
|
||||
double memgrow ; /* realloc memory growth size for LU factors */
|
||||
double initmem_amd ; /* init. memory size with AMD: c*nnz(L) + n */
|
||||
double initmem ; /* init. memory size: c*nnz(A) + n */
|
||||
double maxwork ; /* maxwork for BTF, <= 0 if no limit */
|
||||
|
||||
int btf ; /* use BTF pre-ordering, or not */
|
||||
int ordering ; /* 0: AMD, 1: COLAMD, 2: user P and Q,
|
||||
* 3: user function */
|
||||
int scale ; /* row scaling: -1: none (and no error check),
|
||||
* 0: none, 1: sum, 2: max */
|
||||
|
||||
/* memory management routines */
|
||||
void *(*malloc_memory) (size_t) ; /* pointer to malloc */
|
||||
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */
|
||||
void (*free_memory) (void *) ; /* pointer to free */
|
||||
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */
|
||||
|
||||
/* pointer to user ordering function */
|
||||
int (*user_order) (int, int *, int *, int *, struct klu_common_struct *) ;
|
||||
|
||||
/* pointer to user data, passed unchanged as the last parameter to the
|
||||
* user ordering function (optional, the user function need not use this
|
||||
* information). */
|
||||
void *user_data ;
|
||||
|
||||
int halt_if_singular ; /* how to handle a singular matrix:
|
||||
* FALSE: keep going. Return a Numeric object with a zero U(k,k). A
|
||||
* divide-by-zero may occur when computing L(:,k). The Numeric object
|
||||
* can be passed to klu_solve (a divide-by-zero will occur). It can
|
||||
* also be safely passed to klu_refactor.
|
||||
* TRUE: stop quickly. klu_factor will free the partially-constructed
|
||||
* Numeric object. klu_refactor will not free it, but will leave the
|
||||
* numerical values only partially defined. This is the default. */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* statistics */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int status ; /* KLU_OK if OK, < 0 if error */
|
||||
int nrealloc ; /* # of reallocations of L and U */
|
||||
|
||||
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank
|
||||
* deficient (as determined by maxtrans). -1 if not computed. n if the
|
||||
* matrix has full structural rank. This is computed by klu_analyze
|
||||
* if a BTF preordering is requested. */
|
||||
|
||||
int numerical_rank ; /* First k for which a zero U(k,k) was found,
|
||||
* if the matrix was singular (in the range 0 to n-1). n if the matrix
|
||||
* has full rank. This is not a true rank-estimation. It just reports
|
||||
* where the first zero pivot was found. -1 if not computed.
|
||||
* Computed by klu_factor and klu_refactor. */
|
||||
|
||||
int singular_col ; /* n if the matrix is not singular. If in the
|
||||
* range 0 to n-1, this is the column index of the original matrix A that
|
||||
* corresponds to the column of U that contains a zero diagonal entry.
|
||||
* -1 if not computed. Computed by klu_factor and klu_refactor. */
|
||||
|
||||
int noffdiag ; /* # of off-diagonal pivots, -1 if not computed */
|
||||
|
||||
double flops ; /* actual factorization flop count, from klu_flops */
|
||||
double rcond ; /* crude reciprocal condition est., from klu_rcond */
|
||||
double condest ; /* accurate condition est., from klu_condest */
|
||||
double rgrowth ; /* reciprocal pivot rgrowth, from klu_rgrowth */
|
||||
double work ; /* actual work done in BTF, in klu_analyze */
|
||||
|
||||
size_t memusage ; /* current memory usage, in bytes */
|
||||
size_t mempeak ; /* peak memory usage, in bytes */
|
||||
|
||||
} klu_common ;
|
||||
|
||||
typedef struct klu_l_common_struct /* 64-bit version (otherwise same as above)*/
|
||||
{
|
||||
|
||||
double tol, memgrow, initmem_amd, initmem, maxwork ;
|
||||
UF_long btf, ordering, scale ;
|
||||
void *(*malloc_memory) (size_t) ;
|
||||
void *(*realloc_memory) (void *, size_t) ;
|
||||
void (*free_memory) (void *) ;
|
||||
void *(*calloc_memory) (size_t, size_t) ;
|
||||
UF_long (*user_order) (UF_long, UF_long *, UF_long *, UF_long *,
|
||||
struct klu_l_common_struct *) ;
|
||||
void *user_data ;
|
||||
UF_long halt_if_singular ;
|
||||
UF_long status, nrealloc, structural_rank, numerical_rank, singular_col,
|
||||
noffdiag ;
|
||||
double flops, rcond, condest, rgrowth, work ;
|
||||
size_t memusage, mempeak ;
|
||||
|
||||
} klu_l_common ;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_defaults: sets default control parameters */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_defaults
|
||||
(
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_defaults (klu_l_common *Common) ;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_analyze: orders and analyzes a matrix */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Order the matrix with BTF (or not), then order each block with AMD, COLAMD,
|
||||
* a natural ordering, or with a user-provided ordering function */
|
||||
|
||||
klu_symbolic *klu_analyze
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int n, /* A is n-by-n */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
klu_l_symbolic *klu_l_analyze (UF_long, UF_long *, UF_long *,
|
||||
klu_l_common *Common) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_analyze_given: analyzes a matrix using given P and Q */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Order the matrix with BTF (or not), then use natural or given ordering
|
||||
* P and Q on the blocks. P and Q are interpretted as identity
|
||||
* if NULL. */
|
||||
|
||||
klu_symbolic *klu_analyze_given
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int n, /* A is n-by-n */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
int P [ ], /* size n, user's row permutation (may be NULL) */
|
||||
int Q [ ], /* size n, user's column permutation (may be NULL) */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
klu_l_symbolic *klu_l_analyze_given (UF_long, UF_long *, UF_long *, UF_long *,
|
||||
UF_long *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_factor: factors a matrix using the klu_analyze results */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
klu_numeric *klu_factor /* returns KLU_OK if OK, < 0 if error */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ], /* size nz, numerical values */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
klu_numeric *klu_z_factor /* returns KLU_OK if OK, < 0 if error */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ], /* size 2*nz, numerical values (real,imag pairs) */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
/* long / real version */
|
||||
klu_l_numeric *klu_l_factor (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_common *) ;
|
||||
|
||||
/* long / complex version */
|
||||
klu_l_numeric *klu_zl_factor (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_solve: solves Ax=b using the Symbolic and Numeric objects */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_solve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
int ldim, /* leading dimension of B */
|
||||
int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size ldim*nrhs */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_solve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
int ldim, /* leading dimension of B */
|
||||
int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size 2*ldim*nrhs */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
|
||||
double *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
|
||||
double *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_tsolve: solves A'x=b using the Symbolic and Numeric objects */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_tsolve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
int ldim, /* leading dimension of B */
|
||||
int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size ldim*nrhs */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_tsolve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
int ldim, /* leading dimension of B */
|
||||
int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size 2*ldim*nrhs */
|
||||
int conj_solve, /* TRUE: conjugate solve, FALSE: solve A.'x=b */
|
||||
klu_common *Common
|
||||
|
||||
) ;
|
||||
|
||||
UF_long klu_l_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
|
||||
double *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long,
|
||||
double *, UF_long, klu_l_common * ) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_refactor: refactorizes matrix with same ordering as klu_factor */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_refactor /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ], /* size nz, numerical values */
|
||||
klu_symbolic *Symbolic,
|
||||
/* input, and numerical values modified on output */
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_refactor /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ], /* size 2*nz, numerical values */
|
||||
klu_symbolic *Symbolic,
|
||||
/* input, and numerical values modified on output */
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_free_symbolic: destroys the Symbolic object */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_free_symbolic
|
||||
(
|
||||
klu_symbolic **Symbolic,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_free_symbolic (klu_l_symbolic **, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_free_numeric: destroys the Numeric object */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Note that klu_free_numeric and klu_z_free_numeric are identical; each can
|
||||
* free both kinds of Numeric objects (real and complex) */
|
||||
|
||||
int klu_free_numeric
|
||||
(
|
||||
klu_numeric **Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_free_numeric
|
||||
(
|
||||
klu_numeric **Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_free_numeric (klu_l_numeric **, klu_l_common *) ;
|
||||
UF_long klu_zl_free_numeric (klu_l_numeric **, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_sort: sorts the columns of the LU factorization */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* this is not needed except for the MATLAB interface */
|
||||
|
||||
int klu_sort
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
/* input/output */
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_sort
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
/* input/output */
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
UF_long klu_zl_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_flops: determines # of flops performed in numeric factorzation */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_flops
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
/* input/output */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_flops
|
||||
(
|
||||
/* inputs, not modified */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
/* input/output */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
UF_long klu_zl_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_rgrowth : compute the reciprocal pivot growth */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Pivot growth is computed after the input matrix is permuted, scaled, and
|
||||
* off-diagonal entries pruned. This is because the LU factorization of each
|
||||
* block takes as input the scaled diagonal blocks of the BTF form. The
|
||||
* reciprocal pivot growth in column j of an LU factorization of a matrix C
|
||||
* is the largest entry in C divided by the largest entry in U; then the overall
|
||||
* reciprocal pivot growth is the smallest such value for all columns j. Note
|
||||
* that the off-diagonal entries are not scaled, since they do not take part in
|
||||
* the LU factorization of the diagonal blocks.
|
||||
*
|
||||
* In MATLAB notation:
|
||||
*
|
||||
* rgrowth = min (max (abs ((R \ A(p,q)) - F)) ./ max (abs (U))) */
|
||||
|
||||
int klu_rgrowth
|
||||
(
|
||||
int Ap [ ],
|
||||
int Ai [ ],
|
||||
double Ax [ ],
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */
|
||||
) ;
|
||||
|
||||
int klu_z_rgrowth
|
||||
(
|
||||
int Ap [ ],
|
||||
int Ai [ ],
|
||||
double Ax [ ],
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */
|
||||
) ;
|
||||
|
||||
UF_long klu_l_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *,
|
||||
klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_condest */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Computes a reasonably accurate estimate of the 1-norm condition number, using
|
||||
* Hager's method, as modified by Higham and Tisseur (same method as used in
|
||||
* MATLAB's condest */
|
||||
|
||||
int klu_condest
|
||||
(
|
||||
int Ap [ ], /* size n+1, column pointers, not modified */
|
||||
double Ax [ ], /* size nz = Ap[n], numerical values, not modified*/
|
||||
klu_symbolic *Symbolic, /* symbolic analysis, not modified */
|
||||
klu_numeric *Numeric, /* numeric factorization, not modified */
|
||||
klu_common *Common /* result returned in Common->condest */
|
||||
) ;
|
||||
|
||||
int klu_z_condest
|
||||
(
|
||||
int Ap [ ],
|
||||
double Ax [ ], /* size 2*nz */
|
||||
klu_symbolic *Symbolic,
|
||||
klu_numeric *Numeric,
|
||||
klu_common *Common /* result returned in Common->condest */
|
||||
) ;
|
||||
|
||||
UF_long klu_l_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *,
|
||||
klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *,
|
||||
klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_rcond: compute min(abs(diag(U))) / max(abs(diag(U))) */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_rcond
|
||||
(
|
||||
klu_symbolic *Symbolic, /* input, not modified */
|
||||
klu_numeric *Numeric, /* input, not modified */
|
||||
klu_common *Common /* result in Common->rcond */
|
||||
) ;
|
||||
|
||||
int klu_z_rcond
|
||||
(
|
||||
klu_symbolic *Symbolic, /* input, not modified */
|
||||
klu_numeric *Numeric, /* input, not modified */
|
||||
klu_common *Common /* result in Common->rcond */
|
||||
) ;
|
||||
|
||||
UF_long klu_l_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ;
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_scale */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_scale /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */
|
||||
int n,
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ],
|
||||
/* outputs, not defined on input */
|
||||
double Rs [ ],
|
||||
/* workspace, not defined on input or output */
|
||||
int W [ ], /* size n, can be NULL */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
int klu_z_scale /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */
|
||||
int n,
|
||||
int Ap [ ], /* size n+1, column pointers */
|
||||
int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ],
|
||||
/* outputs, not defined on input */
|
||||
double Rs [ ],
|
||||
/* workspace, not defined on input or output */
|
||||
int W [ ], /* size n, can be NULL */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_scale (UF_long, UF_long, UF_long *, UF_long *, double *,
|
||||
double *, UF_long *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_scale (UF_long, UF_long, UF_long *, UF_long *, double *,
|
||||
double *, UF_long *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* klu_extract */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
int klu_extract /* returns TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs: */
|
||||
klu_numeric *Numeric,
|
||||
klu_symbolic *Symbolic,
|
||||
|
||||
/* outputs, either allocated on input, or ignored otherwise */
|
||||
|
||||
/* L */
|
||||
int *Lp, /* size n+1 */
|
||||
int *Li, /* size Numeric->lnz */
|
||||
double *Lx, /* size Numeric->lnz */
|
||||
|
||||
/* U */
|
||||
int *Up, /* size n+1 */
|
||||
int *Ui, /* size Numeric->unz */
|
||||
double *Ux, /* size Numeric->unz */
|
||||
|
||||
/* F */
|
||||
int *Fp, /* size n+1 */
|
||||
int *Fi, /* size Numeric->nzoff */
|
||||
double *Fx, /* size Numeric->nzoff */
|
||||
|
||||
/* P, row permutation */
|
||||
int *P, /* size n */
|
||||
|
||||
/* Q, column permutation */
|
||||
int *Q, /* size n */
|
||||
|
||||
/* Rs, scale factors */
|
||||
double *Rs, /* size n */
|
||||
|
||||
/* R, block boundaries */
|
||||
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */
|
||||
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
|
||||
int klu_z_extract /* returns TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs: */
|
||||
klu_numeric *Numeric,
|
||||
klu_symbolic *Symbolic,
|
||||
|
||||
/* outputs, all of which must be allocated on input */
|
||||
|
||||
/* L */
|
||||
int *Lp, /* size n+1 */
|
||||
int *Li, /* size nnz(L) */
|
||||
double *Lx, /* size nnz(L) */
|
||||
double *Lz, /* size nnz(L) for the complex case, ignored if real */
|
||||
|
||||
/* U */
|
||||
int *Up, /* size n+1 */
|
||||
int *Ui, /* size nnz(U) */
|
||||
double *Ux, /* size nnz(U) */
|
||||
double *Uz, /* size nnz(U) for the complex case, ignored if real */
|
||||
|
||||
/* F */
|
||||
int *Fp, /* size n+1 */
|
||||
int *Fi, /* size nnz(F) */
|
||||
double *Fx, /* size nnz(F) */
|
||||
double *Fz, /* size nnz(F) for the complex case, ignored if real */
|
||||
|
||||
/* P, row permutation */
|
||||
int *P, /* size n */
|
||||
|
||||
/* Q, column permutation */
|
||||
int *Q, /* size n */
|
||||
|
||||
/* Rs, scale factors */
|
||||
double *Rs, /* size n */
|
||||
|
||||
/* R, block boundaries */
|
||||
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */
|
||||
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
UF_long klu_l_extract (klu_l_numeric *, klu_l_symbolic *,
|
||||
UF_long *, UF_long *, double *,
|
||||
UF_long *, UF_long *, double *,
|
||||
UF_long *, UF_long *, double *,
|
||||
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ;
|
||||
|
||||
UF_long klu_zl_extract (klu_l_numeric *, klu_l_symbolic *,
|
||||
UF_long *, UF_long *, double *, double *,
|
||||
UF_long *, UF_long *, double *, double *,
|
||||
UF_long *, UF_long *, double *, double *,
|
||||
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* KLU memory management routines */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
void *klu_malloc /* returns pointer to the newly malloc'd block */
|
||||
(
|
||||
/* ---- input ---- */
|
||||
size_t n, /* number of items */
|
||||
size_t size, /* size of each item */
|
||||
/* --------------- */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
void *klu_free /* always returns NULL */
|
||||
(
|
||||
/* ---- in/out --- */
|
||||
void *p, /* block of memory to free */
|
||||
size_t n, /* number of items */
|
||||
size_t size, /* size of each item */
|
||||
/* --------------- */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
void *klu_realloc /* returns pointer to reallocated block */
|
||||
(
|
||||
/* ---- input ---- */
|
||||
size_t nnew, /* requested # of items in reallocated block */
|
||||
size_t nold, /* current size of block, in # of items */
|
||||
size_t size, /* size of each item */
|
||||
/* ---- in/out --- */
|
||||
void *p, /* block of memory to realloc */
|
||||
/* --------------- */
|
||||
klu_common *Common
|
||||
) ;
|
||||
|
||||
void *klu_l_malloc (size_t, size_t, klu_l_common *) ;
|
||||
void *klu_l_free (void *, size_t, size_t, klu_l_common *) ;
|
||||
void *klu_l_realloc (size_t, size_t, size_t, void *, klu_l_common *) ;
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU version ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* All versions of KLU include these definitions.
|
||||
* As an example, to test if the version you are using is 1.2 or later:
|
||||
*
|
||||
* if (KLU_VERSION >= KLU_VERSION_CODE (1,2)) ...
|
||||
*
|
||||
* This also works during compile-time:
|
||||
*
|
||||
* #if (KLU >= KLU_VERSION_CODE (1,2))
|
||||
* printf ("This is version 1.2 or later\n") ;
|
||||
* #else
|
||||
* printf ("This is an early version\n") ;
|
||||
* #endif
|
||||
*/
|
||||
|
||||
#define KLU_DATE "Dec 7, 2011"
|
||||
#define KLU_VERSION_CODE(main,sub) ((main) * 1000 + (sub))
|
||||
#define KLU_MAIN_VERSION 1
|
||||
#define KLU_SUB_VERSION 1
|
||||
#define KLU_SUBSUB_VERSION 3
|
||||
#define KLU_VERSION KLU_VERSION_CODE(KLU_MAIN_VERSION,KLU_SUB_VERSION)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,488 @@
|
|||
/* ========================================================================== */
|
||||
/* === klu_analyze ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural
|
||||
* ordering, or the user-provided-function on the blocks. Does not support
|
||||
* using a given ordering (use klu_analyze_given for that case). */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === analyze_worker ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
static Int analyze_worker /* returns KLU_OK or < 0 if error */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
Int nblocks, /* # of blocks */
|
||||
Int Pbtf [ ], /* BTF row permutation */
|
||||
Int Qbtf [ ], /* BTF col permutation */
|
||||
Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */
|
||||
Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */
|
||||
|
||||
/* output only, not defined on input */
|
||||
Int P [ ], /* size n */
|
||||
Int Q [ ], /* size n */
|
||||
double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */
|
||||
|
||||
/* workspace, not defined on input or output */
|
||||
Int Pblk [ ], /* size maxblock */
|
||||
Int Cp [ ], /* size maxblock+1 */
|
||||
Int Ci [ ], /* size MAX (nz+1, Cilen) */
|
||||
Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */
|
||||
Int Pinv [ ], /* size maxblock */
|
||||
|
||||
/* input/output */
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double amd_Info [AMD_INFO], lnz, lnz1, flops, flops1 ;
|
||||
Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow,
|
||||
maxnz, nzoff, cstats [COLAMD_STATS], ok, err = KLU_INVALID ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* initializations */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* compute the inverse of Pbtf */
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
P [k] = EMPTY ;
|
||||
Q [k] = EMPTY ;
|
||||
Pinv [k] = EMPTY ;
|
||||
}
|
||||
#endif
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ;
|
||||
Pinv [Pbtf [k]] = k ;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
|
||||
#endif
|
||||
nzoff = 0 ;
|
||||
lnz = 0 ;
|
||||
maxnz = 0 ;
|
||||
flops = 0 ;
|
||||
Symbolic->symmetry = EMPTY ; /* only computed by AMD */
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* order each block */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* the block is from rows/columns k1 to k2-1 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* construct the kth block, C */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
Lnz [block] = EMPTY ;
|
||||
pc = 0 ;
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
newcol = k-k1 ;
|
||||
Cp [newcol] = pc ;
|
||||
oldcol = Qbtf [k] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
newrow = Pinv [Ai [p]] ;
|
||||
if (newrow < k1)
|
||||
{
|
||||
nzoff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* (newrow,newcol) is an entry in the block */
|
||||
ASSERT (newrow < k2) ;
|
||||
newrow -= k1 ;
|
||||
Ci [pc++] = newrow ;
|
||||
}
|
||||
}
|
||||
}
|
||||
Cp [nk] = pc ;
|
||||
maxnz = MAX (maxnz, pc) ;
|
||||
ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* order the block C */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (nk <= 3)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* use natural ordering for tiny blocks (3-by-3 or less) */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
Pblk [k] = k ;
|
||||
}
|
||||
lnz1 = nk * (nk + 1) / 2 ;
|
||||
flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ;
|
||||
ok = TRUE ;
|
||||
|
||||
}
|
||||
else if (ordering == 0)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* order the block with AMD (C+C') */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ;
|
||||
ok = (result >= AMD_OK) ;
|
||||
if (result == AMD_OUT_OF_MEMORY)
|
||||
{
|
||||
err = KLU_OUT_OF_MEMORY ;
|
||||
}
|
||||
|
||||
/* account for memory usage in AMD */
|
||||
Common->mempeak = MAX (Common->mempeak,
|
||||
Common->memusage + amd_Info [AMD_MEMORY]) ;
|
||||
|
||||
/* get the ordering statistics from AMD */
|
||||
lnz1 = (Int) (amd_Info [AMD_LNZ]) + nk ;
|
||||
flops1 = 2 * amd_Info [AMD_NMULTSUBS_LU] + amd_Info [AMD_NDIV] ;
|
||||
if (pc == maxnz)
|
||||
{
|
||||
/* get the symmetry of the biggest block */
|
||||
Symbolic->symmetry = amd_Info [AMD_SYMMETRY] ;
|
||||
}
|
||||
|
||||
}
|
||||
else if (ordering == 1)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* order the block with COLAMD (C) */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
/* order (and destroy) Ci, returning column permutation in Cp.
|
||||
* COLAMD "cannot" fail since the matrix has already been checked,
|
||||
* and Ci allocated. */
|
||||
|
||||
ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;
|
||||
lnz1 = EMPTY ;
|
||||
flops1 = EMPTY ;
|
||||
|
||||
/* copy the permutation from Cp to Pblk */
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
Pblk [k] = Cp [k] ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* pass the block to the user-provided ordering function */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
|
||||
flops1 = EMPTY ;
|
||||
ok = (lnz1 != 0) ;
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
return (err) ; /* ordering method failed */
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* keep track of nnz(L) and flops statistics */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
Lnz [block] = lnz1 ;
|
||||
lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ;
|
||||
flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* combine the preordering with the BTF ordering */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
PRINTF (("Pblk, 1-based:\n")) ;
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
ASSERT (k + k1 < n) ;
|
||||
ASSERT (Pblk [k] + k1 < n) ;
|
||||
Q [k + k1] = Qbtf [Pblk [k] + k1] ;
|
||||
}
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
ASSERT (k + k1 < n) ;
|
||||
ASSERT (Pblk [k] + k1 < n) ;
|
||||
P [k + k1] = Pbtf [Pblk [k] + k1] ;
|
||||
}
|
||||
}
|
||||
|
||||
PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ;
|
||||
ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;
|
||||
|
||||
/* return estimates of # of nonzeros in L including diagonal */
|
||||
Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */
|
||||
Symbolic->unz = lnz ;
|
||||
Symbolic->nzoff = nzoff ;
|
||||
Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */
|
||||
return (KLU_OK) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === order_and_analyze ==================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD,
|
||||
* or the user ordering function. Does not handle the natural or given
|
||||
* ordering cases. */
|
||||
|
||||
static KLU_symbolic *order_and_analyze /* returns NULL if error, or a valid
|
||||
KLU_symbolic object if successful */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
/* --------------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double work ;
|
||||
KLU_symbolic *Symbolic ;
|
||||
double *Lnz ;
|
||||
Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
|
||||
Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
|
||||
*Work ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate the Symbolic object, and check input matrix */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
|
||||
if (Symbolic == NULL)
|
||||
{
|
||||
return (NULL) ;
|
||||
}
|
||||
P = Symbolic->P ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
Lnz = Symbolic->Lnz ;
|
||||
nz = Symbolic->nz ;
|
||||
|
||||
ordering = Common->ordering ;
|
||||
if (ordering == 1)
|
||||
{
|
||||
/* COLAMD */
|
||||
Cilen = COLAMD_recommended (nz, n, n) ;
|
||||
}
|
||||
else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
|
||||
{
|
||||
/* AMD or user ordering function */
|
||||
Cilen = nz+1 ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* invalid ordering */
|
||||
Common->status = KLU_INVALID ;
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
/* AMD memory management routines */
|
||||
amd_malloc = Common->malloc_memory ;
|
||||
amd_free = Common->free_memory ;
|
||||
amd_calloc = Common->calloc_memory ;
|
||||
amd_realloc = Common->realloc_memory ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate workspace for BTF permutation */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Pbtf = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
Qbtf = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
KLU_free (Pbtf, n, sizeof (Int), Common) ;
|
||||
KLU_free (Qbtf, n, sizeof (Int), Common) ;
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the common parameters for BTF and ordering method */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
do_btf = Common->btf ;
|
||||
do_btf = (do_btf) ? TRUE : FALSE ;
|
||||
Symbolic->ordering = ordering ;
|
||||
Symbolic->do_btf = do_btf ;
|
||||
Symbolic->structural_rank = EMPTY ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* find the block triangular form (if requested) */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Common->work = 0 ;
|
||||
|
||||
if (do_btf)
|
||||
{
|
||||
Work = KLU_malloc (5*n, sizeof (Int), Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
KLU_free (Pbtf, n, sizeof (Int), Common) ;
|
||||
KLU_free (Qbtf, n, sizeof (Int), Common) ;
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
|
||||
&(Symbolic->structural_rank), Work) ;
|
||||
Common->structural_rank = Symbolic->structural_rank ;
|
||||
Common->work += work ;
|
||||
|
||||
KLU_free (Work, 5*n, sizeof (Int), Common) ;
|
||||
|
||||
/* unflip Qbtf if the matrix does not have full structural rank */
|
||||
if (Symbolic->structural_rank < n)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* find the size of the largest block */
|
||||
maxblock = 1 ;
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("block %d size %d\n", block, nk)) ;
|
||||
maxblock = MAX (maxblock, nk) ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* BTF not requested */
|
||||
nblocks = 1 ;
|
||||
maxblock = n ;
|
||||
R [0] = 0 ;
|
||||
R [1] = n ;
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Pbtf [k] = k ;
|
||||
Qbtf [k] = k ;
|
||||
}
|
||||
}
|
||||
|
||||
Symbolic->nblocks = nblocks ;
|
||||
|
||||
PRINTF (("maxblock size %d\n", maxblock)) ;
|
||||
Symbolic->maxblock = maxblock ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate more workspace, for analyze_worker */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ;
|
||||
Cp = KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
|
||||
Ci = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
|
||||
Pinv = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* order each block of the BTF ordering, and a fill-reducing ordering */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common->status == KLU_OK)
|
||||
{
|
||||
PRINTF (("calling analyze_worker\n")) ;
|
||||
Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
|
||||
ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
|
||||
PRINTF (("analyze_worker done\n")) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* free all workspace */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
|
||||
KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
|
||||
KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
|
||||
KLU_free (Pinv, n, sizeof (Int), Common) ;
|
||||
KLU_free (Pbtf, n, sizeof (Int), Common) ;
|
||||
KLU_free (Qbtf, n, sizeof (Int), Common) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return the symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
}
|
||||
return (Symbolic) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_analyze ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
KLU_symbolic *KLU_analyze /* returns NULL if error, or a valid
|
||||
KLU_symbolic object if successful */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
/* -------------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the control parameters for BTF and ordering method */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (NULL) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
Common->structural_rank = EMPTY ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* order and analyze */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common->ordering == 2)
|
||||
{
|
||||
/* natural ordering */
|
||||
return (KLU_analyze_given (n, Ap, Ai, NULL, NULL, Common)) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* order with P and Q */
|
||||
return (order_and_analyze (n, Ap, Ai, Common)) ;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
/* ========================================================================== */
|
||||
/* === klu_analyze_given ==================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Given an input permutation P and Q, create the Symbolic object. BTF can
|
||||
* be done to modify the user's P and Q (does not perform the max transversal;
|
||||
* just finds the strongly-connected components). */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === klu_alloc_symbolic =================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Allocate Symbolic object, and check input matrix. Not user callable. */
|
||||
|
||||
KLU_symbolic *KLU_alloc_symbolic
|
||||
(
|
||||
Int n,
|
||||
Int *Ap,
|
||||
Int *Ai,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
KLU_symbolic *Symbolic ;
|
||||
Int *P, *Q, *R ;
|
||||
double *Lnz ;
|
||||
Int nz, i, j, p, pend ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (NULL) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* A is n-by-n, with n > 0. Ap [0] = 0 and nz = Ap [n] >= 0 required.
|
||||
* Ap [j] <= Ap [j+1] must hold for all j = 0 to n-1. Row indices in Ai
|
||||
* must be in the range 0 to n-1, and no duplicate entries can be present.
|
||||
* The list of row indices in each column of A need not be sorted.
|
||||
*/
|
||||
|
||||
if (n <= 0 || Ap == NULL || Ai == NULL)
|
||||
{
|
||||
/* Ap and Ai must be present, and n must be > 0 */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
nz = Ap [n] ;
|
||||
if (Ap [0] != 0 || nz < 0)
|
||||
{
|
||||
/* nz must be >= 0 and Ap [0] must equal zero */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
if (Ap [j] > Ap [j+1])
|
||||
{
|
||||
/* column pointers must be non-decreasing */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (NULL) ;
|
||||
}
|
||||
}
|
||||
P = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (NULL) ;
|
||||
}
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
P [i] = EMPTY ;
|
||||
}
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
pend = Ap [j+1] ;
|
||||
for (p = Ap [j] ; p < pend ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
if (i < 0 || i >= n || P [i] == j)
|
||||
{
|
||||
/* row index out of range, or duplicate entry */
|
||||
KLU_free (P, n, sizeof (Int), Common) ;
|
||||
Common->status = KLU_INVALID ;
|
||||
return (NULL) ;
|
||||
}
|
||||
/* flag row i as appearing in column j */
|
||||
P [i] = j ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Symbolic = KLU_malloc (sizeof (KLU_symbolic), 1, Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
KLU_free (P, n, sizeof (Int), Common) ;
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
Q = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
R = KLU_malloc (n+1, sizeof (Int), Common) ;
|
||||
Lnz = KLU_malloc (n, sizeof (double), Common) ;
|
||||
|
||||
Symbolic->n = n ;
|
||||
Symbolic->nz = nz ;
|
||||
Symbolic->P = P ;
|
||||
Symbolic->Q = Q ;
|
||||
Symbolic->R = R ;
|
||||
Symbolic->Lnz = Lnz ;
|
||||
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
return (Symbolic) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_analyze_given ==================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
KLU_symbolic *KLU_analyze_given /* returns NULL if error, or a valid
|
||||
KLU_symbolic object if successful */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
Int Puser [ ], /* size n, user's row permutation (may be NULL) */
|
||||
Int Quser [ ], /* size n, user's column permutation (may be NULL) */
|
||||
/* -------------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
KLU_symbolic *Symbolic ;
|
||||
double *Lnz ;
|
||||
Int nblocks, nz, block, maxblock, *P, *Q, *R, nzoff, p, pend, do_btf, k ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* determine if input matrix is valid, and get # of nonzeros */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
|
||||
if (Symbolic == NULL)
|
||||
{
|
||||
return (NULL) ;
|
||||
}
|
||||
P = Symbolic->P ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
Lnz = Symbolic->Lnz ;
|
||||
nz = Symbolic->nz ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* Q = Quser, or identity if Quser is NULL */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Quser == (Int *) NULL)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Q [k] = k ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Q [k] = Quser [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the control parameters for BTF and ordering method */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
do_btf = Common->btf ;
|
||||
do_btf = (do_btf) ? TRUE : FALSE ;
|
||||
Symbolic->ordering = 2 ;
|
||||
Symbolic->do_btf = do_btf ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* find the block triangular form, if requested */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (do_btf)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* get workspace for BTF_strongcomp */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
Int *Pinv, *Work, *Bi, k1, k2, nk, oldcol ;
|
||||
|
||||
Work = KLU_malloc (4*n, sizeof (Int), Common) ;
|
||||
Pinv = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
if (Puser != (Int *) NULL)
|
||||
{
|
||||
Bi = KLU_malloc (nz+1, sizeof (Int), Common) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
Bi = Ai ;
|
||||
}
|
||||
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
KLU_free (Work, 4*n, sizeof (Int), Common) ;
|
||||
KLU_free (Pinv, n, sizeof (Int), Common) ;
|
||||
if (Puser != (Int *) NULL)
|
||||
{
|
||||
KLU_free (Bi, nz+1, sizeof (Int), Common) ;
|
||||
}
|
||||
KLU_free_symbolic (&Symbolic, Common) ;
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* B = Puser * A */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (Puser != (Int *) NULL)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Pinv [Puser [k]] = k ;
|
||||
}
|
||||
for (p = 0 ; p < nz ; p++)
|
||||
{
|
||||
Bi [p] = Pinv [Ai [p]] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* find the strongly-connected components */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/* modifies Q, and determines P and R */
|
||||
nblocks = BTF_strongcomp (n, Ap, Bi, Q, P, R, Work) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* P = P * Puser */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (Puser != (Int *) NULL)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Work [k] = Puser [P [k]] ;
|
||||
}
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
P [k] = Work [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Pinv = inverse of P */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Pinv [P [k]] = k ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* analyze each block */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
nzoff = 0 ; /* nz in off-diagonal part */
|
||||
maxblock = 1 ; /* size of the largest block */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* the block is from rows/columns k1 to k2-1 */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
|
||||
maxblock = MAX (maxblock, nk) ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* scan the kth block, C */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
oldcol = Q [k] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
if (Pinv [Ai [p]] < k1)
|
||||
{
|
||||
nzoff++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* fill-in not estimated */
|
||||
Lnz [block] = EMPTY ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* free all workspace */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
KLU_free (Work, 4*n, sizeof (Int), Common) ;
|
||||
KLU_free (Pinv, n, sizeof (Int), Common) ;
|
||||
if (Puser != (Int *) NULL)
|
||||
{
|
||||
KLU_free (Bi, nz+1, sizeof (Int), Common) ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* BTF not requested */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
nzoff = 0 ;
|
||||
nblocks = 1 ;
|
||||
maxblock = n ;
|
||||
R [0] = 0 ;
|
||||
R [1] = n ;
|
||||
Lnz [0] = EMPTY ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* P = Puser, or identity if Puser is NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
P [k] = (Puser == NULL) ? k : Puser [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return the symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Symbolic->nblocks = nblocks ;
|
||||
Symbolic->maxblock = maxblock ;
|
||||
Symbolic->lnz = EMPTY ;
|
||||
Symbolic->unz = EMPTY ;
|
||||
Symbolic->nzoff = nzoff ;
|
||||
|
||||
return (Symbolic) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_defaults ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Sets default parameters for KLU */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_defaults
|
||||
(
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
/* parameters */
|
||||
Common->tol = 0.001 ; /* pivot tolerance for diagonal */
|
||||
Common->memgrow = 1.2; /* realloc size ratio increase for LU factors */
|
||||
Common->initmem_amd = 1.2 ; /* init. mem with AMD: c*nnz(L) + n */
|
||||
Common->initmem = 10 ; /* init. mem otherwise: c*nnz(A) + n */
|
||||
Common->btf = TRUE ; /* use BTF pre-ordering, or not */
|
||||
Common->maxwork = 0 ; /* no limit to work done by btf_order */
|
||||
Common->ordering = 0 ; /* 0: AMD, 1: COLAMD, 2: user-provided P and Q,
|
||||
* 3: user-provided function */
|
||||
Common->scale = 2 ; /* scale: -1: none, and do not check for errors
|
||||
* in the input matrix in KLU_refactor.
|
||||
* 0: none, but check for errors,
|
||||
* 1: sum, 2: max */
|
||||
Common->halt_if_singular = TRUE ; /* quick halt if matrix is singular */
|
||||
|
||||
/* memory management routines */
|
||||
Common->malloc_memory = malloc ;
|
||||
Common->calloc_memory = calloc ;
|
||||
Common->free_memory = free ;
|
||||
Common->realloc_memory = realloc ;
|
||||
|
||||
/* user ordering function and optional argument */
|
||||
Common->user_order = NULL ;
|
||||
Common->user_data = NULL ;
|
||||
|
||||
/* statistics */
|
||||
Common->status = KLU_OK ;
|
||||
Common->nrealloc = 0 ;
|
||||
Common->structural_rank = EMPTY ;
|
||||
Common->numerical_rank = EMPTY ;
|
||||
Common->noffdiag = EMPTY ;
|
||||
Common->flops = EMPTY ;
|
||||
Common->rcond = EMPTY ;
|
||||
Common->condest = EMPTY ;
|
||||
Common->rgrowth = EMPTY ;
|
||||
Common->work = 0 ; /* work done by btf_order */
|
||||
|
||||
Common->memusage = 0 ;
|
||||
Common->mempeak = 0 ;
|
||||
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,570 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_diagnostics ====================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Linear algebraic diagnostics:
|
||||
* KLU_rgrowth: reciprocal pivot growth, takes O(|A|+|U|) time
|
||||
* KLU_condest: condition number estimator, takes about O(|A|+5*(|L|+|U|)) time
|
||||
* KLU_flops: compute # flops required to factorize A into L*U
|
||||
* KLU_rcond: compute a really cheap estimate of the reciprocal of the
|
||||
* condition number, min(abs(diag(U))) / max(abs(diag(U))).
|
||||
* Takes O(n) time.
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_rgrowth ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Compute the reciprocal pivot growth factor. In MATLAB notation:
|
||||
*
|
||||
* rgrowth = min (max (abs ((R \ A (p,q)) - F))) ./ max (abs (U)))
|
||||
*/
|
||||
|
||||
Int KLU_rgrowth /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
Int *Ap,
|
||||
Int *Ai,
|
||||
double *Ax,
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double temp, max_ai, max_ui, min_block_rgrowth ;
|
||||
Entry aik ;
|
||||
Int *Q, *Ui, *Uip, *Ulen, *Pinv ;
|
||||
Unit *LU ;
|
||||
Entry *Aentry, *Ux, *Ukk ;
|
||||
double *Rs ;
|
||||
Int i, newrow, oldrow, k1, k2, nk, j, oldcol, k, pend, len ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
if (Symbolic == NULL || Ap == NULL || Ai == NULL || Ax == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
if (Numeric == NULL)
|
||||
{
|
||||
/* treat this as a singular matrix */
|
||||
Common->rgrowth = 0 ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute the reciprocal pivot growth */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Aentry = (Entry *) Ax ;
|
||||
Pinv = Numeric->Pinv ;
|
||||
Rs = Numeric->Rs ;
|
||||
Q = Symbolic->Q ;
|
||||
Common->rgrowth = 1 ;
|
||||
|
||||
for (i = 0 ; i < Symbolic->nblocks ; i++)
|
||||
{
|
||||
k1 = Symbolic->R[i] ;
|
||||
k2 = Symbolic->R[i+1] ;
|
||||
nk = k2 - k1 ;
|
||||
if (nk == 1)
|
||||
{
|
||||
continue ; /* skip singleton blocks */
|
||||
}
|
||||
LU = (Unit *) Numeric->LUbx[i] ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
Ukk = ((Entry *) Numeric->Udiag) + k1 ;
|
||||
min_block_rgrowth = 1 ;
|
||||
for (j = 0 ; j < nk ; j++)
|
||||
{
|
||||
max_ai = 0 ;
|
||||
max_ui = 0 ;
|
||||
oldcol = Q[j + k1] ;
|
||||
pend = Ap [oldcol + 1] ;
|
||||
for (k = Ap [oldcol] ; k < pend ; k++)
|
||||
{
|
||||
oldrow = Ai [k] ;
|
||||
newrow = Pinv [oldrow] ;
|
||||
if (newrow < k1)
|
||||
{
|
||||
continue ; /* skip entry outside the block */
|
||||
}
|
||||
ASSERT (newrow < k2) ;
|
||||
if (Rs != NULL)
|
||||
{
|
||||
/* aik = Aentry [k] / Rs [oldrow] */
|
||||
SCALE_DIV_ASSIGN (aik, Aentry [k], Rs [newrow]) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
aik = Aentry [k] ;
|
||||
}
|
||||
/* temp = ABS (aik) */
|
||||
ABS (temp, aik) ;
|
||||
if (temp > max_ai)
|
||||
{
|
||||
max_ai = temp ;
|
||||
}
|
||||
}
|
||||
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, j, len) ;
|
||||
for (k = 0 ; k < len ; k++)
|
||||
{
|
||||
/* temp = ABS (Ux [k]) */
|
||||
ABS (temp, Ux [k]) ;
|
||||
if (temp > max_ui)
|
||||
{
|
||||
max_ui = temp ;
|
||||
}
|
||||
}
|
||||
/* consider the diagonal element */
|
||||
ABS (temp, Ukk [j]) ;
|
||||
if (temp > max_ui)
|
||||
{
|
||||
max_ui = temp ;
|
||||
}
|
||||
|
||||
/* if max_ui is 0, skip the column */
|
||||
if (SCALAR_IS_ZERO (max_ui))
|
||||
{
|
||||
continue ;
|
||||
}
|
||||
temp = max_ai / max_ui ;
|
||||
if (temp < min_block_rgrowth)
|
||||
{
|
||||
min_block_rgrowth = temp ;
|
||||
}
|
||||
}
|
||||
|
||||
if (min_block_rgrowth < Common->rgrowth)
|
||||
{
|
||||
Common->rgrowth = min_block_rgrowth ;
|
||||
}
|
||||
}
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_condest ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Estimate the condition number. Uses Higham and Tisseur's algorithm
|
||||
* (A block algorithm for matrix 1-norm estimation, with applications to
|
||||
* 1-norm pseudospectra, SIAM J. Matrix Anal. Appl., 21(4):1185-1201, 2000.
|
||||
*/
|
||||
|
||||
Int KLU_condest /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
Int Ap [ ],
|
||||
double Ax [ ],
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double xj, Xmax, csum, anorm, ainv_norm, est_old, est_new, abs_value ;
|
||||
Entry *Udiag, *Aentry, *X, *S ;
|
||||
Int *R ;
|
||||
Int nblocks, i, j, jmax, jnew, pend, n ;
|
||||
#ifndef COMPLEX
|
||||
Int unchanged ;
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Symbolic == NULL || Ap == NULL || Ax == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
abs_value = 0 ;
|
||||
if (Numeric == NULL)
|
||||
{
|
||||
/* treat this as a singular matrix */
|
||||
Common->condest = 1 / abs_value ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
n = Symbolic->n ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
R = Symbolic->R ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check if diagonal of U has a zero on it */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
ABS (abs_value, Udiag [i]) ;
|
||||
if (SCALAR_IS_ZERO (abs_value))
|
||||
{
|
||||
Common->condest = 1 / abs_value ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute 1-norm (maximum column sum) of the matrix */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
anorm = 0.0 ;
|
||||
Aentry = (Entry *) Ax ;
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
pend = Ap [i + 1] ;
|
||||
csum = 0.0 ;
|
||||
for (j = Ap [i] ; j < pend ; j++)
|
||||
{
|
||||
ABS (abs_value, Aentry [j]) ;
|
||||
csum += abs_value ;
|
||||
}
|
||||
if (csum > anorm)
|
||||
{
|
||||
anorm = csum ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute estimate of 1-norm of inv (A) */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* get workspace (size 2*n Entry's) */
|
||||
X = Numeric->Xwork ; /* size n space used in KLU_solve, tsolve */
|
||||
X += n ; /* X is size n */
|
||||
S = X + n ; /* S is size n */
|
||||
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
CLEAR (S [i]) ;
|
||||
CLEAR (X [i]) ;
|
||||
REAL (X [i]) = 1.0 / ((double) n) ;
|
||||
}
|
||||
jmax = 0 ;
|
||||
|
||||
ainv_norm = 0.0 ;
|
||||
for (i = 0 ; i < 5 ; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
/* X [jmax] is the largest entry in X */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* X [j] = 0 ;*/
|
||||
CLEAR (X [j]) ;
|
||||
}
|
||||
REAL (X [jmax]) = 1 ;
|
||||
}
|
||||
|
||||
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ;
|
||||
est_old = ainv_norm ;
|
||||
ainv_norm = 0.0 ;
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* ainv_norm += ABS (X [j]) ;*/
|
||||
ABS (abs_value, X [j]) ;
|
||||
ainv_norm += abs_value ;
|
||||
}
|
||||
|
||||
#ifndef COMPLEX
|
||||
unchanged = TRUE ;
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
double s = (X [j] >= 0) ? 1 : -1 ;
|
||||
if (s != (Int) REAL (S [j]))
|
||||
{
|
||||
S [j] = s ;
|
||||
unchanged = FALSE ;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > 0 && (ainv_norm <= est_old || unchanged))
|
||||
{
|
||||
break ;
|
||||
}
|
||||
#else
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
if (IS_NONZERO (X [j]))
|
||||
{
|
||||
ABS (abs_value, X [j]) ;
|
||||
SCALE_DIV_ASSIGN (S [j], X [j], abs_value) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
CLEAR (S [j]) ;
|
||||
REAL (S [j]) = 1 ;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > 0 && ainv_norm <= est_old)
|
||||
{
|
||||
break ;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
X [j] = S [j] ;
|
||||
}
|
||||
|
||||
#ifndef COMPLEX
|
||||
/* do a transpose solve */
|
||||
KLU_tsolve (Symbolic, Numeric, n, 1, X, Common) ;
|
||||
#else
|
||||
/* do a conjugate transpose solve */
|
||||
KLU_tsolve (Symbolic, Numeric, n, 1, (double *) X, 1, Common) ;
|
||||
#endif
|
||||
|
||||
/* jnew = the position of the largest entry in X */
|
||||
jnew = 0 ;
|
||||
Xmax = 0 ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* xj = ABS (X [j]) ;*/
|
||||
ABS (xj, X [j]) ;
|
||||
if (xj > Xmax)
|
||||
{
|
||||
Xmax = xj ;
|
||||
jnew = j ;
|
||||
}
|
||||
}
|
||||
if (i > 0 && jnew == jmax)
|
||||
{
|
||||
/* the position of the largest entry did not change
|
||||
* from the previous iteration */
|
||||
break ;
|
||||
}
|
||||
jmax = jnew ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute another estimate of norm(inv(A),1), and take the largest one */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
CLEAR (X [j]) ;
|
||||
if (j % 2)
|
||||
{
|
||||
REAL (X [j]) = 1 + ((double) j) / ((double) (n-1)) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
REAL (X [j]) = -1 - ((double) j) / ((double) (n-1)) ;
|
||||
}
|
||||
}
|
||||
|
||||
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ;
|
||||
|
||||
est_new = 0.0 ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* est_new += ABS (X [j]) ;*/
|
||||
ABS (abs_value, X [j]) ;
|
||||
est_new += abs_value ;
|
||||
}
|
||||
est_new = 2 * est_new / (3 * n) ;
|
||||
ainv_norm = MAX (est_new, ainv_norm) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute estimate of condition number */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Common->condest = ainv_norm * anorm ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_flops ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Compute the flop count for the LU factorization (in Common->flops) */
|
||||
|
||||
Int KLU_flops /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double flops = 0 ;
|
||||
Int *R, *Ui, *Uip, *Llen, *Ulen ;
|
||||
Unit **LUbx ;
|
||||
Unit *LU ;
|
||||
Int k, ulen, p, n, nk, block, nblocks, k1 ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->flops = EMPTY ;
|
||||
if (Numeric == NULL || Symbolic == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
n = Symbolic->n ;
|
||||
R = Symbolic->R ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute the flop count */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = R [block] ;
|
||||
nk = R [block+1] - k1 ;
|
||||
if (nk > 1)
|
||||
{
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
LU = LUbx [block] ;
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
/* compute kth column of U, and update kth column of A */
|
||||
GET_I_POINTER (LU, Uip, Ui, k) ;
|
||||
ulen = Ulen [k] ;
|
||||
for (p = 0 ; p < ulen ; p++)
|
||||
{
|
||||
flops += 2 * Llen [Ui [p]] ;
|
||||
}
|
||||
/* gather and divide by pivot to get kth column of L */
|
||||
flops += Llen [k] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
Common->flops = flops ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_rcond ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Compute a really cheap estimate of the reciprocal of the condition number,
|
||||
* condition number, min(abs(diag(U))) / max(abs(diag(U))). If U has a zero
|
||||
* pivot, or a NaN pivot, rcond will be zero. Takes O(n) time.
|
||||
*/
|
||||
|
||||
Int KLU_rcond /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
KLU_symbolic *Symbolic, /* input, not modified */
|
||||
KLU_numeric *Numeric, /* input, not modified */
|
||||
KLU_common *Common /* result in Common->rcond */
|
||||
)
|
||||
{
|
||||
double ukk, umin = 0, umax = 0 ;
|
||||
Entry *Udiag ;
|
||||
Int j, n ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Symbolic == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Numeric == NULL)
|
||||
{
|
||||
Common->rcond = 0 ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* compute rcond */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
n = Symbolic->n ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
/* get the magnitude of the pivot */
|
||||
ABS (ukk, Udiag [j]) ;
|
||||
if (SCALAR_IS_NAN (ukk) || SCALAR_IS_ZERO (ukk))
|
||||
{
|
||||
/* if NaN, or zero, the rcond is zero */
|
||||
Common->rcond = 0 ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
if (j == 0)
|
||||
{
|
||||
/* first pivot entry */
|
||||
umin = ukk ;
|
||||
umax = ukk ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* subsequent pivots */
|
||||
umin = MIN (umin, ukk) ;
|
||||
umax = MAX (umax, ukk) ;
|
||||
}
|
||||
}
|
||||
|
||||
Common->rcond = umin / umax ;
|
||||
if (SCALAR_IS_NAN (Common->rcond) || SCALAR_IS_ZERO (Common->rcond))
|
||||
{
|
||||
/* this can occur if umin or umax are Inf or NaN */
|
||||
Common->rcond = 0 ;
|
||||
Common->status = KLU_SINGULAR ;
|
||||
}
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_dump ============================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Debug routines for klu. Only used when NDEBUG is not defined at
|
||||
* compile-time.
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_valid ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Check if a column-form matrix is valid or not. The matrix A is
|
||||
* n-by-n. The row indices of entries in column j are in
|
||||
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are:
|
||||
*
|
||||
* n >= 0
|
||||
* nz = Ap [n_col] >= 0 number of entries in the matrix
|
||||
* Ap [0] == 0
|
||||
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col.
|
||||
* row indices in Ai [Ap [j] ... Ap [j+1]-1]
|
||||
* must be in the range 0 to n_row-1,
|
||||
* and no duplicate entries can exist (duplicates not checked here).
|
||||
*
|
||||
* Not user-callable. Only used when debugging.
|
||||
*/
|
||||
|
||||
Int KLU_valid (Int n, Int Ap [ ], Int Ai [ ], Entry Ax [ ])
|
||||
{
|
||||
Int nz, j, p1, p2, i, p ;
|
||||
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ;
|
||||
if (n <= 0)
|
||||
{
|
||||
PRINTF (("n must be >= 0: %d\n", n)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
nz = Ap [n] ;
|
||||
if (Ap [0] != 0 || nz < 0)
|
||||
{
|
||||
/* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */
|
||||
PRINTF (("column 0 pointer bad or nz < 0\n")) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
p1 = Ap [j] ;
|
||||
p2 = Ap [j+1] ;
|
||||
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ;
|
||||
if (p1 > p2)
|
||||
{
|
||||
/* column pointers must be ascending */
|
||||
PRINTF (("column %d pointer bad\n", j)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
for (p = p1 ; p < p2 ; p++)
|
||||
{
|
||||
i = Ai [p] ;
|
||||
PRINTF (("row: %d", i)) ;
|
||||
if (i < 0 || i >= n)
|
||||
{
|
||||
/* row index out of range */
|
||||
PRINTF (("index out of range, col %d row %d\n", j, i)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Ax != (Entry *) NULL)
|
||||
{
|
||||
PRINT_ENTRY (Ax [p]) ;
|
||||
}
|
||||
PRINTF (("\n")) ;
|
||||
}
|
||||
}
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_valid_LU ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* This function does the same validity tests as KLU_valid but for the
|
||||
* LU factor storage format. The flag flag_test_start_ptr is used to
|
||||
* test if Xip [0] = 0. This is not applicable for U. So when calling this
|
||||
* function for U, the flag should be set to false. Only used when debugging.
|
||||
*/
|
||||
|
||||
Int KLU_valid_LU (Int n, Int flag_test_start_ptr, Int Xip [ ],
|
||||
Int Xlen [ ], Unit LU [ ])
|
||||
{
|
||||
Int *Xi ;
|
||||
Entry *Xx ;
|
||||
Int j, p1, p2, i, p, len ;
|
||||
|
||||
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ;
|
||||
if (n <= 0)
|
||||
{
|
||||
PRINTF (("n must be >= 0: %d\n", n)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (flag_test_start_ptr && Xip [0] != 0)
|
||||
{
|
||||
/* column pointers must start at Xip [0] = 0*/
|
||||
PRINTF (("column 0 pointer bad\n")) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
p1 = Xip [j] ;
|
||||
p2 = Xip [j+1] ;
|
||||
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ;
|
||||
if (p1 > p2)
|
||||
{
|
||||
/* column pointers must be ascending */
|
||||
PRINTF (("column %d pointer bad\n", j)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
i = Xi [p] ;
|
||||
PRINTF (("row: %d", i)) ;
|
||||
if (i < 0 || i >= n)
|
||||
{
|
||||
/* row index out of range */
|
||||
PRINTF (("index out of range, col %d row %d\n", j, i)) ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Xx != (Entry *) NULL)
|
||||
{
|
||||
PRINT_ENTRY (Xx [p]) ;
|
||||
}
|
||||
PRINTF (("\n")) ;
|
||||
}
|
||||
}
|
||||
|
||||
return (TRUE) ;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,290 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_extract ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Extract KLU factorization into conventional compressed-column matrices.
|
||||
* If any output array is NULL, that part of the LU factorization is not
|
||||
* extracted (this is not an error condition).
|
||||
*
|
||||
* nnz(L) = Numeric->lnz, nnz(U) = Numeric->unz, and nnz(F) = Numeric->Offp [n]
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_extract /* returns TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs: */
|
||||
KLU_numeric *Numeric,
|
||||
KLU_symbolic *Symbolic,
|
||||
|
||||
/* outputs, all of which must be allocated on input */
|
||||
|
||||
/* L */
|
||||
Int *Lp, /* size n+1 */
|
||||
Int *Li, /* size nnz(L) */
|
||||
double *Lx, /* size nnz(L) */
|
||||
#ifdef COMPLEX
|
||||
double *Lz, /* size nnz(L) for the complex case, ignored if real */
|
||||
#endif
|
||||
|
||||
/* U */
|
||||
Int *Up, /* size n+1 */
|
||||
Int *Ui, /* size nnz(U) */
|
||||
double *Ux, /* size nnz(U) */
|
||||
#ifdef COMPLEX
|
||||
double *Uz, /* size nnz(U) for the complex case, ignored if real */
|
||||
#endif
|
||||
|
||||
/* F */
|
||||
Int *Fp, /* size n+1 */
|
||||
Int *Fi, /* size nnz(F) */
|
||||
double *Fx, /* size nnz(F) */
|
||||
#ifdef COMPLEX
|
||||
double *Fz, /* size nnz(F) for the complex case, ignored if real */
|
||||
#endif
|
||||
|
||||
/* P, row permutation */
|
||||
Int *P, /* size n */
|
||||
|
||||
/* Q, column permutation */
|
||||
Int *Q, /* size n */
|
||||
|
||||
/* Rs, scale factors */
|
||||
double *Rs, /* size n */
|
||||
|
||||
/* R, block boundaries */
|
||||
Int *R, /* size nblocks+1 */
|
||||
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Int *Lip, *Llen, *Uip, *Ulen, *Li2, *Ui2 ;
|
||||
Unit *LU ;
|
||||
Entry *Lx2, *Ux2, *Ukk ;
|
||||
Int i, k, block, nblocks, n, nz, k1, k2, nk, len, kk, p ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
if (Symbolic == NULL || Numeric == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
Common->status = KLU_OK ;
|
||||
n = Symbolic->n ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract scale factors */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Rs != NULL)
|
||||
{
|
||||
if (Numeric->Rs != NULL)
|
||||
{
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
Rs [i] = Numeric->Rs [i] ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no scaling */
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
Rs [i] = 1 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract block boundaries */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (R != NULL)
|
||||
{
|
||||
for (block = 0 ; block <= nblocks ; block++)
|
||||
{
|
||||
R [block] = Symbolic->R [block] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract final row permutation */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (P != NULL)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
P [k] = Numeric->Pnum [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract column permutation */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Q != NULL)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Q [k] = Symbolic->Q [k] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract each block of L */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Lp != NULL && Li != NULL && Lx != NULL
|
||||
#ifdef COMPLEX
|
||||
&& Lz != NULL
|
||||
#endif
|
||||
)
|
||||
{
|
||||
nz = 0 ;
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = Symbolic->R [block] ;
|
||||
k2 = Symbolic->R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
if (nk == 1)
|
||||
{
|
||||
/* singleton block */
|
||||
Lp [k1] = nz ;
|
||||
Li [nz] = k1 ;
|
||||
Lx [nz] = 1 ;
|
||||
#ifdef COMPLEX
|
||||
Lz [nz] = 0 ;
|
||||
#endif
|
||||
nz++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* non-singleton block */
|
||||
LU = Numeric->LUbx [block] ;
|
||||
Lip = Numeric->Lip + k1 ;
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
for (kk = 0 ; kk < nk ; kk++)
|
||||
{
|
||||
Lp [k1+kk] = nz ;
|
||||
/* add the unit diagonal entry */
|
||||
Li [nz] = k1 + kk ;
|
||||
Lx [nz] = 1 ;
|
||||
#ifdef COMPLEX
|
||||
Lz [nz] = 0 ;
|
||||
#endif
|
||||
nz++ ;
|
||||
GET_POINTER (LU, Lip, Llen, Li2, Lx2, kk, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
Li [nz] = k1 + Li2 [p] ;
|
||||
Lx [nz] = REAL (Lx2 [p]) ;
|
||||
#ifdef COMPLEX
|
||||
Lz [nz] = IMAG (Lx2 [p]) ;
|
||||
#endif
|
||||
nz++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Lp [n] = nz ;
|
||||
ASSERT (nz == Numeric->lnz) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract each block of U */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Up != NULL && Ui != NULL && Ux != NULL
|
||||
#ifdef COMPLEX
|
||||
&& Uz != NULL
|
||||
#endif
|
||||
)
|
||||
{
|
||||
nz = 0 ;
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = Symbolic->R [block] ;
|
||||
k2 = Symbolic->R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
Ukk = ((Entry *) Numeric->Udiag) + k1 ;
|
||||
if (nk == 1)
|
||||
{
|
||||
/* singleton block */
|
||||
Up [k1] = nz ;
|
||||
Ui [nz] = k1 ;
|
||||
Ux [nz] = REAL (Ukk [0]) ;
|
||||
#ifdef COMPLEX
|
||||
Uz [nz] = IMAG (Ukk [0]) ;
|
||||
#endif
|
||||
nz++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* non-singleton block */
|
||||
LU = Numeric->LUbx [block] ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
for (kk = 0 ; kk < nk ; kk++)
|
||||
{
|
||||
Up [k1+kk] = nz ;
|
||||
GET_POINTER (LU, Uip, Ulen, Ui2, Ux2, kk, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
Ui [nz] = k1 + Ui2 [p] ;
|
||||
Ux [nz] = REAL (Ux2 [p]) ;
|
||||
#ifdef COMPLEX
|
||||
Uz [nz] = IMAG (Ux2 [p]) ;
|
||||
#endif
|
||||
nz++ ;
|
||||
}
|
||||
/* add the diagonal entry */
|
||||
Ui [nz] = k1 + kk ;
|
||||
Ux [nz] = REAL (Ukk [kk]) ;
|
||||
#ifdef COMPLEX
|
||||
Uz [nz] = IMAG (Ukk [kk]) ;
|
||||
#endif
|
||||
nz++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
Up [n] = nz ;
|
||||
ASSERT (nz == Numeric->unz) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* extract the off-diagonal blocks, F */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Fp != NULL && Fi != NULL && Fx != NULL
|
||||
#ifdef COMPLEX
|
||||
&& Fz != NULL
|
||||
#endif
|
||||
)
|
||||
{
|
||||
for (k = 0 ; k <= n ; k++)
|
||||
{
|
||||
Fp [k] = Numeric->Offp [k] ;
|
||||
}
|
||||
nz = Fp [n] ;
|
||||
for (k = 0 ; k < nz ; k++)
|
||||
{
|
||||
Fi [k] = Numeric->Offi [k] ;
|
||||
}
|
||||
for (k = 0 ; k < nz ; k++)
|
||||
{
|
||||
Fx [k] = REAL (((Entry *) Numeric->Offx) [k]) ;
|
||||
#ifdef COMPLEX
|
||||
Fz [k] = IMAG (((Entry *) Numeric->Offx) [k]) ;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,545 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_factor =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Factor the matrix, after ordering and analyzing it with KLU_analyze
|
||||
* or KLU_analyze_given.
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_factor2 ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
static void factor2
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
Entry Ax [ ],
|
||||
KLU_symbolic *Symbolic,
|
||||
|
||||
/* inputs, modified on output: */
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double lsize ;
|
||||
double *Lnz, *Rs ;
|
||||
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
|
||||
*Lip, *Uip, *Llen, *Ulen ;
|
||||
Entry *Offx, *X, s, *Udiag ;
|
||||
Unit **LUbx ;
|
||||
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
|
||||
nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
|
||||
max_unz_block ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* initializations */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* get the contents of the Symbolic object */
|
||||
n = Symbolic->n ;
|
||||
P = Symbolic->P ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
Lnz = Symbolic->Lnz ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
nzoff = Symbolic->nzoff ;
|
||||
|
||||
Pnum = Numeric->Pnum ;
|
||||
Offp = Numeric->Offp ;
|
||||
Offi = Numeric->Offi ;
|
||||
Offx = (Entry *) Numeric->Offx ;
|
||||
|
||||
Lip = Numeric->Lip ;
|
||||
Uip = Numeric->Uip ;
|
||||
Llen = Numeric->Llen ;
|
||||
Ulen = Numeric->Ulen ;
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
|
||||
Rs = Numeric->Rs ;
|
||||
Pinv = Numeric->Pinv ;
|
||||
X = (Entry *) Numeric->Xwork ; /* X is of size n */
|
||||
Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
|
||||
/* 1*maxblock for Pblock */
|
||||
Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
|
||||
Common->nrealloc = 0 ;
|
||||
scale = Common->scale ;
|
||||
max_lnz_block = 1 ;
|
||||
max_unz_block = 1 ;
|
||||
|
||||
/* compute the inverse of P from symbolic analysis. Will be updated to
|
||||
* become the inverse of the numerical factorization when the factorization
|
||||
* is done, for use in KLU_refactor */
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Pinv [k] = EMPTY ;
|
||||
}
|
||||
#endif
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
ASSERT (P [k] >= 0 && P [k] < n) ;
|
||||
Pinv [P [k]] = k ;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
|
||||
#endif
|
||||
|
||||
lnz = 0 ;
|
||||
unz = 0 ;
|
||||
Common->noffdiag = 0 ;
|
||||
Offp [0] = 0 ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* optionally check input matrix and compute scale factors */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (scale >= 0)
|
||||
{
|
||||
/* use Pnum as workspace. NOTE: scale factors are not yet permuted
|
||||
* according to the final pivot row ordering, so Rs [oldrow] is the
|
||||
* scale factor for A (oldrow,:), for the user's matrix A. Pnum is
|
||||
* used as workspace in KLU_scale. When the factorization is done,
|
||||
* the scale factors are permuted according to the final pivot row
|
||||
* permutation, so that Rs [k] is the scale factor for the kth row of
|
||||
* A(p,q) where p and q are the final row and column permutations. */
|
||||
KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* matrix is invalid */
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (scale > 0)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* factor each block using klu */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* the block is from rows/columns k1 to k2-1 */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
|
||||
|
||||
if (nk == 1)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* singleton case */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
poff = Offp [k1] ;
|
||||
oldcol = Q [k1] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
CLEAR (s) ;
|
||||
|
||||
if (scale <= 0)
|
||||
{
|
||||
/* no scaling */
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
oldrow = Ai [p] ;
|
||||
newrow = Pinv [oldrow] ;
|
||||
if (newrow < k1)
|
||||
{
|
||||
Offi [poff] = oldrow ;
|
||||
Offx [poff] = Ax [p] ;
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT (newrow == k1) ;
|
||||
PRINTF (("singleton block %d", block)) ;
|
||||
PRINT_ENTRY (Ax [p]) ;
|
||||
s = Ax [p] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* row scaling. NOTE: scale factors are not yet permuted
|
||||
* according to the pivot row permutation, so Rs [oldrow] is
|
||||
* used below. When the factorization is done, the scale
|
||||
* factors are permuted, so that Rs [newrow] will be used in
|
||||
* klu_solve, klu_tsolve, and klu_rgrowth */
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
oldrow = Ai [p] ;
|
||||
newrow = Pinv [oldrow] ;
|
||||
if (newrow < k1)
|
||||
{
|
||||
Offi [poff] = oldrow ;
|
||||
/* Offx [poff] = Ax [p] / Rs [oldrow] ; */
|
||||
SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT (newrow == k1) ;
|
||||
PRINTF (("singleton block %d ", block)) ;
|
||||
PRINT_ENTRY (Ax[p]) ;
|
||||
SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Udiag [k1] = s ;
|
||||
|
||||
if (IS_ZERO (s))
|
||||
{
|
||||
/* singular singleton */
|
||||
Common->status = KLU_SINGULAR ;
|
||||
Common->numerical_rank = k1 ;
|
||||
Common->singular_col = oldcol ;
|
||||
if (Common->halt_if_singular)
|
||||
{
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
Offp [k1+1] = poff ;
|
||||
Pnum [k1] = P [k1] ;
|
||||
lnz++ ;
|
||||
unz++ ;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* construct and factorize the kth block */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
if (Lnz [block] < 0)
|
||||
{
|
||||
/* COLAMD was used - no estimate of fill-in */
|
||||
/* use 10 times the nnz in A, plus n */
|
||||
lsize = -(Common->initmem) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
lsize = Common->initmem_amd * Lnz [block] + nk ;
|
||||
}
|
||||
|
||||
/* allocates 1 arrays: LUbx [block] */
|
||||
Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
|
||||
lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
|
||||
Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
|
||||
X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
|
||||
|
||||
if (Common->status < KLU_OK ||
|
||||
(Common->status == KLU_SINGULAR && Common->halt_if_singular))
|
||||
{
|
||||
/* out of memory, invalid inputs, or singular */
|
||||
return ;
|
||||
}
|
||||
|
||||
PRINTF (("\n----------------------- L %d:\n", block)) ;
|
||||
ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
|
||||
PRINTF (("\n----------------------- U %d:\n", block)) ;
|
||||
ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* get statistics */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
lnz += lnz_block ;
|
||||
unz += unz_block ;
|
||||
max_lnz_block = MAX (max_lnz_block, lnz_block) ;
|
||||
max_unz_block = MAX (max_unz_block, unz_block) ;
|
||||
|
||||
if (Lnz [block] == EMPTY)
|
||||
{
|
||||
/* revise estimate for subsequent factorization */
|
||||
Lnz [block] = MAX (lnz_block, unz_block) ;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* combine the klu row ordering with the symbolic pre-ordering */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
PRINTF (("Pnum, 1-based:\n")) ;
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
ASSERT (k + k1 < n) ;
|
||||
ASSERT (Pblock [k] + k1 < n) ;
|
||||
Pnum [k + k1] = P [Pblock [k] + k1] ;
|
||||
PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
|
||||
k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
|
||||
}
|
||||
|
||||
/* the local pivot row permutation Pblock is no longer needed */
|
||||
}
|
||||
}
|
||||
ASSERT (nzoff == Offp [n]) ;
|
||||
PRINTF (("\n------------------- Off diagonal entries:\n")) ;
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
|
||||
Numeric->lnz = lnz ;
|
||||
Numeric->unz = unz ;
|
||||
Numeric->max_lnz_block = max_lnz_block ;
|
||||
Numeric->max_unz_block = max_unz_block ;
|
||||
|
||||
/* compute the inverse of Pnum */
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Pinv [k] = EMPTY ;
|
||||
}
|
||||
#endif
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
|
||||
Pinv [Pnum [k]] = k ;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
|
||||
#endif
|
||||
|
||||
/* permute scale factors Rs according to pivotal row order */
|
||||
if (scale > 0)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
REAL (X [k]) = Rs [Pnum [k]] ;
|
||||
}
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Rs [k] = REAL (X [k]) ;
|
||||
}
|
||||
}
|
||||
|
||||
PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
|
||||
/* apply the pivot row permutations to the off-diagonal entries */
|
||||
for (p = 0 ; p < nzoff ; p++)
|
||||
{
|
||||
ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
|
||||
Offi [p] = Pinv [Offi [p]] ;
|
||||
}
|
||||
|
||||
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
|
||||
Entry ss, *Udiag = Numeric->Udiag ;
|
||||
for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
|
||||
{
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
|
||||
if (nk == 1)
|
||||
{
|
||||
PRINTF (("singleton ")) ;
|
||||
/* ENTRY_PRINT (singleton [block]) ; */
|
||||
ss = Udiag [k1] ;
|
||||
PRINT_ENTRY (ss) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
Int *Lip, *Uip, *Llen, *Ulen ;
|
||||
Unit *LU ;
|
||||
Lip = Numeric->Lip + k1 ;
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
LU = (Unit *) Numeric->LUbx [block] ;
|
||||
PRINTF (("\n---- L block %d\n", block));
|
||||
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
PRINTF (("\n---- U block %d\n", block)) ;
|
||||
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_factor =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
KLU_numeric *KLU_factor /* returns NULL if error, or a valid
|
||||
KLU_numeric object if successful */
|
||||
(
|
||||
/* --- inputs --- */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ],
|
||||
KLU_symbolic *Symbolic,
|
||||
/* -------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
|
||||
Int *R ;
|
||||
KLU_numeric *Numeric ;
|
||||
size_t n1, nzoff1, s, b6, n3 ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (NULL) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
Common->numerical_rank = EMPTY ;
|
||||
Common->singular_col = EMPTY ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* check for a valid Symbolic object */
|
||||
if (Symbolic == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
n = Symbolic->n ;
|
||||
nzoff = Symbolic->nzoff ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
maxblock = Symbolic->maxblock ;
|
||||
R = Symbolic->R ;
|
||||
PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
|
||||
n, nzoff, nblocks, maxblock)) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get control parameters and make sure they are in the proper range */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
|
||||
Common->initmem = MAX (1.0, Common->initmem) ;
|
||||
Common->tol = MIN (Common->tol, 1.0) ;
|
||||
Common->tol = MAX (0.0, Common->tol) ;
|
||||
Common->memgrow = MAX (1.0, Common->memgrow) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* allocate the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* this will not cause size_t overflow (already checked by KLU_symbolic) */
|
||||
n1 = ((size_t) n) + 1 ;
|
||||
nzoff1 = ((size_t) nzoff) + 1 ;
|
||||
|
||||
Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory */
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (NULL) ;
|
||||
}
|
||||
Numeric->n = n ;
|
||||
Numeric->nblocks = nblocks ;
|
||||
Numeric->nzoff = nzoff ;
|
||||
Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ;
|
||||
Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ;
|
||||
Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
|
||||
|
||||
Numeric->Lip = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
Numeric->Uip = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
|
||||
Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ;
|
||||
|
||||
Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ;
|
||||
if (Numeric->LUbx != NULL)
|
||||
{
|
||||
for (k = 0 ; k < nblocks ; k++)
|
||||
{
|
||||
Numeric->LUbx [k] = NULL ;
|
||||
}
|
||||
}
|
||||
|
||||
Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
|
||||
|
||||
if (Common->scale > 0)
|
||||
{
|
||||
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no scaling */
|
||||
Numeric->Rs = NULL ;
|
||||
}
|
||||
|
||||
Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ;
|
||||
|
||||
/* allocate permanent workspace for factorization and solve. Note that the
|
||||
* solver will use an Xwork of size 4n, whereas the factorization codes use
|
||||
* an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
|
||||
* uses an Xwork of size 2n. Total size is:
|
||||
*
|
||||
* n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
|
||||
*/
|
||||
s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
|
||||
n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
|
||||
b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
|
||||
Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
|
||||
Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
|
||||
Numeric->Xwork = Numeric->Work ;
|
||||
Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
|
||||
if (!ok || Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory or problem too large */
|
||||
Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
|
||||
KLU_free_numeric (&Numeric, Common) ;
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* factorize the blocks */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* return or free the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
/* out of memory or inputs invalid */
|
||||
KLU_free_numeric (&Numeric, Common) ;
|
||||
}
|
||||
else if (Common->status == KLU_SINGULAR)
|
||||
{
|
||||
if (Common->halt_if_singular)
|
||||
{
|
||||
/* Matrix is singular, and the Numeric object is only partially
|
||||
* defined because we halted early. This is the default case for
|
||||
* a singular matrix. */
|
||||
KLU_free_numeric (&Numeric, Common) ;
|
||||
}
|
||||
}
|
||||
else if (Common->status == KLU_OK)
|
||||
{
|
||||
/* successful non-singular factorization */
|
||||
Common->numerical_rank = n ;
|
||||
Common->singular_col = n ;
|
||||
}
|
||||
return (Numeric) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_free_numeric ===================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Free the KLU Numeric object. */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_free_numeric
|
||||
(
|
||||
KLU_numeric **NumericHandle,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
KLU_numeric *Numeric ;
|
||||
Unit **LUbx ;
|
||||
size_t *LUsize ;
|
||||
Int block, n, nzoff, nblocks ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (NumericHandle == NULL || *NumericHandle == NULL)
|
||||
{
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
Numeric = *NumericHandle ;
|
||||
|
||||
n = Numeric->n ;
|
||||
nzoff = Numeric->nzoff ;
|
||||
nblocks = Numeric->nblocks ;
|
||||
LUsize = Numeric->LUsize ;
|
||||
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
if (LUbx != NULL)
|
||||
{
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
KLU_free (LUbx [block], LUsize ? LUsize [block] : 0,
|
||||
sizeof (Unit), Common) ;
|
||||
}
|
||||
}
|
||||
|
||||
KLU_free (Numeric->Pnum, n, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Offp, n+1, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Offi, nzoff+1, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Offx, nzoff+1, sizeof (Entry), Common) ;
|
||||
|
||||
KLU_free (Numeric->Lip, n, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Llen, n, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Uip, n, sizeof (Int), Common) ;
|
||||
KLU_free (Numeric->Ulen, n, sizeof (Int), Common) ;
|
||||
|
||||
KLU_free (Numeric->LUsize, nblocks, sizeof (size_t), Common) ;
|
||||
|
||||
KLU_free (Numeric->LUbx, nblocks, sizeof (Unit *), Common) ;
|
||||
|
||||
KLU_free (Numeric->Udiag, n, sizeof (Entry), Common) ;
|
||||
|
||||
KLU_free (Numeric->Rs, n, sizeof (double), Common) ;
|
||||
KLU_free (Numeric->Pinv, n, sizeof (Int), Common) ;
|
||||
|
||||
KLU_free (Numeric->Work, Numeric->worksize, 1, Common) ;
|
||||
|
||||
KLU_free (Numeric, 1, sizeof (KLU_numeric), Common) ;
|
||||
|
||||
*NumericHandle = NULL ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_free_symbolic ==================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Free the KLU Symbolic object. */
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_free_symbolic
|
||||
(
|
||||
KLU_symbolic **SymbolicHandle,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
KLU_symbolic *Symbolic ;
|
||||
Int n ;
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (SymbolicHandle == NULL || *SymbolicHandle == NULL)
|
||||
{
|
||||
return (TRUE) ;
|
||||
}
|
||||
Symbolic = *SymbolicHandle ;
|
||||
n = Symbolic->n ;
|
||||
KLU_free (Symbolic->P, n, sizeof (Int), Common) ;
|
||||
KLU_free (Symbolic->Q, n, sizeof (Int), Common) ;
|
||||
KLU_free (Symbolic->R, n+1, sizeof (Int), Common) ;
|
||||
KLU_free (Symbolic->Lnz, n, sizeof (double), Common) ;
|
||||
KLU_free (Symbolic, 1, sizeof (KLU_symbolic), Common) ;
|
||||
*SymbolicHandle = NULL ;
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,243 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU/Include/klu_internal.h =========================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* For internal use in KLU routines only, not for user programs */
|
||||
|
||||
#ifndef _KLU_INTERNAL_H
|
||||
#define _KLU_INTERNAL_H
|
||||
|
||||
#include "klu.h"
|
||||
#include "btf.h"
|
||||
#include "klu_version.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* make sure debugging and printing is turned off */
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG
|
||||
#endif
|
||||
#ifndef NPRINT
|
||||
#define NPRINT
|
||||
#endif
|
||||
|
||||
/* To enable debugging and assertions, uncomment this line:
|
||||
#undef NDEBUG
|
||||
*/
|
||||
|
||||
/* To enable diagnostic printing, uncomment this line:
|
||||
#undef NPRINT
|
||||
*/
|
||||
|
||||
/* ========================================================================== */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#undef ASSERT
|
||||
#ifndef NDEBUG
|
||||
#define ASSERT(a) assert(a)
|
||||
#else
|
||||
#define ASSERT(a)
|
||||
#endif
|
||||
|
||||
#define SCALAR_IS_NAN(x) ((x) != (x))
|
||||
|
||||
/* true if an integer (stored in double x) would overflow (or if x is NaN) */
|
||||
#define INT_OVERFLOW(x) ((!((x) * (1.0+1e-8) <= (double) INT_MAX)) \
|
||||
|| SCALAR_IS_NAN (x))
|
||||
|
||||
#undef TRUE
|
||||
#undef FALSE
|
||||
#undef MAX
|
||||
#undef MIN
|
||||
#undef PRINTF
|
||||
#undef FLIP
|
||||
|
||||
#ifndef NPRINT
|
||||
#define PRINTF(s) { printf s ; } ;
|
||||
#else
|
||||
#define PRINTF(s)
|
||||
#endif
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
|
||||
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
/* FLIP is a "negation about -1", and is used to mark an integer i that is
|
||||
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY
|
||||
* is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i
|
||||
* for all integers i. UNFLIP (i) is >= EMPTY. */
|
||||
#define EMPTY (-1)
|
||||
#define FLIP(i) (-(i)-2)
|
||||
#define UNFLIP(i) (((i) < EMPTY) ? FLIP (i) : (i))
|
||||
|
||||
|
||||
size_t KLU_kernel /* final size of LU on output */
|
||||
(
|
||||
/* input, not modified */
|
||||
Int n, /* A is n-by-n */
|
||||
Int Ap [ ], /* size n+1, column pointers for A */
|
||||
Int Ai [ ], /* size nz = Ap [n], row indices for A */
|
||||
Entry Ax [ ], /* size nz, values of A */
|
||||
Int Q [ ], /* size n, optional input permutation */
|
||||
size_t lusize, /* initial size of LU */
|
||||
|
||||
/* output, not defined on input */
|
||||
Int Pinv [ ], /* size n */
|
||||
Int P [ ], /* size n */
|
||||
Unit **p_LU, /* size lusize on input, size Uxp[n] on output*/
|
||||
Entry Udiag [ ], /* size n, diagonal of U */
|
||||
Int Llen [ ], /* size n, column length of L */
|
||||
Int Ulen [ ], /* size n, column length of U */
|
||||
Int Lip [ ], /* size n+1 */
|
||||
Int Uip [ ], /* size n+1 */
|
||||
Int *lnz, /* size of L */
|
||||
Int *unz, /* size of U */
|
||||
|
||||
/* workspace, not defined on input */
|
||||
Entry X [ ], /* size n, zero on output */
|
||||
|
||||
/* workspace, not defined on input or output */
|
||||
Int Stack [ ], /* size n */
|
||||
Int Flag [ ], /* size n */
|
||||
Int adj_pos [ ], /* size n */
|
||||
|
||||
/* workspace for pruning only */
|
||||
Int Lpend [ ], /* size n workspace */
|
||||
|
||||
/* inputs, not modified on output */
|
||||
Int k1, /* the block of A is from k1 to k2-1 */
|
||||
Int PSinv [ ], /* inverse of P from symbolic factorization */
|
||||
double Rs [ ], /* scale factors for A */
|
||||
|
||||
/* inputs, modified on output */
|
||||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
|
||||
Int Offi [ ],
|
||||
Entry Offx [ ],
|
||||
KLU_common *Common /* the control input/output structure */
|
||||
) ;
|
||||
|
||||
|
||||
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int n, /* A is n-by-n. n must be > 0. */
|
||||
Int Ap [ ], /* size n+1, column pointers for A */
|
||||
Int Ai [ ], /* size nz = Ap [n], row indices for A */
|
||||
Entry Ax [ ], /* size nz, values of A */
|
||||
Int Q [ ], /* size n, optional column permutation */
|
||||
double Lsize, /* initial size of L and U */
|
||||
|
||||
/* outputs, not defined on input */
|
||||
Unit **p_LU, /* row indices and values of L and U */
|
||||
Entry Udiag [ ], /* size n, diagonal of U */
|
||||
Int Llen [ ], /* size n, column length of L */
|
||||
Int Ulen [ ], /* size n, column length of U */
|
||||
Int Lip [ ], /* size n+1, column pointers of L */
|
||||
Int Uip [ ], /* size n+1, column pointers of U */
|
||||
Int P [ ], /* row permutation, size n */
|
||||
Int *lnz, /* size of L */
|
||||
Int *unz, /* size of U */
|
||||
|
||||
/* workspace, undefined on input */
|
||||
Entry *X, /* size n entries. Zero on output */
|
||||
Int *Work, /* size 5n Int's */
|
||||
|
||||
/* inputs, not modified on output */
|
||||
Int k1, /* the block of A is from k1 to k2-1 */
|
||||
Int PSinv [ ], /* inverse of P from symbolic factorization */
|
||||
double Rs [ ], /* scale factors for A */
|
||||
|
||||
/* inputs, modified on output */
|
||||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
|
||||
Int Offi [ ],
|
||||
Entry Offx [ ],
|
||||
KLU_common *Common /* the control input/output structure */
|
||||
) ;
|
||||
|
||||
void KLU_lsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Lp [ ],
|
||||
Int Li [ ],
|
||||
Unit LU [ ],
|
||||
Int nrhs,
|
||||
/* right-hand-side on input, solution to Lx=b on output */
|
||||
Entry X [ ]
|
||||
) ;
|
||||
|
||||
void KLU_ltsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Lp [ ],
|
||||
Int Li [ ],
|
||||
Unit LU [ ],
|
||||
Int nrhs,
|
||||
#ifdef COMPLEX
|
||||
Int conj_solve,
|
||||
#endif
|
||||
/* right-hand-side on input, solution to L'x=b on output */
|
||||
Entry X [ ]
|
||||
) ;
|
||||
|
||||
|
||||
void KLU_usolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Up [ ],
|
||||
Int Ui [ ],
|
||||
Unit LU [ ],
|
||||
Entry Udiag [ ],
|
||||
Int nrhs,
|
||||
/* right-hand-side on input, solution to Ux=b on output */
|
||||
Entry X [ ]
|
||||
) ;
|
||||
|
||||
void KLU_utsolve
|
||||
(
|
||||
/* inputs, not modified: */
|
||||
Int n,
|
||||
Int Up [ ],
|
||||
Int Ui [ ],
|
||||
Unit LU [ ],
|
||||
Entry Udiag [ ],
|
||||
Int nrhs,
|
||||
#ifdef COMPLEX
|
||||
Int conj_solve,
|
||||
#endif
|
||||
/* right-hand-side on input, solution to U'x=b on output */
|
||||
Entry X [ ]
|
||||
) ;
|
||||
|
||||
Int KLU_valid
|
||||
(
|
||||
Int n,
|
||||
Int Ap [ ],
|
||||
Int Ai [ ],
|
||||
Entry Ax [ ]
|
||||
) ;
|
||||
|
||||
Int KLU_valid_LU
|
||||
(
|
||||
Int n,
|
||||
Int flag_test_start_ptr,
|
||||
Int Xip [ ],
|
||||
Int Xlen [ ],
|
||||
Unit LU [ ]
|
||||
);
|
||||
|
||||
size_t KLU_add_size_t (size_t a, size_t b, Int *ok) ;
|
||||
|
||||
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) ;
|
||||
|
||||
KLU_symbolic *KLU_alloc_symbolic (Int n, Int *Ap, Int *Ai, KLU_common *Common) ;
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,225 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_memory =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* KLU memory management routines:
|
||||
*
|
||||
* KLU_malloc malloc wrapper
|
||||
* KLU_free free wrapper
|
||||
* KLU_realloc realloc wrapper
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_add_size_t ======================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Safely compute a+b, and check for size_t overflow */
|
||||
|
||||
size_t KLU_add_size_t (size_t a, size_t b, Int *ok)
|
||||
{
|
||||
(*ok) = (*ok) && ((a + b) >= MAX (a,b)) ;
|
||||
return ((*ok) ? (a + b) : ((size_t) -1)) ;
|
||||
}
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_mult_size_t ====================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Safely compute a*k, where k should be small, and check for size_t overflow */
|
||||
|
||||
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok)
|
||||
{
|
||||
size_t i, s = 0 ;
|
||||
for (i = 0 ; i < k ; i++)
|
||||
{
|
||||
s = KLU_add_size_t (s, a, ok) ;
|
||||
}
|
||||
return ((*ok) ? s : ((size_t) -1)) ;
|
||||
}
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_malloc =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Wrapper around malloc routine (mxMalloc for a mexFunction). Allocates
|
||||
* space of size MAX(1,n)*size, where size is normally a sizeof (...).
|
||||
*
|
||||
* This routine and KLU_realloc do not set Common->status to KLU_OK on success,
|
||||
* so that a sequence of KLU_malloc's or KLU_realloc's can be used. If any of
|
||||
* them fails, the Common->status will hold the most recent error status.
|
||||
*
|
||||
* Usage, for a pointer to Int:
|
||||
*
|
||||
* p = KLU_malloc (n, sizeof (Int), Common)
|
||||
*
|
||||
* Uses a pointer to the malloc routine (or its equivalent) defined in Common.
|
||||
*/
|
||||
|
||||
void *KLU_malloc /* returns pointer to the newly malloc'd block */
|
||||
(
|
||||
/* ---- input ---- */
|
||||
size_t n, /* number of items */
|
||||
size_t size, /* size of each item */
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
void *p ;
|
||||
size_t s ;
|
||||
Int ok = TRUE ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
p = NULL ;
|
||||
}
|
||||
else if (size == 0)
|
||||
{
|
||||
/* size must be > 0 */
|
||||
Common->status = KLU_INVALID ;
|
||||
p = NULL ;
|
||||
}
|
||||
else if (n >= INT_MAX)
|
||||
{
|
||||
/* object is too big to allocate; p[i] where i is an Int will not
|
||||
* be enough. */
|
||||
Common->status = KLU_TOO_LARGE ;
|
||||
p = NULL ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* call malloc, or its equivalent */
|
||||
s = KLU_mult_size_t (MAX (1,n), size, &ok) ;
|
||||
p = ok ? ((Common->malloc_memory) (s)) : NULL ;
|
||||
if (p == NULL)
|
||||
{
|
||||
/* failure: out of memory */
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
}
|
||||
else
|
||||
{
|
||||
Common->memusage += s ;
|
||||
Common->mempeak = MAX (Common->mempeak, Common->memusage) ;
|
||||
}
|
||||
}
|
||||
return (p) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_free ============================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Wrapper around free routine (mxFree for a mexFunction). Returns NULL,
|
||||
* which can be assigned to the pointer being freed, as in:
|
||||
*
|
||||
* p = KLU_free (p, n, sizeof (int), Common) ;
|
||||
*/
|
||||
|
||||
void *KLU_free /* always returns NULL */
|
||||
(
|
||||
/* ---- in/out --- */
|
||||
void *p, /* block of memory to free */
|
||||
/* ---- input --- */
|
||||
size_t n, /* size of block to free, in # of items */
|
||||
size_t size, /* size of each item */
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
size_t s ;
|
||||
Int ok = TRUE ;
|
||||
if (p != NULL && Common != NULL)
|
||||
{
|
||||
/* only free the object if the pointer is not NULL */
|
||||
/* call free, or its equivalent */
|
||||
(Common->free_memory) (p) ;
|
||||
s = KLU_mult_size_t (MAX (1,n), size, &ok) ;
|
||||
Common->memusage -= s ;
|
||||
}
|
||||
/* return NULL, and the caller should assign this to p. This avoids
|
||||
* freeing the same pointer twice. */
|
||||
return (NULL) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_realloc ========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Wrapper around realloc routine (mxRealloc for a mexFunction). Given a
|
||||
* pointer p to a block allocated by KLU_malloc, it changes the size of the
|
||||
* block pointed to by p to be MAX(1,nnew)*size in size. It may return a
|
||||
* pointer different than p. This should be used as (for a pointer to Int):
|
||||
*
|
||||
* p = KLU_realloc (nnew, nold, sizeof (Int), p, Common) ;
|
||||
*
|
||||
* If p is NULL, this is the same as p = KLU_malloc (...).
|
||||
* A size of nnew=0 is treated as nnew=1.
|
||||
*
|
||||
* If the realloc fails, p is returned unchanged and Common->status is set
|
||||
* to KLU_OUT_OF_MEMORY. If successful, Common->status is not modified,
|
||||
* and p is returned (possibly changed) and pointing to a large block of memory.
|
||||
*
|
||||
* Uses a pointer to the realloc routine (or its equivalent) defined in Common.
|
||||
*/
|
||||
|
||||
void *KLU_realloc /* returns pointer to reallocated block */
|
||||
(
|
||||
/* ---- input ---- */
|
||||
size_t nnew, /* requested # of items in reallocated block */
|
||||
size_t nold, /* old # of items */
|
||||
size_t size, /* size of each item */
|
||||
/* ---- in/out --- */
|
||||
void *p, /* block of memory to realloc */
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
void *pnew ;
|
||||
size_t snew, sold ;
|
||||
Int ok = TRUE ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
p = NULL ;
|
||||
}
|
||||
else if (size == 0)
|
||||
{
|
||||
/* size must be > 0 */
|
||||
Common->status = KLU_INVALID ;
|
||||
p = NULL ;
|
||||
}
|
||||
else if (p == NULL)
|
||||
{
|
||||
/* A fresh object is being allocated. */
|
||||
p = KLU_malloc (nnew, size, Common) ;
|
||||
}
|
||||
else if (nnew >= INT_MAX)
|
||||
{
|
||||
/* failure: nnew is too big. Do not change p */
|
||||
Common->status = KLU_TOO_LARGE ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The object exists, and is changing to some other nonzero size. */
|
||||
/* call realloc, or its equivalent */
|
||||
snew = KLU_mult_size_t (MAX (1,nnew), size, &ok) ;
|
||||
sold = KLU_mult_size_t (MAX (1,nold), size, &ok) ;
|
||||
pnew = ok ? ((Common->realloc_memory) (p, snew)) : NULL ;
|
||||
if (pnew == NULL)
|
||||
{
|
||||
/* Do not change p, since it still points to allocated memory */
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* success: return the new p and change the size of the block */
|
||||
Common->memusage += (snew - sold) ;
|
||||
Common->mempeak = MAX (Common->mempeak, Common->memusage) ;
|
||||
p = pnew ;
|
||||
}
|
||||
}
|
||||
return (p) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,478 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_refactor ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Factor the matrix, after ordering and analyzing it with KLU_analyze, and
|
||||
* factoring it once with KLU_factor. This routine cannot do any numerical
|
||||
* pivoting. The pattern of the input matrix (Ap, Ai) must be identical to
|
||||
* the pattern given to KLU_factor.
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_refactor ========================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
Int KLU_refactor /* returns TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ],
|
||||
KLU_symbolic *Symbolic,
|
||||
|
||||
/* input/output */
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Entry ukk, ujk, s ;
|
||||
Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ;
|
||||
double *Rs ;
|
||||
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen,
|
||||
*Ulen ;
|
||||
Unit **LUbx ;
|
||||
Unit *LU ;
|
||||
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale,
|
||||
nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
if (Numeric == NULL)
|
||||
{
|
||||
/* invalid Numeric object */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
|
||||
Common->numerical_rank = EMPTY ;
|
||||
Common->singular_col = EMPTY ;
|
||||
|
||||
Az = (Entry *) Ax ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
n = Symbolic->n ;
|
||||
P = Symbolic->P ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
maxblock = Symbolic->maxblock ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Pnum = Numeric->Pnum ;
|
||||
Offp = Numeric->Offp ;
|
||||
Offi = Numeric->Offi ;
|
||||
Offx = (Entry *) Numeric->Offx ;
|
||||
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
|
||||
scale = Common->scale ;
|
||||
if (scale > 0)
|
||||
{
|
||||
/* factorization was not scaled, but refactorization is scaled */
|
||||
if (Numeric->Rs == NULL)
|
||||
{
|
||||
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
|
||||
if (Common->status < KLU_OK)
|
||||
{
|
||||
Common->status = KLU_OUT_OF_MEMORY ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no scaling for refactorization; ensure Numeric->Rs is freed. This
|
||||
* does nothing if Numeric->Rs is already NULL. */
|
||||
Numeric->Rs = KLU_free (Numeric->Rs, n, sizeof (double), Common) ;
|
||||
}
|
||||
Rs = Numeric->Rs ;
|
||||
|
||||
Pinv = Numeric->Pinv ;
|
||||
X = (Entry *) Numeric->Xwork ;
|
||||
Common->nrealloc = 0 ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
nzoff = Symbolic->nzoff ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check the input matrix compute the row scale factors, Rs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* do no scale, or check the input matrix, if scale < 0 */
|
||||
if (scale >= 0)
|
||||
{
|
||||
/* check for out-of-range indices, but do not check for duplicates */
|
||||
if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common))
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* clear workspace X */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (k = 0 ; k < maxblock ; k++)
|
||||
{
|
||||
/* X [k] = 0 */
|
||||
CLEAR (X [k]) ;
|
||||
}
|
||||
|
||||
poff = 0 ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* factor each block */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (scale <= 0)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* no scaling */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* the block is from rows/columns k1 to k2-1 */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
|
||||
if (nk == 1)
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------- */
|
||||
/* singleton case */
|
||||
/* ---------------------------------------------------------- */
|
||||
|
||||
oldcol = Q [k1] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
CLEAR (s) ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
newrow = Pinv [Ai [p]] - k1 ;
|
||||
if (newrow < 0 && poff < nzoff)
|
||||
{
|
||||
/* entry in off-diagonal block */
|
||||
Offx [poff] = Az [p] ;
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* singleton */
|
||||
s = Az [p] ;
|
||||
}
|
||||
}
|
||||
Udiag [k1] = s ;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------- */
|
||||
/* construct and factor the kth block */
|
||||
/* ---------------------------------------------------------- */
|
||||
|
||||
Lip = Numeric->Lip + k1 ;
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
LU = LUbx [block] ;
|
||||
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------ */
|
||||
/* scatter kth column of the block into workspace X */
|
||||
/* ------------------------------------------------------ */
|
||||
|
||||
oldcol = Q [k+k1] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
newrow = Pinv [Ai [p]] - k1 ;
|
||||
if (newrow < 0 && poff < nzoff)
|
||||
{
|
||||
/* entry in off-diagonal block */
|
||||
Offx [poff] = Az [p] ;
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* (newrow,k) is an entry in the block */
|
||||
X [newrow] = Az [p] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------ */
|
||||
/* compute kth column of U, and update kth column of A */
|
||||
/* ------------------------------------------------------ */
|
||||
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
|
||||
for (up = 0 ; up < ulen ; up++)
|
||||
{
|
||||
j = Ui [up] ;
|
||||
ujk = X [j] ;
|
||||
/* X [j] = 0 */
|
||||
CLEAR (X [j]) ;
|
||||
Ux [up] = ujk ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
|
||||
for (p = 0 ; p < llen ; p++)
|
||||
{
|
||||
/* X [Li [p]] -= Lx [p] * ujk */
|
||||
MULT_SUB (X [Li [p]], Lx [p], ujk) ;
|
||||
}
|
||||
}
|
||||
/* get the diagonal entry of U */
|
||||
ukk = X [k] ;
|
||||
/* X [k] = 0 */
|
||||
CLEAR (X [k]) ;
|
||||
/* singular case */
|
||||
if (IS_ZERO (ukk))
|
||||
{
|
||||
/* matrix is numerically singular */
|
||||
Common->status = KLU_SINGULAR ;
|
||||
if (Common->numerical_rank == EMPTY)
|
||||
{
|
||||
Common->numerical_rank = k+k1 ;
|
||||
Common->singular_col = Q [k+k1] ;
|
||||
}
|
||||
if (Common->halt_if_singular)
|
||||
{
|
||||
/* do not continue the factorization */
|
||||
return (FALSE) ;
|
||||
}
|
||||
}
|
||||
Udiag [k+k1] = ukk ;
|
||||
/* gather and divide by pivot to get kth column of L */
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
|
||||
for (p = 0 ; p < llen ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
DIV (Lx [p], X [i], ukk) ;
|
||||
CLEAR (X [i]) ;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* scaling */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* the block is from rows/columns k1 to k2-1 */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
|
||||
if (nk == 1)
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------- */
|
||||
/* singleton case */
|
||||
/* ---------------------------------------------------------- */
|
||||
|
||||
oldcol = Q [k1] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
CLEAR (s) ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
oldrow = Ai [p] ;
|
||||
newrow = Pinv [oldrow] - k1 ;
|
||||
if (newrow < 0 && poff < nzoff)
|
||||
{
|
||||
/* entry in off-diagonal block */
|
||||
/* Offx [poff] = Az [p] / Rs [oldrow] */
|
||||
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ;
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* singleton */
|
||||
/* s = Az [p] / Rs [oldrow] */
|
||||
SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ;
|
||||
}
|
||||
}
|
||||
Udiag [k1] = s ;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* ---------------------------------------------------------- */
|
||||
/* construct and factor the kth block */
|
||||
/* ---------------------------------------------------------- */
|
||||
|
||||
Lip = Numeric->Lip + k1 ;
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
LU = LUbx [block] ;
|
||||
|
||||
for (k = 0 ; k < nk ; k++)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------ */
|
||||
/* scatter kth column of the block into workspace X */
|
||||
/* ------------------------------------------------------ */
|
||||
|
||||
oldcol = Q [k+k1] ;
|
||||
pend = Ap [oldcol+1] ;
|
||||
for (p = Ap [oldcol] ; p < pend ; p++)
|
||||
{
|
||||
oldrow = Ai [p] ;
|
||||
newrow = Pinv [oldrow] - k1 ;
|
||||
if (newrow < 0 && poff < nzoff)
|
||||
{
|
||||
/* entry in off-diagonal part */
|
||||
/* Offx [poff] = Az [p] / Rs [oldrow] */
|
||||
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]);
|
||||
poff++ ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* (newrow,k) is an entry in the block */
|
||||
/* X [newrow] = Az [p] / Rs [oldrow] */
|
||||
SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------ */
|
||||
/* compute kth column of U, and update kth column of A */
|
||||
/* ------------------------------------------------------ */
|
||||
|
||||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
|
||||
for (up = 0 ; up < ulen ; up++)
|
||||
{
|
||||
j = Ui [up] ;
|
||||
ujk = X [j] ;
|
||||
/* X [j] = 0 */
|
||||
CLEAR (X [j]) ;
|
||||
Ux [up] = ujk ;
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
|
||||
for (p = 0 ; p < llen ; p++)
|
||||
{
|
||||
/* X [Li [p]] -= Lx [p] * ujk */
|
||||
MULT_SUB (X [Li [p]], Lx [p], ujk) ;
|
||||
}
|
||||
}
|
||||
/* get the diagonal entry of U */
|
||||
ukk = X [k] ;
|
||||
/* X [k] = 0 */
|
||||
CLEAR (X [k]) ;
|
||||
/* singular case */
|
||||
if (IS_ZERO (ukk))
|
||||
{
|
||||
/* matrix is numerically singular */
|
||||
Common->status = KLU_SINGULAR ;
|
||||
if (Common->numerical_rank == EMPTY)
|
||||
{
|
||||
Common->numerical_rank = k+k1 ;
|
||||
Common->singular_col = Q [k+k1] ;
|
||||
}
|
||||
if (Common->halt_if_singular)
|
||||
{
|
||||
/* do not continue the factorization */
|
||||
return (FALSE) ;
|
||||
}
|
||||
}
|
||||
Udiag [k+k1] = ukk ;
|
||||
/* gather and divide by pivot to get kth column of L */
|
||||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ;
|
||||
for (p = 0 ; p < llen ; p++)
|
||||
{
|
||||
i = Li [p] ;
|
||||
DIV (Lx [p], X [i], ukk) ;
|
||||
CLEAR (X [i]) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* permute scale factors Rs according to pivotal row order */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (scale > 0)
|
||||
{
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
REAL (X [k]) = Rs [Pnum [k]] ;
|
||||
}
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Rs [k] = REAL (X [k]) ;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
ASSERT (Offp [n] == poff) ;
|
||||
ASSERT (Symbolic->nzoff == poff) ;
|
||||
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
if (Common->status == KLU_OK)
|
||||
{
|
||||
PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks));
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF ((
|
||||
"\n================KLU_refactor output: k1 %d k2 %d nk %d\n",
|
||||
k1, k2, nk)) ;
|
||||
if (nk == 1)
|
||||
{
|
||||
PRINTF (("singleton ")) ;
|
||||
PRINT_ENTRY (Udiag [k1]) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
Lip = Numeric->Lip + k1 ;
|
||||
Llen = Numeric->Llen + k1 ;
|
||||
LU = (Unit *) Numeric->LUbx [block] ;
|
||||
PRINTF (("\n---- L block %d\n", block)) ;
|
||||
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
|
||||
Uip = Numeric->Uip + k1 ;
|
||||
Ulen = Numeric->Ulen + k1 ;
|
||||
PRINTF (("\n---- U block %d\n", block)) ;
|
||||
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_scale ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Scale a matrix and check to see if it is valid. Can be called by the user.
|
||||
* This is called by KLU_factor and KLU_refactor. Returns TRUE if the input
|
||||
* matrix is valid, FALSE otherwise. If the W input argument is non-NULL,
|
||||
* then the input matrix is checked for duplicate entries.
|
||||
*
|
||||
* scaling methods:
|
||||
* <0: no scaling, do not compute Rs, and do not check input matrix.
|
||||
* 0: no scaling
|
||||
* 1: the scale factor for row i is sum (abs (A (i,:)))
|
||||
* 2 or more: the scale factor for row i is max (abs (A (i,:)))
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_scale /* return TRUE if successful, FALSE otherwise */
|
||||
(
|
||||
/* inputs, not modified */
|
||||
Int scale, /* 0: none, 1: sum, 2: max */
|
||||
Int n,
|
||||
Int Ap [ ], /* size n+1, column pointers */
|
||||
Int Ai [ ], /* size nz, row indices */
|
||||
double Ax [ ],
|
||||
/* outputs, not defined on input */
|
||||
double Rs [ ], /* size n, can be NULL if scale <= 0 */
|
||||
/* workspace, not defined on input or output */
|
||||
Int W [ ], /* size n, can be NULL */
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
double a ;
|
||||
Entry *Az ;
|
||||
Int row, col, p, pend, check_duplicates ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
if (scale < 0)
|
||||
{
|
||||
/* return without checking anything and without computing the
|
||||
* scale factors */
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
||||
Az = (Entry *) Ax ;
|
||||
|
||||
if (n <= 0 || Ap == NULL || Ai == NULL || Az == NULL ||
|
||||
(scale > 0 && Rs == NULL))
|
||||
{
|
||||
/* Ap, Ai, Ax and Rs must be present, and n must be > 0 */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Ap [0] != 0 || Ap [n] < 0)
|
||||
{
|
||||
/* nz = Ap [n] must be >= 0 and Ap [0] must equal zero */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
for (col = 0 ; col < n ; col++)
|
||||
{
|
||||
if (Ap [col] > Ap [col+1])
|
||||
{
|
||||
/* column pointers must be non-decreasing */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* scale */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (scale > 0)
|
||||
{
|
||||
/* initialize row sum or row max */
|
||||
for (row = 0 ; row < n ; row++)
|
||||
{
|
||||
Rs [row] = 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for duplicates only if W is present */
|
||||
check_duplicates = (W != (Int *) NULL) ;
|
||||
if (check_duplicates)
|
||||
{
|
||||
for (row = 0 ; row < n ; row++)
|
||||
{
|
||||
W [row] = EMPTY ;
|
||||
}
|
||||
}
|
||||
|
||||
for (col = 0 ; col < n ; col++)
|
||||
{
|
||||
pend = Ap [col+1] ;
|
||||
for (p = Ap [col] ; p < pend ; p++)
|
||||
{
|
||||
row = Ai [p] ;
|
||||
if (row < 0 || row >= n)
|
||||
{
|
||||
/* row index out of range, or duplicate entry */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (check_duplicates)
|
||||
{
|
||||
if (W [row] == col)
|
||||
{
|
||||
/* duplicate entry */
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
/* flag row i as appearing in column col */
|
||||
W [row] = col ;
|
||||
}
|
||||
/* a = ABS (Az [p]) ;*/
|
||||
ABS (a, Az [p]) ;
|
||||
if (scale == 1)
|
||||
{
|
||||
/* accumulate the abs. row sum */
|
||||
Rs [row] += a ;
|
||||
}
|
||||
else if (scale > 1)
|
||||
{
|
||||
/* find the max abs. value in the row */
|
||||
Rs [row] = MAX (Rs [row], a) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (scale > 0)
|
||||
{
|
||||
/* do not scale empty rows */
|
||||
for (row = 0 ; row < n ; row++)
|
||||
{
|
||||
/* matrix is singular */
|
||||
PRINTF (("Rs [%d] = %g\n", row, Rs [row])) ;
|
||||
|
||||
if (Rs [row] == 0.0)
|
||||
{
|
||||
PRINTF (("Row %d of A is all zero\n", row)) ;
|
||||
Rs [row] = 1.0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,396 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_solve ============================================================ */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
|
||||
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
|
||||
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
|
||||
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
|
||||
* Numeric->Iwork).
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_solve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
Int d, /* leading dimension of B */
|
||||
Int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size n*nrhs, in column-oriented form, with
|
||||
* leading dimension d. */
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Entry x [4], offik, s ;
|
||||
double rs, *Rs ;
|
||||
Entry *Offx, *X, *Bz, *Udiag ;
|
||||
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
|
||||
Unit **LUbx ;
|
||||
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
|
||||
B == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Bz = (Entry *) B ;
|
||||
n = Symbolic->n ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
ASSERT (nblocks == Numeric->nblocks) ;
|
||||
Pnum = Numeric->Pnum ;
|
||||
Offp = Numeric->Offp ;
|
||||
Offi = Numeric->Offi ;
|
||||
Offx = (Entry *) Numeric->Offx ;
|
||||
|
||||
Lip = Numeric->Lip ;
|
||||
Llen = Numeric->Llen ;
|
||||
Uip = Numeric->Uip ;
|
||||
Ulen = Numeric->Ulen ;
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
|
||||
Rs = Numeric->Rs ;
|
||||
X = (Entry *) Numeric->Xwork ;
|
||||
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* solve in chunks of 4 columns at a time */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* get the size of the current chunk */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
nr = MIN (nrhs - chunk, 4) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* scale and permute the right hand side, X = P*(R\B) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (Rs == NULL)
|
||||
{
|
||||
|
||||
/* no scaling */
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
X [k] = Bz [Pnum [k]] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
X [2*k ] = Bz [i ] ;
|
||||
X [2*k + 1] = Bz [i + d ] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
X [3*k ] = Bz [i ] ;
|
||||
X [3*k + 1] = Bz [i + d ] ;
|
||||
X [3*k + 2] = Bz [i + d*2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
X [4*k ] = Bz [i ] ;
|
||||
X [4*k + 1] = Bz [i + d ] ;
|
||||
X [4*k + 2] = Bz [i + d*2] ;
|
||||
X [4*k + 3] = Bz [i + d*3] ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
|
||||
SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* solve X = (L*U + Off)\X */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (block = nblocks-1 ; block >= 0 ; block--)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* the block of size nk is from rows/columns k1 to k2-1 */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
|
||||
|
||||
/* solve the block system */
|
||||
if (nk == 1)
|
||||
{
|
||||
s = Udiag [k1] ;
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
DIV (X [k1], X [k1], s) ;
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
DIV (X [2*k1], X [2*k1], s) ;
|
||||
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
DIV (X [3*k1], X [3*k1], s) ;
|
||||
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
|
||||
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
DIV (X [4*k1], X [4*k1], s) ;
|
||||
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
|
||||
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
|
||||
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
|
||||
break ;
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
|
||||
X + nr*k1) ;
|
||||
KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
|
||||
Udiag + k1, nr, X + nr*k1) ;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* block back-substitution for the off-diagonal-block entries */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
if (block > 0)
|
||||
{
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [k] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [2*k ] ;
|
||||
x [1] = X [2*k + 1] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
offik = Offx [p] ;
|
||||
MULT_SUB (X [2*i], offik, x [0]) ;
|
||||
MULT_SUB (X [2*i + 1], offik, x [1]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [3*k ] ;
|
||||
x [1] = X [3*k + 1] ;
|
||||
x [2] = X [3*k + 2] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
offik = Offx [p] ;
|
||||
MULT_SUB (X [3*i], offik, x [0]) ;
|
||||
MULT_SUB (X [3*i + 1], offik, x [1]) ;
|
||||
MULT_SUB (X [3*i + 2], offik, x [2]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [4*k ] ;
|
||||
x [1] = X [4*k + 1] ;
|
||||
x [2] = X [4*k + 2] ;
|
||||
x [3] = X [4*k + 3] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
offik = Offx [p] ;
|
||||
MULT_SUB (X [4*i], offik, x [0]) ;
|
||||
MULT_SUB (X [4*i + 1], offik, x [1]) ;
|
||||
MULT_SUB (X [4*i + 2], offik, x [2]) ;
|
||||
MULT_SUB (X [4*i + 3], offik, x [3]) ;
|
||||
}
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* permute the result, Bz = Q*X */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Bz [Q [k]] = X [k] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
Bz [i ] = X [2*k ] ;
|
||||
Bz [i + d ] = X [2*k + 1] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
Bz [i ] = X [3*k ] ;
|
||||
Bz [i + d ] = X [3*k + 1] ;
|
||||
Bz [i + d*2] = X [3*k + 2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
Bz [i ] = X [4*k ] ;
|
||||
Bz [i + d ] = X [4*k + 1] ;
|
||||
Bz [i + d*2] = X [4*k + 2] ;
|
||||
Bz [i + d*3] = X [4*k + 3] ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* go to the next chunk of B */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
Bz += d*4 ;
|
||||
}
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_sort ============================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* sorts the columns of L and U so that the row indices appear in strictly
|
||||
* increasing order.
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === sort ================================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Sort L or U using a double-transpose */
|
||||
|
||||
static void sort (Int n, Int *Xip, Int *Xlen, Unit *LU, Int *Tp, Int *Tj,
|
||||
Entry *Tx, Int *W)
|
||||
{
|
||||
Int *Xi ;
|
||||
Entry *Xx ;
|
||||
Int p, i, j, len, nz, tp, xlen, pend ;
|
||||
|
||||
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ;
|
||||
|
||||
/* count the number of entries in each row of L or U */
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
W [i] = 0 ;
|
||||
}
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
W [Xi [p]]++ ;
|
||||
}
|
||||
}
|
||||
|
||||
/* construct the row pointers for T */
|
||||
nz = 0 ;
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
Tp [i] = nz ;
|
||||
nz += W [i] ;
|
||||
}
|
||||
Tp [n] = nz ;
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
W [i] = Tp [i] ;
|
||||
}
|
||||
|
||||
/* transpose the matrix into Tp, Ti, Tx */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
|
||||
for (p = 0 ; p < len ; p++)
|
||||
{
|
||||
tp = W [Xi [p]]++ ;
|
||||
Tj [tp] = j ;
|
||||
Tx [tp] = Xx [p] ;
|
||||
}
|
||||
}
|
||||
|
||||
/* transpose the matrix back into Xip, Xlen, Xi, Xx */
|
||||
for (j = 0 ; j < n ; j++)
|
||||
{
|
||||
W [j] = 0 ;
|
||||
}
|
||||
for (i = 0 ; i < n ; i++)
|
||||
{
|
||||
pend = Tp [i+1] ;
|
||||
for (p = Tp [i] ; p < pend ; p++)
|
||||
{
|
||||
j = Tj [p] ;
|
||||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ;
|
||||
xlen = W [j]++ ;
|
||||
Xi [xlen] = i ;
|
||||
Xx [xlen] = Tx [p] ;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ;
|
||||
}
|
||||
|
||||
|
||||
/* ========================================================================== */
|
||||
/* === KLU_sort ============================================================= */
|
||||
/* ========================================================================== */
|
||||
|
||||
Int KLU_sort
|
||||
(
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Int *R, *W, *Tp, *Ti, *Lip, *Uip, *Llen, *Ulen ;
|
||||
Entry *Tx ;
|
||||
Unit **LUbx ;
|
||||
Int n, nk, nz, block, nblocks, maxblock, k1 ;
|
||||
size_t m1 ;
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
n = Symbolic->n ;
|
||||
R = Symbolic->R ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
maxblock = Symbolic->maxblock ;
|
||||
|
||||
Lip = Numeric->Lip ;
|
||||
Llen = Numeric->Llen ;
|
||||
Uip = Numeric->Uip ;
|
||||
Ulen = Numeric->Ulen ;
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
|
||||
m1 = ((size_t) maxblock) + 1 ;
|
||||
|
||||
/* allocate workspace */
|
||||
nz = MAX (Numeric->max_lnz_block, Numeric->max_unz_block) ;
|
||||
W = KLU_malloc (maxblock, sizeof (Int), Common) ;
|
||||
Tp = KLU_malloc (m1, sizeof (Int), Common) ;
|
||||
Ti = KLU_malloc (nz, sizeof (Int), Common) ;
|
||||
Tx = KLU_malloc (nz, sizeof (Entry), Common) ;
|
||||
|
||||
PRINTF (("\n======================= Start sort:\n")) ;
|
||||
|
||||
if (Common->status == KLU_OK)
|
||||
{
|
||||
/* sort each block of L and U */
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
k1 = R [block] ;
|
||||
nk = R [block+1] - k1 ;
|
||||
if (nk > 1)
|
||||
{
|
||||
PRINTF (("\n-------------------block: %d nk %d\n", block, nk)) ;
|
||||
sort (nk, Lip + k1, Llen + k1, LUbx [block], Tp, Ti, Tx, W) ;
|
||||
sort (nk, Uip + k1, Ulen + k1, LUbx [block], Tp, Ti, Tx, W) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PRINTF (("\n======================= sort done.\n")) ;
|
||||
|
||||
/* free workspace */
|
||||
KLU_free (W, maxblock, sizeof (Int), Common) ;
|
||||
KLU_free (Tp, m1, sizeof (Int), Common) ;
|
||||
KLU_free (Ti, nz, sizeof (Int), Common) ;
|
||||
KLU_free (Tx, nz, sizeof (Entry), Common) ;
|
||||
return (Common->status == KLU_OK) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,465 @@
|
|||
/* ========================================================================== */
|
||||
/* === KLU_tsolve =========================================================== */
|
||||
/* ========================================================================== */
|
||||
|
||||
/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
|
||||
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
|
||||
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
|
||||
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
|
||||
* Numeric->Iwork).
|
||||
*/
|
||||
|
||||
#include "klu_internal.h"
|
||||
|
||||
Int KLU_tsolve
|
||||
(
|
||||
/* inputs, not modified */
|
||||
KLU_symbolic *Symbolic,
|
||||
KLU_numeric *Numeric,
|
||||
Int d, /* leading dimension of B */
|
||||
Int nrhs, /* number of right-hand-sides */
|
||||
|
||||
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
||||
double B [ ], /* size n*nrhs, in column-oriented form, with
|
||||
* leading dimension d. */
|
||||
#ifdef COMPLEX
|
||||
Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
|
||||
* array transpose solve. Used for the complex
|
||||
* case only. */
|
||||
#endif
|
||||
/* --------------- */
|
||||
KLU_common *Common
|
||||
)
|
||||
{
|
||||
Entry x [4], offik, s ;
|
||||
double rs, *Rs ;
|
||||
Entry *Offx, *X, *Bz, *Udiag ;
|
||||
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
|
||||
Unit **LUbx ;
|
||||
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* check inputs */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
if (Common == NULL)
|
||||
{
|
||||
return (FALSE) ;
|
||||
}
|
||||
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
|
||||
B == NULL)
|
||||
{
|
||||
Common->status = KLU_INVALID ;
|
||||
return (FALSE) ;
|
||||
}
|
||||
Common->status = KLU_OK ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Symbolic object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Bz = (Entry *) B ;
|
||||
n = Symbolic->n ;
|
||||
nblocks = Symbolic->nblocks ;
|
||||
Q = Symbolic->Q ;
|
||||
R = Symbolic->R ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* get the contents of the Numeric object */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
ASSERT (nblocks == Numeric->nblocks) ;
|
||||
Pnum = Numeric->Pnum ;
|
||||
Offp = Numeric->Offp ;
|
||||
Offi = Numeric->Offi ;
|
||||
Offx = (Entry *) Numeric->Offx ;
|
||||
|
||||
Lip = Numeric->Lip ;
|
||||
Llen = Numeric->Llen ;
|
||||
Uip = Numeric->Uip ;
|
||||
Ulen = Numeric->Ulen ;
|
||||
LUbx = (Unit **) Numeric->LUbx ;
|
||||
Udiag = Numeric->Udiag ;
|
||||
|
||||
Rs = Numeric->Rs ;
|
||||
X = (Entry *) Numeric->Xwork ;
|
||||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* solve in chunks of 4 columns at a time */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
|
||||
{
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* get the size of the current chunk */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
nr = MIN (nrhs - chunk, 4) ;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* permute the right hand side, X = Q'*B */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
X [k] = Bz [Q [k]] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
X [2*k ] = Bz [i ] ;
|
||||
X [2*k + 1] = Bz [i + d ] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
X [3*k ] = Bz [i ] ;
|
||||
X [3*k + 1] = Bz [i + d ] ;
|
||||
X [3*k + 2] = Bz [i + d*2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Q [k] ;
|
||||
X [4*k ] = Bz [i ] ;
|
||||
X [4*k + 1] = Bz [i + d ] ;
|
||||
X [4*k + 2] = Bz [i + d*2] ;
|
||||
X [4*k + 3] = Bz [i + d*3] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* solve X = (L*U + Off)'\X */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
for (block = 0 ; block < nblocks ; block++)
|
||||
{
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* the block of size nk is from rows/columns k1 to k2-1 */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
k1 = R [block] ;
|
||||
k2 = R [block+1] ;
|
||||
nk = k2 - k1 ;
|
||||
PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* block back-substitution for the off-diagonal-block entries */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
if (block > 0)
|
||||
{
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
MULT_SUB_CONJ (X [k], X [Offi [p]],
|
||||
Offx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [2*k ] ;
|
||||
x [1] = X [2*k + 1] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (offik, Offx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
offik = Offx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], offik, X [2*i]) ;
|
||||
MULT_SUB (x [1], offik, X [2*i + 1]) ;
|
||||
}
|
||||
X [2*k ] = x [0] ;
|
||||
X [2*k + 1] = x [1] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [3*k ] ;
|
||||
x [1] = X [3*k + 1] ;
|
||||
x [2] = X [3*k + 2] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (offik, Offx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
offik = Offx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], offik, X [3*i]) ;
|
||||
MULT_SUB (x [1], offik, X [3*i + 1]) ;
|
||||
MULT_SUB (x [2], offik, X [3*i + 2]) ;
|
||||
}
|
||||
X [3*k ] = x [0] ;
|
||||
X [3*k + 1] = x [1] ;
|
||||
X [3*k + 2] = x [2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = k1 ; k < k2 ; k++)
|
||||
{
|
||||
pend = Offp [k+1] ;
|
||||
x [0] = X [4*k ] ;
|
||||
x [1] = X [4*k + 1] ;
|
||||
x [2] = X [4*k + 2] ;
|
||||
x [3] = X [4*k + 3] ;
|
||||
for (p = Offp [k] ; p < pend ; p++)
|
||||
{
|
||||
i = Offi [p] ;
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ(offik, Offx [p]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
offik = Offx [p] ;
|
||||
}
|
||||
MULT_SUB (x [0], offik, X [4*i]) ;
|
||||
MULT_SUB (x [1], offik, X [4*i + 1]) ;
|
||||
MULT_SUB (x [2], offik, X [4*i + 2]) ;
|
||||
MULT_SUB (x [3], offik, X [4*i + 3]) ;
|
||||
}
|
||||
X [4*k ] = x [0] ;
|
||||
X [4*k + 1] = x [1] ;
|
||||
X [4*k + 2] = x [2] ;
|
||||
X [4*k + 3] = x [3] ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------- */
|
||||
/* solve the block system */
|
||||
/* -------------------------------------------------------------- */
|
||||
|
||||
if (nk == 1)
|
||||
{
|
||||
#ifdef COMPLEX
|
||||
if (conj_solve)
|
||||
{
|
||||
CONJ (s, Udiag [k1]) ;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
s = Udiag [k1] ;
|
||||
}
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
DIV (X [k1], X [k1], s) ;
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
DIV (X [2*k1], X [2*k1], s) ;
|
||||
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
DIV (X [3*k1], X [3*k1], s) ;
|
||||
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
|
||||
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
DIV (X [4*k1], X [4*k1], s) ;
|
||||
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
|
||||
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
|
||||
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
|
||||
break ;
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
|
||||
Udiag + k1, nr,
|
||||
#ifdef COMPLEX
|
||||
conj_solve,
|
||||
#endif
|
||||
X + nr*k1) ;
|
||||
KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
|
||||
#ifdef COMPLEX
|
||||
conj_solve,
|
||||
#endif
|
||||
X + nr*k1) ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* scale and permute the result, Bz = P'(R\X) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
if (Rs == NULL)
|
||||
{
|
||||
|
||||
/* no scaling */
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
Bz [Pnum [k]] = X [k] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
Bz [i ] = X [2*k ] ;
|
||||
Bz [i + d ] = X [2*k + 1] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
Bz [i ] = X [3*k ] ;
|
||||
Bz [i + d ] = X [3*k + 1] ;
|
||||
Bz [i + d*2] = X [3*k + 2] ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
Bz [i ] = X [4*k ] ;
|
||||
Bz [i + d ] = X [4*k + 1] ;
|
||||
Bz [i + d*2] = X [4*k + 2] ;
|
||||
Bz [i + d*3] = X [4*k + 3] ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
switch (nr)
|
||||
{
|
||||
|
||||
case 1:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 2:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 3:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
|
||||
}
|
||||
break ;
|
||||
|
||||
case 4:
|
||||
|
||||
for (k = 0 ; k < n ; k++)
|
||||
{
|
||||
i = Pnum [k] ;
|
||||
rs = Rs [k] ;
|
||||
SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
|
||||
SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
|
||||
}
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* go to the next chunk of B */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
Bz += d*4 ;
|
||||
}
|
||||
return (TRUE) ;
|
||||
}
|
||||
|
|
@ -0,0 +1,694 @@
|
|||
#ifndef _KLU_VERSION_H
|
||||
#define _KLU_VERSION_H
|
||||
|
||||
#ifdef DLONG
|
||||
#define Int UF_long
|
||||
#define Int_id UF_long_id
|
||||
#define Int_MAX UF_long_max
|
||||
#else
|
||||
#define Int int
|
||||
#define Int_id "%d"
|
||||
#define Int_MAX INT_MAX
|
||||
#endif
|
||||
|
||||
#define NPRINT
|
||||
|
||||
#define BYTES(type,n) (sizeof (type) * (n))
|
||||
#define CEILING(b,u) (((b)+(u)-1) / (u))
|
||||
#define UNITS(type,n) (CEILING (BYTES (type,n), sizeof (Unit)))
|
||||
#define DUNITS(type,n) (ceil (BYTES (type, (double) n) / sizeof (Unit)))
|
||||
|
||||
#define GET_I_POINTER(LU, Xip, Xi, k) \
|
||||
{ \
|
||||
Xi = (Int *) (LU + Xip [k]) ; \
|
||||
}
|
||||
|
||||
#define GET_X_POINTER(LU, Xip, Xlen, Xx, k) \
|
||||
{ \
|
||||
Xx = (Entry *) (LU + Xip [k] + UNITS (Int, Xlen [k])) ; \
|
||||
}
|
||||
|
||||
#define GET_POINTER(LU, Xip, Xlen, Xi, Xx, k, xlen) \
|
||||
{ \
|
||||
Unit *xp = LU + Xip [k] ; \
|
||||
xlen = Xlen [k] ; \
|
||||
Xi = (Int *) xp ; \
|
||||
Xx = (Entry *) (xp + UNITS (Int, xlen)) ; \
|
||||
}
|
||||
|
||||
/* function names */
|
||||
#ifdef COMPLEX
|
||||
|
||||
#ifdef DLONG
|
||||
|
||||
#define KLU_scale klu_zl_scale
|
||||
#define KLU_solve klu_zl_solve
|
||||
#define KLU_tsolve klu_zl_tsolve
|
||||
#define KLU_free_numeric klu_zl_free_numeric
|
||||
#define KLU_factor klu_zl_factor
|
||||
#define KLU_refactor klu_zl_refactor
|
||||
#define KLU_kernel_factor klu_zl_kernel_factor
|
||||
#define KLU_lsolve klu_zl_lsolve
|
||||
#define KLU_ltsolve klu_zl_ltsolve
|
||||
#define KLU_usolve klu_zl_usolve
|
||||
#define KLU_utsolve klu_zl_utsolve
|
||||
#define KLU_kernel klu_zl_kernel
|
||||
#define KLU_valid klu_zl_valid
|
||||
#define KLU_valid_LU klu_zl_valid_LU
|
||||
#define KLU_sort klu_zl_sort
|
||||
#define KLU_rgrowth klu_zl_rgrowth
|
||||
#define KLU_rcond klu_zl_rcond
|
||||
#define KLU_extract klu_zl_extract
|
||||
#define KLU_condest klu_zl_condest
|
||||
#define KLU_flops klu_zl_flops
|
||||
|
||||
#else
|
||||
|
||||
#define KLU_scale klu_z_scale
|
||||
#define KLU_solve klu_z_solve
|
||||
#define KLU_tsolve klu_z_tsolve
|
||||
#define KLU_free_numeric klu_z_free_numeric
|
||||
#define KLU_factor klu_z_factor
|
||||
#define KLU_refactor klu_z_refactor
|
||||
#define KLU_kernel_factor klu_z_kernel_factor
|
||||
#define KLU_lsolve klu_z_lsolve
|
||||
#define KLU_ltsolve klu_z_ltsolve
|
||||
#define KLU_usolve klu_z_usolve
|
||||
#define KLU_utsolve klu_z_utsolve
|
||||
#define KLU_kernel klu_z_kernel
|
||||
#define KLU_valid klu_z_valid
|
||||
#define KLU_valid_LU klu_z_valid_LU
|
||||
#define KLU_sort klu_z_sort
|
||||
#define KLU_rgrowth klu_z_rgrowth
|
||||
#define KLU_rcond klu_z_rcond
|
||||
#define KLU_extract klu_z_extract
|
||||
#define KLU_condest klu_z_condest
|
||||
#define KLU_flops klu_z_flops
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DLONG
|
||||
|
||||
#define KLU_scale klu_l_scale
|
||||
#define KLU_solve klu_l_solve
|
||||
#define KLU_tsolve klu_l_tsolve
|
||||
#define KLU_free_numeric klu_l_free_numeric
|
||||
#define KLU_factor klu_l_factor
|
||||
#define KLU_refactor klu_l_refactor
|
||||
#define KLU_kernel_factor klu_l_kernel_factor
|
||||
#define KLU_lsolve klu_l_lsolve
|
||||
#define KLU_ltsolve klu_l_ltsolve
|
||||
#define KLU_usolve klu_l_usolve
|
||||
#define KLU_utsolve klu_l_utsolve
|
||||
#define KLU_kernel klu_l_kernel
|
||||
#define KLU_valid klu_l_valid
|
||||
#define KLU_valid_LU klu_l_valid_LU
|
||||
#define KLU_sort klu_l_sort
|
||||
#define KLU_rgrowth klu_l_rgrowth
|
||||
#define KLU_rcond klu_l_rcond
|
||||
#define KLU_extract klu_l_extract
|
||||
#define KLU_condest klu_l_condest
|
||||
#define KLU_flops klu_l_flops
|
||||
|
||||
#else
|
||||
|
||||
#define KLU_scale klu_scale
|
||||
#define KLU_solve klu_solve
|
||||
#define KLU_tsolve klu_tsolve
|
||||
#define KLU_free_numeric klu_free_numeric
|
||||
#define KLU_factor klu_factor
|
||||
#define KLU_refactor klu_refactor
|
||||
#define KLU_kernel_factor klu_kernel_factor
|
||||
#define KLU_lsolve klu_lsolve
|
||||
#define KLU_ltsolve klu_ltsolve
|
||||
#define KLU_usolve klu_usolve
|
||||
#define KLU_utsolve klu_utsolve
|
||||
#define KLU_kernel klu_kernel
|
||||
#define KLU_valid klu_valid
|
||||
#define KLU_valid_LU klu_valid_LU
|
||||
#define KLU_sort klu_sort
|
||||
#define KLU_rgrowth klu_rgrowth
|
||||
#define KLU_rcond klu_rcond
|
||||
#define KLU_extract klu_extract
|
||||
#define KLU_condest klu_condest
|
||||
#define KLU_flops klu_flops
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DLONG
|
||||
|
||||
#define KLU_analyze klu_l_analyze
|
||||
#define KLU_analyze_given klu_l_analyze_given
|
||||
#define KLU_alloc_symbolic klu_l_alloc_symbolic
|
||||
#define KLU_free_symbolic klu_l_free_symbolic
|
||||
#define KLU_defaults klu_l_defaults
|
||||
#define KLU_free klu_l_free
|
||||
#define KLU_malloc klu_l_malloc
|
||||
#define KLU_realloc klu_l_realloc
|
||||
#define KLU_add_size_t klu_l_add_size_t
|
||||
#define KLU_mult_size_t klu_l_mult_size_t
|
||||
|
||||
#define KLU_symbolic klu_l_symbolic
|
||||
#define KLU_numeric klu_l_numeric
|
||||
#define KLU_common klu_l_common
|
||||
|
||||
#define BTF_order btf_l_order
|
||||
#define BTF_strongcomp btf_l_strongcomp
|
||||
|
||||
#define AMD_order amd_l_order
|
||||
#define COLAMD colamd_l
|
||||
#define COLAMD_recommended colamd_l_recommended
|
||||
|
||||
#else
|
||||
|
||||
#define KLU_analyze klu_analyze
|
||||
#define KLU_analyze_given klu_analyze_given
|
||||
#define KLU_alloc_symbolic klu_alloc_symbolic
|
||||
#define KLU_free_symbolic klu_free_symbolic
|
||||
#define KLU_defaults klu_defaults
|
||||
#define KLU_free klu_free
|
||||
#define KLU_malloc klu_malloc
|
||||
#define KLU_realloc klu_realloc
|
||||
#define KLU_add_size_t klu_add_size_t
|
||||
#define KLU_mult_size_t klu_mult_size_t
|
||||
|
||||
#define KLU_symbolic klu_symbolic
|
||||
#define KLU_numeric klu_numeric
|
||||
#define KLU_common klu_common
|
||||
|
||||
#define BTF_order btf_order
|
||||
#define BTF_strongcomp btf_strongcomp
|
||||
|
||||
#define AMD_order amd_order
|
||||
#define COLAMD colamd
|
||||
#define COLAMD_recommended colamd_recommended
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Numerical relop macros for correctly handling the NaN case */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
SCALAR_IS_NAN(x):
|
||||
True if x is NaN. False otherwise. The commonly-existing isnan(x)
|
||||
function could be used, but it's not in Kernighan & Ritchie 2nd edition
|
||||
(ANSI C). It may appear in <math.h>, but I'm not certain about
|
||||
portability. The expression x != x is true if and only if x is NaN,
|
||||
according to the IEEE 754 floating-point standard.
|
||||
|
||||
SCALAR_IS_ZERO(x):
|
||||
True if x is zero. False if x is nonzero, NaN, or +/- Inf.
|
||||
This is (x == 0) if the compiler is IEEE 754 compliant.
|
||||
|
||||
SCALAR_IS_NONZERO(x):
|
||||
True if x is nonzero, NaN, or +/- Inf. False if x zero.
|
||||
This is (x != 0) if the compiler is IEEE 754 compliant.
|
||||
|
||||
SCALAR_IS_LTZERO(x):
|
||||
True if x is < zero or -Inf. False if x is >= 0, NaN, or +Inf.
|
||||
This is (x < 0) if the compiler is IEEE 754 compliant.
|
||||
*/
|
||||
|
||||
/* These all work properly, according to the IEEE 754 standard ... except on */
|
||||
/* a PC with windows. Works fine in Linux on the same PC... */
|
||||
#define SCALAR_IS_NAN(x) ((x) != (x))
|
||||
#define SCALAR_IS_ZERO(x) ((x) == 0.)
|
||||
#define SCALAR_IS_NONZERO(x) ((x) != 0.)
|
||||
#define SCALAR_IS_LTZERO(x) ((x) < 0.)
|
||||
|
||||
|
||||
/* scalar absolute value macro. If x is NaN, the result is NaN: */
|
||||
#define SCALAR_ABS(x) ((SCALAR_IS_LTZERO (x)) ? -(x) : (x))
|
||||
|
||||
/* print a scalar (avoid printing "-0" for negative zero). */
|
||||
#ifdef NPRINT
|
||||
#define PRINT_SCALAR(a)
|
||||
#else
|
||||
#define PRINT_SCALAR(a) \
|
||||
{ \
|
||||
if (SCALAR_IS_NONZERO (a)) \
|
||||
{ \
|
||||
PRINTF ((" (%g)", (a))) ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PRINTF ((" (0)")) ; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Real floating-point arithmetic */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
typedef double Unit ;
|
||||
#define Entry double
|
||||
|
||||
#define SPLIT(s) (1)
|
||||
#define REAL(c) (c)
|
||||
#define IMAG(c) (0.)
|
||||
#define ASSIGN(c,s1,s2,p,split) { (c) = (s1)[p] ; }
|
||||
#define CLEAR(c) { (c) = 0. ; }
|
||||
#define CLEAR_AND_INCREMENT(p) { *p++ = 0. ; }
|
||||
#define IS_NAN(a) SCALAR_IS_NAN (a)
|
||||
#define IS_ZERO(a) SCALAR_IS_ZERO (a)
|
||||
#define IS_NONZERO(a) SCALAR_IS_NONZERO (a)
|
||||
#define SCALE_DIV(c,s) { (c) /= (s) ; }
|
||||
#define SCALE_DIV_ASSIGN(a,c,s) { a = c / s ; }
|
||||
#define SCALE(c,s) { (c) *= (s) ; }
|
||||
#define ASSEMBLE(c,a) { (c) += (a) ; }
|
||||
#define ASSEMBLE_AND_INCREMENT(c,p) { (c) += *p++ ; }
|
||||
#define DECREMENT(c,a) { (c) -= (a) ; }
|
||||
#define MULT(c,a,b) { (c) = (a) * (b) ; }
|
||||
#define MULT_CONJ(c,a,b) { (c) = (a) * (b) ; }
|
||||
#define MULT_SUB(c,a,b) { (c) -= (a) * (b) ; }
|
||||
#define MULT_SUB_CONJ(c,a,b) { (c) -= (a) * (b) ; }
|
||||
#define DIV(c,a,b) { (c) = (a) / (b) ; }
|
||||
#define RECIPROCAL(c) { (c) = 1.0 / (c) ; }
|
||||
#define DIV_CONJ(c,a,b) { (c) = (a) / (b) ; }
|
||||
#define APPROX_ABS(s,a) { (s) = SCALAR_ABS (a) ; }
|
||||
#define ABS(s,a) { (s) = SCALAR_ABS (a) ; }
|
||||
#define PRINT_ENTRY(a) PRINT_SCALAR (a)
|
||||
#define CONJ(a,x) a = x
|
||||
|
||||
/* for flop counts */
|
||||
#define MULTSUB_FLOPS 2. /* c -= a*b */
|
||||
#define DIV_FLOPS 1. /* c = a/b */
|
||||
#define ABS_FLOPS 0. /* c = abs (a) */
|
||||
#define ASSEMBLE_FLOPS 1. /* c += a */
|
||||
#define DECREMENT_FLOPS 1. /* c -= a */
|
||||
#define MULT_FLOPS 1. /* c = a*b */
|
||||
#define SCALE_FLOPS 1. /* c = a/s */
|
||||
|
||||
#else
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* Complex floating-point arithmetic */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
Note: An alternative to this Double_Complex type would be to use a
|
||||
struct { double r ; double i ; }. The problem with that method
|
||||
(used by the Sun Performance Library, for example) is that ANSI C provides
|
||||
no guarantee about the layout of a struct. It is possible that the sizeof
|
||||
the struct above would be greater than 2 * sizeof (double). This would
|
||||
mean that the complex BLAS could not be used. The method used here avoids
|
||||
that possibility. ANSI C *does* guarantee that an array of structs has
|
||||
the same size as n times the size of one struct.
|
||||
|
||||
The ANSI C99 version of the C language includes a "double _Complex" type.
|
||||
It should be possible in that case to do the following:
|
||||
|
||||
#define Entry double _Complex
|
||||
|
||||
and remove the Double_Complex struct. The macros, below, could then be
|
||||
replaced with instrinsic operators. Note that the #define Real and
|
||||
#define Imag should also be removed (they only appear in this file).
|
||||
|
||||
For the MULT, MULT_SUB, MULT_SUB_CONJ, and MULT_CONJ macros,
|
||||
the output argument c cannot be the same as any input argument.
|
||||
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
double component [2] ; /* real and imaginary parts */
|
||||
|
||||
} Double_Complex ;
|
||||
|
||||
typedef Double_Complex Unit ;
|
||||
#define Entry Double_Complex
|
||||
#define Real component [0]
|
||||
#define Imag component [1]
|
||||
|
||||
/* for flop counts */
|
||||
#define MULTSUB_FLOPS 8. /* c -= a*b */
|
||||
#define DIV_FLOPS 9. /* c = a/b */
|
||||
#define ABS_FLOPS 6. /* c = abs (a), count sqrt as one flop */
|
||||
#define ASSEMBLE_FLOPS 2. /* c += a */
|
||||
#define DECREMENT_FLOPS 2. /* c -= a */
|
||||
#define MULT_FLOPS 6. /* c = a*b */
|
||||
#define SCALE_FLOPS 2. /* c = a/s or c = a*s */
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* real part of c */
|
||||
#define REAL(c) ((c).Real)
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* imag part of c */
|
||||
#define IMAG(c) ((c).Imag)
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Return TRUE if a complex number is in split form, FALSE if in packed form */
|
||||
#define SPLIT(sz) ((sz) != (double *) NULL)
|
||||
|
||||
/* c = (s1) + (s2)*i, if s2 is null, then X is in "packed" format (compatible
|
||||
* with Entry and ANSI C99 double _Complex type). */
|
||||
/*#define ASSIGN(c,s1,s2,p,split) \
|
||||
{ \
|
||||
if (split) \
|
||||
{ \
|
||||
(c).Real = (s1)[p] ; \
|
||||
(c).Imag = (s2)[p] ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
(c) = ((Entry *)(s1))[p] ; \
|
||||
} \
|
||||
}*/
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
#define CONJ(a, x) \
|
||||
{ \
|
||||
a.Real = x.Real ; \
|
||||
a.Imag = -x.Imag ; \
|
||||
}
|
||||
|
||||
/* c = 0 */
|
||||
#define CLEAR(c) \
|
||||
{ \
|
||||
(c).Real = 0. ; \
|
||||
(c).Imag = 0. ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* *p++ = 0 */
|
||||
#define CLEAR_AND_INCREMENT(p) \
|
||||
{ \
|
||||
p->Real = 0. ; \
|
||||
p->Imag = 0. ; \
|
||||
p++ ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* True if a == 0 */
|
||||
#define IS_ZERO(a) \
|
||||
(SCALAR_IS_ZERO ((a).Real) && SCALAR_IS_ZERO ((a).Imag))
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* True if a is NaN */
|
||||
#define IS_NAN(a) \
|
||||
(SCALAR_IS_NAN ((a).Real) || SCALAR_IS_NAN ((a).Imag))
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* True if a != 0 */
|
||||
#define IS_NONZERO(a) \
|
||||
(SCALAR_IS_NONZERO ((a).Real) || SCALAR_IS_NONZERO ((a).Imag))
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* a = c/s */
|
||||
#define SCALE_DIV_ASSIGN(a,c,s) \
|
||||
{ \
|
||||
a.Real = c.Real / s ; \
|
||||
a.Imag = c.Imag / s ; \
|
||||
}
|
||||
|
||||
/* c /= s */
|
||||
#define SCALE_DIV(c,s) \
|
||||
{ \
|
||||
(c).Real /= (s) ; \
|
||||
(c).Imag /= (s) ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c *= s */
|
||||
#define SCALE(c,s) \
|
||||
{ \
|
||||
(c).Real *= (s) ; \
|
||||
(c).Imag *= (s) ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c += a */
|
||||
#define ASSEMBLE(c,a) \
|
||||
{ \
|
||||
(c).Real += (a).Real ; \
|
||||
(c).Imag += (a).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c += *p++ */
|
||||
#define ASSEMBLE_AND_INCREMENT(c,p) \
|
||||
{ \
|
||||
(c).Real += p->Real ; \
|
||||
(c).Imag += p->Imag ; \
|
||||
p++ ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c -= a */
|
||||
#define DECREMENT(c,a) \
|
||||
{ \
|
||||
(c).Real -= (a).Real ; \
|
||||
(c).Imag -= (a).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c = a*b, assert because c cannot be the same as a or b */
|
||||
#define MULT(c,a,b) \
|
||||
{ \
|
||||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
|
||||
(c).Real = (a).Real * (b).Real - (a).Imag * (b).Imag ; \
|
||||
(c).Imag = (a).Imag * (b).Real + (a).Real * (b).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c = a*conjugate(b), assert because c cannot be the same as a or b */
|
||||
#define MULT_CONJ(c,a,b) \
|
||||
{ \
|
||||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
|
||||
(c).Real = (a).Real * (b).Real + (a).Imag * (b).Imag ; \
|
||||
(c).Imag = (a).Imag * (b).Real - (a).Real * (b).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c -= a*b, assert because c cannot be the same as a or b */
|
||||
#define MULT_SUB(c,a,b) \
|
||||
{ \
|
||||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
|
||||
(c).Real -= (a).Real * (b).Real - (a).Imag * (b).Imag ; \
|
||||
(c).Imag -= (a).Imag * (b).Real + (a).Real * (b).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c -= a*conjugate(b), assert because c cannot be the same as a or b */
|
||||
#define MULT_SUB_CONJ(c,a,b) \
|
||||
{ \
|
||||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \
|
||||
(c).Real -= (a).Real * (b).Real + (a).Imag * (b).Imag ; \
|
||||
(c).Imag -= (a).Imag * (b).Real - (a).Real * (b).Imag ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c = a/b, be careful to avoid underflow and overflow */
|
||||
#ifdef MATHWORKS
|
||||
#define DIV(c,a,b) \
|
||||
{ \
|
||||
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (b).Imag, \
|
||||
&((c).Real), &((c).Imag)) ; \
|
||||
}
|
||||
#else
|
||||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
|
||||
/* c can be the same variable as a or b. */
|
||||
/* Ignore NaN case for double relop br>=bi. */
|
||||
#define DIV(c,a,b) \
|
||||
{ \
|
||||
double r, den, ar, ai, br, bi ; \
|
||||
br = (b).Real ; \
|
||||
bi = (b).Imag ; \
|
||||
ar = (a).Real ; \
|
||||
ai = (a).Imag ; \
|
||||
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \
|
||||
{ \
|
||||
r = bi / br ; \
|
||||
den = br + r * bi ; \
|
||||
(c).Real = (ar + ai * r) / den ; \
|
||||
(c).Imag = (ai - ar * r) / den ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
r = br / bi ; \
|
||||
den = r * br + bi ; \
|
||||
(c).Real = (ar * r + ai) / den ; \
|
||||
(c).Imag = (ai * r - ar) / den ; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c = 1/c, be careful to avoid underflow and overflow */
|
||||
/* Not used if MATHWORKS is defined. */
|
||||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
|
||||
/* Ignore NaN case for double relop cr>=ci. */
|
||||
#define RECIPROCAL(c) \
|
||||
{ \
|
||||
double r, den, cr, ci ; \
|
||||
cr = (c).Real ; \
|
||||
ci = (c).Imag ; \
|
||||
if (SCALAR_ABS (cr) >= SCALAR_ABS (ci)) \
|
||||
{ \
|
||||
r = ci / cr ; \
|
||||
den = cr + r * ci ; \
|
||||
(c).Real = 1.0 / den ; \
|
||||
(c).Imag = - r / den ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
r = cr / ci ; \
|
||||
den = r * cr + ci ; \
|
||||
(c).Real = r / den ; \
|
||||
(c).Imag = - 1.0 / den ; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* c = a/conjugate(b), be careful to avoid underflow and overflow */
|
||||
#ifdef MATHWORKS
|
||||
#define DIV_CONJ(c,a,b) \
|
||||
{ \
|
||||
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (-(b).Imag), \
|
||||
&((c).Real), &((c).Imag)) ; \
|
||||
}
|
||||
#else
|
||||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */
|
||||
/* c can be the same variable as a or b. */
|
||||
/* Ignore NaN case for double relop br>=bi. */
|
||||
#define DIV_CONJ(c,a,b) \
|
||||
{ \
|
||||
double r, den, ar, ai, br, bi ; \
|
||||
br = (b).Real ; \
|
||||
bi = (b).Imag ; \
|
||||
ar = (a).Real ; \
|
||||
ai = (a).Imag ; \
|
||||
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \
|
||||
{ \
|
||||
r = (-bi) / br ; \
|
||||
den = br - r * bi ; \
|
||||
(c).Real = (ar + ai * r) / den ; \
|
||||
(c).Imag = (ai - ar * r) / den ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
r = br / (-bi) ; \
|
||||
den = r * br - bi; \
|
||||
(c).Real = (ar * r + ai) / den ; \
|
||||
(c).Imag = (ai * r - ar) / den ; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* approximate absolute value, s = |r|+|i| */
|
||||
#define APPROX_ABS(s,a) \
|
||||
{ \
|
||||
(s) = SCALAR_ABS ((a).Real) + SCALAR_ABS ((a).Imag) ; \
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* exact absolute value, s = sqrt (a.real^2 + amag^2) */
|
||||
#ifdef MATHWORKS
|
||||
#define ABS(s,a) \
|
||||
{ \
|
||||
(s) = utFdlibm_hypot ((a).Real, (a).Imag) ; \
|
||||
}
|
||||
#else
|
||||
/* Ignore NaN case for the double relops ar>=ai and ar+ai==ar. */
|
||||
#define ABS(s,a) \
|
||||
{ \
|
||||
double r, ar, ai ; \
|
||||
ar = SCALAR_ABS ((a).Real) ; \
|
||||
ai = SCALAR_ABS ((a).Imag) ; \
|
||||
if (ar >= ai) \
|
||||
{ \
|
||||
if (ar + ai == ar) \
|
||||
{ \
|
||||
(s) = ar ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
r = ai / ar ; \
|
||||
(s) = ar * sqrt (1.0 + r*r) ; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if (ai + ar == ai) \
|
||||
{ \
|
||||
(s) = ai ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
r = ar / ai ; \
|
||||
(s) = ai * sqrt (1.0 + r*r) ; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* print an entry (avoid printing "-0" for negative zero). */
|
||||
#ifdef NPRINT
|
||||
#define PRINT_ENTRY(a)
|
||||
#else
|
||||
#define PRINT_ENTRY(a) \
|
||||
{ \
|
||||
if (SCALAR_IS_NONZERO ((a).Real)) \
|
||||
{ \
|
||||
PRINTF ((" (%g", (a).Real)) ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PRINTF ((" (0")) ; \
|
||||
} \
|
||||
if (SCALAR_IS_LTZERO ((a).Imag)) \
|
||||
{ \
|
||||
PRINTF ((" - %gi)", -(a).Imag)) ; \
|
||||
} \
|
||||
else if (SCALAR_IS_ZERO ((a).Imag)) \
|
||||
{ \
|
||||
PRINTF ((" + 0i)")) ; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PRINTF ((" + %gi)", (a).Imag)) ; \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
#endif /* #ifndef COMPLEX */
|
||||
|
||||
#endif
|
||||
Loading…
Reference in New Issue