slu_util.h
Go to the documentation of this file.
1 
11 #ifndef __SUPERLU_UTIL /* allow multiple inclusions */
12 #define __SUPERLU_UTIL
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 /*
18 #ifndef __STDC__
19 #include <malloc.h>
20 #endif
21 */
22 #include <assert.h>
23 #include "superlu_enum_consts.h"
24 
25 /***********************************************************************
26  * Macros
27  ***********************************************************************/
28 #define FIRSTCOL_OF_SNODE(i) (xsup[i])
29 /* No of marker arrays used in the symbolic factorization,
30  each of size n */
31 #define NO_MARKER 3
32 #define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) )
33 
34 #ifndef USER_ABORT
35 #define USER_ABORT(msg) superlu_abort_and_exit(msg)
36 #endif
37 
38 #define ABORT(err_msg) \
39  { char msg[256];\
40  sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
41  USER_ABORT(msg); }
42 
43 
44 #ifndef USER_MALLOC
45 #if 1
46 #define USER_MALLOC(size) superlu_malloc(size)
47 #else
48 /* The following may check out some uninitialized data */
49 #define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
50 #endif
51 #endif
52 
53 #define SUPERLU_MALLOC(size) USER_MALLOC(size)
54 
55 #ifndef USER_FREE
56 #define USER_FREE(addr) superlu_free(addr)
57 #endif
58 
59 #define SUPERLU_FREE(addr) USER_FREE(addr)
60 
61 #define CHECK_MALLOC(where) { \
62  extern int superlu_malloc_total; \
63  printf("%s: malloc_total %d Bytes\n", \
64  where, superlu_malloc_total); \
65 }
66 
67 #define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
68 #define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
69 
70 /*********************************************************
71  * Macros used for easy access of sparse matrix entries. *
72  *********************************************************/
73 #define L_SUB_START(col) ( Lstore->rowind_colptr[col] )
74 #define L_SUB(ptr) ( Lstore->rowind[ptr] )
75 #define L_NZ_START(col) ( Lstore->nzval_colptr[col] )
76 #define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
77 #define U_NZ_START(col) ( Ustore->colptr[col] )
78 #define U_SUB(ptr) ( Ustore->rowind[ptr] )
79 
80 
81 /***********************************************************************
82  * Constants
83  ***********************************************************************/
84 #define EMPTY (-1)
85 /*#define NO (-1)*/
86 #define FALSE 0
87 #define TRUE 1
88 
89 #define NO_MEMTYPE 4 /* 0: lusup;
90  1: ucol;
91  2: lsub;
92  3: usub */
93 
94 #define GluIntArray(n) (5 * (n) + 5)
95 
96 /* Dropping rules */
97 #define NODROP ( 0x0000 )
98 #define DROP_BASIC ( 0x0001 ) /* ILU(tau) */
99 #define DROP_PROWS ( 0x0002 ) /* ILUTP: keep p maximum rows */
100 #define DROP_COLUMN ( 0x0004 ) /* ILUTP: for j-th column,
101  p = gamma * nnz(A(:,j)) */
102 #define DROP_AREA ( 0x0008 ) /* ILUTP: for j-th column, use
103  nnz(F(:,1:j)) / nnz(A(:,1:j))
104  to limit memory growth */
105 #define DROP_SECONDARY ( 0x000E ) /* PROWS | COLUMN | AREA */
106 #define DROP_DYNAMIC ( 0x0010 ) /* adaptive tau */
107 #define DROP_INTERP ( 0x0100 ) /* use interpolation */
108 
109 
110 #if 1
111 #define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */
112 #else
113 #define MILU_ALPHA 1.0 /* multiple of drop_sum to be added to diagonal */
114 #endif
115 
116 
117 /***********************************************************************
118  * Type definitions
119  ***********************************************************************/
120 typedef float flops_t;
121 typedef unsigned char Logical;
122 
123 /*
124  *-- This contains the options used to control the solution process.
125  *
126  * Fact (fact_t)
127  * Specifies whether or not the factored form of the matrix
128  * A is supplied on entry, and if not, how the matrix A should
129  * be factorizaed.
130  * = DOFACT: The matrix A will be factorized from scratch, and the
131  * factors will be stored in L and U.
132  * = SamePattern: The matrix A will be factorized assuming
133  * that a factorization of a matrix with the same sparsity
134  * pattern was performed prior to this one. Therefore, this
135  * factorization will reuse column permutation vector
136  * ScalePermstruct->perm_c and the column elimination tree
137  * LUstruct->etree.
138  * = SamePattern_SameRowPerm: The matrix A will be factorized
139  * assuming that a factorization of a matrix with the same
140  * sparsity pattern and similar numerical values was performed
141  * prior to this one. Therefore, this factorization will reuse
142  * both row and column scaling factors R and C, both row and
143  * column permutation vectors perm_r and perm_c, and the
144  * data structure set up from the previous symbolic factorization.
145  * = FACTORED: On entry, L, U, perm_r and perm_c contain the
146  * factored form of A. If DiagScale is not NOEQUIL, the matrix
147  * A has been equilibrated with scaling factors R and C.
148  *
149  * Equil (yes_no_t)
150  * Specifies whether to equilibrate the system (scale A's row and
151  * columns to have unit norm).
152  *
153  * ColPerm (colperm_t)
154  * Specifies what type of column permutation to use to reduce fill.
155  * = NATURAL: use the natural ordering
156  * = MMD_ATA: use minimum degree ordering on structure of A'*A
157  * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
158  * = COLAMD: use approximate minimum degree column ordering
159  * = MY_PERMC: use the ordering specified by the user
160  *
161  * Trans (trans_t)
162  * Specifies the form of the system of equations:
163  * = NOTRANS: A * X = B (No transpose)
164  * = TRANS: A**T * X = B (Transpose)
165  * = CONJ: A**H * X = B (Transpose)
166  *
167  * IterRefine (IterRefine_t)
168  * Specifies whether to perform iterative refinement.
169  * = NO: no iterative refinement
170  * = SLU_SINGLE: perform iterative refinement in single precision
171  * = SLU_DOUBLE: perform iterative refinement in double precision
172  * = SLU_EXTRA: perform iterative refinement in extra precision
173  *
174  * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
175  * Specifies the threshold used for a diagonal entry to be an
176  * acceptable pivot.
177  *
178  * SymmetricMode (yest_no_t)
179  * Specifies whether to use symmetric mode. Symmetric mode gives
180  * preference to diagonal pivots, and uses an (A'+A)-based column
181  * permutation algorithm.
182  *
183  * PivotGrowth (yes_no_t)
184  * Specifies whether to compute the reciprocal pivot growth.
185  *
186  * ConditionNumber (ues_no_t)
187  * Specifies whether to compute the reciprocal condition number.
188  *
189  * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU)
190  * Specifies whether to permute rows of the original matrix.
191  * = NO: not to permute the rows
192  * = LargeDiag: make the diagonal large relative to the off-diagonal
193  * = MY_PERMR: use the permutation given by the user
194  *
195  * ILU_DropRule (int)
196  * Specifies the dropping rule:
197  * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
198  * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
199  * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
200  * p = gamma * nnz(A(:,j)).
201  * = DROP_AREA: Variation of ILUTP, for j-th column, use
202  * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
203  * = DROP_DYNAMIC: Modify the threshold tau during factorizaion:
204  * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
205  * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
206  * Otherwise
207  * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
208  * tau_U(j) uses the similar rule.
209  * NOTE: the thresholds used by L and U are separate.
210  * = DROP_INTERP: Compute the second dropping threshold by
211  * interpolation instead of sorting (default).
212  * In this case, the actual fill ratio is not
213  * guaranteed to be smaller than gamma.
214  * Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
215  * ( Default: DROP_BASIC | DROP_AREA )
216  *
217  * ILU_DropTol (double)
218  * numerical threshold for dropping.
219  *
220  * ILU_FillFactor (double)
221  * Gamma in the secondary dropping.
222  *
223  * ILU_Norm (norm_t)
224  * Specify which norm to use to measure the row size in a
225  * supernode: infinity-norm, 1-norm, or 2-norm.
226  *
227  * ILU_FillTol (double)
228  * numerical threshold for zero pivot perturbation.
229  *
230  * ILU_MILU (milu_t)
231  * Specifies which version of MILU to use.
232  *
233  * ILU_MILU_Dim (double)
234  * Dimension of the PDE if available.
235  *
236  * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
237  * Specifies whether to replace the tiny diagonals by
238  * sqrt(epsilon)*||A|| during LU factorization.
239  *
240  * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
241  * Specifies whether the initialization has been performed to the
242  * triangular solve.
243  *
244  * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
245  * Specifies whether the initialization has been performed to the
246  * sparse matrix-vector multiplication routine needed in iterative
247  * refinement.
248  *
249  * PrintStat (yes_no_t)
250  * Specifies whether to print the solver's statistics.
251  */
252 typedef struct {
253  fact_t Fact;
254  yes_no_t Equil;
255  colperm_t ColPerm;
256  trans_t Trans;
257  IterRefine_t IterRefine;
258  double DiagPivotThresh;
259  yes_no_t SymmetricMode;
260  yes_no_t PivotGrowth;
261  yes_no_t ConditionNumber;
262  rowperm_t RowPerm;
263  int ILU_DropRule;
264  double ILU_DropTol; /* threshold for dropping */
265  double ILU_FillFactor; /* gamma in the secondary dropping */
266  norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */
267  double ILU_FillTol; /* threshold for zero pivot perturbation */
268  milu_t ILU_MILU;
269  double ILU_MILU_Dim; /* Dimension of PDE (if available) */
270  yes_no_t ParSymbFact;
271  yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */
272  yes_no_t SolveInitialized;
273  yes_no_t RefineInitialized;
274  yes_no_t PrintStat;
275  int nnzL, nnzU; /* used to store nnzs for now */
276  int num_lookaheads; /* num of levels in look-ahead */
277  yes_no_t lookahead_etree; /* use etree computed from the
278  serial symbolic factorization */
279  yes_no_t SymPattern; /* symmetric factorization */
281 
283 typedef struct e_node {
284  int size; /* length of the memory that has been used */
285  void *mem; /* pointer to the new malloc'd store */
287 
288 typedef struct {
289  int size;
290  int used;
291  int top1; /* grow upward, relative to &array[0] */
292  int top2; /* grow downward */
293  void *array;
295 
296 typedef struct {
297  int *panel_histo; /* histogram of panel size distribution */
298  double *utime; /* running time at various phases */
299  flops_t *ops; /* operation count at various phases */
300  int TinyPivots; /* number of tiny pivots */
301  int RefineSteps; /* number of iterative refinement steps */
302  int expansions; /* number of memory expansions */
303 } SuperLUStat_t;
304 
305 typedef struct {
306  float for_lu;
307  float total_needed;
308 } mem_usage_t;
309 
310 
311 /***********************************************************************
312  * Prototypes
313  ***********************************************************************/
314 #ifdef __cplusplus
315 extern "C" {
316 #endif
317 
324 extern void get_perm_c(int, SuperMatrix *, int *);
327 extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
329 extern void superlu_abort_and_exit(char*);
330 extern void *superlu_malloc (size_t);
331 extern int *intMalloc (int);
332 extern int *intCalloc (int);
333 extern void superlu_free (void*);
334 extern void SetIWork (int, int, int, int *, int **, int **, int **,
335  int **, int **, int **, int **);
336 extern int sp_coletree (int *, int *, int *, int, int, int *);
337 extern void relax_snode (const int, int *, const int, int *, int *);
338 extern void heap_relax_snode (const int, int *, const int, int *, int *);
339 extern int mark_relax(int, int *, int *, int *, int *, int *, int *);
340 extern void ilu_relax_snode (const int, int *, const int, int *,
341  int *, int *);
342 extern void ilu_heap_relax_snode (const int, int *, const int, int *,
343  int *, int*);
344 extern void resetrep_col (const int, const int *, int *);
345 extern int spcoletree (int *, int *, int *, int, int, int *);
346 extern int *TreePostorder (int, int *);
347 extern double SuperLU_timer_ ();
348 extern int sp_ienv (int);
349 extern int lsame_ (char *, char *);
350 extern int xerbla_ (char *, int *);
351 extern void ifill (int *, int, int);
352 extern void snode_profile (int, int *);
353 extern void super_stats (int, int *);
354 extern void check_repfnz(int, int, int, int *);
355 extern void PrintSumm (char *, int, int, int);
356 extern void StatInit(SuperLUStat_t *);
357 extern void StatPrint (SuperLUStat_t *);
358 extern void StatFree(SuperLUStat_t *);
359 extern void print_panel_seg(int, int, int, int, int *, int *);
360 extern int print_int_vec(char *,int, int *);
361 extern int slu_PrintInt10(char *, int, int *);
362 
363 #ifdef __cplusplus
364  }
365 #endif
366 
367 #endif /* __SUPERLU_UTIL */
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
std::array< T, N > array
Definition: EmulateArray.h:231
trans_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:21
milu_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:28
IterRefine_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:23
rowperm_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:18
colperm_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:19
fact_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:17
norm_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:27
yes_no_t
Definition: oomph_superlu_4.3/superlu_enum_consts.h:16
enum constants header file
void snode_profile(int, int *)
int spcoletree(int *, int *, int *, int, int, int *)
double SuperLU_timer_()
void superlu_free(void *)
void ilu_relax_snode(const int, int *, const int, int *, int *, int *)
int mark_relax(int, int *, int *, int *, int *, int *, int *)
void get_perm_c(int, SuperMatrix *, int *)
void Destroy_SuperMatrix_Store(SuperMatrix *)
unsigned char Logical
Definition: slu_util.h:115
void StatInit(SuperLUStat_t *)
void Destroy_CompCol_Matrix(SuperMatrix *)
int * TreePostorder(int, int *)
void PrintSumm(char *, int, int, int)
void set_default_options(superlu_options_t *options)
void print_panel_seg(int, int, int, int, int *, int *)
int lsame_(char *, char *)
void Destroy_CompCol_Permuted(SuperMatrix *)
int sp_ienv(int)
void * superlu_malloc(size_t)
int print_int_vec(char *, int, int *)
int xerbla_(char *, int *)
void Destroy_CompRow_Matrix(SuperMatrix *)
void check_repfnz(int, int, int, int *)
void Destroy_Dense_Matrix(SuperMatrix *)
void StatPrint(SuperLUStat_t *)
void relax_snode(const int, int *, const int, int *, int *)
float flops_t
Definition: slu_util.h:114
void ifill(int *, int, int)
void StatFree(SuperLUStat_t *)
void resetrep_col(const int, const int *, int *)
void superlu_abort_and_exit(char *)
void super_stats(int, int *)
int * intMalloc(int)
void ilu_heap_relax_snode(const int, int *, const int, int *, int *, int *)
void Destroy_SuperNode_Matrix(SuperMatrix *)
int slu_PrintInt10(char *, int, int *)
void heap_relax_snode(const int, int *, const int, int *, int *)
void sp_preorder(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *)
void SetIWork(int, int, int, int *, int **, int **, int **, int **, int **, int **, int **)
int * intCalloc(int)
int sp_coletree(int *, int *, int *, int, int, int *)
void ilu_set_default_options(superlu_options_t *options)
struct e_node ExpHeader
Headers for 4 types of dynamatically managed memory.
Definition: slu_util.h:282
Definition: slu_util.h:290
Definition: oomph_superlu_4.3/supermatrix.h:43
Headers for 4 types of dynamatically managed memory.
Definition: slu_util.h:277
int size
Definition: slu_util.h:278
void * mem
Definition: slu_util.h:279
Definition: slu_util.h:299
Definition: slu_util.h:246