1 /* ========================================================================= */ 2 /* === AMD_2 =============================================================== */ 3 /* ========================================================================= */ 4 5 /* ------------------------------------------------------------------------- */ 6 /* AMD, Copyright (c) Timothy A. Davis, */ 7 /* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ 8 /* email: DrTimothyAldenDavis@gmail.com */ 9 /* ------------------------------------------------------------------------- */ 10 11 /* AMD_2: performs the AMD ordering on a symmetric sparse matrix A, followed 12 * by a postordering (via depth-first search) of the assembly tree using the 13 * AMD_postorder routine. 14 */ 15 16 module amd_2; 17 18 nothrow @nogc extern(C): 19 20 import glob_opts; 21 import SuiteSparse_config; 22 import amd_internal; 23 import amd; 24 import amd_postorder; 25 import core.stdc.math; 26 27 /* ========================================================================= */ 28 /* === clear_flag ========================================================== */ 29 /* ========================================================================= */ 30 31 static Int clear_flag (Int wflg, Int wbig, Int *W, Int n) 32 { 33 Int x ; 34 if (wflg < 2 || wflg >= wbig) 35 { 36 for (x = 0 ; x < n ; x++) 37 { 38 if (W [x] != 0) W [x] = 1 ; 39 } 40 wflg = 2 ; 41 } 42 /* at this point, W [0..n-1] < wflg holds */ 43 return (wflg) ; 44 } 45 46 47 /* ========================================================================= */ 48 /* === AMD_2 =============================================================== */ 49 /* ========================================================================= */ 50 51 52 /* amd_2 is the primary AMD ordering routine. It is not meant to be 53 * user-callable because of its restrictive inputs and because it destroys 54 * the user's input matrix. It does not check its inputs for errors, either. 55 * However, if you can work with these restrictions it can be faster than 56 * amd_order and use less memory (assuming that you can create your own copy 57 * of the matrix for AMD to destroy). Refer to AMD/Source/amd_2.c for a 58 * description of each parameter. */ 59 60 61 /* 62 * Given a representation of the nonzero pattern of a symmetric matrix, A, 63 * (excluding the diagonal) perform an approximate minimum (UMFPACK/MA38-style) 64 * degree ordering to compute a pivot order such that the introduction of 65 * nonzeros (fill-in) in the Cholesky factors A = LL' is kept low. At each 66 * step, the pivot selected is the one with the minimum UMFAPACK/MA38-style 67 * upper-bound on the external degree. This routine can optionally perform 68 * aggresive absorption (as done by MC47B in the Harwell Subroutine 69 * Library). 70 * 71 * The approximate degree algorithm implemented here is the symmetric analog of 72 * the degree update algorithm in MA38 and UMFPACK (the Unsymmetric-pattern 73 * MultiFrontal PACKage, both by Davis and Duff). The routine is based on the 74 * MA27 minimum degree ordering algorithm by Iain Duff and John Reid. 75 * 76 * This routine is a translation of the original AMDBAR and MC47B routines, 77 * in Fortran, with the following modifications: 78 * 79 * (1) dense rows/columns are removed prior to ordering the matrix, and placed 80 * last in the output order. The presence of a dense row/column can 81 * increase the ordering time by up to O(n^2), unless they are removed 82 * prior to ordering. 83 * 84 * (2) the minimum degree ordering is followed by a postordering (depth-first 85 * search) of the assembly tree. Note that mass elimination (discussed 86 * below) combined with the approximate degree update can lead to the mass 87 * elimination of nodes with lower exact degree than the current pivot 88 * element. No additional fill-in is caused in the representation of the 89 * Schur complement. The mass-eliminated nodes merge with the current 90 * pivot element. They are ordered prior to the current pivot element. 91 * Because they can have lower exact degree than the current element, the 92 * merger of two or more of these nodes in the current pivot element can 93 * lead to a single element that is not a "fundamental supernode". The 94 * diagonal block can have zeros in it. Thus, the assembly tree used here 95 * is not guaranteed to be the precise supernodal elemination tree (with 96 * "funadmental" supernodes), and the postordering performed by this 97 * routine is not guaranteed to be a precise postordering of the 98 * elimination tree. 99 * 100 * (3) input parameters are added, to control aggressive absorption and the 101 * detection of "dense" rows/columns of A. 102 * 103 * (4) additional statistical information is returned, such as the number of 104 * nonzeros in L, and the flop counts for subsequent LDL' and LU 105 * factorizations. These are slight upper bounds, because of the mass 106 * elimination issue discussed above. 107 * 108 * (5) additional routines are added to interface this routine to MATLAB 109 * to provide a simple C-callable user-interface, to check inputs for 110 * errors, compute the symmetry of the pattern of A and the number of 111 * nonzeros in each row/column of A+A', to compute the pattern of A+A', 112 * to perform the assembly tree postordering, and to provide debugging 113 * ouput. Many of these functions are also provided by the Fortran 114 * Harwell Subroutine Library routine MC47A. 115 * 116 * (6) both int and SuiteSparse_long versions are provided. In the 117 * descriptions below and integer is and int or SuiteSparse_long depending 118 * on which version is being used. 119 120 ********************************************************************** 121 ***** CAUTION: ARGUMENTS ARE NOT CHECKED FOR ERRORS ON INPUT. ****** 122 ********************************************************************** 123 ** If you want error checking, a more versatile input format, and a ** 124 ** simpler user interface, use amd_order or amd_l_order instead. ** 125 ** This routine is not meant to be user-callable. ** 126 ********************************************************************** 127 128 * ---------------------------------------------------------------------------- 129 * References: 130 * ---------------------------------------------------------------------------- 131 * 132 * [1] Timothy A. Davis and Iain Duff, "An unsymmetric-pattern multifrontal 133 * method for sparse LU factorization", SIAM J. Matrix Analysis and 134 * Applications, vol. 18, no. 1, pp. 140-158. Discusses UMFPACK / MA38, 135 * which first introduced the approximate minimum degree used by this 136 * routine. 137 * 138 * [2] Patrick Amestoy, Timothy A. Davis, and Iain S. Duff, "An approximate 139 * minimum degree ordering algorithm," SIAM J. Matrix Analysis and 140 * Applications, vol. 17, no. 4, pp. 886-905, 1996. Discusses AMDBAR and 141 * MC47B, which are the Fortran versions of this routine. 142 * 143 * [3] Alan George and Joseph Liu, "The evolution of the minimum degree 144 * ordering algorithm," SIAM Review, vol. 31, no. 1, pp. 1-19, 1989. 145 * We list below the features mentioned in that paper that this code 146 * includes: 147 * 148 * mass elimination: 149 * Yes. MA27 relied on supervariable detection for mass elimination. 150 * 151 * indistinguishable nodes: 152 * Yes (we call these "supervariables"). This was also in the MA27 153 * code - although we modified the method of detecting them (the 154 * previous hash was the true degree, which we no longer keep track 155 * of). A supervariable is a set of rows with identical nonzero 156 * pattern. All variables in a supervariable are eliminated together. 157 * Each supervariable has as its numerical name that of one of its 158 * variables (its principal variable). 159 * 160 * quotient graph representation: 161 * Yes. We use the term "element" for the cliques formed during 162 * elimination. This was also in the MA27 code. The algorithm can 163 * operate in place, but it will work more efficiently if given some 164 * "elbow room." 165 * 166 * element absorption: 167 * Yes. This was also in the MA27 code. 168 * 169 * external degree: 170 * Yes. The MA27 code was based on the true degree. 171 * 172 * incomplete degree update and multiple elimination: 173 * No. This was not in MA27, either. Our method of degree update 174 * within MC47B is element-based, not variable-based. It is thus 175 * not well-suited for use with incomplete degree update or multiple 176 * elimination. 177 * 178 * Authors, and Copyright (C) 2004 by: 179 * Timothy A. Davis, Patrick Amestoy, Iain S. Duff, John K. Reid. 180 * 181 * Acknowledgements: This work (and the UMFPACK package) was supported by the 182 * National Science Foundation (ASC-9111263, DMS-9223088, and CCR-0203270). 183 * The UMFPACK/MA38 approximate degree update algorithm, the unsymmetric analog 184 * which forms the basis of AMD, was developed while Tim Davis was supported by 185 * CERFACS (Toulouse, France) in a post-doctoral position. This C version, and 186 * the etree postorder, were written while Tim Davis was on sabbatical at 187 * Stanford University and Lawrence Berkeley National Laboratory. 188 189 * ---------------------------------------------------------------------------- 190 * INPUT ARGUMENTS (unaltered): 191 * ---------------------------------------------------------------------------- 192 193 * n: The matrix order. Restriction: n >= 1. 194 * 195 * iwlen: The size of the Iw array. On input, the matrix is stored in 196 * Iw [0..pfree-1]. However, Iw [0..iwlen-1] should be slightly larger 197 * than what is required to hold the matrix, at least iwlen >= pfree + n. 198 * Otherwise, excessive compressions will take place. The recommended 199 * value of iwlen is 1.2 * pfree + n, which is the value used in the 200 * user-callable interface to this routine (amd_order.c). The algorithm 201 * will not run at all if iwlen < pfree. Restriction: iwlen >= pfree + n. 202 * Note that this is slightly more restrictive than the actual minimum 203 * (iwlen >= pfree), but AMD_2 will be very slow with no elbow room. 204 * Thus, this routine enforces a bare minimum elbow room of size n. 205 * 206 * pfree: On input the tail end of the array, Iw [pfree..iwlen-1], is empty, 207 * and the matrix is stored in Iw [0..pfree-1]. During execution, 208 * additional data is placed in Iw, and pfree is modified so that 209 * Iw [pfree..iwlen-1] is always the unused part of Iw. 210 * 211 * Control: A c_float array of size AMD_CONTROL containing input parameters 212 * that affect how the ordering is computed. If NULL, then default 213 * settings are used. 214 * 215 * Control [AMD_DENSE] is used to determine whether or not a given input 216 * row is "dense". A row is "dense" if the number of entries in the row 217 * exceeds Control [AMD_DENSE] times sqrt (n), except that rows with 16 or 218 * fewer entries are never considered "dense". To turn off the detection 219 * of dense rows, set Control [AMD_DENSE] to a negative number, or to a 220 * number larger than sqrt (n). The default value of Control [AMD_DENSE] 221 * is AMD_DEFAULT_DENSE, which is defined in amd.h as 10. 222 * 223 * Control [AMD_AGGRESSIVE] is used to determine whether or not aggressive 224 * absorption is to be performed. If nonzero, then aggressive absorption 225 * is performed (this is the default). 226 227 * ---------------------------------------------------------------------------- 228 * INPUT/OUPUT ARGUMENTS: 229 * ---------------------------------------------------------------------------- 230 * 231 * Pe: An integer array of size n. On input, Pe [i] is the index in Iw of 232 * the start of row i. Pe [i] is ignored if row i has no off-diagonal 233 * entries. Thus Pe [i] must be in the range 0 to pfree-1 for non-empty 234 * rows. 235 * 236 * During execution, it is used for both supervariables and elements: 237 * 238 * Principal supervariable i: index into Iw of the description of 239 * supervariable i. A supervariable represents one or more rows of 240 * the matrix with identical nonzero pattern. In this case, 241 * Pe [i] >= 0. 242 * 243 * Non-principal supervariable i: if i has been absorbed into another 244 * supervariable j, then Pe [i] = FLIP (j), where FLIP (j) is defined 245 * as (-(j)-2). Row j has the same pattern as row i. Note that j 246 * might later be absorbed into another supervariable j2, in which 247 * case Pe [i] is still FLIP (j), and Pe [j] = FLIP (j2) which is 248 * < EMPTY, where EMPTY is defined as (-1) in amd_internal.h. 249 * 250 * Unabsorbed element e: the index into Iw of the description of element 251 * e, if e has not yet been absorbed by a subsequent element. Element 252 * e is created when the supervariable of the same name is selected as 253 * the pivot. In this case, Pe [i] >= 0. 254 * 255 * Absorbed element e: if element e is absorbed into element e2, then 256 * Pe [e] = FLIP (e2). This occurs when the pattern of e (which we 257 * refer to as Le) is found to be a subset of the pattern of e2 (that 258 * is, Le2). In this case, Pe [i] < EMPTY. If element e is "null" 259 * (it has no nonzeros outside its pivot block), then Pe [e] = EMPTY, 260 * and e is the root of an assembly subtree (or the whole tree if 261 * there is just one such root). 262 * 263 * Dense variable i: if i is "dense", then Pe [i] = EMPTY. 264 * 265 * On output, Pe holds the assembly tree/forest, which implicitly 266 * represents a pivot order with identical fill-in as the actual order 267 * (via a depth-first search of the tree), as follows. If Nv [i] > 0, 268 * then i represents a node in the assembly tree, and the parent of i is 269 * Pe [i], or EMPTY if i is a root. If Nv [i] = 0, then (i, Pe [i]) 270 * represents an edge in a subtree, the root of which is a node in the 271 * assembly tree. Note that i refers to a row/column in the original 272 * matrix, not the permuted matrix. 273 * 274 * Info: A c_float array of size AMD_INFO. If present, (that is, not NULL), 275 * then statistics about the ordering are returned in the Info array. 276 * See amd.h for a description. 277 278 * ---------------------------------------------------------------------------- 279 * INPUT/MODIFIED (undefined on output): 280 * ---------------------------------------------------------------------------- 281 * 282 * Len: An integer array of size n. On input, Len [i] holds the number of 283 * entries in row i of the matrix, excluding the diagonal. The contents 284 * of Len are undefined on output. 285 * 286 * Iw: An integer array of size iwlen. On input, Iw [0..pfree-1] holds the 287 * description of each row i in the matrix. The matrix must be symmetric, 288 * and both upper and lower triangular parts must be present. The 289 * diagonal must not be present. Row i is held as follows: 290 * 291 * Len [i]: the length of the row i data structure in the Iw array. 292 * Iw [Pe [i] ... Pe [i] + Len [i] - 1]: 293 * the list of column indices for nonzeros in row i (simple 294 * supervariables), excluding the diagonal. All supervariables 295 * start with one row/column each (supervariable i is just row i). 296 * If Len [i] is zero on input, then Pe [i] is ignored on input. 297 * 298 * Note that the rows need not be in any particular order, and there 299 * may be empty space between the rows. 300 * 301 * During execution, the supervariable i experiences fill-in. This is 302 * represented by placing in i a list of the elements that cause fill-in 303 * in supervariable i: 304 * 305 * Len [i]: the length of supervariable i in the Iw array. 306 * Iw [Pe [i] ... Pe [i] + Elen [i] - 1]: 307 * the list of elements that contain i. This list is kept short 308 * by removing absorbed elements. 309 * Iw [Pe [i] + Elen [i] ... Pe [i] + Len [i] - 1]: 310 * the list of supervariables in i. This list is kept short by 311 * removing nonprincipal variables, and any entry j that is also 312 * contained in at least one of the elements (j in Le) in the list 313 * for i (e in row i). 314 * 315 * When supervariable i is selected as pivot, we create an element e of 316 * the same name (e=i): 317 * 318 * Len [e]: the length of element e in the Iw array. 319 * Iw [Pe [e] ... Pe [e] + Len [e] - 1]: 320 * the list of supervariables in element e. 321 * 322 * An element represents the fill-in that occurs when supervariable i is 323 * selected as pivot (which represents the selection of row i and all 324 * non-principal variables whose principal variable is i). We use the 325 * term Le to denote the set of all supervariables in element e. Absorbed 326 * supervariables and elements are pruned from these lists when 327 * computationally convenient. 328 * 329 * CAUTION: THE INPUT MATRIX IS OVERWRITTEN DURING COMPUTATION. 330 * The contents of Iw are undefined on output. 331 332 * ---------------------------------------------------------------------------- 333 * OUTPUT (need not be set on input): 334 * ---------------------------------------------------------------------------- 335 * 336 * Nv: An integer array of size n. During execution, ABS (Nv [i]) is equal to 337 * the number of rows that are represented by the principal supervariable 338 * i. If i is a nonprincipal or dense variable, then Nv [i] = 0. 339 * Initially, Nv [i] = 1 for all i. Nv [i] < 0 signifies that i is a 340 * principal variable in the pattern Lme of the current pivot element me. 341 * After element me is constructed, Nv [i] is set back to a positive 342 * value. 343 * 344 * On output, Nv [i] holds the number of pivots represented by super 345 * row/column i of the original matrix, or Nv [i] = 0 for non-principal 346 * rows/columns. Note that i refers to a row/column in the original 347 * matrix, not the permuted matrix. 348 * 349 * Elen: An integer array of size n. See the description of Iw above. At the 350 * start of execution, Elen [i] is set to zero for all rows i. During 351 * execution, Elen [i] is the number of elements in the list for 352 * supervariable i. When e becomes an element, Elen [e] = FLIP (esize) is 353 * set, where esize is the size of the element (the number of pivots, plus 354 * the number of nonpivotal entries). Thus Elen [e] < EMPTY. 355 * Elen (i) = EMPTY set when variable i becomes nonprincipal. 356 * 357 * For variables, Elen (i) >= EMPTY holds until just before the 358 * postordering and permutation vectors are computed. For elements, 359 * Elen [e] < EMPTY holds. 360 * 361 * On output, Elen [i] is the degree of the row/column in the Cholesky 362 * factorization of the permuted matrix, corresponding to the original row 363 * i, if i is a super row/column. It is equal to EMPTY if i is 364 * non-principal. Note that i refers to a row/column in the original 365 * matrix, not the permuted matrix. 366 * 367 * Note that the contents of Elen on output differ from the Fortran 368 * version (Elen holds the inverse permutation in the Fortran version, 369 * which is instead returned in the Next array in this C version, 370 * described below). 371 * 372 * Last: In a degree list, Last [i] is the supervariable preceding i, or EMPTY 373 * if i is the head of the list. In a hash bucket, Last [i] is the hash 374 * key for i. 375 * 376 * Last [Head [hash]] is also used as the head of a hash bucket if 377 * Head [hash] contains a degree list (see the description of Head, 378 * below). 379 * 380 * On output, Last [0..n-1] holds the permutation. That is, if 381 * i = Last [k], then row i is the kth pivot row (where k ranges from 0 to 382 * n-1). Row Last [k] of A is the kth row in the permuted matrix, PAP'. 383 * 384 * Next: Next [i] is the supervariable following i in a link list, or EMPTY if 385 * i is the last in the list. Used for two kinds of lists: degree lists 386 * and hash buckets (a supervariable can be in only one kind of list at a 387 * time). 388 * 389 * On output Next [0..n-1] holds the inverse permutation. That is, if 390 * k = Next [i], then row i is the kth pivot row. Row i of A appears as 391 * the (Next[i])-th row in the permuted matrix, PAP'. 392 * 393 * Note that the contents of Next on output differ from the Fortran 394 * version (Next is undefined on output in the Fortran version). 395 396 * ---------------------------------------------------------------------------- 397 * LOCAL WORKSPACE (not input or output - used only during execution): 398 * ---------------------------------------------------------------------------- 399 * 400 * Degree: An integer array of size n. If i is a supervariable, then 401 * Degree [i] holds the current approximation of the external degree of 402 * row i (an upper bound). The external degree is the number of nonzeros 403 * in row i, minus ABS (Nv [i]), the diagonal part. The bound is equal to 404 * the exact external degree if Elen [i] is less than or equal to two. 405 * 406 * We also use the term "external degree" for elements e to refer to 407 * |Le \ Lme|. If e is an element, then Degree [e] is |Le|, which is the 408 * degree of the off-diagonal part of the element e (not including the 409 * diagonal part). 410 * 411 * Head: An integer array of size n. Head is used for degree lists. 412 * Head [deg] is the first supervariable in a degree list. All 413 * supervariables i in a degree list Head [deg] have the same approximate 414 * degree, namely, deg = Degree [i]. If the list Head [deg] is empty then 415 * Head [deg] = EMPTY. 416 * 417 * During supervariable detection Head [hash] also serves as a pointer to 418 * a hash bucket. If Head [hash] >= 0, there is a degree list of degree 419 * hash. The hash bucket head pointer is Last [Head [hash]]. If 420 * Head [hash] = EMPTY, then the degree list and hash bucket are both 421 * empty. If Head [hash] < EMPTY, then the degree list is empty, and 422 * FLIP (Head [hash]) is the head of the hash bucket. After supervariable 423 * detection is complete, all hash buckets are empty, and the 424 * (Last [Head [hash]] = EMPTY) condition is restored for the non-empty 425 * degree lists. 426 * 427 * W: An integer array of size n. The flag array W determines the status of 428 * elements and variables, and the external degree of elements. 429 * 430 * for elements: 431 * if W [e] = 0, then the element e is absorbed. 432 * if W [e] >= wflg, then W [e] - wflg is the size of the set 433 * |Le \ Lme|, in terms of nonzeros (the sum of ABS (Nv [i]) for 434 * each principal variable i that is both in the pattern of 435 * element e and NOT in the pattern of the current pivot element, 436 * me). 437 * if wflg > W [e] > 0, then e is not absorbed and has not yet been 438 * seen in the scan of the element lists in the computation of 439 * |Le\Lme| in Scan 1 below. 440 * 441 * for variables: 442 * during supervariable detection, if W [j] != wflg then j is 443 * not in the pattern of variable i. 444 * 445 * The W array is initialized by setting W [i] = 1 for all i, and by 446 * setting wflg = 2. It is reinitialized if wflg becomes too large (to 447 * ensure that wflg+n does not cause integer overflow). 448 449 * ---------------------------------------------------------------------------- 450 * LOCAL INTEGERS: 451 * ---------------------------------------------------------------------------- 452 */ 453 454 455 version (DLONG){ 456 void AMD_2 457 ( 458 SuiteSparse_long n, /* A is n-by-n, where n > 0 */ 459 SuiteSparse_long *Pe, /* Pe [0..n-1]: index in Iw of row i on input */ 460 SuiteSparse_long *Iw, /* workspace of size iwlen. Iw [0..pfree-1] holds the matrix on input */ 461 SuiteSparse_long *Len, /* Len [0..n-1]: length for row/column i on input */ 462 SuiteSparse_long iwlen, /* length of Iw. iwlen >= pfree + n */ 463 SuiteSparse_long pfree, /* Iw [pfree ... iwlen-1] is empty on input */ 464 /* 7 size-n workspaces, not defined on input: */ 465 SuiteSparse_long *Nv, /* the size of each supernode on output */ 466 SuiteSparse_long *Next, /* the output inverse permutation */ 467 SuiteSparse_long *Last, /* the output permutation */ 468 SuiteSparse_long *Head, 469 SuiteSparse_long *Elen,/* the size columns of L for each supernode */ 470 SuiteSparse_long *Degree, 471 SuiteSparse_long *W, 472 /* control parameters and output statistics */ 473 c_float *Control, /* array of size AMD_CONTROL */ 474 c_float *Info /* array of size AMD_INFO */ 475 ) 476 { 477 478 Int deg; 479 Int degme; 480 Int dext; 481 Int lemax; 482 Int e; 483 Int elenme; 484 Int eln; 485 Int i; 486 Int ilast; 487 Int inext; 488 Int j; 489 Int jlast; 490 Int jnext; 491 Int k; 492 Int knt1; 493 Int knt2; 494 Int knt3; 495 Int lenj; 496 Int ln; 497 Int me; 498 Int mindeg; 499 Int nel; 500 Int nleft; 501 Int nvi; 502 Int nvj; 503 Int nvpiv; 504 Int slenme; 505 Int wbig; 506 Int we; 507 Int wflg; 508 Int wnvi; 509 Int ok; 510 Int ndense; 511 Int ncmpa; 512 Int dense; 513 Int aggressive; 514 515 //unsigned Int hash ; /* unsigned, so that hash % n is well defined.*/ 516 UInt hash ; /* unsigned, so that hash % n is well defined.*/ 517 518 /* 519 * deg: the degree of a variable or element 520 * degme: size, |Lme|, of the current element, me (= Degree [me]) 521 * dext: external degree, |Le \ Lme|, of some element e 522 * lemax: largest |Le| seen so far (called dmax in Fortran version) 523 * e: an element 524 * elenme: the length, Elen [me], of element list of pivotal variable 525 * eln: the length, Elen [...], of an element list 526 * hash: the computed value of the hash function 527 * i: a supervariable 528 * ilast: the entry in a link list preceding i 529 * inext: the entry in a link list following i 530 * j: a supervariable 531 * jlast: the entry in a link list preceding j 532 * jnext: the entry in a link list, or path, following j 533 * k: the pivot order of an element or variable 534 * knt1: loop counter used during element construction 535 * knt2: loop counter used during element construction 536 * knt3: loop counter used during compression 537 * lenj: Len [j] 538 * ln: length of a supervariable list 539 * me: current supervariable being eliminated, and the current 540 * element created by eliminating that supervariable 541 * mindeg: current minimum degree 542 * nel: number of pivots selected so far 543 * nleft: n - nel, the number of nonpivotal rows/columns remaining 544 * nvi: the number of variables in a supervariable i (= Nv [i]) 545 * nvj: the number of variables in a supervariable j (= Nv [j]) 546 * nvpiv: number of pivots in current element 547 * slenme: number of variables in variable list of pivotal variable 548 * wbig: = (INT_MAX - n) for the int version, (SuiteSparse_long_max - n) 549 * for the SuiteSparse_long version. wflg is not allowed to 550 * be >= wbig. 551 * we: W [e] 552 * wflg: used for flagging the W array. See description of Iw. 553 * wnvi: wflg - Nv [i] 554 * x: either a supervariable or an element 555 * 556 * ok: true if supervariable j can be absorbed into i 557 * ndense: number of "dense" rows/columns 558 * dense: rows/columns with initial degree > dense are considered "dense" 559 * aggressive: true if aggressive absorption is being performed 560 * ncmpa: number of garbage collections 561 562 * ---------------------------------------------------------------------------- 563 * LOCAL DOUBLES, used for statistical output only (except for alpha): 564 * ---------------------------------------------------------------------------- 565 */ 566 567 c_float f, r, ndiv, s, nms_lu, nms_ldl, dmax, alpha, lnz, lnzme ; 568 569 /* 570 * f: nvpiv 571 * r: degme + nvpiv 572 * ndiv: number of divisions for LU or LDL' factorizations 573 * s: number of multiply-subtract pairs for LU factorization, for the 574 * current element me 575 * nms_lu number of multiply-subtract pairs for LU factorization 576 * nms_ldl number of multiply-subtract pairs for LDL' factorization 577 * dmax: the largest number of entries in any column of L, including the 578 * diagonal 579 * alpha: "dense" degree ratio 580 * lnz: the number of nonzeros in L (excluding the diagonal) 581 * lnzme: the number of nonzeros in L (excl. the diagonal) for the 582 * current element me 583 584 * ---------------------------------------------------------------------------- 585 * LOCAL "POINTERS" (indices into the Iw array) 586 * ---------------------------------------------------------------------------- 587 */ 588 589 Int p, p1, p2, p3, p4, pdst, pend, pj, pme, pme1, pme2, pn, psrc ; 590 591 /* 592 * Any parameter (Pe [...] or pfree) or local variable starting with "p" (for 593 * Pointer) is an index into Iw, and all indices into Iw use variables starting 594 * with "p." The only exception to this rule is the iwlen input argument. 595 * 596 * p: pointer into lots of things 597 * p1: Pe [i] for some variable i (start of element list) 598 * p2: Pe [i] + Elen [i] - 1 for some variable i 599 * p3: index of first supervariable in clean list 600 * p4: 601 * pdst: destination pointer, for compression 602 * pend: end of memory to compress 603 * pj: pointer into an element or variable 604 * pme: pointer into the current element (pme1...pme2) 605 * pme1: the current element, me, is stored in Iw [pme1...pme2] 606 * pme2: the end of the current element 607 * pn: pointer into a "clean" variable, also used to compress 608 * psrc: source pointer, for compression 609 */ 610 611 /* ========================================================================= */ 612 /* INITIALIZATIONS */ 613 /* ========================================================================= */ 614 615 /* Note that this restriction on iwlen is slightly more restrictive than 616 * what is actually required in AMD_2. AMD_2 can operate with no elbow 617 * room at all, but it will be slow. For better performance, at least 618 * size-n elbow room is enforced. */ 619 ASSERT (iwlen >= pfree + n) ; 620 ASSERT (n > 0) ; 621 622 /* initialize output statistics */ 623 lnz = 0 ; 624 ndiv = 0 ; 625 nms_lu = 0 ; 626 nms_ldl = 0 ; 627 dmax = 1 ; 628 me = EMPTY ; 629 630 mindeg = 0 ; 631 ncmpa = 0 ; 632 nel = 0 ; 633 lemax = 0 ; 634 635 /* get control parameters */ 636 if (Control != cast(c_float *) NULL) 637 { 638 alpha = Control [AMD_DENSE] ; 639 aggressive = (Control [AMD_AGGRESSIVE] != 0) ; 640 } 641 else 642 { 643 alpha = AMD_DEFAULT_DENSE ; 644 aggressive = AMD_DEFAULT_AGGRESSIVE ; 645 } 646 /* Note: if alpha is NaN, this is undefined: */ 647 if (alpha < 0) 648 { 649 /* only remove completely dense rows/columns */ 650 dense = n-2 ; 651 } 652 else 653 { 654 dense = cast(Int) (alpha * sqrt (cast(c_float) n)) ; 655 } 656 dense = MAX (16, dense) ; 657 dense = MIN (n, dense) ; 658 AMD_DEBUG1 ("\n\nAMD (debug), alpha %g, aggr. "~ID~"\n", alpha, aggressive); 659 660 for (i = 0 ; i < n ; i++) 661 { 662 Last [i] = EMPTY ; 663 Head [i] = EMPTY ; 664 Next [i] = EMPTY ; 665 /* if separate Hhead array is used for hash buckets: * 666 Hhead [i] = EMPTY ; 667 */ 668 Nv [i] = 1 ; 669 W [i] = 1 ; 670 Elen [i] = 0 ; 671 Degree [i] = Len [i] ; 672 } 673 674 version(NDEBUG){} 675 else { 676 AMD_DEBUG1 ("\n======Nel "~ID~" initial\n", nel); 677 AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, Last, 678 Head, Elen, Degree, W, -1) ; 679 } // !NDEBUG 680 681 /* initialize wflg */ 682 wbig = Int_MAX - n ; 683 wflg = clear_flag (0, wbig, W, n) ; 684 685 /* --------------------------------------------------------------------- */ 686 /* initialize degree lists and eliminate dense and empty rows */ 687 /* --------------------------------------------------------------------- */ 688 689 ndense = 0 ; 690 691 for (i = 0 ; i < n ; i++) 692 { 693 deg = Degree [i] ; 694 ASSERT (deg >= 0 && deg < n) ; 695 if (deg == 0) 696 { 697 698 /* ------------------------------------------------------------- 699 * we have a variable that can be eliminated at once because 700 * there is no off-diagonal non-zero in its row. Note that 701 * Nv [i] = 1 for an empty variable i. It is treated just 702 * the same as an eliminated element i. 703 * ------------------------------------------------------------- */ 704 705 Elen [i] = FLIP (1) ; 706 nel++ ; 707 Pe [i] = EMPTY ; 708 W [i] = 0 ; 709 710 } 711 else if (deg > dense) 712 { 713 714 /* ------------------------------------------------------------- 715 * Dense variables are not treated as elements, but as unordered, 716 * non-principal variables that have no parent. They do not take 717 * part in the postorder, since Nv [i] = 0. Note that the Fortran 718 * version does not have this option. 719 * ------------------------------------------------------------- */ 720 721 AMD_DEBUG1 ("Dense node "~ID~" degree "~ID~"\n", i, deg); 722 ndense++ ; 723 Nv [i] = 0 ; /* do not postorder this node */ 724 Elen [i] = EMPTY ; 725 nel++ ; 726 Pe [i] = EMPTY ; 727 728 } 729 else 730 { 731 732 /* ------------------------------------------------------------- 733 * place i in the degree list corresponding to its degree 734 * ------------------------------------------------------------- */ 735 736 inext = Head [deg] ; 737 ASSERT (inext >= EMPTY && inext < n) ; 738 if (inext != EMPTY) Last [inext] = i ; 739 Next [i] = inext ; 740 Head [deg] = i ; 741 742 } 743 } 744 745 /* ========================================================================= */ 746 /* WHILE (selecting pivots) DO */ 747 /* ========================================================================= */ 748 749 while (nel < n) 750 { 751 752 version(NDEBUG){} 753 else { 754 AMD_DEBUG1 ("\n======Nel "~ID~"\n", nel) ; 755 //if (AMD_debug >= 2) 756 if (true) // todo : 757 { 758 AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, 759 Last, Head, Elen, Degree, W, nel) ; 760 } 761 } // !NDEBUG 762 763 /* ========================================================================= */ 764 /* GET PIVOT OF MINIMUM DEGREE */ 765 /* ========================================================================= */ 766 767 /* ----------------------------------------------------------------- */ 768 /* find next supervariable for elimination */ 769 /* ----------------------------------------------------------------- */ 770 771 ASSERT (mindeg >= 0 && mindeg < n) ; 772 for (deg = mindeg ; deg < n ; deg++) 773 { 774 me = Head [deg] ; 775 if (me != EMPTY) break ; 776 } 777 mindeg = deg ; 778 ASSERT (me >= 0 && me < n) ; 779 AMD_DEBUG1 ("=================me: "~ID~"\n", me) ; 780 781 /* ----------------------------------------------------------------- */ 782 /* remove chosen variable from link list */ 783 /* ----------------------------------------------------------------- */ 784 785 inext = Next [me] ; 786 ASSERT (inext >= EMPTY && inext < n) ; 787 if (inext != EMPTY) Last [inext] = EMPTY ; 788 Head [deg] = inext ; 789 790 /* ----------------------------------------------------------------- */ 791 /* me represents the elimination of pivots nel to nel+Nv[me]-1. */ 792 /* place me itself as the first in this set. */ 793 /* ----------------------------------------------------------------- */ 794 795 elenme = Elen [me] ; 796 nvpiv = Nv [me] ; 797 ASSERT (nvpiv > 0) ; 798 nel += nvpiv ; 799 800 /* ========================================================================= */ 801 /* CONSTRUCT NEW ELEMENT */ 802 /* ========================================================================= */ 803 804 /* ----------------------------------------------------------------- 805 * At this point, me is the pivotal supervariable. It will be 806 * converted into the current element. Scan list of the pivotal 807 * supervariable, me, setting tree pointers and constructing new list 808 * of supervariables for the new element, me. p is a pointer to the 809 * current position in the old list. 810 * ----------------------------------------------------------------- */ 811 812 /* flag the variable "me" as being in Lme by negating Nv [me] */ 813 Nv [me] = -nvpiv ; 814 degme = 0 ; 815 ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; 816 817 if (elenme == 0) 818 { 819 820 /* ------------------------------------------------------------- */ 821 /* construct the new element in place */ 822 /* ------------------------------------------------------------- */ 823 824 pme1 = Pe [me] ; 825 pme2 = pme1 - 1 ; 826 827 for (p = pme1 ; p <= pme1 + Len [me] - 1 ; p++) 828 { 829 i = Iw [p] ; 830 ASSERT (i >= 0 && i < n && Nv [i] >= 0) ; 831 nvi = Nv [i] ; 832 if (nvi > 0) 833 { 834 835 /* ----------------------------------------------------- */ 836 /* i is a principal variable not yet placed in Lme. */ 837 /* store i in new list */ 838 /* ----------------------------------------------------- */ 839 840 /* flag i as being in Lme by negating Nv [i] */ 841 degme += nvi ; 842 Nv [i] = -nvi ; 843 Iw [++pme2] = i ; 844 845 /* ----------------------------------------------------- */ 846 /* remove variable i from degree list. */ 847 /* ----------------------------------------------------- */ 848 849 ilast = Last [i] ; 850 inext = Next [i] ; 851 ASSERT (ilast >= EMPTY && ilast < n) ; 852 ASSERT (inext >= EMPTY && inext < n) ; 853 if (inext != EMPTY) Last [inext] = ilast ; 854 if (ilast != EMPTY) 855 { 856 Next [ilast] = inext ; 857 } 858 else 859 { 860 /* i is at the head of the degree list */ 861 ASSERT (Degree [i] >= 0 && Degree [i] < n) ; 862 Head [Degree [i]] = inext ; 863 } 864 } 865 } 866 } 867 else 868 { 869 870 /* ------------------------------------------------------------- */ 871 /* construct the new element in empty space, Iw [pfree ...] */ 872 /* ------------------------------------------------------------- */ 873 874 p = Pe [me] ; 875 pme1 = pfree ; 876 slenme = Len [me] - elenme ; 877 878 for (knt1 = 1 ; knt1 <= elenme + 1 ; knt1++) 879 { 880 881 if (knt1 > elenme) 882 { 883 /* search the supervariables in me. */ 884 e = me ; 885 pj = p ; 886 ln = slenme ; 887 AMD_DEBUG2 ("Search sv: "~ID~" "~ID~" "~ID~"\n", me,pj,ln); 888 } 889 else 890 { 891 /* search the elements in me. */ 892 e = Iw [p++] ; 893 ASSERT (e >= 0 && e < n) ; 894 pj = Pe [e] ; 895 ln = Len [e] ; 896 AMD_DEBUG2 ("Search element e "~ID~" in me "~ID~"\n", e,me); 897 ASSERT (Elen [e] < EMPTY && W [e] > 0 && pj >= 0) ; 898 } 899 ASSERT (ln >= 0 && (ln == 0 || (pj >= 0 && pj < iwlen))) ; 900 901 /* --------------------------------------------------------- 902 * search for different supervariables and add them to the 903 * new list, compressing when necessary. this loop is 904 * executed once for each element in the list and once for 905 * all the supervariables in the list. 906 * --------------------------------------------------------- */ 907 908 for (knt2 = 1 ; knt2 <= ln ; knt2++) 909 { 910 i = Iw [pj++] ; 911 ASSERT (i >= 0 && i < n && (i == me || Elen [i] >= EMPTY)); 912 nvi = Nv [i] ; 913 AMD_DEBUG2 (": "~ID~" "~ID~" "~ID~" "~ID~"\n", i, Elen [i], Nv [i], wflg); 914 915 if (nvi > 0) 916 { 917 918 /* ------------------------------------------------- */ 919 /* compress Iw, if necessary */ 920 /* ------------------------------------------------- */ 921 922 if (pfree >= iwlen) 923 { 924 925 AMD_DEBUG1 ("GARBAGE COLLECTION\n"); 926 927 /* prepare for compressing Iw by adjusting pointers 928 * and lengths so that the lists being searched in 929 * the inner and outer loops contain only the 930 * remaining entries. */ 931 932 Pe [me] = p ; 933 Len [me] -= knt1 ; 934 /* check if nothing left of supervariable me */ 935 if (Len [me] == 0) Pe [me] = EMPTY ; 936 Pe [e] = pj ; 937 Len [e] = ln - knt2 ; 938 /* nothing left of element e */ 939 if (Len [e] == 0) Pe [e] = EMPTY ; 940 941 ncmpa++ ; /* one more garbage collection */ 942 943 /* store first entry of each object in Pe */ 944 /* FLIP the first entry in each object */ 945 for (j = 0 ; j < n ; j++) 946 { 947 pn = Pe [j] ; 948 if (pn >= 0) 949 { 950 ASSERT (pn >= 0 && pn < iwlen) ; 951 Pe [j] = Iw [pn] ; 952 Iw [pn] = FLIP (j) ; 953 } 954 } 955 956 /* psrc/pdst point to source/destination */ 957 psrc = 0 ; 958 pdst = 0 ; 959 pend = pme1 - 1 ; 960 961 while (psrc <= pend) 962 { 963 /* search for next FLIP'd entry */ 964 j = FLIP (Iw [psrc++]) ; 965 if (j >= 0) 966 { 967 AMD_DEBUG2 ("Got object j: "~ID~"\n", j); 968 Iw [pdst] = Pe [j] ; 969 Pe [j] = pdst++ ; 970 lenj = Len [j] ; 971 /* copy from source to destination */ 972 for (knt3 = 0 ; knt3 <= lenj - 2 ; knt3++) 973 { 974 Iw [pdst++] = Iw [psrc++] ; 975 } 976 } 977 } 978 979 /* move the new partially-constructed element */ 980 p1 = pdst ; 981 for (psrc = pme1 ; psrc <= pfree-1 ; psrc++) 982 { 983 Iw [pdst++] = Iw [psrc] ; 984 } 985 pme1 = p1 ; 986 pfree = pdst ; 987 pj = Pe [e] ; 988 p = Pe [me] ; 989 990 } 991 992 /* ------------------------------------------------- */ 993 /* i is a principal variable not yet placed in Lme */ 994 /* store i in new list */ 995 /* ------------------------------------------------- */ 996 997 /* flag i as being in Lme by negating Nv [i] */ 998 degme += nvi ; 999 Nv [i] = -nvi ; 1000 Iw [pfree++] = i ; 1001 AMD_DEBUG2 (" s: "~ID~" nv "~ID~"\n", i, Nv [i]); 1002 1003 /* ------------------------------------------------- */ 1004 /* remove variable i from degree link list */ 1005 /* ------------------------------------------------- */ 1006 1007 ilast = Last [i] ; 1008 inext = Next [i] ; 1009 ASSERT (ilast >= EMPTY && ilast < n) ; 1010 ASSERT (inext >= EMPTY && inext < n) ; 1011 if (inext != EMPTY) Last [inext] = ilast ; 1012 if (ilast != EMPTY) 1013 { 1014 Next [ilast] = inext ; 1015 } 1016 else 1017 { 1018 /* i is at the head of the degree list */ 1019 ASSERT (Degree [i] >= 0 && Degree [i] < n) ; 1020 Head [Degree [i]] = inext ; 1021 } 1022 } 1023 } 1024 1025 if (e != me) 1026 { 1027 /* set tree pointer and flag to indicate element e is 1028 * absorbed into new element me (the parent of e is me) */ 1029 AMD_DEBUG1 (" Element "~ID~" => "~ID~"\n", e, me); 1030 Pe [e] = FLIP (me) ; 1031 W [e] = 0 ; 1032 } 1033 } 1034 1035 pme2 = pfree - 1 ; 1036 } 1037 1038 /* ----------------------------------------------------------------- */ 1039 /* me has now been converted into an element in Iw [pme1..pme2] */ 1040 /* ----------------------------------------------------------------- */ 1041 1042 /* degme holds the external degree of new element */ 1043 Degree [me] = degme ; 1044 Pe [me] = pme1 ; 1045 Len [me] = pme2 - pme1 + 1 ; 1046 ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; 1047 1048 Elen [me] = FLIP (nvpiv + degme) ; 1049 /* FLIP (Elen (me)) is now the degree of pivot (including 1050 * diagonal part). */ 1051 1052 version(NDEBUG){} 1053 else { 1054 AMD_DEBUG2 ("New element structure: length= "~ID~"\n", pme2-pme1+1); 1055 for (pme = pme1 ; pme <= pme2 ; pme++) AMD_DEBUG3 (" "~ID~"", Iw[pme]); 1056 AMD_DEBUG3 ("\n"); 1057 } // !NDEBUG 1058 1059 /* ----------------------------------------------------------------- */ 1060 /* make sure that wflg is not too large. */ 1061 /* ----------------------------------------------------------------- */ 1062 1063 /* With the current value of wflg, wflg+n must not cause integer 1064 * overflow */ 1065 1066 wflg = clear_flag (wflg, wbig, W, n) ; 1067 1068 /* ========================================================================= */ 1069 /* COMPUTE (W [e] - wflg) = |Le\Lme| FOR ALL ELEMENTS */ 1070 /* ========================================================================= */ 1071 1072 /* ----------------------------------------------------------------- 1073 * Scan 1: compute the external degrees of previous elements with 1074 * respect to the current element. That is: 1075 * (W [e] - wflg) = |Le \ Lme| 1076 * for each element e that appears in any supervariable in Lme. The 1077 * notation Le refers to the pattern (list of supervariables) of a 1078 * previous element e, where e is not yet absorbed, stored in 1079 * Iw [Pe [e] + 1 ... Pe [e] + Len [e]]. The notation Lme 1080 * refers to the pattern of the current element (stored in 1081 * Iw [pme1..pme2]). If aggressive absorption is enabled, and 1082 * (W [e] - wflg) becomes zero, then the element e will be absorbed 1083 * in Scan 2. 1084 * ----------------------------------------------------------------- */ 1085 1086 AMD_DEBUG2 ("me: "); 1087 for (pme = pme1 ; pme <= pme2 ; pme++) 1088 { 1089 i = Iw [pme] ; 1090 ASSERT (i >= 0 && i < n) ; 1091 eln = Elen [i] ; 1092 AMD_DEBUG3 (""~ID~" Elen "~ID~": \n", i, eln); 1093 if (eln > 0) 1094 { 1095 /* note that Nv [i] has been negated to denote i in Lme: */ 1096 nvi = -Nv [i] ; 1097 ASSERT (nvi > 0 && Pe [i] >= 0 && Pe [i] < iwlen) ; 1098 wnvi = wflg - nvi ; 1099 for (p = Pe [i] ; p <= Pe [i] + eln - 1 ; p++) 1100 { 1101 e = Iw [p] ; 1102 ASSERT (e >= 0 && e < n) ; 1103 we = W [e] ; 1104 AMD_DEBUG4 (" e "~ID~" we "~ID~" ", e, we); 1105 if (we >= wflg) 1106 { 1107 /* unabsorbed element e has been seen in this loop */ 1108 AMD_DEBUG4 (" unabsorbed, first time seen"); 1109 we -= nvi ; 1110 } 1111 else if (we != 0) 1112 { 1113 /* e is an unabsorbed element */ 1114 /* this is the first we have seen e in all of Scan 1 */ 1115 AMD_DEBUG4 (" unabsorbed"); 1116 we = Degree [e] + wnvi ; 1117 } 1118 AMD_DEBUG4 ("\n"); 1119 W [e] = we ; 1120 } 1121 } 1122 } 1123 AMD_DEBUG2 ("\n"); 1124 1125 /* ========================================================================= */ 1126 /* DEGREE UPDATE AND ELEMENT ABSORPTION */ 1127 /* ========================================================================= */ 1128 1129 /* ----------------------------------------------------------------- 1130 * Scan 2: for each i in Lme, sum up the degree of Lme (which is 1131 * degme), plus the sum of the external degrees of each Le for the 1132 * elements e appearing within i, plus the supervariables in i. 1133 * Place i in hash list. 1134 * ----------------------------------------------------------------- */ 1135 1136 for (pme = pme1 ; pme <= pme2 ; pme++) 1137 { 1138 i = Iw [pme] ; 1139 ASSERT (i >= 0 && i < n && Nv [i] < 0 && Elen [i] >= 0) ; 1140 AMD_DEBUG2 ("Updating: i "~ID~" "~ID~" "~ID~"\n", i, Elen[i], Len [i]); 1141 p1 = Pe [i] ; 1142 p2 = p1 + Elen [i] - 1 ; 1143 pn = p1 ; 1144 hash = 0 ; 1145 deg = 0 ; 1146 ASSERT (p1 >= 0 && p1 < iwlen && p2 >= -1 && p2 < iwlen) ; 1147 1148 /* ------------------------------------------------------------- */ 1149 /* scan the element list associated with supervariable i */ 1150 /* ------------------------------------------------------------- */ 1151 1152 /* UMFPACK/MA38-style approximate degree: */ 1153 if (aggressive) 1154 { 1155 for (p = p1 ; p <= p2 ; p++) 1156 { 1157 e = Iw [p] ; 1158 ASSERT (e >= 0 && e < n) ; 1159 we = W [e] ; 1160 if (we != 0) 1161 { 1162 /* e is an unabsorbed element */ 1163 /* dext = | Le \ Lme | */ 1164 dext = we - wflg ; 1165 if (dext > 0) 1166 { 1167 deg += dext ; 1168 Iw [pn++] = e ; 1169 hash += e ; 1170 AMD_DEBUG4 (" e: "~ID~" hash = "~ID~"\n",e,hash); 1171 } 1172 else 1173 { 1174 /* external degree of e is zero, absorb e into me*/ 1175 AMD_DEBUG1 (" Element "~ID~" =>"~ID~" (aggressive)\n", e, me); 1176 ASSERT (dext == 0) ; 1177 Pe [e] = FLIP (me) ; 1178 W [e] = 0 ; 1179 } 1180 } 1181 } 1182 } 1183 else 1184 { 1185 for (p = p1 ; p <= p2 ; p++) 1186 { 1187 e = Iw [p] ; 1188 ASSERT (e >= 0 && e < n) ; 1189 we = W [e] ; 1190 if (we != 0) 1191 { 1192 /* e is an unabsorbed element */ 1193 dext = we - wflg ; 1194 ASSERT (dext >= 0) ; 1195 deg += dext ; 1196 Iw [pn++] = e ; 1197 hash += e ; 1198 AMD_DEBUG4 (" e: "~ID~" hash = "~ID~"\n",e,hash); 1199 } 1200 } 1201 } 1202 1203 /* count the number of elements in i (including me): */ 1204 Elen [i] = pn - p1 + 1 ; 1205 1206 /* ------------------------------------------------------------- */ 1207 /* scan the supervariables in the list associated with i */ 1208 /* ------------------------------------------------------------- */ 1209 1210 /* The bulk of the AMD run time is typically spent in this loop, 1211 * particularly if the matrix has many dense rows that are not 1212 * removed prior to ordering. */ 1213 p3 = pn ; 1214 p4 = p1 + Len [i] ; 1215 for (p = p2 + 1 ; p < p4 ; p++) 1216 { 1217 j = Iw [p] ; 1218 ASSERT (j >= 0 && j < n) ; 1219 nvj = Nv [j] ; 1220 if (nvj > 0) 1221 { 1222 /* j is unabsorbed, and not in Lme. */ 1223 /* add to degree and add to new list */ 1224 deg += nvj ; 1225 Iw [pn++] = j ; 1226 hash += j ; 1227 AMD_DEBUG4 (" s: "~ID~" hash "~ID~" Nv[j]= "~ID~"\n", j, hash, nvj) ; 1228 } 1229 } 1230 1231 /* ------------------------------------------------------------- */ 1232 /* update the degree and check for mass elimination */ 1233 /* ------------------------------------------------------------- */ 1234 1235 /* with aggressive absorption, deg==0 is identical to the 1236 * Elen [i] == 1 && p3 == pn test, below. */ 1237 ASSERT (IMPLIES (aggressive, (deg==0) == (Elen[i]==1 && p3==pn))) ; 1238 1239 if (Elen [i] == 1 && p3 == pn) 1240 { 1241 1242 /* --------------------------------------------------------- */ 1243 /* mass elimination */ 1244 /* --------------------------------------------------------- */ 1245 1246 /* There is nothing left of this node except for an edge to 1247 * the current pivot element. Elen [i] is 1, and there are 1248 * no variables adjacent to node i. Absorb i into the 1249 * current pivot element, me. Note that if there are two or 1250 * more mass eliminations, fillin due to mass elimination is 1251 * possible within the nvpiv-by-nvpiv pivot block. It is this 1252 * step that causes AMD's analysis to be an upper bound. 1253 * 1254 * The reason is that the selected pivot has a lower 1255 * approximate degree than the true degree of the two mass 1256 * eliminated nodes. There is no edge between the two mass 1257 * eliminated nodes. They are merged with the current pivot 1258 * anyway. 1259 * 1260 * No fillin occurs in the Schur complement, in any case, 1261 * and this effect does not decrease the quality of the 1262 * ordering itself, just the quality of the nonzero and 1263 * flop count analysis. It also means that the post-ordering 1264 * is not an exact elimination tree post-ordering. */ 1265 1266 AMD_DEBUG1 (" MASS i "~ID~" => parent e "~ID~"\n", i, me); 1267 Pe [i] = FLIP (me) ; 1268 nvi = -Nv [i] ; 1269 degme -= nvi ; 1270 nvpiv += nvi ; 1271 nel += nvi ; 1272 Nv [i] = 0 ; 1273 Elen [i] = EMPTY ; 1274 1275 } 1276 else 1277 { 1278 1279 /* --------------------------------------------------------- */ 1280 /* update the upper-bound degree of i */ 1281 /* --------------------------------------------------------- */ 1282 1283 /* the following degree does not yet include the size 1284 * of the current element, which is added later: */ 1285 1286 Degree [i] = MIN (Degree [i], deg) ; 1287 1288 /* --------------------------------------------------------- */ 1289 /* add me to the list for i */ 1290 /* --------------------------------------------------------- */ 1291 1292 /* move first supervariable to end of list */ 1293 Iw [pn] = Iw [p3] ; 1294 /* move first element to end of element part of list */ 1295 Iw [p3] = Iw [p1] ; 1296 /* add new element, me, to front of list. */ 1297 Iw [p1] = me ; 1298 /* store the new length of the list in Len [i] */ 1299 Len [i] = pn - p1 + 1 ; 1300 1301 /* --------------------------------------------------------- */ 1302 /* place in hash bucket. Save hash key of i in Last [i]. */ 1303 /* --------------------------------------------------------- */ 1304 1305 /* NOTE: this can fail if hash is negative, because the ANSI C 1306 * standard does not define a % b when a and/or b are negative. 1307 * That's why hash is defined as an unsigned Int, to avoid this 1308 * problem. */ 1309 hash = hash % n ; 1310 ASSERT ((cast(Int) hash) >= 0 && (cast(Int) hash) < n) ; 1311 1312 /* if the Hhead array is not used: */ 1313 j = Head [hash] ; 1314 if (j <= EMPTY) 1315 { 1316 /* degree list is empty, hash head is FLIP (j) */ 1317 Next [i] = FLIP (j) ; 1318 Head [hash] = FLIP (i) ; 1319 } 1320 else 1321 { 1322 /* degree list is not empty, use Last [Head [hash]] as 1323 * hash head. */ 1324 Next [i] = Last [j] ; 1325 Last [j] = i ; 1326 } 1327 1328 /* if a separate Hhead array is used: * 1329 Next [i] = Hhead [hash] ; 1330 Hhead [hash] = i ; 1331 */ 1332 1333 Last [i] = hash ; 1334 } 1335 } 1336 1337 Degree [me] = degme ; 1338 1339 /* ----------------------------------------------------------------- */ 1340 /* Clear the counter array, W [...], by incrementing wflg. */ 1341 /* ----------------------------------------------------------------- */ 1342 1343 /* make sure that wflg+n does not cause integer overflow */ 1344 lemax = MAX (lemax, degme) ; 1345 wflg += lemax ; 1346 wflg = clear_flag (wflg, wbig, W, n) ; 1347 /* at this point, W [0..n-1] < wflg holds */ 1348 1349 /* ========================================================================= */ 1350 /* SUPERVARIABLE DETECTION */ 1351 /* ========================================================================= */ 1352 1353 AMD_DEBUG1 ("Detecting supervariables:\n"); 1354 for (pme = pme1 ; pme <= pme2 ; pme++) 1355 { 1356 i = Iw [pme] ; 1357 ASSERT (i >= 0 && i < n) ; 1358 AMD_DEBUG2 ("Consider i "~ID~" nv "~ID~"\n", i, Nv [i]); 1359 if (Nv [i] < 0) 1360 { 1361 /* i is a principal variable in Lme */ 1362 1363 /* --------------------------------------------------------- 1364 * examine all hash buckets with 2 or more variables. We do 1365 * this by examing all unique hash keys for supervariables in 1366 * the pattern Lme of the current element, me 1367 * --------------------------------------------------------- */ 1368 1369 /* let i = head of hash bucket, and empty the hash bucket */ 1370 ASSERT (Last [i] >= 0 && Last [i] < n) ; 1371 hash = Last [i] ; 1372 1373 /* if Hhead array is not used: */ 1374 j = Head [hash] ; 1375 if (j == EMPTY) 1376 { 1377 /* hash bucket and degree list are both empty */ 1378 i = EMPTY ; 1379 } 1380 else if (j < EMPTY) 1381 { 1382 /* degree list is empty */ 1383 i = FLIP (j) ; 1384 Head [hash] = EMPTY ; 1385 } 1386 else 1387 { 1388 /* degree list is not empty, restore Last [j] of head j */ 1389 i = Last [j] ; 1390 Last [j] = EMPTY ; 1391 } 1392 1393 /* if separate Hhead array is used: * 1394 i = Hhead [hash] ; 1395 Hhead [hash] = EMPTY ; 1396 */ 1397 1398 ASSERT (i >= EMPTY && i < n) ; 1399 AMD_DEBUG2 ("----i "~ID~" hash "~ID~"\n", i, hash); 1400 1401 while (i != EMPTY && Next [i] != EMPTY) 1402 { 1403 1404 /* ----------------------------------------------------- 1405 * this bucket has one or more variables following i. 1406 * scan all of them to see if i can absorb any entries 1407 * that follow i in hash bucket. Scatter i into w. 1408 * ----------------------------------------------------- */ 1409 1410 ln = Len [i] ; 1411 eln = Elen [i] ; 1412 ASSERT (ln >= 0 && eln >= 0) ; 1413 ASSERT (Pe [i] >= 0 && Pe [i] < iwlen) ; 1414 /* do not flag the first element in the list (me) */ 1415 for (p = Pe [i] + 1 ; p <= Pe [i] + ln - 1 ; p++) 1416 { 1417 ASSERT (Iw [p] >= 0 && Iw [p] < n) ; 1418 W [Iw [p]] = wflg ; 1419 } 1420 1421 /* ----------------------------------------------------- */ 1422 /* scan every other entry j following i in bucket */ 1423 /* ----------------------------------------------------- */ 1424 1425 jlast = i ; 1426 j = Next [i] ; 1427 ASSERT (j >= EMPTY && j < n) ; 1428 1429 while (j != EMPTY) 1430 { 1431 /* ------------------------------------------------- */ 1432 /* check if j and i have identical nonzero pattern */ 1433 /* ------------------------------------------------- */ 1434 1435 AMD_DEBUG3 ("compare i "~ID~" and j "~ID~"\n", i,j); 1436 1437 /* check if i and j have the same Len and Elen */ 1438 ASSERT (Len [j] >= 0 && Elen [j] >= 0) ; 1439 ASSERT (Pe [j] >= 0 && Pe [j] < iwlen) ; 1440 ok = (Len [j] == ln) && (Elen [j] == eln) ; 1441 /* skip the first element in the list (me) */ 1442 for (p = Pe [j] + 1 ; ok && p <= Pe [j] + ln - 1 ; p++) 1443 { 1444 ASSERT (Iw [p] >= 0 && Iw [p] < n) ; 1445 if (W [Iw [p]] != wflg) ok = 0 ; 1446 } 1447 if (ok) 1448 { 1449 /* --------------------------------------------- */ 1450 /* found it! j can be absorbed into i */ 1451 /* --------------------------------------------- */ 1452 1453 AMD_DEBUG1 ("found it! j "~ID~" => i "~ID~"\n", j,i); 1454 Pe [j] = FLIP (i) ; 1455 /* both Nv [i] and Nv [j] are negated since they */ 1456 /* are in Lme, and the absolute values of each */ 1457 /* are the number of variables in i and j: */ 1458 Nv [i] += Nv [j] ; 1459 Nv [j] = 0 ; 1460 Elen [j] = EMPTY ; 1461 /* delete j from hash bucket */ 1462 ASSERT (j != Next [j]) ; 1463 j = Next [j] ; 1464 Next [jlast] = j ; 1465 1466 } 1467 else 1468 { 1469 /* j cannot be absorbed into i */ 1470 jlast = j ; 1471 ASSERT (j != Next [j]) ; 1472 j = Next [j] ; 1473 } 1474 ASSERT (j >= EMPTY && j < n) ; 1475 } 1476 1477 /* ----------------------------------------------------- 1478 * no more variables can be absorbed into i 1479 * go to next i in bucket and clear flag array 1480 * ----------------------------------------------------- */ 1481 1482 wflg++ ; 1483 i = Next [i] ; 1484 ASSERT (i >= EMPTY && i < n) ; 1485 1486 } 1487 } 1488 } 1489 AMD_DEBUG2 ("detect done\n"); 1490 1491 /* ========================================================================= */ 1492 /* RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVARIABLES FROM ELEMENT */ 1493 /* ========================================================================= */ 1494 1495 p = pme1 ; 1496 nleft = n - nel ; 1497 for (pme = pme1 ; pme <= pme2 ; pme++) 1498 { 1499 i = Iw [pme] ; 1500 ASSERT (i >= 0 && i < n) ; 1501 nvi = -Nv [i] ; 1502 AMD_DEBUG3 ("Restore i "~ID~" "~ID~"\n", i, nvi); 1503 if (nvi > 0) 1504 { 1505 /* i is a principal variable in Lme */ 1506 /* restore Nv [i] to signify that i is principal */ 1507 Nv [i] = nvi ; 1508 1509 /* --------------------------------------------------------- */ 1510 /* compute the external degree (add size of current element) */ 1511 /* --------------------------------------------------------- */ 1512 1513 deg = Degree [i] + degme - nvi ; 1514 deg = MIN (deg, nleft - nvi) ; 1515 ASSERT (IMPLIES (aggressive, deg > 0) && deg >= 0 && deg < n) ; 1516 1517 /* --------------------------------------------------------- */ 1518 /* place the supervariable at the head of the degree list */ 1519 /* --------------------------------------------------------- */ 1520 1521 inext = Head [deg] ; 1522 ASSERT (inext >= EMPTY && inext < n) ; 1523 if (inext != EMPTY) Last [inext] = i ; 1524 Next [i] = inext ; 1525 Last [i] = EMPTY ; 1526 Head [deg] = i ; 1527 1528 /* --------------------------------------------------------- */ 1529 /* save the new degree, and find the minimum degree */ 1530 /* --------------------------------------------------------- */ 1531 1532 mindeg = MIN (mindeg, deg) ; 1533 Degree [i] = deg ; 1534 1535 /* --------------------------------------------------------- */ 1536 /* place the supervariable in the element pattern */ 1537 /* --------------------------------------------------------- */ 1538 1539 Iw [p++] = i ; 1540 1541 } 1542 } 1543 AMD_DEBUG2("restore done\n"); 1544 1545 /* ========================================================================= */ 1546 /* FINALIZE THE NEW ELEMENT */ 1547 /* ========================================================================= */ 1548 1549 AMD_DEBUG2 ("ME = "~ID~" DONE\n", me); 1550 Nv [me] = nvpiv ; 1551 /* save the length of the list for the new element me */ 1552 Len [me] = p - pme1 ; 1553 if (Len [me] == 0) 1554 { 1555 /* there is nothing left of the current pivot element */ 1556 /* it is a root of the assembly tree */ 1557 Pe [me] = EMPTY ; 1558 W [me] = 0 ; 1559 } 1560 if (elenme != 0) 1561 { 1562 /* element was not constructed in place: deallocate part of */ 1563 /* it since newly nonprincipal variables may have been removed */ 1564 pfree = p ; 1565 } 1566 1567 /* The new element has nvpiv pivots and the size of the contribution 1568 * block for a multifrontal method is degme-by-degme, not including 1569 * the "dense" rows/columns. If the "dense" rows/columns are included, 1570 * the frontal matrix is no larger than 1571 * (degme+ndense)-by-(degme+ndense). 1572 */ 1573 1574 if (Info != cast(c_float *) NULL) 1575 { 1576 f = nvpiv ; 1577 r = degme + ndense ; 1578 dmax = MAX (dmax, f + r) ; 1579 1580 /* number of nonzeros in L (excluding the diagonal) */ 1581 lnzme = f*r + (f-1)*f/2 ; 1582 lnz += lnzme ; 1583 1584 /* number of divide operations for LDL' and for LU */ 1585 ndiv += lnzme ; 1586 1587 /* number of multiply-subtract pairs for LU */ 1588 s = f*r*r + r*(f-1)*f + (f-1)*f*(2*f-1)/6 ; 1589 nms_lu += s ; 1590 1591 /* number of multiply-subtract pairs for LDL' */ 1592 nms_ldl += (s + lnzme)/2 ; 1593 } 1594 1595 version(NDEBUG){} 1596 else { 1597 AMD_DEBUG2 ("finalize done nel "~ID~" n "~ID~"\n ::::\n", nel, n); 1598 for (pme = Pe [me] ; pme <= Pe [me] + Len [me] - 1 ; pme++) 1599 { 1600 AMD_DEBUG3 (" "~ID~"", Iw [pme]); 1601 } 1602 AMD_DEBUG3 ("\n"); 1603 } // !NDEBUG 1604 1605 } 1606 1607 /* ========================================================================= */ 1608 /* DONE SELECTING PIVOTS */ 1609 /* ========================================================================= */ 1610 1611 if (Info != cast(c_float *) NULL) 1612 { 1613 1614 /* count the work to factorize the ndense-by-ndense submatrix */ 1615 f = ndense ; 1616 dmax = MAX (dmax, cast(c_float) ndense) ; 1617 1618 /* number of nonzeros in L (excluding the diagonal) */ 1619 lnzme = (f-1)*f/2 ; 1620 lnz += lnzme ; 1621 1622 /* number of divide operations for LDL' and for LU */ 1623 ndiv += lnzme ; 1624 1625 /* number of multiply-subtract pairs for LU */ 1626 s = (f-1)*f*(2*f-1)/6 ; 1627 nms_lu += s ; 1628 1629 /* number of multiply-subtract pairs for LDL' */ 1630 nms_ldl += (s + lnzme)/2 ; 1631 1632 /* number of nz's in L (excl. diagonal) */ 1633 Info [AMD_LNZ] = lnz ; 1634 1635 /* number of divide ops for LU and LDL' */ 1636 Info [AMD_NDIV] = ndiv ; 1637 1638 /* number of multiply-subtract pairs for LDL' */ 1639 Info [AMD_NMULTSUBS_LDL] = nms_ldl ; 1640 1641 /* number of multiply-subtract pairs for LU */ 1642 Info [AMD_NMULTSUBS_LU] = nms_lu ; 1643 1644 /* number of "dense" rows/columns */ 1645 Info [AMD_NDENSE] = ndense ; 1646 1647 /* largest front is dmax-by-dmax */ 1648 Info [AMD_DMAX] = dmax ; 1649 1650 /* number of garbage collections in AMD */ 1651 Info [AMD_NCMPA] = ncmpa ; 1652 1653 /* successful ordering */ 1654 Info [AMD_STATUS] = AMD_OK ; 1655 } 1656 1657 /* ========================================================================= */ 1658 /* POST-ORDERING */ 1659 /* ========================================================================= */ 1660 1661 /* ------------------------------------------------------------------------- 1662 * Variables at this point: 1663 * 1664 * Pe: holds the elimination tree. The parent of j is FLIP (Pe [j]), 1665 * or EMPTY if j is a root. The tree holds both elements and 1666 * non-principal (unordered) variables absorbed into them. 1667 * Dense variables are non-principal and unordered. 1668 * 1669 * Elen: holds the size of each element, including the diagonal part. 1670 * FLIP (Elen [e]) > 0 if e is an element. For unordered 1671 * variables i, Elen [i] is EMPTY. 1672 * 1673 * Nv: Nv [e] > 0 is the number of pivots represented by the element e. 1674 * For unordered variables i, Nv [i] is zero. 1675 * 1676 * Contents no longer needed: 1677 * W, Iw, Len, Degree, Head, Next, Last. 1678 * 1679 * The matrix itself has been destroyed. 1680 * 1681 * n: the size of the matrix. 1682 * No other scalars needed (pfree, iwlen, etc.) 1683 * ------------------------------------------------------------------------- */ 1684 1685 /* restore Pe */ 1686 for (i = 0 ; i < n ; i++) 1687 { 1688 Pe [i] = FLIP (Pe [i]) ; 1689 } 1690 1691 /* restore Elen, for output information, and for postordering */ 1692 for (i = 0 ; i < n ; i++) 1693 { 1694 Elen [i] = FLIP (Elen [i]) ; 1695 } 1696 1697 /* Now the parent of j is Pe [j], or EMPTY if j is a root. Elen [e] > 0 1698 * is the size of element e. Elen [i] is EMPTY for unordered variable i. */ 1699 1700 version(NDEBUG){} 1701 else { 1702 AMD_DEBUG2 ("\nTree:\n"); 1703 for (i = 0 ; i < n ; i++) 1704 { 1705 AMD_DEBUG2 (" "~ID~" parent: "~ID~" ", i, Pe [i]); 1706 ASSERT (Pe [i] >= EMPTY && Pe [i] < n) ; 1707 if (Nv [i] > 0) 1708 { 1709 /* this is an element */ 1710 e = i ; 1711 AMD_DEBUG2 (" element, size is "~ID~"\n", Elen [i]); 1712 ASSERT (Elen [e] > 0) ; 1713 } 1714 AMD_DEBUG2 ("\n"); 1715 } 1716 AMD_DEBUG2 ("\nelements:\n"); 1717 for (e = 0 ; e < n ; e++) 1718 { 1719 if (Nv [e] > 0) 1720 { 1721 AMD_DEBUG3 ("Element e= "~ID~" size "~ID~" nv "~ID~" \n", e, Elen [e], Nv [e]); 1722 } 1723 } 1724 AMD_DEBUG2 ("\nvariables:\n"); 1725 for (i = 0 ; i < n ; i++) 1726 { 1727 Int cnt ; 1728 if (Nv [i] == 0) 1729 { 1730 AMD_DEBUG3 ("i unordered: "~ID~"\n", i); 1731 j = Pe [i] ; 1732 cnt = 0 ; 1733 AMD_DEBUG3 (" j: "~ID~"\n", j); 1734 if (j == EMPTY) 1735 { 1736 AMD_DEBUG3 (" i is a dense variable\n"); 1737 } 1738 else 1739 { 1740 ASSERT (j >= 0 && j < n) ; 1741 while (Nv [j] == 0) 1742 { 1743 AMD_DEBUG3 (" j : "~ID~"\n", j); 1744 j = Pe [j] ; 1745 AMD_DEBUG3 (" j:: "~ID~"\n", j); 1746 cnt++ ; 1747 if (cnt > n) break ; 1748 } 1749 e = j ; 1750 AMD_DEBUG3 (" got to e: "~ID~"\n", e); 1751 } 1752 } 1753 } 1754 } // !NDEBUG 1755 1756 /* ========================================================================= */ 1757 /* compress the paths of the variables */ 1758 /* ========================================================================= */ 1759 1760 for (i = 0 ; i < n ; i++) 1761 { 1762 if (Nv [i] == 0) 1763 { 1764 1765 /* ------------------------------------------------------------- 1766 * i is an un-ordered row. Traverse the tree from i until 1767 * reaching an element, e. The element, e, was the principal 1768 * supervariable of i and all nodes in the path from i to when e 1769 * was selected as pivot. 1770 * ------------------------------------------------------------- */ 1771 1772 AMD_DEBUG1 ("Path compression, i unordered: "~ID~"\n", i); 1773 j = Pe [i] ; 1774 ASSERT (j >= EMPTY && j < n) ; 1775 AMD_DEBUG3 (" j: "~ID~"\n", j); 1776 if (j == EMPTY) 1777 { 1778 /* Skip a dense variable. It has no parent. */ 1779 AMD_DEBUG3 ((" i is a dense variable\n")) ; 1780 continue ; 1781 } 1782 1783 /* while (j is a variable) */ 1784 while (Nv [j] == 0) 1785 { 1786 AMD_DEBUG3 (" j : "~ID~"\n", j); 1787 j = Pe [j] ; 1788 AMD_DEBUG3 (" j:: "~ID~"\n", j); 1789 ASSERT (j >= 0 && j < n) ; 1790 } 1791 /* got to an element e */ 1792 e = j ; 1793 AMD_DEBUG3 ("got to e: "~ID~"\n", e); 1794 1795 /* ------------------------------------------------------------- 1796 * traverse the path again from i to e, and compress the path 1797 * (all nodes point to e). Path compression allows this code to 1798 * compute in O(n) time. 1799 * ------------------------------------------------------------- */ 1800 1801 j = i ; 1802 /* while (j is a variable) */ 1803 while (Nv [j] == 0) 1804 { 1805 jnext = Pe [j] ; 1806 AMD_DEBUG3 ("j "~ID~" jnext "~ID~"\n", j, jnext); 1807 Pe [j] = e ; 1808 j = jnext ; 1809 ASSERT (j >= 0 && j < n) ; 1810 } 1811 } 1812 } 1813 1814 /* ========================================================================= */ 1815 /* postorder the assembly tree */ 1816 /* ========================================================================= */ 1817 1818 AMD_postorder (n, Pe, Nv, Elen, 1819 W, /* output order */ 1820 Head, Next, Last) ; /* workspace */ 1821 1822 /* ========================================================================= */ 1823 /* compute output permutation and inverse permutation */ 1824 /* ========================================================================= */ 1825 1826 /* W [e] = k means that element e is the kth element in the new 1827 * order. e is in the range 0 to n-1, and k is in the range 0 to 1828 * the number of elements. Use Head for inverse order. */ 1829 1830 for (k = 0 ; k < n ; k++) 1831 { 1832 Head [k] = EMPTY ; 1833 Next [k] = EMPTY ; 1834 } 1835 for (e = 0 ; e < n ; e++) 1836 { 1837 k = W [e] ; 1838 ASSERT ((k == EMPTY) == (Nv [e] == 0)) ; 1839 if (k != EMPTY) 1840 { 1841 ASSERT (k >= 0 && k < n) ; 1842 Head [k] = e ; 1843 } 1844 } 1845 1846 /* construct output inverse permutation in Next, 1847 * and permutation in Last */ 1848 nel = 0 ; 1849 for (k = 0 ; k < n ; k++) 1850 { 1851 e = Head [k] ; 1852 if (e == EMPTY) break ; 1853 ASSERT (e >= 0 && e < n && Nv [e] > 0) ; 1854 Next [e] = nel ; 1855 nel += Nv [e] ; 1856 } 1857 ASSERT (nel == n - ndense) ; 1858 1859 /* order non-principal variables (dense, & those merged into supervar's) */ 1860 for (i = 0 ; i < n ; i++) 1861 { 1862 if (Nv [i] == 0) 1863 { 1864 e = Pe [i] ; 1865 ASSERT (e >= EMPTY && e < n) ; 1866 if (e != EMPTY) 1867 { 1868 /* This is an unordered variable that was merged 1869 * into element e via supernode detection or mass 1870 * elimination of i when e became the pivot element. 1871 * Place i in order just before e. */ 1872 ASSERT (Next [i] == EMPTY && Nv [e] > 0) ; 1873 Next [i] = Next [e] ; 1874 Next [e]++ ; 1875 } 1876 else 1877 { 1878 /* This is a dense unordered variable, with no parent. 1879 * Place it last in the output order. */ 1880 Next [i] = nel++ ; 1881 } 1882 } 1883 } 1884 ASSERT (nel == n) ; 1885 1886 AMD_DEBUG2 (("\n\nPerm:\n")) ; 1887 for (i = 0 ; i < n ; i++) 1888 { 1889 k = Next [i] ; 1890 ASSERT (k >= 0 && k < n) ; 1891 Last [k] = i ; 1892 AMD_DEBUG2 (" perm ["~ID~"] = "~ID~"\n", k, i); 1893 } 1894 } 1895 1896 } // version DLONG 1897 else { 1898 void AMD_2 1899 ( 1900 int n, /* A is n-by-n, where n > 0 */ 1901 int *Pe, /* Pe [0..n-1]: index in Iw of row i on input */ 1902 int *Iw, /* workspace of size iwlen. Iw [0..pfree-1] holds the matrix on input */ 1903 int *Len, /* Len [0..n-1]: length for row/column i on input */ 1904 int iwlen, /* length of Iw. iwlen >= pfree + n */ 1905 int pfree, /* Iw [pfree ... iwlen-1] is empty on input */ 1906 /* 7 size-n workspaces, not defined on input: */ 1907 int *Nv, /* the size of each supernode on output */ 1908 int *Next, /* the output inverse permutation */ 1909 int *Last, /* the output permutation */ 1910 int *Head, 1911 int *Elen,/* the size columns of L for each supernode */ 1912 int *Degree, 1913 int *W, 1914 /* control parameters and output statistics */ 1915 c_float *Control, /* array of size AMD_CONTROL */ 1916 c_float *Info /* array of size AMD_INFO */ 1917 ) 1918 1919 { 1920 1921 Int deg; 1922 Int degme; 1923 Int dext; 1924 Int lemax; 1925 Int e; 1926 Int elenme; 1927 Int eln; 1928 Int i; 1929 Int ilast; 1930 Int inext; 1931 Int j; 1932 Int jlast; 1933 Int jnext; 1934 Int k; 1935 Int knt1; 1936 Int knt2; 1937 Int knt3; 1938 Int lenj; 1939 Int ln; 1940 Int me; 1941 Int mindeg; 1942 Int nel; 1943 Int nleft; 1944 Int nvi; 1945 Int nvj; 1946 Int nvpiv; 1947 Int slenme; 1948 Int wbig; 1949 Int we; 1950 Int wflg; 1951 Int wnvi; 1952 Int ok; 1953 Int ndense; 1954 Int ncmpa; 1955 Int dense; 1956 Int aggressive; 1957 1958 //unsigned Int hash ; /* unsigned, so that hash % n is well defined.*/ 1959 UInt hash ; /* unsigned, so that hash % n is well defined.*/ 1960 1961 /* 1962 * deg: the degree of a variable or element 1963 * degme: size, |Lme|, of the current element, me (= Degree [me]) 1964 * dext: external degree, |Le \ Lme|, of some element e 1965 * lemax: largest |Le| seen so far (called dmax in Fortran version) 1966 * e: an element 1967 * elenme: the length, Elen [me], of element list of pivotal variable 1968 * eln: the length, Elen [...], of an element list 1969 * hash: the computed value of the hash function 1970 * i: a supervariable 1971 * ilast: the entry in a link list preceding i 1972 * inext: the entry in a link list following i 1973 * j: a supervariable 1974 * jlast: the entry in a link list preceding j 1975 * jnext: the entry in a link list, or path, following j 1976 * k: the pivot order of an element or variable 1977 * knt1: loop counter used during element construction 1978 * knt2: loop counter used during element construction 1979 * knt3: loop counter used during compression 1980 * lenj: Len [j] 1981 * ln: length of a supervariable list 1982 * me: current supervariable being eliminated, and the current 1983 * element created by eliminating that supervariable 1984 * mindeg: current minimum degree 1985 * nel: number of pivots selected so far 1986 * nleft: n - nel, the number of nonpivotal rows/columns remaining 1987 * nvi: the number of variables in a supervariable i (= Nv [i]) 1988 * nvj: the number of variables in a supervariable j (= Nv [j]) 1989 * nvpiv: number of pivots in current element 1990 * slenme: number of variables in variable list of pivotal variable 1991 * wbig: = (INT_MAX - n) for the int version, (SuiteSparse_long_max - n) 1992 * for the SuiteSparse_long version. wflg is not allowed to 1993 * be >= wbig. 1994 * we: W [e] 1995 * wflg: used for flagging the W array. See description of Iw. 1996 * wnvi: wflg - Nv [i] 1997 * x: either a supervariable or an element 1998 * 1999 * ok: true if supervariable j can be absorbed into i 2000 * ndense: number of "dense" rows/columns 2001 * dense: rows/columns with initial degree > dense are considered "dense" 2002 * aggressive: true if aggressive absorption is being performed 2003 * ncmpa: number of garbage collections 2004 2005 * ---------------------------------------------------------------------------- 2006 * LOCAL DOUBLES, used for statistical output only (except for alpha): 2007 * ---------------------------------------------------------------------------- 2008 */ 2009 2010 c_float f, r, ndiv, s, nms_lu, nms_ldl, dmax, alpha, lnz, lnzme ; 2011 2012 /* 2013 * f: nvpiv 2014 * r: degme + nvpiv 2015 * ndiv: number of divisions for LU or LDL' factorizations 2016 * s: number of multiply-subtract pairs for LU factorization, for the 2017 * current element me 2018 * nms_lu number of multiply-subtract pairs for LU factorization 2019 * nms_ldl number of multiply-subtract pairs for LDL' factorization 2020 * dmax: the largest number of entries in any column of L, including the 2021 * diagonal 2022 * alpha: "dense" degree ratio 2023 * lnz: the number of nonzeros in L (excluding the diagonal) 2024 * lnzme: the number of nonzeros in L (excl. the diagonal) for the 2025 * current element me 2026 2027 * ---------------------------------------------------------------------------- 2028 * LOCAL "POINTERS" (indices into the Iw array) 2029 * ---------------------------------------------------------------------------- 2030 */ 2031 2032 Int p, p1, p2, p3, p4, pdst, pend, pj, pme, pme1, pme2, pn, psrc ; 2033 2034 /* 2035 * Any parameter (Pe [...] or pfree) or local variable starting with "p" (for 2036 * Pointer) is an index into Iw, and all indices into Iw use variables starting 2037 * with "p." The only exception to this rule is the iwlen input argument. 2038 * 2039 * p: pointer into lots of things 2040 * p1: Pe [i] for some variable i (start of element list) 2041 * p2: Pe [i] + Elen [i] - 1 for some variable i 2042 * p3: index of first supervariable in clean list 2043 * p4: 2044 * pdst: destination pointer, for compression 2045 * pend: end of memory to compress 2046 * pj: pointer into an element or variable 2047 * pme: pointer into the current element (pme1...pme2) 2048 * pme1: the current element, me, is stored in Iw [pme1...pme2] 2049 * pme2: the end of the current element 2050 * pn: pointer into a "clean" variable, also used to compress 2051 * psrc: source pointer, for compression 2052 */ 2053 2054 /* ========================================================================= */ 2055 /* INITIALIZATIONS */ 2056 /* ========================================================================= */ 2057 2058 /* Note that this restriction on iwlen is slightly more restrictive than 2059 * what is actually required in AMD_2. AMD_2 can operate with no elbow 2060 * room at all, but it will be slow. For better performance, at least 2061 * size-n elbow room is enforced. */ 2062 ASSERT (iwlen >= pfree + n) ; 2063 ASSERT (n > 0) ; 2064 2065 /* initialize output statistics */ 2066 lnz = 0 ; 2067 ndiv = 0 ; 2068 nms_lu = 0 ; 2069 nms_ldl = 0 ; 2070 dmax = 1 ; 2071 me = EMPTY ; 2072 2073 mindeg = 0 ; 2074 ncmpa = 0 ; 2075 nel = 0 ; 2076 lemax = 0 ; 2077 2078 /* get control parameters */ 2079 if (Control != cast(c_float *) NULL) 2080 { 2081 alpha = Control [AMD_DENSE] ; 2082 aggressive = (Control [AMD_AGGRESSIVE] != 0) ; 2083 } 2084 else 2085 { 2086 alpha = AMD_DEFAULT_DENSE ; 2087 aggressive = AMD_DEFAULT_AGGRESSIVE ; 2088 } 2089 /* Note: if alpha is NaN, this is undefined: */ 2090 if (alpha < 0) 2091 { 2092 /* only remove completely dense rows/columns */ 2093 dense = n-2 ; 2094 } 2095 else 2096 { 2097 dense = cast(Int) (alpha * sqrt (cast(c_float) n)) ; 2098 } 2099 dense = MAX (16, dense) ; 2100 dense = MIN (n, dense) ; 2101 AMD_DEBUG1 ("\n\nAMD (debug), alpha %g, aggr. "~ID~"\n", alpha, aggressive); 2102 2103 for (i = 0 ; i < n ; i++) 2104 { 2105 Last [i] = EMPTY ; 2106 Head [i] = EMPTY ; 2107 Next [i] = EMPTY ; 2108 /* if separate Hhead array is used for hash buckets: * 2109 Hhead [i] = EMPTY ; 2110 */ 2111 Nv [i] = 1 ; 2112 W [i] = 1 ; 2113 Elen [i] = 0 ; 2114 Degree [i] = Len [i] ; 2115 } 2116 2117 version(NDEBUG){} 2118 else { 2119 AMD_DEBUG1 ("\n======Nel "~ID~" initial\n", nel); 2120 AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, Last, 2121 Head, Elen, Degree, W, -1) ; 2122 } // !NDEBUG 2123 2124 /* initialize wflg */ 2125 wbig = Int_MAX - n ; 2126 wflg = clear_flag (0, wbig, W, n) ; 2127 2128 /* --------------------------------------------------------------------- */ 2129 /* initialize degree lists and eliminate dense and empty rows */ 2130 /* --------------------------------------------------------------------- */ 2131 2132 ndense = 0 ; 2133 2134 for (i = 0 ; i < n ; i++) 2135 { 2136 deg = Degree [i] ; 2137 ASSERT (deg >= 0 && deg < n) ; 2138 if (deg == 0) 2139 { 2140 2141 /* ------------------------------------------------------------- 2142 * we have a variable that can be eliminated at once because 2143 * there is no off-diagonal non-zero in its row. Note that 2144 * Nv [i] = 1 for an empty variable i. It is treated just 2145 * the same as an eliminated element i. 2146 * ------------------------------------------------------------- */ 2147 2148 Elen [i] = FLIP (1) ; 2149 nel++ ; 2150 Pe [i] = EMPTY ; 2151 W [i] = 0 ; 2152 2153 } 2154 else if (deg > dense) 2155 { 2156 2157 /* ------------------------------------------------------------- 2158 * Dense variables are not treated as elements, but as unordered, 2159 * non-principal variables that have no parent. They do not take 2160 * part in the postorder, since Nv [i] = 0. Note that the Fortran 2161 * version does not have this option. 2162 * ------------------------------------------------------------- */ 2163 2164 AMD_DEBUG1 ("Dense node "~ID~" degree "~ID~"\n", i, deg); 2165 ndense++ ; 2166 Nv [i] = 0 ; /* do not postorder this node */ 2167 Elen [i] = EMPTY ; 2168 nel++ ; 2169 Pe [i] = EMPTY ; 2170 2171 } 2172 else 2173 { 2174 2175 /* ------------------------------------------------------------- 2176 * place i in the degree list corresponding to its degree 2177 * ------------------------------------------------------------- */ 2178 2179 inext = Head [deg] ; 2180 ASSERT (inext >= EMPTY && inext < n) ; 2181 if (inext != EMPTY) Last [inext] = i ; 2182 Next [i] = inext ; 2183 Head [deg] = i ; 2184 2185 } 2186 } 2187 2188 /* ========================================================================= */ 2189 /* WHILE (selecting pivots) DO */ 2190 /* ========================================================================= */ 2191 2192 while (nel < n) 2193 { 2194 2195 version(NDEBUG){} 2196 else { 2197 AMD_DEBUG1 ("\n======Nel "~ID~"\n", nel) ; 2198 //if (AMD_debug >= 2) 2199 if (true) // todo : 2200 { 2201 AMD_dump (n, Pe, Iw, Len, iwlen, pfree, Nv, Next, 2202 Last, Head, Elen, Degree, W, nel) ; 2203 } 2204 } // !NDEBUG 2205 2206 /* ========================================================================= */ 2207 /* GET PIVOT OF MINIMUM DEGREE */ 2208 /* ========================================================================= */ 2209 2210 /* ----------------------------------------------------------------- */ 2211 /* find next supervariable for elimination */ 2212 /* ----------------------------------------------------------------- */ 2213 2214 ASSERT (mindeg >= 0 && mindeg < n) ; 2215 for (deg = mindeg ; deg < n ; deg++) 2216 { 2217 me = Head [deg] ; 2218 if (me != EMPTY) break ; 2219 } 2220 mindeg = deg ; 2221 ASSERT (me >= 0 && me < n) ; 2222 AMD_DEBUG1 ("=================me: "~ID~"\n", me) ; 2223 2224 /* ----------------------------------------------------------------- */ 2225 /* remove chosen variable from link list */ 2226 /* ----------------------------------------------------------------- */ 2227 2228 inext = Next [me] ; 2229 ASSERT (inext >= EMPTY && inext < n) ; 2230 if (inext != EMPTY) Last [inext] = EMPTY ; 2231 Head [deg] = inext ; 2232 2233 /* ----------------------------------------------------------------- */ 2234 /* me represents the elimination of pivots nel to nel+Nv[me]-1. */ 2235 /* place me itself as the first in this set. */ 2236 /* ----------------------------------------------------------------- */ 2237 2238 elenme = Elen [me] ; 2239 nvpiv = Nv [me] ; 2240 ASSERT (nvpiv > 0) ; 2241 nel += nvpiv ; 2242 2243 /* ========================================================================= */ 2244 /* CONSTRUCT NEW ELEMENT */ 2245 /* ========================================================================= */ 2246 2247 /* ----------------------------------------------------------------- 2248 * At this point, me is the pivotal supervariable. It will be 2249 * converted into the current element. Scan list of the pivotal 2250 * supervariable, me, setting tree pointers and constructing new list 2251 * of supervariables for the new element, me. p is a pointer to the 2252 * current position in the old list. 2253 * ----------------------------------------------------------------- */ 2254 2255 /* flag the variable "me" as being in Lme by negating Nv [me] */ 2256 Nv [me] = -nvpiv ; 2257 degme = 0 ; 2258 ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; 2259 2260 if (elenme == 0) 2261 { 2262 2263 /* ------------------------------------------------------------- */ 2264 /* construct the new element in place */ 2265 /* ------------------------------------------------------------- */ 2266 2267 pme1 = Pe [me] ; 2268 pme2 = pme1 - 1 ; 2269 2270 for (p = pme1 ; p <= pme1 + Len [me] - 1 ; p++) 2271 { 2272 i = Iw [p] ; 2273 ASSERT (i >= 0 && i < n && Nv [i] >= 0) ; 2274 nvi = Nv [i] ; 2275 if (nvi > 0) 2276 { 2277 2278 /* ----------------------------------------------------- */ 2279 /* i is a principal variable not yet placed in Lme. */ 2280 /* store i in new list */ 2281 /* ----------------------------------------------------- */ 2282 2283 /* flag i as being in Lme by negating Nv [i] */ 2284 degme += nvi ; 2285 Nv [i] = -nvi ; 2286 Iw [++pme2] = i ; 2287 2288 /* ----------------------------------------------------- */ 2289 /* remove variable i from degree list. */ 2290 /* ----------------------------------------------------- */ 2291 2292 ilast = Last [i] ; 2293 inext = Next [i] ; 2294 ASSERT (ilast >= EMPTY && ilast < n) ; 2295 ASSERT (inext >= EMPTY && inext < n) ; 2296 if (inext != EMPTY) Last [inext] = ilast ; 2297 if (ilast != EMPTY) 2298 { 2299 Next [ilast] = inext ; 2300 } 2301 else 2302 { 2303 /* i is at the head of the degree list */ 2304 ASSERT (Degree [i] >= 0 && Degree [i] < n) ; 2305 Head [Degree [i]] = inext ; 2306 } 2307 } 2308 } 2309 } 2310 else 2311 { 2312 2313 /* ------------------------------------------------------------- */ 2314 /* construct the new element in empty space, Iw [pfree ...] */ 2315 /* ------------------------------------------------------------- */ 2316 2317 p = Pe [me] ; 2318 pme1 = pfree ; 2319 slenme = Len [me] - elenme ; 2320 2321 for (knt1 = 1 ; knt1 <= elenme + 1 ; knt1++) 2322 { 2323 2324 if (knt1 > elenme) 2325 { 2326 /* search the supervariables in me. */ 2327 e = me ; 2328 pj = p ; 2329 ln = slenme ; 2330 AMD_DEBUG2 ("Search sv: "~ID~" "~ID~" "~ID~"\n", me,pj,ln); 2331 } 2332 else 2333 { 2334 /* search the elements in me. */ 2335 e = Iw [p++] ; 2336 ASSERT (e >= 0 && e < n) ; 2337 pj = Pe [e] ; 2338 ln = Len [e] ; 2339 AMD_DEBUG2 ("Search element e "~ID~" in me "~ID~"\n", e,me); 2340 ASSERT (Elen [e] < EMPTY && W [e] > 0 && pj >= 0) ; 2341 } 2342 ASSERT (ln >= 0 && (ln == 0 || (pj >= 0 && pj < iwlen))) ; 2343 2344 /* --------------------------------------------------------- 2345 * search for different supervariables and add them to the 2346 * new list, compressing when necessary. this loop is 2347 * executed once for each element in the list and once for 2348 * all the supervariables in the list. 2349 * --------------------------------------------------------- */ 2350 2351 for (knt2 = 1 ; knt2 <= ln ; knt2++) 2352 { 2353 i = Iw [pj++] ; 2354 ASSERT (i >= 0 && i < n && (i == me || Elen [i] >= EMPTY)); 2355 nvi = Nv [i] ; 2356 AMD_DEBUG2 (": "~ID~" "~ID~" "~ID~" "~ID~"\n", i, Elen [i], Nv [i], wflg); 2357 2358 if (nvi > 0) 2359 { 2360 2361 /* ------------------------------------------------- */ 2362 /* compress Iw, if necessary */ 2363 /* ------------------------------------------------- */ 2364 2365 if (pfree >= iwlen) 2366 { 2367 2368 AMD_DEBUG1 ("GARBAGE COLLECTION\n"); 2369 2370 /* prepare for compressing Iw by adjusting pointers 2371 * and lengths so that the lists being searched in 2372 * the inner and outer loops contain only the 2373 * remaining entries. */ 2374 2375 Pe [me] = p ; 2376 Len [me] -= knt1 ; 2377 /* check if nothing left of supervariable me */ 2378 if (Len [me] == 0) Pe [me] = EMPTY ; 2379 Pe [e] = pj ; 2380 Len [e] = ln - knt2 ; 2381 /* nothing left of element e */ 2382 if (Len [e] == 0) Pe [e] = EMPTY ; 2383 2384 ncmpa++ ; /* one more garbage collection */ 2385 2386 /* store first entry of each object in Pe */ 2387 /* FLIP the first entry in each object */ 2388 for (j = 0 ; j < n ; j++) 2389 { 2390 pn = Pe [j] ; 2391 if (pn >= 0) 2392 { 2393 ASSERT (pn >= 0 && pn < iwlen) ; 2394 Pe [j] = Iw [pn] ; 2395 Iw [pn] = FLIP (j) ; 2396 } 2397 } 2398 2399 /* psrc/pdst point to source/destination */ 2400 psrc = 0 ; 2401 pdst = 0 ; 2402 pend = pme1 - 1 ; 2403 2404 while (psrc <= pend) 2405 { 2406 /* search for next FLIP'd entry */ 2407 j = FLIP (Iw [psrc++]) ; 2408 if (j >= 0) 2409 { 2410 AMD_DEBUG2 ("Got object j: "~ID~"\n", j); 2411 Iw [pdst] = Pe [j] ; 2412 Pe [j] = pdst++ ; 2413 lenj = Len [j] ; 2414 /* copy from source to destination */ 2415 for (knt3 = 0 ; knt3 <= lenj - 2 ; knt3++) 2416 { 2417 Iw [pdst++] = Iw [psrc++] ; 2418 } 2419 } 2420 } 2421 2422 /* move the new partially-constructed element */ 2423 p1 = pdst ; 2424 for (psrc = pme1 ; psrc <= pfree-1 ; psrc++) 2425 { 2426 Iw [pdst++] = Iw [psrc] ; 2427 } 2428 pme1 = p1 ; 2429 pfree = pdst ; 2430 pj = Pe [e] ; 2431 p = Pe [me] ; 2432 2433 } 2434 2435 /* ------------------------------------------------- */ 2436 /* i is a principal variable not yet placed in Lme */ 2437 /* store i in new list */ 2438 /* ------------------------------------------------- */ 2439 2440 /* flag i as being in Lme by negating Nv [i] */ 2441 degme += nvi ; 2442 Nv [i] = -nvi ; 2443 Iw [pfree++] = i ; 2444 AMD_DEBUG2 (" s: "~ID~" nv "~ID~"\n", i, Nv [i]); 2445 2446 /* ------------------------------------------------- */ 2447 /* remove variable i from degree link list */ 2448 /* ------------------------------------------------- */ 2449 2450 ilast = Last [i] ; 2451 inext = Next [i] ; 2452 ASSERT (ilast >= EMPTY && ilast < n) ; 2453 ASSERT (inext >= EMPTY && inext < n) ; 2454 if (inext != EMPTY) Last [inext] = ilast ; 2455 if (ilast != EMPTY) 2456 { 2457 Next [ilast] = inext ; 2458 } 2459 else 2460 { 2461 /* i is at the head of the degree list */ 2462 ASSERT (Degree [i] >= 0 && Degree [i] < n) ; 2463 Head [Degree [i]] = inext ; 2464 } 2465 } 2466 } 2467 2468 if (e != me) 2469 { 2470 /* set tree pointer and flag to indicate element e is 2471 * absorbed into new element me (the parent of e is me) */ 2472 AMD_DEBUG1 (" Element "~ID~" => "~ID~"\n", e, me); 2473 Pe [e] = FLIP (me) ; 2474 W [e] = 0 ; 2475 } 2476 } 2477 2478 pme2 = pfree - 1 ; 2479 } 2480 2481 /* ----------------------------------------------------------------- */ 2482 /* me has now been converted into an element in Iw [pme1..pme2] */ 2483 /* ----------------------------------------------------------------- */ 2484 2485 /* degme holds the external degree of new element */ 2486 Degree [me] = degme ; 2487 Pe [me] = pme1 ; 2488 Len [me] = pme2 - pme1 + 1 ; 2489 ASSERT (Pe [me] >= 0 && Pe [me] < iwlen) ; 2490 2491 Elen [me] = FLIP (nvpiv + degme) ; 2492 /* FLIP (Elen (me)) is now the degree of pivot (including 2493 * diagonal part). */ 2494 2495 version(NDEBUG){} 2496 else { 2497 AMD_DEBUG2 ("New element structure: length= "~ID~"\n", pme2-pme1+1); 2498 for (pme = pme1 ; pme <= pme2 ; pme++) AMD_DEBUG3 (" "~ID~"", Iw[pme]); 2499 AMD_DEBUG3 ("\n"); 2500 } // !NDEBUG 2501 2502 /* ----------------------------------------------------------------- */ 2503 /* make sure that wflg is not too large. */ 2504 /* ----------------------------------------------------------------- */ 2505 2506 /* With the current value of wflg, wflg+n must not cause integer 2507 * overflow */ 2508 2509 wflg = clear_flag (wflg, wbig, W, n) ; 2510 2511 /* ========================================================================= */ 2512 /* COMPUTE (W [e] - wflg) = |Le\Lme| FOR ALL ELEMENTS */ 2513 /* ========================================================================= */ 2514 2515 /* ----------------------------------------------------------------- 2516 * Scan 1: compute the external degrees of previous elements with 2517 * respect to the current element. That is: 2518 * (W [e] - wflg) = |Le \ Lme| 2519 * for each element e that appears in any supervariable in Lme. The 2520 * notation Le refers to the pattern (list of supervariables) of a 2521 * previous element e, where e is not yet absorbed, stored in 2522 * Iw [Pe [e] + 1 ... Pe [e] + Len [e]]. The notation Lme 2523 * refers to the pattern of the current element (stored in 2524 * Iw [pme1..pme2]). If aggressive absorption is enabled, and 2525 * (W [e] - wflg) becomes zero, then the element e will be absorbed 2526 * in Scan 2. 2527 * ----------------------------------------------------------------- */ 2528 2529 AMD_DEBUG2 ("me: "); 2530 for (pme = pme1 ; pme <= pme2 ; pme++) 2531 { 2532 i = Iw [pme] ; 2533 ASSERT (i >= 0 && i < n) ; 2534 eln = Elen [i] ; 2535 AMD_DEBUG3 (""~ID~" Elen "~ID~": \n", i, eln); 2536 if (eln > 0) 2537 { 2538 /* note that Nv [i] has been negated to denote i in Lme: */ 2539 nvi = -Nv [i] ; 2540 ASSERT (nvi > 0 && Pe [i] >= 0 && Pe [i] < iwlen) ; 2541 wnvi = wflg - nvi ; 2542 for (p = Pe [i] ; p <= Pe [i] + eln - 1 ; p++) 2543 { 2544 e = Iw [p] ; 2545 ASSERT (e >= 0 && e < n) ; 2546 we = W [e] ; 2547 AMD_DEBUG4 (" e "~ID~" we "~ID~" ", e, we); 2548 if (we >= wflg) 2549 { 2550 /* unabsorbed element e has been seen in this loop */ 2551 AMD_DEBUG4 (" unabsorbed, first time seen"); 2552 we -= nvi ; 2553 } 2554 else if (we != 0) 2555 { 2556 /* e is an unabsorbed element */ 2557 /* this is the first we have seen e in all of Scan 1 */ 2558 AMD_DEBUG4 (" unabsorbed"); 2559 we = Degree [e] + wnvi ; 2560 } 2561 AMD_DEBUG4 ("\n"); 2562 W [e] = we ; 2563 } 2564 } 2565 } 2566 AMD_DEBUG2 ("\n"); 2567 2568 /* ========================================================================= */ 2569 /* DEGREE UPDATE AND ELEMENT ABSORPTION */ 2570 /* ========================================================================= */ 2571 2572 /* ----------------------------------------------------------------- 2573 * Scan 2: for each i in Lme, sum up the degree of Lme (which is 2574 * degme), plus the sum of the external degrees of each Le for the 2575 * elements e appearing within i, plus the supervariables in i. 2576 * Place i in hash list. 2577 * ----------------------------------------------------------------- */ 2578 2579 for (pme = pme1 ; pme <= pme2 ; pme++) 2580 { 2581 i = Iw [pme] ; 2582 ASSERT (i >= 0 && i < n && Nv [i] < 0 && Elen [i] >= 0) ; 2583 AMD_DEBUG2 ("Updating: i "~ID~" "~ID~" "~ID~"\n", i, Elen[i], Len [i]); 2584 p1 = Pe [i] ; 2585 p2 = p1 + Elen [i] - 1 ; 2586 pn = p1 ; 2587 hash = 0 ; 2588 deg = 0 ; 2589 ASSERT (p1 >= 0 && p1 < iwlen && p2 >= -1 && p2 < iwlen) ; 2590 2591 /* ------------------------------------------------------------- */ 2592 /* scan the element list associated with supervariable i */ 2593 /* ------------------------------------------------------------- */ 2594 2595 /* UMFPACK/MA38-style approximate degree: */ 2596 if (aggressive) 2597 { 2598 for (p = p1 ; p <= p2 ; p++) 2599 { 2600 e = Iw [p] ; 2601 ASSERT (e >= 0 && e < n) ; 2602 we = W [e] ; 2603 if (we != 0) 2604 { 2605 /* e is an unabsorbed element */ 2606 /* dext = | Le \ Lme | */ 2607 dext = we - wflg ; 2608 if (dext > 0) 2609 { 2610 deg += dext ; 2611 Iw [pn++] = e ; 2612 hash += e ; 2613 AMD_DEBUG4 (" e: "~ID~" hash = "~ID~"\n",e,hash); 2614 } 2615 else 2616 { 2617 /* external degree of e is zero, absorb e into me*/ 2618 AMD_DEBUG1 (" Element "~ID~" =>"~ID~" (aggressive)\n", e, me); 2619 ASSERT (dext == 0) ; 2620 Pe [e] = FLIP (me) ; 2621 W [e] = 0 ; 2622 } 2623 } 2624 } 2625 } 2626 else 2627 { 2628 for (p = p1 ; p <= p2 ; p++) 2629 { 2630 e = Iw [p] ; 2631 ASSERT (e >= 0 && e < n) ; 2632 we = W [e] ; 2633 if (we != 0) 2634 { 2635 /* e is an unabsorbed element */ 2636 dext = we - wflg ; 2637 ASSERT (dext >= 0) ; 2638 deg += dext ; 2639 Iw [pn++] = e ; 2640 hash += e ; 2641 AMD_DEBUG4 (" e: "~ID~" hash = "~ID~"\n",e,hash); 2642 } 2643 } 2644 } 2645 2646 /* count the number of elements in i (including me): */ 2647 Elen [i] = pn - p1 + 1 ; 2648 2649 /* ------------------------------------------------------------- */ 2650 /* scan the supervariables in the list associated with i */ 2651 /* ------------------------------------------------------------- */ 2652 2653 /* The bulk of the AMD run time is typically spent in this loop, 2654 * particularly if the matrix has many dense rows that are not 2655 * removed prior to ordering. */ 2656 p3 = pn ; 2657 p4 = p1 + Len [i] ; 2658 for (p = p2 + 1 ; p < p4 ; p++) 2659 { 2660 j = Iw [p] ; 2661 ASSERT (j >= 0 && j < n) ; 2662 nvj = Nv [j] ; 2663 if (nvj > 0) 2664 { 2665 /* j is unabsorbed, and not in Lme. */ 2666 /* add to degree and add to new list */ 2667 deg += nvj ; 2668 Iw [pn++] = j ; 2669 hash += j ; 2670 AMD_DEBUG4 (" s: "~ID~" hash "~ID~" Nv[j]= "~ID~"\n", j, hash, nvj) ; 2671 } 2672 } 2673 2674 /* ------------------------------------------------------------- */ 2675 /* update the degree and check for mass elimination */ 2676 /* ------------------------------------------------------------- */ 2677 2678 /* with aggressive absorption, deg==0 is identical to the 2679 * Elen [i] == 1 && p3 == pn test, below. */ 2680 ASSERT (IMPLIES (aggressive, (deg==0) == (Elen[i]==1 && p3==pn))) ; 2681 2682 if (Elen [i] == 1 && p3 == pn) 2683 { 2684 2685 /* --------------------------------------------------------- */ 2686 /* mass elimination */ 2687 /* --------------------------------------------------------- */ 2688 2689 /* There is nothing left of this node except for an edge to 2690 * the current pivot element. Elen [i] is 1, and there are 2691 * no variables adjacent to node i. Absorb i into the 2692 * current pivot element, me. Note that if there are two or 2693 * more mass eliminations, fillin due to mass elimination is 2694 * possible within the nvpiv-by-nvpiv pivot block. It is this 2695 * step that causes AMD's analysis to be an upper bound. 2696 * 2697 * The reason is that the selected pivot has a lower 2698 * approximate degree than the true degree of the two mass 2699 * eliminated nodes. There is no edge between the two mass 2700 * eliminated nodes. They are merged with the current pivot 2701 * anyway. 2702 * 2703 * No fillin occurs in the Schur complement, in any case, 2704 * and this effect does not decrease the quality of the 2705 * ordering itself, just the quality of the nonzero and 2706 * flop count analysis. It also means that the post-ordering 2707 * is not an exact elimination tree post-ordering. */ 2708 2709 AMD_DEBUG1 (" MASS i "~ID~" => parent e "~ID~"\n", i, me); 2710 Pe [i] = FLIP (me) ; 2711 nvi = -Nv [i] ; 2712 degme -= nvi ; 2713 nvpiv += nvi ; 2714 nel += nvi ; 2715 Nv [i] = 0 ; 2716 Elen [i] = EMPTY ; 2717 2718 } 2719 else 2720 { 2721 2722 /* --------------------------------------------------------- */ 2723 /* update the upper-bound degree of i */ 2724 /* --------------------------------------------------------- */ 2725 2726 /* the following degree does not yet include the size 2727 * of the current element, which is added later: */ 2728 2729 Degree [i] = MIN (Degree [i], deg) ; 2730 2731 /* --------------------------------------------------------- */ 2732 /* add me to the list for i */ 2733 /* --------------------------------------------------------- */ 2734 2735 /* move first supervariable to end of list */ 2736 Iw [pn] = Iw [p3] ; 2737 /* move first element to end of element part of list */ 2738 Iw [p3] = Iw [p1] ; 2739 /* add new element, me, to front of list. */ 2740 Iw [p1] = me ; 2741 /* store the new length of the list in Len [i] */ 2742 Len [i] = pn - p1 + 1 ; 2743 2744 /* --------------------------------------------------------- */ 2745 /* place in hash bucket. Save hash key of i in Last [i]. */ 2746 /* --------------------------------------------------------- */ 2747 2748 /* NOTE: this can fail if hash is negative, because the ANSI C 2749 * standard does not define a % b when a and/or b are negative. 2750 * That's why hash is defined as an unsigned Int, to avoid this 2751 * problem. */ 2752 hash = hash % n ; 2753 ASSERT ((cast(Int) hash) >= 0 && (cast(Int) hash) < n) ; 2754 2755 /* if the Hhead array is not used: */ 2756 j = Head [hash] ; 2757 if (j <= EMPTY) 2758 { 2759 /* degree list is empty, hash head is FLIP (j) */ 2760 Next [i] = FLIP (j) ; 2761 Head [hash] = FLIP (i) ; 2762 } 2763 else 2764 { 2765 /* degree list is not empty, use Last [Head [hash]] as 2766 * hash head. */ 2767 Next [i] = Last [j] ; 2768 Last [j] = i ; 2769 } 2770 2771 /* if a separate Hhead array is used: * 2772 Next [i] = Hhead [hash] ; 2773 Hhead [hash] = i ; 2774 */ 2775 2776 Last [i] = hash ; 2777 } 2778 } 2779 2780 Degree [me] = degme ; 2781 2782 /* ----------------------------------------------------------------- */ 2783 /* Clear the counter array, W [...], by incrementing wflg. */ 2784 /* ----------------------------------------------------------------- */ 2785 2786 /* make sure that wflg+n does not cause integer overflow */ 2787 lemax = MAX (lemax, degme) ; 2788 wflg += lemax ; 2789 wflg = clear_flag (wflg, wbig, W, n) ; 2790 /* at this point, W [0..n-1] < wflg holds */ 2791 2792 /* ========================================================================= */ 2793 /* SUPERVARIABLE DETECTION */ 2794 /* ========================================================================= */ 2795 2796 AMD_DEBUG1 ("Detecting supervariables:\n"); 2797 for (pme = pme1 ; pme <= pme2 ; pme++) 2798 { 2799 i = Iw [pme] ; 2800 ASSERT (i >= 0 && i < n) ; 2801 AMD_DEBUG2 ("Consider i "~ID~" nv "~ID~"\n", i, Nv [i]); 2802 if (Nv [i] < 0) 2803 { 2804 /* i is a principal variable in Lme */ 2805 2806 /* --------------------------------------------------------- 2807 * examine all hash buckets with 2 or more variables. We do 2808 * this by examing all unique hash keys for supervariables in 2809 * the pattern Lme of the current element, me 2810 * --------------------------------------------------------- */ 2811 2812 /* let i = head of hash bucket, and empty the hash bucket */ 2813 ASSERT (Last [i] >= 0 && Last [i] < n) ; 2814 hash = Last [i] ; 2815 2816 /* if Hhead array is not used: */ 2817 j = Head [hash] ; 2818 if (j == EMPTY) 2819 { 2820 /* hash bucket and degree list are both empty */ 2821 i = EMPTY ; 2822 } 2823 else if (j < EMPTY) 2824 { 2825 /* degree list is empty */ 2826 i = FLIP (j) ; 2827 Head [hash] = EMPTY ; 2828 } 2829 else 2830 { 2831 /* degree list is not empty, restore Last [j] of head j */ 2832 i = Last [j] ; 2833 Last [j] = EMPTY ; 2834 } 2835 2836 /* if separate Hhead array is used: * 2837 i = Hhead [hash] ; 2838 Hhead [hash] = EMPTY ; 2839 */ 2840 2841 ASSERT (i >= EMPTY && i < n) ; 2842 AMD_DEBUG2 ("----i "~ID~" hash "~ID~"\n", i, hash); 2843 2844 while (i != EMPTY && Next [i] != EMPTY) 2845 { 2846 2847 /* ----------------------------------------------------- 2848 * this bucket has one or more variables following i. 2849 * scan all of them to see if i can absorb any entries 2850 * that follow i in hash bucket. Scatter i into w. 2851 * ----------------------------------------------------- */ 2852 2853 ln = Len [i] ; 2854 eln = Elen [i] ; 2855 ASSERT (ln >= 0 && eln >= 0) ; 2856 ASSERT (Pe [i] >= 0 && Pe [i] < iwlen) ; 2857 /* do not flag the first element in the list (me) */ 2858 for (p = Pe [i] + 1 ; p <= Pe [i] + ln - 1 ; p++) 2859 { 2860 ASSERT (Iw [p] >= 0 && Iw [p] < n) ; 2861 W [Iw [p]] = wflg ; 2862 } 2863 2864 /* ----------------------------------------------------- */ 2865 /* scan every other entry j following i in bucket */ 2866 /* ----------------------------------------------------- */ 2867 2868 jlast = i ; 2869 j = Next [i] ; 2870 ASSERT (j >= EMPTY && j < n) ; 2871 2872 while (j != EMPTY) 2873 { 2874 /* ------------------------------------------------- */ 2875 /* check if j and i have identical nonzero pattern */ 2876 /* ------------------------------------------------- */ 2877 2878 AMD_DEBUG3 ("compare i "~ID~" and j "~ID~"\n", i,j); 2879 2880 /* check if i and j have the same Len and Elen */ 2881 ASSERT (Len [j] >= 0 && Elen [j] >= 0) ; 2882 ASSERT (Pe [j] >= 0 && Pe [j] < iwlen) ; 2883 ok = (Len [j] == ln) && (Elen [j] == eln) ; 2884 /* skip the first element in the list (me) */ 2885 for (p = Pe [j] + 1 ; ok && p <= Pe [j] + ln - 1 ; p++) 2886 { 2887 ASSERT (Iw [p] >= 0 && Iw [p] < n) ; 2888 if (W [Iw [p]] != wflg) ok = 0 ; 2889 } 2890 if (ok) 2891 { 2892 /* --------------------------------------------- */ 2893 /* found it! j can be absorbed into i */ 2894 /* --------------------------------------------- */ 2895 2896 AMD_DEBUG1 ("found it! j "~ID~" => i "~ID~"\n", j,i); 2897 Pe [j] = FLIP (i) ; 2898 /* both Nv [i] and Nv [j] are negated since they */ 2899 /* are in Lme, and the absolute values of each */ 2900 /* are the number of variables in i and j: */ 2901 Nv [i] += Nv [j] ; 2902 Nv [j] = 0 ; 2903 Elen [j] = EMPTY ; 2904 /* delete j from hash bucket */ 2905 ASSERT (j != Next [j]) ; 2906 j = Next [j] ; 2907 Next [jlast] = j ; 2908 2909 } 2910 else 2911 { 2912 /* j cannot be absorbed into i */ 2913 jlast = j ; 2914 ASSERT (j != Next [j]) ; 2915 j = Next [j] ; 2916 } 2917 ASSERT (j >= EMPTY && j < n) ; 2918 } 2919 2920 /* ----------------------------------------------------- 2921 * no more variables can be absorbed into i 2922 * go to next i in bucket and clear flag array 2923 * ----------------------------------------------------- */ 2924 2925 wflg++ ; 2926 i = Next [i] ; 2927 ASSERT (i >= EMPTY && i < n) ; 2928 2929 } 2930 } 2931 } 2932 AMD_DEBUG2 ("detect done\n"); 2933 2934 /* ========================================================================= */ 2935 /* RESTORE DEGREE LISTS AND REMOVE NONPRINCIPAL SUPERVARIABLES FROM ELEMENT */ 2936 /* ========================================================================= */ 2937 2938 p = pme1 ; 2939 nleft = n - nel ; 2940 for (pme = pme1 ; pme <= pme2 ; pme++) 2941 { 2942 i = Iw [pme] ; 2943 ASSERT (i >= 0 && i < n) ; 2944 nvi = -Nv [i] ; 2945 AMD_DEBUG3 ("Restore i "~ID~" "~ID~"\n", i, nvi); 2946 if (nvi > 0) 2947 { 2948 /* i is a principal variable in Lme */ 2949 /* restore Nv [i] to signify that i is principal */ 2950 Nv [i] = nvi ; 2951 2952 /* --------------------------------------------------------- */ 2953 /* compute the external degree (add size of current element) */ 2954 /* --------------------------------------------------------- */ 2955 2956 deg = Degree [i] + degme - nvi ; 2957 deg = MIN (deg, nleft - nvi) ; 2958 ASSERT (IMPLIES (aggressive, deg > 0) && deg >= 0 && deg < n) ; 2959 2960 /* --------------------------------------------------------- */ 2961 /* place the supervariable at the head of the degree list */ 2962 /* --------------------------------------------------------- */ 2963 2964 inext = Head [deg] ; 2965 ASSERT (inext >= EMPTY && inext < n) ; 2966 if (inext != EMPTY) Last [inext] = i ; 2967 Next [i] = inext ; 2968 Last [i] = EMPTY ; 2969 Head [deg] = i ; 2970 2971 /* --------------------------------------------------------- */ 2972 /* save the new degree, and find the minimum degree */ 2973 /* --------------------------------------------------------- */ 2974 2975 mindeg = MIN (mindeg, deg) ; 2976 Degree [i] = deg ; 2977 2978 /* --------------------------------------------------------- */ 2979 /* place the supervariable in the element pattern */ 2980 /* --------------------------------------------------------- */ 2981 2982 Iw [p++] = i ; 2983 2984 } 2985 } 2986 AMD_DEBUG2("restore done\n"); 2987 2988 /* ========================================================================= */ 2989 /* FINALIZE THE NEW ELEMENT */ 2990 /* ========================================================================= */ 2991 2992 AMD_DEBUG2 ("ME = "~ID~" DONE\n", me); 2993 Nv [me] = nvpiv ; 2994 /* save the length of the list for the new element me */ 2995 Len [me] = p - pme1 ; 2996 if (Len [me] == 0) 2997 { 2998 /* there is nothing left of the current pivot element */ 2999 /* it is a root of the assembly tree */ 3000 Pe [me] = EMPTY ; 3001 W [me] = 0 ; 3002 } 3003 if (elenme != 0) 3004 { 3005 /* element was not constructed in place: deallocate part of */ 3006 /* it since newly nonprincipal variables may have been removed */ 3007 pfree = p ; 3008 } 3009 3010 /* The new element has nvpiv pivots and the size of the contribution 3011 * block for a multifrontal method is degme-by-degme, not including 3012 * the "dense" rows/columns. If the "dense" rows/columns are included, 3013 * the frontal matrix is no larger than 3014 * (degme+ndense)-by-(degme+ndense). 3015 */ 3016 3017 if (Info != cast(c_float *) NULL) 3018 { 3019 f = nvpiv ; 3020 r = degme + ndense ; 3021 dmax = MAX (dmax, f + r) ; 3022 3023 /* number of nonzeros in L (excluding the diagonal) */ 3024 lnzme = f*r + (f-1)*f/2 ; 3025 lnz += lnzme ; 3026 3027 /* number of divide operations for LDL' and for LU */ 3028 ndiv += lnzme ; 3029 3030 /* number of multiply-subtract pairs for LU */ 3031 s = f*r*r + r*(f-1)*f + (f-1)*f*(2*f-1)/6 ; 3032 nms_lu += s ; 3033 3034 /* number of multiply-subtract pairs for LDL' */ 3035 nms_ldl += (s + lnzme)/2 ; 3036 } 3037 3038 version(NDEBUG){} 3039 else { 3040 AMD_DEBUG2 ("finalize done nel "~ID~" n "~ID~"\n ::::\n", nel, n); 3041 for (pme = Pe [me] ; pme <= Pe [me] + Len [me] - 1 ; pme++) 3042 { 3043 AMD_DEBUG3 (" "~ID~"", Iw [pme]); 3044 } 3045 AMD_DEBUG3 ("\n"); 3046 } // !NDEBUG 3047 3048 } 3049 3050 /* ========================================================================= */ 3051 /* DONE SELECTING PIVOTS */ 3052 /* ========================================================================= */ 3053 3054 if (Info != cast(c_float *) NULL) 3055 { 3056 3057 /* count the work to factorize the ndense-by-ndense submatrix */ 3058 f = ndense ; 3059 dmax = MAX (dmax, cast(c_float) ndense) ; 3060 3061 /* number of nonzeros in L (excluding the diagonal) */ 3062 lnzme = (f-1)*f/2 ; 3063 lnz += lnzme ; 3064 3065 /* number of divide operations for LDL' and for LU */ 3066 ndiv += lnzme ; 3067 3068 /* number of multiply-subtract pairs for LU */ 3069 s = (f-1)*f*(2*f-1)/6 ; 3070 nms_lu += s ; 3071 3072 /* number of multiply-subtract pairs for LDL' */ 3073 nms_ldl += (s + lnzme)/2 ; 3074 3075 /* number of nz's in L (excl. diagonal) */ 3076 Info [AMD_LNZ] = lnz ; 3077 3078 /* number of divide ops for LU and LDL' */ 3079 Info [AMD_NDIV] = ndiv ; 3080 3081 /* number of multiply-subtract pairs for LDL' */ 3082 Info [AMD_NMULTSUBS_LDL] = nms_ldl ; 3083 3084 /* number of multiply-subtract pairs for LU */ 3085 Info [AMD_NMULTSUBS_LU] = nms_lu ; 3086 3087 /* number of "dense" rows/columns */ 3088 Info [AMD_NDENSE] = ndense ; 3089 3090 /* largest front is dmax-by-dmax */ 3091 Info [AMD_DMAX] = dmax ; 3092 3093 /* number of garbage collections in AMD */ 3094 Info [AMD_NCMPA] = ncmpa ; 3095 3096 /* successful ordering */ 3097 Info [AMD_STATUS] = AMD_OK ; 3098 } 3099 3100 /* ========================================================================= */ 3101 /* POST-ORDERING */ 3102 /* ========================================================================= */ 3103 3104 /* ------------------------------------------------------------------------- 3105 * Variables at this point: 3106 * 3107 * Pe: holds the elimination tree. The parent of j is FLIP (Pe [j]), 3108 * or EMPTY if j is a root. The tree holds both elements and 3109 * non-principal (unordered) variables absorbed into them. 3110 * Dense variables are non-principal and unordered. 3111 * 3112 * Elen: holds the size of each element, including the diagonal part. 3113 * FLIP (Elen [e]) > 0 if e is an element. For unordered 3114 * variables i, Elen [i] is EMPTY. 3115 * 3116 * Nv: Nv [e] > 0 is the number of pivots represented by the element e. 3117 * For unordered variables i, Nv [i] is zero. 3118 * 3119 * Contents no longer needed: 3120 * W, Iw, Len, Degree, Head, Next, Last. 3121 * 3122 * The matrix itself has been destroyed. 3123 * 3124 * n: the size of the matrix. 3125 * No other scalars needed (pfree, iwlen, etc.) 3126 * ------------------------------------------------------------------------- */ 3127 3128 /* restore Pe */ 3129 for (i = 0 ; i < n ; i++) 3130 { 3131 Pe [i] = FLIP (Pe [i]) ; 3132 } 3133 3134 /* restore Elen, for output information, and for postordering */ 3135 for (i = 0 ; i < n ; i++) 3136 { 3137 Elen [i] = FLIP (Elen [i]) ; 3138 } 3139 3140 /* Now the parent of j is Pe [j], or EMPTY if j is a root. Elen [e] > 0 3141 * is the size of element e. Elen [i] is EMPTY for unordered variable i. */ 3142 3143 version(NDEBUG){} 3144 else { 3145 AMD_DEBUG2 ("\nTree:\n"); 3146 for (i = 0 ; i < n ; i++) 3147 { 3148 AMD_DEBUG2 (" "~ID~" parent: "~ID~" ", i, Pe [i]); 3149 ASSERT (Pe [i] >= EMPTY && Pe [i] < n) ; 3150 if (Nv [i] > 0) 3151 { 3152 /* this is an element */ 3153 e = i ; 3154 AMD_DEBUG2 (" element, size is "~ID~"\n", Elen [i]); 3155 ASSERT (Elen [e] > 0) ; 3156 } 3157 AMD_DEBUG2 ("\n"); 3158 } 3159 AMD_DEBUG2 ("\nelements:\n"); 3160 for (e = 0 ; e < n ; e++) 3161 { 3162 if (Nv [e] > 0) 3163 { 3164 AMD_DEBUG3 ("Element e= "~ID~" size "~ID~" nv "~ID~" \n", e, Elen [e], Nv [e]); 3165 } 3166 } 3167 AMD_DEBUG2 ("\nvariables:\n"); 3168 for (i = 0 ; i < n ; i++) 3169 { 3170 Int cnt ; 3171 if (Nv [i] == 0) 3172 { 3173 AMD_DEBUG3 ("i unordered: "~ID~"\n", i); 3174 j = Pe [i] ; 3175 cnt = 0 ; 3176 AMD_DEBUG3 (" j: "~ID~"\n", j); 3177 if (j == EMPTY) 3178 { 3179 AMD_DEBUG3 (" i is a dense variable\n"); 3180 } 3181 else 3182 { 3183 ASSERT (j >= 0 && j < n) ; 3184 while (Nv [j] == 0) 3185 { 3186 AMD_DEBUG3 (" j : "~ID~"\n", j); 3187 j = Pe [j] ; 3188 AMD_DEBUG3 (" j:: "~ID~"\n", j); 3189 cnt++ ; 3190 if (cnt > n) break ; 3191 } 3192 e = j ; 3193 AMD_DEBUG3 (" got to e: "~ID~"\n", e); 3194 } 3195 } 3196 } 3197 } // !NDEBUG 3198 3199 /* ========================================================================= */ 3200 /* compress the paths of the variables */ 3201 /* ========================================================================= */ 3202 3203 for (i = 0 ; i < n ; i++) 3204 { 3205 if (Nv [i] == 0) 3206 { 3207 3208 /* ------------------------------------------------------------- 3209 * i is an un-ordered row. Traverse the tree from i until 3210 * reaching an element, e. The element, e, was the principal 3211 * supervariable of i and all nodes in the path from i to when e 3212 * was selected as pivot. 3213 * ------------------------------------------------------------- */ 3214 3215 AMD_DEBUG1 ("Path compression, i unordered: "~ID~"\n", i); 3216 j = Pe [i] ; 3217 ASSERT (j >= EMPTY && j < n) ; 3218 AMD_DEBUG3 (" j: "~ID~"\n", j); 3219 if (j == EMPTY) 3220 { 3221 /* Skip a dense variable. It has no parent. */ 3222 AMD_DEBUG3 ((" i is a dense variable\n")) ; 3223 continue ; 3224 } 3225 3226 /* while (j is a variable) */ 3227 while (Nv [j] == 0) 3228 { 3229 AMD_DEBUG3 (" j : "~ID~"\n", j); 3230 j = Pe [j] ; 3231 AMD_DEBUG3 (" j:: "~ID~"\n", j); 3232 ASSERT (j >= 0 && j < n) ; 3233 } 3234 /* got to an element e */ 3235 e = j ; 3236 AMD_DEBUG3 ("got to e: "~ID~"\n", e); 3237 3238 /* ------------------------------------------------------------- 3239 * traverse the path again from i to e, and compress the path 3240 * (all nodes point to e). Path compression allows this code to 3241 * compute in O(n) time. 3242 * ------------------------------------------------------------- */ 3243 3244 j = i ; 3245 /* while (j is a variable) */ 3246 while (Nv [j] == 0) 3247 { 3248 jnext = Pe [j] ; 3249 AMD_DEBUG3 ("j "~ID~" jnext "~ID~"\n", j, jnext); 3250 Pe [j] = e ; 3251 j = jnext ; 3252 ASSERT (j >= 0 && j < n) ; 3253 } 3254 } 3255 } 3256 3257 /* ========================================================================= */ 3258 /* postorder the assembly tree */ 3259 /* ========================================================================= */ 3260 3261 AMD_postorder (n, Pe, Nv, Elen, 3262 W, /* output order */ 3263 Head, Next, Last) ; /* workspace */ 3264 3265 /* ========================================================================= */ 3266 /* compute output permutation and inverse permutation */ 3267 /* ========================================================================= */ 3268 3269 /* W [e] = k means that element e is the kth element in the new 3270 * order. e is in the range 0 to n-1, and k is in the range 0 to 3271 * the number of elements. Use Head for inverse order. */ 3272 3273 for (k = 0 ; k < n ; k++) 3274 { 3275 Head [k] = EMPTY ; 3276 Next [k] = EMPTY ; 3277 } 3278 for (e = 0 ; e < n ; e++) 3279 { 3280 k = W [e] ; 3281 ASSERT ((k == EMPTY) == (Nv [e] == 0)) ; 3282 if (k != EMPTY) 3283 { 3284 ASSERT (k >= 0 && k < n) ; 3285 Head [k] = e ; 3286 } 3287 } 3288 3289 /* construct output inverse permutation in Next, 3290 * and permutation in Last */ 3291 nel = 0 ; 3292 for (k = 0 ; k < n ; k++) 3293 { 3294 e = Head [k] ; 3295 if (e == EMPTY) break ; 3296 ASSERT (e >= 0 && e < n && Nv [e] > 0) ; 3297 Next [e] = nel ; 3298 nel += Nv [e] ; 3299 } 3300 ASSERT (nel == n - ndense) ; 3301 3302 /* order non-principal variables (dense, & those merged into supervar's) */ 3303 for (i = 0 ; i < n ; i++) 3304 { 3305 if (Nv [i] == 0) 3306 { 3307 e = Pe [i] ; 3308 ASSERT (e >= EMPTY && e < n) ; 3309 if (e != EMPTY) 3310 { 3311 /* This is an unordered variable that was merged 3312 * into element e via supernode detection or mass 3313 * elimination of i when e became the pivot element. 3314 * Place i in order just before e. */ 3315 ASSERT (Next [i] == EMPTY && Nv [e] > 0) ; 3316 Next [i] = Next [e] ; 3317 Next [e]++ ; 3318 } 3319 else 3320 { 3321 /* This is a dense unordered variable, with no parent. 3322 * Place it last in the output order. */ 3323 Next [i] = nel++ ; 3324 } 3325 } 3326 } 3327 ASSERT (nel == n) ; 3328 3329 AMD_DEBUG2 (("\n\nPerm:\n")) ; 3330 for (i = 0 ; i < n ; i++) 3331 { 3332 k = Next [i] ; 3333 ASSERT (k >= 0 && k < n) ; 3334 Last [k] = i ; 3335 AMD_DEBUG2 (" perm ["~ID~"] = "~ID~"\n", k, i); 3336 } 3337 } 3338 3339 } // version DLONG