Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.0 sparse solver
  5: */

  7: #include "src/mat/impls/aij/seq/aij.h"
  8: #include "src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
 10: #include "stdlib.h"
 11: #endif

 14: #if defined(PETSC_USE_COMPLEX)
 15: #include "superlu_zdefs.h"
 16: #else
 17: #include "superlu_ddefs.h"
 18: #endif

 21: typedef enum { GLOBAL,DISTRIBUTED
 22: } SuperLU_MatInputMode;

 24: typedef struct {
 25:   int_t                   nprow,npcol,*row,*col;
 26:   gridinfo_t              grid;
 27:   superlu_options_t       options;
 28:   SuperMatrix             A_sup;
 29:   ScalePermstruct_t       ScalePermstruct;
 30:   LUstruct_t              LUstruct;
 31:   int                     StatPrint;
 32:   int                     MatInputMode;
 33:   SOLVEstruct_t           SOLVEstruct;
 34:   fact_t                  FactPattern;
 35:   MPI_Comm                comm_superlu;
 36: #if defined(PETSC_USE_COMPLEX)
 37:   doublecomplex           *val;
 38: #else
 39:   double                  *val;
 40: #endif

 42:   /* A few function pointers for inheritance */
 43:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 44:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 45:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 46:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 47:   PetscErrorCode (*MatDestroy)(Mat);

 49:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 50:   PetscTruth CleanUpSuperLU_Dist;
 51: } Mat_SuperLU_DIST;

 53: EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);

 66: PetscErrorCode  MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 67: {
 68:   PetscErrorCode   ierr;
 69:   Mat              B=*newmat;
 70:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

 73:   if (reuse == MAT_INITIAL_MATRIX) {
 74:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 75:   }
 76:   /* Reset the original function pointers */
 77:   B->ops->duplicate        = lu->MatDuplicate;
 78:   B->ops->view             = lu->MatView;
 79:   B->ops->assemblyend      = lu->MatAssemblyEnd;
 80:   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
 81:   B->ops->destroy          = lu->MatDestroy;

 83:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);
 84:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);
 85:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);
 86:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);

 88:   PetscObjectChangeTypeName((PetscObject)B,type);
 89:   *newmat = B;
 90:   return(0);
 91: }

 96: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 97: {
 98:   PetscErrorCode   ierr;
 99:   PetscMPIInt      size;
100:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
101: 
103:   if (lu->CleanUpSuperLU_Dist) {
104:     /* Deallocate SuperLU_DIST storage */
105:     if (lu->MatInputMode == GLOBAL) {
106:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
107:     } else {
108:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
109:       if ( lu->options.SolveInitialized ) {
110: #if defined(PETSC_USE_COMPLEX)
111:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
112: #else
113:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
114: #endif
115:       }
116:     }
117:     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
118:     ScalePermstructFree(&lu->ScalePermstruct);
119:     LUstructFree(&lu->LUstruct);
120: 
121:     /* Release the SuperLU_DIST process grid. */
122:     superlu_gridexit(&lu->grid);
123: 
124:     MPI_Comm_free(&(lu->comm_superlu));
125:   }

127:   MPI_Comm_size(A->comm,&size);
128:   if (size == 1) {
129:     MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
130:   } else {
131:     MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
132:   }
133:   (*A->ops->destroy)(A);
134:   return(0);
135: }

139: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
140: {
141:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
142:   PetscErrorCode   ierr;
143:   PetscMPIInt      size;
144:   PetscInt         m=A->rmap.N, N=A->cmap.N;
145:   SuperLUStat_t    stat;
146:   double           berr[1];
147:   PetscScalar      *bptr;
148:   PetscInt         info, nrhs=1;
149:   Vec              x_seq;
150:   IS               iden;
151:   VecScatter       scat;
152: 
154:   MPI_Comm_size(A->comm,&size);
155:   if (size > 1) {
156:     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
157:       VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
158:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
159:       VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
160:       ISDestroy(iden);

162:       VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
163:       VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
164:       VecGetArray(x_seq,&bptr);
165:     } else { /* distributed mat input */
166:       VecCopy(b_mpi,x);
167:       VecGetArray(x,&bptr);
168:     }
169:   } else { /* size == 1 */
170:     VecCopy(b_mpi,x);
171:     VecGetArray(x,&bptr);
172:   }
173: 
174:   if (lu->options.Fact != FACTORED)
175:     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

177:   PStatInit(&stat);        /* Initialize the statistics variables. */
178:   if (lu->MatInputMode == GLOBAL) {
179: #if defined(PETSC_USE_COMPLEX)
180:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182: #else
183:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185: #endif 
186:   } else { /* distributed mat input */
187: #if defined(PETSC_USE_COMPLEX)
188:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190:     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191: #else
192:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194:     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195: #endif
196:   }
197:   if (lu->options.PrintStat) {
198:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199:   }
200:   PStatFree(&stat);
201: 
202:   if (size > 1) {
203:     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204:       VecRestoreArray(x_seq,&bptr);
205:       VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
206:       VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
207:       VecScatterDestroy(scat);
208:       VecDestroy(x_seq);
209:     } else {
210:       VecRestoreArray(x,&bptr);
211:     }
212:   } else {
213:     VecRestoreArray(x,&bptr);
214:   }
215:   return(0);
216: }

220: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221: {
222:   Mat              *tseq,A_seq = PETSC_NULL;
223:   Mat_SeqAIJ       *aa,*bb;
224:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225:   PetscErrorCode   ierr;
226:   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227:                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228:   PetscMPIInt      size,rank;
229:   SuperLUStat_t    stat;
230:   double           *berr=0;
231:   IS               isrow;
232:   PetscLogDouble   time0,time,time_min,time_max;
233:   Mat              F_diag=PETSC_NULL;
234: #if defined(PETSC_USE_COMPLEX)
235:   doublecomplex    *av, *bv;
236: #else
237:   double           *av, *bv;
238: #endif

241:   MPI_Comm_size(A->comm,&size);
242:   MPI_Comm_rank(A->comm,&rank);
243: 
244:   if (lu->options.PrintStat) { /* collect time for mat conversion */
245:     MPI_Barrier(A->comm);
246:     PetscGetTime(&time0);
247:   }

249:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250:     if (size > 1) { /* convert mpi A to seq mat A */
251:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
252:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
253:       ISDestroy(isrow);
254: 
255:       A_seq = *tseq;
256:       PetscFree(tseq);
257:       aa =  (Mat_SeqAIJ*)A_seq->data;
258:     } else {
259:       aa =  (Mat_SeqAIJ*)A->data;
260:     }

262:     /* Convert Petsc NR matrix to SuperLU_DIST NC. 
263:        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265:       if (lu->FactPattern == SamePattern_SameRowPerm){
266:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269:       } else {
270:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272:         lu->options.Fact = SamePattern;
273:       }
274:     }
275: #if defined(PETSC_USE_COMPLEX)
276:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277: #else
278:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279: #endif

281:     /* Create compressed column matrix A_sup. */
282: #if defined(PETSC_USE_COMPLEX)
283:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284: #else
285:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286: #endif
287:   } else { /* distributed mat input */
288:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289:     aa=(Mat_SeqAIJ*)(mat->A)->data;
290:     bb=(Mat_SeqAIJ*)(mat->B)->data;
291:     ai=aa->i; aj=aa->j;
292:     bi=bb->i; bj=bb->j;
293: #if defined(PETSC_USE_COMPLEX)
294:     av=(doublecomplex*)aa->a;
295:     bv=(doublecomplex*)bb->a;
296: #else
297:     av=aa->a;
298:     bv=bb->a;
299: #endif
300:     rstart = A->rmap.rstart;
301:     nz     = aa->nz + bb->nz;
302:     garray = mat->garray;
303: 
304:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305: #if defined(PETSC_USE_COMPLEX)
306:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307: #else
308:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309: #endif
310:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311:       if (lu->FactPattern == SamePattern_SameRowPerm){
312:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314:       } else {
315:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316:         lu->options.Fact = SamePattern;
317:       }
318:     }
319:     nz = 0; irow = rstart;
320:     for ( i=0; i<m; i++ ) {
321:       lu->row[i] = nz;
322:       countA = ai[i+1] - ai[i];
323:       countB = bi[i+1] - bi[i];
324:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325:       bjj = bj + bi[i];

327:       /* B part, smaller col index */
328:       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329:       jB = 0;
330:       for (j=0; j<countB; j++){
331:         jcol = garray[bjj[j]];
332:         if (jcol > colA_start) {
333:           jB = j;
334:           break;
335:         }
336:         lu->col[nz] = jcol;
337:         lu->val[nz++] = *bv++;
338:         if (j==countB-1) jB = countB;
339:       }

341:       /* A part */
342:       for (j=0; j<countA; j++){
343:         lu->col[nz] = rstart + ajj[j];
344:         lu->val[nz++] = *av++;
345:       }

347:       /* B part, larger col index */
348:       for (j=jB; j<countB; j++){
349:         lu->col[nz] = garray[bjj[j]];
350:         lu->val[nz++] = *bv++;
351:       }
352:     }
353:     lu->row[m] = nz;
354: #if defined(PETSC_USE_COMPLEX)
355:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357: #else
358:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360: #endif
361:   }
362:   if (lu->options.PrintStat) {
363:     PetscGetTime(&time);
364:     time0 = time - time0;
365:   }

367:   /* Factor the matrix. */
368:   PStatInit(&stat);   /* Initialize the statistics variables. */

370:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371: #if defined(PETSC_USE_COMPLEX)
372:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374: #else
375:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377: #endif 
378:   } else { /* distributed mat input */
379: #if defined(PETSC_USE_COMPLEX)
380:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383: #else
384:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387: #endif
388:   }

390:   if (lu->MatInputMode == GLOBAL && size > 1){
391:     MatDestroy(A_seq);
392:   }

394:   if (lu->options.PrintStat) {
395:     if (size > 1){
396:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
397:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
398:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
399:       time = time/size; /* average time */
400:       if (!rank)
401:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
402:                               %g / %g / %g\n",time_max,time_min,time);
403:     } else {
404:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
405:                               %g\n",time0);
406:     }
407: 
408:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
409:   }
410:   PStatFree(&stat);
411:   if (size > 1){
412:     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
413:     F_diag->assembled = PETSC_TRUE;
414:   }
415:   (*F)->assembled  = PETSC_TRUE;
416:   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
417:   return(0);
418: }

420: /* Note the Petsc r and c permutations are ignored */
423: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
424: {
425:   Mat               B;
426:   Mat_SuperLU_DIST  *lu;
427:   PetscErrorCode    ierr;
428:   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
429:   PetscMPIInt       size;
430:   superlu_options_t options;
431:   PetscTruth        flg;
432:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
433:   const char        *prtype[] = {"LargeDiag","NATURAL"};
434:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

437:   /* Create the factorization matrix */
438:   MatCreate(A->comm,&B);
439:   MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);
440:   MatSetType(B,A->type_name);
441:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
442:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

444:   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
445:   B->ops->solve            = MatSolve_SuperLU_DIST;
446:   B->factor                = FACTOR_LU;

448:   lu = (Mat_SuperLU_DIST*)(B->spptr);

450:   /*   Set the default input options:
451:         options.Fact = DOFACT;
452:         options.Equil = YES;
453:         options.ColPerm = MMD_AT_PLUS_A;
454:         options.RowPerm = LargeDiag;
455:         options.ReplaceTinyPivot = YES;
456:         options.Trans = NOTRANS;
457:         options.IterRefine = DOUBLE;
458:         options.SolveInitialized = NO;
459:         options.RefineInitialized = NO;
460:         options.PrintStat = YES;
461:   */
462:   set_default_options_dist(&options);

464:   MPI_Comm_dup(A->comm,&(lu->comm_superlu));
465:   MPI_Comm_size(A->comm,&size);
466:   /* Default num of process columns and rows */
467:   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
468:   if (!lu->npcol) lu->npcol = 1;
469:   while (lu->npcol > 0) {
470:     lu->nprow = (PetscMPIInt)(size/lu->npcol);
471:     if (size == lu->nprow * lu->npcol) break;
472:     lu->npcol --;
473:   }
474: 
475:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
476:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
477:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
478:     if (size != lu->nprow * lu->npcol)
479:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
480: 
481:     lu->MatInputMode = DISTRIBUTED;
482:     PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);
483:     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

485:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
486:     if (!flg) {
487:       options.Equil = NO;
488:     }

490:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
491:     if (flg) {
492:       switch (indx) {
493:       case 0:
494:         options.RowPerm = LargeDiag;
495:         break;
496:       case 1:
497:         options.RowPerm = NOROWPERM;
498:         break;
499:       }
500:     }

502:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);
503:     if (flg) {
504:       switch (indx) {
505:       case 0:
506:         options.ColPerm = MMD_AT_PLUS_A;
507:         break;
508:       case 1:
509:         options.ColPerm = NATURAL;
510:         break;
511:       case 2:
512:         options.ColPerm = MMD_ATA;
513:         break;
514:       }
515:     }

517:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
518:     if (!flg) {
519:       options.ReplaceTinyPivot = NO;
520:     }

522:     lu->FactPattern = SamePattern;
523:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
524:     if (flg) {
525:       switch (indx) {
526:       case 0:
527:         lu->FactPattern = SamePattern;
528:         break;
529:       case 1:
530:         lu->FactPattern = SamePattern_SameRowPerm;
531:         break;
532:       }
533:     }
534: 
535:     options.IterRefine = NOREFINE;
536:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
537:     if (flg) {
538:       options.IterRefine = DOUBLE;
539:     }

541:     if (PetscLogPrintInfo) {
542:       options.PrintStat = YES;
543:     } else {
544:       options.PrintStat = NO;
545:     }
546:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
547:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
548:   PetscOptionsEnd();

550:   /* Initialize the SuperLU process grid. */
551:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

553:   /* Initialize ScalePermstruct and LUstruct. */
554:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
555:   LUstructInit(M, N, &lu->LUstruct);

557:   lu->options             = options;
558:   lu->options.Fact        = DOFACT;
559:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
560:   *F = B;
561:   return(0);
562: }

566: PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
567:   PetscErrorCode   ierr;
568:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);

571:   (*lu->MatAssemblyEnd)(A,mode);
572:   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
573:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
574:   return(0);
575: }

579: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
580: {
581:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
582:   superlu_options_t options;
583:   PetscErrorCode    ierr;

586:   /* check if matrix is superlu_dist type */
587:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

589:   options = lu->options;
590:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
591:   PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
592:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
593:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
594:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
595:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
596:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
597:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
598:   if (options.ColPerm == NATURAL) {
599:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
600:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
601:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
602:   } else if (options.ColPerm == MMD_ATA) {
603:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
604:   } else {
605:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
606:   }
607: 
608:   if (lu->FactPattern == SamePattern){
609:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
610:   } else {
611:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
612:   }
613:   return(0);
614: }

618: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
619: {
620:   PetscErrorCode    ierr;
621:   PetscTruth        iascii;
622:   PetscViewerFormat format;
623:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);

626:   (*lu->MatView)(A,viewer);

628:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
629:   if (iascii) {
630:     PetscViewerGetFormat(viewer,&format);
631:     if (format == PETSC_VIEWER_ASCII_INFO) {
632:       MatFactorInfo_SuperLU_DIST(A,viewer);
633:     }
634:   }
635:   return(0);
636: }


642: PetscErrorCode  MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
643: {
644:   /* This routine is only called to convert to MATSUPERLU_DIST */
645:   /* from MATSEQAIJ if A has a single process communicator */
646:   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
647:   PetscErrorCode   ierr;
648:   PetscMPIInt      size;
649:   MPI_Comm         comm;
650:   Mat              B=*newmat;
651:   Mat_SuperLU_DIST *lu;

654:   PetscNew(Mat_SuperLU_DIST,&lu);
655:   if (reuse == MAT_INITIAL_MATRIX) {
656:     MatDuplicate(A,MAT_COPY_VALUES,&B);
657:     lu->MatDuplicate         = B->ops->duplicate;
658:     lu->MatView              = B->ops->view;
659:     lu->MatAssemblyEnd       = B->ops->assemblyend;
660:     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
661:     lu->MatDestroy           = B->ops->destroy;
662:   } else {
663:     lu->MatDuplicate         = A->ops->duplicate;
664:     lu->MatView              = A->ops->view;
665:     lu->MatAssemblyEnd       = A->ops->assemblyend;
666:     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
667:     lu->MatDestroy           = A->ops->destroy;
668:   }
669:   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;

671:   B->spptr                 = (void*)lu;
672:   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
673:   B->ops->view             = MatView_SuperLU_DIST;
674:   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
675:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
676:   B->ops->destroy          = MatDestroy_SuperLU_DIST;

678:   PetscObjectGetComm((PetscObject)A,&comm);
679:   MPI_Comm_size(comm,&size);
680:   if (size == 1) {
681:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
682:     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
683:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
684:     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
685:   } else {
686:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
687:                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
688:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
689:                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
690:   }
691:   PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");
692:   PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);
693:   *newmat = B;
694:   return(0);
695: }

700: PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
701:   PetscErrorCode   ierr;
702:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

705:   (*lu->MatDuplicate)(A,op,M);
706:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));
707:   return(0);
708: }

710: /*MC
711:   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 
712:   via the external package SuperLU_DIST.

714:   If SuperLU_DIST is installed (see the manual for
715:   instructions on how to declare the existence of external packages),
716:   a matrix type can be constructed which invokes SuperLU_DIST solvers.
717:   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).

719:   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
720:   and from MATMPIAIJ otherwise.  As a result, for single process communicators, 
721:   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 
722:   for communicators controlling multiple processes.  It is recommended that you call both of
723:   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
724:   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
725:   without data copy.

727:   Options Database Keys:
728: + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
729: . -mat_superlu_dist_r <n> - number of rows in processor partition
730: . -mat_superlu_dist_c <n> - number of columns in processor partition
731: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
732: . -mat_superlu_dist_equil - equilibrate the matrix
733: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
734: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
735: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
736: . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
737: . -mat_superlu_dist_iterrefine - use iterative refinement
738: - -mat_superlu_dist_statprint - print factorization information

740:    Level: beginner

742: .seealso: PCLU
743: M*/

748: PetscErrorCode  MatCreate_SuperLU_DIST(Mat A)
749: {
751:   PetscMPIInt    size;

754:   MPI_Comm_size(A->comm,&size);
755:   if (size == 1) {
756:     MatSetType(A,MATSEQAIJ);
757:   } else {
758:     MatSetType(A,MATMPIAIJ);
759:     /*  A_diag = 0x0 ???  -- do we need it?
760:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
761:     MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);
762:     */
763:   }
764:   MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);
765:   return(0);
766: }