Actual source code: dscpack.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
  5: */

 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/mat/impls/baij/mpi/mpibaij.h

 11: #include "dscmain.h"

 14: typedef struct {
 15:   DSC_Solver        My_DSC_Solver;
 16:   PetscInt      num_local_strucs, *local_struc_old_num,
 17:                 num_local_cols, num_local_nonz,
 18:                 *global_struc_new_col_num,
 19:                 *global_struc_new_num, *global_struc_owner,
 20:                 dsc_id,bs,*local_cols_old_num,*replication;
 21:   PetscInt      order_code,scheme_code,factor_type, stat,
 22:                 LBLASLevel,DBLASLevel,max_mem_allowed;
 23:   MatStructure  flg;
 24:   IS            my_cols,iden,iden_dsc;
 25:   Vec           vec_dsc;
 26:   VecScatter    scat;
 27:   MPI_Comm      comm_dsc;

 29:   /* A few inheritance details */
 30:   PetscMPIInt    size;
 31:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 32:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 33:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 34:   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
 35:   PetscErrorCode (*MatDestroy)(Mat);
 36:   PetscErrorCode (*MatPreallocate)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*);

 38:   /* Clean up flag for destructor */
 39:   PetscTruth CleanUpDSCPACK;
 40: } Mat_DSC;

 42: EXTERN PetscErrorCode MatDuplicate_DSCPACK(Mat,MatDuplicateOption,Mat*);
 44: EXTERN PetscErrorCode  MatConvert_BAIJ_DSCPACK(Mat,const MatType,MatReuse,Mat*);

 47: /* DSC function */
 50: void isort2(PetscInt size, PetscInt *list, PetscInt *idx_dsc) {
 51:   /* in increasing order */
 52:   /* idx_dsc will contain indices such that */
 53:   /* list can be accessed in sorted order */
 54:   PetscInt i, j, x, y;
 55: 
 56:   for (i=0; i<size; i++) idx_dsc[i] =i;

 58:   for (i=1; i<size; i++){
 59:     y= idx_dsc[i];
 60:     x=list[idx_dsc[i]];
 61:     for (j=i-1; ((j>=0) && (x<list[idx_dsc[j]])); j--)
 62:       idx_dsc[j+1]=idx_dsc[j];
 63:     idx_dsc[j+1]=y;
 64:   }
 65: }/*end isort2*/

 69: PetscErrorCode  BAIJtoMyANonz( PetscInt *AIndex, PetscInt *AStruct, PetscInt bs,
 70:                     RealNumberType *ANonz, PetscInt NumLocalStructs, 
 71:                     PetscInt NumLocalNonz,  PetscInt *GlobalStructNewColNum,                
 72:                     PetscInt *LocalStructOldNum,
 73:                     PetscInt *LocalStructLocalNum,
 74:                     RealNumberType **adr_MyANonz)
 75: /* 
 76:    Extract non-zero values of lower triangular part
 77:    of the permuted matrix that belong to this processor.

 79:    Only output parameter is adr_MyANonz -- is malloced and changed.
 80:    Rest are input parameters left unchanged.

 82:    When LocalStructLocalNum == PETSC_NULL,
 83:         AIndex, AStruct, and ANonz contain entire original matrix A 
 84:         in PETSc SeqBAIJ format,
 85:         otherwise,
 86:         AIndex, AStruct, and ANonz are indeces for the submatrix
 87:         of A whose colomns (in increasing order) belong to this processor.

 89:    Other variables supply information on ownership of columns
 90:    and the new numbering in a fill-reducing permutation

 92:    This information is used to setup lower half of A nonzeroes
 93:    for columns owned by this processor
 94:  */
 95: {
 97:   PetscInt            i, j, k, iold,inew, jj, kk, bs2=bs*bs,
 98:                  *idx, *NewColNum,
 99:                  MyANonz_last, max_struct=0, struct_size;
100:   RealNumberType *MyANonz;


104:   /* loop: to find maximum number of subscripts over columns
105:      assigned to this processor */
106:   for (i=0; i <NumLocalStructs; i++) {
107:     /* for each struct i (local) assigned to this processor */
108:     if (LocalStructLocalNum){
109:       iold = LocalStructLocalNum[i];
110:     } else {
111:       iold = LocalStructOldNum[i];
112:     }
113: 
114:     struct_size = AIndex[iold+1] - AIndex[iold];
115:     if ( max_struct <= struct_size) max_struct = struct_size;
116:   }

118:   /* allocate tmp arrays large enough to hold densest struct */
119:   PetscMalloc((2*max_struct+1)*sizeof(PetscInt),&NewColNum);
120:   idx = NewColNum + max_struct;
121: 
122:   PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
123:   *adr_MyANonz = MyANonz;

125:   /* loop to set up nonzeroes in MyANonz */
126:   MyANonz_last = 0 ; /* points to first empty space in MyANonz */
127:   for (i=0; i <NumLocalStructs; i++) {

129:     /* for each struct i (local) assigned to this processor */
130:     if (LocalStructLocalNum){
131:       iold = LocalStructLocalNum[i];
132:     } else {
133:       iold = LocalStructOldNum[i];
134:     }

136:     struct_size = AIndex[iold+1] - AIndex[iold];
137:     for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
138:       NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
139:       k++;
140:     }
141:     isort2(struct_size, NewColNum, idx);
142: 
143:     kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
144:     inew = GlobalStructNewColNum[LocalStructOldNum[i]];

146:     for (jj = 0; jj < bs; jj++) {
147:       for (j=0; j<struct_size; j++){
148:         for ( k = 0; k<bs; k++){
149:           if (NewColNum[idx[j]] + k >= inew)
150:             MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
151:         }
152:       }
153:       inew++;
154:     }
155:   } /* end outer loop for i */

157:   PetscFree(NewColNum);
158:   if (MyANonz_last != NumLocalNonz) SETERRQ2(PETSC_ERR_PLIB,"MyANonz_last %d != NumLocalNonz %d\n",MyANonz_last, NumLocalNonz);
159:   return(0);
160: }

165: PetscErrorCode  MatConvert_DSCPACK_BAIJ(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
166: {
168:   Mat            B=*newmat;
169:   Mat_DSC        *lu=(Mat_DSC*)A->spptr;
170:   void           (*f)(void);

173:   if (reuse == MAT_INITIAL_MATRIX) {
174:     MatDuplicate(A,MAT_COPY_VALUES,&B);
175:   }
176:   /* Reset the original function pointers */
177:   B->ops->duplicate              = lu->MatDuplicate;
178:   B->ops->view                   = lu->MatView;
179:   B->ops->assemblyend            = lu->MatAssemblyEnd;
180:   B->ops->choleskyfactorsymbolic = lu->MatCholeskyFactorSymbolic;
181:   B->ops->destroy                = lu->MatDestroy;
182:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
183:   if (f) {
184:     PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","",(PetscVoidFunction)lu->MatPreallocate);
185:   }

187:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_dscpack_C","",PETSC_NULL);
188:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_seqbaij_C","",PETSC_NULL);
189:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_dscpack_C","",PETSC_NULL);
190:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_mpibaij_C","",PETSC_NULL);

192:   PetscObjectChangeTypeName((PetscObject)B,type);
193:   *newmat = B;

195:   return(0);
196: }

201: PetscErrorCode MatDestroy_DSCPACK(Mat A)
202: {
203:   Mat_DSC        *lu=(Mat_DSC*)A->spptr;
205: 
207:   if (lu->CleanUpDSCPACK) {
208:     if (lu->dsc_id != -1) {
209:       if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
210:       DSC_FreeAll(lu->My_DSC_Solver);
211:       DSC_Close0(lu->My_DSC_Solver);
212: 
213:       PetscFree(lu->local_cols_old_num);
214:     }
215:     DSC_End(lu->My_DSC_Solver);
216: 
217:     MPI_Comm_free(&(lu->comm_dsc));
218:     ISDestroy(lu->my_cols);
219:     PetscFree(lu->replication);
220:     VecDestroy(lu->vec_dsc);
221:     ISDestroy(lu->iden_dsc);
222:     VecScatterDestroy(lu->scat);
223:     if (lu->size >1 && lu->iden) {ISDestroy(lu->iden);}
224:   }
225:   if (lu->size == 1) {
226:     MatConvert_DSCPACK_BAIJ(A,MATSEQBAIJ,MAT_REUSE_MATRIX,&A);
227:   } else {
228:     MatConvert_DSCPACK_BAIJ(A,MATMPIBAIJ,MAT_REUSE_MATRIX,&A);
229:   }
230:   (*A->ops->destroy)(A);
231:   return(0);
232: }

236: PetscErrorCode MatSolve_DSCPACK(Mat A,Vec b,Vec x) {
237:   Mat_DSC        *lu= (Mat_DSC*)A->spptr;
239:   RealNumberType *solution_vec,*rhs_vec;

242:   /* scatter b into seq vec_dsc */
243:   if ( !lu->scat ) {
244:     VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
245:   }
246:   VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
247:   VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);

249:   if (lu->dsc_id != -1){
250:     VecGetArray(lu->vec_dsc,&rhs_vec);
251:     DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
252:     VecRestoreArray(lu->vec_dsc,&rhs_vec);
253: 
254:     DSC_Solve(lu->My_DSC_Solver);
255:     if (ierr !=  DSC_NO_ERROR) {
256:       DSC_ErrorDisplay(lu->My_DSC_Solver);
257:       SETERRQ(PETSC_ERR_LIB,"Error in calling DSC_Solve");
258:     }

260:     /* get the permuted local solution */
261:     VecGetArray(lu->vec_dsc,&solution_vec);
262:     DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
263:     VecRestoreArray(lu->vec_dsc,&solution_vec);

265:   } /* end of if (lu->dsc_id != -1) */

267:   /* put permuted local solution solution_vec into x in the original order */
268:   VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
269:   VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);

271:   return(0);
272: }

276: PetscErrorCode MatCholeskyFactorNumeric_DSCPACK(Mat A,MatFactorInfo *info,Mat *F) {
277:   Mat_SeqBAIJ    *a_seq;
278:   Mat_DSC        *lu=(Mat_DSC*)(*F)->spptr;
279:   Mat            *tseq,A_seq=PETSC_NULL;
280:   RealNumberType *my_a_nonz;
282:   PetscMPIInt    size;
283:   PetscInt       M=A->rmap.N,Mbs=M/lu->bs,max_mem_estimate,max_single_malloc_blk,
284:                  number_of_procs,i,j,next,iold,*idx,*iidx=0,*itmp;
285:   IS             my_cols_sorted;
286:   Mat            F_diag;
287: 
289:   MPI_Comm_size(A->comm,&size);
290:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
291:     /* convert A to A_seq */
292:     if (size > 1) {
293:       if (!lu->iden){
294:         ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
295:       }
296:       MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
297:       A_seq = tseq[0];
298:       a_seq = (Mat_SeqBAIJ*)A_seq->data;
299:     } else {
300:       a_seq = (Mat_SeqBAIJ*)A->data;
301:     }
302: 
303:     PetscMalloc(Mbs*sizeof(PetscInt),&lu->replication);
304:     for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;

306:     number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
307: 
308:     i = size;
309:     if ( number_of_procs < i ) i = number_of_procs;
310:     number_of_procs = 1;
311:     while ( i > 1 ){
312:       number_of_procs  *= 2; i /= 2;
313:     }

315:     /* DSC_Solver starts */
316:     DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, lu->comm_dsc );

318:     if (lu->dsc_id != -1) {
319:       DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
320:                    &M,&lu->num_local_strucs,
321:                    &lu->num_local_cols, &lu->num_local_nonz,  &lu->global_struc_new_col_num,
322:                    &lu->global_struc_new_num, &lu->global_struc_owner,
323:                    &lu->local_struc_old_num);
324:       if (ierr !=  DSC_NO_ERROR) {
325:         DSC_ErrorDisplay(lu->My_DSC_Solver);
326:         SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order()");
327:       }

329:       DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
330:                      lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
331:       if (ierr !=  DSC_NO_ERROR) {
332:         DSC_ErrorDisplay(lu->My_DSC_Solver);
333:         SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order");
334:       }

336:       BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
337:                        lu->num_local_strucs, lu->num_local_nonz,
338:                        lu->global_struc_new_col_num,
339:                        lu->local_struc_old_num,
340:                        PETSC_NULL,
341:                        &my_a_nonz);
342:       if (ierr <0) {
343:           DSC_ErrorDisplay(lu->My_DSC_Solver);
344:           SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
345:       }

347:       /* get local_cols_old_num and IS my_cols to be used later */
348:       PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&lu->local_cols_old_num);
349:       for (next = 0, i=0; i<lu->num_local_strucs; i++){
350:         iold = lu->bs*lu->local_struc_old_num[i];
351:         for (j=0; j<lu->bs; j++)
352:           lu->local_cols_old_num[next++] = iold++;
353:       }
354:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
355: 
356:     } else {    /* lu->dsc_id == -1 */
357:       lu->num_local_cols = 0;
358:       lu->local_cols_old_num = 0;
359:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
360:     }
361:     /* generate vec_dsc and iden_dsc to be used later */
362:     VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
363:     ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
364:     lu->scat = PETSC_NULL;

366:     if ( size>1 ) {
367:       MatDestroyMatrices(1,&tseq);
368:     }
369:   } else { /* use previously computed symbolic factor */
370:     /* convert A to my A_seq */
371:     if (size > 1) {
372:       if (lu->dsc_id == -1) {
373:         itmp = 0;
374:       } else {
375:         PetscMalloc(2*lu->num_local_strucs*sizeof(PetscInt),&idx);
376:         iidx = idx + lu->num_local_strucs;
377:         PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&itmp);
378: 
379:         isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
380:         for (next=0, i=0; i< lu->num_local_strucs; i++) {
381:           iold = lu->bs*lu->local_struc_old_num[idx[i]];
382:           for (j=0; j<lu->bs; j++){
383:             itmp[next++] = iold++; /* sorted local_cols_old_num */
384:           }
385:         }
386:         for (i=0; i< lu->num_local_strucs; i++) {
387:           iidx[idx[i]] = i;       /* inverse of idx */
388:         }
389:       } /* end of (lu->dsc_id == -1) */
390:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
391:       MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
392:       ISDestroy(my_cols_sorted);
393:       A_seq = tseq[0];
394: 
395:       if (lu->dsc_id != -1) {
396:         DSC_ReFactorInitialize(lu->My_DSC_Solver);

398:         a_seq = (Mat_SeqBAIJ*)A_seq->data;
399:         BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
400:                        lu->num_local_strucs, lu->num_local_nonz,
401:                        lu->global_struc_new_col_num,
402:                        lu->local_struc_old_num,
403:                        iidx,
404:                        &my_a_nonz);
405:         if (ierr <0) {
406:           DSC_ErrorDisplay(lu->My_DSC_Solver);
407:           SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
408:         }
409:         PetscFree(idx);
410:         PetscFree(itmp);
411:       } /* end of if(lu->dsc_id != -1)  */
412:     } else { /* size == 1 */
413:       a_seq = (Mat_SeqBAIJ*)A->data;
414: 
415:       BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
416:                        lu->num_local_strucs, lu->num_local_nonz,
417:                        lu->global_struc_new_col_num,
418:                        lu->local_struc_old_num,
419:                        PETSC_NULL,
420:                        &my_a_nonz);
421:       if (ierr <0) {
422:         DSC_ErrorDisplay(lu->My_DSC_Solver);
423:         SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
424:       }
425:     }
426:     if ( size>1 ) {MatDestroyMatrices(1,&tseq); }
427:   }
428: 
429:   if (lu->dsc_id != -1) {
430:     DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
431:     PetscFree(my_a_nonz);
432:   }
433: 
434:   if (size > 1) {
435:     F_diag = ((Mat_MPIBAIJ *)(*F)->data)->A;
436:     F_diag->assembled = PETSC_TRUE;
437:   }
438:   (*F)->assembled   = PETSC_TRUE;
439:   lu->flg           = SAME_NONZERO_PATTERN;

441:   return(0);
442: }

444: /* Note the Petsc permutation r is ignored */
447: PetscErrorCode MatCholeskyFactorSymbolic_DSCPACK(Mat A,IS r,MatFactorInfo *info,Mat *F) {
448:   Mat        B;
449:   Mat_DSC    *lu;
451:   PetscInt bs,indx;
452:   PetscTruth flg;
453:   const char *ftype[]={"LDLT","LLT"},*ltype[]={"LBLAS1","LBLAS2","LBLAS3"},*dtype[]={"DBLAS1","DBLAS2"};


457:   /* Create the factorization matrix F */
458:   MatGetBlockSize(A,&bs);
459:   MatCreate(A->comm,&B);
460:   MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
461:   MatSetType(B,A->type_name);
462:   MatSeqBAIJSetPreallocation(B,bs,0,PETSC_NULL);
463:   MatMPIBAIJSetPreallocation(B,bs,0,PETSC_NULL,0,PETSC_NULL);
464: 
465:   lu = (Mat_DSC*)B->spptr;
466:   B->rmap.bs = bs;

468:   B->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_DSCPACK;
469:   B->ops->solve                  = MatSolve_DSCPACK;
470:   B->factor                      = FACTOR_CHOLESKY;

472:   /* Set the default input options */
473:   lu->order_code  = 2;
474:   lu->scheme_code = 1;
475:   lu->factor_type = 2;
476:   lu->stat        = 0; /* do not display stats */
477:   lu->LBLASLevel  = DSC_LBLAS3;
478:   lu->DBLASLevel  = DSC_DBLAS2;
479:   lu->max_mem_allowed = 256;
480:   MPI_Comm_dup(A->comm,&(lu->comm_dsc));
481:   /* Get the runtime input options */
482:   PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");

484:   PetscOptionsInt("-mat_dscpack_order","order_code: \n\
485:          1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", \
486:          "None",
487:          lu->order_code,&lu->order_code,PETSC_NULL);

489:   PetscOptionsInt("-mat_dscpack_scheme","scheme_code: \n\
490:          1 = standard factorization,  2 = factorization + selective inversion", \
491:          "None",
492:          lu->scheme_code,&lu->scheme_code,PETSC_NULL);
493: 
494:   PetscOptionsEList("-mat_dscpack_factor","factor_type","None",ftype,2,ftype[0],&indx,&flg);
495:   if (flg) {
496:     switch (indx) {
497:     case 0:
498:       lu->factor_type = DSC_LDLT;
499:       break;
500:     case 1:
501:       lu->factor_type = DSC_LLT;
502:       break;
503:     }
504:   }
505:   PetscOptionsInt("-mat_dscpack_MaxMemAllowed","in Mbytes","None",
506:          lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);

508:   PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display,  1 = display",
509:          "None", lu->stat,&lu->stat,PETSC_NULL);
510: 
511:   PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",ltype,3,ltype[2],&indx,&flg);
512:   if (flg) {
513:     switch (indx) {
514:     case 0:
515:       lu->LBLASLevel = DSC_LBLAS1;
516:       break;
517:     case 1:
518:       lu->LBLASLevel = DSC_LBLAS2;
519:       break;
520:     case 2:
521:       lu->LBLASLevel = DSC_LBLAS3;
522:       break;
523:     }
524:   }

526:   PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",dtype,2,dtype[1],&indx,&flg);
527:   if (flg) {
528:     switch (indx) {
529:     case 0:
530:       lu->DBLASLevel = DSC_DBLAS1;
531:       break;
532:     case 1:
533:       lu->DBLASLevel = DSC_DBLAS2;
534:       break;
535:     }
536:   }

538:   PetscOptionsEnd();
539: 
540:   lu->flg = DIFFERENT_NONZERO_PATTERN;

542:   lu->My_DSC_Solver = DSC_Begin();
543:   lu->CleanUpDSCPACK = PETSC_TRUE;
544:   *F = B;
545:   return(0);
546: }

550: PetscErrorCode MatAssemblyEnd_DSCPACK(Mat A,MatAssemblyType mode) {
552:   Mat_DSC *lu=(Mat_DSC*)A->spptr;

555:   (*lu->MatAssemblyEnd)(A,mode);
556:   lu->MatCholeskyFactorSymbolic  = A->ops->choleskyfactorsymbolic;
557:   A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
558:   return(0);
559: }

563: PetscErrorCode MatFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
564: {
565:   Mat_DSC *lu=(Mat_DSC*)A->spptr;
567:   const char    *s=0;
568: 
570:   PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:\n");

572:   switch (lu->order_code) {
573:   case 1: s = "ND"; break;
574:   case 2: s = "Hybrid with Minimum Degree"; break;
575:   case 3: s = "Hybrid with Minimum Deficiency"; break;
576:   }
577:   PetscViewerASCIIPrintf(viewer,"  order_code: %s \n",s);

579:   switch (lu->scheme_code) {
580:   case 1: s = "standard factorization"; break;
581:   case 2: s = "factorization + selective inversion"; break;
582:   }
583:   PetscViewerASCIIPrintf(viewer,"  scheme_code: %s \n",s);

585:   switch (lu->stat) {
586:   case 0: s = "NO"; break;
587:   case 1: s = "YES"; break;
588:   }
589:   PetscViewerASCIIPrintf(viewer,"  display stats: %s \n",s);
590: 
591:   if ( lu->factor_type == DSC_LLT) {
592:     s = "LLT";
593:   } else if ( lu->factor_type == DSC_LDLT){
594:     s = "LDLT";
595:   } else if (lu->factor_type == 0) {
596:     s = "None";
597:   } else {
598:     SETERRQ(PETSC_ERR_PLIB,"Unknown factor type");
599:   }
600:   PetscViewerASCIIPrintf(viewer,"  factor type: %s \n",s);

602:   if ( lu->LBLASLevel == DSC_LBLAS1) {
603:     s = "BLAS1";
604:   } else if ( lu->LBLASLevel == DSC_LBLAS2){
605:     s = "BLAS2";
606:   } else if ( lu->LBLASLevel == DSC_LBLAS3){
607:     s = "BLAS3";
608:   } else if (lu->LBLASLevel == 0) {
609:     s = "None";
610:   } else {
611:     SETERRQ(PETSC_ERR_PLIB,"Unknown local phase BLAS level");
612:   }
613:   PetscViewerASCIIPrintf(viewer,"  local phase BLAS level: %s \n",s);
614: 
615:   if ( lu->DBLASLevel == DSC_DBLAS1) {
616:     s = "BLAS1";
617:   } else if ( lu->DBLASLevel == DSC_DBLAS2){
618:     s = "BLAS2";
619:   } else if (lu->DBLASLevel == 0) {
620:     s = "None";
621:   } else {
622:     SETERRQ(PETSC_ERR_PLIB,"Unknown distributed phase BLAS level");
623:   }
624:   PetscViewerASCIIPrintf(viewer,"  distributed phase BLAS level: %s \n",s);
625:   return(0);
626: }

628: EXTERN PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
629: EXTERN PetscErrorCode MatView_MPIBAIJ(Mat,PetscViewer);


634: PetscErrorCode MatView_DSCPACK(Mat A,PetscViewer viewer) {
635:   PetscErrorCode    ierr;
636:   PetscMPIInt       size;
637:   PetscTruth        iascii;
638:   PetscViewerFormat format;
639:   Mat_DSC           *lu=(Mat_DSC*)A->spptr;

642:   /* Cannot view factored matrix */
643:   if (A->factor) {
644:     return(0);
645:   }
646:   /* This convertion ugliness is because MatView for BAIJ types calls MatConvert to AIJ */
647:   size = lu->size;
648:   if (size==1) {
649:     MatView_SeqBAIJ(A,viewer);
650:   } else {
651:     MatView_MPIBAIJ(A,viewer);
652:   }

654:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
655:   if (iascii) {
656:     PetscViewerGetFormat(viewer,&format);
657:     if (format == PETSC_VIEWER_ASCII_INFO) {
658:       MatFactorInfo_DSCPACK(A,viewer);
659:     }
660:   }
661:   return(0);
662: }

667: PetscErrorCode  MatMPIBAIJSetPreallocation_MPIDSCPACK(Mat  B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
668: {
669:   Mat     A;
670:   Mat_DSC *lu = (Mat_DSC*)B->spptr;

674:   /*
675:     After performing the MPIBAIJ Preallocation, we need to convert the local diagonal block matrix
676:     into DSCPACK type so that the block jacobi preconditioner (for example) can use DSCPACK.  I would
677:     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
678:     block size info so that PETSc can determine the local size properly.  The block size info is set
679:     in the preallocation routine.
680:   */
681:   (*lu->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
682:   A    = ((Mat_MPIBAIJ *)B->data)->A;
683:   MatConvert_BAIJ_DSCPACK(A,MATDSCPACK,MAT_REUSE_MATRIX,&A);
684:   return(0);
685: }

691: PetscErrorCode  MatConvert_BAIJ_DSCPACK(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
692: {
693:   /* This routine is only called to convert to MATDSCPACK */
694:   /* from MATSEQBAIJ if A has a single process communicator */
695:   /* or MATMPIBAIJ otherwise, so we will ignore 'MatType type'. */
697:   MPI_Comm       comm;
698:   Mat            B=*newmat;
699:   Mat_DSC        *lu;
700:   void           (*f)(void);

703:   if (reuse == MAT_INITIAL_MATRIX) {
704:     MatDuplicate(A,MAT_COPY_VALUES,&B);
705:   }

707:   PetscObjectGetComm((PetscObject)A,&comm);
708:   PetscNew(Mat_DSC,&lu);

710:   lu->MatDuplicate               = A->ops->duplicate;
711:   lu->MatView                    = A->ops->view;
712:   lu->MatAssemblyEnd             = A->ops->assemblyend;
713:   lu->MatCholeskyFactorSymbolic  = A->ops->choleskyfactorsymbolic;
714:   lu->MatDestroy                 = A->ops->destroy;
715:   lu->CleanUpDSCPACK             = PETSC_FALSE;
716:   lu->bs                         = A->rmap.bs;
717:   lu->factor_type                = 0;
718:   lu->LBLASLevel                 = 0;
719:   lu->DBLASLevel                 = 0;

721:   B->spptr                       = (void*)lu;
722:   B->ops->duplicate              = MatDuplicate_DSCPACK;
723:   B->ops->view                   = MatView_DSCPACK;
724:   B->ops->assemblyend            = MatAssemblyEnd_DSCPACK;
725:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
726:   B->ops->destroy                = MatDestroy_DSCPACK;

728:   MPI_Comm_size(comm,&(lu->size));
729:   if (lu->size == 1) {
730:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_dscpack_C",
731:                                              "MatConvert_BAIJ_DSCPACK",MatConvert_BAIJ_DSCPACK);
732:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_seqbaij_C",
733:                                              "MatConvert_DSCPACK_BAIJ",MatConvert_DSCPACK_BAIJ);
734:   } else {
735:       /* I really don't like needing to know the tag: MatMPIBAIJSetPreallocation_C */
736:     PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
737:     if (f) {
738:       lu->MatPreallocate = (PetscErrorCode (*)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*))f;
739:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
740:                                                "MatMPIBAIJSetPreallocation_MPIDSCPACK",
741:                                                MatMPIBAIJSetPreallocation_MPIDSCPACK);
742:     }
743:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_dscpack_C",
744:                                              "MatConvert_BAIJ_DSCPACK",MatConvert_BAIJ_DSCPACK);
745:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_mpibaij_C",
746:                                              "MatConvert_DSCPACK_BAIJ",MatConvert_DSCPACK_BAIJ);
747:   }
748:   PetscObjectChangeTypeName((PetscObject)B,MATDSCPACK);
749:   *newmat = B;
750:   return(0);
751: }

756: PetscErrorCode MatDuplicate_DSCPACK(Mat A, MatDuplicateOption op, Mat *M) {
758:   Mat_DSC *lu=(Mat_DSC *)A->spptr;

761:   (*lu->MatDuplicate)(A,op,M);
762:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_DSC));
763:   return(0);
764: }

766: /*MC
767:   MATDSCPACK - MATDSCPACK = "dscpack" - A matrix type providing direct solvers (Cholesky) for sequential 
768:   or distributed matrices via the external package DSCPACK.

770:   If DSCPACK is installed (see the manual for
771:   instructions on how to declare the existence of external packages),
772:   a matrix type can be constructed which invokes DSCPACK solvers.
773:   After calling MatCreate(...,A), simply call MatSetType(A,MATDSCPACK).
774:   This matrix type is only supported for double precision real.

776:   This matrix inherits from MATSEQBAIJ if constructed with a single process communicator,
777:   and from MATMPIBAIJ otherwise.  As a result, for sequential matrices, MatSeqBAIJSetPreallocation is 
778:   supported, and similarly MatMPIBAIJSetPreallocation is supported for distributed matrices.  It is 
779:   recommended that you call both of the above preallocation routines for simplicity.  Also,
780:   MatConvert can be called to perform inplace conversion to and from MATSEQBAIJ or MATMPIBAIJ
781:   for sequential or distributed matrices respectively.

783:   Options Database Keys:
784: + -mat_type dscpack - sets the matrix type to dscpack during a call to MatSetFromOptions()
785: . -mat_dscpack_order <1,2,3> - DSCPACK ordering, 1:ND, 2:Hybrid with Minimum Degree, 3:Hybrid with Minimum Deficiency
786: . -mat_dscpack_scheme <1,2> - factorization scheme, 1:standard factorization,  2: factorization with selective inversion
787: . -mat_dscpack_factor <LLT,LDLT> - the type of factorization to be performed.
788: . -mat_dscpack_MaxMemAllowed <n> - the maximum memory to be used during factorization
789: . -mat_dscpack_stats <0,1> - display stats of the factorization and solves during MatDestroy(), 0: no display,  1: display
790: . -mat_dscpack_LBLAS <LBLAS1,LBLAS2,LBLAS3> - BLAS level used in the local phase
791: - -mat_dscpack_DBLAS <DBLAS1,DBLAS2> - BLAS level used in the distributed phase

793:    Level: beginner

795: .seealso: PCCHOLESKY
796: M*/

801: PetscErrorCode  MatCreate_DSCPACK(Mat A)
802: {
804:   PetscMPIInt    size;

807:   MPI_Comm_size(A->comm,&size);
808:   if (size == 1) {
809:     MatSetType(A,MATSEQBAIJ);
810:   } else {
811:     MatSetType(A,MATMPIBAIJ);
812:   }
813:   MatConvert_BAIJ_DSCPACK(A,MATDSCPACK,MAT_REUSE_MATRIX,&A);
814:   return(0);
815: }