Actual source code: mpispooles.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:    Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
  5: */

 7:  #include src/mat/impls/aij/seq/aij.h
 8:  #include src/mat/impls/sbaij/seq/sbaij.h
 9:  #include src/mat/impls/baij/seq/baij.h
 10:  #include src/mat/impls/aij/mpi/mpiaij.h
 11:  #include src/mat/impls/sbaij/mpi/mpisbaij.h
 12:  #include src/mat/impls/aij/seq/spooles/spooles.h

 14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);

 18: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
 19: {
 20:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 22: 
 24:   if (lu->CleanUpSpooles) {
 25:     FrontMtx_free(lu->frontmtx);
 26:     IV_free(lu->newToOldIV);
 27:     IV_free(lu->oldToNewIV);
 28:     IV_free(lu->vtxmapIV);
 29:     InpMtx_free(lu->mtxA);
 30:     ETree_free(lu->frontETree);
 31:     IVL_free(lu->symbfacIVL);
 32:     SubMtxManager_free(lu->mtxmanager);
 33:     DenseMtx_free(lu->mtxX);
 34:     DenseMtx_free(lu->mtxY);
 35:     MPI_Comm_free(&(lu->comm_spooles));
 36:     if ( lu->scat ){
 37:       VecDestroy(lu->vec_spooles);
 38:       ISDestroy(lu->iden);
 39:       ISDestroy(lu->is_petsc);
 40:       VecScatterDestroy(lu->scat);
 41:     }
 42:   }
 43:   MatConvert_Spooles_Base(A,lu->basetype,MAT_REUSE_MATRIX,&A);
 44:   (*A->ops->destroy)(A);

 46:   return(0);
 47: }

 51: PetscErrorCode MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
 52: {
 53:   Mat_Spooles   *lu = (Mat_Spooles*)A->spptr;
 55:   int           size,rank,m=A->rmap.n,irow,*rowindY;
 56:   PetscScalar   *array;
 57:   DenseMtx      *newY ;
 58:   SubMtxManager *solvemanager ;
 59: #if defined(PETSC_USE_COMPLEX)
 60:   double x_real,x_imag;
 61: #endif

 64:   MPI_Comm_size(A->comm,&size);
 65:   MPI_Comm_rank(A->comm,&rank);
 66: 
 67:   /* copy b into spooles' rhs mtxY */
 68:   DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
 69:   VecGetArray(b,&array);

 71:   DenseMtx_rowIndices(lu->mtxY, &m, &rowindY);  /* get m, rowind */
 72:   for ( irow = 0 ; irow < m ; irow++ ) {
 73:     rowindY[irow] = irow + lu->rstart;           /* global rowind */
 74: #if !defined(PETSC_USE_COMPLEX)
 75:     DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
 76: #else
 77:     DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
 78:     array++;
 79: #endif
 80:   }
 81:   VecRestoreArray(b,&array);
 82: 
 83:   if ( lu->options.msglvl > 2 ) {
 84:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
 85:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 86:     fflush(lu->options.msgFile);
 87:   }
 88: 
 89:   /* permute and redistribute Y if necessary */
 90:   DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
 91:   if ( lu->options.msglvl > 2 ) {
 92:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
 93:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
 94:    fflush(lu->options.msgFile);
 95:   }

 97:   MPI_Barrier(A->comm); /* for initializing firsttag, because the num. of tags used
 98:                                    by FrontMtx_MPI_split() is unknown */
 99:   lu->firsttag = 0;
100:   newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
101:                                 lu->options.msgFile, lu->firsttag, lu->comm_spooles);
102:   DenseMtx_free(lu->mtxY);
103:   lu->mtxY = newY ;
104:   lu->firsttag += size ;
105:   if ( lu->options.msglvl > 2 ) {
106:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
107:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
108:     fflush(lu->options.msgFile);
109:   }

111:   if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
112:     /*   pivoting has taken place, redistribute the right hand side
113:          to match the final rows and columns in the fronts             */
114:     IV *rowmapIV ;
115:     rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
116:                                     lu->options.msgFile, lu->comm_spooles);
117:     newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
118:                                    lu->options.msgFile, lu->firsttag, lu->comm_spooles);
119:     DenseMtx_free(lu->mtxY);
120:     lu->mtxY = newY ;
121:     IV_free(rowmapIV);
122:     lu->firsttag += size;
123:   }
124:   if ( lu->options.msglvl > 2 ) {
125:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
126:     DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
127:     fflush(lu->options.msgFile);
128:   }

130:   if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
131: 
132:   /* solve the linear system */
133:   solvemanager = SubMtxManager_new();
134:   SubMtxManager_init(solvemanager, NO_LOCK, 0);
135:   FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
136:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
137:   SubMtxManager_free(solvemanager);
138:   if ( lu->options.msglvl > 2 ) {
139:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
140:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
141:   }

143:   /* permute the solution into the original ordering */
144:   DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
145:   if ( lu->options.msglvl > 2 ) {
146:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
147:     DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
148:     fflush(lu->options.msgFile);
149:   }
150: 
151:   /* scatter local solution mtxX into mpi vector x */
152:   if( !lu->scat ){ /* create followings once for each numfactorization */
153:     /* vec_spooles <- mtxX */
154: #if !defined(PETSC_USE_COMPLEX) 
155:     VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
156: #else    
157:     VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
158: #endif 
159:     ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
160:     ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
161:     VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
162:   }
163: #if defined(PETSC_USE_COMPLEX)
164:     VecGetArray(lu->vec_spooles,&array);
165:     for (irow = 0; irow < lu->nmycol; irow++){
166:       DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
167:       array[irow] = x_real+x_imag*PETSC_i;
168:     }
169:     VecRestoreArray(lu->vec_spooles,&array);
170: #endif 
171:   VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
172:   VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
173:   return(0);
174: }

178: PetscErrorCode MatFactorNumeric_MPIAIJSpooles(Mat A,MatFactorInfo *info,Mat *F)
179: {
180:   Mat_Spooles     *lu = (Mat_Spooles*)(*F)->spptr;
181:   PetscErrorCode  ierr;
182:   int             rank,size,lookahead=0,sierr;
183:   ChvManager      *chvmanager ;
184:   Chv             *rootchv ;
185:   Graph           *graph ;
186:   IVL             *adjIVL;
187:   DV              *cumopsDV ;
188:   double          droptol=0.0,*opcounts,minops,cutoff;
189: #if !defined(PETSC_USE_COMPLEX)
190:   double          *val;
191: #endif
192:   InpMtx          *newA ;
193:   PetscScalar     *av, *bv;
194:   PetscInt        *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
195:                   i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
196:   PetscInt        M=A->rmap.N,m=A->rmap.n,root,nedges,tagbound,lasttag;
197:   Mat             F_diag;
198: 
200:   MPI_Comm_size(A->comm,&size);
201:   MPI_Comm_rank(A->comm,&rank);

203:   if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
204:     /* get input parameters */
205:     SetSpoolesOptions(A, &lu->options);

207:     (*F)->ops->solve   = MatSolve_MPIAIJSpooles;
208:     (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
209:     (*F)->assembled    = PETSC_TRUE;
210:     if ((*F)->factor == FACTOR_LU){
211:       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
212:     } else {
213:       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
214:     }
215:     F_diag->assembled  = PETSC_TRUE;

217:     /* to be used by MatSolve() */
218:     lu->mtxY = DenseMtx_new();
219:     lu->mtxX = DenseMtx_new();
220:     lu->scat = PETSC_NULL;

222:     IVzero(20, lu->stats);
223:     DVzero(20, lu->cpus);

225:     lu->mtxA = InpMtx_new();
226:   }
227: 
228:   /* copy A to Spooles' InpMtx object */
229:   if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
230:     Mat_MPIAIJ  *mat =  (Mat_MPIAIJ*)A->data;
231:     Mat_SeqAIJ  *aa=(Mat_SeqAIJ*)(mat->A)->data;
232:     Mat_SeqAIJ  *bb=(Mat_SeqAIJ*)(mat->B)->data;
233:     ai=aa->i; aj=aa->j; av=aa->a;
234:     bi=bb->i; bj=bb->j; bv=bb->a;
235:     lu->rstart = A->rmap.rstart;
236:     nz         = aa->nz + bb->nz;
237:     garray     = mat->garray;
238:   } else {         /* SPOOLES_SYMMETRIC  */
239:     Mat_MPISBAIJ  *mat = (Mat_MPISBAIJ*)A->data;
240:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
241:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
242:     ai=aa->i; aj=aa->j; av=aa->a;
243:     bi=bb->i; bj=bb->j; bv=bb->a;
244:     lu->rstart = A->rmap.rstart;
245:     nz         = aa->nz + bb->nz;
246:     garray     = mat->garray;
247:   }
248: 
249:   InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
250:   row   = InpMtx_ivec1(lu->mtxA);
251:   col   = InpMtx_ivec2(lu->mtxA);
252: #if !defined(PETSC_USE_COMPLEX)
253:   val   = InpMtx_dvec(lu->mtxA);
254: #endif

256:   jj = 0; irow = lu->rstart;
257:   for ( i=0; i<m; i++ ) {
258:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
259:     countA = ai[i+1] - ai[i];
260:     countB = bi[i+1] - bi[i];
261:     bjj = bj + bi[i];
262:     jB = 0;
263: 
264:     if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
265:       /* B part, smaller col index */
266:       colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
267:       for (j=0; j<countB; j++){
268:         jcol = garray[bjj[j]];
269:         if (jcol > colA_start) {
270:           jB = j;
271:           break;
272:         }
273:         row[jj] = irow; col[jj] = jcol;
274: #if !defined(PETSC_USE_COMPLEX)
275:         val[jj++] = *bv++;
276: #else
277:         InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
278:         bv++; jj++;
279: #endif
280:         if (j==countB-1) jB = countB;
281:       }
282:     }
283:     /* A part */
284:     for (j=0; j<countA; j++){
285:       row[jj] = irow; col[jj] = lu->rstart + ajj[j];
286: #if !defined(PETSC_USE_COMPLEX)
287:       val[jj++] = *av++;
288: #else
289:       InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
290:       av++; jj++;
291: #endif
292:     }
293:     /* B part, larger col index */
294:     for (j=jB; j<countB; j++){
295:       row[jj] = irow; col[jj] = garray[bjj[j]];
296: #if !defined(PETSC_USE_COMPLEX)
297:       val[jj++] = *bv++;
298: #else
299:      InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
300:      bv++; jj++;
301: #endif
302:     }
303:     irow++;
304:   }
305: #if !defined(PETSC_USE_COMPLEX)
306:   InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
307: #endif
308:   InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
309:   if ( lu->options.msglvl > 0 ) {
310:     printf("[%d] input matrix\n",rank);
311:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
312:     InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
313:     fflush(lu->options.msgFile);
314:   }

316:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
317:     /*
318:       find a low-fill ordering
319:       (1) create the Graph object
320:       (2) order the graph using multiple minimum degree
321:       (3) find out who has the best ordering w.r.t. op count,
322:           and broadcast that front tree object
323:     */
324:     graph = Graph_new();
325:     adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
326:               lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
327:     nedges = IVL_tsize(adjIVL);
328:     Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
329:     if ( lu->options.msglvl > 2 ) {
330:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
331:       Graph_writeForHumanEye(graph, lu->options.msgFile);
332:       fflush(lu->options.msgFile);
333:     }

335:     switch (lu->options.ordering) {
336:     case 0:
337:       lu->frontETree = orderViaBestOfNDandMS(graph,
338:                      lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
339:                      lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
340:     case 1:
341:       lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
342:     case 2:
343:       lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
344:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
345:     case 3:
346:       lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
347:                      lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
348:     default:
349:       SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
350:     }

352:     Graph_free(graph);
353:     if ( lu->options.msglvl > 2 ) {
354:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
355:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
356:       fflush(lu->options.msgFile);
357:     }

359:     opcounts = DVinit(size, 0.0);
360:     opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
361:     MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
362:               (void*) opcounts, 1, MPI_DOUBLE, A->comm);
363:     minops = DVmin(size, opcounts, &root);
364:     DVfree(opcounts);
365: 
366:     lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
367:                              lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
368:     if ( lu->options.msglvl > 2 ) {
369:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
370:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
371:       fflush(lu->options.msgFile);
372:     }
373: 
374:     /* get the permutations, permute the front tree, permute the matrix */
375:     lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
376:     lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);

378:     ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);

380:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
381: 
382:     if (  lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);

384:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
385:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

387:     /* generate the owners map IV object and the map from vertices to owners */
388:     cutoff   = 1./(2*size);
389:     cumopsDV = DV_new();
390:     DV_init(cumopsDV, size, NULL);
391:     lu->ownersIV = ETree_ddMap(lu->frontETree,
392:                        lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
393:     DV_free(cumopsDV);
394:     lu->vtxmapIV = IV_new();
395:     IV_init(lu->vtxmapIV, M, NULL);
396:     IVgather(M, IV_entries(lu->vtxmapIV),
397:              IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
398:     if ( lu->options.msglvl > 2 ) {
399:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
400:       IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
401:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
402:       IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
403:       fflush(lu->options.msgFile);
404:     }

406:     /* redistribute the matrix */
407:     lu->firsttag = 0 ;
408:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
409:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
410:     lu->firsttag += size ;

412:     InpMtx_free(lu->mtxA);
413:     lu->mtxA = newA ;
414:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
415:     if ( lu->options.msglvl > 2 ) {
416:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
417:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
418:       fflush(lu->options.msgFile);
419:     }
420: 
421:     /* compute the symbolic factorization */
422:     lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
423:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
424:     lu->firsttag += lu->frontETree->nfront ;
425:     if ( lu->options.msglvl > 2 ) {
426:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
427:       IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
428:       fflush(lu->options.msgFile);
429:     }

431:     lu->mtxmanager = SubMtxManager_new();
432:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
433:     lu->frontmtx = FrontMtx_new();

435:   } else { /* new num factorization using previously computed symbolic factor */
436:     if (lu->options.pivotingflag) {                  /* different FrontMtx is required */
437:       FrontMtx_free(lu->frontmtx);
438:       lu->frontmtx   = FrontMtx_new();
439:     }

441:     SubMtxManager_free(lu->mtxmanager);
442:     lu->mtxmanager = SubMtxManager_new();
443:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);

445:     /* permute mtxA */
446:     InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
447:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
448: 
449:     InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
450:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);

452:     /* redistribute the matrix */
453:     MPI_Barrier(A->comm);
454:     lu->firsttag = 0;
455:     newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
456:                         lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
457:     lu->firsttag += size ;

459:     InpMtx_free(lu->mtxA);
460:     lu->mtxA = newA ;
461:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
462:     if ( lu->options.msglvl > 2 ) {
463:       PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
464:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
465:       fflush(lu->options.msgFile);
466:     }
467:   } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */

469:   FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
470:               FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
471:               lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);

473:     if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
474:     if ( lu->options.patchAndGoFlag == 1 ) {
475:       lu->frontmtx->patchinfo = PatchAndGoInfo_new();
476:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
477:                        lu->options.storeids, lu->options.storevalues);
478:     } else if ( lu->options.patchAndGoFlag == 2 ) {
479:       lu->frontmtx->patchinfo = PatchAndGoInfo_new();
480:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
481:                        lu->options.storeids, lu->options.storevalues);
482:     }
483:   }

485:   /* numerical factorization */
486:   chvmanager = ChvManager_new();
487:   ChvManager_init(chvmanager, NO_LOCK, 0);

489:   tagbound = maxTagMPI(lu->comm_spooles);
490:   lasttag  = lu->firsttag + 3*lu->frontETree->nfront + 2;
491:   /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
492:   if ( lasttag > tagbound ) {
493:       SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
494:                lu->firsttag, lasttag, tagbound);
495:   }
496:   rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
497:                      chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
498:                      lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
499:   ChvManager_free(chvmanager);
500:   lu->firsttag = lasttag;
501:   if ( lu->options.msglvl > 2 ) {
502:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
503:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
504:     fflush(lu->options.msgFile);
505:   }

507:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
508:     if ( lu->options.patchAndGoFlag == 1 ) {
509:       if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
510:         if (lu->options.msglvl > 0 ){
511:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
512:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
513:         }
514:       }
515:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
516:     } else if ( lu->options.patchAndGoFlag == 2 ) {
517:       if (lu->options.msglvl > 0 ){
518:         if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
519:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
520:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
521:         }
522:         if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
523:           PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
524:           DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
525:         }
526:       }
527:       PatchAndGoInfo_free(lu->frontmtx->patchinfo);
528:     }
529:   }
530:   if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
531: 
532:   /*  post-process the factorization and split 
533:       the factor matrices into submatrices */
534:   lasttag  = lu->firsttag + 5*size;
535:   if ( lasttag > tagbound ) {
536:       SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
537:                lu->firsttag, lasttag, tagbound);
538:   }
539:   FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
540:                          lu->options.msgFile, lu->firsttag, lu->comm_spooles);
541:   lu->firsttag += 5*size ;
542:   if ( lu->options.msglvl > 2 ) {
543:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
544:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
545:     fflush(lu->options.msgFile);
546:   }
547: 
548:   /* create the solve map object */
549:   lu->solvemap = SolveMap_new();
550:   SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
551:                FrontMtx_upperBlockIVL(lu->frontmtx),
552:                FrontMtx_lowerBlockIVL(lu->frontmtx),
553:                size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
554:                lu->options.seed, lu->options.msglvl, lu->options.msgFile);
555:   if ( lu->options.msglvl > 2 ) {
556:     SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
557:     fflush(lu->options.msgFile);
558:   }

560:   /* redistribute the submatrices of the factors */
561:   FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
562:                    lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
563:   if ( lu->options.msglvl > 2 ) {
564:     PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
565:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
566:     fflush(lu->options.msgFile);
567:   }

569:   /* create a solution DenseMtx object */
570:   lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
571:                                          lu->options.msglvl, lu->options.msgFile);
572:   lu->nmycol = IV_size(lu->ownedColumnsIV);
573:   if ( lu->nmycol > 0) {
574:     DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
575:     /* get pointers rowindX and entX */
576:     DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
577:     lu->entX = DenseMtx_entries(lu->mtxX);
578:   } else { /* lu->nmycol == 0 */
579:     lu->entX    = 0;
580:     lu->rowindX = 0;
581:   }

583:   if ( lu->scat ){
584:     VecDestroy(lu->vec_spooles);
585:     ISDestroy(lu->iden);
586:     ISDestroy(lu->is_petsc);
587:     VecScatterDestroy(lu->scat);
588:   }
589:   lu->scat = PETSC_NULL;
590:   lu->flg = SAME_NONZERO_PATTERN;

592:   lu->CleanUpSpooles = PETSC_TRUE;
593:   return(0);
594: }

599: PetscErrorCode  MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,MatType type,MatReuse reuse,Mat *newmat)
600: {
602:   Mat            B=*newmat;
603:   Mat_Spooles    *lu;

606:   PetscNew(Mat_Spooles,&lu);
607:   if (reuse == MAT_INITIAL_MATRIX) {
608:     MatDuplicate(A,MAT_COPY_VALUES,&B);
609:     lu->MatDuplicate              = B->ops->duplicate;
610:     lu->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
611:     lu->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
612:     lu->MatView                   = B->ops->view;
613:     lu->MatAssemblyEnd            = B->ops->assemblyend;
614:     lu->MatDestroy                = B->ops->destroy;
615:   } else {
616:     lu->MatDuplicate              = A->ops->duplicate;
617:     lu->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
618:     lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
619:     lu->MatView                   = A->ops->view;
620:     lu->MatAssemblyEnd            = A->ops->assemblyend;
621:     lu->MatDestroy                = A->ops->destroy;
622:   }
623:   lu->basetype                  = MATMPIAIJ;
624:   lu->CleanUpSpooles            = PETSC_FALSE;

626:   B->spptr = (void*)lu;
627:   B->ops->duplicate             = MatDuplicate_Spooles;
628:   B->ops->lufactorsymbolic      = MatLUFactorSymbolic_MPIAIJSpooles;
629:   B->ops->view                  = MatView_SeqAIJSpooles;
630:   B->ops->assemblyend           = MatAssemblyEnd_MPIAIJSpooles;
631:   B->ops->destroy               = MatDestroy_MPIAIJSpooles;

633:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
634:                                            "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
635:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
636:                                            "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
637:   PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
638:   *newmat = B;
639:   return(0);
640: }

643: /*MC
644:   MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices 
645:   via the external package Spooles.

647:   If MPIAIJSPOOLES is installed (see the manual for
648:   instructions on how to declare the existence of external packages),
649:   a matrix type can be constructed which invokes SPOOLES solvers.
650:   After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).

652:   This matrix inherits from MATMPIAIJ.  As a result, MatMPIAIJSetPreallocation is 
653:   supported for this matrix type.  One can also call MatConvert for an inplace conversion to or from 
654:   the MATMPIAIJ type without data copy.

656:   Consult Spooles documentation for more information about the options database keys below.

658:   Options Database Keys:
659: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
660: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
661: . -mat_spooles_seed <seed> - random number seed used for ordering
662: . -mat_spooles_msglvl <msglvl> - message output level
663: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
664: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
665: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
666: . -mat_spooles_maxsize <n> - maximum size of a supernode
667: . -mat_spooles_FrontMtxInfo <true,fase> - print Spooles information about the computed factorization
668: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
669: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
670: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
671: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
672: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
673: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object

675:    Level: beginner

677: .seealso: PCLU
678: M*/

683: PetscErrorCode  MatCreate_MPIAIJSpooles(Mat A)
684: {

688:   MatSetType(A,MATMPIAIJ);
689:   /*
690:   Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
691:   MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,MAT_REUSE_MATRIX,&A_diag);
692:   */
693:   MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,MAT_REUSE_MATRIX,&A);
694:   return(0);
695: }