Actual source code: mpispooles.c
1: #define PETSCMAT_DLL
3: /*
4: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
5: */
7: #include src/mat/impls/aij/seq/aij.h
8: #include src/mat/impls/sbaij/seq/sbaij.h
9: #include src/mat/impls/baij/seq/baij.h
10: #include src/mat/impls/aij/mpi/mpiaij.h
11: #include src/mat/impls/sbaij/mpi/mpisbaij.h
12: #include src/mat/impls/aij/seq/spooles/spooles.h
14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
18: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
19: {
20: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
22:
24: if (lu->CleanUpSpooles) {
25: FrontMtx_free(lu->frontmtx);
26: IV_free(lu->newToOldIV);
27: IV_free(lu->oldToNewIV);
28: IV_free(lu->vtxmapIV);
29: InpMtx_free(lu->mtxA);
30: ETree_free(lu->frontETree);
31: IVL_free(lu->symbfacIVL);
32: SubMtxManager_free(lu->mtxmanager);
33: DenseMtx_free(lu->mtxX);
34: DenseMtx_free(lu->mtxY);
35: MPI_Comm_free(&(lu->comm_spooles));
36: if ( lu->scat ){
37: VecDestroy(lu->vec_spooles);
38: ISDestroy(lu->iden);
39: ISDestroy(lu->is_petsc);
40: VecScatterDestroy(lu->scat);
41: }
42: }
43: MatConvert_Spooles_Base(A,lu->basetype,MAT_REUSE_MATRIX,&A);
44: (*A->ops->destroy)(A);
46: return(0);
47: }
51: PetscErrorCode MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
52: {
53: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
55: int size,rank,m=A->rmap.n,irow,*rowindY;
56: PetscScalar *array;
57: DenseMtx *newY ;
58: SubMtxManager *solvemanager ;
59: #if defined(PETSC_USE_COMPLEX)
60: double x_real,x_imag;
61: #endif
64: MPI_Comm_size(A->comm,&size);
65: MPI_Comm_rank(A->comm,&rank);
66:
67: /* copy b into spooles' rhs mtxY */
68: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
69: VecGetArray(b,&array);
71: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY); /* get m, rowind */
72: for ( irow = 0 ; irow < m ; irow++ ) {
73: rowindY[irow] = irow + lu->rstart; /* global rowind */
74: #if !defined(PETSC_USE_COMPLEX)
75: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
76: #else
77: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
78: array++;
79: #endif
80: }
81: VecRestoreArray(b,&array);
82:
83: if ( lu->options.msglvl > 2 ) {
84: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
85: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
86: fflush(lu->options.msgFile);
87: }
88:
89: /* permute and redistribute Y if necessary */
90: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
91: if ( lu->options.msglvl > 2 ) {
92: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
93: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
94: fflush(lu->options.msgFile);
95: }
97: MPI_Barrier(A->comm); /* for initializing firsttag, because the num. of tags used
98: by FrontMtx_MPI_split() is unknown */
99: lu->firsttag = 0;
100: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
101: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
102: DenseMtx_free(lu->mtxY);
103: lu->mtxY = newY ;
104: lu->firsttag += size ;
105: if ( lu->options.msglvl > 2 ) {
106: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
107: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
108: fflush(lu->options.msgFile);
109: }
111: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
112: /* pivoting has taken place, redistribute the right hand side
113: to match the final rows and columns in the fronts */
114: IV *rowmapIV ;
115: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
116: lu->options.msgFile, lu->comm_spooles);
117: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
118: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
119: DenseMtx_free(lu->mtxY);
120: lu->mtxY = newY ;
121: IV_free(rowmapIV);
122: lu->firsttag += size;
123: }
124: if ( lu->options.msglvl > 2 ) {
125: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
126: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
127: fflush(lu->options.msgFile);
128: }
130: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
131:
132: /* solve the linear system */
133: solvemanager = SubMtxManager_new();
134: SubMtxManager_init(solvemanager, NO_LOCK, 0);
135: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
136: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
137: SubMtxManager_free(solvemanager);
138: if ( lu->options.msglvl > 2 ) {
139: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
140: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
141: }
143: /* permute the solution into the original ordering */
144: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
145: if ( lu->options.msglvl > 2 ) {
146: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
147: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
148: fflush(lu->options.msgFile);
149: }
150:
151: /* scatter local solution mtxX into mpi vector x */
152: if( !lu->scat ){ /* create followings once for each numfactorization */
153: /* vec_spooles <- mtxX */
154: #if !defined(PETSC_USE_COMPLEX)
155: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
156: #else
157: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
158: #endif
159: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
160: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
161: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
162: }
163: #if defined(PETSC_USE_COMPLEX)
164: VecGetArray(lu->vec_spooles,&array);
165: for (irow = 0; irow < lu->nmycol; irow++){
166: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
167: array[irow] = x_real+x_imag*PETSC_i;
168: }
169: VecRestoreArray(lu->vec_spooles,&array);
170: #endif
171: VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
172: VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
173: return(0);
174: }
178: PetscErrorCode MatFactorNumeric_MPIAIJSpooles(Mat A,MatFactorInfo *info,Mat *F)
179: {
180: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
181: PetscErrorCode ierr;
182: int rank,size,lookahead=0,sierr;
183: ChvManager *chvmanager ;
184: Chv *rootchv ;
185: Graph *graph ;
186: IVL *adjIVL;
187: DV *cumopsDV ;
188: double droptol=0.0,*opcounts,minops,cutoff;
189: #if !defined(PETSC_USE_COMPLEX)
190: double *val;
191: #endif
192: InpMtx *newA ;
193: PetscScalar *av, *bv;
194: PetscInt *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
195: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
196: PetscInt M=A->rmap.N,m=A->rmap.n,root,nedges,tagbound,lasttag;
197: Mat F_diag;
198:
200: MPI_Comm_size(A->comm,&size);
201: MPI_Comm_rank(A->comm,&rank);
203: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
204: /* get input parameters */
205: SetSpoolesOptions(A, &lu->options);
207: (*F)->ops->solve = MatSolve_MPIAIJSpooles;
208: (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
209: (*F)->assembled = PETSC_TRUE;
210: if ((*F)->factor == FACTOR_LU){
211: F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
212: } else {
213: F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
214: }
215: F_diag->assembled = PETSC_TRUE;
217: /* to be used by MatSolve() */
218: lu->mtxY = DenseMtx_new();
219: lu->mtxX = DenseMtx_new();
220: lu->scat = PETSC_NULL;
222: IVzero(20, lu->stats);
223: DVzero(20, lu->cpus);
225: lu->mtxA = InpMtx_new();
226: }
227:
228: /* copy A to Spooles' InpMtx object */
229: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
230: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
231: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
232: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
233: ai=aa->i; aj=aa->j; av=aa->a;
234: bi=bb->i; bj=bb->j; bv=bb->a;
235: lu->rstart = A->rmap.rstart;
236: nz = aa->nz + bb->nz;
237: garray = mat->garray;
238: } else { /* SPOOLES_SYMMETRIC */
239: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
240: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
241: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
242: ai=aa->i; aj=aa->j; av=aa->a;
243: bi=bb->i; bj=bb->j; bv=bb->a;
244: lu->rstart = A->rmap.rstart;
245: nz = aa->nz + bb->nz;
246: garray = mat->garray;
247: }
248:
249: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
250: row = InpMtx_ivec1(lu->mtxA);
251: col = InpMtx_ivec2(lu->mtxA);
252: #if !defined(PETSC_USE_COMPLEX)
253: val = InpMtx_dvec(lu->mtxA);
254: #endif
256: jj = 0; irow = lu->rstart;
257: for ( i=0; i<m; i++ ) {
258: ajj = aj + ai[i]; /* ptr to the beginning of this row */
259: countA = ai[i+1] - ai[i];
260: countB = bi[i+1] - bi[i];
261: bjj = bj + bi[i];
262: jB = 0;
263:
264: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
265: /* B part, smaller col index */
266: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
267: for (j=0; j<countB; j++){
268: jcol = garray[bjj[j]];
269: if (jcol > colA_start) {
270: jB = j;
271: break;
272: }
273: row[jj] = irow; col[jj] = jcol;
274: #if !defined(PETSC_USE_COMPLEX)
275: val[jj++] = *bv++;
276: #else
277: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
278: bv++; jj++;
279: #endif
280: if (j==countB-1) jB = countB;
281: }
282: }
283: /* A part */
284: for (j=0; j<countA; j++){
285: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
286: #if !defined(PETSC_USE_COMPLEX)
287: val[jj++] = *av++;
288: #else
289: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
290: av++; jj++;
291: #endif
292: }
293: /* B part, larger col index */
294: for (j=jB; j<countB; j++){
295: row[jj] = irow; col[jj] = garray[bjj[j]];
296: #if !defined(PETSC_USE_COMPLEX)
297: val[jj++] = *bv++;
298: #else
299: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
300: bv++; jj++;
301: #endif
302: }
303: irow++;
304: }
305: #if !defined(PETSC_USE_COMPLEX)
306: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
307: #endif
308: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
309: if ( lu->options.msglvl > 0 ) {
310: printf("[%d] input matrix\n",rank);
311: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
312: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
313: fflush(lu->options.msgFile);
314: }
316: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
317: /*
318: find a low-fill ordering
319: (1) create the Graph object
320: (2) order the graph using multiple minimum degree
321: (3) find out who has the best ordering w.r.t. op count,
322: and broadcast that front tree object
323: */
324: graph = Graph_new();
325: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
326: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
327: nedges = IVL_tsize(adjIVL);
328: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
329: if ( lu->options.msglvl > 2 ) {
330: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
331: Graph_writeForHumanEye(graph, lu->options.msgFile);
332: fflush(lu->options.msgFile);
333: }
335: switch (lu->options.ordering) {
336: case 0:
337: lu->frontETree = orderViaBestOfNDandMS(graph,
338: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
339: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
340: case 1:
341: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
342: case 2:
343: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
344: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
345: case 3:
346: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
347: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
348: default:
349: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
350: }
352: Graph_free(graph);
353: if ( lu->options.msglvl > 2 ) {
354: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
355: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
356: fflush(lu->options.msgFile);
357: }
359: opcounts = DVinit(size, 0.0);
360: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
361: MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
362: (void*) opcounts, 1, MPI_DOUBLE, A->comm);
363: minops = DVmin(size, opcounts, &root);
364: DVfree(opcounts);
365:
366: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
367: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
368: if ( lu->options.msglvl > 2 ) {
369: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
370: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
371: fflush(lu->options.msgFile);
372: }
373:
374: /* get the permutations, permute the front tree, permute the matrix */
375: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
376: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
378: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
380: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
381:
382: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
384: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
385: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
387: /* generate the owners map IV object and the map from vertices to owners */
388: cutoff = 1./(2*size);
389: cumopsDV = DV_new();
390: DV_init(cumopsDV, size, NULL);
391: lu->ownersIV = ETree_ddMap(lu->frontETree,
392: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
393: DV_free(cumopsDV);
394: lu->vtxmapIV = IV_new();
395: IV_init(lu->vtxmapIV, M, NULL);
396: IVgather(M, IV_entries(lu->vtxmapIV),
397: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
398: if ( lu->options.msglvl > 2 ) {
399: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
400: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
401: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
402: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
403: fflush(lu->options.msgFile);
404: }
406: /* redistribute the matrix */
407: lu->firsttag = 0 ;
408: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
409: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
410: lu->firsttag += size ;
412: InpMtx_free(lu->mtxA);
413: lu->mtxA = newA ;
414: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
415: if ( lu->options.msglvl > 2 ) {
416: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
417: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
418: fflush(lu->options.msgFile);
419: }
420:
421: /* compute the symbolic factorization */
422: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
423: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
424: lu->firsttag += lu->frontETree->nfront ;
425: if ( lu->options.msglvl > 2 ) {
426: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
427: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
428: fflush(lu->options.msgFile);
429: }
431: lu->mtxmanager = SubMtxManager_new();
432: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
433: lu->frontmtx = FrontMtx_new();
435: } else { /* new num factorization using previously computed symbolic factor */
436: if (lu->options.pivotingflag) { /* different FrontMtx is required */
437: FrontMtx_free(lu->frontmtx);
438: lu->frontmtx = FrontMtx_new();
439: }
441: SubMtxManager_free(lu->mtxmanager);
442: lu->mtxmanager = SubMtxManager_new();
443: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
445: /* permute mtxA */
446: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
447: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
448:
449: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
450: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
452: /* redistribute the matrix */
453: MPI_Barrier(A->comm);
454: lu->firsttag = 0;
455: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
456: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
457: lu->firsttag += size ;
459: InpMtx_free(lu->mtxA);
460: lu->mtxA = newA ;
461: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
462: if ( lu->options.msglvl > 2 ) {
463: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
464: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
465: fflush(lu->options.msgFile);
466: }
467: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
469: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
470: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
471: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
473: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
474: if ( lu->options.patchAndGoFlag == 1 ) {
475: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
476: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
477: lu->options.storeids, lu->options.storevalues);
478: } else if ( lu->options.patchAndGoFlag == 2 ) {
479: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
480: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
481: lu->options.storeids, lu->options.storevalues);
482: }
483: }
485: /* numerical factorization */
486: chvmanager = ChvManager_new();
487: ChvManager_init(chvmanager, NO_LOCK, 0);
489: tagbound = maxTagMPI(lu->comm_spooles);
490: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
491: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
492: if ( lasttag > tagbound ) {
493: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
494: lu->firsttag, lasttag, tagbound);
495: }
496: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
497: chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
498: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
499: ChvManager_free(chvmanager);
500: lu->firsttag = lasttag;
501: if ( lu->options.msglvl > 2 ) {
502: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
503: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
504: fflush(lu->options.msgFile);
505: }
507: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
508: if ( lu->options.patchAndGoFlag == 1 ) {
509: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
510: if (lu->options.msglvl > 0 ){
511: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
512: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
513: }
514: }
515: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
516: } else if ( lu->options.patchAndGoFlag == 2 ) {
517: if (lu->options.msglvl > 0 ){
518: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
519: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
520: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
521: }
522: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
523: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
524: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
525: }
526: }
527: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
528: }
529: }
530: if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
531:
532: /* post-process the factorization and split
533: the factor matrices into submatrices */
534: lasttag = lu->firsttag + 5*size;
535: if ( lasttag > tagbound ) {
536: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
537: lu->firsttag, lasttag, tagbound);
538: }
539: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
540: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
541: lu->firsttag += 5*size ;
542: if ( lu->options.msglvl > 2 ) {
543: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
544: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
545: fflush(lu->options.msgFile);
546: }
547:
548: /* create the solve map object */
549: lu->solvemap = SolveMap_new();
550: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
551: FrontMtx_upperBlockIVL(lu->frontmtx),
552: FrontMtx_lowerBlockIVL(lu->frontmtx),
553: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
554: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
555: if ( lu->options.msglvl > 2 ) {
556: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
557: fflush(lu->options.msgFile);
558: }
560: /* redistribute the submatrices of the factors */
561: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
562: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
563: if ( lu->options.msglvl > 2 ) {
564: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
565: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
566: fflush(lu->options.msgFile);
567: }
569: /* create a solution DenseMtx object */
570: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
571: lu->options.msglvl, lu->options.msgFile);
572: lu->nmycol = IV_size(lu->ownedColumnsIV);
573: if ( lu->nmycol > 0) {
574: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
575: /* get pointers rowindX and entX */
576: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
577: lu->entX = DenseMtx_entries(lu->mtxX);
578: } else { /* lu->nmycol == 0 */
579: lu->entX = 0;
580: lu->rowindX = 0;
581: }
583: if ( lu->scat ){
584: VecDestroy(lu->vec_spooles);
585: ISDestroy(lu->iden);
586: ISDestroy(lu->is_petsc);
587: VecScatterDestroy(lu->scat);
588: }
589: lu->scat = PETSC_NULL;
590: lu->flg = SAME_NONZERO_PATTERN;
592: lu->CleanUpSpooles = PETSC_TRUE;
593: return(0);
594: }
599: PetscErrorCode MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,MatType type,MatReuse reuse,Mat *newmat)
600: {
602: Mat B=*newmat;
603: Mat_Spooles *lu;
606: PetscNew(Mat_Spooles,&lu);
607: if (reuse == MAT_INITIAL_MATRIX) {
608: MatDuplicate(A,MAT_COPY_VALUES,&B);
609: lu->MatDuplicate = B->ops->duplicate;
610: lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic;
611: lu->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
612: lu->MatView = B->ops->view;
613: lu->MatAssemblyEnd = B->ops->assemblyend;
614: lu->MatDestroy = B->ops->destroy;
615: } else {
616: lu->MatDuplicate = A->ops->duplicate;
617: lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
618: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
619: lu->MatView = A->ops->view;
620: lu->MatAssemblyEnd = A->ops->assemblyend;
621: lu->MatDestroy = A->ops->destroy;
622: }
623: lu->basetype = MATMPIAIJ;
624: lu->CleanUpSpooles = PETSC_FALSE;
626: B->spptr = (void*)lu;
627: B->ops->duplicate = MatDuplicate_Spooles;
628: B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJSpooles;
629: B->ops->view = MatView_SeqAIJSpooles;
630: B->ops->assemblyend = MatAssemblyEnd_MPIAIJSpooles;
631: B->ops->destroy = MatDestroy_MPIAIJSpooles;
633: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
634: "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
635: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
636: "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
637: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
638: *newmat = B;
639: return(0);
640: }
643: /*MC
644: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
645: via the external package Spooles.
647: If MPIAIJSPOOLES is installed (see the manual for
648: instructions on how to declare the existence of external packages),
649: a matrix type can be constructed which invokes SPOOLES solvers.
650: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).
652: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation is
653: supported for this matrix type. One can also call MatConvert for an inplace conversion to or from
654: the MATMPIAIJ type without data copy.
656: Consult Spooles documentation for more information about the options database keys below.
658: Options Database Keys:
659: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
660: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
661: . -mat_spooles_seed <seed> - random number seed used for ordering
662: . -mat_spooles_msglvl <msglvl> - message output level
663: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
664: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
665: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
666: . -mat_spooles_maxsize <n> - maximum size of a supernode
667: . -mat_spooles_FrontMtxInfo <true,fase> - print Spooles information about the computed factorization
668: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
669: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
670: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
671: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
672: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
673: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
675: Level: beginner
677: .seealso: PCLU
678: M*/
683: PetscErrorCode MatCreate_MPIAIJSpooles(Mat A)
684: {
688: MatSetType(A,MATMPIAIJ);
689: /*
690: Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
691: MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,MAT_REUSE_MATRIX,&A_diag);
692: */
693: MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,MAT_REUSE_MATRIX,&A);
694: return(0);
695: }