Actual source code: ispai.c
1: #define PETSCKSP_DLL
3: /*
4: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
5: */
7: /*
8: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9: Code written by Stephen Barnard.
11: Note: there is some BAD memory leaking below!
13: This code needs work
15: 1) get rid of all memory leaks
16: 2) fix the PETSc interface so that it determines whether the matrix is symmetric from the matrix itself
17: rather than having the sp flag for PC_SPAI
19: */
21: #include "private/pcimpl.h"
22: #include "petscspai.h"
24: /*
25: These are the SPAI include files
26: */
28: #include "spai.h"
29: #include "matrix.h"
32: EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
33: EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
34: EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
35: EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
37: typedef struct {
39: matrix *B; /* matrix in SPAI format */
40: matrix *BT; /* transpose of matrix in SPAI format */
41: matrix *M; /* the approximate inverse in SPAI format */
43: Mat PM; /* the approximate inverse PETSc format */
45: double epsilon; /* tolerance */
46: int nbsteps; /* max number of "improvement" steps per line */
47: int max; /* max dimensions of is_I, q, etc. */
48: int maxnew; /* max number of new entries per step */
49: int block_size; /* constant block size */
50: int cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
51: int verbose; /* SPAI prints timing and statistics */
53: int sp; /* symmetric nonzero pattern */
54: MPI_Comm comm_spai; /* communicator to be used with spai */
55: } PC_SPAI;
57: /**********************************************************************/
61: static PetscErrorCode PCSetUp_SPAI(PC pc)
62: {
63: PC_SPAI *ispai = (PC_SPAI*)pc->data;
65: Mat AT;
69: init_SPAI();
71: if (ispai->sp) {
72: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
73: } else {
74: /* Use the transpose to get the column nonzero structure. */
75: MatTranspose(pc->pmat,&AT);
76: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
77: MatDestroy(AT);
78: }
80: /* Destroy the transpose */
81: /* Don't know how to do it. PETSc developers? */
82:
83: /* construct SPAI preconditioner */
84: /* FILE *messages */ /* file for warning messages */
85: /* double epsilon */ /* tolerance */
86: /* int nbsteps */ /* max number of "improvement" steps per line */
87: /* int max */ /* max dimensions of is_I, q, etc. */
88: /* int maxnew */ /* max number of new entries per step */
89: /* int block_size */ /* block_size == 1 specifies scalar elements
90: block_size == n specifies nxn constant-block elements
91: block_size == 0 specifies variable-block elements */
92: /* int cache_size */ /* one of (1,2,3,4,5,6) indicating size of cache */
93: /* cache_size == 0 indicates no caching */
94: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
95: verbose == 1 prints timing and matrix statistics */
97: bspai(ispai->B,&ispai->M,
98: stdout,
99: ispai->epsilon,
100: ispai->nbsteps,
101: ispai->max,
102: ispai->maxnew,
103: ispai->block_size,
104: ispai->cache_size,
105: ispai->verbose);
107: ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);
109: /* free the SPAI matrices */
110: sp_free_matrix(ispai->B);
111: sp_free_matrix(ispai->M);
113: return(0);
114: }
116: /**********************************************************************/
120: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121: {
122: PC_SPAI *ispai = (PC_SPAI*)pc->data;
126: /* Now using PETSc's multiply */
127: MatMult(ispai->PM,xx,y);
128: return(0);
129: }
131: /**********************************************************************/
135: static PetscErrorCode PCDestroy_SPAI(PC pc)
136: {
138: PC_SPAI *ispai = (PC_SPAI*)pc->data;
141: if (ispai->PM) {MatDestroy(ispai->PM);}
142: MPI_Comm_free(&(ispai->comm_spai));
143: PetscFree(ispai);
144: return(0);
145: }
147: /**********************************************************************/
151: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
152: {
153: PC_SPAI *ispai = (PC_SPAI*)pc->data;
155: PetscTruth iascii;
158: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
159: if (iascii) {
160: PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");
161: PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);
162: PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);
163: PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);
164: PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);
165: PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);
166: PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);
167: PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);
168: PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);
170: }
171: return(0);
172: }
177: PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178: {
179: PC_SPAI *ispai = (PC_SPAI*)pc->data;
181: ispai->epsilon = epsilon1;
182: return(0);
183: }
185:
186: /**********************************************************************/
191: PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192: {
193: PC_SPAI *ispai = (PC_SPAI*)pc->data;
195: ispai->nbsteps = nbsteps1;
196: return(0);
197: }
200: /**********************************************************************/
202: /* added 1/7/99 g.h. */
206: PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
207: {
208: PC_SPAI *ispai = (PC_SPAI*)pc->data;
210: ispai->max = max1;
211: return(0);
212: }
215: /**********************************************************************/
220: PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221: {
222: PC_SPAI *ispai = (PC_SPAI*)pc->data;
224: ispai->maxnew = maxnew1;
225: return(0);
226: }
229: /**********************************************************************/
234: PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235: {
236: PC_SPAI *ispai = (PC_SPAI*)pc->data;
238: ispai->block_size = block_size1;
239: return(0);
240: }
243: /**********************************************************************/
248: PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249: {
250: PC_SPAI *ispai = (PC_SPAI*)pc->data;
252: ispai->cache_size = cache_size;
253: return(0);
254: }
257: /**********************************************************************/
262: PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
263: {
264: PC_SPAI *ispai = (PC_SPAI*)pc->data;
266: ispai->verbose = verbose;
267: return(0);
268: }
271: /**********************************************************************/
276: PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
277: {
278: PC_SPAI *ispai = (PC_SPAI*)pc->data;
280: ispai->sp = sp;
281: return(0);
282: }
285: /* -------------------------------------------------------------------*/
289: /*@
290: PCSPAISetEpsilon - set the tolerance for the SPAI preconditioner
292: Input Parameters:
293: + pc - the preconditioner
294: - eps - epsilon (default .4)
296: Notes: Epsilon must be between 0 and 1. It controls the
297: quality of the approximation of M to the inverse of
298: A. Higher values of epsilon lead to more work, more
299: fill, and usually better preconditioners. In many
300: cases the best choice of epsilon is the one that
301: divides the total solution time equally between the
302: preconditioner and the solver.
303:
304: Level: intermediate
306: .seealso: PCSPAI, PCSetType()
307: @*/
308: PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
309: {
310: PetscErrorCode ierr,(*f)(PC,double);
312: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);
313: if (f) {
314: (*f)(pc,epsilon1);
315: }
316: return(0);
317: }
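/*
   Editorial sketch (not part of the original source): one plausible way to select this
   preconditioner and set epsilon directly from code, rather than through the options
   database. Assumes an existing KSP named "ksp" and the usual ierr/CHKERRQ error checking.

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSPAI);CHKERRQ(ierr);
       ierr = PCSPAISetEpsilon(pc,0.6);CHKERRQ(ierr);

   A value of 0.6 is tighter than the 0.4 default, so SPAI does more work per column and
   usually produces a better approximate inverse.
*/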
318:
319: /**********************************************************************/
323: /*@
324: PCSPAISetNBSteps - set maximum number of improvement steps per row in
325: the SPAI preconditioner
327: Input Parameters:
328: + pc - the preconditioner
329: - n - number of steps (default 5)
331: Notes: SPAI constructs an approximation to every column of
332: the exact inverse of A in a series of improvement
333: steps. The quality of the approximation is determined
334: by epsilon. If an approximation achieving an accuracy
335: of epsilon is not obtained after ns steps, SPAI simply
336: uses the best approximation constructed so far.
338: Level: intermediate
340: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
341: @*/
342: PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
343: {
344: PetscErrorCode ierr,(*f)(PC,int);
346: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);
347: if (f) {
348: (*f)(pc,nbsteps1);
349: }
350: return(0);
351: }
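/*
   Editorial sketch (not part of the original source): nbsteps is usually tuned together
   with epsilon from the options database; the executable name below is only a placeholder.

       ./myapp -ksp_type gmres -pc_type spai -pc_spai_epsilon 0.6 -pc_spai_nbsteps 10

   Raising nbsteps lets SPAI keep refining a column that has not yet reached the epsilon
   tolerance; columns that converge earlier stop sooner.
*/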
353: /**********************************************************************/
355: /* added 1/7/99 g.h. */
358: /*@
359: PCSPAISetMax - set the size of various working buffers in
360: the SPAI preconditioner
362: Input Parameters:
363: + pc - the preconditioner
364: - n - size (default is 5000)
366: Level: intermediate
368: .seealso: PCSPAI, PCSetType()
369: @*/
370: PetscErrorCode PCSPAISetMax(PC pc,int max1)
371: {
372: PetscErrorCode ierr,(*f)(PC,int);
374: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);
375: if (f) {
376: (*f)(pc,max1);
377: }
378: return(0);
379: }
381: /**********************************************************************/
385: /*@
386: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
387: in SPAI preconditioner
389: Input Parameters:
390: + pc - the preconditioner
391: - n - maximum number (default 5)
393: Level: intermediate
395: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
396: @*/
397: PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
398: {
399: PetscErrorCode ierr,(*f)(PC,int);
401: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);
402: if (f) {
403: (*f)(pc,maxnew1);
404: }
405: return(0);
406: }
408: /**********************************************************************/
412: /*@
413: PCSPAISetBlockSize - set the block size for the SPAI preconditioner
415: Input Parameters:
416: + pc - the preconditioner
417: - n - block size (default 1)
419: Notes: A block
420: size of 1 treats A as a matrix of scalar elements. A
421: block size of s > 1 treats A as a matrix of sxs
422: blocks. A block size of 0 treats A as a matrix with
423: variable sized blocks, which are determined by
424: searching for dense square diagonal blocks in A.
425: This can be very effective for finite-element
426: matrices.
428: SPAI will convert A to block form, use a block
429: version of the preconditioner algorithm, and then
430: convert the result back to scalar form.
432: In many cases a block size other than 1
433: can lead to a very significant improvement in
434: performance.
437: Level: intermediate
439: .seealso: PCSPAI, PCSetType()
440: @*/
441: PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
442: {
443: PetscErrorCode ierr,(*f)(PC,int);
445: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);
446: if (f) {
447: (*f)(pc,block_size1);
448: }
449: return(0);
450: }
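/*
   Editorial sketch (not part of the original source): for a hypothetical finite-element
   matrix with 3 degrees of freedom per node one might request constant 3x3 blocks, or pass
   0 and let SPAI search for variable-sized dense diagonal blocks itself.

       ierr = PCSPAISetBlockSize(pc,3);CHKERRQ(ierr);
   or
       ierr = PCSPAISetBlockSize(pc,0);CHKERRQ(ierr);
*/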
452: /**********************************************************************/
456: /*@
457: PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
459: Input Parameters:
460: + pc - the preconditioner
461: - n - cache size {0,1,2,3,4,5} (default 5)
463: Notes: SPAI uses a hash table to cache messages and avoid
464: redundant communication. We suggest always using
465: 5. This parameter is irrelevant in the serial
466: version.
468: Level: intermediate
470: .seealso: PCSPAI, PCSetType()
471: @*/
472: PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
473: {
474: PetscErrorCode ierr,(*f)(PC,int);
476: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);
477: if (f) {
478: (*f)(pc,cache_size);
479: }
480: return(0);
481: }
483: /**********************************************************************/
487: /*@
488: PCSPAISetVerbose - set the verbosity level for the SPAI preconditioner
490: Input Parameters:
491: + pc - the preconditioner
492: - n - level (default 1)
494: Notes: print parameters, timings and matrix statistics
496: Level: intermediate
498: .seealso: PCSPAI, PCSetType()
499: @*/
500: PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
501: {
502: PetscErrorCode ierr,(*f)(PC,int);
504: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);
505: if (f) {
506: (*f)(pc,verbose);
507: }
508: return(0);
509: }
511: /**********************************************************************/
515: /*@
516: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
518: Input Parameters:
519: + pc - the preconditioner
520: - n - 0 or 1
522: Notes: If A has a symmetric nonzero pattern use -pc_spai_sp 1 to
523: improve performance by eliminating some communication
524: in the parallel version. Even if A does not have a
525: symmetric nonzero pattern -pc_spai_sp 1 may well lead to good
526: results, but the code will not follow the published
527: SPAI algorithm exactly.
530: Level: intermediate
532: .seealso: PCSPAI, PCSetType()
533: @*/
534: PetscErrorCode PCSPAISetSp(PC pc,int sp)
535: {
536: PetscErrorCode ierr,(*f)(PC,int);
538: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);
539: if (f) {
540: (*f)(pc,sp);
541: }
542: return(0);
543: }
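/*
   Editorial sketch (not part of the original source): the symmetric-pattern optimization is
   on by default in this interface (sp == 1, see PCCreate_SPAI below); a matrix whose nonzero
   pattern is truly nonsymmetric can switch it off so the published algorithm is followed.

       ierr = PCSPAISetSp(pc,0);CHKERRQ(ierr);
*/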
545: /**********************************************************************/
547: /**********************************************************************/
551: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
552: {
553: PC_SPAI *ispai = (PC_SPAI*)pc->data;
555: int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
556: double epsilon1;
557: PetscTruth flg;
560: PetscOptionsHead("SPAI options");
561: PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
562: if (flg) {
563: PCSPAISetEpsilon(pc,epsilon1);
564: }
565: PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
566: if (flg) {
567: PCSPAISetNBSteps(pc,nbsteps1);
568: }
569: /* added 1/7/99 g.h. */
570: PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
571: if (flg) {
572: PCSPAISetMax(pc,max1);
573: }
574: PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
575: if (flg) {
576: PCSPAISetMaxNew(pc,maxnew1);
577: }
578: PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
579: if (flg) {
580: PCSPAISetBlockSize(pc,block_size1);
581: }
582: PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
583: if (flg) {
584: PCSPAISetCacheSize(pc,cache_size);
585: }
586: PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
587: if (flg) {
588: PCSPAISetVerbose(pc,verbose);
589: }
590: PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
591: if (flg) {
592: PCSPAISetSp(pc,sp);
593: }
594: PetscOptionsTail();
595: return(0);
596: }
598: /**********************************************************************/
600: /*MC
601: PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
602: as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
604: Options Database Keys:
605: + -pc_spai_epsilon <eps> - set tolerance
606: . -pc_spai_nbsteps <n> - set nbsteps
607: . -pc_spai_max <m> - set max
608: . -pc_spai_maxnew <m> - set maxnew
609: . -pc_spai_block_size <n> - set block size
610: . -pc_spai_cache_size <n> - set cache size
611: . -pc_spai_sp <m> - set sp
612: - -pc_spai_verbose <n> - set verbosity level
614: Notes: This only works with AIJ matrices.
616: Level: beginner
618: Concepts: approximate inverse
620: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
621: PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
622: PCSPAISetVerbose(), PCSPAISetSp()
623: M*/
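/*
   Editorial sketch (not part of the original source): a typical command line exercising the
   options listed above; "ex2" stands in for any PETSc program that solves a linear system
   with an AIJ matrix.

       mpiexec -n 4 ./ex2 -ksp_type gmres -pc_type spai \
               -pc_spai_epsilon 0.5 -pc_spai_nbsteps 10 -pc_spai_verbose 1
*/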
628: PetscErrorCode PCCreate_SPAI(PC pc)
629: {
630: PC_SPAI *ispai;
634: PetscNew(PC_SPAI,&ispai);
635: pc->data = (void*)ispai;
637: pc->ops->destroy = PCDestroy_SPAI;
638: pc->ops->apply = PCApply_SPAI;
639: pc->ops->applyrichardson = 0;
640: pc->ops->setup = PCSetUp_SPAI;
641: pc->ops->view = PCView_SPAI;
642: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
644: pc->name = 0;
645: ispai->epsilon = .4;
646: ispai->nbsteps = 5;
647: ispai->max = 5000;
648: ispai->maxnew = 5;
649: ispai->block_size = 1;
650: ispai->cache_size = 5;
651: ispai->verbose = 0;
653: ispai->sp = 1;
654: MPI_Comm_dup(pc->comm,&(ispai->comm_spai));
656: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657: "PCSPAISetEpsilon_SPAI",
658: PCSPAISetEpsilon_SPAI);
659: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660: "PCSPAISetNBSteps_SPAI",
661: PCSPAISetNBSteps_SPAI);
662: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663: "PCSPAISetMax_SPAI",
664: PCSPAISetMax_SPAI);
665: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_C",
666: "PCSPAISetMaxNew_SPAI",
667: PCSPAISetMaxNew_SPAI);
668: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669: "PCSPAISetBlockSize_SPAI",
670: PCSPAISetBlockSize_SPAI);
671: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672: "PCSPAISetCacheSize_SPAI",
673: PCSPAISetCacheSize_SPAI);
674: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675: "PCSPAISetVerbose_SPAI",
676: PCSPAISetVerbose_SPAI);
677: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678: "PCSPAISetSp_SPAI",
679: PCSPAISetSp_SPAI);
681: return(0);
682: }
685: /**********************************************************************/
687: /*
688: Converts from a PETSc matrix to an SPAI matrix
689: */
692: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693: {
694: matrix *M;
695: int i,j,col;
696: int row_indx;
697: int len,pe,local_indx,start_indx;
698: int *mapping;
700: const int *cols;
701: const double *vals;
702: int *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
703: struct compressed_lines *rows;
706:
707: MPI_Comm_size(comm,&size);
708: MPI_Comm_rank(comm,&rank);
709: MatGetSize(A,&n,&n);
710: MatGetLocalSize(A,&mnl,&nnl);
712: /*
713: not sure why a barrier is required. commenting out
714: MPI_Barrier(comm);
715: */
717: M = new_matrix((void*)comm);
718:
719: M->n = n;
720: M->bs = 1;
721: M->max_block_size = 1;
723: M->mnls = (int*)malloc(sizeof(int)*size);
724: M->start_indices = (int*)malloc(sizeof(int)*size);
725: M->pe = (int*)malloc(sizeof(int)*n);
726: M->block_sizes = (int*)malloc(sizeof(int)*n);
727: for (i=0; i<n; i++) M->block_sizes[i] = 1;
729: MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);
731: M->start_indices[0] = 0;
732: for (i=1; i<size; i++) {
733: M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
734: }
736: M->mnl = M->mnls[M->myid];
737: M->my_start_index = M->start_indices[M->myid];
739: for (i=0; i<size; i++) {
740: start_indx = M->start_indices[i];
741: for (j=0; j<M->mnls[i]; j++)
742: M->pe[start_indx+j] = i;
743: }
745: if (AT) {
746: M->lines = new_compressed_lines(M->mnls[rank],1);
747: } else {
748: M->lines = new_compressed_lines(M->mnls[rank],0);
749: }
751: rows = M->lines;
753: /* Determine the mapping from global indices to pointers */
754: PetscMalloc(M->n*sizeof(int),&mapping);
755: pe = 0;
756: local_indx = 0;
757: for (i=0; i<M->n; i++) {
758: if (local_indx >= M->mnls[pe]) {
759: pe++;
760: local_indx = 0;
761: }
762: mapping[i] = local_indx + M->start_indices[pe];
763: local_indx++;
764: }
767: PetscMalloc(mnl*sizeof(int),&num_ptr);
769: /*********************************************************/
770: /************** Set up the row structure *****************/
771: /*********************************************************/
773: /* count number of nonzeros in every row */
774: MatGetOwnershipRange(A,&rstart,&rend);
775: for (i=rstart; i<rend; i++) {
776: MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
777: MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
778: }
780: /* allocate buffers */
781: len = 0;
782: for (i=0; i<mnl; i++) {
783: if (len < num_ptr[i]) len = num_ptr[i];
784: }
786: for (i=rstart; i<rend; i++) {
787: row_indx = i-rstart;
788: len = num_ptr[row_indx];
789: rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
790: rows->A[row_indx] = (double*)malloc(len*sizeof(double));
791: }
793: /* copy the matrix */
794: for (i=rstart; i<rend; i++) {
795: row_indx = i - rstart;
796: MatGetRow(A,i,&nz,&cols,&vals);
797: for (j=0; j<nz; j++) {
798: col = cols[j];
799: len = rows->len[row_indx]++;
800: rows->ptrs[row_indx][len] = mapping[col];
801: rows->A[row_indx][len] = vals[j];
802: }
803: rows->slen[row_indx] = rows->len[row_indx];
804: MatRestoreRow(A,i,&nz,&cols,&vals);
805: }
808: /************************************************************/
809: /************** Set up the column structure *****************/
810: /*********************************************************/
812: if (AT) {
814: /* count number of nonzeros in every column */
815: for (i=rstart; i<rend; i++) {
816: MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
817: MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
818: }
820: /* allocate buffers */
821: len = 0;
822: for (i=0; i<mnl; i++) {
823: if (len < num_ptr[i]) len = num_ptr[i];
824: }
826: for (i=rstart; i<rend; i++) {
827: row_indx = i-rstart;
828: len = num_ptr[row_indx];
829: rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
830: }
832: /* copy the matrix (i.e., the structure) */
833: for (i=rstart; i<rend; i++) {
834: row_indx = i - rstart;
835: MatGetRow(AT,i,&nz,&cols,&vals);
836: for (j=0; j<nz; j++) {
837: col = cols[j];
838: len = rows->rlen[row_indx]++;
839: rows->rptrs[row_indx][len] = mapping[col];
840: }
841: MatRestoreRow(AT,i,&nz,&cols,&vals);
842: }
843: }
845: PetscFree(num_ptr);
846: PetscFree(mapping);
848: order_pointers(M);
849: M->maxnz = calc_maxnz(M);
851: *B = M;
853: return(0);
854: }
856: /**********************************************************************/
858: /*
859: Converts from an SPAI matrix B to a PETSc matrix PB.
860: This assumes that the SPAI matrix B is stored in
861: COMPRESSED-ROW format.
862: */
865: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
866: {
867: int size,rank;
869: int m,n,M,N;
870: int d_nz,o_nz;
871: int *d_nnz,*o_nnz;
872: int i,k,global_row,global_col,first_diag_col,last_diag_col;
873: PetscScalar val;
876: MPI_Comm_size(comm,&size);
877: MPI_Comm_rank(comm,&rank);
878:
879: m = n = B->mnls[rank];
880: d_nz = o_nz = 0;
882: /* Determine preallocation for MatCreateMPIAIJ */
883: PetscMalloc(m*sizeof(int),&d_nnz);
884: PetscMalloc(m*sizeof(int),&o_nnz);
885: for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
886: first_diag_col = B->start_indices[rank];
887: last_diag_col = first_diag_col + B->mnls[rank];
888: for (i=0; i<B->mnls[rank]; i++) {
889: for (k=0; k<B->lines->len[i]; k++) {
890: global_col = B->lines->ptrs[i][k];
891: if ((global_col >= first_diag_col) && (global_col < last_diag_col))
892: d_nnz[i]++;
893: else
894: o_nnz[i]++;
895: }
896: }
898: M = N = B->n;
899: /* Here we only know how to create AIJ format */
900: MatCreate(comm,PB);
901: MatSetSizes(*PB,m,n,M,N);
902: MatSetType(*PB,MATAIJ);
903: MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
904: MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);
906: for (i=0; i<B->mnls[rank]; i++) {
907: global_row = B->start_indices[rank]+i;
908: for (k=0; k<B->lines->len[i]; k++) {
909: global_col = B->lines->ptrs[i][k];
910: val = B->lines->A[i][k];
911: MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
912: }
913: }
915: PetscFree(d_nnz);
916: PetscFree(o_nnz);
918: MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
919: MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);
921: return(0);
922: }
924: /**********************************************************************/
926: /*
927: Converts from an SPAI vector v to a PETSc vec Pv.
928: */
931: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
932: {
934: int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
935:
937: MPI_Comm_size(comm,&size);
938: MPI_Comm_rank(comm,&rank);
939:
940: m = v->mnl;
941: M = v->n;
942:
943:
944: VecCreateMPI(comm,m,M,Pv);
946: PetscMalloc(size*sizeof(int),&mnls);
947: MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);
948:
949: PetscMalloc(size*sizeof(int),&start_indices);
950: start_indices[0] = 0;
951: for (i=1; i<size; i++)
952: start_indices[i] = start_indices[i-1] +mnls[i-1];
953:
954: PetscMalloc(v->mnl*sizeof(int),&global_indices);
955: for (i=0; i<v->mnl; i++)
956: global_indices[i] = start_indices[rank] + i;
958: PetscFree(mnls);
959: PetscFree(start_indices);
960:
961: VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);
963: PetscFree(global_indices);
965: VecAssemblyBegin(*Pv);
966: VecAssemblyEnd(*Pv);
967:
968: return(0);
969: }