Actual source code: ispai.c

  1: #define PETSCKSP_DLL

  3: /* 
  4:    3/99 Modified by Stephen Barnard to support SPAI version 3.0 
  5: */

  7: /*
  8:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  9:    Code written by Stephen Barnard.

 11:       Note: there is some BAD memory bleeding below!

 13:       This code needs work

 15:    1) get rid of all memory bleeding
 16:    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
 17:       rather than having the sp flag for PC_SPAI

 19: */

 21:  #include private/pcimpl.h
 22:  #include petscspai.h

 24: /*
 25:     These are the SPAI include files
 26: */
 28: #include "spai.h"
 29: #include "matrix.h"

 32: EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
 33: EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
 34: EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
 35: EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);

 37: typedef struct {

 39:   matrix *B;              /* matrix in SPAI format */
 40:   matrix *BT;             /* transpose of matrix in SPAI format */
 41:   matrix *M;              /* the approximate inverse in SPAI format */

 43:   Mat    PM;              /* the approximate inverse PETSc format */

 45:   double epsilon;         /* tolerance */
 46:   int    nbsteps;         /* max number of "improvement" steps per line */
 47:   int    max;             /* max dimensions of is_I, q, etc. */
 48:   int    maxnew;          /* max number of new entries per step */
 49:   int    block_size;      /* constant block size */
 50:   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
 51:   int    verbose;         /* SPAI prints timing and statistics */

 53:   int    sp;              /* symmetric nonzero pattern */
 54:   MPI_Comm comm_spai;     /* communicator to be used with spai */
 55: } PC_SPAI;

 57: /**********************************************************************/

 61: static PetscErrorCode PCSetUp_SPAI(PC pc)
 62: {
 63:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
 65:   Mat      AT;


 69:   init_SPAI();

 71:   if (ispai->sp) {
 72:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
 73:   } else {
 74:     /* Use the transpose to get the column nonzero structure. */
 75:     MatTranspose(pc->pmat,&AT);
 76:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
 77:     MatDestroy(AT);
 78:   }

 80:   /* Destroy the transpose */
 81:   /* Don't know how to do it. PETSc developers? */
 82: 
 83:   /* construct SPAI preconditioner */
 84:   /* FILE *messages */     /* file for warning messages */
 85:   /* double epsilon */     /* tolerance */
 86:   /* int nbsteps */        /* max number of "improvement" steps per line */
 87:   /* int max */            /* max dimensions of is_I, q, etc. */
 88:   /* int maxnew */         /* max number of new entries per step */
 89:   /* int block_size */     /* block_size == 1 specifies scalar elments
 90:                               block_size == n specifies nxn constant-block elements
 91:                               block_size == 0 specifies variable-block elements */
 92:   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
 93:                            /* cache_size == 0 indicates no caching */
 94:   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
 95:                               verbose == 1 prints timing and matrix statistics */

 97:   bspai(ispai->B,&ispai->M,
 98:                    stdout,
 99:                    ispai->epsilon,
100:                    ispai->nbsteps,
101:                    ispai->max,
102:                    ispai->maxnew,
103:                    ispai->block_size,
104:                    ispai->cache_size,
105:                ispai->verbose);

107:   ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);

109:   /* free the SPAI matrices */
110:   sp_free_matrix(ispai->B);
111:   sp_free_matrix(ispai->M);

113:   return(0);
114: }

116: /**********************************************************************/

120: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121: {
122:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

126:   /* Now using PETSc's multiply */
127:   MatMult(ispai->PM,xx,y);
128:   return(0);
129: }

131: /**********************************************************************/

135: static PetscErrorCode PCDestroy_SPAI(PC pc)
136: {
138:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

141:   if (ispai->PM) {MatDestroy(ispai->PM);}
142:   MPI_Comm_free(&(ispai->comm_spai));
143:   PetscFree(ispai);
144:   return(0);
145: }

147: /**********************************************************************/

151: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
152: {
153:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
155:   PetscTruth iascii;

158:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
159:   if (iascii) {
160:     PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");
161:     PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);
162:     PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);
163:     PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);
164:     PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);
165:     PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);
166:     PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);
167:     PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);
168:     PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);

170:   }
171:   return(0);
172: }

177: PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178: {
179:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
181:   ispai->epsilon = epsilon1;
182:   return(0);
183: }
185: 
186: /**********************************************************************/

191: PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192: {
193:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
195:   ispai->nbsteps = nbsteps1;
196:   return(0);
197: }

200: /**********************************************************************/

202: /* added 1/7/99 g.h. */
206: PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
207: {
208:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
210:   ispai->max = max1;
211:   return(0);
212: }

215: /**********************************************************************/

220: PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221: {
222:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
224:   ispai->maxnew = maxnew1;
225:   return(0);
226: }

229: /**********************************************************************/

234: PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235: {
236:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
238:   ispai->block_size = block_size1;
239:   return(0);
240: }

243: /**********************************************************************/

248: PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249: {
250:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
252:   ispai->cache_size = cache_size;
253:   return(0);
254: }

257: /**********************************************************************/

262: PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
263: {
264:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
266:   ispai->verbose = verbose;
267:   return(0);
268: }

271: /**********************************************************************/

276: PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
277: {
278:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
280:   ispai->sp = sp;
281:   return(0);
282: }

285: /* -------------------------------------------------------------------*/

289: /*@
290:   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

292:   Input Parameters:
293: + pc - the preconditioner
294: - eps - epsilon (default .4)

296:   Notes:  Espilon must be between 0 and 1. It controls the
297:                  quality of the approximation of M to the inverse of
298:                  A. Higher values of epsilon lead to more work, more
299:                  fill, and usually better preconditioners. In many
300:                  cases the best choice of epsilon is the one that
301:                  divides the total solution time equally between the
302:                  preconditioner and the solver.
303:   
304:   Level: intermediate

306: .seealso: PCSPAI, PCSetType()
307:   @*/
308: PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
309: {
310:   PetscErrorCode ierr,(*f)(PC,double);
312:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);
313:   if (f) {
314:     (*f)(pc,epsilon1);
315:   }
316:   return(0);
317: }
318: 
319: /**********************************************************************/

323: /*@
324:   PCSPAISetNBSteps - set maximum number of improvement steps per row in 
325:         the SPAI preconditioner

327:   Input Parameters:
328: + pc - the preconditioner
329: - n - number of steps (default 5)

331:   Notes:  SPAI constructs to approximation to every column of
332:                  the exact inverse of A in a series of improvement
333:                  steps. The quality of the approximation is determined
334:                  by epsilon. If an approximation achieving an accuracy
335:                  of epsilon is not obtained after ns steps, SPAI simply
336:                  uses the best approximation constructed so far.

338:   Level: intermediate

340: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
341: @*/
342: PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
343: {
344:   PetscErrorCode ierr,(*f)(PC,int);
346:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);
347:   if (f) {
348:     (*f)(pc,nbsteps1);
349:   }
350:   return(0);
351: }

353: /**********************************************************************/

355: /* added 1/7/99 g.h. */
358: /*@
359:   PCSPAISetMax - set the size of various working buffers in 
360:         the SPAI preconditioner

362:   Input Parameters:
363: + pc - the preconditioner
364: - n - size (default is 5000)

366:   Level: intermediate

368: .seealso: PCSPAI, PCSetType()
369: @*/
370: PetscErrorCode  PCSPAISetMax(PC pc,int max1)
371: {
372:   PetscErrorCode ierr,(*f)(PC,int);
374:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);
375:   if (f) {
376:     (*f)(pc,max1);
377:   }
378:   return(0);
379: }

381: /**********************************************************************/

385: /*@
386:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
387:    in SPAI preconditioner

389:   Input Parameters:
390: + pc - the preconditioner
391: - n - maximum number (default 5)

393:   Level: intermediate

395: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
396: @*/
397: PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
398: {
399:   PetscErrorCode ierr,(*f)(PC,int);
401:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);
402:   if (f) {
403:     (*f)(pc,maxnew1);
404:   }
405:   return(0);
406: }

408: /**********************************************************************/

412: /*@
413:   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

415:   Input Parameters:
416: + pc - the preconditioner
417: - n - block size (default 1)

419:   Notes: A block
420:                  size of 1 treats A as a matrix of scalar elements. A
421:                  block size of s > 1 treats A as a matrix of sxs
422:                  blocks. A block size of 0 treats A as a matrix with
423:                  variable sized blocks, which are determined by
424:                  searching for dense square diagonal blocks in A.
425:                  This can be very effective for finite-element
426:                  matrices.

428:                  SPAI will convert A to block form, use a block
429:                  version of the preconditioner algorithm, and then
430:                  convert the result back to scalar form.

432:                  In many cases the a block-size parameter other than 1
433:                  can lead to very significant improvement in
434:                  performance.


437:   Level: intermediate

439: .seealso: PCSPAI, PCSetType()
440: @*/
441: PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
442: {
443:   PetscErrorCode ierr,(*f)(PC,int);
445:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);
446:   if (f) {
447:     (*f)(pc,block_size1);
448:   }
449:   return(0);
450: }

452: /**********************************************************************/

456: /*@
457:   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

459:   Input Parameters:
460: + pc - the preconditioner
461: - n -  cache size {0,1,2,3,4,5} (default 5)

463:   Notes:    SPAI uses a hash table to cache messages and avoid
464:                  redundant communication. If suggest always using
465:                  5. This parameter is irrelevant in the serial
466:                  version.

468:   Level: intermediate

470: .seealso: PCSPAI, PCSetType()
471: @*/
472: PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
473: {
474:   PetscErrorCode ierr,(*f)(PC,int);
476:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);
477:   if (f) {
478:     (*f)(pc,cache_size);
479:   }
480:   return(0);
481: }

483: /**********************************************************************/

487: /*@
488:   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

490:   Input Parameters:
491: + pc - the preconditioner
492: - n - level (default 1)

494:   Notes: print parameters, timings and matrix statistics

496:   Level: intermediate

498: .seealso: PCSPAI, PCSetType()
499: @*/
500: PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
501: {
502:   PetscErrorCode ierr,(*f)(PC,int);
504:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);
505:   if (f) {
506:     (*f)(pc,verbose);
507:   }
508:   return(0);
509: }

511: /**********************************************************************/

515: /*@
516:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

518:   Input Parameters:
519: + pc - the preconditioner
520: - n - 0 or 1

522:   Notes: If A has a symmetric nonzero pattern use -sp 1 to
523:                  improve performance by eliminating some communication
524:                  in the parallel version. Even if A does not have a
525:                  symmetric nonzero pattern -sp 1 may well lead to good
526:                  results, but the code will not follow the published
527:                  SPAI algorithm exactly.


530:   Level: intermediate

532: .seealso: PCSPAI, PCSetType()
533: @*/
534: PetscErrorCode  PCSPAISetSp(PC pc,int sp)
535: {
536:   PetscErrorCode ierr,(*f)(PC,int);
538:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);
539:   if (f) {
540:     (*f)(pc,sp);
541:   }
542:   return(0);
543: }

545: /**********************************************************************/

547: /**********************************************************************/

551: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
552: {
553:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
555:   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
556:   double         epsilon1;
557:   PetscTruth     flg;

560:   PetscOptionsHead("SPAI options");
561:     PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
562:     if (flg) {
563:       PCSPAISetEpsilon(pc,epsilon1);
564:     }
565:     PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
566:     if (flg) {
567:       PCSPAISetNBSteps(pc,nbsteps1);
568:     }
569:     /* added 1/7/99 g.h. */
570:     PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
571:     if (flg) {
572:       PCSPAISetMax(pc,max1);
573:     }
574:     PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
575:     if (flg) {
576:       PCSPAISetMaxNew(pc,maxnew1);
577:     }
578:     PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
579:     if (flg) {
580:       PCSPAISetBlockSize(pc,block_size1);
581:     }
582:     PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
583:     if (flg) {
584:       PCSPAISetCacheSize(pc,cache_size);
585:     }
586:     PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
587:     if (flg) {
588:       PCSPAISetVerbose(pc,verbose);
589:     }
590:     PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
591:     if (flg) {
592:       PCSPAISetSp(pc,sp);
593:     }
594:   PetscOptionsTail();
595:   return(0);
596: }

598: /**********************************************************************/

600: /*MC
601:    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
602:      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

604:    Options Database Keys:
605: +  -pc_spai_set_epsilon <eps> - set tolerance
606: .  -pc_spai_nbstep <n> - set nbsteps
607: .  -pc_spai_max <m> - set max
608: .  -pc_spai_max_new <m> - set maxnew
609: .  -pc_spai_block_size <n> - set block size
610: .  -pc_spai_cache_size <n> - set cache size
611: .  -pc_spai_sp <m> - set sp
612: -  -pc_spai_set_verbose <true,false> - verbose output

614:    Notes: This only works with AIJ matrices.

616:    Level: beginner

618:    Concepts: approximate inverse

620: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
621:     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
622:     PCSPAISetVerbose(), PCSPAISetSp()
623: M*/

628: PetscErrorCode  PCCreate_SPAI(PC pc)
629: {
630:   PC_SPAI *ispai;

634:   PetscNew(PC_SPAI,&ispai);
635:   pc->data           = (void*)ispai;

637:   pc->ops->destroy         = PCDestroy_SPAI;
638:   pc->ops->apply           = PCApply_SPAI;
639:   pc->ops->applyrichardson = 0;
640:   pc->ops->setup           = PCSetUp_SPAI;
641:   pc->ops->view            = PCView_SPAI;
642:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

644:   pc->name          = 0;
645:   ispai->epsilon    = .4;
646:   ispai->nbsteps    = 5;
647:   ispai->max        = 5000;
648:   ispai->maxnew     = 5;
649:   ispai->block_size = 1;
650:   ispai->cache_size = 5;
651:   ispai->verbose    = 0;

653:   ispai->sp         = 1;
654:   MPI_Comm_dup(pc->comm,&(ispai->comm_spai));

656:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657:                     "PCSPAISetEpsilon_SPAI",
658:                      PCSPAISetEpsilon_SPAI);
659:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660:                     "PCSPAISetNBSteps_SPAI",
661:                      PCSPAISetNBSteps_SPAI);
662:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663:                     "PCSPAISetMax_SPAI",
664:                      PCSPAISetMax_SPAI);
665:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666:                     "PCSPAISetMaxNew_SPAI",
667:                      PCSPAISetMaxNew_SPAI);
668:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669:                     "PCSPAISetBlockSize_SPAI",
670:                      PCSPAISetBlockSize_SPAI);
671:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672:                     "PCSPAISetCacheSize_SPAI",
673:                      PCSPAISetCacheSize_SPAI);
674:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675:                     "PCSPAISetVerbose_SPAI",
676:                      PCSPAISetVerbose_SPAI);
677:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678:                     "PCSPAISetSp_SPAI",
679:                      PCSPAISetSp_SPAI);

681:   return(0);
682: }

685: /**********************************************************************/

687: /*
688:    Converts from a PETSc matrix to an SPAI matrix 
689: */
692: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693: {
694:   matrix                  *M;
695:   int                     i,j,col;
696:   int                     row_indx;
697:   int                     len,pe,local_indx,start_indx;
698:   int                     *mapping;
700:   const int               *cols;
701:   const double            *vals;
702:   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
703:   struct compressed_lines *rows;

706: 
707:   MPI_Comm_size(comm,&size);
708:   MPI_Comm_rank(comm,&rank);
709:   MatGetSize(A,&n,&n);
710:   MatGetLocalSize(A,&mnl,&nnl);

712:   /*
713:     not sure why a barrier is required. commenting out
714:   MPI_Barrier(comm);
715:   */

717:   M = new_matrix((void*)comm);
718: 
719:   M->n = n;
720:   M->bs = 1;
721:   M->max_block_size = 1;

723:   M->mnls          = (int*)malloc(sizeof(int)*size);
724:   M->start_indices = (int*)malloc(sizeof(int)*size);
725:   M->pe            = (int*)malloc(sizeof(int)*n);
726:   M->block_sizes   = (int*)malloc(sizeof(int)*n);
727:   for (i=0; i<n; i++) M->block_sizes[i] = 1;

729:   MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);

731:   M->start_indices[0] = 0;
732:   for (i=1; i<size; i++) {
733:     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
734:   }

736:   M->mnl = M->mnls[M->myid];
737:   M->my_start_index = M->start_indices[M->myid];

739:   for (i=0; i<size; i++) {
740:     start_indx = M->start_indices[i];
741:     for (j=0; j<M->mnls[i]; j++)
742:       M->pe[start_indx+j] = i;
743:   }

745:   if (AT) {
746:     M->lines = new_compressed_lines(M->mnls[rank],1);
747:   } else {
748:     M->lines = new_compressed_lines(M->mnls[rank],0);
749:   }

751:   rows     = M->lines;

753:   /* Determine the mapping from global indices to pointers */
754:   PetscMalloc(M->n*sizeof(int),&mapping);
755:   pe         = 0;
756:   local_indx = 0;
757:   for (i=0; i<M->n; i++) {
758:     if (local_indx >= M->mnls[pe]) {
759:       pe++;
760:       local_indx = 0;
761:     }
762:     mapping[i] = local_indx + M->start_indices[pe];
763:     local_indx++;
764:   }


767:   PetscMalloc(mnl*sizeof(int),&num_ptr);

769:   /*********************************************************/
770:   /************** Set up the row structure *****************/
771:   /*********************************************************/

773:   /* count number of nonzeros in every row */
774:   MatGetOwnershipRange(A,&rstart,&rend);
775:   for (i=rstart; i<rend; i++) {
776:     MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
777:     MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
778:   }

780:   /* allocate buffers */
781:   len = 0;
782:   for (i=0; i<mnl; i++) {
783:     if (len < num_ptr[i]) len = num_ptr[i];
784:   }

786:   for (i=rstart; i<rend; i++) {
787:     row_indx             = i-rstart;
788:     len                  = num_ptr[row_indx];
789:     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
790:     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
791:   }

793:   /* copy the matrix */
794:   for (i=rstart; i<rend; i++) {
795:     row_indx = i - rstart;
796:     MatGetRow(A,i,&nz,&cols,&vals);
797:     for (j=0; j<nz; j++) {
798:       col = cols[j];
799:       len = rows->len[row_indx]++;
800:       rows->ptrs[row_indx][len] = mapping[col];
801:       rows->A[row_indx][len]    = vals[j];
802:     }
803:     rows->slen[row_indx] = rows->len[row_indx];
804:     MatRestoreRow(A,i,&nz,&cols,&vals);
805:   }


808:   /************************************************************/
809:   /************** Set up the column structure *****************/
810:   /*********************************************************/

812:   if (AT) {

814:     /* count number of nonzeros in every column */
815:     for (i=rstart; i<rend; i++) {
816:       MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
817:       MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
818:     }

820:     /* allocate buffers */
821:     len = 0;
822:     for (i=0; i<mnl; i++) {
823:       if (len < num_ptr[i]) len = num_ptr[i];
824:     }

826:     for (i=rstart; i<rend; i++) {
827:       row_indx = i-rstart;
828:       len      = num_ptr[row_indx];
829:       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
830:     }

832:     /* copy the matrix (i.e., the structure) */
833:     for (i=rstart; i<rend; i++) {
834:       row_indx = i - rstart;
835:       MatGetRow(AT,i,&nz,&cols,&vals);
836:       for (j=0; j<nz; j++) {
837:         col = cols[j];
838:         len = rows->rlen[row_indx]++;
839:         rows->rptrs[row_indx][len] = mapping[col];
840:       }
841:       MatRestoreRow(AT,i,&nz,&cols,&vals);
842:     }
843:   }

845:   PetscFree(num_ptr);
846:   PetscFree(mapping);

848:   order_pointers(M);
849:   M->maxnz = calc_maxnz(M);

851:   *B = M;

853:   return(0);
854: }

856: /**********************************************************************/

858: /*
859:    Converts from an SPAI matrix B  to a PETSc matrix PB.
860:    This assumes that the the SPAI matrix B is stored in
861:    COMPRESSED-ROW format.
862: */
865: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
866: {
867:   int         size,rank;
869:   int         m,n,M,N;
870:   int         d_nz,o_nz;
871:   int         *d_nnz,*o_nnz;
872:   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
873:   PetscScalar val;

876:   MPI_Comm_size(comm,&size);
877:   MPI_Comm_rank(comm,&rank);
878: 
879:   m = n = B->mnls[rank];
880:   d_nz = o_nz = 0;

882:   /* Determine preallocation for MatCreateMPIAIJ */
883:   PetscMalloc(m*sizeof(int),&d_nnz);
884:   PetscMalloc(m*sizeof(int),&o_nnz);
885:   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
886:   first_diag_col = B->start_indices[rank];
887:   last_diag_col = first_diag_col + B->mnls[rank];
888:   for (i=0; i<B->mnls[rank]; i++) {
889:     for (k=0; k<B->lines->len[i]; k++) {
890:       global_col = B->lines->ptrs[i][k];
891:       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
892:         d_nnz[i]++;
893:       else
894:         o_nnz[i]++;
895:     }
896:   }

898:   M = N = B->n;
899:   /* Here we only know how to create AIJ format */
900:   MatCreate(comm,PB);
901:   MatSetSizes(*PB,m,n,M,N);
902:   MatSetType(*PB,MATAIJ);
903:   MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
904:   MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);

906:   for (i=0; i<B->mnls[rank]; i++) {
907:     global_row = B->start_indices[rank]+i;
908:     for (k=0; k<B->lines->len[i]; k++) {
909:       global_col = B->lines->ptrs[i][k];
910:       val = B->lines->A[i][k];
911:       MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
912:     }
913:   }

915:   PetscFree(d_nnz);
916:   PetscFree(o_nnz);

918:   MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
919:   MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);

921:   return(0);
922: }

924: /**********************************************************************/

926: /*
927:    Converts from an SPAI vector v  to a PETSc vec Pv.
928: */
931: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
932: {
934:   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
935: 
937:   MPI_Comm_size(comm,&size);
938:   MPI_Comm_rank(comm,&rank);
939: 
940:   m = v->mnl;
941:   M = v->n;
942: 
943: 
944:   VecCreateMPI(comm,m,M,Pv);

946:   PetscMalloc(size*sizeof(int),&mnls);
947:   MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);
948: 
949:   PetscMalloc(size*sizeof(int),&start_indices);
950:   start_indices[0] = 0;
951:   for (i=1; i<size; i++)
952:     start_indices[i] = start_indices[i-1] +mnls[i-1];
953: 
954:   PetscMalloc(v->mnl*sizeof(int),&global_indices);
955:   for (i=0; i<v->mnl; i++)
956:     global_indices[i] = start_indices[rank] + i;

958:   PetscFree(mnls);
959:   PetscFree(start_indices);
960: 
961:   VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);

963:   PetscFree(global_indices);

965:   VecAssemblyBegin(*Pv);
966:   VecAssemblyEnd(*Pv);
967: 
968:   return(0);
969: }