Actual source code: redundant.c

  1: #define PETSCKSP_DLL

  3: /*
  4:   This file defines a "solve the problem redundantly on each subgroup of processors" preconditioner.
  5: */
 6:  #include "private/pcimpl.h"
 7:  #include "petscksp.h"

  9: typedef struct {
 10:   PC         pc;                   /* actual preconditioner used on each processor */
 11:   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
 12:   Vec        xdup,ydup;            /* parallel vectors that gather xsub or ysub to facilitate vector scattering */
 13:   Mat        pmats;                /* matrix and optional preconditioner matrix belonging to a subcommunicator */
 14:   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
 15:   PetscTruth useparallelmat;
 16:   MPI_Comm   subcomm;              /* processors belonging to a subcommunicator implement a PC in parallel */
 17:   MPI_Comm   dupcomm;              /* processors of pc->comm with their ranks remapped so that the
 18:                                       vectors xdup/ydup get contiguous ranks when xsub/ysub are appended along their colors */
 19:   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
 20:   PetscInt   color;                /* color of processors in a subcommunicator */
 21: } PC_Redundant;

 23:  #include "src/sys/viewer/impls/ascii/asciiimpl.h"
 24: #include "petscfix.h"
 25: #include <stdarg.h>

 27: PetscErrorCode  PetscViewerRestoreSubcomm(PetscViewer viewer,PetscViewer *outviewer)
 28: {
 30:   PetscMPIInt    size;


 35:   MPI_Comm_size(viewer->comm,&size);
 36:   if (size == 1) {
 37:     PetscObjectDereference((PetscObject)viewer);
 38:     if (outviewer) *outviewer = 0;
 39:   } else if (viewer->ops->restoresingleton) {
 40:     (*viewer->ops->restoresingleton)(viewer,outviewer);
 41:   }
 42:   return(0);
 43: }

 47: PetscErrorCode PetscViewerDestroy_ASCIIh(PetscViewer viewer)
 48: {
 49:   PetscMPIInt       rank;
 50:   PetscErrorCode    ierr;
 51:   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
 52:   PetscViewerLink   *vlink;
 53:   PetscTruth        flg;

 56:   if (vascii->sviewer) {
 57:     SETERRQ(PETSC_ERR_ORDER,"ASCII PetscViewer destroyed before restoring singleton PetscViewer");
 58:   }
 59:   MPI_Comm_rank(viewer->comm,&rank);
 60:   if (!rank && vascii->fd != stderr && vascii->fd != stdout) {
 61:     if (vascii->fd) fclose(vascii->fd);
 62:     if (vascii->storecompressed) {
 63:       char par[PETSC_MAX_PATH_LEN],buf[PETSC_MAX_PATH_LEN];
 64:       FILE *fp;
 65:       PetscStrcpy(par,"gzip ");
 66:       PetscStrcat(par,vascii->filename);
 67: #if defined(PETSC_HAVE_POPEN)
 68:       PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,par,"r",&fp);
 69:       if (fgets(buf,1024,fp)) {
 70:         SETERRQ2(PETSC_ERR_LIB,"Error from compression command %s\n%s",par,buf);
 71:       }
 72:       PetscPClose(PETSC_COMM_SELF,fp);
 73: #else
 74:       SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
 75: #endif
 76:     }
 77:   }
 78:   PetscStrfree(vascii->filename);
 79:   PetscFree(vascii);

 81:   /* remove the viewer from the list in the MPI Communicator */
 82:   if (Petsc_Viewer_keyval == MPI_KEYVAL_INVALID) {
 83:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelViewer,&Petsc_Viewer_keyval,(void*)0);
 84:   }

 86:   MPI_Attr_get(viewer->comm,Petsc_Viewer_keyval,(void**)&vlink,(PetscMPIInt*)&flg);
 87:   if (flg) {
 88:     if (vlink && vlink->viewer == viewer) {
 89:       MPI_Attr_put(viewer->comm,Petsc_Viewer_keyval,vlink->next);
 90:       PetscFree(vlink);
 91:     } else {
 92:       while (vlink && vlink->next) {
 93:         if (vlink->next->viewer == viewer) {
 94:           PetscViewerLink *nv = vlink->next;
 95:           vlink->next = vlink->next->next;
 96:           PetscFree(nv);
 97:         }
 98:         vlink = vlink->next;
 99:       }
100:     }
101:   }
102:   return(0);
103: }

107: PetscErrorCode PetscViewerDestroy_ASCII_Subcomm(PetscViewer viewer)
108: {
109:   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
110:   PetscErrorCode    ierr;
112:   PetscViewerRestoreSubcomm(vascii->bviewer,&viewer);
113:   return(0);
114: }

118: PetscErrorCode PetscViewerFlush_ASCII_Subcomm_0(PetscViewer viewer)
119: {
120:   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;

123:   fflush(vascii->fd);
124:   return(0);
125: }

129: PetscErrorCode PetscViewerGetSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
130: {
131:   PetscMPIInt       rank;
132:   PetscErrorCode    ierr;
133:   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data,*ovascii;
134:   const char        *name;

137:   if (vascii->sviewer) {
138:     SETERRQ(PETSC_ERR_ORDER,"Subcomm already obtained from PetscViewer and not restored");
139:   }
140:   /* PetscViewerCreate(PETSC_COMM_SELF,outviewer); */
141:   PetscViewerCreate(subcomm,outviewer);
142:   PetscViewerSetType(*outviewer,PETSC_VIEWER_ASCII);
143:   ovascii      = (PetscViewer_ASCII*)(*outviewer)->data;
144:   ovascii->fd  = vascii->fd;
145:   ovascii->tab = vascii->tab;

147:   vascii->sviewer = *outviewer;

149:   (*outviewer)->format     = viewer->format;
150:   (*outviewer)->iformat    = viewer->iformat;

152:   PetscObjectGetName((PetscObject)viewer,&name);
153:   PetscObjectSetName((PetscObject)(*outviewer),name);

155:   MPI_Comm_rank(viewer->comm,&rank);
156:   ((PetscViewer_ASCII*)((*outviewer)->data))->bviewer = viewer;
157:   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCII_Subcomm;
158:   if (rank) {
159:     (*outviewer)->ops->flush = 0;
160:   } else {
161:     (*outviewer)->ops->flush = PetscViewerFlush_ASCII_Subcomm_0;
162:   }
163:   return(0);
164: }

168: PetscErrorCode PetscViewerRestoreSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
169: {
170:   PetscErrorCode    ierr;
171:   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)(*outviewer)->data;
172:   PetscViewer_ASCII *ascii  = (PetscViewer_ASCII *)viewer->data;

175:   if (!ascii->sviewer) {
176:     SETERRQ(PETSC_ERR_ORDER,"Subcomm never obtained from PetscViewer");
177:   }
178:   if (ascii->sviewer != *outviewer) {
179:     SETERRQ(PETSC_ERR_ARG_WRONG,"This PetscViewer did not generate singleton");
180:   }

182:   ascii->sviewer             = 0;
183:   vascii->fd                 = stdout;
184:   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCIIh;
185:   PetscViewerDestroy(*outviewer);
186:   PetscViewerFlush(viewer);
187:   return(0);
188: }

192: static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
193: {
194:   PC_Redundant   *red = (PC_Redundant*)pc->data;
196:   PetscMPIInt    rank;
197:   PetscTruth     iascii,isstring;
198:   PetscViewer    sviewer,subviewer;
199:   PetscInt       color = red->color;

202:   MPI_Comm_rank(pc->comm,&rank);
203:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
204:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);
205:   if (iascii) {
206:     PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");
207:     PetscViewerGetSubcomm_ASCII(viewer,red->pc->comm,&subviewer);
208:     /* PetscViewerGetSingleton(viewer,&sviewer); */
209:     if (!color) {
210:       PetscViewerASCIIPushTab(viewer);
211:       PCView(red->pc,subviewer);
212:       PetscViewerASCIIPopTab(viewer);
213:     }
214:     /* PetscViewerRestoreSingleton(viewer,&sviewer); */
215:     PetscViewerRestoreSubcomm_ASCII(viewer,red->pc->comm,&subviewer);
216:   } else if (isstring) {
217:     PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");
218:     PetscViewerGetSingleton(viewer,&sviewer);
219:     if (!rank) {
220:       PCView(red->pc,sviewer);
221:     }
222:     PetscViewerRestoreSingleton(viewer,&sviewer);
223:   } else {
224:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
225:   }
226:   return(0);
227: }

 229:  #include "include/private/matimpl.h"
 230:  #include "private/vecimpl.h"
 231:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 232:  #include "src/mat/impls/aij/seq/aij.h"

234: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
235:   PetscInt       nzlocal,nsends,nrecvs;
236:   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
237:   PetscScalar    *sbuf_a,**rbuf_a;
238:   PetscErrorCode (*MatDestroy)(Mat);
239: } Mat_Redundant;

243: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
244: {
245:   PetscErrorCode       ierr;
246:   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
247:   PetscInt             i;

250:   PetscFree(redund->send_rank);
251:   PetscFree(redund->sbuf_j);
252:   PetscFree(redund->sbuf_a);
253:   for (i=0; i<redund->nrecvs; i++){
254:     PetscFree(redund->rbuf_j[i]);
255:     PetscFree(redund->rbuf_a[i]);
256:   }
257:   PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);
258:   PetscFree(redund);
259:   return(0);
260: }

264: PetscErrorCode MatDestroy_MatRedundant(Mat A)
265: {
266:   PetscErrorCode       ierr;
267:   PetscContainer container;
268:   Mat_Redundant        *redund=PETSC_NULL;

271:   PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
272:   if (container) {
273:     PetscContainerGetPointer(container,(void **)&redund);
274:   } else {
 275:     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
276:   }
277:   A->ops->destroy = redund->MatDestroy;
278:   PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
279:   (*A->ops->destroy)(A);
280:   PetscContainerDestroy(container);
281:   return(0);
282: }

286: PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
287: {
288:   PetscMPIInt    rank,size;
289:   MPI_Comm       comm=mat->comm;
291:   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
292:   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
293:   PetscInt       *rowrange=mat->rmap.range;
294:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
295:   Mat            A=aij->A,B=aij->B,C=*matredundant;
296:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
297:   PetscScalar    *sbuf_a;
298:   PetscInt       nzlocal=a->nz+b->nz;
299:   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
300:   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
301:   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
302:   PetscScalar    *vals,*aworkA,*aworkB;
303:   PetscMPIInt    tag1,tag2,tag3,imdex;
304:   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
305:                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
306:   MPI_Status     recv_status,*send_status;
307:   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
308:   PetscInt       **rbuf_j=PETSC_NULL;
309:   PetscScalar    **rbuf_a=PETSC_NULL;
310:   Mat_Redundant  *redund=PETSC_NULL;
311:   PetscContainer container;

314:   MPI_Comm_rank(comm,&rank);
315:   MPI_Comm_size(comm,&size);

317:   if (reuse == MAT_REUSE_MATRIX) {
318:     MatGetSize(C,&M,&N);
319:     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
320:     MatGetLocalSize(C,&M,&N);
321:     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
322:     PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
323:     if (container) {
324:       PetscContainerGetPointer(container,(void **)&redund);
325:     } else {
 326:       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
327:     }
328:     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

330:     nsends    = redund->nsends;
331:     nrecvs    = redund->nrecvs;
332:     send_rank = redund->send_rank; recv_rank = send_rank + size;
333:     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
334:     sbuf_j    = redund->sbuf_j;
335:     sbuf_a    = redund->sbuf_a;
336:     rbuf_j    = redund->rbuf_j;
337:     rbuf_a    = redund->rbuf_a;
338:   }

340:   if (reuse == MAT_INITIAL_MATRIX){
341:     PetscMPIInt  subrank,subsize;
342:     PetscInt     nleftover,np_subcomm;
343:     /* get the destination processors' id send_rank, nsends and nrecvs */
344:     MPI_Comm_rank(subcomm,&subrank);
345:     MPI_Comm_size(subcomm,&subsize);
346:     PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
347:     recv_rank = send_rank + size;
348:     np_subcomm = size/nsubcomm;
349:     nleftover  = size - nsubcomm*np_subcomm;
350:     nsends = 0; nrecvs = 0;
351:     for (i=0; i<size; i++){ /* i=rank*/
352:       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
353:         send_rank[nsends] = i; nsends++;
354:         recv_rank[nrecvs++] = i;
355:       }
356:     }
357:     if (rank >= size - nleftover){/* this proc is a leftover processor */
358:       i = size-nleftover-1;
359:       j = 0;
360:       while (j < nsubcomm - nleftover){
361:         send_rank[nsends++] = i;
362:         i--; j++;
363:       }
364:     }

366:     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
367:       for (i=0; i<nleftover; i++){
368:         recv_rank[nrecvs++] = size-nleftover+i;
369:       }
370:     }

372:     /* allocate sbuf_j, sbuf_a */
373:     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
374:     PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
375:     PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
 376:   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
377: 
378:   /* copy mat's local entries into the buffers */
379:   if (reuse == MAT_INITIAL_MATRIX){
380:     rownz_max = 0;
381:     rptr = sbuf_j;
382:     cols = sbuf_j + rend-rstart + 1;
383:     vals = sbuf_a;
384:     rptr[0] = 0;
385:     for (i=0; i<rend-rstart; i++){
386:       row = i + rstart;
387:       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
388:       ncols  = nzA + nzB;
389:       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
390:       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
391:       /* load the column indices for this row into cols */
392:       lwrite = 0;
393:       for (l=0; l<nzB; l++) {
394:         if ((ctmp = bmap[cworkB[l]]) < cstart){
395:           vals[lwrite]   = aworkB[l];
396:           cols[lwrite++] = ctmp;
397:         }
398:       }
399:       for (l=0; l<nzA; l++){
400:         vals[lwrite]   = aworkA[l];
401:         cols[lwrite++] = cstart + cworkA[l];
402:       }
403:       for (l=0; l<nzB; l++) {
404:         if ((ctmp = bmap[cworkB[l]]) >= cend){
405:           vals[lwrite]   = aworkB[l];
406:           cols[lwrite++] = ctmp;
407:         }
408:       }
409:       vals += ncols;
410:       cols += ncols;
411:       rptr[i+1] = rptr[i] + ncols;
412:       if (rownz_max < ncols) rownz_max = ncols;
413:     }
 414:     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
415:   } else { /* only copy matrix values into sbuf_a */
416:     rptr = sbuf_j;
417:     vals = sbuf_a;
418:     rptr[0] = 0;
419:     for (i=0; i<rend-rstart; i++){
420:       row = i + rstart;
421:       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
422:       ncols  = nzA + nzB;
423:       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
424:       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
425:       lwrite = 0;
426:       for (l=0; l<nzB; l++) {
427:         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
428:       }
429:       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
430:       for (l=0; l<nzB; l++) {
431:         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
432:       }
433:       vals += ncols;
434:       rptr[i+1] = rptr[i] + ncols;
435:     }
 436:   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */

 438:   /* send nzlocal to others, and recv others' nzlocal */
439:   /*--------------------------------------------------*/
440:   if (reuse == MAT_INITIAL_MATRIX){
441:     PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
442:     s_waits2 = s_waits3 + nsends;
443:     s_waits1 = s_waits2 + nsends;
444:     r_waits1 = s_waits1 + nsends;
445:     r_waits2 = r_waits1 + nrecvs;
446:     r_waits3 = r_waits2 + nrecvs;
447:   } else {
448:     PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
449:     r_waits3 = s_waits3 + nsends;
450:   }

452:   PetscObjectGetNewTag((PetscObject)mat,&tag3);
453:   if (reuse == MAT_INITIAL_MATRIX){
454:     /* get new tags to keep the communication clean */
455:     PetscObjectGetNewTag((PetscObject)mat,&tag1);
456:     PetscObjectGetNewTag((PetscObject)mat,&tag2);
457:     PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
458:     rbuf_nz = sbuf_nz + nsends;
459: 
 460:     /* post receives of others' nzlocal */
461:     for (i=0; i<nrecvs; i++){
462:       MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
463:     }
464:     /* send nzlocal to others */
465:     for (i=0; i<nsends; i++){
466:       sbuf_nz[i] = nzlocal;
467:       MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
468:     }
469:     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
470:     count = nrecvs;
471:     while (count) {
472:       MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
473:       recv_rank[imdex] = recv_status.MPI_SOURCE;
474:       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
475:       PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);

477:       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
478:       rbuf_nz[imdex] += i + 2;
479:       PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
480:       MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
481:       count--;
482:     }
483:     /* wait on sends of nzlocal */
484:     if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
 485:     /* send mat->i,j to others, and recv from others */
486:     /*------------------------------------------------*/
487:     for (i=0; i<nsends; i++){
488:       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
489:       MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
490:     }
491:     /* wait on receives of mat->i,j */
492:     /*------------------------------*/
493:     count = nrecvs;
494:     while (count) {
495:       MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
496:       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
497:       count--;
498:     }
499:     /* wait on sends of mat->i,j */
500:     /*---------------------------*/
501:     if (nsends) {
502:       MPI_Waitall(nsends,s_waits2,send_status);
503:     }
 504:   } /* end of if (reuse == MAT_INITIAL_MATRIX) */

506:   /* post receives, send and receive mat->a */
507:   /*----------------------------------------*/
508:   for (imdex=0; imdex<nrecvs; imdex++) {
509:     MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
510:   }
511:   for (i=0; i<nsends; i++){
512:     MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
513:   }
514:   count = nrecvs;
515:   while (count) {
516:     MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
517:     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
518:     count--;
519:   }
520:   if (nsends) {
521:     MPI_Waitall(nsends,s_waits3,send_status);
522:   }

524:   PetscFree2(s_waits3,send_status);
525: 
526:   /* create redundant matrix */
527:   /*-------------------------*/
528:   if (reuse == MAT_INITIAL_MATRIX){
529:     /* compute rownz_max for preallocation */
530:     for (imdex=0; imdex<nrecvs; imdex++){
531:       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
532:       rptr = rbuf_j[imdex];
533:       for (i=0; i<j; i++){
534:         ncols = rptr[i+1] - rptr[i];
535:         if (rownz_max < ncols) rownz_max = ncols;
536:       }
537:     }
538: 
539:     MatCreate(subcomm,&C);
540:     MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
541:     MatSetFromOptions(C);
542:     MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
543:     MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
544:   } else {
545:     C = *matredundant;
546:   }

548:   /* insert local matrix entries */
549:   rptr = sbuf_j;
550:   cols = sbuf_j + rend-rstart + 1;
551:   vals = sbuf_a;
552:   for (i=0; i<rend-rstart; i++){
553:     row   = i + rstart;
554:     ncols = rptr[i+1] - rptr[i];
555:     MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
556:     vals += ncols;
557:     cols += ncols;
558:   }
559:   /* insert received matrix entries */
560:   for (imdex=0; imdex<nrecvs; imdex++){
561:     rstart = rowrange[recv_rank[imdex]];
562:     rend   = rowrange[recv_rank[imdex]+1];
563:     rptr = rbuf_j[imdex];
564:     cols = rbuf_j[imdex] + rend-rstart + 1;
565:     vals = rbuf_a[imdex];
566:     for (i=0; i<rend-rstart; i++){
567:       row   = i + rstart;
568:       ncols = rptr[i+1] - rptr[i];
569:       MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
570:       vals += ncols;
571:       cols += ncols;
572:     }
573:   }
574:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
575:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
576:   MatGetSize(C,&M,&N);
577:   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
578:   if (reuse == MAT_INITIAL_MATRIX){
579:     PetscContainer container;
580:     *matredundant = C;
581:     /* create a supporting struct and attach it to C for reuse */
582:     PetscNew(Mat_Redundant,&redund);
583:     PetscContainerCreate(PETSC_COMM_SELF,&container);
584:     PetscContainerSetPointer(container,redund);
585:     PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
586:     PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
587: 
588:     redund->nzlocal = nzlocal;
589:     redund->nsends  = nsends;
590:     redund->nrecvs  = nrecvs;
591:     redund->send_rank = send_rank;
592:     redund->sbuf_nz = sbuf_nz;
593:     redund->sbuf_j  = sbuf_j;
594:     redund->sbuf_a  = sbuf_a;
595:     redund->rbuf_j  = rbuf_j;
596:     redund->rbuf_a  = rbuf_a;

598:     redund->MatDestroy = C->ops->destroy;
599:     C->ops->destroy    = MatDestroy_MatRedundant;
600:   }
601:   return(0);
602: }

606: static PetscErrorCode PCSetUp_Redundant(PC pc)
607: {
608:   PC_Redundant   *red  = (PC_Redundant*)pc->data;
610:   PetscInt       mstart,mend,mlocal,m;
611:   PetscMPIInt    size;
612:   MatReuse       reuse = MAT_INITIAL_MATRIX;
613:   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
614:   MPI_Comm       comm;
615:   Vec            vec;

617:   PetscInt       mlocal_sub;
618:   PetscMPIInt    subsize,subrank;
619:   PetscInt       rstart_sub,rend_sub,mloc_sub;

622:   PetscObjectGetComm((PetscObject)pc->pmat,&comm);
623:   MatGetVecs(pc->pmat,&vec,0);
624:   PCSetFromOptions(red->pc);
625:   VecGetSize(vec,&m);
626:   if (!pc->setupcalled) {
627:     /* create working vectors xsub/ysub and xdup/ydup */
628:     VecGetLocalSize(vec,&mlocal);
629:     VecGetOwnershipRange(vec,&mstart,&mend);

631:     /* get local size of xsub/ysub */
632:     MPI_Comm_size(red->subcomm,&subsize);
633:     MPI_Comm_rank(red->subcomm,&subrank);
634:     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
635:     if (subrank+1 < subsize){
636:       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
637:     } else {
638:       rend_sub = m;
639:     }
640:     mloc_sub = rend_sub - rstart_sub;
641:     VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);
642:     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
643:     VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);

 645:     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be placed into it. 
646:        Note: we use communicator dupcomm, not pc->comm! */
647:     VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);
648:     VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);
649: 
650:     /* create vec scatters */
651:     if (!red->scatterin){
652:       IS       is1,is2;
653:       PetscInt *idx1,*idx2,i,j,k;
654:       PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);
655:       idx2 = idx1 + red->nsubcomm*mlocal;
656:       j = 0;
657:       for (k=0; k<red->nsubcomm; k++){
658:         for (i=mstart; i<mend; i++){
659:           idx1[j]   = i;
660:           idx2[j++] = i + m*k;
661:         }
662:       }
663:       ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);
664:       ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);
665:       VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);
666:       ISDestroy(is1);
667:       ISDestroy(is2);

669:       ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);
670:       ISCreateStride(comm,mlocal,mstart,1,&is2);
671:       VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);
672:       ISDestroy(is1);
673:       ISDestroy(is2);
674:       PetscFree(idx1);
675:     }
676:   }
677:   VecDestroy(vec);

 679:   /* if the preconditioner matrix set by the user is sequential then we do not need to gather the parallel matrix */
680:   MPI_Comm_size(comm,&size);
681:   if (size == 1) {
682:     red->useparallelmat = PETSC_FALSE;
683:   }

685:   if (red->useparallelmat) {
686:     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
687:       /* destroy old matrices */
688:       if (red->pmats) {
689:         MatDestroy(red->pmats);
690:       }
691:     } else if (pc->setupcalled == 1) {
692:       reuse = MAT_REUSE_MATRIX;
693:       str   = SAME_NONZERO_PATTERN;
694:     }
695: 
 696:     /* grab the parallel matrix and put it into the processors of a subcommunicator */
697:     /*--------------------------------------------------------------------------*/
698:     VecGetLocalSize(red->ysub,&mlocal_sub);
699:     MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);
 700:     /* tell the PC of the subcommunicator what its operators are */
701:     PCSetOperators(red->pc,red->pmats,red->pmats,str);
702:   } else {
703:     PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);
704:   }
705:   PCSetFromOptions(red->pc);
706:   PCSetUp(red->pc);
707:   return(0);
708: }

712: static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
713: {
714:   PC_Redundant   *red = (PC_Redundant*)pc->data;
716:   PetscScalar    *array;

719:   /* scatter x to xdup */
720:   VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);
721:   VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);
722: 
723:   /* place xdup's local array into xsub */
724:   VecGetArray(red->xdup,&array);
725:   VecPlaceArray(red->xsub,(const PetscScalar*)array);

727:   /* apply preconditioner on each processor */
728:   PCApply(red->pc,red->xsub,red->ysub);
729:   VecResetArray(red->xsub);
730:   VecRestoreArray(red->xdup,&array);
731: 
732:   /* place ysub's local array into ydup */
733:   VecGetArray(red->ysub,&array);
734:   VecPlaceArray(red->ydup,(const PetscScalar*)array);

736:   /* scatter ydup to y */
737:   VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);
738:   VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);
739:   VecResetArray(red->ydup);
740:   VecRestoreArray(red->ysub,&array);
741:   return(0);
742: }
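
/*
   A minimal sketch, not part of the original redundant.c, of the zero-copy pattern used in
   PCApply_Redundant above: one vector temporarily borrows the local array of another vector
   with the same local size, so no entries are copied. The function and vector names here are
   hypothetical.
*/
PetscErrorCode SamplePlaceArrayPattern(Vec xdup,Vec xsub)
{
  PetscScalar *array;

  VecGetArray(xdup,&array);      /* borrow xdup's local storage */
  VecPlaceArray(xsub,array);     /* let xsub operate on that storage */
  /* ... use xsub here, e.g. as the input of PCApply() ... */
  VecResetArray(xsub);           /* detach the borrowed storage */
  VecRestoreArray(xdup,&array);
  return(0);
}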

746: static PetscErrorCode PCDestroy_Redundant(PC pc)
747: {
748:   PC_Redundant   *red = (PC_Redundant*)pc->data;

752:   if (red->scatterin)  {VecScatterDestroy(red->scatterin);}
753:   if (red->scatterout) {VecScatterDestroy(red->scatterout);}
754:   if (red->ysub)       {VecDestroy(red->ysub);}
755:   if (red->xsub)       {VecDestroy(red->xsub);}
756:   if (red->xdup)       {VecDestroy(red->xdup);}
757:   if (red->ydup)       {VecDestroy(red->ydup);}
758:   if (red->pmats) {
759:     MatDestroy(red->pmats);
760:   }


763:   PCDestroy(red->pc);
764:   PetscFree(red);
765:   return(0);
766: }

770: static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
771: {
773:   return(0);
774: }

779: PetscErrorCode  PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
780: {
781:   PC_Redundant   *red = (PC_Redundant*)pc->data;

785:   red->scatterin  = in;
786:   red->scatterout = out;
787:   PetscObjectReference((PetscObject)in);
788:   PetscObjectReference((PetscObject)out);
789:   return(0);
790: }

795: /*@
796:    PCRedundantSetScatter - Sets the scatter used to copy values into the
797:      redundant local solve and the scatter to move them back into the global
798:      vector.

800:    Collective on PC

802:    Input Parameters:
803: +  pc - the preconditioner context
804: .  in - the scatter to move the values in
805: -  out - the scatter to move them out

807:    Level: advanced

809: .keywords: PC, redundant solve
810: @*/
811: PetscErrorCode  PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
812: {
813:   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);

819:   PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);
820:   if (f) {
821:     (*f)(pc,in,out);
822:   }
823:   return(0);
824: }
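
/*
   A minimal usage sketch, not part of the original redundant.c. It assumes the caller already
   has a global vector "vec" and a duplicated vector "xdup" (laid out as in PCSetUp_Redundant);
   the stride index sets below are placeholders for whatever index sets the application really needs.
*/
PetscErrorCode SampleRedundantSetScatterUsage(PC pc,Vec vec,Vec xdup,PetscInt mlocal,PetscInt mstart)
{
  MPI_Comm   comm;
  IS         is1,is2;
  VecScatter in,out;

  PetscObjectGetComm((PetscObject)pc,&comm);
  ISCreateStride(comm,mlocal,mstart,1,&is1);
  ISCreateStride(comm,mlocal,mstart,1,&is2);
  VecScatterCreate(vec,is1,xdup,is2,&in);   /* move global values into the duplicated layout */
  VecScatterCreate(xdup,is2,vec,is1,&out);  /* move results back to the global vector */
  PCRedundantSetScatter(pc,in,out);         /* the PC keeps its own references to both scatters */
  VecScatterDestroy(in);
  VecScatterDestroy(out);
  ISDestroy(is1);
  ISDestroy(is2);
  return(0);
}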

829: PetscErrorCode  PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
830: {
831:   PC_Redundant *red = (PC_Redundant*)pc->data;

834:   *innerpc = red->pc;
835:   return(0);
836: }

841: /*@
842:    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.

844:    Not Collective

846:    Input Parameter:
847: .  pc - the preconditioner context

849:    Output Parameter:
850: .  innerpc - the sequential PC 

852:    Level: advanced

854: .keywords: PC, redundant solve
855: @*/
856: PetscErrorCode  PCRedundantGetPC(PC pc,PC *innerpc)
857: {
858:   PetscErrorCode ierr,(*f)(PC,PC*);

863:   PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);
864:   if (f) {
865:     (*f)(pc,innerpc);
866:   }
867:   return(0);
868: }
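
/*
   A minimal usage sketch, not part of the original redundant.c: retrieve the inner PC so its
   type can be changed from the default PCLU chosen in PCCreate_Redundant below. The KSP object
   "ksp" is assumed to exist already.
*/
PetscErrorCode SampleRedundantGetPCUsage(KSP ksp)
{
  PC pc,innerpc;

  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCREDUNDANT);
  PCRedundantGetPC(pc,&innerpc);
  PCSetType(innerpc,PCILU);       /* replace the default LU factorization, for example */
  return(0);
}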

873: PetscErrorCode  PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
874: {
875:   PC_Redundant *red = (PC_Redundant*)pc->data;

878:   if (mat)  *mat  = red->pmats;
879:   if (pmat) *pmat = red->pmats;
880:   return(0);
881: }

886: /*@
 887:    PCRedundantGetOperators - Gets the sequential matrix and preconditioner matrix

889:    Not Collective

891:    Input Parameter:
892: .  pc - the preconditioner context

894:    Output Parameters:
895: +  mat - the matrix
896: -  pmat - the (possibly different) preconditioner matrix

898:    Level: advanced

900: .keywords: PC, redundant solve
901: @*/
902: PetscErrorCode  PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
903: {
904:   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);

910:   PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);
911:   if (f) {
912:     (*f)(pc,mat,pmat);
913:   }
914:   return(0);
915: }
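
/*
   A minimal usage sketch, not part of the original redundant.c: after PCSetUp() the matrix
   gathered onto this process's subcommunicator can be queried, e.g. to check its size.
*/
PetscErrorCode SampleRedundantGetOperatorsUsage(PC pc)
{
  Mat      pmat;
  PetscInt M,N;

  PCRedundantGetOperators(pc,PETSC_NULL,&pmat);
  if (pmat) {
    MatGetSize(pmat,&M,&N);
    PetscPrintf(PETSC_COMM_SELF,"redundant matrix is %D by %D\n",M,N);
  }
  return(0);
}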

917: /* -------------------------------------------------------------------------------------*/
918: /*MC
919:      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor


922:      Options for the redundant preconditioners can be set with -redundant_pc_xxx

924:    Level: intermediate


927: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
928:            PCRedundantGetPC(), PCRedundantGetOperators()

930: M*/
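
/*
   A minimal usage sketch, not part of the original redundant.c: select PCREDUNDANT from code and
   let the inner preconditioner be configured through the "redundant_" options prefix appended in
   PCCreate_Redundant below (run-time equivalent: -pc_type redundant -redundant_pc_type ilu).
   The KSP object "ksp" is assumed to exist already.
*/
PetscErrorCode SamplePCRedundantUsage(KSP ksp)
{
  PC pc;

  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCREDUNDANT);
  KSPSetFromOptions(ksp);   /* standard options; the inner PC reads -redundant_pc_<...> during PCSetUp() */
  return(0);
}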

935: PetscErrorCode  PCCreate_Redundant(PC pc)
936: {
938:   PC_Redundant   *red;
939:   const char     *prefix;
940:   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
941:   PetscMPIInt    rank,size,subrank,*subsize,duprank;
942:   MPI_Comm       subcomm=0,dupcomm=0;

945:   PetscNew(PC_Redundant,&red);
946:   PetscLogObjectMemory(pc,sizeof(PC_Redundant));
947:   red->useparallelmat   = PETSC_TRUE;
948: 
949:   MPI_Comm_rank(pc->comm,&rank);
950:   MPI_Comm_size(pc->comm,&size);
951:   nsubcomm = size;
952:   PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);
953:   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);

955:   /*--------------------------------------------------------------------------------------------------
 956:     To avoid scattering data from the subcommunicators back to the original communicator, we create each
 957:     subcomm by cyclically assigning one process at a time to a subcommunicator.
958:     An example: size=4, nsubcomm=3
959:      pc->comm:
960:       rank:     [0]  [1]  [2]  [3]
961:       color:     0    1    2    0

963:      subcomm:
964:       subrank:  [0]  [0]  [0]  [1]    

966:      dupcomm:
967:       duprank:  [0]  [2]  [3]  [1]

 969:      Here, subcomm[color = 0] has subsize=2, owns processes [0] and [3]
970:            subcomm[color = 1] has subsize=1, owns process [1]
971:            subcomm[color = 2] has subsize=1, owns process [2]
 972:           dupcomm has the same number of processes as pc->comm, and its duprank maps
 973:           the processes of each subcomm contiguously into a 1d array:
974:             duprank: [0] [1]      [2]         [3]
975:             rank:    [0] [3]      [1]         [2]
976:                     subcomm[0] subcomm[1]  subcomm[2]
977:    ----------------------------------------------------------------------------------------*/

 979:   /* get the size of each subcommunicator */
980:   PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);
981:   np_subcomm = size/nsubcomm;
982:   nleftover  = size - nsubcomm*np_subcomm;
983:   for (i=0; i<nsubcomm; i++){
984:     subsize[i] = np_subcomm;
985:     if (i<nleftover) subsize[i]++;
986:   }

988:   /* find color for this proc */
989:   color   = rank%nsubcomm;
990:   subrank = rank/nsubcomm;

992:   MPI_Comm_split(pc->comm,color,subrank,&subcomm);
993:   red->subcomm  = subcomm;
994:   red->color    = color;
995:   red->nsubcomm = nsubcomm;

997:   j = 0; duprank = 0;
998:   for (i=0; i<nsubcomm; i++){
999:     if (j == color){
1000:       duprank += subrank;
1001:       break;
1002:     }
1003:     duprank += subsize[i]; j++;
1004:   }
1005: 
 1006:   /* create dupcomm with the same size as comm, but with a rank, duprank, that maps the subcomms contiguously into dupcomm */
1007:   MPI_Comm_split(pc->comm,0,duprank,&dupcomm);
1008:   red->dupcomm = dupcomm;
1009:   PetscFree(subsize);

 1011:   /* create the sequential PC that each processor has a copy of */
1012:   PCCreate(subcomm,&red->pc);
1013:   PCSetType(red->pc,PCLU);
1014:   PCGetOptionsPrefix(pc,&prefix);
1015:   PCSetOptionsPrefix(red->pc,prefix);
1016:   PCAppendOptionsPrefix(red->pc,"redundant_");

1018:   pc->ops->apply             = PCApply_Redundant;
1019:   pc->ops->applytranspose    = 0;
1020:   pc->ops->setup             = PCSetUp_Redundant;
1021:   pc->ops->destroy           = PCDestroy_Redundant;
1022:   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
1023:   pc->ops->setuponblocks     = 0;
1024:   pc->ops->view              = PCView_Redundant;
1025:   pc->ops->applyrichardson   = 0;

1027:   pc->data     = (void*)red;

1029:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
1030:                     PCRedundantSetScatter_Redundant);
1031:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
1032:                     PCRedundantGetPC_Redundant);
1033:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
1034:                     PCRedundantGetOperators_Redundant);

1036:   return(0);
1037: }